The following commit has been merged in the master branch:

commit 94426ed2137a948d212302df571445d4245772b9
Merge: a55b39e858901986408391c574f414ef889f1c53 2ae9a8972ce04046957f8af214509cebfd3bfb9c
Author: Jakub Kicinski <kuba@kernel.org>
Date:   Thu Apr 11 14:20:04 2024 -0700
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR.
Conflicts:

net/unix/garbage.c
  47d8ac011fe1 ("af_unix: Fix garbage collector racing against connect()")
  4090fa373f0e ("af_unix: Replace garbage collection algorithm.")
Adjacent changes:

drivers/net/ethernet/broadcom/bnxt/bnxt.c
  faa12ca24558 ("bnxt_en: Reset PTP tx_avail after possible firmware reset")
  b3d0083caf9a ("bnxt_en: Support RSS contexts in ethtool .{get|set}_rxfh()")

drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
  7ac10c7d728d ("bnxt_en: Fix possible memory leak in bnxt_rdma_aux_device_init()")
  194fad5b2781 ("bnxt_en: Refactor bnxt_rdma_aux_device_init/uninit functions")

drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
  958f56e48385 ("net/mlx5e: Un-expose functions in en.h")
  49e6c9387051 ("net/mlx5e: RSS, Block XOR hash with over 128 channels")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --combined MAINTAINERS index 4745ea94d4636,b5b89687680b9..d71eb49aaa068 --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -2191,7 -2191,6 +2191,6 @@@ N: mx
ARM/FREESCALE LAYERSCAPE ARM ARCHITECTURE M: Shawn Guo shawnguo@kernel.org - M: Li Yang leoyang.li@nxp.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/shawnguo/linux.git @@@ -3573,6 -3572,7 +3572,7 @@@ S: Supporte C: irc://irc.oftc.net/bcache T: git https://evilpiepirate.org/git/bcachefs.git F: fs/bcachefs/ + F: Documentation/filesystems/bcachefs/
BDISP ST MEDIA DRIVER M: Fabien Dessenne fabien.dessenne@foss.st.com @@@ -8015,8 -8015,6 +8015,8 @@@ F: include/linux/mii. F: include/linux/of_net.h F: include/linux/phy.h F: include/linux/phy_fixed.h +F: include/linux/phy_link_topology.h +F: include/linux/phy_link_topology_core.h F: include/linux/phylib_stubs.h F: include/linux/platform_data/mdio-bcm-unimac.h F: include/linux/platform_data/mdio-gpio.h @@@ -8525,7 -8523,6 +8525,6 @@@ S: Maintaine F: drivers/video/fbdev/fsl-diu-fb.*
FREESCALE DMA DRIVER - M: Li Yang leoyang.li@nxp.com M: Zhang Wei zw@zh-kernel.org L: linuxppc-dev@lists.ozlabs.org S: Maintained @@@ -8690,10 -8687,9 +8689,9 @@@ F: drivers/soc/fsl/qe/tsa. F: include/dt-bindings/soc/cpm1-fsl,tsa.h
FREESCALE QUICC ENGINE UCC ETHERNET DRIVER - M: Li Yang leoyang.li@nxp.com L: netdev@vger.kernel.org L: linuxppc-dev@lists.ozlabs.org - S: Maintained + S: Orphan F: drivers/net/ethernet/freescale/ucc_geth*
FREESCALE QUICC ENGINE UCC HDLC DRIVER @@@ -8710,10 -8706,9 +8708,9 @@@ S: Maintaine F: drivers/tty/serial/ucc_uart.c
FREESCALE SOC DRIVERS - M: Li Yang leoyang.li@nxp.com L: linuxppc-dev@lists.ozlabs.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) - S: Maintained + S: Orphan F: Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml F: Documentation/devicetree/bindings/soc/fsl/ F: drivers/soc/fsl/ @@@ -8747,10 -8742,9 +8744,9 @@@ F: Documentation/devicetree/bindings/so F: sound/soc/fsl/fsl_qmc_audio.c
FREESCALE USB PERIPHERAL DRIVERS - M: Li Yang leoyang.li@nxp.com L: linux-usb@vger.kernel.org L: linuxppc-dev@lists.ozlabs.org - S: Maintained + S: Orphan F: drivers/usb/gadget/udc/fsl*
FREESCALE USB PHY DRIVER @@@ -16735,9 -16729,9 +16731,9 @@@ F: include/uapi/linux/ppdev.
PARAVIRT_OPS INTERFACE M: Juergen Gross jgross@suse.com - R: Ajay Kaher akaher@vmware.com - R: Alexey Makhalov amakhalov@vmware.com - R: VMware PV-Drivers Reviewers pv-drivers@vmware.com + R: Ajay Kaher ajay.kaher@broadcom.com + R: Alexey Makhalov alexey.amakhalov@broadcom.com + R: Broadcom internal kernel review list bcm-kernel-feedback-list@broadcom.com L: virtualization@lists.linux.dev L: x86@kernel.org S: Supported @@@ -21680,7 -21674,6 +21676,7 @@@ TEAM DRIVE M: Jiri Pirko jiri@resnulli.us L: netdev@vger.kernel.org S: Supported +F: Documentation/netlink/specs/team.yaml F: drivers/net/team/ F: include/linux/if_team.h F: include/uapi/linux/if_team.h @@@ -22433,6 -22426,7 +22429,7 @@@ S: Maintaine W: https://kernsec.org/wiki/index.php/Linux_Kernel_Integrity Q: https://patchwork.kernel.org/project/linux-integrity/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jarkko/linux-tpmdd.git + F: Documentation/devicetree/bindings/tpm/ F: drivers/char/tpm/
TPS546D24 DRIVER @@@ -22579,6 -22573,7 +22576,7 @@@ Q: https://patchwork.kernel.org/project B: https://bugzilla.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/lenb/linux.git turbostat F: tools/power/x86/turbostat/ + F: tools/testing/selftests/turbostat/
TW5864 VIDEO4LINUX DRIVER M: Bluecherry Maintainers maintainers@bluecherrydvr.com @@@ -23657,9 -23652,9 +23655,9 @@@ S: Supporte F: drivers/misc/vmw_balloon.c
VMWARE HYPERVISOR INTERFACE - M: Ajay Kaher akaher@vmware.com - M: Alexey Makhalov amakhalov@vmware.com - R: VMware PV-Drivers Reviewers pv-drivers@vmware.com + M: Ajay Kaher ajay.kaher@broadcom.com + M: Alexey Makhalov alexey.amakhalov@broadcom.com + R: Broadcom internal kernel review list bcm-kernel-feedback-list@broadcom.com L: virtualization@lists.linux.dev L: x86@kernel.org S: Supported @@@ -23668,33 -23663,34 +23666,34 @@@ F: arch/x86/include/asm/vmware. F: arch/x86/kernel/cpu/vmware.c
VMWARE PVRDMA DRIVER - M: Bryan Tan bryantan@vmware.com - M: Vishnu Dasa vdasa@vmware.com - R: VMware PV-Drivers Reviewers pv-drivers@vmware.com + M: Bryan Tan bryan-bt.tan@broadcom.com + M: Vishnu Dasa vishnu.dasa@broadcom.com + R: Broadcom internal kernel review list bcm-kernel-feedback-list@broadcom.com L: linux-rdma@vger.kernel.org S: Supported F: drivers/infiniband/hw/vmw_pvrdma/
VMWARE PVSCSI DRIVER - M: Vishal Bhakta vbhakta@vmware.com - R: VMware PV-Drivers Reviewers pv-drivers@vmware.com + M: Vishal Bhakta vishal.bhakta@broadcom.com + R: Broadcom internal kernel review list bcm-kernel-feedback-list@broadcom.com L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/vmw_pvscsi.c F: drivers/scsi/vmw_pvscsi.h
VMWARE VIRTUAL PTP CLOCK DRIVER - R: Ajay Kaher akaher@vmware.com - R: Alexey Makhalov amakhalov@vmware.com - R: VMware PV-Drivers Reviewers pv-drivers@vmware.com + M: Nick Shi nick.shi@broadcom.com + R: Ajay Kaher ajay.kaher@broadcom.com + R: Alexey Makhalov alexey.amakhalov@broadcom.com + R: Broadcom internal kernel review list bcm-kernel-feedback-list@broadcom.com L: netdev@vger.kernel.org S: Supported F: drivers/ptp/ptp_vmw.c
VMWARE VMCI DRIVER - M: Bryan Tan bryantan@vmware.com - M: Vishnu Dasa vdasa@vmware.com - R: VMware PV-Drivers Reviewers pv-drivers@vmware.com + M: Bryan Tan bryan-bt.tan@broadcom.com + M: Vishnu Dasa vishnu.dasa@broadcom.com + R: Broadcom internal kernel review list bcm-kernel-feedback-list@broadcom.com L: linux-kernel@vger.kernel.org S: Supported F: drivers/misc/vmw_vmci/ @@@ -23709,16 -23705,16 +23708,16 @@@ F: drivers/input/mouse/vmmouse. F: drivers/input/mouse/vmmouse.h
VMWARE VMXNET3 ETHERNET DRIVER - M: Ronak Doshi doshir@vmware.com - R: VMware PV-Drivers Reviewers pv-drivers@vmware.com + M: Ronak Doshi ronak.doshi@broadcom.com + R: Broadcom internal kernel review list bcm-kernel-feedback-list@broadcom.com L: netdev@vger.kernel.org S: Supported F: drivers/net/vmxnet3/
VMWARE VSOCK VMCI TRANSPORT DRIVER - M: Bryan Tan bryantan@vmware.com - M: Vishnu Dasa vdasa@vmware.com - R: VMware PV-Drivers Reviewers pv-drivers@vmware.com + M: Bryan Tan bryan-bt.tan@broadcom.com + M: Vishnu Dasa vishnu.dasa@broadcom.com + R: Broadcom internal kernel review list bcm-kernel-feedback-list@broadcom.com L: linux-kernel@vger.kernel.org S: Supported F: net/vmw_vsock/vmci_transport* diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt.c index d728df139c9c7,57e61f9631678..a30df865be7b3 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@@ -76,7 -76,7 +76,7 @@@ NETIF_MSG_TX_ERR)
MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Broadcom BCM573xx network driver"); +MODULE_DESCRIPTION("Broadcom NetXtreme network driver");
#define BNXT_RX_OFFSET (NET_SKB_PAD + NET_IP_ALIGN) #define BNXT_RX_DMA_OFFSET NET_SKB_PAD @@@ -1296,9 -1296,9 +1296,9 @@@ static int bnxt_agg_bufs_valid(struct b return RX_AGG_CMP_VALID(agg, *raw_cons); }
-static inline struct sk_buff *bnxt_copy_skb(struct bnxt_napi *bnapi, u8 *data, - unsigned int len, - dma_addr_t mapping) +static struct sk_buff *bnxt_copy_data(struct bnxt_napi *bnapi, u8 *data, + unsigned int len, + dma_addr_t mapping) { struct bnxt *bp = bnapi->bp; struct pci_dev *pdev = bp->pdev; @@@ -1318,39 -1318,6 +1318,39 @@@ bp->rx_dir);
skb_put(skb, len); + + return skb; +} + +static struct sk_buff *bnxt_copy_skb(struct bnxt_napi *bnapi, u8 *data, + unsigned int len, + dma_addr_t mapping) +{ + return bnxt_copy_data(bnapi, data, len, mapping); +} + +static struct sk_buff *bnxt_copy_xdp(struct bnxt_napi *bnapi, + struct xdp_buff *xdp, + unsigned int len, + dma_addr_t mapping) +{ + unsigned int metasize = 0; + u8 *data = xdp->data; + struct sk_buff *skb; + + len = xdp->data_end - xdp->data_meta; + metasize = xdp->data - xdp->data_meta; + data = xdp->data_meta; + + skb = bnxt_copy_data(bnapi, data, len, mapping); + if (!skb) + return skb; + + if (metasize) { + skb_metadata_set(skb, metasize); + __skb_pull(skb, metasize); + } + return skb; }
@@@ -2137,17 -2104,14 +2137,17 @@@ static int bnxt_rx_pkt(struct bnxt *bp }
if (xdp_active) { - if (bnxt_rx_xdp(bp, rxr, cons, xdp, data, &data_ptr, &len, event)) { + if (bnxt_rx_xdp(bp, rxr, cons, &xdp, data, &data_ptr, &len, event)) { rc = 1; goto next_rx; } }
if (len <= bp->rx_copy_thresh) { - skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr); + if (!xdp_active) + skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr); + else + skb = bnxt_copy_xdp(bnapi, &xdp, len, dma_addr); bnxt_reuse_rx_data(rxr, cons, data); if (!skb) { if (agg_bufs) { @@@ -2525,9 -2489,6 +2525,9 @@@ static bool bnxt_event_error_report(str } return false; } + case ASYNC_EVENT_CMPL_ERROR_REPORT_BASE_EVENT_DATA1_ERROR_TYPE_DUAL_DATA_RATE_NOT_SUPPORTED: + netdev_warn(bp->dev, "Speed change not supported with dual rate transceivers on this board\n"); + break; default: netdev_err(bp->dev, "FW reported unknown error type %u\n", err_type); @@@ -3598,15 -3559,14 +3598,15 @@@ static void bnxt_free_rx_rings(struct b }
static int bnxt_alloc_rx_page_pool(struct bnxt *bp, - struct bnxt_rx_ring_info *rxr) + struct bnxt_rx_ring_info *rxr, + int numa_node) { struct page_pool_params pp = { 0 };
pp.pool_size = bp->rx_agg_ring_size; if (BNXT_RX_PAGE_MODE(bp)) pp.pool_size += bp->rx_ring_size; - pp.nid = dev_to_node(&bp->pdev->dev); + pp.nid = numa_node; pp.napi = &rxr->bnapi->napi; pp.netdev = bp->dev; pp.dev = &bp->pdev->dev; @@@ -3626,8 -3586,7 +3626,8 @@@
static int bnxt_alloc_rx_rings(struct bnxt *bp) { - int i, rc = 0, agg_rings = 0; + int numa_node = dev_to_node(&bp->pdev->dev); + int i, rc = 0, agg_rings = 0, cpu;
if (!bp->rx_ring) return -ENOMEM; @@@ -3638,15 -3597,10 +3638,15 @@@ for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring; + int cpu_node;
ring = &rxr->rx_ring_struct;
- rc = bnxt_alloc_rx_page_pool(bp, rxr); + cpu = cpumask_local_spread(i, numa_node); + cpu_node = cpu_to_node(cpu); + netdev_dbg(bp->dev, "Allocating page pool for rx_ring[%d] on numa_node: %d\n", + i, cpu_node); + rc = bnxt_alloc_rx_page_pool(bp, rxr, cpu_node); if (rc) return rc;
@@@ -3905,12 -3859,13 +3905,12 @@@ static int bnxt_alloc_cp_sub_ring(struc static int bnxt_alloc_cp_rings(struct bnxt *bp) { bool sh = !!(bp->flags & BNXT_FLAG_SHARED_RINGS); - int i, j, rc, ulp_base_vec, ulp_msix; + int i, j, rc, ulp_msix; int tcs = bp->num_tc;
if (!tcs) tcs = 1; ulp_msix = bnxt_get_ulp_msix_num(bp); - ulp_base_vec = bnxt_get_ulp_msix_base(bp); for (i = 0, j = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; struct bnxt_cp_ring_info *cpr, *cpr2; @@@ -3929,7 -3884,10 +3929,7 @@@ if (rc) return rc;
- if (ulp_msix && i >= ulp_base_vec) - ring->map_idx = i + ulp_msix; - else - ring->map_idx = i; + ring->map_idx = ulp_msix + i;
if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) continue; @@@ -4283,7 -4241,6 +4283,7 @@@ static void bnxt_init_vnics(struct bnx int j;
vnic->fw_vnic_id = INVALID_HW_RING_ID; + vnic->vnic_id = i; for (j = 0; j < BNXT_MAX_CTX_PER_VNIC; j++) vnic->fw_rss_cos_lb_ctx[j] = INVALID_HW_RING_ID;
@@@ -5831,22 -5788,8 +5831,22 @@@ void bnxt_fill_ipv6_mask(__be32 mask[4] static void bnxt_cfg_rfs_ring_tbl_idx(struct bnxt *bp, struct hwrm_cfa_ntuple_filter_alloc_input *req, - u16 rxq) + struct bnxt_ntuple_filter *fltr) { + struct bnxt_rss_ctx *rss_ctx, *tmp; + u16 rxq = fltr->base.rxq; + + if (fltr->base.flags & BNXT_ACT_RSS_CTX) { + list_for_each_entry_safe(rss_ctx, tmp, &bp->rss_ctx_list, list) { + if (rss_ctx->index == fltr->base.fw_vnic_id) { + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + + req->dst_id = cpu_to_le16(vnic->fw_vnic_id); + break; + } + } + return; + } if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) { struct bnxt_vnic_info *vnic; u32 enables; @@@ -5887,7 -5830,7 +5887,7 @@@ int bnxt_hwrm_cfa_ntuple_filter_alloc(s req->flags = cpu_to_le32(CFA_NTUPLE_FILTER_ALLOC_REQ_FLAGS_DROP); } else if (bp->fw_cap & BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2) { - bnxt_cfg_rfs_ring_tbl_idx(bp, req, fltr->base.rxq); + bnxt_cfg_rfs_ring_tbl_idx(bp, req, fltr); } else { vnic = &bp->vnic_info[fltr->base.rxq + 1]; req->dst_id = cpu_to_le16(vnic->fw_vnic_id); @@@ -5995,9 -5938,9 +5995,9 @@@ static void bnxt_hwrm_vnic_update_tunl_ req->tnl_tpa_en_bitmap = cpu_to_le32(tunl_tpa_bmap); }
-static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags) +int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, struct bnxt_vnic_info *vnic, + u32 tpa_flags) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; u16 max_aggs = VNIC_TPA_CFG_REQ_MAX_AGGS_MAX; struct hwrm_vnic_tpa_cfg_input *req; int rc; @@@ -6082,10 -6025,9 +6082,10 @@@ static u16 bnxt_cp_ring_for_tx(struct b return bnxt_cp_ring_from_grp(bp, &txr->tx_ring_struct); }
-static int bnxt_alloc_rss_indir_tbl(struct bnxt *bp) +int bnxt_alloc_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx) { int entries; + u16 *tbl;
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) entries = BNXT_MAX_RSS_TABLE_ENTRIES_P5; @@@ -6093,22 -6035,16 +6093,22 @@@ entries = HW_HASH_INDEX_SIZE;
bp->rss_indir_tbl_entries = entries; - bp->rss_indir_tbl = kmalloc_array(entries, sizeof(*bp->rss_indir_tbl), - GFP_KERNEL); - if (!bp->rss_indir_tbl) + tbl = kmalloc_array(entries, sizeof(*bp->rss_indir_tbl), GFP_KERNEL); + if (!tbl) return -ENOMEM; + + if (rss_ctx) + rss_ctx->rss_indir_tbl = tbl; + else + bp->rss_indir_tbl = tbl; + return 0; }
-static void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp) +void bnxt_set_dflt_rss_indir_tbl(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx) { u16 max_rings, max_entries, pad, i; + u16 *rss_indir_tbl;
if (!bp->rx_nr_rings) return; @@@ -6119,17 -6055,13 +6119,17 @@@ max_rings = bp->rx_nr_rings;
max_entries = bnxt_get_rxfh_indir_size(bp->dev); + if (rss_ctx) + rss_indir_tbl = &rss_ctx->rss_indir_tbl[0]; + else + rss_indir_tbl = &bp->rss_indir_tbl[0];
for (i = 0; i < max_entries; i++) - bp->rss_indir_tbl[i] = ethtool_rxfh_indir_default(i, max_rings); + rss_indir_tbl[i] = ethtool_rxfh_indir_default(i, max_rings);
pad = bp->rss_indir_tbl_entries - max_entries; if (pad) - memset(&bp->rss_indir_tbl[i], 0, pad * sizeof(u16)); + memset(&rss_indir_tbl[i], 0, pad * sizeof(u16)); }
static u16 bnxt_get_max_rss_ring(struct bnxt *bp) @@@ -6185,8 -6117,6 +6185,8 @@@ static void bnxt_fill_hw_rss_tbl_p5(str
if (vnic->flags & BNXT_VNIC_NTUPLE_FLAG) j = ethtool_rxfh_indir_default(i, bp->rx_nr_rings); + else if (vnic->flags & BNXT_VNIC_RSSCTX_FLAG) + j = vnic->rss_ctx->rss_indir_tbl[i]; else j = bp->rss_indir_tbl[i]; rxr = &bp->rx_ring[j]; @@@ -6224,9 -6154,9 +6224,9 @@@ __bnxt_hwrm_vnic_set_rss(struct bnxt *b req->hash_key_tbl_addr = cpu_to_le64(vnic->rss_hash_key_dma_addr); }
-static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) +static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, struct bnxt_vnic_info *vnic, + bool set_rss) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_rss_cfg_input *req; int rc;
@@@ -6244,9 -6174,9 +6244,9 @@@ return hwrm_req_send(bp, req); }
-static int bnxt_hwrm_vnic_set_rss_p5(struct bnxt *bp, u16 vnic_id, bool set_rss) +static int bnxt_hwrm_vnic_set_rss_p5(struct bnxt *bp, + struct bnxt_vnic_info *vnic, bool set_rss) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_rss_cfg_input *req; dma_addr_t ring_tbl_map; u32 i, nr_ctxs; @@@ -6299,8 -6229,9 +6299,8 @@@ static void bnxt_hwrm_update_rss_hash_c hwrm_req_drop(bp, req); }
-static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id) +static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_plcmodes_cfg_input *req; int rc;
@@@ -6325,8 -6256,7 +6325,8 @@@ return hwrm_req_send(bp, req); }
-static void bnxt_hwrm_vnic_ctx_free_one(struct bnxt *bp, u16 vnic_id, +static void bnxt_hwrm_vnic_ctx_free_one(struct bnxt *bp, + struct bnxt_vnic_info *vnic, u16 ctx_idx) { struct hwrm_vnic_rss_cos_lb_ctx_free_input *req; @@@ -6335,10 -6265,10 +6335,10 @@@ return;
req->rss_cos_lb_ctx_id = - cpu_to_le16(bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx]); + cpu_to_le16(vnic->fw_rss_cos_lb_ctx[ctx_idx]);
hwrm_req_send(bp, req); - bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx] = INVALID_HW_RING_ID; + vnic->fw_rss_cos_lb_ctx[ctx_idx] = INVALID_HW_RING_ID; }
static void bnxt_hwrm_vnic_ctx_free(struct bnxt *bp) @@@ -6350,14 -6280,13 +6350,14 @@@
for (j = 0; j < BNXT_MAX_CTX_PER_VNIC; j++) { if (vnic->fw_rss_cos_lb_ctx[j] != INVALID_HW_RING_ID) - bnxt_hwrm_vnic_ctx_free_one(bp, i, j); + bnxt_hwrm_vnic_ctx_free_one(bp, vnic, j); } } bp->rsscos_nr_ctxs = 0; }
-static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, u16 vnic_id, u16 ctx_idx) +static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, + struct bnxt_vnic_info *vnic, u16 ctx_idx) { struct hwrm_vnic_rss_cos_lb_ctx_alloc_output *resp; struct hwrm_vnic_rss_cos_lb_ctx_alloc_input *req; @@@ -6370,7 -6299,7 +6370,7 @@@ resp = hwrm_req_hold(bp, req); rc = hwrm_req_send(bp, req); if (!rc) - bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx] = + vnic->fw_rss_cos_lb_ctx[ctx_idx] = le16_to_cpu(resp->rss_cos_lb_ctx_id); hwrm_req_drop(bp, req);
@@@ -6384,9 -6313,10 +6384,9 @@@ static u32 bnxt_get_roce_vnic_mode(stru return VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE; }
-int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id) +int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic) { struct bnxt_vnic_info *vnic0 = &bp->vnic_info[BNXT_VNIC_DEFAULT]; - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_cfg_input *req; unsigned int ring = 0, grp_idx; u16 def_vlan = 0; @@@ -6434,8 -6364,8 +6434,8 @@@ if (vnic->flags & BNXT_VNIC_RSS_FLAG) ring = 0; else if (vnic->flags & BNXT_VNIC_RFS_FLAG) - ring = vnic_id - 1; - else if ((vnic_id == 1) && BNXT_CHIP_TYPE_NITRO_A0(bp)) + ring = vnic->vnic_id - 1; + else if ((vnic->vnic_id == 1) && BNXT_CHIP_TYPE_NITRO_A0(bp)) ring = bp->rx_nr_rings - 1;
grp_idx = bp->rx_ring[ring].bnapi->index; @@@ -6451,25 -6381,25 +6451,25 @@@ vnic_mru #endif if ((bp->flags & BNXT_FLAG_STRIP_VLAN) || def_vlan) req->flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_VLAN_STRIP_MODE); - if (!vnic_id && bnxt_ulp_registered(bp->edev)) + if (vnic->vnic_id == BNXT_VNIC_DEFAULT && bnxt_ulp_registered(bp->edev)) req->flags |= cpu_to_le32(bnxt_get_roce_vnic_mode(bp));
return hwrm_req_send(bp, req); }
-static void bnxt_hwrm_vnic_free_one(struct bnxt *bp, u16 vnic_id) +static void bnxt_hwrm_vnic_free_one(struct bnxt *bp, + struct bnxt_vnic_info *vnic) { - if (bp->vnic_info[vnic_id].fw_vnic_id != INVALID_HW_RING_ID) { + if (vnic->fw_vnic_id != INVALID_HW_RING_ID) { struct hwrm_vnic_free_input *req;
if (hwrm_req_init(bp, req, HWRM_VNIC_FREE)) return;
- req->vnic_id = - cpu_to_le32(bp->vnic_info[vnic_id].fw_vnic_id); + req->vnic_id = cpu_to_le32(vnic->fw_vnic_id);
hwrm_req_send(bp, req); - bp->vnic_info[vnic_id].fw_vnic_id = INVALID_HW_RING_ID; + vnic->fw_vnic_id = INVALID_HW_RING_ID; } }
@@@ -6478,14 -6408,15 +6478,14 @@@ static void bnxt_hwrm_vnic_free(struct u16 i;
for (i = 0; i < bp->nr_vnics; i++) - bnxt_hwrm_vnic_free_one(bp, i); + bnxt_hwrm_vnic_free_one(bp, &bp->vnic_info[i]); }
-static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, u16 vnic_id, - unsigned int start_rx_ring_idx, - unsigned int nr_rings) +int bnxt_hwrm_vnic_alloc(struct bnxt *bp, struct bnxt_vnic_info *vnic, + unsigned int start_rx_ring_idx, + unsigned int nr_rings) { unsigned int i, j, grp_idx, end_idx = start_rx_ring_idx + nr_rings; - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_alloc_output *resp; struct hwrm_vnic_alloc_input *req; int rc; @@@ -6511,7 -6442,7 +6511,7 @@@ vnic_no_ring_grps: for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++) vnic->fw_rss_cos_lb_ctx[i] = INVALID_HW_RING_ID; - if (vnic_id == BNXT_VNIC_DEFAULT) + if (vnic->vnic_id == BNXT_VNIC_DEFAULT) req->flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_DEFAULT);
resp = hwrm_req_hold(bp, req); @@@ -7343,7 -7274,17 +7343,7 @@@ static int bnxt_hwrm_reserve_rings(stru
int bnxt_nq_rings_in_use(struct bnxt *bp) { - int cp = bp->cp_nr_rings; - int ulp_msix, ulp_base; - - ulp_msix = bnxt_get_ulp_msix_num(bp); - if (ulp_msix) { - ulp_base = bnxt_get_ulp_msix_base(bp); - cp += ulp_msix; - if ((ulp_base + ulp_msix) > cp) - cp = ulp_base + ulp_msix; - } - return cp; + return bp->cp_nr_rings + bnxt_get_ulp_msix_num(bp); }
static int bnxt_cp_rings_in_use(struct bnxt *bp) @@@ -7359,7 -7300,16 +7359,7 @@@
static int bnxt_get_func_stat_ctxs(struct bnxt *bp) { - int ulp_stat = bnxt_get_ulp_stat_ctxs(bp); - int cp = bp->cp_nr_rings; - - if (!ulp_stat) - return cp; - - if (bnxt_nq_rings_in_use(bp) > cp + bnxt_get_ulp_msix_num(bp)) - return bnxt_get_ulp_msix_base(bp) + ulp_stat; - - return cp + ulp_stat; + return bp->cp_nr_rings + bnxt_get_ulp_stat_ctxs(bp); }
static int bnxt_get_total_rss_ctxs(struct bnxt *bp, struct bnxt_hw_rings *hwr) @@@ -7391,7 -7341,7 +7391,7 @@@ static void bnxt_check_rss_tbl_no_rmgr( if (hw_resc->resv_rx_rings != bp->rx_nr_rings) { hw_resc->resv_rx_rings = bp->rx_nr_rings; if (!netif_is_rxfh_configured(bp->dev)) - bnxt_set_dflt_rss_indir_tbl(bp); + bnxt_set_dflt_rss_indir_tbl(bp, NULL); } }
@@@ -7399,7 -7349,7 +7399,7 @@@ static int bnxt_get_total_vnics(struct { if (bp->flags & BNXT_FLAG_RFS) { if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) - return 2; + return 2 + bp->num_rss_ctx; if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) return rx_rings + 1; } @@@ -7470,27 -7420,14 +7470,27 @@@ static bool bnxt_rings_ok(struct bnxt * static int __bnxt_reserve_rings(struct bnxt *bp) { struct bnxt_hw_rings hwr = {0}; + int cp = bp->cp_nr_rings; int rx_rings, rc; + int ulp_msix = 0; bool sh = false; int tx_cp;
if (!bnxt_need_reserve_rings(bp)) return 0;
- hwr.cp = bnxt_nq_rings_in_use(bp); + if (!bnxt_ulp_registered(bp->edev)) { + ulp_msix = bnxt_get_avail_msix(bp, bp->ulp_num_msix_want); + if (!ulp_msix) + bnxt_set_ulp_stat_ctxs(bp, 0); + + if (ulp_msix > bp->ulp_num_msix_want) + ulp_msix = bp->ulp_num_msix_want; + hwr.cp = cp + ulp_msix; + } else { + hwr.cp = bnxt_nq_rings_in_use(bp); + } + hwr.tx = bp->tx_nr_rings; hwr.rx = bp->rx_nr_rings; if (bp->flags & BNXT_FLAG_SHARED_RINGS) @@@ -7560,20 -7497,7 +7560,20 @@@ return -ENOMEM;
if (!netif_is_rxfh_configured(bp->dev)) - bnxt_set_dflt_rss_indir_tbl(bp); + bnxt_set_dflt_rss_indir_tbl(bp, NULL); + + if (!bnxt_ulp_registered(bp->edev) && BNXT_NEW_RM(bp)) { + int resv_msix, resv_ctx, ulp_ctxs; + struct bnxt_hw_resc *hw_resc; + + hw_resc = &bp->hw_resc; + resv_msix = hw_resc->resv_irqs - bp->cp_nr_rings; + ulp_msix = min_t(int, resv_msix, ulp_msix); + bnxt_set_ulp_msix_num(bp, ulp_msix); + resv_ctx = hw_resc->resv_stat_ctxs - bp->cp_nr_rings; + ulp_ctxs = min(resv_ctx, bnxt_get_ulp_stat_ctxs(bp)); + bnxt_set_ulp_stat_ctxs(bp, ulp_ctxs); + }
return rc; } @@@ -9752,7 -9676,7 +9752,7 @@@ static int bnxt_set_tpa(struct bnxt *bp else if (BNXT_NO_FW_ACCESS(bp)) return 0; for (i = 0; i < bp->nr_vnics; i++) { - rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags); + rc = bnxt_hwrm_vnic_set_tpa(bp, &bp->vnic_info[i], tpa_flags); if (rc) { netdev_err(bp->dev, "hwrm vnic set tpa failure rc for vnic %d: %x\n", i, rc); @@@ -9767,7 -9691,7 +9767,7 @@@ static void bnxt_hwrm_clear_vnic_rss(st int i;
for (i = 0; i < bp->nr_vnics; i++) - bnxt_hwrm_vnic_set_rss(bp, i, false); + bnxt_hwrm_vnic_set_rss(bp, &bp->vnic_info[i], false); }
static void bnxt_clear_vnic(struct bnxt *bp) @@@ -9845,27 -9769,28 +9845,27 @@@ static int bnxt_hwrm_set_cache_line_siz return hwrm_req_send(bp, req); }
-static int __bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) +static int __bnxt_setup_vnic(struct bnxt *bp, struct bnxt_vnic_info *vnic) { - struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; int rc;
if (vnic->flags & BNXT_VNIC_RFS_NEW_RSS_FLAG) goto skip_rss_ctx;
/* allocate context for vnic */ - rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id, 0); + rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, 0); if (rc) { netdev_err(bp->dev, "hwrm vnic %d alloc failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); goto vnic_setup_err; } bp->rsscos_nr_ctxs++;
if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { - rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id, 1); + rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, 1); if (rc) { netdev_err(bp->dev, "hwrm vnic %d cos ctx alloc failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); goto vnic_setup_err; } bp->rsscos_nr_ctxs++; @@@ -9873,26 -9798,26 +9873,26 @@@
skip_rss_ctx: /* configure default vnic, ring grp */ - rc = bnxt_hwrm_vnic_cfg(bp, vnic_id); + rc = bnxt_hwrm_vnic_cfg(bp, vnic); if (rc) { netdev_err(bp->dev, "hwrm vnic %d cfg failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); goto vnic_setup_err; }
/* Enable RSS hashing on vnic */ - rc = bnxt_hwrm_vnic_set_rss(bp, vnic_id, true); + rc = bnxt_hwrm_vnic_set_rss(bp, vnic, true); if (rc) { netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); goto vnic_setup_err; }
if (bp->flags & BNXT_FLAG_AGG_RINGS) { - rc = bnxt_hwrm_vnic_set_hds(bp, vnic_id); + rc = bnxt_hwrm_vnic_set_hds(bp, vnic); if (rc) { netdev_err(bp->dev, "hwrm vnic %d set hds failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); } }
@@@ -9900,33 -9825,16 +9900,33 @@@ vnic_setup_err return rc; }
-static int __bnxt_setup_vnic_p5(struct bnxt *bp, u16 vnic_id) +int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic) +{ + int rc; + + rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic, true); + if (rc) { + netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", + vnic->vnic_id, rc); + return rc; + } + rc = bnxt_hwrm_vnic_cfg(bp, vnic); + if (rc) + netdev_err(bp->dev, "hwrm vnic %d cfg failure rc: %x\n", + vnic->vnic_id, rc); + return rc; +} + +int __bnxt_setup_vnic_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic) { int rc, i, nr_ctxs;
nr_ctxs = bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings); for (i = 0; i < nr_ctxs; i++) { - rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id, i); + rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic, i); if (rc) { netdev_err(bp->dev, "hwrm vnic %d ctx %d alloc failure rc: %x\n", - vnic_id, i, rc); + vnic->vnic_id, i, rc); break; } bp->rsscos_nr_ctxs++; @@@ -9934,57 -9842,63 +9934,57 @@@ if (i < nr_ctxs) return -ENOMEM;
- rc = bnxt_hwrm_vnic_set_rss_p5(bp, vnic_id, true); - if (rc) { - netdev_err(bp->dev, "hwrm vnic %d set rss failure rc: %d\n", - vnic_id, rc); - return rc; - } - rc = bnxt_hwrm_vnic_cfg(bp, vnic_id); - if (rc) { - netdev_err(bp->dev, "hwrm vnic %d cfg failure rc: %x\n", - vnic_id, rc); + rc = bnxt_hwrm_vnic_rss_cfg_p5(bp, vnic); + if (rc) return rc; - } + if (bp->flags & BNXT_FLAG_AGG_RINGS) { - rc = bnxt_hwrm_vnic_set_hds(bp, vnic_id); + rc = bnxt_hwrm_vnic_set_hds(bp, vnic); if (rc) { netdev_err(bp->dev, "hwrm vnic %d set hds failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); } } return rc; }
-static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) +static int bnxt_setup_vnic(struct bnxt *bp, struct bnxt_vnic_info *vnic) { if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) - return __bnxt_setup_vnic_p5(bp, vnic_id); + return __bnxt_setup_vnic_p5(bp, vnic); else - return __bnxt_setup_vnic(bp, vnic_id); + return __bnxt_setup_vnic(bp, vnic); }
-static int bnxt_alloc_and_setup_vnic(struct bnxt *bp, u16 vnic_id, +static int bnxt_alloc_and_setup_vnic(struct bnxt *bp, + struct bnxt_vnic_info *vnic, u16 start_rx_ring_idx, int rx_rings) { int rc;
- rc = bnxt_hwrm_vnic_alloc(bp, vnic_id, start_rx_ring_idx, rx_rings); + rc = bnxt_hwrm_vnic_alloc(bp, vnic, start_rx_ring_idx, rx_rings); if (rc) { netdev_err(bp->dev, "hwrm vnic %d alloc failure rc: %x\n", - vnic_id, rc); + vnic->vnic_id, rc); return rc; } - return bnxt_setup_vnic(bp, vnic_id); + return bnxt_setup_vnic(bp, vnic); }
static int bnxt_alloc_rfs_vnics(struct bnxt *bp) { + struct bnxt_vnic_info *vnic; int i, rc = 0;
- if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) - return bnxt_alloc_and_setup_vnic(bp, BNXT_VNIC_NTUPLE, 0, - bp->rx_nr_rings); + if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) { + vnic = &bp->vnic_info[BNXT_VNIC_NTUPLE]; + return bnxt_alloc_and_setup_vnic(bp, vnic, 0, bp->rx_nr_rings); + }
if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) return 0;
for (i = 0; i < bp->rx_nr_rings; i++) { - struct bnxt_vnic_info *vnic; u16 vnic_id = i + 1; u16 ring_id = i;
@@@ -9995,104 -9909,12 +9995,104 @@@ vnic->flags |= BNXT_VNIC_RFS_FLAG; if (bp->rss_cap & BNXT_RSS_CAP_NEW_RSS_CAP) vnic->flags |= BNXT_VNIC_RFS_NEW_RSS_FLAG; - if (bnxt_alloc_and_setup_vnic(bp, vnic_id, ring_id, 1)) + if (bnxt_alloc_and_setup_vnic(bp, &bp->vnic_info[vnic_id], ring_id, 1)) break; } return rc; }
+void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, + bool all) +{ + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + struct bnxt_filter_base *usr_fltr, *tmp; + struct bnxt_ntuple_filter *ntp_fltr; + int i; + + bnxt_hwrm_vnic_free_one(bp, &rss_ctx->vnic); + for (i = 0; i < BNXT_MAX_CTX_PER_VNIC; i++) { + if (vnic->fw_rss_cos_lb_ctx[i] != INVALID_HW_RING_ID) + bnxt_hwrm_vnic_ctx_free_one(bp, vnic, i); + } + if (!all) + return; + + list_for_each_entry_safe(usr_fltr, tmp, &bp->usr_fltr_list, list) { + if ((usr_fltr->flags & BNXT_ACT_RSS_CTX) && + usr_fltr->fw_vnic_id == rss_ctx->index) { + ntp_fltr = container_of(usr_fltr, + struct bnxt_ntuple_filter, + base); + bnxt_hwrm_cfa_ntuple_filter_free(bp, ntp_fltr); + bnxt_del_ntp_filter(bp, ntp_fltr); + bnxt_del_one_usr_fltr(bp, usr_fltr); + } + } + + if (vnic->rss_table) + dma_free_coherent(&bp->pdev->dev, vnic->rss_table_size, + vnic->rss_table, + vnic->rss_table_dma_addr); + kfree(rss_ctx->rss_indir_tbl); + list_del(&rss_ctx->list); + bp->num_rss_ctx--; + clear_bit(rss_ctx->index, bp->rss_ctx_bmap); + kfree(rss_ctx); +} + +static void bnxt_hwrm_realloc_rss_ctx_vnic(struct bnxt *bp) +{ + bool set_tpa = !!(bp->flags & BNXT_FLAG_TPA); + struct bnxt_rss_ctx *rss_ctx, *tmp; + + list_for_each_entry_safe(rss_ctx, tmp, &bp->rss_ctx_list, list) { + struct bnxt_vnic_info *vnic = &rss_ctx->vnic; + + if (bnxt_hwrm_vnic_alloc(bp, vnic, 0, bp->rx_nr_rings) || + bnxt_hwrm_vnic_set_tpa(bp, vnic, set_tpa) || + __bnxt_setup_vnic_p5(bp, vnic)) { + netdev_err(bp->dev, "Failed to restore RSS ctx %d\n", + rss_ctx->index); + bnxt_del_one_rss_ctx(bp, rss_ctx, true); + } + } +} + +struct bnxt_rss_ctx *bnxt_alloc_rss_ctx(struct bnxt *bp) +{ + struct bnxt_rss_ctx *rss_ctx = NULL; + + rss_ctx = kzalloc(sizeof(*rss_ctx), GFP_KERNEL); + if (rss_ctx) { + rss_ctx->vnic.rss_ctx = rss_ctx; + list_add_tail(&rss_ctx->list, &bp->rss_ctx_list); + bp->num_rss_ctx++; + } + return rss_ctx; +} + +void bnxt_clear_rss_ctxs(struct bnxt *bp, bool all) +{ + struct bnxt_rss_ctx *rss_ctx, *tmp; + + list_for_each_entry_safe(rss_ctx, tmp, &bp->rss_ctx_list, list) + bnxt_del_one_rss_ctx(bp, rss_ctx, all); + + if (all) + bitmap_free(bp->rss_ctx_bmap); +} + +static void bnxt_init_multi_rss_ctx(struct bnxt *bp) +{ + bp->rss_ctx_bmap = bitmap_zalloc(BNXT_RSS_CTX_BMAP_LEN, GFP_KERNEL); + if (bp->rss_ctx_bmap) { + /* burn index 0 since we cannot have context 0 */ + __set_bit(0, bp->rss_ctx_bmap); + INIT_LIST_HEAD(&bp->rss_ctx_list); + bp->rss_cap |= BNXT_RSS_CAP_MULTI_RSS_CTX; + } +} + /* Allow PF, trusted VFs and VFs with default VLAN to be in promiscuous mode */ static bool bnxt_promisc_ok(struct bnxt *bp) { @@@ -10105,17 -9927,16 +10105,17 @@@
static int bnxt_setup_nitroa0_vnic(struct bnxt *bp) { + struct bnxt_vnic_info *vnic = &bp->vnic_info[1]; unsigned int rc = 0;
- rc = bnxt_hwrm_vnic_alloc(bp, 1, bp->rx_nr_rings - 1, 1); + rc = bnxt_hwrm_vnic_alloc(bp, vnic, bp->rx_nr_rings - 1, 1); if (rc) { netdev_err(bp->dev, "Cannot allocate special vnic for NS2 A0: %x\n", rc); return rc; }
- rc = bnxt_hwrm_vnic_cfg(bp, 1); + rc = bnxt_hwrm_vnic_cfg(bp, vnic); if (rc) { netdev_err(bp->dev, "Cannot allocate special vnic for NS2 A0: %x\n", rc); @@@ -10158,7 -9979,7 +10158,7 @@@ static int bnxt_init_chip(struct bnxt * rx_nr_rings--;
/* default vnic 0 */ - rc = bnxt_hwrm_vnic_alloc(bp, BNXT_VNIC_DEFAULT, 0, rx_nr_rings); + rc = bnxt_hwrm_vnic_alloc(bp, vnic, 0, rx_nr_rings); if (rc) { netdev_err(bp->dev, "hwrm vnic alloc failure rc: %x\n", rc); goto err_out; @@@ -10167,7 -9988,7 +10167,7 @@@ if (BNXT_VF(bp)) bnxt_hwrm_func_qcfg(bp);
- rc = bnxt_setup_vnic(bp, BNXT_VNIC_DEFAULT); + rc = bnxt_setup_vnic(bp, vnic); if (rc) goto err_out; if (bp->rss_cap & BNXT_RSS_CAP_RSS_HASH_TYPE_DELTA) @@@ -10621,23 -10442,13 +10621,23 @@@ int bnxt_reserve_rings(struct bnxt *bp { bool irq_cleared = false; int tcs = bp->num_tc; + int irqs_required; int rc;
if (!bnxt_need_reserve_rings(bp)) return 0;
- if (irq_re_init && BNXT_NEW_RM(bp) && - bnxt_get_num_msix(bp) != bp->total_irqs) { + if (!bnxt_ulp_registered(bp->edev)) { + int ulp_msix = bnxt_get_avail_msix(bp, bp->ulp_num_msix_want); + + if (ulp_msix > bp->ulp_num_msix_want) + ulp_msix = bp->ulp_num_msix_want; + irqs_required = ulp_msix + bp->cp_nr_rings; + } else { + irqs_required = bnxt_get_num_msix(bp); + } + + if (irq_re_init && BNXT_NEW_RM(bp) && irqs_required != bp->total_irqs) { bnxt_ulp_irq_stop(bp); bnxt_clear_int_mode(bp); irq_cleared = true; @@@ -11863,46 -11674,6 +11863,46 @@@ static void bnxt_cfg_usr_fltrs(struct b bnxt_cfg_one_usr_fltr(bp, usr_fltr); }
+static int bnxt_set_xps_mapping(struct bnxt *bp) +{ + int numa_node = dev_to_node(&bp->pdev->dev); + unsigned int q_idx, map_idx, cpu, i; + const struct cpumask *cpu_mask_ptr; + int nr_cpus = num_online_cpus(); + cpumask_t *q_map; + int rc = 0; + + q_map = kcalloc(bp->tx_nr_rings_per_tc, sizeof(*q_map), GFP_KERNEL); + if (!q_map) + return -ENOMEM; + + /* Create CPU mask for all TX queues across MQPRIO traffic classes. + * Each TC has the same number of TX queues. The nth TX queue for each + * TC will have the same CPU mask. + */ + for (i = 0; i < nr_cpus; i++) { + map_idx = i % bp->tx_nr_rings_per_tc; + cpu = cpumask_local_spread(i, numa_node); + cpu_mask_ptr = get_cpu_mask(cpu); + cpumask_or(&q_map[map_idx], &q_map[map_idx], cpu_mask_ptr); + } + + /* Register CPU mask for each TX queue except the ones marked for XDP */ + for (q_idx = 0; q_idx < bp->dev->real_num_tx_queues; q_idx++) { + map_idx = q_idx % bp->tx_nr_rings_per_tc; + rc = netif_set_xps_queue(bp->dev, &q_map[map_idx], q_idx); + if (rc) { + netdev_warn(bp->dev, "Error setting XPS for q:%d\n", + q_idx); + break; + } + } + + kfree(q_map); + + return rc; +} + static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@@ -11965,12 -11736,8 +11965,12 @@@ } }
- if (irq_re_init) + if (irq_re_init) { udp_tunnel_nic_reset_ntf(bp->dev); + rc = bnxt_set_xps_mapping(bp); + if (rc) + netdev_warn(bp->dev, "failed to set xps mapping\n"); + }
if (bp->tx_nr_rings_xdp < num_possible_cpus()) { if (!static_key_enabled(&bnxt_xdp_locking_key)) @@@ -11991,10 -11758,10 +11991,12 @@@ /* VF-reps may need to be re-opened after the PF is re-opened */ if (BNXT_PF(bp)) bnxt_vf_reps_open(bp); + if (bp->ptp_cfg) + atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS); bnxt_ptp_init_rtc(bp, true); bnxt_ptp_cfg_tstamp_filters(bp); + if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_hwrm_realloc_rss_ctx_vnic(bp); bnxt_cfg_usr_fltrs(bp); return 0;
@@@ -12143,8 -11910,6 +12145,8 @@@ static void __bnxt_close_nic(struct bnx while (bnxt_drv_busy(bp)) msleep(20);
+ if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_clear_rss_ctxs(bp, false); /* Flush rings and disable interrupts */ bnxt_shutdown_nic(bp, irq_re_init);
@@@ -12642,26 -12407,33 +12644,26 @@@ static bool bnxt_rfs_supported(struct b }
/* If runtime conditions support RFS */ -static bool bnxt_rfs_capable(struct bnxt *bp) +bool bnxt_rfs_capable(struct bnxt *bp, bool new_rss_ctx) { struct bnxt_hw_rings hwr = {0}; int max_vnics, max_rss_ctxs;
- hwr.rss_ctx = 1; - if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) { - /* 2 VNICS: default + Ntuple */ - hwr.vnic = 2; - hwr.rss_ctx = bnxt_get_nr_rss_ctxs(bp, bp->rx_nr_rings) * - hwr.vnic; - goto check_reserve_vnic; - } - if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) + if ((bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && + !BNXT_SUPPORTS_NTUPLE_VNIC(bp)) return bnxt_rfs_supported(bp); + if (!(bp->flags & BNXT_FLAG_MSIX_CAP) || !bnxt_can_reserve_rings(bp) || !bp->rx_nr_rings) return false;
- hwr.vnic = 1 + bp->rx_nr_rings; -check_reserve_vnic: + hwr.grp = bp->rx_nr_rings; + hwr.vnic = bnxt_get_total_vnics(bp, bp->rx_nr_rings); + if (new_rss_ctx) + hwr.vnic++; + hwr.rss_ctx = bnxt_get_total_rss_ctxs(bp, &hwr); max_vnics = bnxt_get_max_func_vnics(bp); max_rss_ctxs = bnxt_get_max_func_rss_ctxs(bp);
- if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && - !(bp->rss_cap & BNXT_RSS_CAP_NEW_RSS_CAP)) - hwr.rss_ctx = hwr.vnic; - if (hwr.vnic > max_vnics || hwr.rss_ctx > max_rss_ctxs) { if (bp->rx_nr_rings > 1) netdev_warn(bp->dev, @@@ -12695,7 -12467,7 +12697,7 @@@ static netdev_features_t bnxt_fix_featu struct bnxt *bp = netdev_priv(dev); netdev_features_t vlan_features;
- if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp)) + if ((features & NETIF_F_NTUPLE) && !bnxt_rfs_capable(bp, false)) features &= ~NETIF_F_NTUPLE;
if ((bp->flags & BNXT_FLAG_NO_AGG_RINGS) || bp->xdp_prog) @@@ -13647,8 -13419,8 +13649,8 @@@ int bnxt_check_rings(struct bnxt *bp, i return -ENOMEM; hwr.stat = hwr.cp; if (BNXT_NEW_RM(bp)) { - hwr.cp += bnxt_get_ulp_msix_num(bp); - hwr.stat += bnxt_get_ulp_stat_ctxs(bp); + hwr.cp += bnxt_get_ulp_msix_num_in_use(bp); + hwr.stat += bnxt_get_ulp_stat_ctxs_in_use(bp); hwr.grp = rx; hwr.rss_ctx = bnxt_get_total_rss_ctxs(bp, &hwr); } @@@ -13831,7 -13603,7 +13833,7 @@@ static void bnxt_set_dflt_rfs(struct bn bp->flags &= ~BNXT_FLAG_RFS; if (bnxt_rfs_supported(bp)) { dev->hw_features |= NETIF_F_NTUPLE; - if (bnxt_rfs_capable(bp)) { + if (bnxt_rfs_capable(bp, false)) { bp->flags |= BNXT_FLAG_RFS; dev->features |= NETIF_F_NTUPLE; } @@@ -14684,9 -14456,12 +14686,9 @@@ static int bnxt_bridge_setlink(struct n if (!br_spec) return -EINVAL;
- nla_for_each_nested(attr, br_spec, rem) { + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { u16 mode;
- if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - mode = nla_get_u16(attr); if (mode == bp->br_mode) break; @@@ -14822,17 -14597,12 +14824,17 @@@ static void bnxt_remove_one(struct pci_ if (BNXT_PF(bp)) bnxt_sriov_disable(bp);
- bnxt_rdma_aux_device_uninit(bp); + bnxt_rdma_aux_device_del(bp);
bnxt_ptp_clear(bp); unregister_netdev(dev); + + bnxt_rdma_aux_device_uninit(bp); + bnxt_free_l2_filters(bp, true); bnxt_free_ntp_fltrs(bp, true); + if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_clear_rss_ctxs(bp, true); clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); /* Flush any pending tasks */ cancel_work_sync(&bp->sp_task); @@@ -14921,9 -14691,8 +14923,9 @@@ static void _bnxt_get_max_rings(struct *max_rx = hw_resc->max_rx_rings; *max_cp = bnxt_get_max_func_cp_rings_for_en(bp); max_irq = min_t(int, bnxt_get_max_func_irqs(bp) - - bnxt_get_ulp_msix_num(bp), - hw_resc->max_stat_ctxs - bnxt_get_ulp_stat_ctxs(bp)); + bnxt_get_ulp_msix_num_in_use(bp), + hw_resc->max_stat_ctxs - + bnxt_get_ulp_stat_ctxs_in_use(bp)); if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) *max_cp = min_t(int, *max_cp, max_irq); max_ring_grps = hw_resc->max_hw_ring_grps; @@@ -15019,7 -14788,6 +15021,7 @@@ static void bnxt_trim_dflt_sh_rings(str static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) { int dflt_rings, max_rx_rings, max_tx_rings, rc; + int avail_msix;
if (!bnxt_can_reserve_rings(bp)) return 0; @@@ -15047,14 -14815,6 +15049,14 @@@ bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings; bp->tx_nr_rings = bp->tx_nr_rings_per_tc;
+ avail_msix = bnxt_get_max_func_irqs(bp) - bp->cp_nr_rings; + if (avail_msix >= BNXT_MIN_ROCE_CP_RINGS) { + int ulp_num_msix = min(avail_msix, bp->ulp_num_msix_want); + + bnxt_set_ulp_msix_num(bp, ulp_num_msix); + bnxt_set_dflt_ulp_stat_ctxs(bp); + } + rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, "Unable to reserve tx rings\n"); @@@ -15301,7 -15061,7 +15303,7 @@@ static int bnxt_init_one(struct pci_de bp->flags |= BNXT_FLAG_CHIP_P7; }
- rc = bnxt_alloc_rss_indir_tbl(bp); + rc = bnxt_alloc_rss_indir_tbl(bp, NULL); if (rc) goto init_err_pci_clean;
@@@ -15404,7 -15164,6 +15406,7 @@@ bnxt_set_rx_skb_mode(bp, false); bnxt_set_tpa_flags(bp); bnxt_set_ring_params(bp); + bnxt_rdma_aux_device_init(bp); rc = bnxt_set_dflt_rings(bp, true); if (rc) { if (BNXT_VF(bp) && rc == -ENODEV) { @@@ -15455,17 -15214,13 +15457,17 @@@
INIT_LIST_HEAD(&bp->usr_fltr_list);
+ if (BNXT_SUPPORTS_NTUPLE_VNIC(bp)) + bnxt_init_multi_rss_ctx(bp); + + rc = register_netdev(dev); if (rc) goto init_err_cleanup;
bnxt_dl_fw_reporters_create(bp);
- bnxt_rdma_aux_device_init(bp); + bnxt_rdma_aux_device_add(bp);
bnxt_print_device_info(bp);
@@@ -15473,15 -15228,12 +15475,15 @@@
return 0; init_err_cleanup: + bnxt_rdma_aux_device_uninit(bp); bnxt_dl_unregister(bp); init_err_dl: bnxt_shutdown_tc(bp); bnxt_clear_int_mode(bp);
init_err_pci_clean: + if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) + bnxt_clear_rss_ctxs(bp, true); bnxt_hwrm_func_drv_unrgtr(bp); bnxt_free_hwrm_resources(bp); bnxt_hwmon_uninit(bp); @@@ -15669,10 -15421,6 +15671,10 @@@ static pci_ers_result_t bnxt_io_slot_re
netdev_info(bp->dev, "PCI Slot Reset\n");
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS) && + test_bit(BNXT_STATE_PCI_CHANNEL_IO_FROZEN, &bp->state)) + msleep(900); + rtnl_lock();
if (pci_enable_device(pdev)) { diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index edb10aebd0958,195c02dc06830..d8927838f1cfb --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@@ -31,74 -31,21 +31,74 @@@ static DEFINE_IDA(bnxt_aux_dev_ids) static void bnxt_fill_msix_vecs(struct bnxt *bp, struct bnxt_msix_entry *ent) { struct bnxt_en_dev *edev = bp->edev; - int num_msix, idx, i; + int num_msix, i;
if (!edev->ulp_tbl->msix_requested) { netdev_warn(bp->dev, "Requested MSI-X vectors insufficient\n"); return; } num_msix = edev->ulp_tbl->msix_requested; - idx = edev->ulp_tbl->msix_base; for (i = 0; i < num_msix; i++) { - ent[i].vector = bp->irq_tbl[idx + i].vector; - ent[i].ring_idx = idx + i; + ent[i].vector = bp->irq_tbl[i].vector; + ent[i].ring_idx = i; if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) ent[i].db_offset = bp->db_offset; else - ent[i].db_offset = (idx + i) * 0x80; + ent[i].db_offset = i * 0x80; + } +} + +int bnxt_get_ulp_msix_num(struct bnxt *bp) +{ + if (bp->edev) + return bp->edev->ulp_num_msix_vec; + return 0; +} + +void bnxt_set_ulp_msix_num(struct bnxt *bp, int num) +{ + if (bp->edev) + bp->edev->ulp_num_msix_vec = num; +} + +int bnxt_get_ulp_msix_num_in_use(struct bnxt *bp) +{ + if (bnxt_ulp_registered(bp->edev)) + return bp->edev->ulp_num_msix_vec; + return 0; +} + +int bnxt_get_ulp_stat_ctxs(struct bnxt *bp) +{ + if (bp->edev) + return bp->edev->ulp_num_ctxs; + return 0; +} + +void bnxt_set_ulp_stat_ctxs(struct bnxt *bp, int num_ulp_ctx) +{ + if (bp->edev) + bp->edev->ulp_num_ctxs = num_ulp_ctx; +} + +int bnxt_get_ulp_stat_ctxs_in_use(struct bnxt *bp) +{ + if (bnxt_ulp_registered(bp->edev)) + return bp->edev->ulp_num_ctxs; + return 0; +} + +void bnxt_set_dflt_ulp_stat_ctxs(struct bnxt *bp) +{ + if (bp->edev) { + bp->edev->ulp_num_ctxs = BNXT_MIN_ROCE_STAT_CTXS; + /* Reserve one additional stat_ctx for PF0 (except + * on 1-port NICs) as it also creates one stat_ctx + * for PF1 in case of RoCE bonding. + */ + if (BNXT_PF(bp) && !bp->pf.port_id && + bp->port_count > 1) + bp->edev->ulp_num_ctxs++; } }
@@@ -110,34 -57,25 +110,34 @@@ int bnxt_register_dev(struct bnxt_en_de struct bnxt *bp = netdev_priv(dev); unsigned int max_stat_ctxs; struct bnxt_ulp *ulp; + int rc = 0;
+ rtnl_lock(); + if (!bp->irq_tbl) { + rc = -ENODEV; + goto exit; + } max_stat_ctxs = bnxt_get_max_func_stat_ctxs(bp); if (max_stat_ctxs <= BNXT_MIN_ROCE_STAT_CTXS || - bp->cp_nr_rings == max_stat_ctxs) - return -ENOMEM; + bp->cp_nr_rings == max_stat_ctxs) { + rc = -ENOMEM; + goto exit; + }
ulp = edev->ulp_tbl; - if (!ulp) - return -ENOMEM; - ulp->handle = handle; rcu_assign_pointer(ulp->ulp_ops, ulp_ops);
if (test_bit(BNXT_STATE_OPEN, &bp->state)) - bnxt_hwrm_vnic_cfg(bp, 0); + bnxt_hwrm_vnic_cfg(bp, &bp->vnic_info[BNXT_VNIC_DEFAULT]); + + edev->ulp_tbl->msix_requested = bnxt_get_ulp_msix_num(bp);
bnxt_fill_msix_vecs(bp, bp->edev->msix_entries); edev->flags |= BNXT_EN_FLAG_MSIX_REQUESTED; - return 0; +exit: + rtnl_unlock(); + return rc; } EXPORT_SYMBOL(bnxt_register_dev);
@@@ -149,10 -87,8 +149,10 @@@ void bnxt_unregister_dev(struct bnxt_en int i = 0;
ulp = edev->ulp_tbl; + rtnl_lock(); if (ulp->msix_requested) edev->flags &= ~BNXT_EN_FLAG_MSIX_REQUESTED; + edev->ulp_tbl->msix_requested = 0;
if (ulp->max_async_event_id) bnxt_hwrm_func_drv_rgtr(bp, NULL, 0, true); @@@ -165,12 -101,11 +165,12 @@@ msleep(100); i++; } + rtnl_unlock(); return; } EXPORT_SYMBOL(bnxt_unregister_dev);
-int bnxt_get_ulp_msix_num(struct bnxt *bp) +static int bnxt_set_dflt_ulp_msix(struct bnxt *bp) { u32 roce_msix = BNXT_VF(bp) ? BNXT_MAX_VF_ROCE_MSIX : BNXT_MAX_ROCE_MSIX; @@@ -179,6 -114,29 +179,6 @@@ min_t(u32, roce_msix, num_online_cpus()) : 0); }
-int bnxt_get_ulp_msix_base(struct bnxt *bp) -{ - if (bnxt_ulp_registered(bp->edev)) { - struct bnxt_en_dev *edev = bp->edev; - - if (edev->ulp_tbl->msix_requested) - return edev->ulp_tbl->msix_base; - } - return 0; -} - -int bnxt_get_ulp_stat_ctxs(struct bnxt *bp) -{ - if (bnxt_ulp_registered(bp->edev)) { - struct bnxt_en_dev *edev = bp->edev; - - if (edev->ulp_tbl->msix_requested) - return BNXT_MIN_ROCE_STAT_CTXS; - } - - return 0; -} - int bnxt_send_msg(struct bnxt_en_dev *edev, struct bnxt_fw_msg *fw_msg) { @@@ -252,6 -210,9 +252,9 @@@ void bnxt_ulp_start(struct bnxt *bp, in if (err) return;
+ if (edev->ulp_tbl->msix_requested) + bnxt_fill_msix_vecs(bp, edev->msix_entries); + if (aux_priv) { struct auxiliary_device *adev;
@@@ -348,6 -309,7 +351,6 @@@ void bnxt_rdma_aux_device_uninit(struc
aux_priv = bp->aux_priv; adev = &aux_priv->aux_dev; - auxiliary_device_delete(adev); auxiliary_device_uninit(adev); }
@@@ -365,14 -327,6 +368,14 @@@ static void bnxt_aux_dev_release(struc bp->aux_priv = NULL; }
+void bnxt_rdma_aux_device_del(struct bnxt *bp) +{ + if (!bp->edev) + return; + + auxiliary_device_delete(&bp->aux_priv->aux_dev); +} + static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp) { edev->net = bp->dev; @@@ -393,23 -347,7 +396,23 @@@ edev->pf_port_id = bp->pf.port_id; edev->en_state = bp->state; edev->bar0 = bp->bar0; - edev->ulp_tbl->msix_requested = bnxt_get_ulp_msix_num(bp); +} + +void bnxt_rdma_aux_device_add(struct bnxt *bp) +{ + struct auxiliary_device *aux_dev; + int rc; + + if (!bp->edev) + return; + + aux_dev = &bp->aux_priv->aux_dev; + rc = auxiliary_device_add(aux_dev); + if (rc) { + netdev_warn(bp->dev, "Failed to add auxiliary device for ROCE\n"); + auxiliary_device_uninit(aux_dev); + bp->flags &= ~BNXT_FLAG_ROCE_CAP; + } }
void bnxt_rdma_aux_device_init(struct bnxt *bp) @@@ -457,15 -395,22 +460,16 @@@ if (!edev) goto aux_dev_uninit;
+ aux_priv->edev = edev; + ulp = kzalloc(sizeof(*ulp), GFP_KERNEL); if (!ulp) goto aux_dev_uninit;
edev->ulp_tbl = ulp; - aux_priv->edev = edev; bp->edev = edev; bnxt_set_edev_info(edev, bp); - - rc = auxiliary_device_add(aux_dev); - if (rc) { - netdev_warn(bp->dev, - "Failed to add auxiliary device for ROCE\n"); - goto aux_dev_uninit; - } + bp->ulp_num_msix_want = bnxt_set_dflt_ulp_msix(bp);
return;
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index a4fe164a1c4c2,8f101181648c6..2261a2b854dcb --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@@ -219,13 -219,6 +219,13 @@@ void mlx5e_build_ptys2ethtool_map(void ETHTOOL_LINK_MODE_400000baseLR4_ER4_FR4_Full_BIT, ETHTOOL_LINK_MODE_400000baseDR4_Full_BIT, ETHTOOL_LINK_MODE_400000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_800GAUI_8_800GBASE_CR8_KR8, ext, + ETHTOOL_LINK_MODE_800000baseCR8_Full_BIT, + ETHTOOL_LINK_MODE_800000baseKR8_Full_BIT, + ETHTOOL_LINK_MODE_800000baseDR8_Full_BIT, + ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT, + ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT, + ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT); }
static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, @@@ -276,7 -269,8 +276,7 @@@ void mlx5e_ethtool_get_strings(struct m switch (stringset) { case ETH_SS_PRIV_FLAGS: for (i = 0; i < MLX5E_NUM_PFLAGS; i++) - strcpy(data + i * ETH_GSTRING_LEN, - mlx5e_priv_flags[i].name); + ethtool_puts(&data, mlx5e_priv_flags[i].name); break;
case ETH_SS_TEST: @@@ -457,6 -451,34 +457,34 @@@ int mlx5e_ethtool_set_channels(struct m
mutex_lock(&priv->state_lock);
+ if (mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc == ETH_RSS_HASH_XOR) { + unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8(); + + if (count > xor8_max_channels) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: Requested number of channels (%d) exceeds the maximum allowed by the XOR8 RSS hfunc (%d)\n", + __func__, count, xor8_max_channels); + goto out; + } + } + + /* If RXFH is configured, changing the channels number is allowed only if + * it does not require resizing the RSS table. This is because the previous + * configuration may no longer be compatible with the new RSS table. + */ + if (netif_is_rxfh_configured(priv->netdev)) { + int cur_rqt_size = mlx5e_rqt_size(priv->mdev, cur_params->num_channels); + int new_rqt_size = mlx5e_rqt_size(priv->mdev, count); + + if (new_rqt_size != cur_rqt_size) { + err = -EINVAL; + netdev_err(priv->netdev, + "%s: RXFH is configured, block changing channels number that affects RSS table size (new: %d, current: %d)\n", + __func__, new_rqt_size, cur_rqt_size); + goto out; + } + } + /* Don't allow changing the number of channels if HTB offload is active, * because the numeration of the QoS SQs will change, while per-queue * qdiscs are attached. @@@ -996,8 -1018,8 +1024,8 @@@ static void get_lp_advertising(struct m ptys2ethtool_adver_link(lp_advertising, eth_proto_lp, ext); }
-int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, - struct ethtool_link_ksettings *link_ksettings) +static int mlx5e_ethtool_get_link_ksettings(struct mlx5e_priv *priv, + struct ethtool_link_ksettings *link_ksettings) { struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {}; @@@ -1167,8 -1189,8 +1195,8 @@@ static bool ext_requested(u8 autoneg, c return autoneg == AUTONEG_ENABLE ? ext_link_mode : ext_supported; }
-int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, - const struct ethtool_link_ksettings *link_ksettings) +static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, + const struct ethtool_link_ksettings *link_ksettings) { struct mlx5_core_dev *mdev = priv->mdev; struct mlx5_port_eth_proto eproto; @@@ -1268,7 -1290,7 +1296,7 @@@ static u32 mlx5e_get_rxfh_indir_size(st return mlx5e_ethtool_get_rxfh_indir_size(priv); }
-int mlx5e_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) +static int mlx5e_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param *rxfh) { struct mlx5e_priv *priv = netdev_priv(netdev); u32 rss_context = rxfh->rss_context; @@@ -1281,23 -1303,36 +1309,36 @@@ return err; }
-int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, - struct netlink_ext_ack *extack) +static int mlx5e_set_rxfh(struct net_device *dev, struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) { struct mlx5e_priv *priv = netdev_priv(dev); u32 *rss_context = &rxfh->rss_context; u8 hfunc = rxfh->hfunc; + unsigned int count; int err;
mutex_lock(&priv->state_lock); + + count = priv->channels.params.num_channels; + + if (hfunc == ETH_RSS_HASH_XOR) { + unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8(); + + if (count > xor8_max_channels) { + err = -EINVAL; + netdev_err(priv->netdev, "%s: Cannot set RSS hash function to XOR, current number of channels (%d) exceeds the maximum allowed for XOR8 RSS hfunc (%d)\n", + __func__, count, xor8_max_channels); + goto unlock; + } + } + if (*rss_context && rxfh->rss_delete) { err = mlx5e_rx_res_rss_destroy(priv->rx_res, *rss_context); goto unlock; }
if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) { - unsigned int count = priv->channels.params.num_channels; - err = mlx5e_rx_res_rss_init(priv->rx_res, rss_context, count); if (err) goto unlock; @@@ -1411,8 -1446,8 +1452,8 @@@ static void mlx5e_get_pause_stats(struc mlx5e_stats_pause_get(priv, pause_stats); }
-void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, - struct ethtool_pauseparam *pauseparam) +static void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) { struct mlx5_core_dev *mdev = priv->mdev; int err; @@@ -1433,8 -1468,8 +1474,8 @@@ static void mlx5e_get_pauseparam(struc mlx5e_ethtool_get_pauseparam(priv, pauseparam); }
-int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, - struct ethtool_pauseparam *pauseparam) +static int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, + struct ethtool_pauseparam *pauseparam) { struct mlx5_core_dev *mdev = priv->mdev; int err; @@@ -2387,14 -2422,6 +2428,14 @@@ static void mlx5e_get_rmon_stats(struc mlx5e_stats_rmon_get(priv, rmon_stats, ranges); }
+static void mlx5e_get_ts_stats(struct net_device *netdev, + struct ethtool_ts_stats *ts_stats) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_stats_ts_get(priv, ts_stats); +} + const struct ethtool_ops mlx5e_ethtool_ops = { .cap_rss_ctx_supported = true, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | @@@ -2444,6 -2471,5 +2485,6 @@@ .get_eth_mac_stats = mlx5e_get_eth_mac_stats, .get_eth_ctrl_stats = mlx5e_get_eth_ctrl_stats, .get_rmon_stats = mlx5e_get_rmon_stats, + .get_ts_stats = mlx5e_get_ts_stats, .get_link_ext_stats = mlx5e_get_link_ext_stats }; diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a0d3af96dcb16,b375ef268671a..dec60a6836dbb --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@@ -4950,7 -4950,10 +4950,7 @@@ static int mlx5e_bridge_setlink(struct if (!br_spec) return -EINVAL;
- nla_for_each_nested(attr, br_spec, rem) { - if (nla_type(attr) != IFLA_BRIDGE_MODE) - continue; - + nla_for_each_nested_type(attr, IFLA_BRIDGE_MODE, br_spec, rem) { mode = nla_get_u16(attr); if (mode > BRIDGE_MODE_VEPA) return -EINVAL; @@@ -5562,7 -5565,7 +5562,7 @@@ static void mlx5e_nic_disable(struct ml mlx5e_ipsec_cleanup(priv); }
-int mlx5e_update_nic_rx(struct mlx5e_priv *priv) +static int mlx5e_update_nic_rx(struct mlx5e_priv *priv) { return mlx5e_refresh_tirs(priv, false, false); } @@@ -5723,9 -5726,7 +5723,7 @@@ void mlx5e_priv_cleanup(struct mlx5e_pr kfree(priv->tx_rates); kfree(priv->txq2sq); destroy_workqueue(priv->wq); - mutex_lock(&priv->state_lock); mlx5e_selq_cleanup(&priv->selq); - mutex_unlock(&priv->state_lock); free_cpumask_var(priv->scratchpad.cpumask);
for (i = 0; i < priv->htb_max_qos_sqs; i++) diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 09a5929091015,e21a3b4128ce8..099bf10788899 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@@ -398,6 -398,8 +398,8 @@@ mlx5e_txwqe_complete(struct mlx5e_txqs (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) { u8 metadata_index = be32_to_cpu(eseg->flow_table_metadata);
+ mlx5e_ptp_metadata_fifo_pop(&sq->ptpsq->metadata_freelist); + mlx5e_skb_cb_hwtstamp_init(skb); mlx5e_ptp_metadata_map_put(&sq->ptpsq->metadata_map, skb, metadata_index); @@@ -496,9 -498,6 +498,6 @@@ mlx5e_sq_xmit_wqe(struct mlx5e_txqsq *s
err_drop: stats->dropped++; - if (unlikely(sq->ptpsq && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))) - mlx5e_ptp_metadata_fifo_push(&sq->ptpsq->metadata_freelist, - be32_to_cpu(eseg->flow_table_metadata)); dev_kfree_skb_any(skb); mlx5e_tx_flush(sq); } @@@ -657,7 -656,7 +656,7 @@@ static void mlx5e_cqe_ts_id_eseg(struc { if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) eseg->flow_table_metadata = - cpu_to_be32(mlx5e_ptp_metadata_fifo_pop(&ptpsq->metadata_freelist)); + cpu_to_be32(mlx5e_ptp_metadata_fifo_peek(&ptpsq->metadata_freelist)); }
static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, @@@ -750,13 -749,11 +749,13 @@@ static void mlx5e_consume_skb(struct ml u64 ts = get_cqe_ts(cqe);
hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts); - if (sq->ptpsq) + if (sq->ptpsq) { mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_CQE_HWTSTAMP, hwts.hwtstamp, sq->ptpsq->cq_stats); - else + } else { skb_tstamp_tx(skb, &hwts); + sq->stats->timestamps++; + } }
napi_consume_skb(skb, napi_budget); diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 20927f65ac2c6,1f60954c12f72..8a359d283bb23 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@@ -43,6 -43,7 +43,7 @@@ #include "rdma.h" #include "en.h" #include "fs_core.h" + #include "lib/mlx5.h" #include "lib/devcom.h" #include "lib/eq.h" #include "lib/fs_chains.h" @@@ -66,8 -67,6 +67,8 @@@
#define MLX5_ESW_FT_OFFLOADS_DROP_RULE (1)
+#define MLX5_ESW_MAX_CTRL_EQS 4 + static struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = { .max_fte = MLX5_ESW_VPORT_TBL_SIZE, .max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS, @@@ -2478,6 -2477,10 +2479,10 @@@ int esw_offloads_init(struct mlx5_eswit if (err) return err;
+ if (MLX5_ESWITCH_MANAGER(esw->dev) && + mlx5_esw_vport_match_metadata_supported(esw)) + esw->flags |= MLX5_ESWITCH_VPORT_MATCH_METADATA; + err = devl_params_register(priv_to_devlink(esw->dev), esw_devlink_params, ARRAY_SIZE(esw_devlink_params)); @@@ -3709,6 -3712,12 +3714,12 @@@ int mlx5_devlink_eswitch_mode_set(struc if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL;
+ if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && mlx5_get_sd(esw->dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change E-Switch mode to switchdev when multi-PF netdev (Socket Direct) is configured."); + return -EPERM; + } + mlx5_lag_disable_change(esw->dev); err = mlx5_esw_try_lock(esw); if (err < 0) { @@@ -4559,98 -4568,3 +4570,98 @@@ unlock return err; } #endif /* CONFIG_XFRM_OFFLOAD */ + +int +mlx5_devlink_port_fn_max_io_eqs_get(struct devlink_port *port, u32 *max_io_eqs, + struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + u16 vport_num = vport->vport; + struct mlx5_eswitch *esw; + void *query_ctx; + void *hca_caps; + u32 max_eqs; + int err; + + esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + NL_SET_ERR_MSG_MOD(extack, + "Device doesn't support VHCA management"); + return -EOPNOTSUPP; + } + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + mutex_lock(&esw->state_lock); + err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx, + MLX5_CAP_GENERAL); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); + goto out; + } + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + max_eqs = MLX5_GET(cmd_hca_cap, hca_caps, max_num_eqs); + if (max_eqs < MLX5_ESW_MAX_CTRL_EQS) + *max_io_eqs = 0; + else + *max_io_eqs = max_eqs - MLX5_ESW_MAX_CTRL_EQS; +out: + mutex_unlock(&esw->state_lock); + kfree(query_ctx); + return err; +} + +int +mlx5_devlink_port_fn_max_io_eqs_set(struct devlink_port *port, u32 max_io_eqs, + struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = mlx5_devlink_port_vport_get(port); + int query_out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out); + u16 vport_num = vport->vport; + struct mlx5_eswitch *esw; + void *query_ctx; + void *hca_caps; + u16 max_eqs; + int err; + + esw = mlx5_devlink_eswitch_nocheck_get(port->devlink); + if (!MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { + NL_SET_ERR_MSG_MOD(extack, + "Device doesn't support VHCA management"); + return -EOPNOTSUPP; + } + + if (check_add_overflow(max_io_eqs, MLX5_ESW_MAX_CTRL_EQS, &max_eqs)) { + NL_SET_ERR_MSG_MOD(extack, "Supplied value out of range"); + return -EINVAL; + } + + query_ctx = kzalloc(query_out_sz, GFP_KERNEL); + if (!query_ctx) + return -ENOMEM; + + mutex_lock(&esw->state_lock); + err = mlx5_vport_get_other_func_cap(esw->dev, vport_num, query_ctx, + MLX5_CAP_GENERAL); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed getting HCA caps"); + goto out; + } + + hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); + MLX5_SET(cmd_hca_cap, hca_caps, max_num_eqs, max_eqs); + + err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport_num, + MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE); + if (err) + NL_SET_ERR_MSG_MOD(extack, "Failed setting HCA caps"); + +out: + mutex_unlock(&esw->state_lock); + kfree(query_ctx); + return err; +} diff --combined drivers/net/ethernet/realtek/r8169_main.c index 60540ca634646,0fc5fe564ae50..fab21d2bc4ffa --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@@ -647,6 -647,8 +647,8 @@@ struct rtl8169_private const char *fw_name; struct rtl_fw *rtl_fw;
+ struct r8169_led_classdev *leds; + u32 ocp_base; };
@@@ -2227,8 -2229,6 +2229,8 @@@ static enum mac_version rtl8169_get_mac * the wild. Let's disable detection. * { 0x7cf, 0x540, RTL_GIGA_MAC_VER_45 }, */ + /* Realtek calls it RTL8168M, but it's handled like RTL8168H */ + { 0x7cf, 0x6c0, RTL_GIGA_MAC_VER_46 },
/* 8168G family. */ { 0x7cf, 0x5c8, RTL_GIGA_MAC_VER_44 }, @@@ -5046,6 -5046,9 +5048,9 @@@ static void rtl_remove_one(struct pci_d
cancel_work_sync(&tp->wk.work);
+ if (IS_ENABLED(CONFIG_R8169_LEDS)) + r8169_remove_leds(tp->leds); + unregister_netdev(tp->dev);
if (tp->dash_type != RTL_DASH_NONE) @@@ -5503,9 -5506,9 +5508,9 @@@ static int rtl_init_one(struct pci_dev
if (IS_ENABLED(CONFIG_R8169_LEDS)) { if (rtl_is_8125(tp)) - rtl8125_init_leds(dev); + tp->leds = rtl8125_init_leds(dev); else if (tp->mac_version > RTL_GIGA_MAC_VER_06) - rtl8168_init_leds(dev); + tp->leds = rtl8168_init_leds(dev); }
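rtl_init_one() now keeps the handle returned by rtl8125_init_leds()/rtl8168_init_leds() in tp->leds so rtl_remove_one() can hand it to r8169_remove_leds(). A rough sketch of that keep-the-handle probe/remove pairing, with invented names standing in for the r8169 helpers:

#include <stdlib.h>

struct led_handle {
	int count;	/* whatever the LED code needs for teardown */
};

struct priv {
	struct led_handle *leds;	/* NULL when LED support is absent */
};

static struct led_handle *leds_init(void)
{
	return calloc(1, sizeof(struct led_handle));
}

static void leds_remove(struct led_handle *leds)
{
	free(leds);	/* free(NULL) is a no-op, so init failure is harmless */
}

static int driver_probe(struct priv *tp)
{
	tp->leds = leds_init();		/* keep the handle for remove */
	return 0;
}

static void driver_remove(struct priv *tp)
{
	leds_remove(tp->leds);
}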
netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n", diff --combined drivers/net/geneve.c index 163f94a5a58f3,6c2835086b57e..f918ca6146c82 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@@ -225,11 -225,10 +225,11 @@@ static void geneve_rx(struct geneve_de void *oiph;
if (ip_tunnel_collect_metadata() || gs->collect_md) { - __be16 flags; + IP_TUNNEL_DECLARE_FLAGS(flags) = { };
- flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) | - (gnvh->critical ? TUNNEL_CRIT_OPT : 0); + __set_bit(IP_TUNNEL_KEY_BIT, flags); + __assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam); + __assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical);
tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, vni_to_tunnel_id(gnvh->vni), @@@ -239,11 -238,9 +239,11 @@@ goto drop; } /* Update tunnel dst according to Geneve options. */ + ip_tunnel_flags_zero(flags); + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags); ip_tunnel_info_opts_set(&tun_dst->u.tun_info, gnvh->options, gnvh->opt_len * 4, - TUNNEL_GENEVE_OPT); + flags); } else { /* Drop packets w/ critical options, * since we don't support any... @@@ -748,15 -745,14 +748,15 @@@ static void geneve_build_header(struct { geneveh->ver = GENEVE_VER; geneveh->opt_len = info->options_len / 4; - geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM); - geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT); + geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags); + geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT, + info->key.tun_flags); geneveh->rsvd1 = 0; tunnel_id_to_vni(info->key.tun_id, geneveh->vni); geneveh->proto_type = inner_proto; geneveh->rsvd2 = 0;
- if (info->key.tun_flags & TUNNEL_GENEVE_OPT) + if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags)) ip_tunnel_info_opts_get(geneveh->options, info); }
@@@ -765,7 -761,7 +765,7 @@@ static int geneve_build_skb(struct dst_ bool xnet, int ip_hdr_len, bool inner_proto_inherit) { - bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); + bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); struct genevehdr *gnvh; __be16 inner_proto; int min_headroom; @@@ -826,7 -822,7 +826,7 @@@ static int geneve_xmit_skb(struct sk_bu __be16 sport; int err;
- if (!pskb_inet_may_pull(skb)) + if (!skb_vlan_inet_prepare(skb)) return -EINVAL;
if (!gs4) @@@ -882,8 -878,7 +882,8 @@@ if (geneve->cfg.collect_md) { ttl = key->ttl;
- df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ? + htons(IP_DF) : 0; } else { if (geneve->cfg.ttl_inherit) ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb); @@@ -915,8 -910,7 +915,8 @@@ udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst, tos, ttl, df, sport, geneve->cfg.info.key.tp_dst, !net_eq(geneve->net, dev_net(geneve->dev)), - !(info->key.tun_flags & TUNNEL_CSUM)); + !test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags)); return 0; }
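Throughout the geneve hunks, tests and updates on the old __be16 tun_flags word (TUNNEL_KEY, TUNNEL_OAM, TUNNEL_CSUM, ...) become test_bit()/__set_bit()/__assign_bit() operations on the new IP_TUNNEL_DECLARE_FLAGS() bitmap. The user-space sketch below mirrors that before/after shape with stand-in bitmap helpers; the bit names are illustrative, not the kernel's:

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

/* Minimal stand-ins for the kernel bitmap helpers used by the new API. */
#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)
#define DECLARE_BITMAP(name, bits) \
	unsigned long name[((bits) + BITS_PER_LONG - 1) / BITS_PER_LONG]

enum {				/* illustrative subset of the flag bits */
	TUN_KEY_BIT,
	TUN_OAM_BIT,
	TUN_CRIT_OPT_BIT,
	TUN_CSUM_BIT,
	__TUN_FLAG_NUM,
};

static void set_bit(int nr, unsigned long *map)
{
	map[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

static void assign_bit(int nr, unsigned long *map, bool value)
{
	if (value)
		map[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
	else
		map[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
}

static bool test_bit(int nr, const unsigned long *map)
{
	return map[nr / BITS_PER_LONG] & (1UL << (nr % BITS_PER_LONG));
}

int main(void)
{
	DECLARE_BITMAP(flags, __TUN_FLAG_NUM) = { 0 };
	int oam_in_header = 1;		/* what gnvh->oam would carry on RX */

	/* geneve_rx(): flags = TUNNEL_KEY | (oam ? TUNNEL_OAM : 0) becomes: */
	set_bit(TUN_KEY_BIT, flags);
	assign_bit(TUN_OAM_BIT, flags, oam_in_header);

	/* geneve_build_header(): !!(tun_flags & TUNNEL_OAM) becomes: */
	printf("oam=%d csum=%d\n",
	       test_bit(TUN_OAM_BIT, flags), test_bit(TUN_CSUM_BIT, flags));
	return 0;
}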
@@@ -935,7 -929,7 +935,7 @@@ static int geneve6_xmit_skb(struct sk_b __be16 sport; int err;
- if (!pskb_inet_may_pull(skb)) + if (!skb_vlan_inet_prepare(skb)) return -EINVAL;
if (!gs6) @@@ -1004,8 -998,7 +1004,8 @@@ udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev, &saddr, &key->u.ipv6.dst, prio, ttl, info->key.label, sport, geneve->cfg.info.key.tp_dst, - !(info->key.tun_flags & TUNNEL_CSUM)); + !test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags)); return 0; } #endif @@@ -1304,8 -1297,7 +1304,8 @@@ static struct geneve_dev *geneve_find_d
static bool is_tnl_info_zero(const struct ip_tunnel_info *info) { - return !(info->key.tun_id || info->key.tun_flags || info->key.tos || + return !(info->key.tun_id || info->key.tos || + !ip_tunnel_flags_empty(info->key.tun_flags) || info->key.ttl || info->key.label || info->key.tp_src || memchr_inv(&info->key.u, 0, sizeof(info->key.u))); } @@@ -1443,7 -1435,7 +1443,7 @@@ static int geneve_nl2info(struct nlatt "Remote IPv6 address cannot be Multicast"); return -EINVAL; } - info->key.tun_flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); cfg->use_udp6_rx_checksums = true; #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6], @@@ -1518,7 -1510,7 +1518,7 @@@ goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) - info->key.tun_flags |= TUNNEL_CSUM; + __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); }
if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) { @@@ -1528,7 -1520,7 +1528,7 @@@ goto change_notsup; } if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) - info->key.tun_flags &= ~TUNNEL_CSUM; + __clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); #else NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX], "IPv6 support not enabled in the kernel"); @@@ -1761,8 -1753,7 +1761,8 @@@ static int geneve_fill_info(struct sk_b info->key.u.ipv4.dst)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, - !!(info->key.tun_flags & TUNNEL_CSUM))) + test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags))) goto nla_put_failure;
#if IS_ENABLED(CONFIG_IPV6) @@@ -1771,8 -1762,7 +1771,8 @@@ &info->key.u.ipv6.dst)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, - !(info->key.tun_flags & TUNNEL_CSUM))) + !test_bit(IP_TUNNEL_CSUM_BIT, + info->key.tun_flags))) goto nla_put_failure; #endif } diff --combined include/net/ip_tunnels.h index d8f574fbb11ef,c286cc2e766ee..9a6a08ec77139 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@@ -36,24 -36,6 +36,24 @@@ (sizeof_field(struct ip_tunnel_key, u) - \ sizeof_field(struct ip_tunnel_key, u.ipv4))
+#define __ipt_flag_op(op, ...) \ + op(__VA_ARGS__, __IP_TUNNEL_FLAG_NUM) + +#define IP_TUNNEL_DECLARE_FLAGS(...) \ + __ipt_flag_op(DECLARE_BITMAP, __VA_ARGS__) + +#define ip_tunnel_flags_zero(...) __ipt_flag_op(bitmap_zero, __VA_ARGS__) +#define ip_tunnel_flags_copy(...) __ipt_flag_op(bitmap_copy, __VA_ARGS__) +#define ip_tunnel_flags_and(...) __ipt_flag_op(bitmap_and, __VA_ARGS__) +#define ip_tunnel_flags_or(...) __ipt_flag_op(bitmap_or, __VA_ARGS__) + +#define ip_tunnel_flags_empty(...) \ + __ipt_flag_op(bitmap_empty, __VA_ARGS__) +#define ip_tunnel_flags_intersect(...) \ + __ipt_flag_op(bitmap_intersects, __VA_ARGS__) +#define ip_tunnel_flags_subset(...) \ + __ipt_flag_op(bitmap_subset, __VA_ARGS__) + struct ip_tunnel_key { __be64 tun_id; union { @@@ -66,11 -48,11 +66,11 @@@ struct in6_addr dst; } ipv6; } u; - __be16 tun_flags; - u8 tos; /* TOS for IPv4, TC for IPv6 */ - u8 ttl; /* TTL for IPv4, HL for IPv6 */ + IP_TUNNEL_DECLARE_FLAGS(tun_flags); __be32 label; /* Flow Label for IPv6 */ u32 nhid; + u8 tos; /* TOS for IPv4, TC for IPv6 */ + u8 ttl; /* TTL for IPv4, HL for IPv6 */ __be16 tp_src; __be16 tp_dst; __u8 flow_flags; @@@ -128,17 -110,6 +128,17 @@@ struct ip_tunnel_prl_entry
struct metadata_dst;
+/* Kernel-side variant of ip_tunnel_parm */ +struct ip_tunnel_parm_kern { + char name[IFNAMSIZ]; + IP_TUNNEL_DECLARE_FLAGS(i_flags); + IP_TUNNEL_DECLARE_FLAGS(o_flags); + __be32 i_key; + __be32 o_key; + int link; + struct iphdr iph; +}; + struct ip_tunnel { struct ip_tunnel __rcu *next; struct hlist_node hash_node; @@@ -165,7 -136,7 +165,7 @@@
struct dst_cache dst_cache;
- struct ip_tunnel_parm parms; + struct ip_tunnel_parm_kern parms;
int mlink; int encap_hlen; /* Encap header length (FOU,GUE) */ @@@ -186,7 -157,7 +186,7 @@@ };
struct tnl_ptk_info { - __be16 flags; + IP_TUNNEL_DECLARE_FLAGS(flags); __be16 proto; __be32 key; __be32 seq; @@@ -208,80 -179,11 +208,80 @@@ struct ip_tunnel_net int type; };
+static inline void ip_tunnel_set_options_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); + + ip_tunnel_flags_or(flags, flags, present); +} + +static inline void ip_tunnel_clear_options_present(unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); + + __ipt_flag_op(bitmap_andnot, flags, flags, present); +} + +static inline bool ip_tunnel_is_options_present(const unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(present) = { }; + + __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, present); + __set_bit(IP_TUNNEL_VXLAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT, present); + __set_bit(IP_TUNNEL_GTP_OPT_BIT, present); + __set_bit(IP_TUNNEL_PFCP_OPT_BIT, present); + + return ip_tunnel_flags_intersect(flags, present); +} + +static inline bool ip_tunnel_flags_is_be16_compat(const unsigned long *flags) +{ + IP_TUNNEL_DECLARE_FLAGS(supp) = { }; + + bitmap_set(supp, 0, BITS_PER_TYPE(__be16)); + __set_bit(IP_TUNNEL_VTI_BIT, supp); + + return ip_tunnel_flags_subset(flags, supp); +} + +static inline void ip_tunnel_flags_from_be16(unsigned long *dst, __be16 flags) +{ + ip_tunnel_flags_zero(dst); + + bitmap_write(dst, be16_to_cpu(flags), 0, BITS_PER_TYPE(__be16)); + __assign_bit(IP_TUNNEL_VTI_BIT, dst, flags & VTI_ISVTI); +} + +static inline __be16 ip_tunnel_flags_to_be16(const unsigned long *flags) +{ + __be16 ret; + + ret = cpu_to_be16(bitmap_read(flags, 0, BITS_PER_TYPE(__be16))); + if (test_bit(IP_TUNNEL_VTI_BIT, flags)) + ret |= VTI_ISVTI; + + return ret; +} + static inline void ip_tunnel_key_init(struct ip_tunnel_key *key, __be32 saddr, __be32 daddr, u8 tos, u8 ttl, __be32 label, __be16 tp_src, __be16 tp_dst, - __be64 tun_id, __be16 tun_flags) + __be64 tun_id, + const unsigned long *tun_flags) { key->tun_id = tun_id; key->u.ipv4.src = saddr; @@@ -291,7 -193,7 +291,7 @@@ key->tos = tos; key->ttl = ttl; key->label = label; - key->tun_flags = tun_flags; + ip_tunnel_flags_copy(key->tun_flags, tun_flags);
/* For the tunnel types on the top of IPsec, the tp_src and tp_dst of * the upper tunnel are used. @@@ -312,8 -214,12 +312,8 @@@ ip_tunnel_dst_cache_usable(const struc { if (skb->mark) return false; - if (!info) - return true; - if (info->key.tun_flags & TUNNEL_NOCACHE) - return false;
- return true; + return !info || !test_bit(IP_TUNNEL_NOCACHE_BIT, info->key.tun_flags); }
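The ip_tunnels.h hunk above also adds ip_tunnel_flags_is_be16_compat(), ip_tunnel_flags_from_be16() and ip_tunnel_flags_to_be16() so code that still traffics in the legacy 16-bit flag word can round-trip it, with IP_TUNNEL_VTI_BIT translated to VTI_ISVTI because it sits above the low 16 bits. A rough single-word user-space illustration of that round trip; the constants are illustrative and the kernel versions use bitmap_read()/bitmap_write():

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TUN_BE16_BITS	16	/* width of the old flag word */
#define TUN_VTI_BIT	20	/* example of a bit above the legacy range */
#define VTI_ISVTI_WORD	0x0001	/* how that bit is encoded in the old word */

static bool flags_fit_in_u16(unsigned long flags)
{
	unsigned long supported = (1UL << TUN_BE16_BITS) - 1;

	supported |= 1UL << TUN_VTI_BIT;
	return (flags & ~supported) == 0;
}

static uint16_t flags_to_u16(unsigned long flags)
{
	uint16_t ret = flags & 0xffff;

	if (flags & (1UL << TUN_VTI_BIT))
		ret |= VTI_ISVTI_WORD;
	return ret;
}

static unsigned long flags_from_u16(uint16_t word)
{
	unsigned long flags = word;

	if (word & VTI_ISVTI_WORD)
		flags |= 1UL << TUN_VTI_BIT;
	return flags;
}

int main(void)
{
	unsigned long flags = (1UL << 2) | (1UL << TUN_VTI_BIT);

	if (flags_fit_in_u16(flags))
		printf("legacy word: 0x%04x, back: 0x%lx\n",
		       (unsigned int)flags_to_u16(flags),
		       flags_from_u16(flags_to_u16(flags)));
	return 0;
}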
static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info @@@ -385,18 -291,14 +385,18 @@@ void ip_tunnel_xmit(struct sk_buff *skb const struct iphdr *tnl_params, const u8 protocol); void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const u8 proto, int tunnel_hlen); -int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); +int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p, + int cmd); +bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp, + const void __user *data); +bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp); int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn, - int link, __be16 flags, + int link, const unsigned long *flags, __be32 remote, __be32 local, __be32 key);
@@@ -405,16 -307,16 +405,16 @@@ int ip_tunnel_rcv(struct ip_tunnel *tun const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst, bool log_ecn_error); int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p, __u32 fwmark); + struct ip_tunnel_parm_kern *p, __u32 fwmark); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], - struct ip_tunnel_parm *p, __u32 fwmark); + struct ip_tunnel_parm_kern *p, __u32 fwmark); void ip_tunnel_setup(struct net_device *dev, unsigned int net_id);
bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], struct ip_tunnel_encap *encap);
void ip_tunnel_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms); + struct ip_tunnel_parm_kern *parms);
extern const struct header_ops ip_tunnel_header_ops; __be16 ip_tunnel_parse_protocol(const struct sk_buff *skb); @@@ -459,6 -361,39 +459,39 @@@ static inline bool pskb_inet_may_pull(s return pskb_network_may_pull(skb, nhlen); }
+ /* Variant of pskb_inet_may_pull(). + */ + static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) + { + int nhlen = 0, maclen = ETH_HLEN; + __be16 type = skb->protocol; + + /* Essentially this is skb_protocol(skb, true) + * And we get MAC len. + */ + if (eth_type_vlan(type)) + type = __vlan_get_protocol(skb, type, &maclen); + + switch (type) { + #if IS_ENABLED(CONFIG_IPV6) + case htons(ETH_P_IPV6): + nhlen = sizeof(struct ipv6hdr); + break; + #endif + case htons(ETH_P_IP): + nhlen = sizeof(struct iphdr); + break; + } + /* For ETH_P_IPV6/ETH_P_IP we make sure to pull + * a base network header in skb->head. + */ + if (!pskb_may_pull(skb, maclen + nhlen)) + return false; + + skb_set_network_header(skb, maclen); + return true; + } + static inline int ip_encap_hlen(struct ip_tunnel_encap *e) { const struct ip_tunnel_encap_ops *ops; @@@ -612,13 -547,12 +645,13 @@@ static inline void ip_tunnel_info_opts_
static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, - __be16 flags) + const unsigned long *flags) { info->options_len = len; if (len > 0) { memcpy(ip_tunnel_info_opts(info), from, len); - info->key.tun_flags |= flags; + ip_tunnel_flags_or(info->key.tun_flags, info->key.tun_flags, + flags); } }
@@@ -662,7 -596,7 +695,7 @@@ static inline void ip_tunnel_info_opts_
static inline void ip_tunnel_info_opts_set(struct ip_tunnel_info *info, const void *from, int len, - __be16 flags) + const unsigned long *flags) { info->options_len = 0; } diff --combined net/batman-adv/translation-table.c index 0555cb6114898,2243cec18ecc8..b21ff3c36b07d --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@@ -208,6 -208,20 +208,6 @@@ batadv_tt_global_hash_find(struct batad return tt_global_entry; }
-/** - * batadv_tt_local_entry_free_rcu() - free the tt_local_entry - * @rcu: rcu pointer of the tt_local_entry - */ -static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu) -{ - struct batadv_tt_local_entry *tt_local_entry; - - tt_local_entry = container_of(rcu, struct batadv_tt_local_entry, - common.rcu); - - kmem_cache_free(batadv_tl_cache, tt_local_entry); -} - /** * batadv_tt_local_entry_release() - release tt_local_entry from lists and queue * for free after rcu grace period @@@ -222,7 -236,7 +222,7 @@@ static void batadv_tt_local_entry_relea
batadv_softif_vlan_put(tt_local_entry->vlan);
- call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu); + kfree_rcu(tt_local_entry, common.rcu); }
/** @@@ -240,6 -254,20 +240,6 @@@ batadv_tt_local_entry_put(struct batadv batadv_tt_local_entry_release); }
-/** - * batadv_tt_global_entry_free_rcu() - free the tt_global_entry - * @rcu: rcu pointer of the tt_global_entry - */ -static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) -{ - struct batadv_tt_global_entry *tt_global_entry; - - tt_global_entry = container_of(rcu, struct batadv_tt_global_entry, - common.rcu); - - kmem_cache_free(batadv_tg_cache, tt_global_entry); -} - /** * batadv_tt_global_entry_release() - release tt_global_entry from lists and * queue for free after rcu grace period @@@ -254,7 -282,7 +254,7 @@@ void batadv_tt_global_entry_release(str
batadv_tt_global_del_orig_list(tt_global_entry);
- call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu); + kfree_rcu(tt_global_entry, common.rcu); }
/** @@@ -379,6 -407,19 +379,6 @@@ static void batadv_tt_global_size_dec(s batadv_tt_global_size_mod(orig_node, vid, -1); }
-/** - * batadv_tt_orig_list_entry_free_rcu() - free the orig_entry - * @rcu: rcu pointer of the orig_entry - */ -static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) -{ - struct batadv_tt_orig_list_entry *orig_entry; - - orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); - - kmem_cache_free(batadv_tt_orig_cache, orig_entry); -} - /** * batadv_tt_orig_list_entry_release() - release tt orig entry from lists and * queue for free after rcu grace period @@@ -392,7 -433,7 +392,7 @@@ static void batadv_tt_orig_list_entry_r refcount);
batadv_orig_node_put(orig_entry->orig_node); - call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); + kfree_rcu(orig_entry, rcu); }
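The three translation-table.c hunks replace call_rcu() callbacks whose only job was to free the entry with kfree_rcu(ptr, member), deleting the boilerplate *_free_rcu() functions. kfree_rcu() only needs to know where the struct rcu_head lives inside the object; the toy below mimics that offset bookkeeping in plain C, with a deferred-free list standing in for the RCU grace period:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

struct defer_node {
	struct defer_node *next;
	size_t offset;		/* offset of this node inside the object */
};

static struct defer_node *defer_list;

#define defer_free(ptr, member) \
	defer_free_node(&(ptr)->member, offsetof(__typeof__(*(ptr)), member))

static void defer_free_node(struct defer_node *node, size_t offset)
{
	node->offset = offset;
	node->next = defer_list;
	defer_list = node;
}

static void defer_flush(void)
{
	while (defer_list) {
		struct defer_node *node = defer_list;

		defer_list = node->next;
		free((char *)node - node->offset);	/* back to the object */
	}
}

struct tt_entry {			/* stand-in for a batman-adv entry */
	int addr;
	struct defer_node rcu;
};

int main(void)
{
	struct tt_entry *e = calloc(1, sizeof(*e));

	defer_free(e, rcu);		/* ~ kfree_rcu(e, rcu) */
	defer_flush();			/* ~ end of grace period */
	printf("done\n");
	return 0;
}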
/** @@@ -3907,7 -3948,7 +3907,7 @@@ void batadv_tt_local_resize_to_mtu(stru
spin_lock_bh(&bat_priv->tt.commit_lock);
- while (true) { + while (timeout) { table_size = batadv_tt_local_table_transmit_size(bat_priv); if (packet_size_max >= table_size) break; diff --combined net/ipv6/addrconf.c index 82ad6b8fa007b,779aa6ecdd499..3a92d002abe4b --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@@ -63,7 -63,6 +63,7 @@@ #include <linux/string.h> #include <linux/hash.h>
+#include <net/ip_tunnels.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/snmp.h> @@@ -2092,9 -2091,10 +2092,10 @@@ struct inet6_ifaddr *ipv6_get_ifaddr(st if (ipv6_addr_equal(&ifp->addr, addr)) { if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { - result = ifp; - in6_ifa_hold(ifp); - break; + if (in6_ifa_hold_safe(ifp)) { + result = ifp; + break; + } } } } @@@ -2918,7 -2918,7 +2919,7 @@@ put static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev, struct in6_ifreq *ireq) { - struct ip_tunnel_parm p = { }; + struct ip_tunnel_parm_kern p = { }; int err;
if (!(ipv6_addr_type(&ireq->ifr6_addr) & IPV6_ADDR_COMPATv4)) diff --combined net/ipv6/ip6_fib.c index ddd8e3c2df4a5,c1f62352a4814..31d77885bcae3 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@@ -623,22 -623,23 +623,22 @@@ static int inet6_dump_fib(struct sk_buf struct rt6_rtnl_dump_arg arg = { .filter.dump_exceptions = true, .filter.dump_routes = true, - .filter.rtnl_held = true, + .filter.rtnl_held = false, }; const struct nlmsghdr *nlh = cb->nlh; struct net *net = sock_net(skb->sk); - unsigned int h, s_h; unsigned int e = 0, s_e; + struct hlist_head *head; struct fib6_walker *w; struct fib6_table *tb; - struct hlist_head *head; - int res = 0; + unsigned int h, s_h; + int err = 0;
+ rcu_read_lock(); if (cb->strict_check) { - int err; - err = ip_valid_fib_dump_req(net, nlh, &arg.filter, cb); if (err < 0) - return err; + goto unlock; } else if (nlmsg_len(nlh) >= sizeof(struct rtmsg)) { struct rtmsg *rtm = nlmsg_data(nlh);
@@@ -653,10 -654,8 +653,10 @@@ * 1. allocate and initialize walker. */ w = kzalloc(sizeof(*w), GFP_ATOMIC); - if (!w) - return -ENOMEM; + if (!w) { + err = -ENOMEM; + goto unlock; + } w->func = fib6_dump_node; cb->args[2] = (long)w;
@@@ -676,46 -675,46 +676,46 @@@ tb = fib6_get_table(net, arg.filter.table_id); if (!tb) { if (rtnl_msg_family(cb->nlh) != PF_INET6) - goto out; + goto unlock;
NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); - return -ENOENT; + err = -ENOENT; + goto unlock; }
if (!cb->args[0]) { - res = fib6_dump_table(tb, skb, cb); - if (!res) + err = fib6_dump_table(tb, skb, cb); + if (!err) cb->args[0] = 1; } - goto out; + goto unlock; }
s_h = cb->args[0]; s_e = cb->args[1];
- rcu_read_lock(); for (h = s_h; h < FIB6_TABLE_HASHSZ; h++, s_e = 0) { e = 0; head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, head, tb6_hlist) { if (e < s_e) goto next; - res = fib6_dump_table(tb, skb, cb); - if (res != 0) - goto out_unlock; + err = fib6_dump_table(tb, skb, cb); + if (err != 0) + goto out; next: e++; } } -out_unlock: - rcu_read_unlock(); +out: cb->args[1] = e; cb->args[0] = h; -out: - res = res < 0 ? res : skb->len; - if (res <= 0) + +unlock: + rcu_read_unlock(); + if (err <= 0) fib6_dump_end(cb); - return res; + return err; }
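inet6_dump_fib() now holds rcu_read_lock() across the whole dump (the handler is registered with RTNL_FLAG_DUMP_UNLOCKED later in this diff), so the early returns are rewritten as goto unlock and every path funnels through a single rcu_read_unlock() before err is examined. A small stand-alone sketch of that single-exit locking shape, with a mutex standing in for the RCU read lock and illustrative error values:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t dump_lock = PTHREAD_MUTEX_INITIALIZER;

/* All failure paths jump to the one label that drops the lock. */
static int dump_tables(int table_id, int strict)
{
	int err = 0;

	pthread_mutex_lock(&dump_lock);

	if (strict && table_id < 0) {
		err = -22;		/* bad request */
		goto unlock;
	}

	if (table_id == 0) {
		err = -2;		/* table does not exist */
		goto unlock;
	}

	printf("dumping table %d\n", table_id);

unlock:
	pthread_mutex_unlock(&dump_lock);
	return err;
}

int main(void)
{
	dump_tables(5, 1);
	return dump_tables(0, 0);
}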
void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val) @@@ -1386,7 -1385,10 +1386,10 @@@ int fib6_add(struct fib6_node *root, st struct nl_info *info, struct netlink_ext_ack *extack) { struct fib6_table *table = rt->fib6_table; - struct fib6_node *fn, *pn = NULL; + struct fib6_node *fn; + #ifdef CONFIG_IPV6_SUBTREES + struct fib6_node *pn = NULL; + #endif int err = -ENOMEM; int allow_create = 1; int replace_required = 0; @@@ -1410,9 -1412,9 +1413,9 @@@ goto out; }
+ #ifdef CONFIG_IPV6_SUBTREES pn = fn;
- #ifdef CONFIG_IPV6_SUBTREES if (rt->fib6_src.plen) { struct fib6_node *sn;
@@@ -2507,7 -2509,7 +2510,7 @@@ int __init fib6_init(void goto out_kmem_cache_create;
ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL, - inet6_dump_fib, 0); + inet6_dump_fib, RTNL_FLAG_DUMP_UNLOCKED); if (ret) goto out_unregister_subsys;
diff --combined net/unix/af_unix.c index 61ecfa9c9c6b1,d032eb5fa6df1..142d210b5b035 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@@ -546,7 -546,7 +546,7 @@@ static void unix_write_space(struct soc if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); } @@@ -979,11 -979,11 +979,11 @@@ static struct sock *unix_create1(struc sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; u = unix_sk(sk); - u->inflight = 0; + u->listener = NULL; + u->vertex = NULL; u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); - INIT_LIST_HEAD(&u->link); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ init_waitqueue_head(&u->peer_wait); @@@ -1597,7 -1597,6 +1597,7 @@@ restart newsk->sk_type = sk->sk_type; init_peercred(newsk); newu = unix_sk(newsk); + newu->listener = other; RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other);
@@@ -1693,8 -1692,8 +1693,8 @@@ static int unix_accept(struct socket *s bool kern) { struct sock *sk = sock->sk; - struct sock *tsk; struct sk_buff *skb; + struct sock *tsk; int err;
err = -EOPNOTSUPP; @@@ -1719,7 -1718,6 +1719,7 @@@ }
tsk = skb->sk; + unix_update_edges(unix_sk(tsk)); skb_free_datagram(sk, skb); wake_up_interruptible(&unix_sk(sk)->peer_wait);
@@@ -1791,29 -1789,81 +1791,29 @@@ static inline bool too_many_unix_fds(st
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { - int i; - if (too_many_unix_fds(current)) return -ETOOMANYREFS;
- /* Need to duplicate file references for the sake of garbage - * collection. Otherwise a socket in the fps might become a - * candidate for GC while the skb is not yet queued. - */ - UNIXCB(skb).fp = scm_fp_dup(scm->fp); - if (!UNIXCB(skb).fp) - return -ENOMEM; + UNIXCB(skb).fp = scm->fp; + scm->fp = NULL;
- for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->user, scm->fp->fp[i]); + if (unix_prepare_fpl(UNIXCB(skb).fp)) + return -ENOMEM;
return 0; }
static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) { - int i; - scm->fp = UNIXCB(skb).fp; UNIXCB(skb).fp = NULL;
- for (i = scm->fp->count - 1; i >= 0; i--) - unix_notinflight(scm->fp->user, scm->fp->fp[i]); + unix_destroy_fpl(scm->fp); }
static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) { scm->fp = scm_fp_dup(UNIXCB(skb).fp); - - /* - * Garbage collection of unix sockets starts by selecting a set of - * candidate sockets which have reference only from being in flight - * (total_refs == inflight_refs). This condition is checked once during - * the candidate collection phase, and candidates are marked as such, so - * that non-candidates can later be ignored. While inflight_refs is - * protected by unix_gc_lock, total_refs (file count) is not, hence this - * is an instantaneous decision. - * - * Once a candidate, however, the socket must not be reinstalled into a - * file descriptor while the garbage collection is in progress. - * - * If the above conditions are met, then the directed graph of - * candidates (*) does not change while unix_gc_lock is held. - * - * Any operations that changes the file count through file descriptors - * (dup, close, sendmsg) does not change the graph since candidates are - * not installed in fds. - * - * Dequeing a candidate via recvmsg would install it into an fd, but - * that takes unix_gc_lock to decrement the inflight count, so it's - * serialized with garbage collection. - * - * MSG_PEEK is special in that it does not change the inflight count, - * yet does install the socket into an fd. The following lock/unlock - * pair is to ensure serialization with garbage collection. It must be - * done between incrementing the file count and installing the file into - * an fd. - * - * If garbage collection starts after the barrier provided by the - * lock/unlock, then it will see the elevated refcount and not mark this - * as a candidate. If a garbage collection is already in progress - * before the file count was incremented, then the lock/unlock pair will - * ensure that garbage collection is finished before progressing to - * installing the fd. - * - * (*) A -> B where B is on the queue of A or B is on the queue of C - * which is on the queue of listening socket A. - */ - spin_lock(&unix_gc_lock); - spin_unlock(&unix_gc_lock); }
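With the replacement garbage collector, unix_attach_fds() no longer duplicates the fd list and marks each file in flight; it takes ownership of scm->fp outright and leaves the bookkeeping to unix_prepare_fpl()/unix_destroy_fpl(), and the MSG_PEEK spin_lock/spin_unlock barrier on unix_gc_lock disappears along with the old inflight accounting. A tiny sketch of the copy-versus-move ownership handoff, with made-up types in place of the af_unix structures:

#include <stdio.h>
#include <stdlib.h>

struct fp_list {
	int count;
	int fds[8];
};

struct scm_cookie { struct fp_list *fp; };
struct skb_cb     { struct fp_list *fp; };

/* Old shape: duplicate the list so both sides own a copy. */
static int attach_by_copy(struct skb_cb *cb, struct scm_cookie *scm)
{
	cb->fp = malloc(sizeof(*cb->fp));
	if (!cb->fp)
		return -1;
	*cb->fp = *scm->fp;	/* scm->fp is still owned (and freed) by the caller */
	return 0;
}

/* New shape: move the one list into the skb and clear the source. */
static int attach_by_move(struct skb_cb *cb, struct scm_cookie *scm)
{
	cb->fp = scm->fp;
	scm->fp = NULL;		/* caller no longer owns or frees it */
	return 0;
}

int main(void)
{
	struct fp_list *l = calloc(1, sizeof(*l));
	struct scm_cookie scm = { .fp = l };
	struct skb_cb cb = { 0 };

	attach_by_copy(&cb, &scm);	/* two allocations alive afterwards */
	free(cb.fp);

	attach_by_move(&cb, &scm);	/* the single allocation changes hands */
	printf("scm.fp=%p cb.fp=%p\n", (void *)scm.fp, (void *)cb.fp);
	free(cb.fp);
	return 0;
}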
static void unix_destruct_scm(struct sk_buff *skb) @@@ -1887,10 -1937,8 +1887,10 @@@ static void scm_stat_add(struct sock *s struct scm_fp_list *fp = UNIXCB(skb).fp; struct unix_sock *u = unix_sk(sk);
- if (unlikely(fp && fp->count)) + if (unlikely(fp && fp->count)) { atomic_add(fp->count, &u->scm_stat.nr_fds); + unix_add_edges(fp, u); + } }
static void scm_stat_del(struct sock *sk, struct sk_buff *skb) @@@ -1898,10 -1946,8 +1898,10 @@@ struct scm_fp_list *fp = UNIXCB(skb).fp; struct unix_sock *u = unix_sk(sk);
- if (unlikely(fp && fp->count)) + if (unlikely(fp && fp->count)) { atomic_sub(fp->count, &u->scm_stat.nr_fds); + unix_del_edges(fp); + } }
/* @@@ -2619,7 -2665,9 +2619,9 @@@ static struct sk_buff *manage_oob(struc } } else if (!(flags & MSG_PEEK)) { skb_unlink(skb, &sk->sk_receive_queue); - consume_skb(skb); + WRITE_ONCE(u->oob_skb, NULL); + if (!WARN_ON_ONCE(skb_unref(skb))) + kfree_skb(skb); skb = skb_peek(&sk->sk_receive_queue); } } diff --combined tools/include/linux/mm.h index 7a6b98f4e5792,7d73da0980473..dc0fc7125bc31 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@@ -2,8 -2,8 +2,8 @@@ #ifndef _TOOLS_LINUX_MM_H #define _TOOLS_LINUX_MM_H
+#include <linux/align.h> #include <linux/mmzone.h> -#include <uapi/linux/const.h>
#define PAGE_SHIFT 12 #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) @@@ -11,6 -11,9 +11,6 @@@
#define PHYS_ADDR_MAX (~(phys_addr_t)0)
-#define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) -#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) - #define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
#define __va(x) ((void *)((unsigned long)(x))) @@@ -34,4 -37,9 +34,9 @@@ static inline void totalram_pages_add(l { }
+ static inline int early_pfn_to_nid(unsigned long pfn) + { + return 0; + } + #endif
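tools/include/linux/mm.h drops its local ALIGN()/ALIGN_DOWN() definitions in favour of <linux/align.h> and grows an early_pfn_to_nid() stub. For reference, the rounding those macros perform for a power-of-two alignment, checked with equivalent arithmetic in user space:

#include <stdio.h>

/* Equivalent to the kernel helpers when 'a' is a power of two. */
#define ALIGN(x, a)		(((x) + ((a) - 1)) & ~((a) - 1))
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))

int main(void)
{
	unsigned long page = 4096;

	/* 5000 rounds up to 8192 and down to 4096 with a 4 KiB page size. */
	printf("%lu %lu\n", ALIGN(5000UL, page), ALIGN_DOWN(5000UL, page));
	return 0;
}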