The following commit has been merged in the master branch: commit adc2e56ebe6377f5c032d96aee0feac30a640453 Merge: 4bea7207a80c8bba3b3eb5b84c407b162968475f 9ed13a17e38e0537e24d9b507645002bf8d0201f Author: Jakub Kicinski kuba@kernel.org Date: Fri Jun 18 19:47:02 2021 -0700
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Trivial conflicts in net/can/isotp.c and tools/testing/selftests/net/mptcp/mptcp_connect.sh
scaled_ppm_to_ppb() was moved from drivers/ptp/ptp_clock.c to include/linux/ptp_clock_kernel.h in -next so re-apply the fix there.
Signed-off-by: Jakub Kicinski kuba@kernel.org
diff --combined MAINTAINERS index fbf792962d7b,8c5ee008301a..395b052635ca --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -3877,6 -3877,7 +3877,7 @@@ L: linux-btrfs@vger.kernel.or S: Maintained W: http://btrfs.wiki.kernel.org/ Q: http://patchwork.kernel.org/project/linux-btrfs/list/ + C: irc://irc.libera.chat/btrfs T: git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git F: Documentation/filesystems/btrfs.rst F: fs/btrfs/ @@@ -6811,8 -6812,6 +6812,8 @@@ F: Documentation/devicetree/bindings/ne F: Documentation/devicetree/bindings/net/qca,ar803x.yaml F: Documentation/networking/phy.rst F: drivers/net/mdio/ +F: drivers/net/mdio/acpi_mdio.c +F: drivers/net/mdio/fwnode_mdio.c F: drivers/net/mdio/of_mdio.c F: drivers/net/pcs/ F: drivers/net/phy/ @@@ -6947,6 -6946,7 +6948,7 @@@ F: net/core/failover. FANOTIFY M: Jan Kara jack@suse.cz R: Amir Goldstein amir73il@gmail.com + R: Matthew Bobrowski repnop@google.com L: linux-fsdevel@vger.kernel.org S: Maintained F: fs/notify/fanotify/ @@@ -9139,7 -9139,6 +9141,7 @@@ F: Documentation/networking/device_driv F: drivers/net/ethernet/intel/ F: drivers/net/ethernet/intel/*/ F: include/linux/avf/virtchnl.h +F: include/linux/net/intel/iidc.h
INTEL FRAMEBUFFER DRIVER (excluding 810 and 815) M: Maik Broemme mbroemme@libmpq.org @@@ -9453,13 -9452,6 +9455,13 @@@ L: Dell.Client.Kernel@dell.co S: Maintained F: drivers/platform/x86/intel-wmi-thunderbolt.c
+INTEL WWAN IOSM DRIVER +M: M Chetan Kumar m.chetan.kumar@intel.com +M: Intel Corporation linuxwwan@intel.com +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/wwan/iosm/ + INTEL(R) TRACE HUB M: Alexander Shishkin alexander.shishkin@linux.intel.com S: Supported @@@ -12396,12 -12388,6 +12398,12 @@@ F: Documentation/userspace-api/media/dr F: drivers/media/pci/meye/ F: include/uapi/linux/meye.h
+MOTORCOMM PHY DRIVER +M: Peter Geis pgwipeout@gmail.com +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/phy/motorcomm.c + MOXA SMARTIO/INDUSTIO/INTELLIO SERIAL CARD S: Orphan F: Documentation/driver-api/serial/moxa-smartio.rst @@@ -13210,7 -13196,6 +13212,7 @@@ M: Vladimir Oltean <olteanv@gmail.com L: linux-kernel@vger.kernel.org S: Maintained F: drivers/net/dsa/sja1105 +F: drivers/net/pcs/pcs-xpcs-nxp.c
NXP TDA998X DRM DRIVER M: Russell King linux@armlinux.org.uk @@@ -14134,6 -14119,7 +14136,7 @@@ F: drivers/pci/controller/pci-v3-semi. PCI ENDPOINT SUBSYSTEM M: Kishon Vijay Abraham I kishon@ti.com M: Lorenzo Pieralisi lorenzo.pieralisi@arm.com + R: Krzysztof Wilczy��ski kw@linux.com L: linux-pci@vger.kernel.org S: Supported F: Documentation/PCI/endpoint/* @@@ -14182,6 -14168,7 +14185,7 @@@ F: drivers/pci/controller/pci-xgene-msi PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS M: Lorenzo Pieralisi lorenzo.pieralisi@arm.com R: Rob Herring robh@kernel.org + R: Krzysztof Wilczy��ski kw@linux.com L: linux-pci@vger.kernel.org S: Supported Q: http://patchwork.ozlabs.org/project/linux-pci/list/ @@@ -14341,10 -14328,12 +14345,12 @@@ PER-CPU MEMORY ALLOCATO M: Dennis Zhou dennis@kernel.org M: Tejun Heo tj@kernel.org M: Christoph Lameter cl@linux.com + L: linux-mm@kvack.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu.git F: arch/*/include/asm/percpu.h F: include/linux/percpu*.h + F: lib/percpu*.c F: mm/percpu*.c
PER-TASK DELAY ACCOUNTING @@@ -15587,13 -15576,6 +15593,13 @@@ F: include/linux/rpmsg F: include/uapi/linux/rpmsg.h F: samples/rpmsg/
+REMOTE PROCESSOR MESSAGING (RPMSG) WWAN CONTROL DRIVER +M: Stephan Gerhold stephan@gerhold.net +L: netdev@vger.kernel.org +L: linux-remoteproc@vger.kernel.org +S: Maintained +F: drivers/net/wwan/rpmsg_wwan_ctrl.c + RENESAS CLOCK DRIVERS M: Geert Uytterhoeven geert+renesas@glider.be L: linux-renesas-soc@vger.kernel.org @@@ -16578,6 -16560,7 +16584,7 @@@ F: drivers/misc/sgi-xp
SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS M: Karsten Graul kgraul@linux.ibm.com + M: Guvenc Gulce guvenc@linux.ibm.com L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ @@@ -17691,7 -17674,6 +17698,7 @@@ M: Jose Abreu <Jose.Abreu@synopsys.com L: netdev@vger.kernel.org S: Supported F: drivers/net/pcs/pcs-xpcs.c +F: drivers/net/pcs/pcs-xpcs.h F: include/linux/pcs/pcs-xpcs.h
SYNOPSYS DESIGNWARE I2C DRIVER @@@ -17701,7 -17683,6 +17708,6 @@@ R: Mika Westerberg <mika.westerberg@lin L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-designware-* - F: include/linux/platform_data/i2c-designware.h
SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER M: Jaehoon Chung jh80.chung@samsung.com @@@ -18893,6 -18874,13 +18899,13 @@@ S: Maintaine F: drivers/usb/host/isp116x* F: include/linux/usb/isp116x.h
+ USB ISP1760 DRIVER + M: Rui Miguel Silva rui.silva@linaro.org + L: linux-usb@vger.kernel.org + S: Maintained + F: drivers/usb/isp1760/* + F: Documentation/devicetree/bindings/usb/nxp,isp1760.yaml + USB LAN78XX ETHERNET DRIVER M: Woojung Huh woojung.huh@microchip.com M: UNGLinuxDriver@microchip.com @@@ -20037,6 -20025,7 +20050,7 @@@ F: arch/x86/xen/*swiotlb F: drivers/xen/*swiotlb*
XFS FILESYSTEM + C: irc://irc.oftc.net/xfs M: Darrick J. Wong djwong@kernel.org M: linux-xfs@vger.kernel.org L: linux-xfs@vger.kernel.org diff --combined drivers/base/core.c index b6836bfa985c,54ba506e5a89..2a61003ea2c1 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@@ -194,6 -194,17 +194,17 @@@ int device_links_read_lock_held(void { return srcu_read_lock_held(&device_links_srcu); } + + static void device_link_synchronize_removal(void) + { + synchronize_srcu(&device_links_srcu); + } + + static void device_link_remove_from_lists(struct device_link *link) + { + list_del_rcu(&link->s_node); + list_del_rcu(&link->c_node); + } #else /* !CONFIG_SRCU */ static DECLARE_RWSEM(device_links_lock);
@@@ -224,6 -235,16 +235,16 @@@ int device_links_read_lock_held(void return lockdep_is_held(&device_links_lock); } #endif + + static inline void device_link_synchronize_removal(void) + { + } + + static void device_link_remove_from_lists(struct device_link *link) + { + list_del(&link->s_node); + list_del(&link->c_node); + } #endif /* !CONFIG_SRCU */
static bool device_is_ancestor(struct device *dev, struct device *target) @@@ -445,8 -466,13 +466,13 @@@ static struct attribute *devlink_attrs[ }; ATTRIBUTE_GROUPS(devlink);
- static void device_link_free(struct device_link *link) + static void device_link_release_fn(struct work_struct *work) { + struct device_link *link = container_of(work, struct device_link, rm_work); + + /* Ensure that all references to the link object have been dropped. */ + device_link_synchronize_removal(); + while (refcount_dec_not_one(&link->rpm_active)) pm_runtime_put(link->supplier);
@@@ -455,24 -481,19 +481,19 @@@ kfree(link); }
- #ifdef CONFIG_SRCU - static void __device_link_free_srcu(struct rcu_head *rhead) - { - device_link_free(container_of(rhead, struct device_link, rcu_head)); - } - static void devlink_dev_release(struct device *dev) { struct device_link *link = to_devlink(dev);
- call_srcu(&device_links_srcu, &link->rcu_head, __device_link_free_srcu); - } - #else - static void devlink_dev_release(struct device *dev) - { - device_link_free(to_devlink(dev)); + INIT_WORK(&link->rm_work, device_link_release_fn); + /* + * It may take a while to complete this work because of the SRCU + * synchronization in device_link_release_fn() and if the consumer or + * supplier devices get deleted when it runs, so put it into the "long" + * workqueue. + */ + queue_work(system_long_wq, &link->rm_work); } - #endif
static struct class devlink_class = { .name = "devlink", @@@ -846,7 -867,6 +867,6 @@@ out } EXPORT_SYMBOL_GPL(device_link_add);
- #ifdef CONFIG_SRCU static void __device_link_del(struct kref *kref) { struct device_link *link = container_of(kref, struct device_link, kref); @@@ -856,25 -876,9 +876,9 @@@
pm_runtime_drop_link(link);
- list_del_rcu(&link->s_node); - list_del_rcu(&link->c_node); - device_unregister(&link->link_dev); - } - #else /* !CONFIG_SRCU */ - static void __device_link_del(struct kref *kref) - { - struct device_link *link = container_of(kref, struct device_link, kref); - - dev_info(link->consumer, "Dropping the link to %s\n", - dev_name(link->supplier)); - - pm_runtime_drop_link(link); - - list_del(&link->s_node); - list_del(&link->c_node); + device_link_remove_from_lists(link); device_unregister(&link->link_dev); } - #endif /* !CONFIG_SRCU */
static void device_link_put_kref(struct device_link *link) { @@@ -4723,13 -4727,6 +4727,13 @@@ void device_set_of_node_from_dev(struc } EXPORT_SYMBOL_GPL(device_set_of_node_from_dev);
+void device_set_node(struct device *dev, struct fwnode_handle *fwnode) +{ + dev->fwnode = fwnode; + dev->of_node = to_of_node(fwnode); +} +EXPORT_SYMBOL_GPL(device_set_node); + int device_match_name(struct device *dev, const void *name) { return sysfs_streq(dev_name(dev), name); diff --combined drivers/infiniband/hw/mlx5/fs.c index 941adf5cf3d0,18ee2f293825..5fbc0a8454b9 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@@ -1194,9 -1194,8 +1194,8 @@@ static struct ib_flow *mlx5_ib_create_f goto free_ucmd; }
- if (flow_attr->port > dev->num_ports || - (flow_attr->flags & - ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS))) { + if (flow_attr->flags & + ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) { err = -EINVAL; goto free_ucmd; } @@@ -2134,6 -2133,12 +2133,12 @@@ static int UVERBS_HANDLER(MLX5_IB_METHO if (err) goto end;
+ if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB && + mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) { + err = -EINVAL; + goto end; + } + uobj->object = obj; obj->mdev = dev->mdev; atomic_set(&obj->usecnt, 0); @@@ -2280,7 -2285,6 +2285,7 @@@ static int mlx5_ib_flow_action_create_p u8 ft_type, u8 dv_prt, void *in, size_t len) { + struct mlx5_pkt_reformat_params reformat_params; enum mlx5_flow_namespace_type namespace; u8 prm_prt; int ret; @@@ -2293,13 -2297,9 +2298,13 @@@ if (ret) return ret;
+ memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = prm_prt; + reformat_params.size = len; + reformat_params.data = in; maction->flow_action_raw.pkt_reformat = - mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, - in, namespace); + mlx5_packet_reformat_alloc(dev->mdev, &reformat_params, + namespace); if (IS_ERR(maction->flow_action_raw.pkt_reformat)) { ret = PTR_ERR(maction->flow_action_raw.pkt_reformat); return ret; diff --combined drivers/net/ethernet/amazon/ena/ena_netdev.c index 3bb0e66b2c7e,52571486705e..edaf37823c50 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@@ -35,6 -35,9 +35,6 @@@ MODULE_LICENSE("GPL")
#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \ NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR) -static int debug = -1; -module_param(debug, int, 0); -MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
static struct ena_aenq_handlers aenq_handlers;
@@@ -86,12 -89,6 +86,12 @@@ static void ena_increase_stat(u64 *stat u64_stats_update_end(syncp); }
+static void ena_ring_tx_doorbell(struct ena_ring *tx_ring) +{ + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); + ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp); +} + static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ena_adapter *adapter = netdev_priv(dev); @@@ -150,7 -147,7 +150,7 @@@ static int ena_xmit_common(struct net_d netif_dbg(adapter, tx_queued, dev, "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", ring->qid); - ena_com_write_sq_doorbell(ring->ena_com_io_sq); + ena_ring_tx_doorbell(ring); }
/* prepare the packet's descriptors to dma engine */ @@@ -200,6 -197,7 +200,6 @@@ static int ena_xdp_io_poll(struct napi_ int ret;
xdp_ring = ena_napi->xdp_ring; - xdp_ring->first_interrupt = ena_napi->first_interrupt;
xdp_budget = budget;
@@@ -231,7 -229,6 +231,7 @@@ xdp_ring->tx_stats.napi_comp += napi_comp_call; xdp_ring->tx_stats.tx_poll++; u64_stats_update_end(&xdp_ring->syncp); + xdp_ring->tx_stats.last_napi_jiffies = jiffies;
return ret; } @@@ -239,36 -236,48 +239,48 @@@ static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring, struct ena_tx_buffer *tx_info, struct xdp_frame *xdpf, - void **push_hdr, - u32 *push_len) + struct ena_com_tx_ctx *ena_tx_ctx) { struct ena_adapter *adapter = xdp_ring->adapter; struct ena_com_buf *ena_buf; - dma_addr_t dma = 0; + int push_len = 0; + dma_addr_t dma; + void *data; u32 size;
tx_info->xdpf = xdpf; + data = tx_info->xdpf->data; size = tx_info->xdpf->len; - ena_buf = tx_info->bufs;
- /* llq push buffer */ - *push_len = min_t(u32, size, xdp_ring->tx_max_header_size); - *push_hdr = tx_info->xdpf->data; + if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + /* Designate part of the packet for LLQ */ + push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
- if (size - *push_len > 0) { + ena_tx_ctx->push_header = data; + + size -= push_len; + data += push_len; + } + + ena_tx_ctx->header_len = push_len; + + if (size > 0) { dma = dma_map_single(xdp_ring->dev, - *push_hdr + *push_len, - size - *push_len, + data, + size, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(xdp_ring->dev, dma))) goto error_report_dma_error;
- tx_info->map_linear_data = 1; - tx_info->num_of_bufs = 1; - } + tx_info->map_linear_data = 0;
- ena_buf->paddr = dma; - ena_buf->len = size; + ena_buf = tx_info->bufs; + ena_buf->paddr = dma; + ena_buf->len = size; + + ena_tx_ctx->ena_bufs = ena_buf; + ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1; + }
return 0;
@@@ -277,10 -286,6 +289,6 @@@ error_report_dma_error &xdp_ring->syncp); netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
- xdp_return_frame_rx_napi(tx_info->xdpf); - tx_info->xdpf = NULL; - tx_info->num_of_bufs = 0; - return -EINVAL; }
@@@ -292,8 -297,6 +300,6 @@@ static int ena_xdp_xmit_frame(struct en struct ena_com_tx_ctx ena_tx_ctx = {}; struct ena_tx_buffer *tx_info; u16 next_to_use, req_id; - void *push_hdr; - u32 push_len; int rc;
next_to_use = xdp_ring->next_to_use; @@@ -301,15 -304,11 +307,11 @@@ tx_info = &xdp_ring->tx_buffer_info[req_id]; tx_info->num_of_bufs = 0;
- rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &push_hdr, &push_len); + rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx); if (unlikely(rc)) return rc;
- ena_tx_ctx.ena_bufs = tx_info->bufs; - ena_tx_ctx.push_header = push_hdr; - ena_tx_ctx.num_bufs = tx_info->num_of_bufs; ena_tx_ctx.req_id = req_id; - ena_tx_ctx.header_len = push_len;
rc = ena_xmit_common(dev, xdp_ring, @@@ -319,12 -318,14 +321,12 @@@ xdpf->len); if (rc) goto error_unmap_dma; - /* trigger the dma engine. ena_com_write_sq_doorbell() - * has a mb + + /* trigger the dma engine. ena_ring_tx_doorbell() + * calls a memory barrier inside it. */ - if (flags & XDP_XMIT_FLUSH) { - ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq); - ena_increase_stat(&xdp_ring->tx_stats.doorbells, 1, - &xdp_ring->syncp); - } + if (flags & XDP_XMIT_FLUSH) + ena_ring_tx_doorbell(xdp_ring);
return rc;
@@@ -365,8 -366,11 +367,8 @@@ static int ena_xdp_xmit(struct net_devi }
/* Ring doorbell to make device aware of the packets */ - if (flags & XDP_XMIT_FLUSH) { - ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq); - ena_increase_stat(&xdp_ring->tx_stats.doorbells, 1, - &xdp_ring->syncp); - } + if (flags & XDP_XMIT_FLUSH) + ena_ring_tx_doorbell(xdp_ring);
spin_unlock(&xdp_ring->xdp_tx_lock);
@@@ -381,6 -385,7 +383,6 @@@ static int ena_xdp_execute(struct ena_r u32 verdict = XDP_PASS; struct xdp_frame *xdpf; u64 *xdp_stat; - int qid;
rcu_read_lock(); xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog); @@@ -401,7 -406,8 +403,7 @@@ }
/* Find xmit queue */ - qid = rx_ring->qid + rx_ring->adapter->num_io_queues; - xdp_ring = &rx_ring->adapter->tx_ring[qid]; + xdp_ring = rx_ring->xdp_ring;
/* The XDP queues are shared between XDP_TX and XDP_REDIRECT */ spin_lock(&xdp_ring->xdp_tx_lock); @@@ -528,7 -534,7 +530,7 @@@ static void ena_xdp_exchange_program_rx rx_ring->rx_headroom = XDP_PACKET_HEADROOM; } else { ena_xdp_unregister_rxq_info(rx_ring); - rx_ring->rx_headroom = 0; + rx_ring->rx_headroom = NET_SKB_PAD; } } } @@@ -677,6 -683,7 +679,6 @@@ static void ena_init_io_rings_common(st ring->ena_dev = adapter->ena_dev; ring->per_napi_packets = 0; ring->cpu = 0; - ring->first_interrupt = false; ring->no_interrupt_event_cnt = 0; u64_stats_init(&ring->syncp); } @@@ -719,9 -726,7 +721,9 @@@ static void ena_init_io_rings(struct en rxr->smoothed_interval = ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); rxr->empty_rx_queue = 0; + rxr->rx_headroom = NET_SKB_PAD; adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rxr->xdp_ring = &adapter->tx_ring[i + adapter->num_io_queues]; } } } @@@ -975,66 -980,47 +977,66 @@@ static void ena_free_all_io_rx_resource ena_free_rx_resources(adapter, i); }
-static int ena_alloc_rx_page(struct ena_ring *rx_ring, - struct ena_rx_buffer *rx_info, gfp_t gfp) +static struct page *ena_alloc_map_page(struct ena_ring *rx_ring, + dma_addr_t *dma) { - int headroom = rx_ring->rx_headroom; - struct ena_com_buf *ena_buf; struct page *page; - dma_addr_t dma;
- /* restore page offset value in case it has been changed by device */ - rx_info->page_offset = headroom; - - /* if previous allocated page is not used */ - if (unlikely(rx_info->page)) - return 0; - - page = alloc_page(gfp); - if (unlikely(!page)) { + /* This would allocate the page on the same NUMA node the executing code + * is running on. + */ + page = dev_alloc_page(); + if (!page) { ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1, &rx_ring->syncp); - return -ENOMEM; + return ERR_PTR(-ENOSPC); }
/* To enable NIC-side port-mirroring, AKA SPAN port, * we make the buffer readable from the nic as well */ - dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE, - DMA_BIDIRECTIONAL); - if (unlikely(dma_mapping_error(rx_ring->dev, dma))) { + *dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(rx_ring->dev, *dma))) { ena_increase_stat(&rx_ring->rx_stats.dma_mapping_err, 1, &rx_ring->syncp); - __free_page(page); - return -EIO; + return ERR_PTR(-EIO); } + + return page; +} + +static int ena_alloc_rx_buffer(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info) +{ + int headroom = rx_ring->rx_headroom; + struct ena_com_buf *ena_buf; + struct page *page; + dma_addr_t dma; + int tailroom; + + /* restore page offset value in case it has been changed by device */ + rx_info->page_offset = headroom; + + /* if previous allocated page is not used */ + if (unlikely(rx_info->page)) + return 0; + + /* We handle DMA here */ + page = ena_alloc_map_page(rx_ring, &dma); + if (unlikely(IS_ERR(page))) + return PTR_ERR(page); + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "Allocate page %p, rx_info %p\n", page, rx_info);
+ tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + rx_info->page = page; ena_buf = &rx_info->ena_buf; ena_buf->paddr = dma + headroom; - ena_buf->len = ENA_PAGE_SIZE - headroom; + ena_buf->len = ENA_PAGE_SIZE - headroom - tailroom;
return 0; } @@@ -1081,7 -1067,8 +1083,7 @@@ static int ena_refill_rx_bufs(struct en
rx_info = &rx_ring->rx_buffer_info[req_id];
- rc = ena_alloc_rx_page(rx_ring, rx_info, - GFP_ATOMIC | __GFP_COMP); + rc = ena_alloc_rx_buffer(rx_ring, rx_info); if (unlikely(rc < 0)) { netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, "Failed to allocate buffer for rx queue %d\n", @@@ -1399,23 -1386,21 +1401,23 @@@ static int ena_clean_tx_irq(struct ena_ return tx_pkts; }
-static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags) +static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag) { struct sk_buff *skb;
- if (frags) - skb = napi_get_frags(rx_ring->napi); - else + if (!first_frag) skb = netdev_alloc_skb_ip_align(rx_ring->netdev, rx_ring->rx_copybreak); + else + skb = build_skb(first_frag, ENA_PAGE_SIZE);
if (unlikely(!skb)) { ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1, &rx_ring->syncp); + netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, - "Failed to allocate skb. frags: %d\n", frags); + "Failed to allocate skb. first_frag %s\n", + first_frag ? "provided" : "not provided"); return NULL; }
@@@ -1427,12 -1412,10 +1429,12 @@@ static struct sk_buff *ena_rx_skb(struc u32 descs, u16 *next_to_clean) { - struct sk_buff *skb; struct ena_rx_buffer *rx_info; u16 len, req_id, buf = 0; - void *va; + struct sk_buff *skb; + void *page_addr; + u32 page_offset; + void *data_addr;
len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; @@@ -1450,14 -1433,12 +1452,14 @@@ rx_info, rx_info->page);
/* save virt address of first buffer */ - va = page_address(rx_info->page) + rx_info->page_offset; + page_addr = page_address(rx_info->page); + page_offset = rx_info->page_offset; + data_addr = page_addr + page_offset;
- prefetch(va); + prefetch(data_addr);
if (len <= rx_ring->rx_copybreak) { - skb = ena_alloc_skb(rx_ring, false); + skb = ena_alloc_skb(rx_ring, NULL); if (unlikely(!skb)) return NULL;
@@@ -1470,7 -1451,7 +1472,7 @@@ dma_unmap_addr(&rx_info->ena_buf, paddr), len, DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, va, len); + skb_copy_to_linear_data(skb, data_addr, len); dma_sync_single_for_device(rx_ring->dev, dma_unmap_addr(&rx_info->ena_buf, paddr), len, @@@ -1484,18 -1465,16 +1486,18 @@@ return skb; }
- skb = ena_alloc_skb(rx_ring, true); + ena_unmap_rx_buff(rx_ring, rx_info); + + skb = ena_alloc_skb(rx_ring, page_addr); if (unlikely(!skb)) return NULL;
- do { - ena_unmap_rx_buff(rx_ring, rx_info); - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, - rx_info->page_offset, len, ENA_PAGE_SIZE); + /* Populate skb's linear part */ + skb_reserve(skb, page_offset); + skb_put(skb, len); + skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+ do { netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "RX skb updated. len %d. data_len %d\n", skb->len, skb->data_len); @@@ -1514,12 -1493,6 +1516,12 @@@ req_id = ena_bufs[buf].req_id;
rx_info = &rx_ring->rx_buffer_info[req_id]; + + ena_unmap_rx_buff(rx_ring, rx_info); + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, + rx_info->page_offset, len, ENA_PAGE_SIZE); + } while (1);
return skb; @@@ -1732,12 -1705,14 +1734,12 @@@ static int ena_clean_rx_irq(struct ena_
skb_record_rx_queue(skb, rx_ring->qid);
- if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) { - total_len += rx_ring->ena_bufs[0].len; + if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) rx_copybreak_pkt++; - napi_gro_receive(napi, skb); - } else { - total_len += skb->len; - napi_gro_frags(napi); - } + + total_len += skb->len; + + napi_gro_receive(napi, skb);
res_budget--; } while (likely(res_budget)); @@@ -1949,6 -1924,9 +1951,6 @@@ static int ena_io_poll(struct napi_stru tx_ring = ena_napi->tx_ring; rx_ring = ena_napi->rx_ring;
- tx_ring->first_interrupt = ena_napi->first_interrupt; - rx_ring->first_interrupt = ena_napi->first_interrupt; - tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) || @@@ -2003,8 -1981,6 +2005,8 @@@ tx_ring->tx_stats.tx_poll++; u64_stats_update_end(&tx_ring->syncp);
+ tx_ring->tx_stats.last_napi_jiffies = jiffies; + return ret; }
@@@ -2029,8 -2005,7 +2031,8 @@@ static irqreturn_t ena_intr_msix_io(in { struct ena_napi *ena_napi = data;
- ena_napi->first_interrupt = true; + /* Used to check HW health */ + WRITE_ONCE(ena_napi->first_interrupt, true);
WRITE_ONCE(ena_napi->interrupts_masked, true); smp_wmb(); /* write interrupts_masked before calling napi */ @@@ -3116,11 -3091,14 +3118,11 @@@ static netdev_tx_t ena_start_xmit(struc } }
- if (netif_xmit_stopped(txq) || !netdev_xmit_more()) { - /* trigger the dma engine. ena_com_write_sq_doorbell() - * has a mb + if (netif_xmit_stopped(txq) || !netdev_xmit_more()) + /* trigger the dma engine. ena_ring_tx_doorbell() + * calls a memory barrier inside it. */ - ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); - ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, - &tx_ring->syncp); - } + ena_ring_tx_doorbell(tx_ring);
return NETDEV_TX_OK;
@@@ -3370,7 -3348,7 +3372,7 @@@ static int ena_set_queues_placement_pol
llq_feature_mask = 1 << ENA_ADMIN_LLQ; if (!(ena_dev->supported_features & llq_feature_mask)) { - dev_err(&pdev->dev, + dev_warn(&pdev->dev, "LLQ is not supported Fallback to host mode policy.\n"); ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; return 0; @@@ -3681,9 -3659,7 +3683,9 @@@ static void ena_fw_reset_device(struct static int check_for_rx_interrupt_queue(struct ena_adapter *adapter, struct ena_ring *rx_ring) { - if (likely(rx_ring->first_interrupt)) + struct ena_napi *ena_napi = container_of(rx_ring->napi, struct ena_napi, napi); + + if (likely(READ_ONCE(ena_napi->first_interrupt))) return 0;
if (ena_com_cq_empty(rx_ring->ena_com_io_cq)) @@@ -3707,10 -3683,6 +3709,10 @@@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter, struct ena_ring *tx_ring) { + struct ena_napi *ena_napi = container_of(tx_ring->napi, struct ena_napi, napi); + unsigned int time_since_last_napi; + unsigned int missing_tx_comp_to; + bool is_tx_comp_time_expired; struct ena_tx_buffer *tx_buf; unsigned long last_jiffies; u32 missed_tx = 0; @@@ -3724,10 -3696,8 +3726,10 @@@ /* no pending Tx at this location */ continue;
- if (unlikely(!tx_ring->first_interrupt && time_is_before_jiffies(last_jiffies + - 2 * adapter->missing_tx_completion_to))) { + is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies + + 2 * adapter->missing_tx_completion_to); + + if (unlikely(!READ_ONCE(ena_napi->first_interrupt) && is_tx_comp_time_expired)) { /* If after graceful period interrupt is still not * received, we schedule a reset */ @@@ -3740,17 -3710,12 +3742,17 @@@ return -EIO; }
- if (unlikely(time_is_before_jiffies(last_jiffies + - adapter->missing_tx_completion_to))) { - if (!tx_buf->print_once) + is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies + + adapter->missing_tx_completion_to); + + if (unlikely(is_tx_comp_time_expired)) { + if (!tx_buf->print_once) { + time_since_last_napi = jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies); + missing_tx_comp_to = jiffies_to_msecs(adapter->missing_tx_completion_to); netif_notice(adapter, tx_err, adapter->netdev, - "Found a Tx that wasn't completed on time, qid %d, index %d.\n", - tx_ring->qid, i); + "Found a Tx that wasn't completed on time, qid %d, index %d. %u usecs have passed since last napi execution. Missing Tx timeout value %u msecs\n", + tx_ring->qid, i, time_since_last_napi, missing_tx_comp_to); + }
tx_buf->print_once = 1; missed_tx++; @@@ -4281,7 -4246,7 +4283,7 @@@ static int ena_probe(struct pci_dev *pd adapter->ena_dev = ena_dev; adapter->netdev = netdev; adapter->pdev = pdev; - adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + adapter->msg_enable = DEFAULT_MSG_ENABLE;
ena_dev->net_device = netdev;
diff --combined drivers/net/ethernet/atheros/alx/main.c index 45e380f3b065,7748b276e5fd..11ef1fbe7aee --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@@ -1,5 -1,5 +1,5 @@@ /* - * Copyright (c) 2013 Johannes Berg johannes@sipsolutions.net + * Copyright (c) 2013, 2021 Johannes Berg johannes@sipsolutions.net * * This file is free software: you may copy, redistribute and/or modify it * under the terms of the GNU General Public License as published by the @@@ -1091,9 -1091,8 +1091,9 @@@ static int alx_init_sw(struct alx_priv ALX_MAC_CTRL_RXFC_EN | ALX_MAC_CTRL_TXFC_EN | 7 << ALX_MAC_CTRL_PRMBLEN_SHIFT; + mutex_init(&alx->mtx);
- return err; + return 0; }
@@@ -1123,8 -1122,6 +1123,8 @@@ static void alx_halt(struct alx_priv *a { struct alx_hw *hw = &alx->hw;
+ lockdep_assert_held(&alx->mtx); + alx_netif_stop(alx); hw->link_speed = SPEED_UNKNOWN; hw->duplex = DUPLEX_UNKNOWN; @@@ -1150,8 -1147,6 +1150,8 @@@ static void alx_configure(struct alx_pr
static void alx_activate(struct alx_priv *alx) { + lockdep_assert_held(&alx->mtx); + /* hardware setting lost, restore it */ alx_reinit_rings(alx); alx_configure(alx); @@@ -1166,7 -1161,7 +1166,7 @@@
static void alx_reinit(struct alx_priv *alx) { - ASSERT_RTNL(); + lockdep_assert_held(&alx->mtx);
alx_halt(alx); alx_activate(alx); @@@ -1254,8 -1249,6 +1254,8 @@@ out_disable_adv_intr
static void __alx_stop(struct alx_priv *alx) { + lockdep_assert_held(&alx->mtx); + alx_free_irq(alx);
cancel_work_sync(&alx->link_check_wk); @@@ -1291,8 -1284,6 +1291,8 @@@ static void alx_check_link(struct alx_p int old_speed; int err;
+ lockdep_assert_held(&alx->mtx); + /* clear PHY internal interrupt status, otherwise the main * interrupt status will be asserted forever */ @@@ -1347,24 -1338,12 +1347,24 @@@ reset
static int alx_open(struct net_device *netdev) { - return __alx_open(netdev_priv(netdev), false); + struct alx_priv *alx = netdev_priv(netdev); + int ret; + + mutex_lock(&alx->mtx); + ret = __alx_open(alx, false); + mutex_unlock(&alx->mtx); + + return ret; }
static int alx_stop(struct net_device *netdev) { - __alx_stop(netdev_priv(netdev)); + struct alx_priv *alx = netdev_priv(netdev); + + mutex_lock(&alx->mtx); + __alx_stop(alx); + mutex_unlock(&alx->mtx); + return 0; }
@@@ -1374,18 -1353,18 +1374,18 @@@ static void alx_link_check(struct work_
alx = container_of(work, struct alx_priv, link_check_wk);
- rtnl_lock(); + mutex_lock(&alx->mtx); alx_check_link(alx); - rtnl_unlock(); + mutex_unlock(&alx->mtx); }
static void alx_reset(struct work_struct *work) { struct alx_priv *alx = container_of(work, struct alx_priv, reset_wk);
- rtnl_lock(); + mutex_lock(&alx->mtx); alx_reinit(alx); - rtnl_unlock(); + mutex_unlock(&alx->mtx); }
static int alx_tpd_req(struct sk_buff *skb) @@@ -1792,8 -1771,6 +1792,8 @@@ static int alx_probe(struct pci_dev *pd goto out_unmap; }
+ mutex_lock(&alx->mtx); + alx_reset_pcie(hw);
phy_configured = alx_phy_configured(hw); @@@ -1804,7 -1781,7 +1804,7 @@@ err = alx_reset_mac(hw); if (err) { dev_err(&pdev->dev, "MAC Reset failed, error = %d\n", err); - goto out_unmap; + goto out_unlock; }
/* setup link to put it in a known good starting state */ @@@ -1814,7 -1791,7 +1814,7 @@@ dev_err(&pdev->dev, "failed to configure PHY speed/duplex (err=%d)\n", err); - goto out_unmap; + goto out_unlock; } }
@@@ -1847,11 -1824,9 +1847,11 @@@ if (!alx_get_phy_info(hw)) { dev_err(&pdev->dev, "failed to identify PHY\n"); err = -EIO; - goto out_unmap; + goto out_unlock; }
+ mutex_unlock(&alx->mtx); + INIT_WORK(&alx->link_check_wk, alx_link_check); INIT_WORK(&alx->reset_wk, alx_reset); netif_carrier_off(netdev); @@@ -1868,14 -1843,13 +1868,15 @@@
return 0;
+out_unlock: + mutex_unlock(&alx->mtx); out_unmap: iounmap(hw->hw_addr); out_free_netdev: free_netdev(netdev); out_pci_release: pci_release_mem_regions(pdev); + pci_disable_pcie_error_reporting(pdev); out_pci_disable: pci_disable_device(pdev); return err; @@@ -1896,8 -1870,6 +1897,8 @@@ static void alx_remove(struct pci_dev * pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev);
+ mutex_destroy(&alx->mtx); + free_netdev(alx->dev); }
@@@ -1909,11 -1881,7 +1910,11 @@@ static int alx_suspend(struct device *d if (!netif_running(alx->dev)) return 0; netif_device_detach(alx->dev); + + mutex_lock(&alx->mtx); __alx_stop(alx); + mutex_unlock(&alx->mtx); + return 0; }
@@@ -1923,23 -1891,20 +1924,23 @@@ static int alx_resume(struct device *de struct alx_hw *hw = &alx->hw; int err;
+ mutex_lock(&alx->mtx); alx_reset_phy(hw);
- if (!netif_running(alx->dev)) - return 0; + if (!netif_running(alx->dev)) { + err = 0; + goto unlock; + }
- rtnl_lock(); err = __alx_open(alx, true); - rtnl_unlock(); if (err) - return err; + goto unlock;
netif_device_attach(alx->dev);
- return 0; +unlock: + mutex_unlock(&alx->mtx); + return err; }
static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume); @@@ -1958,7 -1923,7 +1959,7 @@@ static pci_ers_result_t alx_pci_error_d
dev_info(&pdev->dev, "pci error detected\n");
- rtnl_lock(); + mutex_lock(&alx->mtx);
if (netif_running(netdev)) { netif_device_detach(netdev); @@@ -1970,7 -1935,7 +1971,7 @@@ else pci_disable_device(pdev);
- rtnl_unlock(); + mutex_unlock(&alx->mtx);
return rc; } @@@ -1983,7 -1948,7 +1984,7 @@@ static pci_ers_result_t alx_pci_error_s
dev_info(&pdev->dev, "pci error slot reset\n");
- rtnl_lock(); + mutex_lock(&alx->mtx);
if (pci_enable_device(pdev)) { dev_err(&pdev->dev, "Failed to re-enable PCI device after reset\n"); @@@ -1996,7 -1961,7 +1997,7 @@@ if (!alx_reset_mac(hw)) rc = PCI_ERS_RESULT_RECOVERED; out: - rtnl_unlock(); + mutex_unlock(&alx->mtx);
return rc; } @@@ -2008,14 -1973,14 +2009,14 @@@ static void alx_pci_error_resume(struc
dev_info(&pdev->dev, "pci error resume\n");
- rtnl_lock(); + mutex_lock(&alx->mtx);
if (netif_running(netdev)) { alx_activate(alx); netif_device_attach(netdev); }
- rtnl_unlock(); + mutex_unlock(&alx->mtx); }
static const struct pci_error_handlers alx_err_handlers = { diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 6479ceedc352,762113a04dde..9a2b166d651e --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@@ -3894,6 -3894,7 +3894,6 @@@ static const struct net_device_ops cxgb .ndo_set_vf_vlan = cxgb4_mgmt_set_vf_vlan, .ndo_set_vf_link_state = cxgb4_mgmt_set_vf_link_state, }; -#endif
static void cxgb4_mgmt_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) @@@ -3908,7 -3909,6 +3908,7 @@@ static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = { .get_drvinfo = cxgb4_mgmt_get_drvinfo, }; +#endif
static void notify_fatal_err(struct work_struct *work) { @@@ -4424,10 -4424,8 +4424,8 @@@ static int adap_init0_phy(struct adapte
/* Load PHY Firmware onto adapter. */ - spin_lock_bh(&adap->win0_lock); ret = t4_load_phy_fw(adap, MEMWIN_NIC, phy_info->phy_fw_version, (u8 *)phyf->data, phyf->size); - spin_unlock_bh(&adap->win0_lock); if (ret < 0) dev_err(adap->pdev_dev, "PHY Firmware transfer error %d\n", -ret); diff --combined drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 9e3ea5f7be2e,a0555f4d76fc..6606fb8b3e42 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@@ -3060,16 -3060,19 +3060,19 @@@ int t4_read_flash(struct adapter *adapt * @addr: the start address to write * @n: length of data to write in bytes * @data: the data to write + * @byte_oriented: whether to store data as bytes or as words * * Writes up to a page of data (256 bytes) to the serial flash starting * at the given address. All the data must be written to the same page. + * If @byte_oriented is set the write data is stored as byte stream + * (i.e. matches what on disk), otherwise in big-endian. */ static int t4_write_flash(struct adapter *adapter, unsigned int addr, - unsigned int n, const u8 *data) + unsigned int n, const u8 *data, bool byte_oriented) { - int ret; - u32 buf[64]; unsigned int i, c, left, val, offset = addr & 0xff; + u32 buf[64]; + int ret;
if (addr >= adapter->params.sf_size || offset + n > SF_PAGE_SIZE) return -EINVAL; @@@ -3080,10 -3083,14 +3083,14 @@@ (ret = sf1_write(adapter, 4, 1, 1, val)) != 0) goto unlock;
- for (left = n; left; left -= c) { + for (left = n; left; left -= c, data += c) { c = min(left, 4U); - for (val = 0, i = 0; i < c; ++i) - val = (val << 8) + *data++; + for (val = 0, i = 0; i < c; ++i) { + if (byte_oriented) + val = (val << 8) + data[i]; + else + val = (val << 8) + data[c - i - 1]; + }
ret = sf1_write(adapter, c, c != left, 1, val); if (ret) @@@ -3096,7 -3103,8 +3103,8 @@@ t4_write_reg(adapter, SF_OP_A, 0); /* unlock SF */
/* Read the page to verify the write succeeded */ - ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, 1); + ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, + byte_oriented); if (ret) return ret;
@@@ -3692,7 -3700,7 +3700,7 @@@ int t4_load_fw(struct adapter *adap, co */ memcpy(first_page, fw_data, SF_PAGE_SIZE); ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff); - ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page); + ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page, true); if (ret) goto out;
@@@ -3700,14 -3708,14 +3708,14 @@@ for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { addr += SF_PAGE_SIZE; fw_data += SF_PAGE_SIZE; - ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data); + ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data, true); if (ret) goto out; }
- ret = t4_write_flash(adap, - fw_start + offsetof(struct fw_hdr, fw_ver), - sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver); + ret = t4_write_flash(adap, fw_start + offsetof(struct fw_hdr, fw_ver), + sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver, + true); out: if (ret) dev_err(adap->pdev_dev, "firmware download failed, error %d\n", @@@ -3812,9 -3820,11 +3820,11 @@@ int t4_load_phy_fw(struct adapter *adap /* Copy the supplied PHY Firmware image to the adapter memory location * allocated by the adapter firmware. */ + spin_lock_bh(&adap->win0_lock); ret = t4_memory_rw(adap, win, mtype, maddr, phy_fw_size, (__be32 *)phy_fw_data, T4_MEMORY_WRITE); + spin_unlock_bh(&adap->win0_lock); if (ret) return ret;
@@@ -6983,7 -6993,7 +6993,7 @@@ int t4_fw_bye(struct adapter *adap, uns }
/** - * t4_init_cmd - ask FW to initialize the device + * t4_early_init - ask FW to initialize the device * @adap: the adapter * @mbox: mailbox to use for the FW command * @@@ -7782,6 -7792,7 +7792,6 @@@ int t4_free_encap_mac_filt(struct adapt int idx, bool sleep_ok) { struct fw_vi_mac_exact *p; - u8 addr[] = {0, 0, 0, 0, 0, 0}; struct fw_vi_mac_cmd c; int ret = 0; u32 exact; @@@ -7798,7 -7809,7 +7808,7 @@@ p = c.u.exact; p->valid_to_idx = cpu_to_be16(FW_VI_MAC_CMD_VALID_F | FW_VI_MAC_CMD_IDX_V(idx)); - memcpy(p->macaddr, addr, sizeof(p->macaddr)); + eth_zero_addr(p->macaddr); ret = t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, sleep_ok); return ret; } @@@ -10207,7 -10218,7 +10217,7 @@@ int t4_load_cfg(struct adapter *adap, c n = size - i; else n = SF_PAGE_SIZE; - ret = t4_write_flash(adap, addr, n, cfg_data); + ret = t4_write_flash(adap, addr, n, cfg_data, true); if (ret) goto out;
@@@ -10223,7 -10234,7 +10233,7 @@@ out }
/** - * t4_set_vf_mac - Set MAC address for the specified VF + * t4_set_vf_mac_acl - Set MAC address for the specified VF * @adapter: The adapter * @vf: one of the VFs instantiated by the specified PF * @naddr: the number of MAC addresses @@@ -10676,13 -10687,14 +10686,14 @@@ int t4_load_boot(struct adapter *adap, for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { addr += SF_PAGE_SIZE; boot_data += SF_PAGE_SIZE; - ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data); + ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data, + false); if (ret) goto out; }
ret = t4_write_flash(adap, boot_sector, SF_PAGE_SIZE, - (const u8 *)header); + (const u8 *)header, false);
out: if (ret) @@@ -10757,7 -10769,7 +10768,7 @@@ int t4_load_bootcfg(struct adapter *ada for (i = 0; i < size; i += SF_PAGE_SIZE) { n = min_t(u32, size - i, SF_PAGE_SIZE);
- ret = t4_write_flash(adap, addr, n, cfg_data); + ret = t4_write_flash(adap, addr, n, cfg_data, false); if (ret) goto out;
@@@ -10769,7 -10781,8 +10780,8 @@@ for (i = 0; i < npad; i++) { u8 data = 0;
- ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data); + ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data, + false); if (ret) goto out; } diff --combined drivers/net/ethernet/intel/ice/ice_lib.c index cb858be8f4de,27f9dac8719c..dde9802c6c72 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@@ -169,13 -169,12 +169,13 @@@ static void ice_vsi_set_num_qs(struct i
switch (vsi->type) { case ICE_VSI_PF: - vsi->alloc_txq = min3(pf->num_lan_msix, - ice_get_avail_txq_count(pf), - (u16)num_online_cpus()); if (vsi->req_txq) { vsi->alloc_txq = vsi->req_txq; vsi->num_txq = vsi->req_txq; + } else { + vsi->alloc_txq = min3(pf->num_lan_msix, + ice_get_avail_txq_count(pf), + (u16)num_online_cpus()); }
pf->num_lan_tx = vsi->alloc_txq; @@@ -184,13 -183,12 +184,13 @@@ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { vsi->alloc_rxq = 1; } else { - vsi->alloc_rxq = min3(pf->num_lan_msix, - ice_get_avail_rxq_count(pf), - (u16)num_online_cpus()); if (vsi->req_rxq) { vsi->alloc_rxq = vsi->req_rxq; vsi->num_rxq = vsi->req_rxq; + } else { + vsi->alloc_rxq = min3(pf->num_lan_msix, + ice_get_avail_rxq_count(pf), + (u16)num_online_cpus()); } }
@@@ -630,17 -628,6 +630,17 @@@ bool ice_is_safe_mode(struct ice_pf *pf return !test_bit(ICE_FLAG_ADV_FEATURES, pf->flags); }
+/** + * ice_is_aux_ena + * @pf: pointer to the PF struct + * + * returns true if AUX devices/drivers are supported, false otherwise + */ +bool ice_is_aux_ena(struct ice_pf *pf) +{ + return test_bit(ICE_FLAG_AUX_ENA, pf->flags); +} + /** * ice_vsi_clean_rss_flow_fld - Delete RSS configuration * @vsi: the VSI being cleaned up @@@ -1205,11 -1192,11 +1205,11 @@@ static int ice_vsi_setup_vector_base(st num_q_vectors = vsi->num_q_vectors; /* reserve slots from OS requested IRQs */ if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { - struct ice_vf *vf; int i;
ice_for_each_vf(pf, i) { - vf = &pf->vf[i]; + struct ice_vf *vf = &pf->vf[i]; + if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) { base = pf->vsi[vf->ctrl_vsi_idx]->base_vector; break; @@@ -1298,7 -1285,6 +1298,7 @@@ static int ice_vsi_alloc_rings(struct i ring->reg_idx = vsi->txq_map[i]; ring->ring_active = false; ring->vsi = vsi; + ring->tx_tstamps = &pf->ptp.port.tx; ring->dev = dev; ring->count = vsi->num_tx_desc; WRITE_ONCE(vsi->tx_rings[i], ring); @@@ -1676,11 -1662,9 +1676,11 @@@ void ice_vsi_cfg_frame_size(struct ice_ * @pf_q: index of the Rx queue in the PF's queue space * @rxdid: flexible descriptor RXDID * @prio: priority for the RXDID for this queue + * @ena_ts: true to enable timestamp and false to disable timestamp */ void -ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio) +ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio, + bool ena_ts) { int regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
@@@ -1695,40 -1679,9 +1695,40 @@@ regval |= (prio << QRXFLXP_CNTXT_RXDID_PRIO_S) & QRXFLXP_CNTXT_RXDID_PRIO_M;
+ if (ena_ts) + /* Enable TimeSync on this queue */ + regval |= QRXFLXP_CNTXT_TS_M; + wr32(hw, QRXFLXP_CNTXT(pf_q), regval); }
+int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx) +{ + if (q_idx >= vsi->num_rxq) + return -EINVAL; + + return ice_vsi_cfg_rxq(vsi->rx_rings[q_idx]); +} + +int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_ring **tx_rings, u16 q_idx) +{ + struct ice_aqc_add_tx_qgrp *qg_buf; + int err; + + if (q_idx >= vsi->alloc_txq || !tx_rings || !tx_rings[q_idx]) + return -EINVAL; + + qg_buf = kzalloc(struct_size(qg_buf, txqs, 1), GFP_KERNEL); + if (!qg_buf) + return -ENOMEM; + + qg_buf->num_txqs = 1; + + err = ice_vsi_cfg_txq(vsi, tx_rings[q_idx], qg_buf); + kfree(qg_buf); + return err; +} + /** * ice_vsi_cfg_rxqs - Configure the VSI for Rx * @vsi: the VSI being configured @@@ -1746,11 -1699,15 +1746,11 @@@ int ice_vsi_cfg_rxqs(struct ice_vsi *vs ice_vsi_cfg_frame_size(vsi); setup_rings: /* set up individual rings */ - for (i = 0; i < vsi->num_rxq; i++) { - int err; + ice_for_each_rxq(vsi, i) { + int err = ice_vsi_cfg_rxq(vsi->rx_rings[i]);
- err = ice_setup_rx_ctx(vsi->rx_rings[i]); - if (err) { - dev_err(ice_pf_to_dev(vsi->back), "ice_setup_rx_ctx failed for RxQ %d, err %d\n", - i, err); + if (err) return err; - } }
return 0; @@@ -1760,12 -1717,13 +1760,13 @@@ * ice_vsi_cfg_txqs - Configure the VSI for Tx * @vsi: the VSI being configured * @rings: Tx ring array to be configured + * @count: number of Tx ring array elements * * Return 0 on success and a negative value on error * Configure the Tx VSI for operation. */ static int - ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings) + ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, u16 count) { struct ice_aqc_add_tx_qgrp *qg_buf; u16 q_idx = 0; @@@ -1777,7 -1735,7 +1778,7 @@@
qg_buf->num_txqs = 1;
- for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) { + for (q_idx = 0; q_idx < count; q_idx++) { err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf); if (err) goto err_cfg_txqs; @@@ -1797,7 -1755,7 +1798,7 @@@ err_cfg_txqs */ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi) { - return ice_vsi_cfg_txqs(vsi, vsi->tx_rings); + return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq); }
/** @@@ -1812,7 -1770,7 +1813,7 @@@ int ice_vsi_cfg_xdp_txqs(struct ice_vs int ret; int i;
- ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings); + ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq); if (ret) return ret;
@@@ -2052,17 -2010,18 +2053,18 @@@ int ice_vsi_stop_all_rx_rings(struct ic * @rst_src: reset source * @rel_vmvf_num: Relative ID of VF/VM * @rings: Tx ring array to be stopped + * @count: number of Tx ring array elements */ static int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, - u16 rel_vmvf_num, struct ice_ring **rings) + u16 rel_vmvf_num, struct ice_ring **rings, u16 count) { u16 q_idx;
if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS) return -EINVAL;
- for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) { + for (q_idx = 0; q_idx < count; q_idx++) { struct ice_txq_meta txq_meta = { }; int status;
@@@ -2090,7 -2049,7 +2092,7 @@@ in ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, u16 rel_vmvf_num) { - return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings); + return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, vsi->num_txq); }
/** @@@ -2099,7 -2058,7 +2101,7 @@@ */ int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi) { - return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings); + return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq); }
/** @@@ -2258,7 -2217,7 +2260,7 @@@ void ice_cfg_sw_lldp(struct ice_vsi *vs }
if (status) - dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n", + dev_dbg(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n", create ? "adding" : "removing", tx ? "TX" : "RX", vsi->vsi_num, ice_stat_str(status)); } @@@ -2873,11 -2832,11 +2875,11 @@@ int ice_vsi_release(struct ice_vsi *vsi * cleared in the same manner. */ if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { - struct ice_vf *vf; int i;
ice_for_each_vf(pf, i) { - vf = &pf->vf[i]; + struct ice_vf *vf = &pf->vf[i]; + if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) break; } @@@ -3237,34 -3196,6 +3239,34 @@@ bool ice_is_reset_in_progress(unsigned test_bit(ICE_GLOBR_REQ, state); }
+/** + * ice_wait_for_reset - Wait for driver to finish reset and rebuild + * @pf: pointer to the PF structure + * @timeout: length of time to wait, in jiffies + * + * Wait (sleep) for a short time until the driver finishes cleaning up from + * a device reset. The caller must be able to sleep. Use this to delay + * operations that could fail while the driver is cleaning up after a device + * reset. + * + * Returns 0 on success, -EBUSY if the reset is not finished within the + * timeout, and -ERESTARTSYS if the thread was interrupted. + */ +int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout) +{ + long ret; + + ret = wait_event_interruptible_timeout(pf->reset_wait_queue, + !ice_is_reset_in_progress(pf->state), + timeout); + if (ret < 0) + return ret; + else if (!ret) + return -EBUSY; + else + return 0; +} + #ifdef CONFIG_DCB /** * ice_vsi_update_q_map - update our copy of the VSI info with new queue map @@@ -3399,22 -3330,13 +3401,22 @@@ int ice_status_to_errno(enum ice_statu case ICE_ERR_DOES_NOT_EXIST: return -ENOENT; case ICE_ERR_OUT_OF_RANGE: - return -ENOTTY; + case ICE_ERR_AQ_ERROR: + case ICE_ERR_AQ_TIMEOUT: + case ICE_ERR_AQ_EMPTY: + case ICE_ERR_AQ_FW_CRITICAL: + return -EIO; case ICE_ERR_PARAM: + case ICE_ERR_INVAL_SIZE: return -EINVAL; case ICE_ERR_NO_MEMORY: return -ENOMEM; case ICE_ERR_MAX_LIMIT: return -EAGAIN; + case ICE_ERR_RESET_ONGOING: + return -EBUSY; + case ICE_ERR_AQ_FULL: + return -ENOSPC; default: return -EINVAL; } diff --combined drivers/net/ethernet/intel/ice/ice_main.c index 5ca6c0356499,0eb2307325d3..5c3ea504770a --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@@ -35,8 -35,6 +35,8 @@@ MODULE_PARM_DESC(debug, "netif level (0 MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); #endif /* !CONFIG_DYNAMIC_DEBUG */
+static DEFINE_IDA(ice_aux_ida); + static struct workqueue_struct *ice_wq; static const struct net_device_ops ice_netdev_safe_mode_ops; static const struct net_device_ops ice_netdev_ops; @@@ -456,8 -454,6 +456,8 @@@ ice_prepare_for_reset(struct ice_pf *pf if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return;
+ ice_unplug_aux_dev(pf); + /* Notify VFs of impending reset */ if (ice_check_sq_alive(hw, &hw->mailboxq)) ice_vc_notify_reset(pf); @@@ -471,9 -467,6 +471,9 @@@ /* disable the VSIs and their queues that are not already DOWN */ ice_pf_dis_all_vsi(pf, false);
+ if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_release(pf); + if (hw->port_info) ice_sched_clear_port(hw->port_info);
@@@ -506,7 -499,6 +506,7 @@@ static void ice_do_reset(struct ice_pf clear_bit(ICE_PFR_REQ, pf->state); clear_bit(ICE_CORER_REQ, pf->state); clear_bit(ICE_GLOBR_REQ, pf->state); + wake_up(&pf->reset_wait_queue); return; }
@@@ -519,7 -511,6 +519,7 @@@ ice_rebuild(pf, reset_type); clear_bit(ICE_PREPARED_FOR_RESET, pf->state); clear_bit(ICE_PFR_REQ, pf->state); + wake_up(&pf->reset_wait_queue); ice_reset_all_vfs(pf, true); } } @@@ -570,7 -561,6 +570,7 @@@ static void ice_reset_subtask(struct ic clear_bit(ICE_PFR_REQ, pf->state); clear_bit(ICE_CORER_REQ, pf->state); clear_bit(ICE_GLOBR_REQ, pf->state); + wake_up(&pf->reset_wait_queue); ice_reset_all_vfs(pf, true); }
@@@ -867,38 -857,6 +867,38 @@@ static void ice_set_dflt_mib(struct ice kfree(lldpmib); }
+/** + * ice_check_module_power + * @pf: pointer to PF struct + * @link_cfg_err: bitmap from the link info structure + * + * check module power level returned by a previous call to aq_get_link_info + * and print error messages if module power level is not supported + */ +static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err) +{ + /* if module power level is supported, clear the flag */ + if (!(link_cfg_err & (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT | + ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED))) { + clear_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags); + return; + } + + /* if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the + * above block didn't clear this bit, there's nothing to do + */ + if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags)) + return; + + if (link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) { + dev_err(ice_pf_to_dev(pf), "The installed module is incompatible with the device's NVM image. Cannot start link\n"); + set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags); + } else if (link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) { + dev_err(ice_pf_to_dev(pf), "The module's power requirements exceed the device's power supply. Cannot start link\n"); + set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags); + } +} + /** * ice_link_event - process the link event * @pf: PF that the link event is associated with @@@ -934,8 -892,6 +934,8 @@@ ice_link_event(struct ice_pf *pf, struc pi->lport, ice_stat_str(status), ice_aq_str(pi->hw->adminq.sq_last_status));
+ ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + /* Check if the link state is up after updating link info, and treat * this event as an UP event since the link is actually UP now. */ @@@ -1234,10 -1190,6 +1234,10 @@@ static int __ice_clean_ctrlq(struct ice cq = &hw->adminq; qtype = "Admin"; break; + case ICE_CTL_Q_SB: + cq = &hw->sbq; + qtype = "Sideband"; + break; case ICE_CTL_Q_MAILBOX: cq = &hw->mailboxq; qtype = "Mailbox"; @@@ -1411,34 -1363,6 +1411,34 @@@ static void ice_clean_mailboxq_subtask( ice_flush(hw); }
+/** + * ice_clean_sbq_subtask - clean the Sideband Queue rings + * @pf: board private structure + */ +static void ice_clean_sbq_subtask(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + + /* Nothing to do here if sideband queue is not supported */ + if (!ice_is_sbq_supported(hw)) { + clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state); + return; + } + + if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state)) + return; + + if (__ice_clean_ctrlq(pf, ICE_CTL_Q_SB)) + return; + + clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state); + + if (ice_ctrlq_pending(hw, &hw->sbq)) + __ice_clean_ctrlq(pf, ICE_CTL_Q_SB); + + ice_flush(hw); +} + /** * ice_service_task_schedule - schedule the service task to wake up * @pf: board private structure @@@ -2082,8 -2006,6 +2082,8 @@@ static void ice_check_media_subtask(str if (err) return;
+ ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) ice_init_phy_user_cfg(pi); @@@ -2141,7 -2063,6 +2141,7 @@@ static void ice_service_task(struct wor
ice_process_vflr_event(pf); ice_clean_mailboxq_subtask(pf); + ice_clean_sbq_subtask(pf); ice_sync_arfs_fltrs(pf); ice_flush_fdir_ctx(pf);
@@@ -2157,7 -2078,6 +2157,7 @@@ test_bit(ICE_VFLR_EVENT_PENDING, pf->state) || test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) || test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) || + test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state) || test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state)) mod_timer(&pf->serv_tmr, jiffies); } @@@ -2176,10 -2096,6 +2176,10 @@@ static void ice_set_ctrlq_len(struct ic hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN; hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN; hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN; + hw->sbq.num_rq_entries = ICE_SBQ_LEN; + hw->sbq.num_sq_entries = ICE_SBQ_LEN; + hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN; + hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN; }
/** @@@ -2202,8 -2118,6 +2202,8 @@@ int ice_schedule_reset(struct ice_pf *p return -EBUSY; }
+ ice_unplug_aux_dev(pf); + switch (reset) { case ICE_RESET_PFR: set_bit(ICE_PFR_REQ, pf->state); @@@ -2641,6 -2555,20 +2641,20 @@@ ice_xdp_setup_prog(struct ice_vsi *vsi return (ret || xdp_ring_err) ? -ENOMEM : 0; }
+ /** + * ice_xdp_safe_mode - XDP handler for safe mode + * @dev: netdevice + * @xdp: XDP command + */ + static int ice_xdp_safe_mode(struct net_device __always_unused *dev, + struct netdev_bpf *xdp) + { + NL_SET_ERR_MSG_MOD(xdp->extack, + "Please provide working DDP firmware package in order to use XDP\n" + "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst"); + return -EOPNOTSUPP; + } + /** * ice_xdp - implements XDP handler * @dev: netdevice @@@ -2694,7 -2622,6 +2708,7 @@@ static void ice_ena_misc_vector(struct PFINT_OICR_PCI_EXCEPTION_M | PFINT_OICR_VFLR_M | PFINT_OICR_HMC_ERR_M | + PFINT_OICR_PE_PUSH_M | PFINT_OICR_PE_CRITERR_M);
wr32(hw, PFINT_OICR_ENA, val); @@@ -2720,7 -2647,6 +2734,7 @@@ static irqreturn_t ice_misc_intr(int __ dev = ice_pf_to_dev(pf); set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state); set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state); + set_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
oicr = rd32(hw, PFINT_OICR); ena_mask = rd32(hw, PFINT_OICR_ENA); @@@ -2766,6 -2692,8 +2780,6 @@@
/* If a reset cycle isn't already in progress, we set a bit in * pf->state so that the service task can start a reset/rebuild. - * We also make note of which reset happened so that peer - * devices/drivers can be informed. */ if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) { if (reset == ICE_RESET_CORER) @@@ -2792,24 -2720,11 +2806,24 @@@ } }
- if (oicr & PFINT_OICR_HMC_ERR_M) { - ena_mask &= ~PFINT_OICR_HMC_ERR_M; - dev_dbg(dev, "HMC Error interrupt - info 0x%x, data 0x%x\n", - rd32(hw, PFHMC_ERRORINFO), - rd32(hw, PFHMC_ERRORDATA)); + if (oicr & PFINT_OICR_TSYN_TX_M) { + ena_mask &= ~PFINT_OICR_TSYN_TX_M; + ice_ptp_process_ts(pf); + } + +#define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M) + if (oicr & ICE_AUX_CRIT_ERR) { + struct iidc_event *event; + + ena_mask &= ~ICE_AUX_CRIT_ERR; + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (event) { + set_bit(IIDC_EVENT_CRIT_ERR, event->type); + /* report the entire OICR value to AUX driver */ + event->reg = oicr; + ice_send_event_to_aux(pf, event); + kfree(event); + } }
/* Report any remaining unexpected interrupts */ @@@ -2819,7 -2734,8 +2833,7 @@@ /* If a critical error is pending there is no choice but to * reset the device. */ - if (oicr & (PFINT_OICR_PE_CRITERR_M | - PFINT_OICR_PCI_EXCEPTION_M | + if (oicr & (PFINT_OICR_PCI_EXCEPTION_M | PFINT_OICR_ECC_ERR_M)) { set_bit(ICE_PFR_REQ, pf->state); ice_service_task_schedule(pf); @@@ -2847,9 -2763,6 +2861,9 @@@ static void ice_dis_ctrlq_interrupts(st wr32(hw, PFINT_MBX_CTL, rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);
+ wr32(hw, PFINT_SB_CTL, + rd32(hw, PFINT_SB_CTL) & ~PFINT_SB_CTL_CAUSE_ENA_M); + /* disable Control queue Interrupt causes */ wr32(hw, PFINT_OICR_CTL, rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M); @@@ -2904,11 -2817,6 +2918,11 @@@ static void ice_ena_ctrlq_interrupts(st PFINT_MBX_CTL_CAUSE_ENA_M); wr32(hw, PFINT_MBX_CTL, val);
+ /* This enables Sideband queue Interrupt causes */ + val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) | + PFINT_SB_CTL_CAUSE_ENA_M); + wr32(hw, PFINT_SB_CTL, val); + ice_flush(hw); }
@@@ -3078,6 -2986,7 +3092,6 @@@ static void ice_set_netdev_features(str */ static int ice_cfg_netdev(struct ice_vsi *vsi) { - struct ice_pf *pf = vsi->back; struct ice_netdev_priv *np; struct net_device *netdev; u8 mac_addr[ETH_ALEN]; @@@ -3097,7 -3006,7 +3111,7 @@@ ice_set_ops(netdev);
if (vsi->type == ICE_VSI_PF) { - SET_NETDEV_DEV(netdev, ice_pf_to_dev(pf)); + SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back)); ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr); ether_addr_copy(netdev->dev_addr, mac_addr); ether_addr_copy(netdev->perm_addr, mac_addr); @@@ -3371,9 -3280,6 +3385,9 @@@ static void ice_deinit_pf(struct ice_p bitmap_free(pf->avail_rxqs); pf->avail_rxqs = NULL; } + + if (pf->ptp.clock) + ptp_clock_unregister(pf->ptp.clock); }
/** @@@ -3384,12 -3290,6 +3398,12 @@@ static void ice_set_pf_caps(struct ice_ { struct ice_hw_func_caps *func_caps = &pf->hw.func_caps;
+ clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); + clear_bit(ICE_FLAG_AUX_ENA, pf->flags); + if (func_caps->common_cap.rdma) { + set_bit(ICE_FLAG_RDMA_ENA, pf->flags); + set_bit(ICE_FLAG_AUX_ENA, pf->flags); + } clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); if (func_caps->common_cap.dcb) set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); @@@ -3420,10 -3320,6 +3434,10 @@@ func_caps->fd_fltr_best_effort); }
+ clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); + if (func_caps->common_cap.ieee_1588) + set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); + pf->max_pf_txqs = func_caps->common_cap.num_txq; pf->max_pf_rxqs = func_caps->common_cap.num_rxq; } @@@ -3443,8 -3339,6 +3457,8 @@@ static int ice_init_pf(struct ice_pf *p spin_lock_init(&pf->aq_wait_lock); init_waitqueue_head(&pf->aq_wait_queue);
+ init_waitqueue_head(&pf->reset_wait_queue); + /* setup service timer and periodic service task */ timer_setup(&pf->serv_tmr, ice_service_timer, 0); pf->serv_tmr_period = HZ; @@@ -3475,12 -3369,11 +3489,12 @@@ */ static int ice_ena_msix_range(struct ice_pf *pf) { - int v_left, v_actual, v_other, v_budget = 0; + int num_cpus, v_left, v_actual, v_other, v_budget = 0; struct device *dev = ice_pf_to_dev(pf); int needed, err, i;
v_left = pf->hw.func_caps.common_cap.num_msix_vectors; + num_cpus = num_online_cpus();
/* reserve for LAN miscellaneous handler */ needed = ICE_MIN_LAN_OICR_MSIX; @@@ -3502,23 -3395,13 +3516,23 @@@ v_other = v_budget;
/* reserve vectors for LAN traffic */ - needed = min_t(int, num_online_cpus(), v_left); + needed = num_cpus; if (v_left < needed) goto no_hw_vecs_left_err; pf->num_lan_msix = needed; v_budget += needed; v_left -= needed;
+ /* reserve vectors for RDMA auxiliary driver */ + if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) { + needed = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; + if (v_left < needed) + goto no_hw_vecs_left_err; + pf->num_rdma_msix = needed; + v_budget += needed; + v_left -= needed; + } + pf->msix_entries = devm_kcalloc(dev, v_budget, sizeof(*pf->msix_entries), GFP_KERNEL); if (!pf->msix_entries) { @@@ -3548,46 -3431,16 +3562,46 @@@ err = -ERANGE; goto msix_err; } else { - int v_traffic = v_actual - v_other; + int v_remain = v_actual - v_other; + int v_rdma = 0, v_min_rdma = 0; + + if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) { + /* Need at least 1 interrupt in addition to + * AEQ MSIX + */ + v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; + v_min_rdma = ICE_MIN_RDMA_MSIX; + }
if (v_actual == ICE_MIN_MSIX || - v_traffic < ICE_MIN_LAN_TXRX_MSIX) + v_remain < ICE_MIN_LAN_TXRX_MSIX + v_min_rdma) { + dev_warn(dev, "Not enough MSI-X vectors to support RDMA.\n"); + clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); + + pf->num_rdma_msix = 0; pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; - else - pf->num_lan_msix = v_traffic; + } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || + (v_remain - v_rdma < v_rdma)) { + /* Support minimum RDMA and give remaining + * vectors to LAN MSIX + */ + pf->num_rdma_msix = v_min_rdma; + pf->num_lan_msix = v_remain - v_min_rdma; + } else { + /* Split remaining MSIX with RDMA after + * accounting for AEQ MSIX + */ + pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + + ICE_RDMA_NUM_AEQ_MSIX; + pf->num_lan_msix = v_remain - pf->num_rdma_msix; + }
dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", pf->num_lan_msix); + + if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) + dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n", + pf->num_rdma_msix); } }
@@@ -3602,7 -3455,6 +3616,7 @@@ no_hw_vecs_left_err needed, v_left); err = -ERANGE; exit_err: + pf->num_rdma_msix = 0; pf->num_lan_msix = 0; return err; } @@@ -4366,8 -4218,6 +4380,8 @@@ ice_probe(struct pci_dev *pdev, const s
ice_init_link_dflt_override(pf->hw.port_info);
+ ice_check_module_power(pf, pf->hw.port_info->phy.link_info.link_cfg_err); + /* if media available, initialize PHY settings */ if (pf->hw.port_info->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { @@@ -4406,8 -4256,6 +4420,8 @@@ }
/* initialize DDP driven features */ + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_init(pf);
/* Note: Flow director init failure is non-fatal to load */ if (ice_init_fdir(pf)) @@@ -4434,29 -4282,8 +4448,29 @@@ probe_done
/* ready to go, so clear down state bit */ clear_bit(ICE_DOWN, pf->state); + if (ice_is_aux_ena(pf)) { + pf->aux_idx = ida_alloc(&ice_aux_ida, GFP_KERNEL); + if (pf->aux_idx < 0) { + dev_err(dev, "Failed to allocate device ID for AUX driver\n"); + err = -ENOMEM; + goto err_netdev_reg; + } + + err = ice_init_rdma(pf); + if (err) { + dev_err(dev, "Failed to initialize RDMA: %d\n", err); + err = -EIO; + goto err_init_aux_unroll; + } + } else { + dev_warn(dev, "RDMA is not supported on this device\n"); + } + return 0;
+err_init_aux_unroll: + pf->adev = NULL; + ida_free(&ice_aux_ida, pf->aux_idx); err_netdev_reg: err_send_version_unroll: ice_vsi_release_all(pf); @@@ -4566,17 -4393,13 +4580,17 @@@ static void ice_remove(struct pci_dev * ice_free_vfs(pf); }
- set_bit(ICE_DOWN, pf->state); ice_service_task_stop(pf);
ice_aq_cancel_waiting_tasks(pf); + ice_unplug_aux_dev(pf); + ida_free(&ice_aux_ida, pf->aux_idx); + set_bit(ICE_DOWN, pf->state);
mutex_destroy(&(&pf->hw)->fdir_fltr_lock); ice_deinit_lag(pf); + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_release(pf); if (!ice_is_safe_mode(pf)) ice_remove_arfs(pf); ice_setup_mc_magic_wake(pf); @@@ -4729,8 -4552,6 +4743,8 @@@ static int __maybe_unused ice_suspend(s */ disabled = ice_service_task_stop(pf);
+ ice_unplug_aux_dev(pf); + /* Already suspended?, then there is nothing to do */ if (test_and_set_bit(ICE_SUSPENDED, pf->state)) { if (!disabled) @@@ -5630,6 -5451,7 +5644,6 @@@ ice_update_vsi_tx_ring_stats(struct ice static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) { struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats; - struct ice_ring *ring; u64 pkts, bytes; int i;
@@@ -5653,8 -5475,7 +5667,8 @@@
/* update Rx rings counters */ ice_for_each_rxq(vsi, i) { - ring = READ_ONCE(vsi->rx_rings[i]); + struct ice_ring *ring = READ_ONCE(vsi->rx_rings[i]); + ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes); vsi_stats->rx_packets += pkts; vsi_stats->rx_bytes += bytes; @@@ -6321,12 -6142,6 +6335,12 @@@ static void ice_rebuild(struct ice_pf *
ice_clear_pxe_mode(hw);
+ ret = ice_init_nvm(hw); + if (ret) { + dev_err(dev, "ice_init_nvm failed %s\n", ice_stat_str(ret)); + goto err_init_ctrlq; + } + ret = ice_get_caps(hw); if (ret) { dev_err(dev, "ice_get_caps failed %s\n", ice_stat_str(ret)); @@@ -6368,13 -6183,6 +6382,13 @@@ if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) ice_dcb_rebuild(pf);
+ /* If the PF previously had enabled PTP, PTP init needs to happen before + * the VSI rebuild. If not, this causes the PTP link status events to + * fail. + */ + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_init(pf); + /* rebuild PF VSI */ err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF); if (err) { @@@ -6414,8 -6222,6 +6428,8 @@@
/* if we get here, reset flow is successful */ clear_bit(ICE_RESET_FAILED, pf->state); + + ice_plug_aux_dev(pf); return;
err_vsi_rebuild: @@@ -6454,9 -6260,7 +6468,9 @@@ static int ice_change_mtu(struct net_de struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + struct iidc_event *event; u8 count = 0; + int err = 0;
if (new_mtu == (int)netdev->mtu) { netdev_warn(netdev, "MTU is already %u\n", netdev->mtu); @@@ -6489,59 -6293,27 +6503,59 @@@ return -EBUSY; }
+ event = kzalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return -ENOMEM; + + set_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type); + ice_send_event_to_aux(pf, event); + clear_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type); + netdev->mtu = (unsigned int)new_mtu;
/* if VSI is up, bring it down and then back up */ if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { - int err; - err = ice_down(vsi); if (err) { netdev_err(netdev, "change MTU if_down err %d\n", err); - return err; + goto event_after; }
err = ice_up(vsi); if (err) { netdev_err(netdev, "change MTU if_up err %d\n", err); - return err; + goto event_after; } }
netdev_dbg(netdev, "changed MTU to %d\n", new_mtu); - return 0; +event_after: + set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type); + ice_send_event_to_aux(pf, event); + kfree(event); + + return err; +} + +/** + * ice_do_ioctl - Access the hwtstamp interface + * @netdev: network interface device structure + * @ifr: interface request data + * @cmd: ioctl command + */ +static int ice_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + + switch (cmd) { + case SIOCGHWTSTAMP: + return ice_ptp_get_ts_config(pf, ifr); + case SIOCSHWTSTAMP: + return ice_ptp_set_ts_config(pf, ifr); + default: + return -EOPNOTSUPP; + } }
/** @@@ -7060,8 -6832,6 +7074,8 @@@ int ice_open_internal(struct net_devic return -EIO; }
+ ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + /* Set PHY if there is media, otherwise, turn off PHY */ if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { clear_bit(ICE_FLAG_NO_MEDIA, pf->flags); @@@ -7181,6 -6951,7 +7195,7 @@@ static const struct net_device_ops ice_ .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, .ndo_tx_timeout = ice_tx_timeout, + .ndo_bpf = ice_xdp_safe_mode, };
static const struct net_device_ops ice_netdev_ops = { @@@ -7194,7 -6965,6 +7209,7 @@@ .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, .ndo_set_tx_maxrate = ice_set_tx_maxrate, + .ndo_do_ioctl = ice_do_ioctl, .ndo_set_vf_spoofchk = ice_set_vf_spoofchk, .ndo_set_vf_mac = ice_set_vf_mac, .ndo_get_vf_config = ice_get_vf_cfg, diff --combined drivers/net/ethernet/lantiq_xrx200.c index 27df06ed355e,21ef2f128070..fb78f17d734f --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@@ -154,6 -154,7 +154,7 @@@ static int xrx200_close(struct net_devi
static int xrx200_alloc_skb(struct xrx200_chan *ch) { + struct sk_buff *skb = ch->skb[ch->dma.desc]; dma_addr_t mapping; int ret = 0;
@@@ -168,6 -169,7 +169,7 @@@ XRX200_DMA_DATA_LEN, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) { dev_kfree_skb_any(ch->skb[ch->dma.desc]); + ch->skb[ch->dma.desc] = skb; ret = -ENOMEM; goto skip; } @@@ -198,7 -200,6 +200,6 @@@ static int xrx200_hw_receive(struct xrx ch->dma.desc %= LTQ_DESC_NUM;
if (ret) { - ch->skb[ch->dma.desc] = skb; net_dev->stats.rx_dropped++; netdev_err(net_dev, "failed to allocate new rx buffer\n"); return ret; @@@ -352,8 -353,8 +353,8 @@@ static irqreturn_t xrx200_dma_irq(int i struct xrx200_chan *ch = ptr;
if (napi_schedule_prep(&ch->napi)) { - __napi_schedule(&ch->napi); ltq_dma_disable_irq(&ch->dma); + __napi_schedule(&ch->napi); }
ltq_dma_ack_irq(&ch->dma); @@@ -436,6 -437,7 +437,6 @@@ static int xrx200_probe(struct platform { struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; - struct resource *res; struct xrx200_priv *priv; struct net_device *net_dev; int err; @@@ -455,7 -457,13 +456,7 @@@ net_dev->max_mtu = XRX200_DMA_DATA_LEN;
/* load the memory ranges */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(dev, "failed to get resources\n"); - return -ENOENT; - } - - priv->pmac_reg = devm_ioremap_resource(dev, res); + priv->pmac_reg = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); if (IS_ERR(priv->pmac_reg)) return PTR_ERR(priv->pmac_reg);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index f0b98f5b2a92,85eaadc989df..059799e4f483 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@@ -94,13 -94,9 +94,9 @@@ void mlx5e_rep_update_flows(struct mlx5
ASSERT_RTNL();
- /* wait for encap to be fully initialized */ - wait_for_completion(&e->res_ready); - mutex_lock(&esw->offloads.encap_tbl_lock); encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); - if (e->compl_result < 0 || (encap_connected == neigh_connected && - ether_addr_equal(e->h_dest, ha))) + if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha)) goto unlock;
mlx5e_take_all_encap_flows(e, &flow_list); @@@ -617,7 -613,7 +613,7 @@@ static bool mlx5e_restore_skb(struct sk struct mlx5e_tc_update_priv *tc_priv) { struct mlx5e_priv *priv = netdev_priv(skb->dev); - u32 tunnel_id = reg_c1 >> ESW_TUN_OFFSET; + u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
if (chain) { struct mlx5_rep_uplink_priv *uplink_priv; diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 0dfd51d2d178,490131e06efb..2e846b741280 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@@ -120,7 -120,6 +120,7 @@@ void mlx5e_tc_encap_flows_add(struct ml struct list_head *flow_list) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_pkt_reformat_params reformat_params; struct mlx5_esw_flow_attr *esw_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_attr *attr; @@@ -131,12 -130,9 +131,12 @@@ if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE) return;
+ memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = e->reformat_type; + reformat_params.size = e->encap_size; + reformat_params.data = e->encap_header; e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - e->encap_size, e->encap_header, + &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(e->pkt_reformat)) { mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n", @@@ -255,9 -251,12 +255,12 @@@ static void mlx5e_take_all_route_decap_ mlx5e_take_tmp_flow(flow, flow_list, 0); }
+ typedef bool (match_cb)(struct mlx5e_encap_entry *); + static struct mlx5e_encap_entry * - mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, - struct mlx5e_encap_entry *e) + mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e, + match_cb match) { struct mlx5e_encap_entry *next = NULL;
@@@ -292,7 -291,7 +295,7 @@@ retry /* wait for encap to be fully initialized */ wait_for_completion(&next->res_ready); /* continue searching if encap entry is not in valid state after completion */ - if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) { + if (!match(next)) { e = next; goto retry; } @@@ -300,6 -299,30 +303,30 @@@ return next; }
+ static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e) + { + return e->flags & MLX5_ENCAP_ENTRY_VALID; + } + + static struct mlx5e_encap_entry * + mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) + { + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid); + } + + static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e) + { + return e->compl_result >= 0; + } + + struct mlx5e_encap_entry * + mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) + { + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized); + } + void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_neigh *m_neigh = &nhe->m_neigh; @@@ -816,7 -839,6 +843,7 @@@ int mlx5e_attach_decap(struct mlx5e_pri { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; + struct mlx5_pkt_reformat_params reformat_params; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_decap_entry *d; struct mlx5e_decap_key key; @@@ -858,12 -880,10 +885,12 @@@ hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key); mutex_unlock(&esw->offloads.decap_tbl_lock);
+ memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + reformat_params.size = sizeof(parse_attr->eth); + reformat_params.data = &parse_attr->eth; d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, - MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2, - sizeof(parse_attr->eth), - &parse_attr->eth, + &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(d->pkt_reformat)) { err = PTR_ERR(d->pkt_reformat); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 930b225dfe77,d26b8ed51195..7d732fac09f0 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@@ -91,16 -91,12 +91,16 @@@ void mlx5e_update_carrier(struct mlx5e_ { struct mlx5_core_dev *mdev = priv->mdev; u8 port_state; + bool up;
port_state = mlx5_query_vport_state(mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0);
- if (port_state == VPORT_STATE_UP) { + up = port_state == VPORT_STATE_UP; + if (up == netif_carrier_ok(priv->netdev)) + netif_carrier_event(priv->netdev); + if (up) { netdev_info(priv->netdev, "Link up\n"); netif_carrier_on(priv->netdev); } else { @@@ -857,7 -853,7 +857,7 @@@ int mlx5e_open_rq(struct mlx5e_params * if (err) goto err_destroy_rq;
- if (mlx5e_is_tls_on(rq->priv) && !mlx5_accel_is_ktls_device(mdev)) + if (mlx5e_is_tls_on(rq->priv) && !mlx5e_accel_is_ktls_device(mdev)) __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */
if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) @@@ -2709,8 -2705,6 +2709,6 @@@ static int mlx5e_update_netdev_queues(s nch = priv->channels.params.num_channels; ntc = priv->channels.params.num_tc; num_rxqs = nch * priv->profile->rq_groups; - if (priv->channels.params.ptp_rx) - num_rxqs++;
mlx5e_netdev_set_tcs(netdev, nch, ntc);
@@@ -4667,10 -4661,12 +4665,10 @@@ void mlx5e_build_nic_params(struct mlx5 params->log_sq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, - MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
/* XDP SQ */ - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, - MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
/* set CQE compression */ params->rx_cqe_compress_def = false; @@@ -4826,22 -4822,15 +4824,15 @@@ static void mlx5e_build_nic_netdev(stru }
if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM; - netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; - netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL; }
if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) { - netdev->hw_features |= NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM; - netdev->hw_enc_features |= NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM; - netdev->gso_partial_features |= NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM; + netdev->hw_features |= NETIF_F_GSO_GRE; + netdev->hw_enc_features |= NETIF_F_GSO_GRE; + netdev->gso_partial_features |= NETIF_F_GSO_GRE; }
if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) { @@@ -5114,7 -5103,7 +5105,7 @@@ static void mlx5e_nic_enable(struct mlx mlx5e_set_netdev_mtu_boundaries(priv); mlx5e_set_dev_port_mtu(priv);
- mlx5_lag_add(mdev, netdev); + mlx5_lag_add_netdev(mdev, netdev);
mlx5e_enable_async_events(priv); mlx5e_enable_blocking_events(priv); @@@ -5162,7 -5151,7 +5153,7 @@@ static void mlx5e_nic_disable(struct ml priv->en_trap = NULL; } mlx5e_disable_async_events(priv); - mlx5_lag_remove(mdev); + mlx5_lag_remove_netdev(mdev, priv->netdev); mlx5_vxlan_reset_to_default(mdev->vxlan); }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cf4558e12325,d4b0f270b6bb..8d84d0712c20 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@@ -83,17 -83,17 +83,17 @@@ struct mlx5e_tc_attr_to_reg_mapping mlx [CHAIN_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, .moffset = 0, - .mlen = 2, + .mlen = 16, }, [VPORT_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, - .moffset = 2, - .mlen = 2, + .moffset = 16, + .mlen = 16, }, [TUNNEL_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1, - .moffset = 1, - .mlen = ((ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS) / 8), + .moffset = 8, + .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS, .soffset = MLX5_BYTE_OFF(fte_match_param, misc_parameters_2.metadata_reg_c_1), }, @@@ -110,7 -110,7 +110,7 @@@ [NIC_CHAIN_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B, .moffset = 0, - .mlen = 2, + .mlen = 16, }, [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct, }; @@@ -128,46 -128,23 +128,46 @@@ static void mlx5e_put_flow_tunnel_id(st void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, enum mlx5e_tc_attr_to_reg type, - u32 data, + u32 val, u32 mask) { + void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval; int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; - void *headers_c = spec->match_criteria; - void *headers_v = spec->match_value; - void *fmask, *fval; + u32 max_mask = GENMASK(match_len - 1, 0); + __be32 curr_mask_be, curr_val_be; + u32 curr_mask, curr_val;
fmask = headers_c + soffset; fval = headers_v + soffset;
- mask = (__force u32)(cpu_to_be32(mask)) >> (32 - (match_len * 8)); - data = (__force u32)(cpu_to_be32(data)) >> (32 - (match_len * 8)); + memcpy(&curr_mask_be, fmask, 4); + memcpy(&curr_val_be, fval, 4); + + curr_mask = be32_to_cpu(curr_mask_be); + curr_val = be32_to_cpu(curr_val_be); + + //move to correct offset + WARN_ON(mask > max_mask); + mask <<= moffset; + val <<= moffset; + max_mask <<= moffset; + + //zero val and mask + curr_mask &= ~max_mask; + curr_val &= ~max_mask;
- memcpy(fmask, &mask, match_len); - memcpy(fval, &data, match_len); + //add current to mask + curr_mask |= mask; + curr_val |= val; + + //back to be32 and write + curr_mask_be = cpu_to_be32(curr_mask); + curr_val_be = cpu_to_be32(curr_val); + + memcpy(fmask, &curr_mask_be, 4); + memcpy(fval, &curr_val_be, 4);
spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; } @@@ -175,28 -152,23 +175,28 @@@ void mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec, enum mlx5e_tc_attr_to_reg type, - u32 *data, + u32 *val, u32 *mask) { + void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval; int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; - void *headers_c = spec->match_criteria; - void *headers_v = spec->match_value; - void *fmask, *fval; + u32 max_mask = GENMASK(match_len - 1, 0); + __be32 curr_mask_be, curr_val_be; + u32 curr_mask, curr_val;
fmask = headers_c + soffset; fval = headers_v + soffset;
- memcpy(mask, fmask, match_len); - memcpy(data, fval, match_len); + memcpy(&curr_mask_be, fmask, 4); + memcpy(&curr_val_be, fval, 4); + + curr_mask = be32_to_cpu(curr_mask_be); + curr_val = be32_to_cpu(curr_val_be);
- *mask = be32_to_cpu((__force __be32)(*mask << (32 - (match_len * 8)))); - *data = be32_to_cpu((__force __be32)(*data << (32 - (match_len * 8)))); + *mask = (curr_mask >> moffset) & max_mask; + *val = (curr_val >> moffset) & max_mask; }
int @@@ -220,13 -192,13 +220,13 @@@ mlx5e_tc_match_to_reg_set_and_get_id(st (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
/* Firmware has 5bit length field and 0 means 32bits */ - if (mlen == 4) + if (mlen == 32) mlen = 0;
MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, modact, field, mfield); - MLX5_SET(set_action_in, modact, offset, moffset * 8); - MLX5_SET(set_action_in, modact, length, mlen * 8); + MLX5_SET(set_action_in, modact, offset, moffset); + MLX5_SET(set_action_in, modact, length, mlen); MLX5_SET(set_action_in, modact, data, data); err = mod_hdr_acts->num_actions; mod_hdr_acts->num_actions++; @@@ -324,13 -296,13 +324,13 @@@ void mlx5e_tc_match_to_reg_mod_hdr_chan modact = mod_hdr_acts->actions + (act_id * MLX5_MH_ACT_SZ);
/* Firmware has 5bit length field and 0 means 32bits */ - if (mlen == 4) + if (mlen == 32) mlen = 0;
MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, modact, field, mfield); - MLX5_SET(set_action_in, modact, offset, moffset * 8); - MLX5_SET(set_action_in, modact, length, mlen * 8); + MLX5_SET(set_action_in, modact, offset, moffset); + MLX5_SET(set_action_in, modact, length, mlen); MLX5_SET(set_action_in, modact, data, data); }
@@@ -4793,7 -4765,7 +4793,7 @@@ static void mlx5e_tc_hairpin_update_dea list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { wait_for_completion(&hpe->res_ready); if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) - hpe->hp->pair->peer_gone = true; + mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
mlx5e_hairpin_put(priv, hpe); } @@@ -5133,7 -5105,7 +5133,7 @@@ bool mlx5e_tc_update_skb(struct mlx5_cq
tc_skb_ext->chain = chain;
- zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) & + zone_restore_id = (reg_b >> REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) & ESW_ZONE_ID_MASK;
if (!mlx5e_tc_ct_restore_flow(tc->ct, skb, diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 721093b55acc,17027536efba..f7cbeb0b66d2 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@@ -129,7 -129,7 +129,7 @@@ struct tunnel_match_enc_opts */ #define TUNNEL_INFO_BITS 12 #define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0) -#define ENC_OPTS_BITS 12 +#define ENC_OPTS_BITS 11 #define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0) #define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS) #define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0) @@@ -178,6 -178,9 +178,9 @@@ void mlx5e_take_all_encap_flows(struct void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
struct mlx5e_neigh_hash_entry; + struct mlx5e_encap_entry * + mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e); void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
void mlx5e_tc_reoffload_flows_work(struct work_struct *work); @@@ -198,10 -201,10 +201,10 @@@ enum mlx5e_tc_attr_to_reg
struct mlx5e_tc_attr_to_reg_mapping { int mfield; /* rewrite field */ - int moffset; /* offset of mfield */ - int mlen; /* bytes to rewrite/match */ + int moffset; /* bit offset of mfield */ + int mlen; /* bits to rewrite/match */
- int soffset; /* offset of spec for match */ + int soffset; /* byte offset of spec for match */ };
extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[]; diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 669ff58107e4,320fe0cda917..c63d78eda606 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@@ -32,7 -32,6 +32,6 @@@
#include <linux/tcp.h> #include <linux/if_vlan.h> - #include <linux/ptp_classify.h> #include <net/geneve.h> #include <net/dsfield.h> #include "en.h" @@@ -67,24 -66,6 +66,6 @@@ static inline int mlx5e_get_dscp_up(str } #endif
- static bool mlx5e_use_ptpsq(struct sk_buff *skb) - { - struct flow_keys fk; - - if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) - return false; - - if (fk.basic.n_proto == htons(ETH_P_1588)) - return true; - - if (fk.basic.n_proto != htons(ETH_P_IP) && - fk.basic.n_proto != htons(ETH_P_IPV6)) - return false; - - return (fk.basic.ip_proto == IPPROTO_UDP && - fk.ports.dst == htons(PTP_EV_PORT)); - } - static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb) { struct mlx5e_priv *priv = netdev_priv(dev); @@@ -145,9 -126,9 +126,9 @@@ u16 mlx5e_select_queue(struct net_devic }
ptp_channel = READ_ONCE(priv->channels.ptp); - if (unlikely(ptp_channel) && - test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) && - mlx5e_use_ptpsq(skb)) + if (unlikely(ptp_channel && + test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) && + mlx5e_use_ptpsq(skb))) return mlx5e_select_ptpsq(dev, skb);
txq_ix = netdev_pick_tx(dev, skb, NULL); @@@ -706,12 -687,16 +687,12 @@@ void mlx5e_tx_mpwqe_ensure_complete(str mlx5e_tx_mpwqe_session_complete(sq); }
-static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, +static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, struct mlx5_wqe_eth_seg *eseg, u16 ihs) { - if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg, ihs))) - return false; - + mlx5e_accel_tx_eseg(priv, skb, eseg, ihs); mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg); - - return true; }
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) @@@ -740,7 -725,10 +721,7 @@@ if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) { struct mlx5_wqe_eth_seg eseg = {};
- if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, - attr.ihs))) - return NETDEV_TX_OK; - + mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, attr.ihs); mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more()); return NETDEV_TX_OK; } @@@ -755,7 -743,9 +736,7 @@@ /* May update the WQE, but may not post other WQEs. */ mlx5e_accel_tx_finish(sq, wqe, &accel, (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl)); - if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs))) - return NETDEV_TX_OK; - + mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs); mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());
return NETDEV_TX_OK; diff --combined drivers/net/ethernet/mellanox/mlx5/core/eq.c index 7e5b3826eae5,940333410267..c7efd177da1f --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@@ -1,6 -1,33 +1,6 @@@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2021, Mellanox Technologies inc. All rights reserved. */
#include <linux/interrupt.h> @@@ -18,7 -45,6 +18,7 @@@ #include "eswitch.h" #include "lib/clock.h" #include "diag/fw_tracer.h" +#include "mlx5_irq.h"
enum { MLX5_EQE_OWNER_INIT_VAL = 0x1, @@@ -58,9 -84,6 +58,9 @@@ struct mlx5_eq_table struct mutex lock; /* sync async eqs creations */ int num_comp_eqs; struct mlx5_irq_table *irq_table; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif };
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@@ -113,7 -136,7 +113,7 @@@ static int mlx5_eq_comp_int(struct noti
eqe = next_eqe_sw(eq); if (!eqe) - return 0; + goto out;
do { struct mlx5_core_cq *cq; @@@ -138,6 -161,8 +138,8 @@@ ++eq->cons_index;
} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); + + out: eq_update_ci(eq, 1);
if (cqn != -1) @@@ -225,9 -250,9 +227,9 @@@ static int mlx5_eq_async_int(struct not ++eq->cons_index;
} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); - eq_update_ci(eq, 1);
out: + eq_update_ci(eq, 1); mlx5_eq_async_int_unlock(eq_async, recovery, &flags);
return unlikely(recovery) ? num_eqes : 0; @@@ -263,7 -288,7 +265,7 @@@ create_map_eq(struct mlx5_core_dev *dev u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; u8 log_eq_stride = ilog2(MLX5_EQE_SIZE); struct mlx5_priv *priv = &dev->priv; - u8 vecidx = param->irq_index; + u16 vecidx = param->irq_index; __be64 *pas; void *eqc; int inlen; @@@ -286,20 -311,13 +288,20 @@@ mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc); init_eq_buf(eq);
+ eq->irq = mlx5_irq_request(dev, vecidx, param->affinity); + if (IS_ERR(eq->irq)) { + err = PTR_ERR(eq->irq); + goto err_buf; + } + + vecidx = mlx5_irq_get_index(eq->irq); inlen = MLX5_ST_SZ_BYTES(create_eq_in) + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; - goto err_buf; + goto err_irq; }
pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); @@@ -343,8 -361,6 +345,8 @@@ err_eq err_in: kvfree(in);
+err_irq: + mlx5_irq_release(eq->irq); err_buf: mlx5_frag_buf_free(dev, &eq->frag_buf); return err; @@@ -363,9 -379,10 +365,9 @@@ int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct notifier_block *nb) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err;
- err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb); + err = mlx5_irq_attach_nb(eq->irq, nb); if (!err) eq_update_ci(eq, 1);
@@@ -384,7 -401,9 +386,7 @@@ EXPORT_SYMBOL(mlx5_eq_enable) void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct notifier_block *nb) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; - - mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb); + mlx5_irq_detach_nb(eq->irq, nb); } EXPORT_SYMBOL(mlx5_eq_disable);
@@@ -398,9 -417,10 +400,9 @@@ static int destroy_unmap_eq(struct mlx5 if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); - synchronize_irq(eq->irqn); + mlx5_irq_release(eq->irq);
mlx5_frag_buf_free(dev, &eq->frag_buf); - return err; }
@@@ -472,7 -492,14 +474,7 @@@ static int create_async_eq(struct mlx5_ int err;
mutex_lock(&eq_table->lock); - /* Async EQs must share irq index 0 */ - if (param->irq_index != 0) { - err = -EINVAL; - goto unlock; - } - err = create_map_eq(dev, eq, param); -unlock: mutex_unlock(&eq_table->lock); return err; } @@@ -591,11 -618,8 +593,11 @@@ setup_async_eq(struct mlx5_core_dev *de
eq->irq_nb.notifier_call = mlx5_eq_async_int; spin_lock_init(&eq->lock); + if (!zalloc_cpumask_var(¶m->affinity, GFP_KERNEL)) + return -ENOMEM;
err = create_async_eq(dev, &eq->core, param); + free_cpumask_var(param->affinity); if (err) { mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err); return err; @@@ -630,6 -654,7 +632,6 @@@ static int create_async_eqs(struct mlx5 mlx5_eq_notifier_register(dev, &table->cq_err_nb);
param = (struct mlx5_eq_param) { - .irq_index = 0, .nent = MLX5_NUM_CMD_EQE, .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, }; @@@ -642,6 -667,7 +644,6 @@@ mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
param = (struct mlx5_eq_param) { - .irq_index = 0, .nent = MLX5_NUM_ASYNC_EQE, };
@@@ -651,6 -677,7 +653,6 @@@ goto err2;
param = (struct mlx5_eq_param) { - .irq_index = 0, .nent = /* TODO: sriov max_vf + */ 1, .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, }; @@@ -710,9 -737,6 +712,9 @@@ mlx5_eq_create_generic(struct mlx5_core struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); int err;
+ if (!param->affinity) + return ERR_PTR(-EINVAL); + if (!eq) return ERR_PTR(-ENOMEM);
@@@ -823,21 -847,16 +825,21 @@@ static int create_comp_eqs(struct mlx5_ .irq_index = vecidx, .nent = nent, }; - err = create_map_eq(dev, &eq->core, ¶m); - if (err) { - kfree(eq); - goto clean; + + if (!zalloc_cpumask_var(¶m.affinity, GFP_KERNEL)) { + err = -ENOMEM; + goto clean_eq; } + cpumask_set_cpu(cpumask_local_spread(i, dev->priv.numa_node), + param.affinity); + err = create_map_eq(dev, &eq->core, ¶m); + free_cpumask_var(param.affinity); + if (err) + goto clean_eq; err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); if (err) { destroy_unmap_eq(dev, &eq->core); - kfree(eq); - goto clean; + goto clean_eq; }
mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); @@@ -846,8 -865,7 +848,8 @@@ }
return 0; - +clean_eq: + kfree(eq); clean: destroy_comp_eqs(dev); return err; @@@ -883,23 -901,17 +885,23 @@@ EXPORT_SYMBOL(mlx5_comp_vectors_count) struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { - int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE; + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq, *n; + int i = 0;
- return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table, - vecidx); + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + if (i++ == vector) + break; + } + + return mlx5_irq_get_affinity_mask(eq->core.irq); } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
#ifdef CONFIG_RFS_ACCEL struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { - return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table); + return dev->priv.eq_table->rmap; } #endif
@@@ -916,57 -928,12 +918,57 @@@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(s return ERR_PTR(-ENOENT); }
+static void clear_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + free_irq_cpu_rmap(eq_table->rmap); +#endif +} + +static int set_rmap(struct mlx5_core_dev *mdev) +{ + int err = 0; +#ifdef CONFIG_RFS_ACCEL + struct mlx5_eq_table *eq_table = mdev->priv.eq_table; + int vecidx; + + eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs); + if (!eq_table->rmap) { + err = -ENOMEM; + mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err); + goto err_out; + } + + vecidx = MLX5_IRQ_VEC_COMP_BASE; + for (; vecidx < eq_table->num_comp_eqs + MLX5_IRQ_VEC_COMP_BASE; + vecidx++) { + err = irq_cpu_rmap_add(eq_table->rmap, + pci_irq_vector(mdev->pdev, vecidx)); + if (err) { + mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d", + err); + goto err_irq_cpu_rmap_add; + } + } + return 0; + +err_irq_cpu_rmap_add: + clear_rmap(mdev); +err_out: +#endif + return err; +} + /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table;
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ + if (!mlx5_core_is_sf(dev)) + clear_rmap(dev); mlx5_irq_table_destroy(dev); mutex_unlock(&table->lock); } @@@ -983,19 -950,12 +985,19 @@@ int mlx5_eq_table_create(struct mlx5_co int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? MLX5_CAP_GEN(dev, max_num_eqs) : 1 << MLX5_CAP_GEN(dev, log_max_eq); + int max_eqs_sf; int err;
eq_table->num_comp_eqs = min_t(int, - mlx5_irq_get_num_comp(eq_table->irq_table), + mlx5_irq_table_get_num_comp(eq_table->irq_table), num_eqs - MLX5_MAX_ASYNC_EQS); + if (mlx5_core_is_sf(dev)) { + max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF, + mlx5_irq_table_get_sfs_vec(eq_table->irq_table)); + eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs, + max_eqs_sf); + }
err = create_async_eqs(dev); if (err) { @@@ -1003,18 -963,6 +1005,18 @@@ goto err_async_eqs; }
+ if (!mlx5_core_is_sf(dev)) { + /* rmap is a mapping between irq number and queue number. + * each irq can be assign only to a single rmap. + * since SFs share IRQs, rmap mapping cannot function correctly + * for irqs that are shared for different core/netdev RX rings. + * Hence we don't allow netdev rmap for SFs + */ + err = set_rmap(dev); + if (err) + goto err_rmap; + } + err = create_comp_eqs(dev); if (err) { mlx5_core_err(dev, "Failed to create completion EQs\n"); @@@ -1023,9 -971,6 +1025,9 @@@
return 0; err_comp_eqs: + if (!mlx5_core_is_sf(dev)) + clear_rmap(dev); +err_rmap: destroy_async_eqs(dev); err_async_eqs: return err; @@@ -1033,8 -978,6 +1035,8 @@@
void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) { + if (!mlx5_core_is_sf(dev)) + clear_rmap(dev); destroy_comp_eqs(dev); destroy_async_eqs(dev); } diff --combined drivers/net/ethernet/mellanox/mlx5/core/main.c index 390b1d3a6fde,0d0f63a27aba..eb1b316560a8 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@@ -76,7 -76,6 +76,7 @@@ #include "sf/vhca_event.h" #include "sf/dev/dev.h" #include "sf/sf.h" +#include "mlx5_irq.h"
MODULE_AUTHOR("Eli Cohen eli@mellanox.com"); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@@ -1162,7 -1161,7 +1162,7 @@@ static int mlx5_load(struct mlx5_core_d err = mlx5_core_set_hca_defaults(dev); if (err) { mlx5_core_err(dev, "Failed to set hca defaults\n"); - goto err_sriov; + goto err_set_hca; }
mlx5_vhca_event_start(dev); @@@ -1186,7 -1185,6 +1186,7 @@@ }
mlx5_sf_dev_table_create(dev); + mlx5_lag_add_mdev(dev);
return 0;
@@@ -1196,6 -1194,7 +1196,7 @@@ err_ec mlx5_sf_hw_table_destroy(dev); err_vhca: mlx5_vhca_event_stop(dev); + err_set_hca: mlx5_cleanup_fs(dev); err_fs: mlx5_accel_tls_cleanup(dev); @@@ -1221,7 -1220,6 +1222,7 @@@ err_irq_table
static void mlx5_unload(struct mlx5_core_dev *dev) { + mlx5_lag_remove_mdev(dev); mlx5_sf_dev_table_destroy(dev); mlx5_sriov_detach(dev); mlx5_ec_cleanup(dev); diff --combined drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index 42668de01abc,7466f016375c..4aaca8eb7597 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@@ -116,8 -116,6 +116,8 @@@ enum DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, + DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, + DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2 = 0x8c, @@@ -248,12 -246,6 +248,12 @@@ static const struct mlx5dr_ste_action_m [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15, }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15, + }, };
static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type) @@@ -369,8 -361,8 +369,8 @@@ static void dr_ste_v1_set_reparse(u8 *h MLX5_SET(ste_match_bwc_v1, hw_ste_p, reparse, 1); }
-static void dr_ste_v1_set_tx_encap(u8 *hw_ste_p, u8 *d_action, - u32 reformat_id, int size) +static void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, int size) { MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); @@@ -382,26 -374,6 +382,26 @@@ dr_ste_v1_set_reparse(hw_ste_p); }
+static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, + u8 anchor, u8 offset, + int size) +{ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, + action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_offset, offset / 2); + + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE); + + dr_ste_v1_set_reparse(hw_ste_p); +} + static void dr_ste_v1_set_tx_push_vlan(u8 *hw_ste_p, u8 *d_action, u32 vlan_hdr) { @@@ -429,11 -401,11 +429,11 @@@ static void dr_ste_v1_set_rx_pop_vlan(u dr_ste_v1_set_reparse(hw_ste_p); }
-static void dr_ste_v1_set_tx_encap_l3(u8 *hw_ste_p, - u8 *frst_s_action, - u8 *scnd_d_action, - u32 reformat_id, - int size) +static void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, + u8 *frst_s_action, + u8 *scnd_d_action, + u32 reformat_id, + int size) { /* Remove L2 headers */ MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, action_id, @@@ -547,9 -519,9 +547,9 @@@ static void dr_ste_v1_set_actions_tx(st action_sz = DR_STE_ACTION_TRIPLE_SZ; allow_encap = true; } - dr_ste_v1_set_tx_encap(last_ste, action, - attr->reformat_id, - attr->reformat_size); + dr_ste_v1_set_encap(last_ste, action, + attr->reformat.id, + attr->reformat.size); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) { @@@ -560,25 -532,12 +560,25 @@@ action_sz = DR_STE_ACTION_TRIPLE_SZ; d_action = action + DR_STE_ACTION_SINGLE_SZ;
- dr_ste_v1_set_tx_encap_l3(last_ste, - action, d_action, - attr->reformat_id, - attr->reformat_size); + dr_ste_v1_set_encap_l3(last_ste, + action, d_action, + attr->reformat.id, + attr->reformat.size); action_sz -= DR_STE_ACTION_TRIPLE_SZ; action += DR_STE_ACTION_TRIPLE_SZ; + } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) { + if (!allow_encap || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; }
dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); @@@ -657,9 -616,7 +657,9 @@@ static void dr_ste_v1_set_actions_rx(st }
if (action_type_set[DR_ACTION_TYP_CTR]) { - /* Counter action set after decap to exclude decaped header */ + /* Counter action set after decap and before insert_hdr + * to exclude decaped / encaped header respectively. + */ if (!allow_ctr) { dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); @@@ -670,52 -627,6 +670,52 @@@ dr_ste_v1_set_counter_id(last_ste, attr->ctr_id); }
+ if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_encap(last_ste, action, + attr->reformat.id, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_modify_hdr = false; + } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) { + u8 *d_action; + + if (action_sz < DR_STE_ACTION_TRIPLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + + d_action = action + DR_STE_ACTION_SINGLE_SZ; + + dr_ste_v1_set_encap_l3(last_ste, + action, d_action, + attr->reformat.id, + attr->reformat.size); + action_sz -= DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = false; + } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) { + /* Modify header, decap, and encap must use different STEs */ + if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_modify_hdr = false; + } + dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); } @@@ -783,7 -694,11 +783,11 @@@ static int dr_ste_v1_set_action_decap_l if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM) return -EINVAL;
- memcpy(padded_data, data, data_sz); + inline_data_sz = + MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data); + + /* Add an alignment padding */ + memcpy(padded_data + data_sz % inline_data_sz, data, data_sz);
/* Remove L2L3 outer headers */ MLX5_SET(ste_single_action_remove_header_v1, hw_action, action_id, @@@ -795,32 -710,34 +799,34 @@@ hw_action += DR_STE_ACTION_DOUBLE_SZ; used_actions++; /* Remove and NOP are a single double action */
- inline_data_sz = - MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data); + /* Point to the last dword of the header */ + data_ptr += (data_sz / inline_data_sz) * inline_data_sz;
- /* Add the new header inline + 2 extra bytes */ + /* Add the new header using inline action 4Byte at a time, the header + * is added in reversed order to the beginning of the packet to avoid + * incorrect parsing by the HW. Since header is 14B or 18B an extra + * two bytes are padded and later removed. + */ for (i = 0; i < data_sz / inline_data_sz + 1; i++) { void *addr_inline;
MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, action_id, DR_STE_V1_ACTION_ID_INSERT_INLINE); /* The hardware expects here offset to words (2 bytes) */ - MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, - i * 2); + MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, 0);
/* Copy bytes one by one to avoid endianness problem */ addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v1, hw_action, inline_data); - memcpy(addr_inline, data_ptr, inline_data_sz); + memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz); hw_action += DR_STE_ACTION_DOUBLE_SZ; - data_ptr += inline_data_sz; used_actions++; }
- /* Remove 2 extra bytes */ + /* Remove first 2 extra bytes */ MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); - MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, data_sz / 2); + MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, 0); /* The hardware expects here size in words (2 bytes) */ MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, remove_size, 1); used_actions++; @@@ -1954,7 -1871,6 +1960,7 @@@ struct mlx5dr_ste_ctx ste_ctx_v1 = .set_byte_mask = &dr_ste_v1_set_byte_mask, .get_byte_mask = &dr_ste_v1_get_byte_mask, /* Actions */ + .actions_caps = DR_STE_CTX_ACTION_CAP_RX_ENCAP, .set_actions_rx = &dr_ste_v1_set_actions_rx, .set_actions_tx = &dr_ste_v1_set_actions_tx, .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v1_action_modify_field_arr), diff --combined drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 0e2b73731117,9737565cd8d4..b2aa6c93c3a1 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@@ -26,7 -26,6 +26,7 @@@ enum mlx5dr_action_reformat_type DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2, DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2, DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3, + DR_ACTION_REFORMAT_TYP_INSERT_HDR, };
struct mlx5dr_match_parameters { @@@ -106,8 -105,6 +106,8 @@@ mlx5dr_action_create_flow_counter(u32 c struct mlx5dr_action * mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, enum mlx5dr_action_reformat_type reformat_type, + u8 reformat_param_0, + u8 reformat_param_1, size_t data_sz, void *data);
@@@ -127,10 -124,11 +127,11 @@@ int mlx5dr_action_destroy(struct mlx5dr static inline bool mlx5dr_is_supported(struct mlx5_core_dev *dev) { - return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || - (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && - (MLX5_CAP_GEN(dev, steering_format_version) <= - MLX5_STEERING_FORMAT_CONNECTX_6DX)); + return MLX5_CAP_GEN(dev, roce) && + (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || + (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && + (MLX5_CAP_GEN(dev, steering_format_version) <= + MLX5_STEERING_FORMAT_CONNECTX_6DX))); }
/* buddy functions & structure */ diff --combined drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 677a53f65008,85f0ce285146..0998dcc9cac0 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@@ -149,27 -149,22 +149,27 @@@ mlxsw_thermal_module_trips_reset(struc
static int mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, - struct mlxsw_thermal_module *tz) + struct mlxsw_thermal_module *tz, + int crit_temp, int emerg_temp) { - int crit_temp, emerg_temp; int err;
- err = mlxsw_env_module_temp_thresholds_get(core, tz->module, - SFP_TEMP_HIGH_WARN, - &crit_temp); - if (err) - return err; + /* Do not try to query temperature thresholds directly from the module's + * EEPROM if we got valid thresholds from MTMP. + */ + if (!emerg_temp || !crit_temp) { + err = mlxsw_env_module_temp_thresholds_get(core, tz->module, + SFP_TEMP_HIGH_WARN, + &crit_temp); + if (err) + return err;
- err = mlxsw_env_module_temp_thresholds_get(core, tz->module, - SFP_TEMP_HIGH_ALARM, - &emerg_temp); - if (err) - return err; + err = mlxsw_env_module_temp_thresholds_get(core, tz->module, + SFP_TEMP_HIGH_ALARM, + &emerg_temp); + if (err) + return err; + }
if (crit_temp > emerg_temp) { dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n", @@@ -286,7 -281,7 +286,7 @@@ static int mlxsw_thermal_get_temp(struc dev_err(dev, "Failed to query temp sensor\n"); return err; } - mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); if (temp > 0) mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips, temp); @@@ -425,57 -420,36 +425,57 @@@ static int mlxsw_thermal_module_unbind( return err; }
-static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, - int *p_temp) +static void +mlxsw_thermal_module_temp_and_thresholds_get(struct mlxsw_core *core, + u16 sensor_index, int *p_temp, + int *p_crit_temp, + int *p_emerg_temp) { - struct mlxsw_thermal_module *tz = tzdev->devdata; - struct mlxsw_thermal *thermal = tz->parent; - struct device *dev = thermal->bus_info->dev; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - int temp; int err;
- /* Read module temperature. */ - mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + - tz->module, false, false); - err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); + /* Read module temperature and thresholds. */ + mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, false, false); + err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl); if (err) { - /* Do not return error - in case of broken module's sensor - * it will cause error message flooding. + /* Set temperature and thresholds to zero to avoid passing + * uninitialized data back to the caller. */ - temp = 0; - *p_temp = (int) temp; - return 0; + *p_temp = 0; + *p_crit_temp = 0; + *p_emerg_temp = 0; + + return; } - mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, p_crit_temp, p_emerg_temp, + NULL); +} + +static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + int temp, crit_temp, emerg_temp; + struct device *dev; + u16 sensor_index; + int err; + + dev = thermal->bus_info->dev; + sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + tz->module; + + /* Read module temperature and thresholds. */ + mlxsw_thermal_module_temp_and_thresholds_get(thermal->core, + sensor_index, &temp, + &crit_temp, &emerg_temp); *p_temp = temp;
if (!temp) return 0;
/* Update trip points. */ - err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz); + err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz, + crit_temp, emerg_temp); if (!err && temp > 0) mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
@@@ -586,7 -560,7 +586,7 @@@ static int mlxsw_thermal_gearbox_temp_g if (err) return err;
- mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); if (temp > 0) mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
@@@ -719,7 -693,8 +719,8 @@@ mlxsw_thermal_module_tz_init(struct mlx MLXSW_THERMAL_TRIP_MASK, module_tz, &mlxsw_thermal_module_ops, - NULL, 0, 0); + NULL, 0, + module_tz->parent->polling_delay); if (IS_ERR(module_tz->tzdev)) { err = PTR_ERR(module_tz->tzdev); return err; @@@ -742,10 -717,7 +743,10 @@@ mlxsw_thermal_module_init(struct devic struct mlxsw_thermal *thermal, u8 module) { struct mlxsw_thermal_module *module_tz; + int dummy_temp, crit_temp, emerg_temp; + u16 sensor_index;
+ sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + module; module_tz = &thermal->tz_module_arr[module]; /* Skip if parent is already set (case of port split). */ if (module_tz->parent) @@@ -756,12 -728,8 +757,12 @@@ sizeof(thermal->trips)); /* Initialize all trip point. */ mlxsw_thermal_module_trips_reset(module_tz); + /* Read module temperature and thresholds. */ + mlxsw_thermal_module_temp_and_thresholds_get(core, sensor_index, &dummy_temp, + &crit_temp, &emerg_temp); /* Update trip point according to the module data. */ - return mlxsw_thermal_module_trips_update(dev, core, module_tz); + return mlxsw_thermal_module_trips_update(dev, core, module_tz, + crit_temp, emerg_temp); }
static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz) @@@ -848,7 -816,8 +849,8 @@@ mlxsw_thermal_gearbox_tz_init(struct ml MLXSW_THERMAL_TRIP_MASK, gearbox_tz, &mlxsw_thermal_gearbox_ops, - NULL, 0, 0); + NULL, 0, + gearbox_tz->parent->polling_delay); if (IS_ERR(gearbox_tz->tzdev)) return PTR_ERR(gearbox_tz->tzdev);
diff --combined drivers/net/ethernet/mellanox/mlxsw/reg.h index 5304309ecb9d,2bc5a9003c6d..93f1db3927af --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@@ -3907,7 -3907,7 +3907,7 @@@ MLXSW_ITEM32(reg, qeec, max_shaper_bs, #define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS 25 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1 5 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2 11 - #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 5 + #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 11
static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, enum mlxsw_reg_qeec_hr hr, u8 index, @@@ -8305,8 -8305,6 +8305,8 @@@ enum MLXSW_REG_RECR2_TCP_UDP_EN_IPV4 = 7, /* Enable TCP/UDP header fields if packet is IPv6 */ MLXSW_REG_RECR2_TCP_UDP_EN_IPV6 = 8, + + __MLXSW_REG_RECR2_HEADER_CNT, };
/* reg_recr2_outer_header_enables @@@ -8341,8 -8339,6 +8341,8 @@@ enum MLXSW_REG_RECR2_TCP_UDP_SPORT = 74, /* TCP/UDP Destination Port */ MLXSW_REG_RECR2_TCP_UDP_DPORT = 75, + + __MLXSW_REG_RECR2_FIELD_CNT, };
/* reg_recr2_outer_header_fields_enable @@@ -8351,47 -8347,47 +8351,47 @@@ */ MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_fields_enable, 0x14, 0x14, 1);
-static inline void mlxsw_reg_recr2_ipv4_sip_enable(char *payload) -{ - int i; - - for (i = MLXSW_REG_RECR2_IPV4_SIP0; i <= MLXSW_REG_RECR2_IPV4_SIP3; i++) - mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, - true); -} - -static inline void mlxsw_reg_recr2_ipv4_dip_enable(char *payload) -{ - int i; - - for (i = MLXSW_REG_RECR2_IPV4_DIP0; i <= MLXSW_REG_RECR2_IPV4_DIP3; i++) - mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, - true); -} - -static inline void mlxsw_reg_recr2_ipv6_sip_enable(char *payload) -{ - int i = MLXSW_REG_RECR2_IPV6_SIP0_7; - - mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true); - - i = MLXSW_REG_RECR2_IPV6_SIP8; - for (; i <= MLXSW_REG_RECR2_IPV6_SIP15; i++) - mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, - true); -} - -static inline void mlxsw_reg_recr2_ipv6_dip_enable(char *payload) -{ - int i = MLXSW_REG_RECR2_IPV6_DIP0_7; +/* reg_recr2_inner_header_enables + * Bit mask where each bit enables a specific inner layer to be included in the + * hash calculation. Same values as reg_recr2_outer_header_enables. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, inner_header_enables, 0x2C, 0x04, 1);
- mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true); +enum { + /* Inner IPv4 Source IP */ + MLXSW_REG_RECR2_INNER_IPV4_SIP0 = 3, + MLXSW_REG_RECR2_INNER_IPV4_SIP3 = 6, + /* Inner IPv4 Destination IP */ + MLXSW_REG_RECR2_INNER_IPV4_DIP0 = 7, + MLXSW_REG_RECR2_INNER_IPV4_DIP3 = 10, + /* Inner IP Protocol */ + MLXSW_REG_RECR2_INNER_IPV4_PROTOCOL = 11, + /* Inner IPv6 Source IP */ + MLXSW_REG_RECR2_INNER_IPV6_SIP0_7 = 12, + MLXSW_REG_RECR2_INNER_IPV6_SIP8 = 20, + MLXSW_REG_RECR2_INNER_IPV6_SIP15 = 27, + /* Inner IPv6 Destination IP */ + MLXSW_REG_RECR2_INNER_IPV6_DIP0_7 = 28, + MLXSW_REG_RECR2_INNER_IPV6_DIP8 = 36, + MLXSW_REG_RECR2_INNER_IPV6_DIP15 = 43, + /* Inner IPv6 Next Header */ + MLXSW_REG_RECR2_INNER_IPV6_NEXT_HEADER = 44, + /* Inner IPv6 Flow Label */ + MLXSW_REG_RECR2_INNER_IPV6_FLOW_LABEL = 45, + /* Inner TCP/UDP Source Port */ + MLXSW_REG_RECR2_INNER_TCP_UDP_SPORT = 46, + /* Inner TCP/UDP Destination Port */ + MLXSW_REG_RECR2_INNER_TCP_UDP_DPORT = 47, + + __MLXSW_REG_RECR2_INNER_FIELD_CNT, +};
- i = MLXSW_REG_RECR2_IPV6_DIP8; - for (; i <= MLXSW_REG_RECR2_IPV6_DIP15; i++) - mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, - true); -} +/* reg_recr2_inner_header_fields_enable + * Inner packet fields to enable for ECMP hash subject to inner_header_enables. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, inner_header_fields_enable, 0x30, 0x08, 1);
static inline void mlxsw_reg_recr2_pack(char *payload, u32 seed) { @@@ -9463,14 -9459,6 +9463,14 @@@ MLXSW_ITEM32(reg, mtmp, sensor_index, 0 ((s16)((GENMASK(15, 0) + (v_) + 1) \ * 125)); })
+/* reg_mtmp_max_operational_temperature + * The highest temperature in the nominal operational range. Reading is in + * 0.125 Celsius degrees units. + * In case of module this is SFF critical temperature threshold. + * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, max_operational_temperature, 0x04, 16, 16); + /* reg_mtmp_temperature * Temperature reading from the sensor. Reading is in 0.125 Celsius * degrees units. @@@ -9549,9 -9537,7 +9549,9 @@@ static inline void mlxsw_reg_mtmp_pack( }
static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp, - int *p_max_temp, char *sensor_name) + int *p_max_temp, int *p_temp_hi, + int *p_max_oper_temp, + char *sensor_name) { s16 temp;
@@@ -9563,14 -9549,6 +9563,14 @@@ temp = mlxsw_reg_mtmp_max_temperature_get(payload); *p_max_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); } + if (p_temp_hi) { + temp = mlxsw_reg_mtmp_temperature_threshold_hi_get(payload); + *p_temp_hi = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (p_max_oper_temp) { + temp = mlxsw_reg_mtmp_max_operational_temperature_get(payload); + *p_max_oper_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } if (sensor_name) mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); } diff --combined drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 14282472c7a6,3beafc60747e..b307264e59cf --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@@ -319,8 -319,10 +319,8 @@@ int qlcnic_read_mac_addr(struct qlcnic_ static void qlcnic_delete_adapter_mac(struct qlcnic_adapter *adapter) { struct qlcnic_mac_vlan_list *cur; - struct list_head *head;
- list_for_each(head, &adapter->mac_list) { - cur = list_entry(head, struct qlcnic_mac_vlan_list, list); + list_for_each_entry(cur, &adapter->mac_list, list) { if (ether_addr_equal_unaligned(adapter->mac_addr, cur->mac_addr)) { qlcnic_sre_macaddr_change(adapter, cur->mac_addr, 0, QLCNIC_MAC_DEL); @@@ -2688,6 -2690,7 +2688,7 @@@ err_out_free_hw_res kfree(ahw);
err_out_free_res: + pci_disable_pcie_error_reporting(pdev); pci_release_regions(pdev);
err_out_disable_pdev: @@@ -3341,6 -3344,9 +3342,6 @@@ qlcnic_can_start_firmware(struct qlcnic do { msleep(1000); prev_state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE); - - if (prev_state == QLCNIC_DEV_QUISCENT) - continue; } while ((prev_state != QLCNIC_DEV_READY) && --dev_init_timeo);
if (!dev_init_timeo) { diff --combined drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 6556b5381ce8,ab1e0fcccabb..13d8eb43a485 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@@ -126,24 -126,24 +126,24 @@@ static void rmnet_get_stats64(struct ne struct rtnl_link_stats64 *s) { struct rmnet_priv *priv = netdev_priv(dev); - struct rmnet_vnd_stats total_stats; + struct rmnet_vnd_stats total_stats = { }; struct rmnet_pcpu_stats *pcpu_ptr; + struct rmnet_vnd_stats snapshot; unsigned int cpu, start;
- memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats)); - for_each_possible_cpu(cpu) { pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu);
do { start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp); - total_stats.rx_pkts += pcpu_ptr->stats.rx_pkts; - total_stats.rx_bytes += pcpu_ptr->stats.rx_bytes; - total_stats.tx_pkts += pcpu_ptr->stats.tx_pkts; - total_stats.tx_bytes += pcpu_ptr->stats.tx_bytes; + snapshot = pcpu_ptr->stats; /* struct assignment */ } while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start));
- total_stats.tx_drops += pcpu_ptr->stats.tx_drops; + total_stats.rx_pkts += snapshot.rx_pkts; + total_stats.rx_bytes += snapshot.rx_bytes; + total_stats.tx_pkts += snapshot.tx_pkts; + total_stats.tx_bytes += snapshot.tx_bytes; + total_stats.tx_drops += snapshot.tx_drops; }
s->rx_packets = total_stats.rx_pkts; @@@ -166,7 -166,6 +166,7 @@@ static const struct net_device_ops rmne
static const char rmnet_gstrings_stats[][ETH_GSTRING_LEN] = { "Checksum ok", + "Bad IPv4 header checksum", "Checksum valid bit not set", "Checksum validation failed", "Checksum error bad buffer", @@@ -175,7 -174,6 +175,7 @@@ "Checksum skipped on ip fragment", "Checksum skipped", "Checksum computed in software", + "Checksum computed in hardware", };
static void rmnet_get_strings(struct net_device *dev, u32 stringset, u8 *buf) @@@ -356,4 -354,4 +356,4 @@@ int rmnet_vnd_update_dev_mtu(struct rmn }
return 0; - } + } diff --combined drivers/net/ethernet/realtek/r8169_main.c index 6a9fe9f7e0be,2ee72dc431cd..f744557c33a3 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@@ -34,6 -34,8 +34,6 @@@ #include "r8169.h" #include "r8169_firmware.h"
-#define MODULENAME "r8169" - #define FIRMWARE_8168D_1 "rtl_nic/rtl8168d-1.fw" #define FIRMWARE_8168D_2 "rtl_nic/rtl8168d-2.fw" #define FIRMWARE_8168E_1 "rtl_nic/rtl8168e-1.fw" @@@ -1452,7 -1454,7 +1452,7 @@@ static void rtl8169_get_drvinfo(struct struct rtl8169_private *tp = netdev_priv(dev); struct rtl_fw *rtl_fw = tp->rtl_fw;
- strlcpy(info->driver, MODULENAME, sizeof(info->driver)); + strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); strlcpy(info->bus_info, pci_name(tp->pci_dev), sizeof(info->bus_info)); BUILD_BUG_ON(sizeof(info->fw_version) < sizeof(rtl_fw->version)); if (rtl_fw) @@@ -1669,7 -1671,7 +1669,7 @@@ static void rtl8169_get_strings(struct { switch(stringset) { case ETH_SS_STATS: - memcpy(data, *rtl8169_gstrings, sizeof(rtl8169_gstrings)); + memcpy(data, rtl8169_gstrings, sizeof(rtl8169_gstrings)); break; } } @@@ -3508,6 -3510,7 +3508,6 @@@ static void rtl_hw_start_8106(struct rt rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000);
rtl_pcie_state_l2l3_disable(tp); - rtl_hw_aspm_clkreq_enable(tp, true); }
DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond) @@@ -4114,7 -4117,6 +4114,7 @@@ static unsigned int rtl_quirk_packet_pa case RTL_GIGA_MAC_VER_61: case RTL_GIGA_MAC_VER_63: padto = max_t(unsigned int, padto, ETH_ZLEN); + break; default: break; } @@@ -5303,7 -5305,7 +5303,7 @@@ static int rtl_init_one(struct pci_dev return -ENODEV; }
- rc = pcim_iomap_regions(pdev, BIT(region), MODULENAME); + rc = pcim_iomap_regions(pdev, BIT(region), KBUILD_MODNAME); if (rc < 0) { dev_err(&pdev->dev, "cannot remap MMIO, aborting\n"); return rc; @@@ -5438,7 -5440,7 +5438,7 @@@ }
static struct pci_driver rtl8169_pci_driver = { - .name = MODULENAME, + .name = KBUILD_MODNAME, .id_table = rtl8169_pci_tbl, .probe = rtl_init_one, .remove = rtl_remove_one, diff --combined drivers/net/ethernet/renesas/sh_eth.c index 177523be4fb6,713d3629b4c1..840478692a37 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@@ -2287,7 -2287,7 +2287,7 @@@ static void sh_eth_get_strings(struct n { switch (stringset) { case ETH_SS_STATS: - memcpy(data, *sh_eth_gstrings_stats, + memcpy(data, sh_eth_gstrings_stats, sizeof(sh_eth_gstrings_stats)); break; } @@@ -3225,6 -3225,9 +3225,6 @@@ static int sh_eth_drv_probe(struct plat struct net_device *ndev; int ret;
- /* get base addr */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ndev = alloc_etherdev(sizeof(struct sh_eth_private)); if (!ndev) return -ENOMEM; @@@ -3242,7 -3245,7 +3242,7 @@@ mdp = netdev_priv(ndev); mdp->num_tx_ring = TX_RING_SIZE; mdp->num_rx_ring = RX_RING_SIZE; - mdp->addr = devm_ioremap_resource(&pdev->dev, res); + mdp->addr = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(mdp->addr)) { ret = PTR_ERR(mdp->addr); goto out_release; diff --combined drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index d8ae58bdbbe3,a696ada013eb..072eff8079d0 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@@ -230,6 -230,8 +230,6 @@@ static int stmmac_mtl_setup(struct plat plat->tx_sched_algorithm = MTL_TX_ALGORITHM_WFQ; else if (of_property_read_bool(tx_node, "snps,tx-sched-dwrr")) plat->tx_sched_algorithm = MTL_TX_ALGORITHM_DWRR; - else if (of_property_read_bool(tx_node, "snps,tx-sched-sp")) - plat->tx_sched_algorithm = MTL_TX_ALGORITHM_SP; else plat->tx_sched_algorithm = MTL_TX_ALGORITHM_SP;
@@@ -600,13 -602,6 +600,13 @@@ stmmac_probe_config_dt(struct platform_ goto error_hw_init; }
+ plat->stmmac_ahb_rst = devm_reset_control_get_optional_shared( + &pdev->dev, "ahb"); + if (IS_ERR(plat->stmmac_ahb_rst)) { + ret = plat->stmmac_ahb_rst; + goto error_hw_init; + } + return plat;
error_hw_init: @@@ -627,6 -622,8 +627,8 @@@ error_pclk_get void stmmac_remove_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat) { + clk_disable_unprepare(plat->stmmac_clk); + clk_disable_unprepare(plat->pclk); of_node_put(plat->phy_node); of_node_put(plat->mdio_node); } diff --combined drivers/net/hamradio/mkiss.c index 9933c87c1327,7685a1721597..b99128669bc8 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@@ -276,7 -276,7 +276,7 @@@ static void ax_bump(struct mkiss *ax */ *ax->rbuff &= ~0x20; } - } + }
count = ax->rcount;
@@@ -501,7 -501,7 +501,7 @@@ static void ax_encaps(struct net_devic default: count = kiss_esc(p, ax->xbuff, len); } - } + } spin_unlock_bh(&ax->buflock);
set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); @@@ -799,6 -799,7 +799,7 @@@ static void mkiss_close(struct tty_stru ax->tty = NULL;
unregister_netdev(ax->dev); + free_netdev(ax->dev); }
/* Perform I/O control on an active ax25 channel. */ @@@ -815,7 -816,7 +816,7 @@@ static int mkiss_ioctl(struct tty_struc dev = ax->dev;
switch (cmd) { - case SIOCGIFNAME: + case SIOCGIFNAME: err = copy_to_user((void __user *) arg, ax->dev->name, strlen(ax->dev->name) + 1) ? -EFAULT : 0; break; diff --combined drivers/net/mhi/net.c index 832d9de42f62,b806f2f8f859..6aa753387372 --- a/drivers/net/mhi/net.c +++ b/drivers/net/mhi/net.c @@@ -11,7 -11,6 +11,7 @@@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/u64_stats_sync.h> +#include <linux/wwan.h>
#include "mhi.h"
@@@ -19,12 -18,6 +19,12 @@@ #define MHI_NET_MAX_MTU 0xffff #define MHI_NET_DEFAULT_MTU 0x4000
+/* When set to false, the default netdev (link 0) is not created, and it's up + * to user to create the link (via wwan rtnetlink). + */ +static bool create_default_iface = true; +module_param(create_default_iface, bool, 0); + struct mhi_device_info { const char *netname; const struct mhi_net_proto *proto; @@@ -56,7 -49,7 +56,7 @@@ static int mhi_ndo_stop(struct net_devi return 0; }
- static int mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) + static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) { struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); const struct mhi_net_proto *proto = mhi_netdev->proto; @@@ -302,33 -295,32 +302,33 @@@ static void mhi_net_rx_refill_work(stru schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2); }
-static struct device_type wwan_type = { - .name = "wwan", -}; - -static int mhi_net_probe(struct mhi_device *mhi_dev, - const struct mhi_device_id *id) +static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id, + struct netlink_ext_ack *extack) { - const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data; - struct device *dev = &mhi_dev->dev; + const struct mhi_device_info *info; + struct mhi_device *mhi_dev = ctxt; struct mhi_net_dev *mhi_netdev; - struct net_device *ndev; int err;
- ndev = alloc_netdev(sizeof(*mhi_netdev), info->netname, - NET_NAME_PREDICTABLE, mhi_net_setup); - if (!ndev) - return -ENOMEM; + info = (struct mhi_device_info *)mhi_dev->id->driver_data; + + /* For now we only support one link (link context 0), driver must be + * reworked to break 1:1 relationship for net MBIM and to forward setup + * call to rmnet(QMAP) otherwise. + */ + if (if_id != 0) + return -EINVAL; + + if (dev_get_drvdata(&mhi_dev->dev)) + return -EBUSY;
mhi_netdev = netdev_priv(ndev); - dev_set_drvdata(dev, mhi_netdev); + + dev_set_drvdata(&mhi_dev->dev, mhi_netdev); mhi_netdev->ndev = ndev; mhi_netdev->mdev = mhi_dev; mhi_netdev->skbagg_head = NULL; mhi_netdev->proto = info->proto; - SET_NETDEV_DEV(ndev, &mhi_dev->dev); - SET_NETDEV_DEVTYPE(ndev, &wwan_type);
INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work); u64_stats_init(&mhi_netdev->stats.rx_syncp); @@@ -342,10 -334,7 +342,10 @@@ /* Number of transfer descriptors determines size of the queue */ mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
- err = register_netdev(ndev); + if (extack) + err = register_netdevice(ndev); + else + err = register_netdev(ndev); if (err) goto out_err;
@@@ -358,89 -347,23 +358,89 @@@ return 0;
out_err_proto: - unregister_netdev(ndev); + unregister_netdevice(ndev); out_err: free_netdev(ndev); return err; }
-static void mhi_net_remove(struct mhi_device *mhi_dev) +static void mhi_net_dellink(void *ctxt, struct net_device *ndev, + struct list_head *head) { - struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); + struct mhi_device *mhi_dev = ctxt;
- unregister_netdev(mhi_netdev->ndev); + if (head) + unregister_netdevice_queue(ndev, head); + else + unregister_netdev(ndev);
- mhi_unprepare_from_transfer(mhi_netdev->mdev); + mhi_unprepare_from_transfer(mhi_dev);
kfree_skb(mhi_netdev->skbagg_head);
- free_netdev(mhi_netdev->ndev); + dev_set_drvdata(&mhi_dev->dev, NULL); +} + +static const struct wwan_ops mhi_wwan_ops = { + .owner = THIS_MODULE, + .priv_size = sizeof(struct mhi_net_dev), + .setup = mhi_net_setup, + .newlink = mhi_net_newlink, + .dellink = mhi_net_dellink, +}; + +static int mhi_net_probe(struct mhi_device *mhi_dev, + const struct mhi_device_id *id) +{ + const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data; + struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; + struct net_device *ndev; + int err; + + err = wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_wwan_ops, mhi_dev); + if (err) + return err; + + if (!create_default_iface) + return 0; + + /* Create a default interface which is used as either RMNET real-dev, + * MBIM link 0 or ip link 0) + */ + ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname, + NET_NAME_PREDICTABLE, mhi_net_setup); + if (!ndev) { + err = -ENOMEM; + goto err_unregister; + } + + SET_NETDEV_DEV(ndev, &mhi_dev->dev); + + err = mhi_net_newlink(mhi_dev, ndev, 0, NULL); + if (err) + goto err_release; + + return 0; + +err_release: + free_netdev(ndev); +err_unregister: + wwan_unregister_ops(&cntrl->mhi_dev->dev); + + return err; +} + +static void mhi_net_remove(struct mhi_device *mhi_dev) +{ + struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); + struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; + + /* rtnetlink takes care of removing remaining links */ + wwan_unregister_ops(&cntrl->mhi_dev->dev); + + if (create_default_iface) + mhi_net_dellink(mhi_dev, mhi_netdev->ndev, NULL); }
static const struct mhi_device_info mhi_hwip0 = { diff --combined drivers/net/usb/cdc_ncm.c index c67f11e0e9a7,df0d1837e4ed..24753a4da7e6 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@@ -192,8 -192,7 +192,8 @@@ static u32 cdc_ncm_check_tx_max(struct return val; }
-static ssize_t cdc_ncm_show_min_tx_pkt(struct device *d, struct device_attribute *attr, char *buf) +static ssize_t min_tx_pkt_show(struct device *d, + struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -201,8 -200,7 +201,8 @@@ return sprintf(buf, "%u\n", ctx->min_tx_pkt); }
-static ssize_t cdc_ncm_show_rx_max(struct device *d, struct device_attribute *attr, char *buf) +static ssize_t rx_max_show(struct device *d, + struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -210,8 -208,7 +210,8 @@@ return sprintf(buf, "%u\n", ctx->rx_max); }
-static ssize_t cdc_ncm_show_tx_max(struct device *d, struct device_attribute *attr, char *buf) +static ssize_t tx_max_show(struct device *d, + struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -219,8 -216,7 +219,8 @@@ return sprintf(buf, "%u\n", ctx->tx_max); }
-static ssize_t cdc_ncm_show_tx_timer_usecs(struct device *d, struct device_attribute *attr, char *buf) +static ssize_t tx_timer_usecs_show(struct device *d, + struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -228,9 -224,7 +228,9 @@@ return sprintf(buf, "%u\n", ctx->timer_interval / (u32)NSEC_PER_USEC); }
-static ssize_t cdc_ncm_store_min_tx_pkt(struct device *d, struct device_attribute *attr, const char *buf, size_t len) +static ssize_t min_tx_pkt_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -244,9 -238,7 +244,9 @@@ return len; }
-static ssize_t cdc_ncm_store_rx_max(struct device *d, struct device_attribute *attr, const char *buf, size_t len) +static ssize_t rx_max_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -259,9 -251,7 +259,9 @@@ return len; }
-static ssize_t cdc_ncm_store_tx_max(struct device *d, struct device_attribute *attr, const char *buf, size_t len) +static ssize_t tx_max_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -274,9 -264,7 +274,9 @@@ return len; }
-static ssize_t cdc_ncm_store_tx_timer_usecs(struct device *d, struct device_attribute *attr, const char *buf, size_t len) +static ssize_t tx_timer_usecs_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -297,10 -285,10 +297,10 @@@ return len; }
-static DEVICE_ATTR(min_tx_pkt, 0644, cdc_ncm_show_min_tx_pkt, cdc_ncm_store_min_tx_pkt); -static DEVICE_ATTR(rx_max, 0644, cdc_ncm_show_rx_max, cdc_ncm_store_rx_max); -static DEVICE_ATTR(tx_max, 0644, cdc_ncm_show_tx_max, cdc_ncm_store_tx_max); -static DEVICE_ATTR(tx_timer_usecs, 0644, cdc_ncm_show_tx_timer_usecs, cdc_ncm_store_tx_timer_usecs); +static DEVICE_ATTR_RW(min_tx_pkt); +static DEVICE_ATTR_RW(rx_max); +static DEVICE_ATTR_RW(tx_max); +static DEVICE_ATTR_RW(tx_timer_usecs);
static ssize_t ndp_to_end_show(struct device *d, struct device_attribute *attr, char *buf) { @@@ -640,7 -628,7 +640,7 @@@ out /* set MTU to max supported by the device if necessary */ dev->net->mtu = min_t(int, dev->net->mtu, ctx->max_datagram_size - cdc_ncm_eth_hlen(dev));
- /* do not exceed operater preferred MTU */ + /* do not exceed operator preferred MTU */ if (ctx->mbim_extended_desc) { mbim_mtu = le16_to_cpu(ctx->mbim_extended_desc->wMTU); if (mbim_mtu != 0 && mbim_mtu < dev->net->mtu) @@@ -697,7 -685,7 +697,7 @@@ static int cdc_ncm_setup(struct usbnet struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; u32 def_rx, def_tx;
- /* be conservative when selecting intial buffer size to + /* be conservative when selecting initial buffer size to * increase the number of hosts this will work for */ def_rx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_RX, @@@ -1892,7 -1880,7 +1892,7 @@@ static void cdc_ncm_status(struct usbne static const struct driver_info cdc_ncm_info = { .description = "CDC NCM", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET - | FLAG_LINK_INTR, + | FLAG_LINK_INTR | FLAG_ETHER, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .manage_power = usbnet_manage_power, diff --combined drivers/net/usb/qmi_wwan.c index db157f21a322,bc55ec739af9..6a2e4f884b12 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@@ -575,7 -575,7 +575,7 @@@ static int qmi_wwan_rx_fixup(struct usb
if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) { skb->protocol = htons(ETH_P_MAP); - return (netif_rx(skb) == NET_RX_SUCCESS); + return 1; }
switch (skb->data[0] & 0xf0) { @@@ -710,8 -710,7 +710,8 @@@ static int qmi_wwan_register_subdriver(
/* register subdriver */ subdriver = usb_cdc_wdm_register(info->control, &dev->status->desc, - 4096, &qmi_wwan_cdc_wdm_manage_power); + 4096, WWAN_PORT_QMI, + &qmi_wwan_cdc_wdm_manage_power); if (IS_ERR(subdriver)) { dev_err(&info->control->dev, "subdriver registration failed\n"); rv = PTR_ERR(subdriver); diff --combined drivers/net/usb/r8152.c index 62cd48dc2878,e25bfb7021ed..1692d3b1b6e1 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@@ -931,8 -931,6 +931,8 @@@ struct r8152 u32 rx_pending; u32 fc_pause_on, fc_pause_off;
+ unsigned int pipe_in, pipe_out, pipe_intr, pipe_ctrl_in, pipe_ctrl_out; + u32 support_2500full:1; u32 lenovo_macpassthru:1; u32 dell_tb_rx_agg_bug:1; @@@ -1200,7 -1198,7 +1200,7 @@@ int get_registers(struct r8152 *tp, u1 if (!tmp) return -ENOMEM;
- ret = usb_control_msg(tp->udev, usb_rcvctrlpipe(tp->udev, 0), + ret = usb_control_msg(tp->udev, tp->pipe_ctrl_in, RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, value, index, tmp, size, 500); if (ret < 0) @@@ -1223,7 -1221,7 +1223,7 @@@ int set_registers(struct r8152 *tp, u1 if (!tmp) return -ENOMEM;
- ret = usb_control_msg(tp->udev, usb_sndctrlpipe(tp->udev, 0), + ret = usb_control_msg(tp->udev, tp->pipe_ctrl_out, RTL8152_REQ_SET_REGS, RTL8152_REQT_WRITE, value, index, tmp, size, 500);
@@@ -2043,7 -2041,7 +2043,7 @@@ static int alloc_all_mem(struct r8152 * goto err1;
tp->intr_interval = (int)ep_intr->desc.bInterval; - usb_fill_int_urb(tp->intr_urb, tp->udev, usb_rcvintpipe(tp->udev, 3), + usb_fill_int_urb(tp->intr_urb, tp->udev, tp->pipe_intr, tp->intr_buff, INTBUFSIZE, intr_callback, tp, tp->intr_interval);
@@@ -2307,7 -2305,7 +2307,7 @@@ static int r8152_tx_agg_fill(struct r81 if (ret < 0) goto out_tx_fill;
- usb_fill_bulk_urb(agg->urb, tp->udev, usb_sndbulkpipe(tp->udev, 2), + usb_fill_bulk_urb(agg->urb, tp->udev, tp->pipe_out, agg->head, (int)(tx_data - (u8 *)agg->head), (usb_complete_t)write_bulk_callback, agg);
@@@ -2447,7 -2445,7 +2447,7 @@@ static int rx_bottom(struct r8152 *tp, unsigned int pkt_len, rx_frag_head_sz; struct sk_buff *skb;
- /* limite the skb numbers for rx_queue */ + /* limit the skb numbers for rx_queue */ if (unlikely(skb_queue_len(&tp->rx_queue) >= 1000)) break;
@@@ -2622,7 -2620,7 +2622,7 @@@ int r8152_submit_rx(struct r8152 *tp, s !test_bit(WORK_ENABLE, &tp->flags) || !netif_carrier_ok(tp->netdev)) return 0;
- usb_fill_bulk_urb(agg->urb, tp->udev, usb_rcvbulkpipe(tp->udev, 1), + usb_fill_bulk_urb(agg->urb, tp->udev, tp->pipe_in, agg->buffer, tp->rx_buf_sz, (usb_complete_t)read_bulk_callback, agg);
@@@ -8213,7 -8211,7 +8213,7 @@@ static int rtl8152_post_reset(struct us if (!tp) return 0;
- /* reset the MAC adddress in case of policy change */ + /* reset the MAC address in case of policy change */ if (determine_ethernet_addr(tp, &sa) >= 0) { rtnl_lock(); dev_set_mac_address (tp->netdev, &sa, NULL); @@@ -8680,7 -8678,7 +8680,7 @@@ static void rtl8152_get_strings(struct { switch (stringset) { case ETH_SS_STATS: - memcpy(data, *rtl8152_gstrings, sizeof(rtl8152_gstrings)); + memcpy(data, rtl8152_gstrings, sizeof(rtl8152_gstrings)); break; } } @@@ -8969,79 -8967,6 +8969,79 @@@ static int rtl8152_set_ringparam(struc return 0; }
+static void rtl8152_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) +{ + struct r8152 *tp = netdev_priv(netdev); + u16 bmcr, lcladv, rmtadv; + u8 cap; + + if (usb_autopm_get_interface(tp->intf) < 0) + return; + + mutex_lock(&tp->control); + + bmcr = r8152_mdio_read(tp, MII_BMCR); + lcladv = r8152_mdio_read(tp, MII_ADVERTISE); + rmtadv = r8152_mdio_read(tp, MII_LPA); + + mutex_unlock(&tp->control); + + usb_autopm_put_interface(tp->intf); + + if (!(bmcr & BMCR_ANENABLE)) { + pause->autoneg = 0; + pause->rx_pause = 0; + pause->tx_pause = 0; + return; + } + + pause->autoneg = 1; + + cap = mii_resolve_flowctrl_fdx(lcladv, rmtadv); + + if (cap & FLOW_CTRL_RX) + pause->rx_pause = 1; + + if (cap & FLOW_CTRL_TX) + pause->tx_pause = 1; +} + +static int rtl8152_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) +{ + struct r8152 *tp = netdev_priv(netdev); + u16 old, new1; + u8 cap = 0; + int ret; + + ret = usb_autopm_get_interface(tp->intf); + if (ret < 0) + return ret; + + mutex_lock(&tp->control); + + if (pause->autoneg && !(r8152_mdio_read(tp, MII_BMCR) & BMCR_ANENABLE)) { + ret = -EINVAL; + goto out; + } + + if (pause->rx_pause) + cap |= FLOW_CTRL_RX; + + if (pause->tx_pause) + cap |= FLOW_CTRL_TX; + + old = r8152_mdio_read(tp, MII_ADVERTISE); + new1 = (old & ~(ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM)) | mii_advertise_flowctrl(cap); + if (old != new1) + r8152_mdio_write(tp, MII_ADVERTISE, new1); + +out: + mutex_unlock(&tp->control); + usb_autopm_put_interface(tp->intf); + + return ret; +} + static const struct ethtool_ops ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = rtl8152_get_drvinfo, @@@ -9064,8 -8989,6 +9064,8 @@@ .set_tunable = rtl8152_set_tunable, .get_ringparam = rtl8152_get_ringparam, .set_ringparam = rtl8152_set_ringparam, + .get_pauseparam = rtl8152_get_pauseparam, + .set_pauseparam = rtl8152_set_pauseparam, };
static int rtl8152_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) @@@ -9509,12 -9432,6 +9509,12 @@@ static int rtl8152_probe(struct usb_int tp->intf = intf; tp->version = version;
+ tp->pipe_ctrl_in = usb_rcvctrlpipe(udev, 0); + tp->pipe_ctrl_out = usb_sndctrlpipe(udev, 0); + tp->pipe_in = usb_rcvbulkpipe(udev, 1); + tp->pipe_out = usb_sndbulkpipe(udev, 2); + tp->pipe_intr = usb_rcvintpipe(udev, 3); + switch (version) { case RTL_VER_01: case RTL_VER_02: diff --combined drivers/net/vrf.c index 07eaef5e73c2,28a6c4cfe9b8..452822f88214 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@@ -274,7 -274,7 +274,7 @@@ vrf_map_register_dev(struct net_device int res;
/* we pre-allocate elements used in the spin-locked section (so that we - * keep the spinlock as short as possibile). + * keep the spinlock as short as possible). */ new_me = vrf_map_elem_alloc(GFP_KERNEL); if (!new_me) @@@ -1183,9 -1183,6 +1183,6 @@@ static int vrf_dev_init(struct net_devi
dev->flags = IFF_MASTER | IFF_NOARP;
- /* MTU is irrelevant for VRF device; set to 64k similar to lo */ - dev->mtu = 64 * 1024; - /* similarly, oper state is irrelevant; set to up to avoid confusion */ dev->operstate = IF_OPER_UP; netdev_lockdep_set_classes(dev); @@@ -1685,7 -1682,8 +1682,8 @@@ static void vrf_setup(struct net_devic * which breaks networking. */ dev->min_mtu = IPV6_MIN_MTU; - dev->max_mtu = ETH_MAX_MTU; + dev->max_mtu = IP6_MAX_MTU; + dev->mtu = dev->max_mtu; }
static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], diff --combined drivers/ptp/ptp_clock.c index a780435331c8,21c4c34c52d8..841d8900504d --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@@ -63,6 -63,27 +63,6 @@@ static void enqueue_external_timestamp( spin_unlock_irqrestore(&queue->lock, flags); }
-long scaled_ppm_to_ppb(long ppm) -{ - /* - * The 'freq' field in the 'struct timex' is in parts per - * million, but with a 16 bit binary fractional field. - * - * We want to calculate - * - * ppb = scaled_ppm * 1000 / 2^16 - * - * which simplifies to - * - * ppb = scaled_ppm * 125 / 2^13 - */ - s64 ppb = 1 + ppm; - ppb *= 125; - ppb >>= 13; - return (long) ppb; -} -EXPORT_SYMBOL(scaled_ppm_to_ppb); - /* posix clock implementation */
static int ptp_clock_getres(struct posix_clock *pc, struct timespec64 *tp) @@@ -117,7 -138,7 +117,7 @@@ static int ptp_clock_adjtime(struct pos delta = ktime_to_ns(kt); err = ops->adjtime(ops, delta); } else if (tx->modes & ADJ_FREQUENCY) { - s32 ppb = scaled_ppm_to_ppb(tx->freq); + long ppb = scaled_ppm_to_ppb(tx->freq); if (ppb > ops->max_adj || ppb < -ops->max_adj) return -ERANGE; if (ops->adjfine) diff --combined include/linux/device.h index a1e7cab2c7bf,f1a00040fa53..8f0ec3081a24 --- a/include/linux/device.h +++ b/include/linux/device.h @@@ -570,7 -570,7 +570,7 @@@ struct device * @flags: Link flags. * @rpm_active: Whether or not the consumer device is runtime-PM-active. * @kref: Count repeated addition of the same link. - * @rcu_head: An RCU head to use for deferred execution of SRCU callbacks. + * @rm_work: Work structure used for removing the link. * @supplier_preactivated: Supplier has been made active before consumer probe. */ struct device_link { @@@ -583,9 -583,7 +583,7 @@@ u32 flags; refcount_t rpm_active; struct kref kref; - #ifdef CONFIG_SRCU - struct rcu_head rcu_head; - #endif + struct work_struct rm_work; bool supplier_preactivated; /* Owned by consumer probe. */ };
@@@ -819,7 -817,6 +817,7 @@@ int device_online(struct device *dev) void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void device_set_of_node_from_dev(struct device *dev, const struct device *dev2); +void device_set_node(struct device *dev, struct fwnode_handle *fwnode);
static inline int dev_num_vf(struct device *dev) { diff --combined include/linux/mlx5/driver.h index f90f84061438,f8902bcd91e2..1efe37466969 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@@ -542,6 -542,10 +542,10 @@@ struct mlx5_core_roce enum { MLX5_PRIV_FLAGS_DISABLE_IB_ADEV = 1 << 0, MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV = 1 << 1, + /* Set during device detach to block any further devices + * creation/deletion on drivers rescan. Unset during device attach. + */ + MLX5_PRIV_FLAGS_DETACH = 1 << 2, };
struct mlx5_adev { @@@ -550,7 -554,6 +554,7 @@@ int idx; };
+struct mlx5_ft_pool; struct mlx5_priv { /* IRQ table valid only for real pci devices PF or VF */ struct mlx5_irq_table *irq_table; @@@ -603,7 -606,6 +607,7 @@@ struct mlx5_core_roce roce; struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; + struct mlx5_ft_pool *ft_pool;
struct mlx5_bfreg_data bfregs; struct mlx5_uars_page *uar; diff --combined include/linux/mm.h index a0434e8c2617,8ae31622deef..6cf4c6842ff0 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@@ -1668,11 -1668,10 +1668,11 @@@ struct address_space *page_mapping(stru static inline bool page_is_pfmemalloc(const struct page *page) { /* - * Page index cannot be this large so this must be - * a pfmemalloc page. + * lru.next has bit 1 set if the page is allocated from the + * pfmemalloc reserves. Callers may simply overwrite it if + * they do not need to preserve that information. */ - return page->index == -1UL; + return (uintptr_t)page->lru.next & BIT(1); }
/* @@@ -1681,12 -1680,12 +1681,12 @@@ */ static inline void set_page_pfmemalloc(struct page *page) { - page->index = -1UL; + page->lru.next = (void *)BIT(1); }
static inline void clear_page_pfmemalloc(struct page *page) { - page->index = 0; + page->lru.next = NULL; }
/* @@@ -1720,6 -1719,7 +1720,7 @@@ struct zap_details struct address_space *check_mapping; /* Check page->mapping if set */ pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */ + struct page *single_page; /* Locked page to be unmapped */ };
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, @@@ -1767,6 -1767,7 +1768,7 @@@ extern vm_fault_t handle_mm_fault(struc extern int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); + void unmap_mapping_page(struct page *page); void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows); void unmap_mapping_range(struct address_space *mapping, @@@ -1787,6 -1788,7 +1789,7 @@@ static inline int fixup_user_fault(stru BUG(); return -EFAULT; } + static inline void unmap_mapping_page(struct page *page) { } static inline void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { } static inline void unmap_mapping_range(struct address_space *mapping, diff --combined include/linux/mm_types.h index ed6862eacb52,8f0fb62e8975..862f88a8c28a --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@@ -96,13 -96,6 +96,13 @@@ struct page unsigned long private; }; struct { /* page_pool used by netstack */ + /** + * @pp_magic: magic value to avoid recycling non + * page_pool allocated pages. + */ + unsigned long pp_magic; + struct page_pool *pp; + unsigned long _pp_mapping_pad; /** * @dma_addr: might require a 64-bit value on * 32-bit architectures. @@@ -452,13 -445,6 +452,6 @@@ struct mm_struct */ atomic_t has_pinned;
- /** - * @write_protect_seq: Locked when any thread is write - * protecting pages mapped by this mm to enforce a later COW, - * for instance during page table copying for fork(). - */ - seqcount_t write_protect_seq; - #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif @@@ -467,6 -453,18 +460,18 @@@ spinlock_t page_table_lock; /* Protects page tables and some * counters */ + /* + * With some kernel config, the current mmap_lock's offset + * inside 'mm_struct' is at 0x120, which is very optimal, as + * its two hot fields 'count' and 'owner' sit in 2 different + * cachelines, and when mmap_lock is highly contended, both + * of the 2 fields will be accessed frequently, current layout + * will help to reduce cache bouncing. + * + * So please be careful with adding new fields before + * mmap_lock, which can easily push the 2 fields into one + * cacheline. + */ struct rw_semaphore mmap_lock;
struct list_head mmlist; /* List of maybe swapped mm's. These @@@ -487,7 -485,15 +492,15 @@@ unsigned long stack_vm; /* VM_STACK */ unsigned long def_flags;
+ /** + * @write_protect_seq: Locked when any thread is write + * protecting pages mapped by this mm to enforce a later COW, + * for instance during page table copying for fork(). + */ + seqcount_t write_protect_seq; + spinlock_t arg_lock; /* protect the below fields */ + unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; diff --combined include/linux/ptp_clock_kernel.h index a311bddd9e85,51d7f1b8b32a..aba237c0b3a2 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@@ -186,32 -186,6 +186,32 @@@ struct ptp_clock_event }; };
+/** + * scaled_ppm_to_ppb() - convert scaled ppm to ppb + * + * @ppm: Parts per million, but with a 16 bit binary fractional field + */ - static inline s32 scaled_ppm_to_ppb(long ppm) ++static inline long scaled_ppm_to_ppb(long ppm) +{ + /* + * The 'freq' field in the 'struct timex' is in parts per + * million, but with a 16 bit binary fractional field. + * + * We want to calculate + * + * ppb = scaled_ppm * 1000 / 2^16 + * + * which simplifies to + * + * ppb = scaled_ppm * 125 / 2^13 + */ + s64 ppb = 1 + ppm; + + ppb *= 125; + ppb >>= 13; - return (s32)ppb; ++ return (long)ppb; +} + #if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
/** @@@ -255,6 -229,14 +255,6 @@@ extern void ptp_clock_event(struct ptp_
extern int ptp_clock_index(struct ptp_clock *ptp);
-/** - * scaled_ppm_to_ppb() - convert scaled ppm to ppb - * - * @ppm: Parts per million, but with a 16 bit binary fractional field - */ - -extern long scaled_ppm_to_ppb(long ppm); - /** * ptp_find_pin() - obtain the pin index of a given auxiliary function * diff --combined include/net/net_namespace.h index befc5b93f311,bdc0459a595e..12cf6d7ea62c --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@@ -32,7 -32,6 +32,7 @@@ #include <net/netns/mpls.h> #include <net/netns/can.h> #include <net/netns/xdp.h> +#include <net/netns/smc.h> #include <net/netns/bpf.h> #include <linux/ns_common.h> #include <linux/idr.h> @@@ -171,9 -170,6 +171,9 @@@ struct net struct sock *crypto_nlsk; #endif struct sock *diag_nlsk; +#if IS_ENABLED(CONFIG_SMC) + struct netns_smc smc; +#endif } __randomize_layout;
#include <linux/seq_file_net.h> @@@ -188,6 -184,9 +188,9 @@@ struct net *copy_net_ns(unsigned long f void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
void net_ns_barrier(void); + + struct ns_common *get_net_ns(struct ns_common *ns); + struct net *get_net_ns_by_fd(int fd); #else /* CONFIG_NET_NS */ #include <linux/sched.h> #include <linux/nsproxy.h> @@@ -207,13 -206,22 +210,22 @@@ static inline void net_ns_get_ownership }
static inline void net_ns_barrier(void) {} + + static inline struct ns_common *get_net_ns(struct ns_common *ns) + { + return ERR_PTR(-EINVAL); + } + + static inline struct net *get_net_ns_by_fd(int fd) + { + return ERR_PTR(-EINVAL); + } #endif /* CONFIG_NET_NS */
extern struct list_head net_namespace_list;
struct net *get_net_ns_by_pid(pid_t pid); - struct net *get_net_ns_by_fd(int fd);
#ifdef CONFIG_SYSCTL void ipx_register_sysctl(void); diff --combined include/net/sock.h index 9b341c2c924f,7a7058f4f265..ced2fc965ec7 --- a/include/net/sock.h +++ b/include/net/sock.h @@@ -1934,7 -1934,8 +1934,8 @@@ static inline u32 net_tx_rndhash(void
static inline void sk_set_txhash(struct sock *sk) { - sk->sk_txhash = net_tx_rndhash(); + /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */ + WRITE_ONCE(sk->sk_txhash, net_tx_rndhash()); }
static inline bool sk_rethink_txhash(struct sock *sk) @@@ -2206,9 -2207,12 +2207,12 @@@ static inline void sock_poll_wait(struc
static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk) { - if (sk->sk_txhash) { + /* This pairs with WRITE_ONCE() in sk_set_txhash() */ + u32 txhash = READ_ONCE(sk->sk_txhash); + + if (txhash) { skb->l4_hash = 1; - skb->hash = sk->sk_txhash; + skb->hash = txhash; } }
@@@ -2266,8 -2270,13 +2270,13 @@@ struct sk_buff *sock_dequeue_err_skb(st static inline int sock_error(struct sock *sk) { int err; - if (likely(!sk->sk_err)) + + /* Avoid an atomic operation for the common case. + * This is racy since another cpu/thread can change sk_err under us. + */ + if (likely(data_race(!sk->sk_err))) return 0; + err = xchg(&sk->sk_err, 0); return -err; } @@@ -2743,9 -2752,6 +2752,9 @@@ static inline bool sk_dev_equal_l3scope void sock_def_readable(struct sock *sk);
int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk); +void sock_set_timestamp(struct sock *sk, int optname, bool valbool); +int sock_set_timestamping(struct sock *sk, int optname, int val); + void sock_enable_timestamps(struct sock *sk); void sock_no_linger(struct sock *sk); void sock_set_keepalive(struct sock *sk); diff --combined kernel/bpf/verifier.c index b7d51fc937c7,c6a27574242d..e04e33893cff --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@@ -47,7 -47,7 +47,7 @@@ static const struct bpf_verifier_ops * * - unreachable insns exist (shouldn't be a forest. program = one function) * - out of bounds or malformed jumps * The second pass is all possible path descent from the 1st insn. - * Since it's analyzing all pathes through the program, the length of the + * Since it's analyzing all paths through the program, the length of the * analysis is limited to 64k insn, which may be hit even if total number of * insn is less then 4K, but there are too many branches that change stack/regs. * Number of 'branches to be analyzed' is limited to 1k @@@ -132,7 -132,7 +132,7 @@@ * If it's ok, then verifier allows this BPF_CALL insn and looks at * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function - * returns ether pointer to map value or NULL. + * returns either pointer to map value or NULL. * * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off' * insn, the register holding that pointer in the true branch changes state to @@@ -737,104 -737,81 +737,104 @@@ static void print_verifier_state(struc verbose(env, "\n"); }
-#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \ -static int copy_##NAME##_state(struct bpf_func_state *dst, \ - const struct bpf_func_state *src) \ -{ \ - if (!src->FIELD) \ - return 0; \ - if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \ - /* internal bug, make state invalid to reject the program */ \ - memset(dst, 0, sizeof(*dst)); \ - return -EFAULT; \ - } \ - memcpy(dst->FIELD, src->FIELD, \ - sizeof(*src->FIELD) * (src->COUNT / SIZE)); \ - return 0; \ -} -/* copy_reference_state() */ -COPY_STATE_FN(reference, acquired_refs, refs, 1) -/* copy_stack_state() */ -COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) -#undef COPY_STATE_FN - -#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \ -static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \ - bool copy_old) \ -{ \ - u32 old_size = state->COUNT; \ - struct bpf_##NAME##_state *new_##FIELD; \ - int slot = size / SIZE; \ - \ - if (size <= old_size || !size) { \ - if (copy_old) \ - return 0; \ - state->COUNT = slot * SIZE; \ - if (!size && old_size) { \ - kfree(state->FIELD); \ - state->FIELD = NULL; \ - } \ - return 0; \ - } \ - new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \ - GFP_KERNEL); \ - if (!new_##FIELD) \ - return -ENOMEM; \ - if (copy_old) { \ - if (state->FIELD) \ - memcpy(new_##FIELD, state->FIELD, \ - sizeof(*new_##FIELD) * (old_size / SIZE)); \ - memset(new_##FIELD + old_size / SIZE, 0, \ - sizeof(*new_##FIELD) * (size - old_size) / SIZE); \ - } \ - state->COUNT = slot * SIZE; \ - kfree(state->FIELD); \ - state->FIELD = new_##FIELD; \ - return 0; \ -} -/* realloc_reference_state() */ -REALLOC_STATE_FN(reference, acquired_refs, refs, 1) -/* realloc_stack_state() */ -REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) -#undef REALLOC_STATE_FN - -/* do_check() starts with zero-sized stack in struct bpf_verifier_state to - * make it consume minimal amount of memory. check_stack_write() access from - * the program calls into realloc_func_state() to grow the stack size. - * Note there is a non-zero 'parent' pointer inside bpf_verifier_state - * which realloc_stack_state() copies over. It points to previous - * bpf_verifier_state which is never reallocated. +/* copy array src of length n * size bytes to dst. dst is reallocated if it's too + * small to hold src. This is different from krealloc since we don't want to preserve + * the contents of dst. + * + * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could + * not be allocated. */ -static int realloc_func_state(struct bpf_func_state *state, int stack_size, - int refs_size, bool copy_old) +static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags) { - int err = realloc_reference_state(state, refs_size, copy_old); - if (err) - return err; - return realloc_stack_state(state, stack_size, copy_old); + size_t bytes; + + if (ZERO_OR_NULL_PTR(src)) + goto out; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + + if (ksize(dst) < bytes) { + kfree(dst); + dst = kmalloc_track_caller(bytes, flags); + if (!dst) + return NULL; + } + + memcpy(dst, src, bytes); +out: + return dst ? dst : ZERO_SIZE_PTR; +} + +/* resize an array from old_n items to new_n items. the array is reallocated if it's too + * small to hold new_n items. new items are zeroed out if the array grows. + * + * Contrary to krealloc_array, does not free arr if new_n is zero. + */ +static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size) +{ + if (!new_n || old_n == new_n) + goto out; + + arr = krealloc_array(arr, new_n, size, GFP_KERNEL); + if (!arr) + return NULL; + + if (new_n > old_n) + memset(arr + old_n * size, 0, (new_n - old_n) * size); + +out: + return arr ? arr : ZERO_SIZE_PTR; +} + +static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src) +{ + dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs, + sizeof(struct bpf_reference_state), GFP_KERNEL); + if (!dst->refs) + return -ENOMEM; + + dst->acquired_refs = src->acquired_refs; + return 0; +} + +static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src) +{ + size_t n = src->allocated_stack / BPF_REG_SIZE; + + dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state), + GFP_KERNEL); + if (!dst->stack) + return -ENOMEM; + + dst->allocated_stack = src->allocated_stack; + return 0; +} + +static int resize_reference_state(struct bpf_func_state *state, size_t n) +{ + state->refs = realloc_array(state->refs, state->acquired_refs, n, + sizeof(struct bpf_reference_state)); + if (!state->refs) + return -ENOMEM; + + state->acquired_refs = n; + return 0; +} + +static int grow_stack_state(struct bpf_func_state *state, int size) +{ + size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE; + + if (old_n >= n) + return 0; + + state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state)); + if (!state->stack) + return -ENOMEM; + + state->allocated_stack = size; + return 0; }
/* Acquire a pointer id from the env and update the state->refs to include @@@ -848,7 -825,7 +848,7 @@@ static int acquire_reference_state(stru int new_ofs = state->acquired_refs; int id, err;
- err = realloc_reference_state(state, state->acquired_refs + 1, true); + err = resize_reference_state(state, state->acquired_refs + 1); if (err) return err; id = ++env->id_gen; @@@ -877,6 -854,18 +877,6 @@@ static int release_reference_state(stru return -EINVAL; }
-static int transfer_reference_state(struct bpf_func_state *dst, - struct bpf_func_state *src) -{ - int err = realloc_reference_state(dst, src->acquired_refs, false); - if (err) - return err; - err = copy_reference_state(dst, src); - if (err) - return err; - return 0; -} - static void free_func_state(struct bpf_func_state *state) { if (!state) @@@ -915,6 -904,10 +915,6 @@@ static int copy_func_state(struct bpf_f { int err;
- err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs, - false); - if (err) - return err; memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs)); err = copy_reference_state(dst, src); if (err) @@@ -926,13 -919,16 +926,13 @@@ static int copy_verifier_state(struct b const struct bpf_verifier_state *src) { struct bpf_func_state *dst; - u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt; int i, err;
- if (dst_state->jmp_history_cnt < src->jmp_history_cnt) { - kfree(dst_state->jmp_history); - dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER); - if (!dst_state->jmp_history) - return -ENOMEM; - } - memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz); + dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history, + src->jmp_history_cnt, sizeof(struct bpf_idx_pair), + GFP_USER); + if (!dst_state->jmp_history) + return -ENOMEM; dst_state->jmp_history_cnt = src->jmp_history_cnt;
/* if dst has more stack frames then src frame, free them */ @@@ -2594,7 -2590,8 +2594,7 @@@ static int check_stack_write_fixed_off( u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg; struct bpf_reg_state *reg = NULL;
- err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), - state->acquired_refs, true); + err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE)); if (err) return err; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, @@@ -2616,7 -2613,7 +2616,7 @@@ if (dst_reg != BPF_REG_FP) { /* The backtracking logic can only recognize explicit * stack slot address like [fp - 8]. Other spill of - * scalar via different register has to be conervative. + * scalar via different register has to be conservative. * Backtrack from here and mark all registers as precise * that contributed into 'reg' being a constant. */ @@@ -2756,7 -2753,8 +2756,7 @@@ static int check_stack_write_var_off(st if (value_reg && register_is_null(value_reg)) writing_zero = true;
- err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE), - state->acquired_refs, true); + err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE)); if (err) return err;
@@@ -5631,7 -5629,7 +5631,7 @@@ static int __check_func_call(struct bpf subprog /* subprog number within this prog */);
/* Transfer references to the callee */ - err = transfer_reference_state(callee, caller); + err = copy_reference_state(callee, caller); if (err) return err;
@@@ -5782,7 -5780,7 +5782,7 @@@ static int prepare_func_exit(struct bpf }
/* Transfer references to the caller */ - err = transfer_reference_state(caller, callee); + err = copy_reference_state(caller, callee); if (err) return err;
@@@ -6485,6 -6483,27 +6485,27 @@@ struct bpf_sanitize_info bool mask_to_left; };
+ static struct bpf_verifier_state * + sanitize_speculative_path(struct bpf_verifier_env *env, + const struct bpf_insn *insn, + u32 next_idx, u32 curr_idx) + { + struct bpf_verifier_state *branch; + struct bpf_reg_state *regs; + + branch = push_stack(env, next_idx, curr_idx, true); + if (branch && insn) { + regs = branch->frame[branch->curframe]->regs; + if (BPF_SRC(insn->code) == BPF_K) { + mark_reg_unknown(env, regs, insn->dst_reg); + } else if (BPF_SRC(insn->code) == BPF_X) { + mark_reg_unknown(env, regs, insn->dst_reg); + mark_reg_unknown(env, regs, insn->src_reg); + } + } + return branch; + } + static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, @@@ -6568,12 -6587,26 +6589,26 @@@ do_sim tmp = *dst_reg; *dst_reg = *ptr_reg; } - ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); + ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, + env->insn_idx); if (!ptr_is_dst_reg && ret) *dst_reg = tmp; return !ret ? REASON_STACK : 0; }
+ static void sanitize_mark_insn_seen(struct bpf_verifier_env *env) + { + struct bpf_verifier_state *vstate = env->cur_state; + + /* If we simulate paths under speculation, we don't update the + * insn as 'seen' such that when we verify unreachable paths in + * the non-speculative domain, sanitize_dead_code() can still + * rewrite/sanitize them. + */ + if (!vstate->speculative) + env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; + } + static int sanitize_err(struct bpf_verifier_env *env, const struct bpf_insn *insn, int reason, const struct bpf_reg_state *off_reg, @@@ -8752,14 -8785,28 +8787,28 @@@ static int check_cond_jmp_op(struct bpf if (err) return err; } + if (pred == 1) { - /* only follow the goto, ignore fall-through */ + /* Only follow the goto, ignore fall-through. If needed, push + * the fall-through branch for simulation under speculative + * execution. + */ + if (!env->bypass_spec_v1 && + !sanitize_speculative_path(env, insn, *insn_idx + 1, + *insn_idx)) + return -EFAULT; *insn_idx += insn->off; return 0; } else if (pred == 0) { - /* only follow fall-through branch, since - * that's where the program will go + /* Only follow the fall-through branch, since that's where the + * program will go. If needed, push the goto branch for + * simulation under speculative execution. */ + if (!env->bypass_spec_v1 && + !sanitize_speculative_path(env, insn, + *insn_idx + insn->off + 1, + *insn_idx)) + return -EFAULT; return 0; }
@@@ -8921,14 -8968,12 +8970,14 @@@ static int check_ld_imm(struct bpf_veri mark_reg_known_zero(env, regs, insn->dst_reg); dst_reg->map_ptr = map;
- if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) { + if (insn->src_reg == BPF_PSEUDO_MAP_VALUE || + insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) { dst_reg->type = PTR_TO_MAP_VALUE; dst_reg->off = aux->map_off; if (map_value_has_spin_lock(map)) dst_reg->id = ++env->id_gen; - } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) { + } else if (insn->src_reg == BPF_PSEUDO_MAP_FD || + insn->src_reg == BPF_PSEUDO_MAP_IDX) { dst_reg->type = CONST_PTR_TO_MAP; } else { verbose(env, "bpf verifier is misconfigured\n"); @@@ -9059,7 -9104,7 +9108,7 @@@ static int check_return_code(struct bpf !prog->aux->attach_func_proto->type) return 0;
- /* eBPF calling convetion is such that R0 is used + /* eBPF calling convention is such that R0 is used * to return the value from eBPF program. * Make sure that it's readable at this time * of bpf_exit, which means that program wrote @@@ -9444,7 -9489,7 +9493,7 @@@ static int check_abnormal_return(struc
static int check_btf_func(struct bpf_verifier_env *env, const union bpf_attr *attr, - union bpf_attr __user *uattr) + bpfptr_t uattr) { const struct btf_type *type, *func_proto, *ret_type; u32 i, nfuncs, urec_size, min_size; @@@ -9453,7 -9498,7 +9502,7 @@@ struct bpf_func_info_aux *info_aux = NULL; struct bpf_prog *prog; const struct btf *btf; - void __user *urecord; + bpfptr_t urecord; u32 prev_offset = 0; bool scalar_return; int ret = -ENOMEM; @@@ -9481,7 -9526,7 +9530,7 @@@ prog = env->prog; btf = prog->aux->btf;
- urecord = u64_to_user_ptr(attr->func_info); + urecord = make_bpfptr(attr->func_info, uattr.is_kernel); min_size = min_t(u32, krec_size, urec_size);
krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); @@@ -9499,15 -9544,13 +9548,15 @@@ /* set the size kernel expects so loader can zero * out the rest of the record. */ - if (put_user(min_size, &uattr->func_info_rec_size)) + if (copy_to_bpfptr_offset(uattr, + offsetof(union bpf_attr, func_info_rec_size), + &min_size, sizeof(min_size))) ret = -EFAULT; } goto err_free; }
- if (copy_from_user(&krecord[i], urecord, min_size)) { + if (copy_from_bpfptr(&krecord[i], urecord, min_size)) { ret = -EFAULT; goto err_free; } @@@ -9559,7 -9602,7 +9608,7 @@@ }
prev_offset = krecord[i].insn_off; - urecord += urec_size; + bpfptr_add(&urecord, urec_size); }
prog->aux->func_info = krecord; @@@ -9591,14 -9634,14 +9640,14 @@@ static void adjust_btf_func(struct bpf_
static int check_btf_line(struct bpf_verifier_env *env, const union bpf_attr *attr, - union bpf_attr __user *uattr) + bpfptr_t uattr) { u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0; struct bpf_subprog_info *sub; struct bpf_line_info *linfo; struct bpf_prog *prog; const struct btf *btf; - void __user *ulinfo; + bpfptr_t ulinfo; int err;
nr_linfo = attr->line_info_cnt; @@@ -9624,7 -9667,7 +9673,7 @@@
s = 0; sub = env->subprog_info; - ulinfo = u64_to_user_ptr(attr->line_info); + ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel); expected_size = sizeof(struct bpf_line_info); ncopy = min_t(u32, expected_size, rec_size); for (i = 0; i < nr_linfo; i++) { @@@ -9632,15 -9675,14 +9681,15 @@@ if (err) { if (err == -E2BIG) { verbose(env, "nonzero tailing record in line_info"); - if (put_user(expected_size, - &uattr->line_info_rec_size)) + if (copy_to_bpfptr_offset(uattr, + offsetof(union bpf_attr, line_info_rec_size), + &expected_size, sizeof(expected_size))) err = -EFAULT; } goto err_free; }
- if (copy_from_user(&linfo[i], ulinfo, ncopy)) { + if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) { err = -EFAULT; goto err_free; } @@@ -9692,7 -9734,7 +9741,7 @@@ }
prev_offset = linfo[i].insn_off; - ulinfo += rec_size; + bpfptr_add(&ulinfo, rec_size); }
if (s != env->subprog_cnt) { @@@ -9714,7 -9756,7 +9763,7 @@@ err_free
static int check_btf_info(struct bpf_verifier_env *env, const union bpf_attr *attr, - union bpf_attr __user *uattr) + bpfptr_t uattr) { struct btf *btf; int err; @@@ -9759,6 -9801,13 +9808,6 @@@ static bool range_within(struct bpf_reg old->s32_max_value >= cur->s32_max_value; }
-/* Maximum number of register states that can exist at once */ -#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) -struct idpair { - u32 old; - u32 cur; -}; - /* If in the old state two registers had the same id, then they need to have * the same id in the new state as well. But that id could be different from * the old state, so we need to track the mapping from old to new ids. @@@ -9769,11 -9818,11 +9818,11 @@@ * So we look through our idmap to see if this old id has been seen before. If * so, we require the new id to match; otherwise, we add the id pair to the map. */ -static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap) +static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap) { unsigned int i;
- for (i = 0; i < ID_MAP_SIZE; i++) { + for (i = 0; i < BPF_ID_MAP_SIZE; i++) { if (!idmap[i].old) { /* Reached an empty slot; haven't seen this id before */ idmap[i].old = old_id; @@@ -9850,7 -9899,7 +9899,7 @@@ static void clean_verifier_state(struc * Since the verifier pushes the branch states as it sees them while exploring * the program the condition of walking the branch instruction for the second * time means that all states below this branch were already explored and - * their final liveness markes are already propagated. + * their final liveness marks are already propagated. * Hence when the verifier completes the search of state list in is_state_visited() * we can call this clean_live_states() function to mark all liveness states * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state' @@@ -9886,7 -9935,7 +9935,7 @@@ next
/* Returns true if (rold safe implies rcur safe) */ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, - struct idpair *idmap) + struct bpf_id_pair *idmap) { bool equal;
@@@ -10004,7 -10053,7 +10053,7 @@@
static bool stacksafe(struct bpf_func_state *old, struct bpf_func_state *cur, - struct idpair *idmap) + struct bpf_id_pair *idmap) { int i, spi;
@@@ -10101,23 -10150,32 +10150,23 @@@ static bool refsafe(struct bpf_func_sta * whereas register type in current state is meaningful, it means that * the current state will reach 'bpf_exit' instruction safely */ -static bool func_states_equal(struct bpf_func_state *old, +static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, struct bpf_func_state *cur) { - struct idpair *idmap; - bool ret = false; int i;
- idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL); - /* If we failed to allocate the idmap, just say it's not safe */ - if (!idmap) - return false; - - for (i = 0; i < MAX_BPF_REG; i++) { - if (!regsafe(&old->regs[i], &cur->regs[i], idmap)) - goto out_free; - } + memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch)); + for (i = 0; i < MAX_BPF_REG; i++) + if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch)) + return false;
- if (!stacksafe(old, cur, idmap)) - goto out_free; + if (!stacksafe(old, cur, env->idmap_scratch)) + return false;
if (!refsafe(old, cur)) - goto out_free; - ret = true; -out_free: - kfree(idmap); - return ret; + return false; + + return true; }
static bool states_equal(struct bpf_verifier_env *env, @@@ -10144,7 -10202,7 +10193,7 @@@ for (i = 0; i <= old->curframe; i++) { if (old->frame[i]->callsite != cur->frame[i]->callsite) return false; - if (!func_states_equal(old->frame[i], cur->frame[i])) + if (!func_states_equal(env, old->frame[i], cur->frame[i])) return false; } return true; @@@ -10621,7 -10679,7 +10670,7 @@@ static int do_check(struct bpf_verifier }
regs = cur_regs(env); - env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; + sanitize_mark_insn_seen(env); prev_insn_idx = env->insn_idx;
if (class == BPF_ALU || class == BPF_ALU64) { @@@ -10848,7 -10906,7 +10897,7 @@@ process_bpf_exit return err;
env->insn_idx++; - env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; + sanitize_mark_insn_seen(env); } else { verbose(env, "invalid BPF_LD mode\n"); return -EINVAL; @@@ -11181,7 -11239,6 +11230,7 @@@ static int resolve_pseudo_ldimm64(struc struct bpf_map *map; struct fd f; u64 addr; + u32 fd;
if (i == insn_cnt - 1 || insn[1].code != 0 || insn[1].dst_reg != 0 || insn[1].src_reg != 0 || @@@ -11211,38 -11268,16 +11260,38 @@@ /* In final convert_pseudo_ld_imm64() step, this is * converted into regular 64-bit imm load insn. */ - if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && - insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) || - (insn[0].src_reg == BPF_PSEUDO_MAP_FD && - insn[1].imm != 0)) { - verbose(env, - "unrecognized bpf_ld_imm64 insn\n"); + switch (insn[0].src_reg) { + case BPF_PSEUDO_MAP_VALUE: + case BPF_PSEUDO_MAP_IDX_VALUE: + break; + case BPF_PSEUDO_MAP_FD: + case BPF_PSEUDO_MAP_IDX: + if (insn[1].imm == 0) + break; + fallthrough; + default: + verbose(env, "unrecognized bpf_ld_imm64 insn\n"); return -EINVAL; }
- f = fdget(insn[0].imm); + switch (insn[0].src_reg) { + case BPF_PSEUDO_MAP_IDX_VALUE: + case BPF_PSEUDO_MAP_IDX: + if (bpfptr_is_null(env->fd_array)) { + verbose(env, "fd_idx without fd_array is invalid\n"); + return -EPROTO; + } + if (copy_from_bpfptr_offset(&fd, env->fd_array, + insn[0].imm * sizeof(fd), + sizeof(fd))) + return -EFAULT; + break; + default: + fd = insn[0].imm; + break; + } + + f = fdget(fd); map = __bpf_map_get(f); if (IS_ERR(map)) { verbose(env, "fd %d is not pointing to valid bpf_map\n", @@@ -11257,8 -11292,7 +11306,8 @@@ }
aux = &env->insn_aux_data[i]; - if (insn->src_reg == BPF_PSEUDO_MAP_FD) { + if (insn[0].src_reg == BPF_PSEUDO_MAP_FD || + insn[0].src_reg == BPF_PSEUDO_MAP_IDX) { addr = (unsigned long)map; } else { u32 off = insn[1].imm; @@@ -11381,6 -11415,7 +11430,7 @@@ static int adjust_insn_aux_data(struct { struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; struct bpf_insn *insn = new_prog->insnsi; + u32 old_seen = old_data[off].seen; u32 prog_len; int i;
@@@ -11401,7 -11436,8 +11451,8 @@@ memcpy(new_data + off + cnt - 1, old_data + off, sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); for (i = off; i < off + cnt - 1; i++) { - new_data[i].seen = env->pass_cnt; + /* Expand insni[off]'s seen count to the patched range. */ + new_data[i].seen = old_seen; new_data[i].zext_dst = insn_has_def32(env, insn + i); } env->insn_aux_data = new_data; @@@ -12470,7 -12506,7 +12521,7 @@@ static int do_misc_fixups(struct bpf_ve prog->aux->max_pkt_offset = MAX_PACKET_OFF;
/* mark bpf_tail_call as different opcode to avoid - * conditional branch in the interpeter for every normal + * conditional branch in the interpreter for every normal * call and to prevent accidental JITing by JIT compiler * that doesn't support bpf_tail_call yet */ @@@ -12725,6 -12761,9 +12776,9 @@@ static void free_states(struct bpf_veri * insn_aux_data was touched. These variables are compared to clear temporary * data from failed pass. For testing and experiments do_check_common() can be * run multiple times even when prior attempt to verify is unsuccessful. + * + * Note that special handling is needed on !env->bypass_spec_v1 if this is + * ever called outside of error path with subsequent program rejection. */ static void sanitize_insn_aux_data(struct bpf_verifier_env *env) { @@@ -13242,14 -13281,6 +13296,14 @@@ static int check_attach_btf_id(struct b int ret; u64 key;
+ if (prog->type == BPF_PROG_TYPE_SYSCALL) { + if (prog->aux->sleepable) + /* attach_btf_id checked to be zero already */ + return 0; + verbose(env, "Syscall programs can only be sleepable\n"); + return -EINVAL; + } + if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING && prog->type != BPF_PROG_TYPE_LSM) { verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n"); @@@ -13324,7 -13355,8 +13378,7 @@@ struct btf *bpf_get_btf_vmlinux(void return btf_vmlinux; }
-int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, - union bpf_attr __user *uattr) +int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) { u64 start_time = ktime_get_ns(); struct bpf_verifier_env *env; @@@ -13354,7 -13386,6 +13408,7 @@@ env->insn_aux_data[i].orig_idx = i; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; + env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); is_priv = bpf_capable();
bpf_get_btf_vmlinux(); diff --combined net/batman-adv/bat_iv_ogm.c index 680def809838,fc8be49010b9..12022378f892 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@@ -409,8 -409,10 +409,10 @@@ static void batadv_iv_ogm_emit(struct b if (WARN_ON(!forw_packet->if_outgoing)) return;
- if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface)) + if (forw_packet->if_outgoing->soft_iface != soft_iface) { + pr_warn("%s: soft interface switch for queued OGM\n", __func__); return; + }
if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) return; @@@ -1849,8 -1851,6 +1851,8 @@@ batadv_iv_ogm_orig_dump_subentry(struc orig_node->orig) || nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, neigh_node->addr) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + neigh_node->if_incoming->net_dev->name) || nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, neigh_node->if_incoming->net_dev->ifindex) || nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) || @@@ -2080,8 -2080,6 +2082,8 @@@ batadv_iv_ogm_neigh_dump_neigh(struct s
if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, hardif_neigh->addr) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + hardif_neigh->if_incoming->net_dev->name) || nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, hardif_neigh->if_incoming->net_dev->ifindex) || nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, @@@ -2463,8 -2461,6 +2465,8 @@@ static int batadv_iv_gw_dump_entry(stru router->addr) || nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, router->if_incoming->net_dev->name) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + router->if_incoming->net_dev->ifindex) || nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN, gw_node->bandwidth_down) || nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, diff --combined net/bluetooth/smp.c index 93144e0c7efa,7dd51da73845..4d93c6c32a71 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@@ -40,7 -40,7 +40,7 @@@ ((struct smp_dev *)((struct l2cap_chan *)((hdev)->smp_data))->data)
/* Low-level debug macros to be used for stuff that we don't want - * accidentially in dmesg, i.e. the values of the various crypto keys + * accidentally in dmesg, i.e. the values of the various crypto keys * and the inputs & outputs of crypto functions. */ #ifdef DEBUG @@@ -560,7 -560,7 +560,7 @@@ int smp_generate_oob(struct hci_dev *hd return err;
/* This is unlikely, but we need to check that - * we didn't accidentially generate a debug key. + * we didn't accidentally generate a debug key. */ if (crypto_memneq(smp->local_pk, debug_pk, 64)) break; @@@ -1902,7 -1902,7 +1902,7 @@@ static u8 sc_send_public_key(struct smp return SMP_UNSPECIFIED;
/* This is unlikely, but we need to check that - * we didn't accidentially generate a debug key. + * we didn't accidentally generate a debug key. */ if (crypto_memneq(smp->local_pk, debug_pk, 64)) break; @@@ -3229,7 -3229,7 +3229,7 @@@ static inline struct l2cap_chan *smp_ne { struct l2cap_chan *chan;
- bt_dev_dbg(pchan->conn->hcon->hdev, "pchan %p", pchan); + BT_DBG("pchan %p", pchan);
chan = l2cap_chan_create(); if (!chan) @@@ -3250,7 -3250,7 +3250,7 @@@ */ atomic_set(&chan->nesting, L2CAP_NESTING_SMP);
- bt_dev_dbg(pchan->conn->hcon->hdev, "created chan %p", chan); + BT_DBG("created chan %p", chan);
return chan; } @@@ -3354,7 -3354,7 +3354,7 @@@ static void smp_del_chan(struct l2cap_c { struct smp_dev *smp;
- bt_dev_dbg(chan->conn->hcon->hdev, "chan %p", chan); + BT_DBG("chan %p", chan);
smp = chan->data; if (smp) { diff --combined net/bridge/br_private.h index ec661130c2d0,e013d33f1c7c..a684d0cfc58c --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@@ -90,8 -90,8 +90,8 @@@ struct bridge_mcast_stats #endif
struct br_tunnel_info { - __be64 tunnel_id; - struct metadata_dst *tunnel_dst; + __be64 tunnel_id; + struct metadata_dst __rcu *tunnel_dst; };
/* private vlan flags */ @@@ -307,18 -307,16 +307,18 @@@ struct net_bridge_port
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING struct bridge_mcast_own_query ip4_own_query; + struct timer_list ip4_mc_router_timer; + struct hlist_node ip4_rlist; #if IS_ENABLED(CONFIG_IPV6) struct bridge_mcast_own_query ip6_own_query; + struct timer_list ip6_mc_router_timer; + struct hlist_node ip6_rlist; #endif /* IS_ENABLED(CONFIG_IPV6) */ u32 multicast_eht_hosts_limit; u32 multicast_eht_hosts_cnt; unsigned char multicast_router; struct bridge_mcast_stats __percpu *mcast_stats; - struct timer_list multicast_router_timer; struct hlist_head mglist; - struct hlist_node rlist; #endif
#ifdef CONFIG_SYSFS @@@ -451,16 -449,14 +451,16 @@@ struct net_bridge
struct hlist_head mcast_gc_list; struct hlist_head mdb_list; - struct hlist_head router_list;
- struct timer_list multicast_router_timer; + struct hlist_head ip4_mc_router_list; + struct timer_list ip4_mc_router_timer; struct bridge_mcast_other_query ip4_other_query; struct bridge_mcast_own_query ip4_own_query; struct bridge_mcast_querier ip4_querier; struct bridge_mcast_stats __percpu *mcast_stats; #if IS_ENABLED(CONFIG_IPV6) + struct hlist_head ip6_mc_router_list; + struct timer_list ip6_mc_router_timer; struct bridge_mcast_other_query ip6_other_query; struct bridge_mcast_own_query ip6_own_query; struct bridge_mcast_querier ip6_querier; @@@ -868,58 -864,11 +868,58 @@@ static inline bool br_group_is_l2(cons #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
-static inline bool br_multicast_is_router(struct net_bridge *br) +static inline struct hlist_node * +br_multicast_get_first_rport_node(struct net_bridge *b, struct sk_buff *skb) { +#if IS_ENABLED(CONFIG_IPV6) + if (skb->protocol == htons(ETH_P_IPV6)) + return rcu_dereference(hlist_first_rcu(&b->ip6_mc_router_list)); +#endif + return rcu_dereference(hlist_first_rcu(&b->ip4_mc_router_list)); +} + +static inline struct net_bridge_port * +br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb) { +#if IS_ENABLED(CONFIG_IPV6) + if (skb->protocol == htons(ETH_P_IPV6)) + return hlist_entry_safe(rp, struct net_bridge_port, ip6_rlist); +#endif + return hlist_entry_safe(rp, struct net_bridge_port, ip4_rlist); +} + +static inline bool br_ip4_multicast_is_router(struct net_bridge *br) +{ + return timer_pending(&br->ip4_mc_router_timer); +} + +static inline bool br_ip6_multicast_is_router(struct net_bridge *br) { - return br->multicast_router == 2 || - (br->multicast_router == 1 && - timer_pending(&br->multicast_router_timer)); +#if IS_ENABLED(CONFIG_IPV6) + return timer_pending(&br->ip6_mc_router_timer); +#else + return false; +#endif +} + +static inline bool +br_multicast_is_router(struct net_bridge *br, struct sk_buff *skb) +{ + switch (br->multicast_router) { + case MDB_RTR_TYPE_PERM: + return true; + case MDB_RTR_TYPE_TEMP_QUERY: + if (skb) { + if (skb->protocol == htons(ETH_P_IP)) + return br_ip4_multicast_is_router(br); + else if (skb->protocol == htons(ETH_P_IPV6)) + return br_ip6_multicast_is_router(br); + } else { + return br_ip4_multicast_is_router(br) || + br_ip6_multicast_is_router(br); + } + fallthrough; + default: + return false; + } }
static inline bool @@@ -1068,8 -1017,7 +1068,8 @@@ static inline void br_multicast_flood(s { }
-static inline bool br_multicast_is_router(struct net_bridge *br) +static inline bool br_multicast_is_router(struct net_bridge *br, + struct sk_buff *skb) { return false; } diff --combined net/can/isotp.c index f995eaef5d7b,be6183f8ca11..bd49299319a1 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@@ -143,10 -143,14 +143,14 @@@ struct isotp_sock u32 force_tx_stmin; u32 force_rx_stmin; struct tpcon rx, tx; - struct notifier_block notifier; + struct list_head notifier; wait_queue_head_t wait; };
+ static LIST_HEAD(isotp_notifier_list); + static DEFINE_SPINLOCK(isotp_notifier_lock); + static struct isotp_sock *isotp_busy_notifier; + static inline struct isotp_sock *isotp_sk(const struct sock *sk) { return (struct isotp_sock *)sk; @@@ -221,8 -225,8 +225,8 @@@ static int isotp_send_fc(struct sock *s
can_send_ret = can_send(nskb, 1); if (can_send_ret) - pr_notice_once("can-isotp: %s: can_send_ret %d\n", - __func__, can_send_ret); + pr_notice_once("can-isotp: %s: can_send_ret %pe\n", + __func__, ERR_PTR(can_send_ret));
dev_put(dev);
@@@ -797,12 -801,10 +801,12 @@@ isotp_tx_burst can_skb_set_owner(skb, sk);
can_send_ret = can_send(skb, 1); - if (can_send_ret) - pr_notice_once("can-isotp: %s: can_send_ret %d\n", - __func__, can_send_ret); - + if (can_send_ret) { + pr_notice_once("can-isotp: %s: can_send_ret %pe\n", + __func__, ERR_PTR(can_send_ret)); + if (can_send_ret == -ENOBUFS) + pr_notice_once("can-isotp: tx queue is full, increasing txqueuelen may prevent this error\n"); + } if (so->tx.idx >= so->tx.len) { /* we are done */ so->tx.state = ISOTP_IDLE; @@@ -948,8 -950,8 +952,8 @@@ static int isotp_sendmsg(struct socket err = can_send(skb, 1); dev_put(dev); if (err) { - pr_notice_once("can-isotp: %s: can_send_ret %d\n", - __func__, err); + pr_notice_once("can-isotp: %s: can_send_ret %pe\n", + __func__, ERR_PTR(err)); return err; }
@@@ -1015,7 -1017,14 +1019,14 @@@ static int isotp_release(struct socket /* wait for complete transmission of current pdu */ wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
- unregister_netdevice_notifier(&so->notifier); + spin_lock(&isotp_notifier_lock); + while (isotp_busy_notifier == so) { + spin_unlock(&isotp_notifier_lock); + schedule_timeout_uninterruptible(1); + spin_lock(&isotp_notifier_lock); + } + list_del(&so->notifier); + spin_unlock(&isotp_notifier_lock);
lock_sock(sk);
@@@ -1319,21 -1328,16 +1330,16 @@@ static int isotp_getsockopt(struct sock return 0; }
- static int isotp_notifier(struct notifier_block *nb, unsigned long msg, - void *ptr) + static void isotp_notify(struct isotp_sock *so, unsigned long msg, + struct net_device *dev) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct isotp_sock *so = container_of(nb, struct isotp_sock, notifier); struct sock *sk = &so->sk;
if (!net_eq(dev_net(dev), sock_net(sk))) - return NOTIFY_DONE; - - if (dev->type != ARPHRD_CAN) - return NOTIFY_DONE; + return;
if (so->ifindex != dev->ifindex) - return NOTIFY_DONE; + return;
switch (msg) { case NETDEV_UNREGISTER: @@@ -1359,7 -1363,28 +1365,28 @@@ sk->sk_error_report(sk); break; } + }
+ static int isotp_notifier(struct notifier_block *nb, unsigned long msg, + void *ptr) + { + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (dev->type != ARPHRD_CAN) + return NOTIFY_DONE; + if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN) + return NOTIFY_DONE; + if (unlikely(isotp_busy_notifier)) /* Check for reentrant bug. */ + return NOTIFY_DONE; + + spin_lock(&isotp_notifier_lock); + list_for_each_entry(isotp_busy_notifier, &isotp_notifier_list, notifier) { + spin_unlock(&isotp_notifier_lock); + isotp_notify(isotp_busy_notifier, msg, dev); + spin_lock(&isotp_notifier_lock); + } + isotp_busy_notifier = NULL; + spin_unlock(&isotp_notifier_lock); return NOTIFY_DONE; }
@@@ -1396,8 -1421,9 +1423,9 @@@ static int isotp_init(struct sock *sk
init_waitqueue_head(&so->wait);
- so->notifier.notifier_call = isotp_notifier; - register_netdevice_notifier(&so->notifier); + spin_lock(&isotp_notifier_lock); + list_add_tail(&so->notifier, &isotp_notifier_list); + spin_unlock(&isotp_notifier_lock);
return 0; } @@@ -1444,6 -1470,10 +1472,10 @@@ static const struct can_proto isotp_can .prot = &isotp_proto, };
+ static struct notifier_block canisotp_notifier = { + .notifier_call = isotp_notifier + }; + static __init int isotp_module_init(void) { int err; @@@ -1452,7 -1482,9 +1484,9 @@@
err = can_proto_register(&isotp_can_proto); if (err < 0) - pr_err("can: registration of isotp protocol failed\n"); + pr_err("can: registration of isotp protocol failed %pe\n", ERR_PTR(err)); + else + register_netdevice_notifier(&canisotp_notifier);
return err; } @@@ -1460,6 -1492,7 +1494,7 @@@ static __exit void isotp_module_exit(void) { can_proto_unregister(&isotp_can_proto); + unregister_netdevice_notifier(&canisotp_notifier); }
module_init(isotp_module_init); diff --combined net/core/neighbour.c index 2b2f333bcdfe,bf774575ad71..53e85c70c6e5 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@@ -238,6 -238,7 +238,7 @@@ static int neigh_forced_gc(struct neigh
write_lock(&n->lock); if ((n->nud_state == NUD_FAILED) || + (n->nud_state == NUD_NOARP) || (tbl->is_multicast && tbl->is_multicast(n->primary_key)) || time_after(tref, n->updated)) @@@ -3141,7 -3142,7 +3142,7 @@@ static struct pneigh_entry *pneigh_get_ struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; struct pneigh_entry *pn = NULL; - int bucket = state->bucket; + int bucket;
state->flags |= NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { diff --combined net/core/rtnetlink.c index 5baa86bca876,ec931b080156..745965e49f78 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@@ -9,7 -9,7 +9,7 @@@ * Authors: Alexey Kuznetsov, kuznet@ms2.inr.ac.ru * * Fixes: - * Vitaly E. Lavrov RTA_OK arithmetics was wrong. + * Vitaly E. Lavrov RTA_OK arithmetic was wrong. */
#include <linux/bitops.h> @@@ -234,7 -234,7 +234,7 @@@ unlock * @msgtype: rtnetlink message type * @doit: Function pointer called for each request message * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions + * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions * * Like rtnl_register, but for use by removable modules. */ @@@ -254,7 -254,7 +254,7 @@@ EXPORT_SYMBOL_GPL(rtnl_register_module) * @msgtype: rtnetlink message type * @doit: Function pointer called for each request message * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions + * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions * * Registers the specified function pointers (at least one of them has * to be non-NULL) to be called whenever a request message for the @@@ -376,12 -376,12 +376,12 @@@ int __rtnl_link_register(struct rtnl_li if (rtnl_link_ops_get(ops->kind)) return -EEXIST;
- /* The check for setup is here because if ops + /* The check for alloc/setup is here because if ops * does not have that filled up, it is not possible * to use the ops for creating device. So do not * fill up dellink as well. That disables rtnl_dellink. */ - if (ops->setup && !ops->dellink) + if ((ops->alloc || ops->setup) && !ops->dellink) ops->dellink = unregister_netdevice_queue;
list_add_tail(&ops->list, &link_ops); @@@ -543,9 -543,7 +543,9 @@@ static const struct rtnl_af_ops *rtnl_a { const struct rtnl_af_ops *ops;
- list_for_each_entry_rcu(ops, &rtnl_af_ops, list) { + ASSERT_RTNL(); + + list_for_each_entry(ops, &rtnl_af_ops, list) { if (ops->family == family) return ops; } @@@ -1821,16 -1819,6 +1821,16 @@@ static int rtnl_fill_ifinfo(struct sk_b if (rtnl_fill_prop_list(skb, dev)) goto nla_put_failure;
+ if (dev->dev.parent && + nla_put_string(skb, IFLA_PARENT_DEV_NAME, + dev_name(dev->dev.parent))) + goto nla_put_failure; + + if (dev->dev.parent && dev->dev.parent->bus && + nla_put_string(skb, IFLA_PARENT_DEV_BUS_NAME, + dev->dev.parent->bus->name)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0;
@@@ -1890,7 -1878,6 +1890,7 @@@ static const struct nla_policy ifla_pol [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT }, [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED }, [IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), + [IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING }, };
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@@ -2287,18 -2274,27 +2287,18 @@@ static int validate_linkmsg(struct net_ nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { const struct rtnl_af_ops *af_ops;
- rcu_read_lock(); af_ops = rtnl_af_lookup(nla_type(af)); - if (!af_ops) { - rcu_read_unlock(); + if (!af_ops) return -EAFNOSUPPORT; - }
- if (!af_ops->set_link_af) { - rcu_read_unlock(); + if (!af_ops->set_link_af) return -EOPNOTSUPP; - }
if (af_ops->validate_link_af) { err = af_ops->validate_link_af(dev, af); - if (err < 0) { - rcu_read_unlock(); + if (err < 0) return err; - } } - - rcu_read_unlock(); } }
@@@ -2578,7 -2574,7 +2578,7 @@@ static int do_set_proto_down(struct net if (nl_proto_down) { proto_down = nla_get_u8(nl_proto_down);
- /* Dont turn off protodown if there are active reasons */ + /* Don't turn off protodown if there are active reasons */ if (!proto_down && dev->proto_down_reason) { NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons"); return -EBUSY; @@@ -2872,12 -2868,17 +2872,12 @@@ static int do_setlink(const struct sk_b nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { const struct rtnl_af_ops *af_ops;
- rcu_read_lock(); - BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
err = af_ops->set_link_af(dev, af, extack); - if (err < 0) { - rcu_read_unlock(); + if (err < 0) goto errout; - }
- rcu_read_unlock(); status |= DO_SETLINK_NOTIFY; } } @@@ -3176,17 -3177,8 +3176,17 @@@ struct net_device *rtnl_create_link(str return ERR_PTR(-EINVAL); }
- dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, - ops->setup, num_tx_queues, num_rx_queues); + if (ops->alloc) { + dev = ops->alloc(tb, ifname, name_assign_type, + num_tx_queues, num_rx_queues); + if (IS_ERR(dev)) + return dev; + } else { + dev = alloc_netdev_mqs(ops->priv_size, ifname, + name_assign_type, ops->setup, + num_tx_queues, num_rx_queues); + } + if (!dev) return ERR_PTR(-ENOMEM);
@@@ -3419,7 -3411,7 +3419,7 @@@ replay return -EOPNOTSUPP; }
- if (!ops->setup) + if (!ops->alloc && !ops->setup) return -EOPNOTSUPP;
if (!ifname[0]) { @@@ -4850,10 -4842,12 +4850,12 @@@ static int rtnl_bridge_notify(struct ne if (err < 0) goto errout;
- if (!skb->len) { - err = -EINVAL; + /* Notification info is only filled for bridge ports, not the bridge + * device itself. Therefore, a zero notification length is valid and + * should not result in an error. + */ + if (!skb->len) goto errout; - }
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; diff --combined net/core/skbuff.c index a0b1d4847efe,bbc3b4b62032..2531ac4ffa69 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@@ -70,7 -70,6 +70,7 @@@ #include <net/xfrm.h> #include <net/mpls.h> #include <net/mptcp.h> +#include <net/page_pool.h>
#include <linux/uaccess.h> #include <trace/events/skb.h> @@@ -646,13 -645,10 +646,13 @@@ static void skb_free_head(struct sk_buf { unsigned char *head = skb->head;
- if (skb->head_frag) + if (skb->head_frag) { + if (skb_pp_recycle(skb, head)) + return; skb_free_frag(head); - else + } else { kfree(head); + } }
static void skb_release_data(struct sk_buff *skb) @@@ -668,7 -664,7 +668,7 @@@ skb_zcopy_clear(skb, true);
for (i = 0; i < shinfo->nr_frags; i++) - __skb_frag_unref(&shinfo->frags[i]); + __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
if (shinfo->frag_list) kfree_skb_list(shinfo->frag_list); @@@ -1050,7 -1046,6 +1050,7 @@@ static struct sk_buff *__skb_clone(stru n->nohdr = 0; n->peeked = 0; C(pfmemalloc); + C(pp_recycle); n->destructor = NULL; C(tail); C(end); @@@ -1258,6 -1253,7 +1258,7 @@@ static void __msg_zerocopy_callback(str struct sock *sk = skb->sk; struct sk_buff_head *q; unsigned long flags; + bool is_zerocopy; u32 lo, hi; u16 len;
@@@ -1272,6 -1268,7 +1273,7 @@@ len = uarg->len; lo = uarg->id; hi = uarg->id + len - 1; + is_zerocopy = uarg->zerocopy;
serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); @@@ -1279,7 -1276,7 +1281,7 @@@ serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY; serr->ee.ee_data = hi; serr->ee.ee_info = lo; - if (!uarg->zerocopy) + if (!is_zerocopy) serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
q = &sk->sk_error_queue; @@@ -3500,7 -3497,7 +3502,7 @@@ int skb_shift(struct sk_buff *tgt, stru fragto = &skb_shinfo(tgt)->frags[merge];
skb_frag_size_add(fragto, skb_frag_size(fragfrom)); - __skb_frag_unref(fragfrom); + __skb_frag_unref(fragfrom, skb->pp_recycle); }
/* Reposition in the original skb */ @@@ -5290,13 -5287,6 +5292,13 @@@ bool skb_try_coalesce(struct sk_buff *t if (skb_cloned(to)) return false;
+ /* The page pool signature of struct page will eventually figure out + * which pages can be recycled or not but for now let's prohibit slab + * allocated and page_pool allocated SKBs from being coalesced. + */ + if (to->pp_recycle != from->pp_recycle) + return false; + if (len <= skb_tailroom(to)) { if (len) BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); diff --combined net/ipv4/af_inet.c index 750f388a4a68,2f94d221c00e..54648181dd56 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@@ -318,7 -318,7 +318,7 @@@ lookup_protocol
WARN_ON(!answer_prot->slab);
- err = -ENOBUFS; + err = -ENOMEM; sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; @@@ -575,7 -575,7 +575,7 @@@ int inet_dgram_connect(struct socket *s return err; }
- if (!inet_sk(sk)->inet_num && inet_autobind(sk)) + if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->connect(sk, uaddr, addr_len); } @@@ -803,7 -803,7 +803,7 @@@ int inet_send_prepare(struct sock *sk sock_rps_record_flow(sk);
/* We may need to bind the socket. */ - if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && + if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind && inet_autobind(sk)) return -EAGAIN;
@@@ -1720,6 -1720,7 +1720,6 @@@ EXPORT_SYMBOL_GPL(snmp_fold_field64) #ifdef CONFIG_IP_MULTICAST static const struct net_protocol igmp_protocol = { .handler = igmp_rcv, - .netns_ok = 1, }; #endif
@@@ -1732,6 -1733,7 +1732,6 @@@ static struct net_protocol tcp_protoco .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, .no_policy = 1, - .netns_ok = 1, .icmp_strict_tag_validation = 1, };
@@@ -1744,12 -1746,14 +1744,12 @@@ static struct net_protocol udp_protoco .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, - .netns_ok = 1, };
static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .err_handler = icmp_err, .no_policy = 1, - .netns_ok = 1, };
static __net_init int ipv4_mib_init_net(struct net *net) diff --combined net/ipv4/cipso_ipv4.c index d6e3a92841e3,e0480c6cebaa..099259fc826a --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@@ -187,7 -187,8 +187,7 @@@ static int __init cipso_v4_cache_init(v * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache * * Description: - * Invalidates and frees any entries in the CIPSO cache. Returns zero on - * success and negative values on failure. + * Invalidates and frees any entries in the CIPSO cache. * */ void cipso_v4_cache_invalidate(void) @@@ -471,6 -472,7 +471,7 @@@ void cipso_v4_doi_free(struct cipso_v4_ kfree(doi_def->map.std->lvl.local); kfree(doi_def->map.std->cat.cipso); kfree(doi_def->map.std->cat.local); + kfree(doi_def->map.std); break; } kfree(doi_def); diff --combined net/ipv4/devinet.c index 50deeff48c8b,1c6429c353a9..73721a4448bd --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@@ -1955,7 -1955,7 +1955,7 @@@ static int inet_validate_link_af(const struct nlattr *a, *tb[IFLA_INET_MAX+1]; int err, rem;
- if (dev && !__in_dev_get_rcu(dev)) + if (dev && !__in_dev_get_rtnl(dev)) return -EAFNOSUPPORT;
err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, @@@ -1981,7 -1981,7 +1981,7 @@@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla, struct netlink_ext_ack *extack) { - struct in_device *in_dev = __in_dev_get_rcu(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); struct nlattr *a, *tb[IFLA_INET_MAX+1]; int rem;
@@@ -1989,7 -1989,7 +1989,7 @@@ return -EAFNOSUPPORT;
if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) - BUG(); + return -EINVAL;
if (tb[IFLA_INET_CONF]) { nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) diff --combined net/ipv4/icmp.c index 2e09d62d59e3,752e392083e6..0a57f1892e7e --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@@ -759,6 -759,13 +759,13 @@@ void __icmp_send(struct sk_buff *skb_in icmp_param.data_len = room; icmp_param.head_len = sizeof(struct icmphdr);
+ /* if we don't have a source address at this point, fall back to the + * dummy address instead of sending out a packet with a source address + * of 0.0.0.0 + */ + if (!fl4.saddr) + fl4.saddr = htonl(INADDR_DUMMY); + icmp_push_reply(&icmp_param, &fl4, &ipc, &rt); ende: ip_rt_put(rt); @@@ -1059,7 -1066,7 +1066,7 @@@ static bool icmp_echo(struct sk_buff *s if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) + sizeof(struct in_addr)) goto send_mal_query; - dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr.s_addr); + dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr); break; #if IS_ENABLED(CONFIG_IPV6) case ICMP_AFI_IP6: diff --combined net/ipv4/route.c index a4c477475f4c,6a36ac98476f..66aacb939d3e --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@@ -1906,128 -1906,13 +1906,128 @@@ out hash_keys->addrs.v4addrs.dst = key_iph->daddr; }
+static u32 fib_multipath_custom_hash_outer(const struct net *net, + const struct sk_buff *skb, + bool *p_has_inner) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys keys, hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); + + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); + return flow_hash_from_keys(&hash_keys); +} + +static u32 fib_multipath_custom_hash_inner(const struct net *net, + const struct sk_buff *skb, + bool has_inner) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys keys, hash_keys; + + /* We assume the packet carries an encapsulation, but if none was + * encountered during dissection of the outer flow, then there is no + * point in calling the flow dissector again. + */ + if (!has_inner) + return 0; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, 0); + + if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) + return 0; + + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) + hash_keys.tags.flow_label = keys.tags.flow_label; + } + + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + return flow_hash_from_keys(&hash_keys); +} + +static u32 fib_multipath_custom_hash_skb(const struct net *net, + const struct sk_buff *skb) +{ + u32 mhash, mhash_inner; + bool has_inner = true; + + mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner); + mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner); + + return jhash_2words(mhash, mhash_inner, 0); +} + +static u32 fib_multipath_custom_hash_fl4(const struct net *net, + const struct flowi4 *fl4) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v4addrs.src = fl4->saddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v4addrs.dst = fl4->daddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = fl4->flowi4_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = fl4->fl4_sport; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = fl4->fl4_dport; + + return flow_hash_from_keys(&hash_keys); +} + /* if skb is set it will be used and fl4 can be NULL */ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys) { u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0; struct flow_keys hash_keys; - u32 mhash; + u32 mhash = 0;
switch (net->ipv4.sysctl_fib_multipath_hash_policy) { case 0: @@@ -2039,7 -1924,6 +2039,7 @@@ hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } + mhash = flow_hash_from_keys(&hash_keys); break; case 1: /* skb is currently provided only when forwarding */ @@@ -2073,7 -1957,6 +2073,7 @@@ hash_keys.ports.dst = fl4->fl4_dport; hash_keys.basic.ip_proto = fl4->flowi4_proto; } + mhash = flow_hash_from_keys(&hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); @@@ -2104,15 -1987,9 +2104,15 @@@ hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } + mhash = flow_hash_from_keys(&hash_keys); + break; + case 3: + if (skb) + mhash = fib_multipath_custom_hash_skb(net, skb); + else + mhash = fib_multipath_custom_hash_fl4(net, fl4); break; } - mhash = flow_hash_from_keys(&hash_keys);
if (multipath_hash) mhash = jhash_2words(mhash, multipath_hash, 0); @@@ -2179,6 -2056,19 +2179,19 @@@ martian_source return err; }
+ /* get device for dst_alloc with local routes */ + static struct net_device *ip_rt_get_dev(struct net *net, + const struct fib_result *res) + { + struct fib_nh_common *nhc = res->fi ? res->nhc : NULL; + struct net_device *dev = NULL; + + if (nhc) + dev = l3mdev_master_dev_rcu(nhc->nhc_dev); + + return dev ? : net->loopback_dev; + } + /* * NOTE. We drop all the packets that has local source * addresses, because every properly looped back packet @@@ -2335,7 -2225,7 +2348,7 @@@ local_input } }
- rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, + rth = rt_dst_alloc(ip_rt_get_dev(net, res), flags | RTCF_LOCAL, res->type, IN_DEV_ORCONF(in_dev, NOPOLICY), false); if (!rth) diff --combined net/ipv6/addrconf.c index 048570900fdf,701eb82acd1c..3bf685fe64b9 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@@ -5827,7 -5827,7 +5827,7 @@@ static int inet6_set_link_af(struct net return -EAFNOSUPPORT;
if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) - BUG(); + return -EINVAL;
if (tb[IFLA_INET6_TOKEN]) { err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]), @@@ -6903,10 -6903,10 +6903,10 @@@ static const struct ctl_table addrconf_ .proc_handler = proc_dointvec, }, { - .procname = "addr_gen_mode", - .data = &ipv6_devconf.addr_gen_mode, - .maxlen = sizeof(int), - .mode = 0644, + .procname = "addr_gen_mode", + .data = &ipv6_devconf.addr_gen_mode, + .maxlen = sizeof(int), + .mode = 0644, .proc_handler = addrconf_sysctl_addr_gen_mode, }, { diff --combined net/mptcp/options.c index 1aec01686c1a,9b263f27ce9b..25189595ed1d --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@@ -44,20 -44,7 +44,20 @@@ static void mptcp_parse_option(const st else expected_opsize = TCPOLEN_MPTCP_MPC_SYN; } - if (opsize != expected_opsize) + + /* Cfr RFC 8684 Section 3.3.0: + * If a checksum is present but its use had + * not been negotiated in the MP_CAPABLE handshake, the receiver MUST + * close the subflow with a RST, as it is not behaving as negotiated. + * If a checksum is not present when its use has been negotiated, the + * receiver MUST close the subflow with a RST, as it is considered + * broken + * We parse even option with mismatching csum presence, so that + * later in subflow_data_ready we can trigger the reset. + */ + if (opsize != expected_opsize && + (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA || + opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM)) break;
/* try to be gentle vs future versions on the initial syn */ @@@ -79,9 -66,16 +79,9 @@@ * host requires the use of checksums, checksums MUST be used. * In other words, the only way for checksums not to be used * is if both hosts in their SYNs set A=0." - * - * Section 3.3.0: - * "If a checksum is not present when its use has been - * negotiated, the receiver MUST close the subflow with a RST as - * it is considered broken." - * - * We don't implement DSS checksum - fall back to TCP. */ if (flags & MPTCP_CAP_CHECKSUM_REQD) - break; + mp_opt->csum_reqd = 1;
mp_opt->mp_capable = 1; if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) { @@@ -92,7 -86,7 +92,7 @@@ mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; } - if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) { + if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) { /* Section 3.1.: * "the data parameters in a MP_CAPABLE are semantically * equivalent to those in a DSS option and can be used @@@ -104,14 -98,9 +104,14 @@@ mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; } - pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d", + if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) { + mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + mp_opt->csum_reqd = 1; + ptr += 2; + } + pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u", version, flags, opsize, mp_opt->sndr_key, - mp_opt->rcvr_key, mp_opt->data_len); + mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum); break;
case MPTCPOPT_MP_JOIN: @@@ -182,8 -171,10 +182,8 @@@ expected_opsize += TCPOLEN_MPTCP_DSS_MAP32; }
- /* RFC 6824, Section 3.3: - * If a checksum is present, but its use had - * not been negotiated in the MP_CAPABLE handshake, - * the checksum field MUST be ignored. + /* Always parse any csum presence combination, we will enforce + * RFC 8684 Section 3.3.0 checks later in subflow_data_ready */ if (opsize != expected_opsize && opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) @@@ -218,15 -209,9 +218,15 @@@ mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2;
- pr_debug("data_seq=%llu subflow_seq=%u data_len=%u", + if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) { + mp_opt->csum_reqd = 1; + mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + ptr += 2; + } + + pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", mp_opt->data_seq, mp_opt->subflow_seq, - mp_opt->data_len); + mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum); }
break; @@@ -338,12 -323,9 +338,12 @@@ } }
-void mptcp_get_options(const struct sk_buff *skb, +void mptcp_get_options(const struct sock *sk, + const struct sk_buff *skb, struct mptcp_options_received *mp_opt) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); const struct tcphdr *th = tcp_hdr(skb); const unsigned char *ptr; int length; @@@ -359,7 -341,6 +359,7 @@@ mp_opt->dss = 0; mp_opt->mp_prio = 0; mp_opt->reset = 0; + mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@@ -375,6 -356,8 +375,8 @@@ length--; continue; default: + if (length < 2) + return; opsize = *ptr++; if (opsize < 2) /* "silly options" */ return; @@@ -399,7 -382,6 +401,7 @@@ bool mptcp_syn_options(struct sock *sk subflow->snd_isn = TCP_SKB_CB(skb)->end_seq; if (subflow->request_mptcp) { opts->suboptions = OPTION_MPTCP_MPC_SYN; + opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk)); *size = TCPOLEN_MPTCP_MPC_SYN; return true; } else if (subflow->request_join) { @@@ -455,10 -437,8 +457,10 @@@ static bool mptcp_established_options_m struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_ext *mpext; unsigned int data_len; + u8 len;
/* When skb is not available, we better over-estimate the emitted * options len. A full DSS option (28 bytes) is longer than @@@ -487,26 -467,16 +489,26 @@@ opts->suboptions = OPTION_MPTCP_MPC_ACK; opts->sndr_key = subflow->local_key; opts->rcvr_key = subflow->remote_key; + opts->csum_reqd = READ_ONCE(msk->csum_enabled);
/* Section 3.1. * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK * packets that start the first subflow of an MPTCP connection, * as well as the first packet that carries data */ - if (data_len > 0) - *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4); - else + if (data_len > 0) { + len = TCPOLEN_MPTCP_MPC_ACK_DATA; + if (opts->csum_reqd) { + /* we need to propagate more info to csum the pseudo hdr */ + opts->ext_copy.data_seq = mpext->data_seq; + opts->ext_copy.subflow_seq = mpext->subflow_seq; + opts->ext_copy.csum = mpext->csum; + len += TCPOLEN_MPTCP_DSS_CHECKSUM; + } + *size = ALIGN(len, 4); + } else { *size = TCPOLEN_MPTCP_MPC_ACK; + }
pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d", subflow, subflow->local_key, subflow->remote_key, @@@ -567,21 -537,18 +569,21 @@@ static bool mptcp_established_options_d bool ret = false; u64 ack_seq;
+ opts->csum_reqd = READ_ONCE(msk->csum_enabled); mpext = skb ? mptcp_get_ext(skb) : NULL;
if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) { - unsigned int map_size; + unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
- map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; + if (mpext) { + if (opts->csum_reqd) + map_size += TCPOLEN_MPTCP_DSS_CHECKSUM;
- remaining -= map_size; - dss_size = map_size; - if (mpext) opts->ext_copy = *mpext; + }
+ remaining -= map_size; + dss_size = map_size; if (skb && snd_data_fin_enable) mptcp_write_data_fin(subflow, skb, &opts->ext_copy); ret = true; @@@ -824,7 -791,6 +826,7 @@@ bool mptcp_synack_options(const struct if (subflow_req->mp_capable) { opts->suboptions = OPTION_MPTCP_MPC_SYNACK; opts->sndr_key = subflow_req->local_key; + opts->csum_reqd = subflow_req->csum_reqd; *size = TCPOLEN_MPTCP_MPC_SYNACK; pr_debug("subflow_req=%p, local_key=%llu", subflow_req, subflow_req->local_key); @@@ -1043,7 -1009,7 +1045,7 @@@ void mptcp_incoming_options(struct soc return; }
- mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return;
@@@ -1135,10 -1101,6 +1137,10 @@@ } mpext->data_len = mp_opt.data_len; mpext->use_map = 1; + mpext->csum_reqd = mp_opt.csum_reqd; + + if (mpext->csum_reqd) + mpext->csum = mp_opt.csum; } }
@@@ -1158,50 -1120,25 +1160,50 @@@ static void mptcp_set_rwin(const struc WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); }
+static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +{ + struct csum_pseudo_header header; + __wsum csum; + + /* cfr RFC 8684 3.3.1.: + * the data sequence number used in the pseudo-header is + * always the 64-bit value, irrespective of what length is used in the + * DSS option itself. + */ + header.data_seq = cpu_to_be64(mpext->data_seq); + header.subflow_seq = htonl(mpext->subflow_seq); + header.data_len = htons(mpext->data_len); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum)); + return (__force u16)csum_fold(csum); +} + void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions) { - u8 len; + u8 len, flag = MPTCP_CAP_HMAC_SHA256;
- if (OPTION_MPTCP_MPC_SYN & opts->suboptions) + if (OPTION_MPTCP_MPC_SYN & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYN; - else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) + } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYNACK; - else if (opts->ext_copy.data_len) + } else if (opts->ext_copy.data_len) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; - else + if (opts->csum_reqd) + len += TCPOLEN_MPTCP_DSS_CHECKSUM; + } else { len = TCPOLEN_MPTCP_MPC_ACK; + } + + if (opts->csum_reqd) + flag |= MPTCP_CAP_CHECKSUM_REQD;
*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len, MPTCP_SUPPORTED_VERSION, - MPTCP_CAP_HMAC_SHA256); + flag);
if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions)) @@@ -1217,13 -1154,8 +1219,13 @@@ if (!opts->ext_copy.data_len) goto mp_capable_done;
- put_unaligned_be32(opts->ext_copy.data_len << 16 | - TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + if (opts->csum_reqd) { + put_unaligned_be32(opts->ext_copy.data_len << 16 | + mptcp_make_csum(&opts->ext_copy), ptr); + } else { + put_unaligned_be32(opts->ext_copy.data_len << 16 | + TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + } ptr += 1; }
@@@ -1375,9 -1307,6 +1377,9 @@@ mp_capable_done flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; if (mpext->data_fin) flags |= MPTCP_DSS_DATA_FIN; + + if (opts->csum_reqd) + len += TCPOLEN_MPTCP_DSS_CHECKSUM; }
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); @@@ -1397,13 -1326,8 +1399,13 @@@ ptr += 2; put_unaligned_be32(mpext->subflow_seq, ptr); ptr += 1; - put_unaligned_be32(mpext->data_len << 16 | - TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + if (opts->csum_reqd) { + put_unaligned_be32(mpext->data_len << 16 | + mptcp_make_csum(mpext), ptr); + } else { + put_unaligned_be32(mpext->data_len << 16 | + TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + } } }
diff --combined net/mptcp/protocol.c index 42fc7187beee,632350018fb6..b5f2f504b85b --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@@ -39,15 -39,10 +39,15 @@@ struct mptcp_skb_cb u64 map_seq; u64 end_seq; u32 offset; + u8 has_rxtstamp:1; };
#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
+enum { + MPTCP_CMSG_TS = BIT(0), +}; + static struct percpu_counter mptcp_sockets_allocated;
static void __mptcp_destroy_sock(struct sock *sk); @@@ -277,7 -272,6 +277,7 @@@ static bool __mptcp_move_skb(struct mpt struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = (struct sock *)msk; struct sk_buff *tail; + bool has_rxtstamp;
__skb_unlink(skb, &ssk->sk_receive_queue);
@@@ -286,15 -280,15 +286,17 @@@
/* try to fetch required memory from subflow */ if (!sk_rmem_schedule(sk, skb, skb->truesize)) { - if (ssk->sk_forward_alloc < skb->truesize) - goto drop; - __sk_mem_reclaim(ssk, skb->truesize); - if (!sk_rmem_schedule(sk, skb, skb->truesize)) + int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT; + + if (ssk->sk_forward_alloc < amount) goto drop; + + ssk->sk_forward_alloc -= amount; + sk->sk_forward_alloc += amount; }
+ has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + /* the skb map_seq accounts for the skb offset: * mptcp_subflow_get_mapped_dsn() is based on the current tp->copied_seq * value @@@ -302,7 -296,6 +304,7 @@@ MPTCP_SKB_CB(skb)->map_seq = mptcp_subflow_get_mapped_dsn(subflow); MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len; MPTCP_SKB_CB(skb)->offset = offset; + MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp;
if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { /* in sequence */ @@@ -677,18 -670,22 +679,22 @@@ static bool __mptcp_ofo_queue(struct mp /* In most cases we will be able to lock the mptcp socket. If its already * owned, we need to defer to the work queue to avoid ABBA deadlock. */ - static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) + static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk = (struct sock *)msk; unsigned int moved = 0;
if (inet_sk_state_load(sk) == TCP_CLOSE) - return; - - mptcp_data_lock(sk); + return false;
__mptcp_move_skbs_from_subflow(msk, ssk, &moved); __mptcp_ofo_queue(msk); + if (unlikely(ssk->sk_err)) { + if (!sock_owned_by_user(sk)) + __mptcp_error_report(sk); + else + set_bit(MPTCP_ERROR_REPORT, &msk->flags); + }
/* If the moves have caught up with the DATA_FIN sequence number * it's time to ack the DATA_FIN and change socket state, but @@@ -697,7 -694,7 +703,7 @@@ */ if (mptcp_pending_data_fin(sk, NULL)) mptcp_schedule_work(sk); - mptcp_data_unlock(sk); + return moved > 0; }
void mptcp_data_ready(struct sock *sk, struct sock *ssk) @@@ -705,7 -702,6 +711,6 @@@ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_sock *msk = mptcp_sk(sk); int sk_rbuf, ssk_rbuf; - bool wake;
/* The peer can send data while we are shutting down this * subflow at msk destruction time, but we must avoid enqueuing @@@ -714,28 -710,22 +719,22 @@@ if (unlikely(subflow->disposable)) return;
- /* move_skbs_to_msk below can legitly clear the data_avail flag, - * but we will need later to properly woke the reader, cache its - * value - */ - wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL; - if (wake) - set_bit(MPTCP_DATA_READY, &msk->flags); - ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); sk_rbuf = READ_ONCE(sk->sk_rcvbuf); if (unlikely(ssk_rbuf > sk_rbuf)) sk_rbuf = ssk_rbuf;
- /* over limit? can't append more skbs to msk */ + /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) - goto wake; - - move_skbs_to_msk(msk, ssk); + return;
- wake: - if (wake) + /* Wake-up the reader only for in-sequence data */ + mptcp_data_lock(sk); + if (move_skbs_to_msk(msk, ssk)) { + set_bit(MPTCP_DATA_READY, &msk->flags); sk->sk_data_ready(sk); + } + mptcp_data_unlock(sk); }
static bool mptcp_do_flush_join_list(struct mptcp_sock *msk) @@@ -867,7 -857,7 +866,7 @@@ static struct sock *mptcp_subflow_recv_ sock_owned_by_me(sk);
mptcp_for_each_subflow(msk, subflow) { - if (subflow->data_avail) + if (READ_ONCE(subflow->data_avail)) return mptcp_subflow_tcp_sock(subflow); }
@@@ -1308,18 -1298,6 +1307,18 @@@ static bool mptcp_alloc_tx_skb(struct s return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation); }
+/* note: this always recompute the csum on the whole skb, even + * if we just appended a single frag. More status info needed + */ +static void mptcp_update_data_checksum(struct sk_buff *skb, int added) +{ + struct mptcp_ext *mpext = mptcp_get_ext(skb); + __wsum csum = ~csum_unfold(mpext->csum); + int offset = skb->len - added; + + mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset)); +} + static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct mptcp_data_frag *dfrag, struct mptcp_sendmsg_info *info) @@@ -1414,14 -1392,10 +1413,14 @@@ if (zero_window_probe) { mptcp_subflow_ctx(ssk)->rel_write_seq += ret; mpext->frozen = 1; - ret = 0; + if (READ_ONCE(msk->csum_enabled)) + mptcp_update_data_checksum(tail, ret); tcp_push_pending_frames(ssk); + return 0; } out: + if (READ_ONCE(msk->csum_enabled)) + mptcp_update_data_checksum(tail, ret); mptcp_subflow_ctx(ssk)->rel_write_seq += ret; return ret; } @@@ -1796,9 -1770,7 +1795,9 @@@ static void mptcp_wait_data(struct soc
static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, struct msghdr *msg, - size_t len, int flags) + size_t len, int flags, + struct scm_timestamping_internal *tss, + int *cmsg_flags) { struct sk_buff *skb, *tmp; int copied = 0; @@@ -1818,11 -1790,6 +1817,11 @@@ } }
+ if (MPTCP_SKB_CB(skb)->has_rxtstamp) { + tcp_update_recv_tstamps(skb, tss); + *cmsg_flags |= MPTCP_CMSG_TS; + } + copied += count;
if (count < data_len) { @@@ -1987,6 -1954,9 +1986,9 @@@ static bool __mptcp_move_skbs(struct mp done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); mptcp_data_unlock(sk); tcp_cleanup_rbuf(ssk, moved); + + if (unlikely(ssk->sk_err)) + __mptcp_error_report(sk); unlock_sock_fast(ssk, slowpath); } while (!done);
@@@ -2010,8 -1980,7 +2012,8 @@@ static int mptcp_recvmsg(struct sock *s int nonblock, int flags, int *addr_len) { struct mptcp_sock *msk = mptcp_sk(sk); - int copied = 0; + struct scm_timestamping_internal tss; + int copied = 0, cmsg_flags = 0; int target; long timeo;
@@@ -2033,7 -2002,7 +2035,7 @@@ while (copied < len) { int bytes_read;
- bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags); + bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; @@@ -2114,11 -2083,6 +2116,11 @@@ set_bit(MPTCP_DATA_READY, &msk->flags); } out_err: + if (cmsg_flags && copied >= 0) { + if (cmsg_flags & MPTCP_CMSG_TS) + tcp_recv_timestamp(msg, sk, &tss); + } + pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d", msk, test_bit(MPTCP_DATA_READY, &msk->flags), skb_queue_empty_lockless(&sk->sk_receive_queue), copied); @@@ -2375,8 -2339,8 +2377,8 @@@ static void __mptcp_retrans(struct soc
/* limit retransmission to the bytes already sent on some subflows */ info.sent = 0; - info.limit = dfrag->already_sent; - while (info.sent < dfrag->already_sent) { + info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent; + while (info.sent < info.limit) { if (!mptcp_alloc_tx_skb(sk, ssk)) break;
@@@ -2388,11 -2352,9 +2390,11 @@@ copied += ret; info.sent += ret; } - if (copied) + if (copied) { + dfrag->already_sent = max(dfrag->already_sent, info.sent); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); + }
mptcp_set_timeout(sk, ssk); release_sock(ssk); @@@ -2471,7 -2433,6 +2473,7 @@@ static int __mptcp_init_sock(struct soc msk->ack_hint = NULL; msk->first = NULL; inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; + WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_init(msk);
@@@ -2812,8 -2773,6 +2814,8 @@@ struct sock *mptcp_sk_clone(const struc msk->token = subflow_req->token; msk->subflow = NULL; WRITE_ONCE(msk->fully_established, false); + if (mp_opt->csum_reqd) + WRITE_ONCE(msk->csum_enabled, true);
msk->write_seq = subflow_req->idsn + 1; msk->snd_nxt = msk->write_seq; diff --combined net/mptcp/protocol.h index 16e50caf200e,385796f0ef19..160d716ebc2b --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@@ -68,8 -68,6 +68,8 @@@ #define TCPOLEN_MPTCP_FASTCLOSE 12 #define TCPOLEN_MPTCP_RST 4
+#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA) + /* MPTCP MP_JOIN flags */ #define MPTCPOPT_BACKUP BIT(0) #define MPTCPOPT_HMAC_LEN 20 @@@ -126,7 -124,6 +126,7 @@@ struct mptcp_options_received u64 data_seq; u32 subflow_seq; u16 data_len; + __sum16 csum; u16 mp_capable : 1, mp_join : 1, fastclose : 1, @@@ -136,7 -133,6 +136,7 @@@ rm_addr : 1, mp_prio : 1, echo : 1, + csum_reqd : 1, backup : 1; u32 token; u32 nonce; @@@ -238,7 -234,6 +238,7 @@@ struct mptcp_sock bool snd_data_fin_enable; bool rcv_fastclose; bool use_64bit_ack; /* Set when we received a 64-bit DSN */ + bool csum_enabled; spinlock_t join_list_lock; struct sock *ack_hint; struct work_struct work; @@@ -340,19 -335,11 +340,19 @@@ static inline struct mptcp_data_frag *m return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); }
+struct csum_pseudo_header { + __be64 data_seq; + __be32 subflow_seq; + __be16 data_len; + __sum16 csum; +}; + struct mptcp_subflow_request_sock { struct tcp_request_sock sk; u16 mp_capable : 1, mp_join : 1, - backup : 1; + backup : 1, + csum_reqd : 1; u8 local_id; u8 remote_id; u64 local_key; @@@ -375,7 -362,6 +375,6 @@@ mptcp_subflow_rsk(const struct request_ enum mptcp_data_avail { MPTCP_SUBFLOW_NODATA, MPTCP_SUBFLOW_DATA_AVAIL, - MPTCP_SUBFLOW_OOO_DATA };
struct mptcp_delegated_action { @@@ -400,8 -386,6 +399,8 @@@ struct mptcp_subflow_context u32 map_subflow_seq; u32 ssn_offset; u32 map_data_len; + __wsum map_data_csum; + u32 map_csum_len; u32 request_mptcp : 1, /* send MP_CAPABLE */ request_join : 1, /* send MP_JOIN */ request_bkup : 1, @@@ -411,8 -395,6 +410,8 @@@ pm_notified : 1, /* PM hook called for established status */ conn_finished : 1, map_valid : 1, + map_csum_reqd : 1, + map_data_fin : 1, mpc_map : 1, backup : 1, send_mp_prio : 1, @@@ -542,7 -524,6 +541,7 @@@ static inline void mptcp_subflow_delega
int mptcp_is_enabled(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net); +int mptcp_is_checksum_enabled(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool mptcp_subflow_data_available(struct sock *sk); @@@ -594,8 -575,7 +593,8 @@@ int __init mptcp_proto_v6_init(void) struct sock *mptcp_sk_clone(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct request_sock *req); -void mptcp_get_options(const struct sk_buff *skb, +void mptcp_get_options(const struct sock *sk, + const struct sk_buff *skb, struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk); @@@ -646,8 -626,6 +645,8 @@@ static inline void mptcp_write_space(st
void mptcp_destroy_common(struct mptcp_sock *msk);
+#define MPTCP_TOKEN_MAX_RETRIES 4 + void __init mptcp_token_init(void); static inline void mptcp_token_init_request(struct request_sock *req) { diff --combined net/mptcp/subflow.c index 6b1cd4257edf,be1de4084196..8976ff586b87 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@@ -108,7 -108,6 +108,7 @@@ static void subflow_init_req(struct req
subflow_req->mp_capable = 0; subflow_req->mp_join = 0; + subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener)); subflow_req->msk = NULL; mptcp_token_init_request(req); } @@@ -151,7 -150,7 +151,7 @@@ static int subflow_check_req(struct req return -EINVAL; #endif
- mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); @@@ -163,7 -162,7 +163,7 @@@ }
if (mp_opt.mp_capable && listener->request_mptcp) { - int err, retries = 4; + int err, retries = MPTCP_TOKEN_MAX_RETRIES;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; again: @@@ -248,7 -247,7 +248,7 @@@ int mptcp_subflow_init_cookie_req(struc int err;
subflow_init_req(req, sk_listener); - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable && mp_opt.mp_join) return -EINVAL; @@@ -395,7 -394,7 +395,7 @@@ static void subflow_finish_connect(stru subflow->ssn_offset = TCP_SKB_CB(skb)->seq; pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
- mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (subflow->request_mptcp) { if (!mp_opt.mp_capable) { MPTCP_INC_STATS(sock_net(sk), @@@ -405,8 -404,6 +405,8 @@@ goto fallback; }
+ if (mp_opt.csum_reqd) + WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true); subflow->mp_capable = 1; subflow->can_ack = 1; subflow->remote_key = mp_opt.sndr_key; @@@ -433,15 -430,15 +433,15 @@@ goto do_reset; }
+ if (!mptcp_finish_join(sk)) + goto do_reset; + subflow_generate_hmac(subflow->local_key, subflow->remote_key, subflow->local_nonce, subflow->remote_nonce, hmac); memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
- if (!mptcp_finish_join(sk)) - goto do_reset; - subflow->mp_join = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
@@@ -641,7 -638,7 +641,7 @@@ static struct sock *subflow_syn_recv_so * reordered MPC will cause fallback, but we don't have other * options. */ - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_capable) { fallback = true; goto create_child; @@@ -651,7 -648,7 +651,7 @@@ if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) || !mptcp_can_accept_new_subflow(subflow_req->msk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); @@@ -787,10 -784,10 +787,10 @@@ static u64 expand_seq(u64 old_seq, u16 return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32)); }
- static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) + static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) { - WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d", - ssn, subflow->map_subflow_seq, subflow->map_data_len); + pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d", + ssn, subflow->map_subflow_seq, subflow->map_data_len); }
static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb) @@@ -815,104 -812,22 +815,104 @@@ static bool validate_mapping(struct soc /* Mapping covers data later in the subflow stream, * currently unsupported. */ - warn_bad_map(subflow, ssn); + dbg_bad_map(subflow, ssn); return false; } if (unlikely(!before(ssn, subflow->map_subflow_seq + subflow->map_data_len))) { /* Mapping does covers past subflow data, invalid */ - warn_bad_map(subflow, ssn + skb->len); + dbg_bad_map(subflow, ssn); return false; } return true; }
+static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb, + bool csum_reqd) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct csum_pseudo_header header; + u32 offset, seq, delta; + __wsum csum; + int len; + + if (!csum_reqd) + return MAPPING_OK; + + /* mapping already validated on previous traversal */ + if (subflow->map_csum_len == subflow->map_data_len) + return MAPPING_OK; + + /* traverse the receive queue, ensuring it contains a full + * DSS mapping and accumulating the related csum. + * Preserve the accoumlate csum across multiple calls, to compute + * the csum only once + */ + delta = subflow->map_data_len - subflow->map_csum_len; + for (;;) { + seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len; + offset = seq - TCP_SKB_CB(skb)->seq; + + /* if the current skb has not been accounted yet, csum its contents + * up to the amount covered by the current DSS + */ + if (offset < skb->len) { + __wsum csum; + + len = min(skb->len - offset, delta); + csum = skb_checksum(skb, offset, len, 0); + subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum, + subflow->map_csum_len); + + delta -= len; + subflow->map_csum_len += len; + } + if (delta == 0) + break; + + if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) { + /* if this subflow is closed, the partial mapping + * will be never completed; flush the pending skbs, so + * that subflow_sched_work_if_closed() can kick in + */ + if (unlikely(ssk->sk_state == TCP_CLOSE)) + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); + + /* not enough data to validate the csum */ + return MAPPING_EMPTY; + } + + /* the DSS mapping for next skbs will be validated later, + * when a get_mapping_status call will process such skb + */ + skb = skb->next; + } + + /* note that 'map_data_len' accounts only for the carried data, does + * not include the eventual seq increment due to the data fin, + * while the pseudo header requires the original DSS data len, + * including that + */ + header.data_seq = cpu_to_be64(subflow->map_seq); + header.subflow_seq = htonl(subflow->map_subflow_seq); + header.data_len = htons(subflow->map_data_len + subflow->map_data_fin); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); + if (unlikely(csum_fold(csum))) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); + return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; + } + + return MAPPING_OK; +} + static enum mapping_status get_mapping_status(struct sock *ssk, struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + bool csum_reqd = READ_ONCE(msk->csum_enabled); struct mptcp_ext *mpext; struct sk_buff *skb; u16 data_len; @@@ -1005,10 -920,9 +1005,10 @@@ /* Allow replacing only with an identical map */ if (subflow->map_seq == map_seq && subflow->map_subflow_seq == mpext->subflow_seq && - subflow->map_data_len == data_len) { + subflow->map_data_len == data_len && + subflow->map_csum_reqd == mpext->csum_reqd) { skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + goto validate_csum; }
/* If this skb data are fully covered by the current mapping, @@@ -1020,27 -934,17 +1020,27 @@@ }
/* will validate the next map after consuming the current one */ - return MAPPING_OK; + goto validate_csum; }
subflow->map_seq = map_seq; subflow->map_subflow_seq = mpext->subflow_seq; subflow->map_data_len = data_len; subflow->map_valid = 1; + subflow->map_data_fin = mpext->data_fin; subflow->mpc_map = mpext->mpc_map; - pr_debug("new map seq=%llu subflow_seq=%u data_len=%u", + subflow->map_csum_reqd = mpext->csum_reqd; + subflow->map_csum_len = 0; + subflow->map_data_csum = csum_unfold(mpext->csum); + + /* Cfr RFC 8684 Section 3.3.0 */ + if (unlikely(subflow->map_csum_reqd != csum_reqd)) + return MAPPING_INVALID; + + pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", subflow->map_seq, subflow->map_subflow_seq, - subflow->map_data_len); + subflow->map_data_len, subflow->map_csum_reqd, + subflow->map_data_csum);
validate_seq: /* we revalidate valid mapping on new skb, because we must ensure @@@ -1050,9 -954,7 +1050,9 @@@ return MAPPING_INVALID;
skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + +validate_csum: + return validate_data_csum(ssk, skb, csum_reqd); }
static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, @@@ -1098,7 -1000,7 +1098,7 @@@ static bool subflow_check_data_avail(st struct sk_buff *skb;
if (!skb_peek(&ssk->sk_receive_queue)) - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0); if (subflow->data_avail) return true;
@@@ -1137,18 -1039,13 +1137,13 @@@ ack_seq = mptcp_subflow_get_mapped_dsn(subflow); pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack, ack_seq); - if (ack_seq == old_ack) { - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; - break; - } else if (after64(ack_seq, old_ack)) { - subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA; - break; + if (unlikely(before64(ack_seq, old_ack))) { + mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); + continue; }
- /* only accept in-sequence mapping. Old values are spurious - * retransmission - */ - mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + break; } return true;
@@@ -1163,12 -1060,11 +1158,11 @@@ fallback * subflow_error_report() will introduce the appropriate barriers */ ssk->sk_err = EBADMSG; - ssk->sk_error_report(ssk); tcp_set_state(ssk, TCP_CLOSE); subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMPTCP; tcp_send_active_reset(ssk, GFP_ATOMIC); - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0); return false; }
@@@ -1178,7 -1074,7 +1172,7 @@@ subflow->map_seq = READ_ONCE(msk->ack_seq); subflow->map_data_len = skb->len; subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); return true; }
@@@ -1190,7 -1086,7 +1184,7 @@@ bool mptcp_subflow_data_available(struc if (subflow->map_valid && mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) { subflow->map_valid = 0; - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0);
pr_debug("Done with mapping: seq=%u data_len=%u", subflow->map_subflow_seq, @@@ -1218,41 -1114,6 +1212,6 @@@ void mptcp_space(const struct sock *ssk *full_space = tcp_full_space(sk); }
- static void subflow_data_ready(struct sock *sk) - { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - u16 state = 1 << inet_sk_state_load(sk); - struct sock *parent = subflow->conn; - struct mptcp_sock *msk; - - msk = mptcp_sk(parent); - if (state & TCPF_LISTEN) { - /* MPJ subflow are removed from accept queue before reaching here, - * avoid stray wakeups - */ - if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) - return; - - set_bit(MPTCP_DATA_READY, &msk->flags); - parent->sk_data_ready(parent); - return; - } - - WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && - !subflow->mp_join && !(state & TCPF_CLOSE)); - - if (mptcp_subflow_data_available(sk)) - mptcp_data_ready(parent, sk); - } - - static void subflow_write_space(struct sock *ssk) - { - struct sock *sk = mptcp_subflow_ctx(ssk)->conn; - - mptcp_propagate_sndbuf(sk, ssk); - mptcp_write_space(sk); - } - void __mptcp_error_report(struct sock *sk) { struct mptcp_subflow_context *subflow; @@@ -1293,6 -1154,43 +1252,43 @@@ static void subflow_error_report(struc mptcp_data_unlock(sk); }
+ static void subflow_data_ready(struct sock *sk) + { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + u16 state = 1 << inet_sk_state_load(sk); + struct sock *parent = subflow->conn; + struct mptcp_sock *msk; + + msk = mptcp_sk(parent); + if (state & TCPF_LISTEN) { + /* MPJ subflow are removed from accept queue before reaching here, + * avoid stray wakeups + */ + if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) + return; + + set_bit(MPTCP_DATA_READY, &msk->flags); + parent->sk_data_ready(parent); + return; + } + + WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && + !subflow->mp_join && !(state & TCPF_CLOSE)); + + if (mptcp_subflow_data_available(sk)) + mptcp_data_ready(parent, sk); + else if (unlikely(sk->sk_err)) + subflow_error_report(sk); + } + + static void subflow_write_space(struct sock *ssk) + { + struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + + mptcp_propagate_sndbuf(sk, ssk); + mptcp_write_space(sk); + } + static struct inet_connection_sock_af_ops * subflow_default_af_ops(struct sock *sk) { @@@ -1603,6 -1501,8 +1599,8 @@@ static void subflow_state_change(struc */ if (mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); + else if (unlikely(sk->sk_err)) + subflow_error_report(sk);
subflow_sched_work_if_closed(mptcp_sk(parent), sk);
diff --combined net/netfilter/nf_tables_api.c index f20f6ae0e215,bf4d6ec9fc55..d6214242fe7f --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@@ -862,9 -862,10 +862,9 @@@ static int nft_netlink_dump_start_rcu(s static int nf_tables_gettable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; const struct nft_table *table; struct net *net = info->net; struct sk_buff *skb2; @@@ -1067,9 -1068,10 +1067,9 @@@ static int nf_tables_newtable(struct sk const struct nlattr * const nla[]) { struct nftables_pernet *nft_net = nft_pernet(info->net); - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; @@@ -1261,9 -1263,10 +1261,9 @@@ out static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; @@@ -1633,9 -1636,10 +1633,9 @@@ done static int nf_tables_getchain(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; const struct nft_chain *chain; struct net *net = info->net; struct nft_table *table; @@@ -2011,12 -2015,11 +2011,12 @@@ static void nft_basechain_hook_init(str const struct nft_chain_hook *hook, struct nft_chain *chain) { - ops->pf = family; - ops->hooknum = hook->num; - ops->priority = hook->priority; - ops->priv = chain; - ops->hook = hook->type->hooks[ops->hooknum]; + ops->pf = family; + ops->hooknum = hook->num; + ops->priority = hook->priority; + ops->priv = chain; + ops->hook = hook->type->hooks[ops->hooknum]; + ops->hook_ops_type = NF_HOOK_OP_NF_TABLES; }
static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, @@@ -2368,9 -2371,10 +2368,9 @@@ static int nf_tables_newchain(struct sk const struct nlattr * const nla[]) { struct nftables_pernet *nft_net = nft_pernet(info->net); - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct nft_chain *chain = NULL; struct net *net = info->net; const struct nlattr *attr; @@@ -2465,9 -2469,10 +2465,9 @@@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; @@@ -3091,9 -3096,10 +3091,9 @@@ static int nf_tables_dump_rules_done(st static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; const struct nft_chain *chain; const struct nft_rule *rule; struct net *net = info->net; @@@ -3231,12 -3237,13 +3231,12 @@@ static int nf_tables_newrule(struct sk_ const struct nlattr * const nla[]) { struct nftables_pernet *nft_net = nft_pernet(info->net); - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; unsigned int size, i, n, ulen = 0, usize = 0; u8 genmask = nft_genmask_next(info->net); struct nft_rule *rule, *old_rule = NULL; struct nft_expr_info *expr_info = NULL; - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; struct nft_flow_rule *flow; struct nft_userdata *udata; @@@ -3470,15 -3477,15 +3470,15 @@@ static struct nft_rule *nft_rule_lookup static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; - int family = nfmsg->nfgen_family, err = 0; u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; struct nft_chain *chain = NULL; struct net *net = info->net; struct nft_table *table; struct nft_rule *rule; struct nft_ctx ctx; + int err = 0;
table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, NETLINK_CB(skb).portid); @@@ -3658,6 -3665,30 +3658,6 @@@ static const struct nla_policy nft_set_ [NFTA_SET_DESC_CONCAT] = { .type = NLA_NESTED }, };
-static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, - const struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nla[], - struct netlink_ext_ack *extack, - u8 genmask, u32 nlpid) -{ - const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - int family = nfmsg->nfgen_family; - struct nft_table *table = NULL; - - if (nla[NFTA_SET_TABLE] != NULL) { - table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, - genmask, nlpid); - if (IS_ERR(table)) { - NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]); - return PTR_ERR(table); - } - } - - nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); - return 0; -} - static struct nft_set *nft_set_lookup(const struct nft_table *table, const struct nlattr *nla, u8 genmask) { @@@ -4037,26 -4068,20 +4037,26 @@@ static int nf_tables_dump_sets_done(str static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); + u8 family = info->nfmsg->nfgen_family; + struct nft_table *table = NULL; struct net *net = info->net; const struct nft_set *set; struct sk_buff *skb2; struct nft_ctx ctx; int err;
- /* Verify existence before starting dump */ - err = nft_ctx_init_from_setattr(&ctx, net, skb, info->nlh, nla, extack, - genmask, 0); - if (err < 0) - return err; + if (nla[NFTA_SET_TABLE]) { + table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, + genmask, 0); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]); + return PTR_ERR(table); + } + } + + nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@@ -4071,12 -4096,12 +4071,12 @@@ }
/* Only accept unspec with dump */ - if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + if (info->nfmsg->nfgen_family == NFPROTO_UNSPEC) return -EAFNOSUPPORT; if (!nla[NFTA_SET_TABLE]) return -EINVAL;
- set = nft_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); + set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) return PTR_ERR(set);
@@@ -4164,10 -4189,11 +4164,10 @@@ static int nf_tables_set_desc_parse(str static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); u32 ktype, dtype, flags, policy, gc_int, objtype; struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; const struct nft_set_ops *ops; struct nft_expr *expr = NULL; struct net *net = info->net; @@@ -4338,13 -4364,45 +4338,45 @@@ err = nf_tables_set_alloc_name(&ctx, set, name); kfree(name); if (err < 0) - goto err_set_alloc_name; + goto err_set_name; + + udata = NULL; + if (udlen) { + udata = set->data + size; + nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen); + } + + INIT_LIST_HEAD(&set->bindings); + INIT_LIST_HEAD(&set->catchall_list); + set->table = table; + write_pnet(&set->net, net); + set->ops = ops; + set->ktype = ktype; + set->klen = desc.klen; + set->dtype = dtype; + set->objtype = objtype; + set->dlen = desc.dlen; + set->flags = flags; + set->size = desc.size; + set->policy = policy; + set->udlen = udlen; + set->udata = udata; + set->timeout = timeout; + set->gc_int = gc_int; + + set->field_count = desc.field_count; + for (i = 0; i < desc.field_count; i++) + set->field_len[i] = desc.field_len[i]; + + err = ops->init(set, &desc, nla); + if (err < 0) + goto err_set_init;
if (nla[NFTA_SET_EXPR]) { expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]); if (IS_ERR(expr)) { err = PTR_ERR(expr); - goto err_set_alloc_name; + goto err_set_expr_alloc; } set->exprs[0] = expr; set->num_exprs++; @@@ -4355,75 -4413,44 +4387,44 @@@
if (!(flags & NFT_SET_EXPR)) { err = -EINVAL; - goto err_set_alloc_name; + goto err_set_expr_alloc; } i = 0; nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { if (i == NFT_SET_EXPR_MAX) { err = -E2BIG; - goto err_set_init; + goto err_set_expr_alloc; } if (nla_type(tmp) != NFTA_LIST_ELEM) { err = -EINVAL; - goto err_set_init; + goto err_set_expr_alloc; } expr = nft_set_elem_expr_alloc(&ctx, set, tmp); if (IS_ERR(expr)) { err = PTR_ERR(expr); - goto err_set_init; + goto err_set_expr_alloc; } set->exprs[i++] = expr; set->num_exprs++; } }
- udata = NULL; - if (udlen) { - udata = set->data + size; - nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen); - } - - INIT_LIST_HEAD(&set->bindings); - INIT_LIST_HEAD(&set->catchall_list); - set->table = table; - write_pnet(&set->net, net); - set->ops = ops; - set->ktype = ktype; - set->klen = desc.klen; - set->dtype = dtype; - set->objtype = objtype; - set->dlen = desc.dlen; - set->flags = flags; - set->size = desc.size; - set->policy = policy; - set->udlen = udlen; - set->udata = udata; - set->timeout = timeout; - set->gc_int = gc_int; set->handle = nf_tables_alloc_handle(table);
- set->field_count = desc.field_count; - for (i = 0; i < desc.field_count; i++) - set->field_len[i] = desc.field_len[i]; - - err = ops->init(set, &desc, nla); - if (err < 0) - goto err_set_init; - err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) - goto err_set_trans; + goto err_set_expr_alloc;
list_add_tail_rcu(&set->list, &table->sets); table->use++; return 0;
- err_set_trans: - ops->destroy(set); - err_set_init: + err_set_expr_alloc: for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(&ctx, set->exprs[i]); - err_set_alloc_name: + + ops->destroy(set); + err_set_init: kfree(set->name); err_set_name: kvfree(set); @@@ -4467,31 -4494,31 +4468,31 @@@ static void nft_set_destroy(const struc static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; + struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; - int err;
- if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + if (info->nfmsg->nfgen_family == NFPROTO_UNSPEC) return -EAFNOSUPPORT; - if (nla[NFTA_SET_TABLE] == NULL) - return -EINVAL;
- err = nft_ctx_init_from_setattr(&ctx, net, skb, info->nlh, nla, extack, - genmask, NETLINK_CB(skb).portid); - if (err < 0) - return err; + table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, + genmask, NETLINK_CB(skb).portid); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]); + return PTR_ERR(table); + }
if (nla[NFTA_SET_HANDLE]) { attr = nla[NFTA_SET_HANDLE]; - set = nft_set_lookup_byhandle(ctx.table, attr, genmask); + set = nft_set_lookup_byhandle(table, attr, genmask); } else { attr = nla[NFTA_SET_NAME]; - set = nft_set_lookup(ctx.table, attr, genmask); + set = nft_set_lookup(table, attr, genmask); }
if (IS_ERR(set)) { @@@ -4505,8 -4532,6 +4506,8 @@@ return -EBUSY; }
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + return nft_delset(&ctx, set); }
@@@ -4708,6 -4733,28 +4709,6 @@@ static const struct nla_policy nft_set_ [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 }, };
-static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, - const struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nla[], - struct netlink_ext_ack *extack, - u8 genmask, u32 nlpid) -{ - const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - int family = nfmsg->nfgen_family; - struct nft_table *table; - - table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, - genmask, nlpid); - if (IS_ERR(table)) { - NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); - return PTR_ERR(table); - } - - nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); - return 0; -} - static int nft_set_elem_expr_dump(struct sk_buff *skb, const struct nft_set *set, const struct nft_set_ext *ext) @@@ -5165,27 -5212,21 +5166,27 @@@ static int nf_tables_getsetelem(struct { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; + struct nft_table *table; struct nft_set *set; struct nlattr *attr; struct nft_ctx ctx; int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack, - genmask, NETLINK_CB(skb).portid); - if (err < 0) - return err; + table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, + genmask, NETLINK_CB(skb).portid); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); + return PTR_ERR(table); + }
- set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask); + set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = nf_tables_dump_set_start, @@@ -5954,10 -5995,8 +5955,10 @@@ static int nf_tables_newsetelem(struct struct nftables_pernet *nft_net = nft_pernet(info->net); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; + struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; int rem, err; @@@ -5965,14 -6004,12 +5966,14 @@@ if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack, - genmask, NETLINK_CB(skb).portid); - if (err < 0) - return err; + table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, + genmask, NETLINK_CB(skb).portid); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); + return PTR_ERR(table); + }
- set = nft_set_lookup_global(net, ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET], nla[NFTA_SET_ELEM_LIST_SET_ID], genmask); if (IS_ERR(set)) return PTR_ERR(set); @@@ -5980,8 -6017,6 +5981,8 @@@ if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) return -EBUSY;
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags); if (err < 0) @@@ -5989,7 -6024,7 +5990,7 @@@ }
if (nft_net->validate_state == NFT_VALIDATE_DO) - return nft_table_validate(net, ctx.table); + return nft_table_validate(net, table);
return 0; } @@@ -6227,29 -6262,23 +6228,29 @@@ static int nf_tables_delsetelem(struct { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; + struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack, - genmask, NETLINK_CB(skb).portid); - if (err < 0) - return err; + table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, + genmask, NETLINK_CB(skb).portid); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); + return PTR_ERR(table); + }
- set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask); + set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set); if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) return -EBUSY;
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) return nft_set_flush(&ctx, set, genmask);
@@@ -6517,10 -6546,11 +6518,10 @@@ err_free_trans static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; const struct nft_object_type *type; - int family = nfmsg->nfgen_family; struct net *net = info->net; struct nft_table *table; struct nft_object *obj; @@@ -6772,9 -6802,10 +6773,9 @@@ static int nf_tables_dump_obj_done(stru static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; const struct nft_table *table; struct net *net = info->net; struct nft_object *obj; @@@ -6861,9 -6892,10 +6862,9 @@@ static void nft_obj_destroy(const struc static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; @@@ -7291,11 -7323,12 +7292,11 @@@ static int nf_tables_newflowtable(struc const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; struct nft_flowtable_hook flowtable_hook; u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; const struct nf_flowtable_type *type; - int family = nfmsg->nfgen_family; struct nft_flowtable *flowtable; struct nft_hook *hook, *next; struct net *net = info->net; @@@ -7479,9 -7512,10 +7480,9 @@@ static int nf_tables_delflowtable(struc const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct nft_flowtable *flowtable; struct net *net = info->net; const struct nlattr *attr; @@@ -7673,8 -7707,9 +7674,8 @@@ static int nf_tables_getflowtable(struc const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); u8 genmask = nft_genmask_cur(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct nft_flowtable *flowtable; const struct nft_table *table; struct net *net = info->net; diff --combined net/packet/af_packet.c index 71dd6b910f7c,330ba68828e7..77b0cdab3810 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@@ -2683,7 -2683,7 +2683,7 @@@ static int tpacket_snd(struct packet_so } if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); - proto = po->num; + proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@@ -2896,7 -2896,7 +2896,7 @@@ static int packet_snd(struct socket *so
if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); - proto = po->num; + proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@@ -3034,10 -3034,13 +3034,13 @@@ static int packet_sendmsg(struct socke struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk);
- if (po->tx_ring.pg_vec) + /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. + * tpacket_snd() will redo the check safely. + */ + if (data_race(po->tx_ring.pg_vec)) return tpacket_snd(po, msg); - else - return packet_snd(sock, msg, len); + + return packet_snd(sock, msg, len); }
/* @@@ -3168,7 -3171,7 +3171,7 @@@ static int packet_do_bind(struct sock * /* prevents packet_notifier() from calling * register_prot_hook() */ - po->num = 0; + WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, true); rcu_read_lock(); dev_curr = po->prot_hook.dev; @@@ -3178,17 -3181,17 +3181,17 @@@ }
BUG_ON(po->running); - po->num = proto; + WRITE_ONCE(po->num, proto); po->prot_hook.type = proto;
if (unlikely(unlisted)) { dev_put(dev); po->prot_hook.dev = NULL; - po->ifindex = -1; + WRITE_ONCE(po->ifindex, -1); packet_cached_dev_reset(po); } else { po->prot_hook.dev = dev; - po->ifindex = dev ? dev->ifindex : 0; + WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0); packet_cached_dev_assign(po, dev); } } @@@ -3502,7 -3505,7 +3505,7 @@@ static int packet_getname_spkt(struct s uaddr->sa_family = AF_PACKET; memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); + dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); rcu_read_unlock(); @@@ -3517,16 -3520,18 +3520,18 @@@ static int packet_getname(struct socke struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); + int ifindex;
if (peer) return -EOPNOTSUPP;
+ ifindex = READ_ONCE(po->ifindex); sll->sll_family = AF_PACKET; - sll->sll_ifindex = po->ifindex; - sll->sll_protocol = po->num; + sll->sll_ifindex = ifindex; + sll->sll_protocol = READ_ONCE(po->num); sll->sll_pkttype = 0; rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); + dev = dev_get_by_index_rcu(sock_net(sk), ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; @@@ -3929,9 -3934,12 +3934,9 @@@ packet_setsockopt(struct socket *sock, return -EFAULT;
lock_sock(sk); - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { - ret = -EBUSY; - } else { + if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec) po->tp_tx_has_off = !!val; - ret = 0; - } + release_sock(sk); return 0; } @@@ -4102,7 -4110,7 +4107,7 @@@ static int packet_notifier(struct notif } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); - po->ifindex = -1; + WRITE_ONCE(po->ifindex, -1); if (po->prot_hook.dev) dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; @@@ -4408,7 -4416,7 +4413,7 @@@ static int packet_set_ring(struct sock was_running = po->running; num = po->num; if (was_running) { - po->num = 0; + WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, false); } spin_unlock(&po->bind_lock); @@@ -4443,7 -4451,7 +4448,7 @@@
spin_lock(&po->bind_lock); if (was_running) { - po->num = num; + WRITE_ONCE(po->num, num); register_prot_hook(sk); } spin_unlock(&po->bind_lock); @@@ -4613,8 -4621,8 +4618,8 @@@ static int packet_seq_show(struct seq_f s, refcount_read(&s->sk_refcnt), s->sk_type, - ntohs(po->num), - po->ifindex, + ntohs(READ_ONCE(po->num)), + READ_ONCE(po->ifindex), po->running, atomic_read(&s->sk_rmem_alloc), from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), diff --combined net/unix/af_unix.c index 4d4f24cbd86b,5d1192ceb139..c9dfec7b71e7 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@@ -535,12 -535,14 +535,14 @@@ static void unix_release_sock(struct so u->path.mnt = NULL; state = sk->sk_state; sk->sk_state = TCP_CLOSE; + + skpair = unix_peer(sk); + unix_peer(sk) = NULL; + unix_state_unlock(sk);
wake_up_interruptible_all(&u->peer_wait);
- skpair = unix_peer(sk); - if (skpair != NULL) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { unix_state_lock(skpair); @@@ -555,7 -557,6 +557,6 @@@
unix_dgram_peer_wake_disconnect(sk, skpair); sock_put(skpair); /* It may now die */ - unix_peer(sk) = NULL; }
/* Try to flush out this socket. Throw out buffers at least */ @@@ -1392,7 -1393,7 +1393,7 @@@ restart
unix_state_unlock(sk);
- /* take ten and and send info to listening sock */ + /* take ten and send info to listening sock */ spin_lock(&other->sk_receive_queue.lock); __skb_queue_tail(&other->sk_receive_queue, skb); spin_unlock(&other->sk_receive_queue.lock); diff --combined tools/testing/selftests/net/mptcp/mptcp_connect.sh index 69351c3eb68c,2b495dc8d78e..2484fb6a9a8d --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@@ -3,7 -3,7 +3,7 @@@
time_start=$(date +%s)
-optstring="S:R:d:e:l:r:h4cm:f:t" +optstring="S:R:d:e:l:r:h4cm:f:tC" ret=0 sin="" sout="" @@@ -22,7 -22,6 +22,7 @@@ sndbuf= rcvbuf=0 options_log=true do_tcp=0 +checksum=false filesize=0
if [ $tc_loss -eq 100 ];then @@@ -48,7 -47,6 +48,7 @@@ usage() echo -e "\t-R: set rcvbuf value (default: use kernel default)" echo -e "\t-m: test mode (poll, sendfile; default: poll)" echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)" + echo -e "\t-C: enable the MPTCP data checksum" }
while getopts "$optstring" option;do @@@ -106,9 -104,6 +106,9 @@@ "t") do_tcp=$((do_tcp+1)) ;; + "C") + checksum=true + ;; "?") usage $0 exit 1 @@@ -202,15 -197,6 +202,12 @@@ ip -net "$ns4" link set ns4eth3 u ip -net "$ns4" route add default via 10.0.3.2 ip -net "$ns4" route add default via dead:beef:3::2
- # use TCP syn cookies, even if no flooding was detected. - ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 - +if $checksum; then + for i in "$ns1" "$ns2" "$ns3" "$ns4";do + ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1 + done +fi + set_ethtool_flags() { local ns="$1" local dev="$2" @@@ -748,6 -734,14 +745,14 @@@ for sender in $ns1 $ns2 $ns3 $ns4;d exit $ret fi
+ # ns1<->ns2 is not subject to reordering/tc delays. Use it to test + # mptcp syncookie support. + if [ $sender = $ns1 ]; then + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 + else + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1 + fi + run_tests "$ns2" $sender 10.0.1.2 run_tests "$ns2" $sender dead:beef:1::2 run_tests "$ns2" $sender 10.0.2.1