The following commit has been merged in the master branch:

commit adc2e56ebe6377f5c032d96aee0feac30a640453
Merge: 4bea7207a80c8bba3b3eb5b84c407b162968475f 9ed13a17e38e0537e24d9b507645002bf8d0201f
Author: Jakub Kicinski <kuba@kernel.org>
Date:   Fri Jun 18 19:47:02 2021 -0700
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Trivial conflicts in net/can/isotp.c and tools/testing/selftests/net/mptcp/mptcp_connect.sh
scaled_ppm_to_ppb() was moved from drivers/ptp/ptp_clock.c to include/linux/ptp_clock_kernel.h in -next, so re-apply the fix there (a sketch of the helper follows below).
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
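For context, scaled_ppm_to_ppb() converts a frequency adjustment expressed in scaled parts per million (a 16-bit binary fractional field, as in struct timex) into parts per billion. The sketch below is an illustrative user-space rendition of that arithmetic, not the exact in-tree definition this merge touches:

```c
/*
 * Illustrative sketch of the scaled-ppm -> ppb conversion:
 *
 *     ppb = scaled_ppm * 1000 / 2^16  ==  scaled_ppm * 125 / 2^13
 *
 * The in-tree helper in include/linux/ptp_clock_kernel.h may differ in
 * detail; this stand-alone version only demonstrates the arithmetic.
 */
#include <stdint.h>
#include <stdio.h>

static long scaled_ppm_to_ppb(long ppm)
{
	int64_t ppb = 1 + ppm;	/* +1 mirrors the kernel helper's rounding */

	ppb *= 125;		/* scale by 1000 ... */
	ppb >>= 13;		/* ... and divide by 2^16, reduced to *125 >> 13 */
	return (long)ppb;
}

int main(void)
{
	/* 65536 scaled ppm == 1 ppm == 1000 ppb */
	printf("%ld\n", scaled_ppm_to_ppb(65536));
	return 0;
}
```

The conflict arises because the same conversion now lives in a header rather than in drivers/ptp/ptp_clock.c, so a fix made against the old location has to be carried over to the new one during the merge.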
diff --combined MAINTAINERS index fbf792962d7b,8c5ee008301a..395b052635ca --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -3877,6 -3877,7 +3877,7 @@@ L: linux-btrfs@vger.kernel.or S: Maintained W: http://btrfs.wiki.kernel.org/ Q: http://patchwork.kernel.org/project/linux-btrfs/list/ + C: irc://irc.libera.chat/btrfs T: git git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git F: Documentation/filesystems/btrfs.rst F: fs/btrfs/ @@@ -6811,8 -6812,6 +6812,8 @@@ F: Documentation/devicetree/bindings/ne F: Documentation/devicetree/bindings/net/qca,ar803x.yaml F: Documentation/networking/phy.rst F: drivers/net/mdio/ +F: drivers/net/mdio/acpi_mdio.c +F: drivers/net/mdio/fwnode_mdio.c F: drivers/net/mdio/of_mdio.c F: drivers/net/pcs/ F: drivers/net/phy/ @@@ -6947,6 -6946,7 +6948,7 @@@ F: net/core/failover. FANOTIFY M: Jan Kara jack@suse.cz R: Amir Goldstein amir73il@gmail.com + R: Matthew Bobrowski repnop@google.com L: linux-fsdevel@vger.kernel.org S: Maintained F: fs/notify/fanotify/ @@@ -9139,7 -9139,6 +9141,7 @@@ F: Documentation/networking/device_driv F: drivers/net/ethernet/intel/ F: drivers/net/ethernet/intel/*/ F: include/linux/avf/virtchnl.h +F: include/linux/net/intel/iidc.h
INTEL FRAMEBUFFER DRIVER (excluding 810 and 815) M: Maik Broemme mbroemme@libmpq.org @@@ -9453,13 -9452,6 +9455,13 @@@ L: Dell.Client.Kernel@dell.co S: Maintained F: drivers/platform/x86/intel-wmi-thunderbolt.c
+INTEL WWAN IOSM DRIVER +M: M Chetan Kumar m.chetan.kumar@intel.com +M: Intel Corporation linuxwwan@intel.com +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/wwan/iosm/ + INTEL(R) TRACE HUB M: Alexander Shishkin alexander.shishkin@linux.intel.com S: Supported @@@ -12396,12 -12388,6 +12398,12 @@@ F: Documentation/userspace-api/media/dr F: drivers/media/pci/meye/ F: include/uapi/linux/meye.h
+MOTORCOMM PHY DRIVER +M: Peter Geis pgwipeout@gmail.com +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/phy/motorcomm.c + MOXA SMARTIO/INDUSTIO/INTELLIO SERIAL CARD S: Orphan F: Documentation/driver-api/serial/moxa-smartio.rst @@@ -13210,7 -13196,6 +13212,7 @@@ M: Vladimir Oltean <olteanv@gmail.com L: linux-kernel@vger.kernel.org S: Maintained F: drivers/net/dsa/sja1105 +F: drivers/net/pcs/pcs-xpcs-nxp.c
NXP TDA998X DRM DRIVER M: Russell King linux@armlinux.org.uk @@@ -14134,6 -14119,7 +14136,7 @@@ F: drivers/pci/controller/pci-v3-semi. PCI ENDPOINT SUBSYSTEM M: Kishon Vijay Abraham I kishon@ti.com M: Lorenzo Pieralisi lorenzo.pieralisi@arm.com + R: Krzysztof Wilczyński kw@linux.com L: linux-pci@vger.kernel.org S: Supported F: Documentation/PCI/endpoint/* @@@ -14182,6 -14168,7 +14185,7 @@@ F: drivers/pci/controller/pci-xgene-msi PCI NATIVE HOST BRIDGE AND ENDPOINT DRIVERS M: Lorenzo Pieralisi lorenzo.pieralisi@arm.com R: Rob Herring robh@kernel.org + R: Krzysztof Wilczyński kw@linux.com L: linux-pci@vger.kernel.org S: Supported Q: http://patchwork.ozlabs.org/project/linux-pci/list/ @@@ -14341,10 -14328,12 +14345,12 @@@ PER-CPU MEMORY ALLOCATO M: Dennis Zhou dennis@kernel.org M: Tejun Heo tj@kernel.org M: Christoph Lameter cl@linux.com + L: linux-mm@kvack.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu.git F: arch/*/include/asm/percpu.h F: include/linux/percpu*.h + F: lib/percpu*.c F: mm/percpu*.c
PER-TASK DELAY ACCOUNTING @@@ -15587,13 -15576,6 +15593,13 @@@ F: include/linux/rpmsg F: include/uapi/linux/rpmsg.h F: samples/rpmsg/
+REMOTE PROCESSOR MESSAGING (RPMSG) WWAN CONTROL DRIVER +M: Stephan Gerhold stephan@gerhold.net +L: netdev@vger.kernel.org +L: linux-remoteproc@vger.kernel.org +S: Maintained +F: drivers/net/wwan/rpmsg_wwan_ctrl.c + RENESAS CLOCK DRIVERS M: Geert Uytterhoeven geert+renesas@glider.be L: linux-renesas-soc@vger.kernel.org @@@ -16578,6 -16560,7 +16584,7 @@@ F: drivers/misc/sgi-xp
SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS M: Karsten Graul kgraul@linux.ibm.com + M: Guvenc Gulce guvenc@linux.ibm.com L: linux-s390@vger.kernel.org S: Supported W: http://www.ibm.com/developerworks/linux/linux390/ @@@ -17691,7 -17674,6 +17698,7 @@@ M: Jose Abreu <Jose.Abreu@synopsys.com L: netdev@vger.kernel.org S: Supported F: drivers/net/pcs/pcs-xpcs.c +F: drivers/net/pcs/pcs-xpcs.h F: include/linux/pcs/pcs-xpcs.h
SYNOPSYS DESIGNWARE I2C DRIVER @@@ -17701,7 -17683,6 +17708,6 @@@ R: Mika Westerberg <mika.westerberg@lin L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-designware-* - F: include/linux/platform_data/i2c-designware.h
SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER M: Jaehoon Chung jh80.chung@samsung.com @@@ -18893,6 -18874,13 +18899,13 @@@ S: Maintaine F: drivers/usb/host/isp116x* F: include/linux/usb/isp116x.h
+ USB ISP1760 DRIVER + M: Rui Miguel Silva rui.silva@linaro.org + L: linux-usb@vger.kernel.org + S: Maintained + F: drivers/usb/isp1760/* + F: Documentation/devicetree/bindings/usb/nxp,isp1760.yaml + USB LAN78XX ETHERNET DRIVER M: Woojung Huh woojung.huh@microchip.com M: UNGLinuxDriver@microchip.com @@@ -20037,6 -20025,7 +20050,7 @@@ F: arch/x86/xen/*swiotlb F: drivers/xen/*swiotlb*
XFS FILESYSTEM + C: irc://irc.oftc.net/xfs M: Darrick J. Wong djwong@kernel.org M: linux-xfs@vger.kernel.org L: linux-xfs@vger.kernel.org diff --combined drivers/base/core.c index b6836bfa985c,54ba506e5a89..2a61003ea2c1 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@@ -194,6 -194,17 +194,17 @@@ int device_links_read_lock_held(void { return srcu_read_lock_held(&device_links_srcu); } + + static void device_link_synchronize_removal(void) + { + synchronize_srcu(&device_links_srcu); + } + + static void device_link_remove_from_lists(struct device_link *link) + { + list_del_rcu(&link->s_node); + list_del_rcu(&link->c_node); + } #else /* !CONFIG_SRCU */ static DECLARE_RWSEM(device_links_lock);
@@@ -224,6 -235,16 +235,16 @@@ int device_links_read_lock_held(void return lockdep_is_held(&device_links_lock); } #endif + + static inline void device_link_synchronize_removal(void) + { + } + + static void device_link_remove_from_lists(struct device_link *link) + { + list_del(&link->s_node); + list_del(&link->c_node); + } #endif /* !CONFIG_SRCU */
static bool device_is_ancestor(struct device *dev, struct device *target) @@@ -445,8 -466,13 +466,13 @@@ static struct attribute *devlink_attrs[ }; ATTRIBUTE_GROUPS(devlink);
- static void device_link_free(struct device_link *link) + static void device_link_release_fn(struct work_struct *work) { + struct device_link *link = container_of(work, struct device_link, rm_work); + + /* Ensure that all references to the link object have been dropped. */ + device_link_synchronize_removal(); + while (refcount_dec_not_one(&link->rpm_active)) pm_runtime_put(link->supplier);
@@@ -455,24 -481,19 +481,19 @@@ kfree(link); }
- #ifdef CONFIG_SRCU - static void __device_link_free_srcu(struct rcu_head *rhead) - { - device_link_free(container_of(rhead, struct device_link, rcu_head)); - } - static void devlink_dev_release(struct device *dev) { struct device_link *link = to_devlink(dev);
- call_srcu(&device_links_srcu, &link->rcu_head, __device_link_free_srcu); - } - #else - static void devlink_dev_release(struct device *dev) - { - device_link_free(to_devlink(dev)); + INIT_WORK(&link->rm_work, device_link_release_fn); + /* + * It may take a while to complete this work because of the SRCU + * synchronization in device_link_release_fn() and if the consumer or + * supplier devices get deleted when it runs, so put it into the "long" + * workqueue. + */ + queue_work(system_long_wq, &link->rm_work); } - #endif
static struct class devlink_class = { .name = "devlink", @@@ -846,7 -867,6 +867,6 @@@ out } EXPORT_SYMBOL_GPL(device_link_add);
- #ifdef CONFIG_SRCU static void __device_link_del(struct kref *kref) { struct device_link *link = container_of(kref, struct device_link, kref); @@@ -856,25 -876,9 +876,9 @@@
pm_runtime_drop_link(link);
- list_del_rcu(&link->s_node); - list_del_rcu(&link->c_node); - device_unregister(&link->link_dev); - } - #else /* !CONFIG_SRCU */ - static void __device_link_del(struct kref *kref) - { - struct device_link *link = container_of(kref, struct device_link, kref); - - dev_info(link->consumer, "Dropping the link to %s\n", - dev_name(link->supplier)); - - pm_runtime_drop_link(link); - - list_del(&link->s_node); - list_del(&link->c_node); + device_link_remove_from_lists(link); device_unregister(&link->link_dev); } - #endif /* !CONFIG_SRCU */
static void device_link_put_kref(struct device_link *link) { @@@ -4723,13 -4727,6 +4727,13 @@@ void device_set_of_node_from_dev(struc } EXPORT_SYMBOL_GPL(device_set_of_node_from_dev);
+void device_set_node(struct device *dev, struct fwnode_handle *fwnode) +{ + dev->fwnode = fwnode; + dev->of_node = to_of_node(fwnode); +} +EXPORT_SYMBOL_GPL(device_set_node); + int device_match_name(struct device *dev, const void *name) { return sysfs_streq(dev_name(dev), name); diff --combined drivers/infiniband/hw/mlx5/fs.c index 941adf5cf3d0,18ee2f293825..5fbc0a8454b9 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@@ -1194,9 -1194,8 +1194,8 @@@ static struct ib_flow *mlx5_ib_create_f goto free_ucmd; }
- if (flow_attr->port > dev->num_ports || - (flow_attr->flags & - ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS))) { + if (flow_attr->flags & + ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP | IB_FLOW_ATTR_FLAGS_EGRESS)) { err = -EINVAL; goto free_ucmd; } @@@ -2134,6 -2133,12 +2133,12 @@@ static int UVERBS_HANDLER(MLX5_IB_METHO if (err) goto end;
+ if (obj->ns_type == MLX5_FLOW_NAMESPACE_FDB && + mlx5_eswitch_mode(dev->mdev) != MLX5_ESWITCH_OFFLOADS) { + err = -EINVAL; + goto end; + } + uobj->object = obj; obj->mdev = dev->mdev; atomic_set(&obj->usecnt, 0); @@@ -2280,7 -2285,6 +2285,7 @@@ static int mlx5_ib_flow_action_create_p u8 ft_type, u8 dv_prt, void *in, size_t len) { + struct mlx5_pkt_reformat_params reformat_params; enum mlx5_flow_namespace_type namespace; u8 prm_prt; int ret; @@@ -2293,13 -2297,9 +2298,13 @@@ if (ret) return ret;
+ memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = prm_prt; + reformat_params.size = len; + reformat_params.data = in; maction->flow_action_raw.pkt_reformat = - mlx5_packet_reformat_alloc(dev->mdev, prm_prt, len, - in, namespace); + mlx5_packet_reformat_alloc(dev->mdev, &reformat_params, + namespace); if (IS_ERR(maction->flow_action_raw.pkt_reformat)) { ret = PTR_ERR(maction->flow_action_raw.pkt_reformat); return ret; diff --combined drivers/net/ethernet/amazon/ena/ena_netdev.c index 3bb0e66b2c7e,52571486705e..edaf37823c50 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@@ -35,6 -35,9 +35,6 @@@ MODULE_LICENSE("GPL")
#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \ NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR) -static int debug = -1; -module_param(debug, int, 0); -MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
static struct ena_aenq_handlers aenq_handlers;
@@@ -86,12 -89,6 +86,12 @@@ static void ena_increase_stat(u64 *stat u64_stats_update_end(syncp); }
+static void ena_ring_tx_doorbell(struct ena_ring *tx_ring) +{ + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); + ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp); +} + static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ena_adapter *adapter = netdev_priv(dev); @@@ -150,7 -147,7 +150,7 @@@ static int ena_xmit_common(struct net_d netif_dbg(adapter, tx_queued, dev, "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n", ring->qid); - ena_com_write_sq_doorbell(ring->ena_com_io_sq); + ena_ring_tx_doorbell(ring); }
/* prepare the packet's descriptors to dma engine */ @@@ -200,6 -197,7 +200,6 @@@ static int ena_xdp_io_poll(struct napi_ int ret;
xdp_ring = ena_napi->xdp_ring; - xdp_ring->first_interrupt = ena_napi->first_interrupt;
xdp_budget = budget;
@@@ -231,7 -229,6 +231,7 @@@ xdp_ring->tx_stats.napi_comp += napi_comp_call; xdp_ring->tx_stats.tx_poll++; u64_stats_update_end(&xdp_ring->syncp); + xdp_ring->tx_stats.last_napi_jiffies = jiffies;
return ret; } @@@ -239,36 -236,48 +239,48 @@@ static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring, struct ena_tx_buffer *tx_info, struct xdp_frame *xdpf, - void **push_hdr, - u32 *push_len) + struct ena_com_tx_ctx *ena_tx_ctx) { struct ena_adapter *adapter = xdp_ring->adapter; struct ena_com_buf *ena_buf; - dma_addr_t dma = 0; + int push_len = 0; + dma_addr_t dma; + void *data; u32 size;
tx_info->xdpf = xdpf; + data = tx_info->xdpf->data; size = tx_info->xdpf->len; - ena_buf = tx_info->bufs;
- /* llq push buffer */ - *push_len = min_t(u32, size, xdp_ring->tx_max_header_size); - *push_hdr = tx_info->xdpf->data; + if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + /* Designate part of the packet for LLQ */ + push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
- if (size - *push_len > 0) { + ena_tx_ctx->push_header = data; + + size -= push_len; + data += push_len; + } + + ena_tx_ctx->header_len = push_len; + + if (size > 0) { dma = dma_map_single(xdp_ring->dev, - *push_hdr + *push_len, - size - *push_len, + data, + size, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(xdp_ring->dev, dma))) goto error_report_dma_error;
- tx_info->map_linear_data = 1; - tx_info->num_of_bufs = 1; - } + tx_info->map_linear_data = 0;
- ena_buf->paddr = dma; - ena_buf->len = size; + ena_buf = tx_info->bufs; + ena_buf->paddr = dma; + ena_buf->len = size; + + ena_tx_ctx->ena_bufs = ena_buf; + ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1; + }
return 0;
@@@ -277,10 -286,6 +289,6 @@@ error_report_dma_error &xdp_ring->syncp); netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n");
- xdp_return_frame_rx_napi(tx_info->xdpf); - tx_info->xdpf = NULL; - tx_info->num_of_bufs = 0; - return -EINVAL; }
@@@ -292,8 -297,6 +300,6 @@@ static int ena_xdp_xmit_frame(struct en struct ena_com_tx_ctx ena_tx_ctx = {}; struct ena_tx_buffer *tx_info; u16 next_to_use, req_id; - void *push_hdr; - u32 push_len; int rc;
next_to_use = xdp_ring->next_to_use; @@@ -301,15 -304,11 +307,11 @@@ tx_info = &xdp_ring->tx_buffer_info[req_id]; tx_info->num_of_bufs = 0;
- rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &push_hdr, &push_len); + rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx); if (unlikely(rc)) return rc;
- ena_tx_ctx.ena_bufs = tx_info->bufs; - ena_tx_ctx.push_header = push_hdr; - ena_tx_ctx.num_bufs = tx_info->num_of_bufs; ena_tx_ctx.req_id = req_id; - ena_tx_ctx.header_len = push_len;
rc = ena_xmit_common(dev, xdp_ring, @@@ -319,12 -318,14 +321,12 @@@ xdpf->len); if (rc) goto error_unmap_dma; - /* trigger the dma engine. ena_com_write_sq_doorbell() - * has a mb + + /* trigger the dma engine. ena_ring_tx_doorbell() + * calls a memory barrier inside it. */ - if (flags & XDP_XMIT_FLUSH) { - ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq); - ena_increase_stat(&xdp_ring->tx_stats.doorbells, 1, - &xdp_ring->syncp); - } + if (flags & XDP_XMIT_FLUSH) + ena_ring_tx_doorbell(xdp_ring);
return rc;
@@@ -365,8 -366,11 +367,8 @@@ static int ena_xdp_xmit(struct net_devi }
/* Ring doorbell to make device aware of the packets */ - if (flags & XDP_XMIT_FLUSH) { - ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq); - ena_increase_stat(&xdp_ring->tx_stats.doorbells, 1, - &xdp_ring->syncp); - } + if (flags & XDP_XMIT_FLUSH) + ena_ring_tx_doorbell(xdp_ring);
spin_unlock(&xdp_ring->xdp_tx_lock);
@@@ -381,6 -385,7 +383,6 @@@ static int ena_xdp_execute(struct ena_r u32 verdict = XDP_PASS; struct xdp_frame *xdpf; u64 *xdp_stat; - int qid;
rcu_read_lock(); xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog); @@@ -401,7 -406,8 +403,7 @@@ }
/* Find xmit queue */ - qid = rx_ring->qid + rx_ring->adapter->num_io_queues; - xdp_ring = &rx_ring->adapter->tx_ring[qid]; + xdp_ring = rx_ring->xdp_ring;
/* The XDP queues are shared between XDP_TX and XDP_REDIRECT */ spin_lock(&xdp_ring->xdp_tx_lock); @@@ -528,7 -534,7 +530,7 @@@ static void ena_xdp_exchange_program_rx rx_ring->rx_headroom = XDP_PACKET_HEADROOM; } else { ena_xdp_unregister_rxq_info(rx_ring); - rx_ring->rx_headroom = 0; + rx_ring->rx_headroom = NET_SKB_PAD; } } } @@@ -677,6 -683,7 +679,6 @@@ static void ena_init_io_rings_common(st ring->ena_dev = adapter->ena_dev; ring->per_napi_packets = 0; ring->cpu = 0; - ring->first_interrupt = false; ring->no_interrupt_event_cnt = 0; u64_stats_init(&ring->syncp); } @@@ -719,9 -726,7 +721,9 @@@ static void ena_init_io_rings(struct en rxr->smoothed_interval = ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); rxr->empty_rx_queue = 0; + rxr->rx_headroom = NET_SKB_PAD; adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rxr->xdp_ring = &adapter->tx_ring[i + adapter->num_io_queues]; } } } @@@ -975,66 -980,47 +977,66 @@@ static void ena_free_all_io_rx_resource ena_free_rx_resources(adapter, i); }
-static int ena_alloc_rx_page(struct ena_ring *rx_ring, - struct ena_rx_buffer *rx_info, gfp_t gfp) +static struct page *ena_alloc_map_page(struct ena_ring *rx_ring, + dma_addr_t *dma) { - int headroom = rx_ring->rx_headroom; - struct ena_com_buf *ena_buf; struct page *page; - dma_addr_t dma;
- /* restore page offset value in case it has been changed by device */ - rx_info->page_offset = headroom; - - /* if previous allocated page is not used */ - if (unlikely(rx_info->page)) - return 0; - - page = alloc_page(gfp); - if (unlikely(!page)) { + /* This would allocate the page on the same NUMA node the executing code + * is running on. + */ + page = dev_alloc_page(); + if (!page) { ena_increase_stat(&rx_ring->rx_stats.page_alloc_fail, 1, &rx_ring->syncp); - return -ENOMEM; + return ERR_PTR(-ENOSPC); }
/* To enable NIC-side port-mirroring, AKA SPAN port, * we make the buffer readable from the nic as well */ - dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE, - DMA_BIDIRECTIONAL); - if (unlikely(dma_mapping_error(rx_ring->dev, dma))) { + *dma = dma_map_page(rx_ring->dev, page, 0, ENA_PAGE_SIZE, + DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(rx_ring->dev, *dma))) { ena_increase_stat(&rx_ring->rx_stats.dma_mapping_err, 1, &rx_ring->syncp); - __free_page(page); - return -EIO; + return ERR_PTR(-EIO); } + + return page; +} + +static int ena_alloc_rx_buffer(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info) +{ + int headroom = rx_ring->rx_headroom; + struct ena_com_buf *ena_buf; + struct page *page; + dma_addr_t dma; + int tailroom; + + /* restore page offset value in case it has been changed by device */ + rx_info->page_offset = headroom; + + /* if previous allocated page is not used */ + if (unlikely(rx_info->page)) + return 0; + + /* We handle DMA here */ + page = ena_alloc_map_page(rx_ring, &dma); + if (unlikely(IS_ERR(page))) + return PTR_ERR(page); + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "Allocate page %p, rx_info %p\n", page, rx_info);
+ tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + rx_info->page = page; ena_buf = &rx_info->ena_buf; ena_buf->paddr = dma + headroom; - ena_buf->len = ENA_PAGE_SIZE - headroom; + ena_buf->len = ENA_PAGE_SIZE - headroom - tailroom;
return 0; } @@@ -1081,7 -1067,8 +1083,7 @@@ static int ena_refill_rx_bufs(struct en
rx_info = &rx_ring->rx_buffer_info[req_id];
- rc = ena_alloc_rx_page(rx_ring, rx_info, - GFP_ATOMIC | __GFP_COMP); + rc = ena_alloc_rx_buffer(rx_ring, rx_info); if (unlikely(rc < 0)) { netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, "Failed to allocate buffer for rx queue %d\n", @@@ -1399,23 -1386,21 +1401,23 @@@ static int ena_clean_tx_irq(struct ena_ return tx_pkts; }
-static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, bool frags) +static struct sk_buff *ena_alloc_skb(struct ena_ring *rx_ring, void *first_frag) { struct sk_buff *skb;
- if (frags) - skb = napi_get_frags(rx_ring->napi); - else + if (!first_frag) skb = netdev_alloc_skb_ip_align(rx_ring->netdev, rx_ring->rx_copybreak); + else + skb = build_skb(first_frag, ENA_PAGE_SIZE);
if (unlikely(!skb)) { ena_increase_stat(&rx_ring->rx_stats.skb_alloc_fail, 1, &rx_ring->syncp); + netif_dbg(rx_ring->adapter, rx_err, rx_ring->netdev, - "Failed to allocate skb. frags: %d\n", frags); + "Failed to allocate skb. first_frag %s\n", + first_frag ? "provided" : "not provided"); return NULL; }
@@@ -1427,12 -1412,10 +1429,12 @@@ static struct sk_buff *ena_rx_skb(struc u32 descs, u16 *next_to_clean) { - struct sk_buff *skb; struct ena_rx_buffer *rx_info; u16 len, req_id, buf = 0; - void *va; + struct sk_buff *skb; + void *page_addr; + u32 page_offset; + void *data_addr;
len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; @@@ -1450,14 -1433,12 +1452,14 @@@ rx_info, rx_info->page);
/* save virt address of first buffer */ - va = page_address(rx_info->page) + rx_info->page_offset; + page_addr = page_address(rx_info->page); + page_offset = rx_info->page_offset; + data_addr = page_addr + page_offset;
- prefetch(va); + prefetch(data_addr);
if (len <= rx_ring->rx_copybreak) { - skb = ena_alloc_skb(rx_ring, false); + skb = ena_alloc_skb(rx_ring, NULL); if (unlikely(!skb)) return NULL;
@@@ -1470,7 -1451,7 +1472,7 @@@ dma_unmap_addr(&rx_info->ena_buf, paddr), len, DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, va, len); + skb_copy_to_linear_data(skb, data_addr, len); dma_sync_single_for_device(rx_ring->dev, dma_unmap_addr(&rx_info->ena_buf, paddr), len, @@@ -1484,18 -1465,16 +1486,18 @@@ return skb; }
- skb = ena_alloc_skb(rx_ring, true); + ena_unmap_rx_buff(rx_ring, rx_info); + + skb = ena_alloc_skb(rx_ring, page_addr); if (unlikely(!skb)) return NULL;
- do { - ena_unmap_rx_buff(rx_ring, rx_info); - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, - rx_info->page_offset, len, ENA_PAGE_SIZE); + /* Populate skb's linear part */ + skb_reserve(skb, page_offset); + skb_put(skb, len); + skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+ do { netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "RX skb updated. len %d. data_len %d\n", skb->len, skb->data_len); @@@ -1514,12 -1493,6 +1516,12 @@@ req_id = ena_bufs[buf].req_id;
rx_info = &rx_ring->rx_buffer_info[req_id]; + + ena_unmap_rx_buff(rx_ring, rx_info); + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, + rx_info->page_offset, len, ENA_PAGE_SIZE); + } while (1);
return skb; @@@ -1732,12 -1705,14 +1734,12 @@@ static int ena_clean_rx_irq(struct ena_
skb_record_rx_queue(skb, rx_ring->qid);
- if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) { - total_len += rx_ring->ena_bufs[0].len; + if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) rx_copybreak_pkt++; - napi_gro_receive(napi, skb); - } else { - total_len += skb->len; - napi_gro_frags(napi); - } + + total_len += skb->len; + + napi_gro_receive(napi, skb);
res_budget--; } while (likely(res_budget)); @@@ -1949,6 -1924,9 +1951,6 @@@ static int ena_io_poll(struct napi_stru tx_ring = ena_napi->tx_ring; rx_ring = ena_napi->rx_ring;
- tx_ring->first_interrupt = ena_napi->first_interrupt; - rx_ring->first_interrupt = ena_napi->first_interrupt; - tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) || @@@ -2003,8 -1981,6 +2005,8 @@@ tx_ring->tx_stats.tx_poll++; u64_stats_update_end(&tx_ring->syncp);
+ tx_ring->tx_stats.last_napi_jiffies = jiffies; + return ret; }
@@@ -2029,8 -2005,7 +2031,8 @@@ static irqreturn_t ena_intr_msix_io(in { struct ena_napi *ena_napi = data;
- ena_napi->first_interrupt = true; + /* Used to check HW health */ + WRITE_ONCE(ena_napi->first_interrupt, true);
WRITE_ONCE(ena_napi->interrupts_masked, true); smp_wmb(); /* write interrupts_masked before calling napi */ @@@ -3116,11 -3091,14 +3118,11 @@@ static netdev_tx_t ena_start_xmit(struc } }
- if (netif_xmit_stopped(txq) || !netdev_xmit_more()) { - /* trigger the dma engine. ena_com_write_sq_doorbell() - * has a mb + if (netif_xmit_stopped(txq) || !netdev_xmit_more()) + /* trigger the dma engine. ena_ring_tx_doorbell() + * calls a memory barrier inside it. */ - ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); - ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, - &tx_ring->syncp); - } + ena_ring_tx_doorbell(tx_ring);
return NETDEV_TX_OK;
@@@ -3370,7 -3348,7 +3372,7 @@@ static int ena_set_queues_placement_pol
llq_feature_mask = 1 << ENA_ADMIN_LLQ; if (!(ena_dev->supported_features & llq_feature_mask)) { - dev_err(&pdev->dev, + dev_warn(&pdev->dev, "LLQ is not supported Fallback to host mode policy.\n"); ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; return 0; @@@ -3681,9 -3659,7 +3683,9 @@@ static void ena_fw_reset_device(struct static int check_for_rx_interrupt_queue(struct ena_adapter *adapter, struct ena_ring *rx_ring) { - if (likely(rx_ring->first_interrupt)) + struct ena_napi *ena_napi = container_of(rx_ring->napi, struct ena_napi, napi); + + if (likely(READ_ONCE(ena_napi->first_interrupt))) return 0;
if (ena_com_cq_empty(rx_ring->ena_com_io_cq)) @@@ -3707,10 -3683,6 +3709,10 @@@ static int check_missing_comp_in_tx_queue(struct ena_adapter *adapter, struct ena_ring *tx_ring) { + struct ena_napi *ena_napi = container_of(tx_ring->napi, struct ena_napi, napi); + unsigned int time_since_last_napi; + unsigned int missing_tx_comp_to; + bool is_tx_comp_time_expired; struct ena_tx_buffer *tx_buf; unsigned long last_jiffies; u32 missed_tx = 0; @@@ -3724,10 -3696,8 +3726,10 @@@ /* no pending Tx at this location */ continue;
- if (unlikely(!tx_ring->first_interrupt && time_is_before_jiffies(last_jiffies + - 2 * adapter->missing_tx_completion_to))) { + is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies + + 2 * adapter->missing_tx_completion_to); + + if (unlikely(!READ_ONCE(ena_napi->first_interrupt) && is_tx_comp_time_expired)) { /* If after graceful period interrupt is still not * received, we schedule a reset */ @@@ -3740,17 -3710,12 +3742,17 @@@ return -EIO; }
- if (unlikely(time_is_before_jiffies(last_jiffies + - adapter->missing_tx_completion_to))) { - if (!tx_buf->print_once) + is_tx_comp_time_expired = time_is_before_jiffies(last_jiffies + + adapter->missing_tx_completion_to); + + if (unlikely(is_tx_comp_time_expired)) { + if (!tx_buf->print_once) { + time_since_last_napi = jiffies_to_usecs(jiffies - tx_ring->tx_stats.last_napi_jiffies); + missing_tx_comp_to = jiffies_to_msecs(adapter->missing_tx_completion_to); netif_notice(adapter, tx_err, adapter->netdev, - "Found a Tx that wasn't completed on time, qid %d, index %d.\n", - tx_ring->qid, i); + "Found a Tx that wasn't completed on time, qid %d, index %d. %u usecs have passed since last napi execution. Missing Tx timeout value %u msecs\n", + tx_ring->qid, i, time_since_last_napi, missing_tx_comp_to); + }
tx_buf->print_once = 1; missed_tx++; @@@ -4281,7 -4246,7 +4283,7 @@@ static int ena_probe(struct pci_dev *pd adapter->ena_dev = ena_dev; adapter->netdev = netdev; adapter->pdev = pdev; - adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + adapter->msg_enable = DEFAULT_MSG_ENABLE;
ena_dev->net_device = netdev;
diff --combined drivers/net/ethernet/atheros/alx/main.c index 45e380f3b065,7748b276e5fd..11ef1fbe7aee --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@@ -1,5 -1,5 +1,5 @@@ /* - * Copyright (c) 2013 Johannes Berg johannes@sipsolutions.net + * Copyright (c) 2013, 2021 Johannes Berg johannes@sipsolutions.net * * This file is free software: you may copy, redistribute and/or modify it * under the terms of the GNU General Public License as published by the @@@ -1091,9 -1091,8 +1091,9 @@@ static int alx_init_sw(struct alx_priv ALX_MAC_CTRL_RXFC_EN | ALX_MAC_CTRL_TXFC_EN | 7 << ALX_MAC_CTRL_PRMBLEN_SHIFT; + mutex_init(&alx->mtx);
- return err; + return 0; }
@@@ -1123,8 -1122,6 +1123,8 @@@ static void alx_halt(struct alx_priv *a { struct alx_hw *hw = &alx->hw;
+ lockdep_assert_held(&alx->mtx); + alx_netif_stop(alx); hw->link_speed = SPEED_UNKNOWN; hw->duplex = DUPLEX_UNKNOWN; @@@ -1150,8 -1147,6 +1150,8 @@@ static void alx_configure(struct alx_pr
static void alx_activate(struct alx_priv *alx) { + lockdep_assert_held(&alx->mtx); + /* hardware setting lost, restore it */ alx_reinit_rings(alx); alx_configure(alx); @@@ -1166,7 -1161,7 +1166,7 @@@
static void alx_reinit(struct alx_priv *alx) { - ASSERT_RTNL(); + lockdep_assert_held(&alx->mtx);
alx_halt(alx); alx_activate(alx); @@@ -1254,8 -1249,6 +1254,8 @@@ out_disable_adv_intr
static void __alx_stop(struct alx_priv *alx) { + lockdep_assert_held(&alx->mtx); + alx_free_irq(alx);
cancel_work_sync(&alx->link_check_wk); @@@ -1291,8 -1284,6 +1291,8 @@@ static void alx_check_link(struct alx_p int old_speed; int err;
+ lockdep_assert_held(&alx->mtx); + /* clear PHY internal interrupt status, otherwise the main * interrupt status will be asserted forever */ @@@ -1347,24 -1338,12 +1347,24 @@@ reset
static int alx_open(struct net_device *netdev) { - return __alx_open(netdev_priv(netdev), false); + struct alx_priv *alx = netdev_priv(netdev); + int ret; + + mutex_lock(&alx->mtx); + ret = __alx_open(alx, false); + mutex_unlock(&alx->mtx); + + return ret; }
static int alx_stop(struct net_device *netdev) { - __alx_stop(netdev_priv(netdev)); + struct alx_priv *alx = netdev_priv(netdev); + + mutex_lock(&alx->mtx); + __alx_stop(alx); + mutex_unlock(&alx->mtx); + return 0; }
@@@ -1374,18 -1353,18 +1374,18 @@@ static void alx_link_check(struct work_
alx = container_of(work, struct alx_priv, link_check_wk);
- rtnl_lock(); + mutex_lock(&alx->mtx); alx_check_link(alx); - rtnl_unlock(); + mutex_unlock(&alx->mtx); }
static void alx_reset(struct work_struct *work) { struct alx_priv *alx = container_of(work, struct alx_priv, reset_wk);
- rtnl_lock(); + mutex_lock(&alx->mtx); alx_reinit(alx); - rtnl_unlock(); + mutex_unlock(&alx->mtx); }
static int alx_tpd_req(struct sk_buff *skb) @@@ -1792,8 -1771,6 +1792,8 @@@ static int alx_probe(struct pci_dev *pd goto out_unmap; }
+ mutex_lock(&alx->mtx); + alx_reset_pcie(hw);
phy_configured = alx_phy_configured(hw); @@@ -1804,7 -1781,7 +1804,7 @@@ err = alx_reset_mac(hw); if (err) { dev_err(&pdev->dev, "MAC Reset failed, error = %d\n", err); - goto out_unmap; + goto out_unlock; }
/* setup link to put it in a known good starting state */ @@@ -1814,7 -1791,7 +1814,7 @@@ dev_err(&pdev->dev, "failed to configure PHY speed/duplex (err=%d)\n", err); - goto out_unmap; + goto out_unlock; } }
@@@ -1847,11 -1824,9 +1847,11 @@@ if (!alx_get_phy_info(hw)) { dev_err(&pdev->dev, "failed to identify PHY\n"); err = -EIO; - goto out_unmap; + goto out_unlock; }
+ mutex_unlock(&alx->mtx); + INIT_WORK(&alx->link_check_wk, alx_link_check); INIT_WORK(&alx->reset_wk, alx_reset); netif_carrier_off(netdev); @@@ -1868,14 -1843,13 +1868,15 @@@
return 0;
+out_unlock: + mutex_unlock(&alx->mtx); out_unmap: iounmap(hw->hw_addr); out_free_netdev: free_netdev(netdev); out_pci_release: pci_release_mem_regions(pdev); + pci_disable_pcie_error_reporting(pdev); out_pci_disable: pci_disable_device(pdev); return err; @@@ -1896,8 -1870,6 +1897,8 @@@ static void alx_remove(struct pci_dev * pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev);
+ mutex_destroy(&alx->mtx); + free_netdev(alx->dev); }
@@@ -1909,11 -1881,7 +1910,11 @@@ static int alx_suspend(struct device *d if (!netif_running(alx->dev)) return 0; netif_device_detach(alx->dev); + + mutex_lock(&alx->mtx); __alx_stop(alx); + mutex_unlock(&alx->mtx); + return 0; }
@@@ -1923,23 -1891,20 +1924,23 @@@ static int alx_resume(struct device *de struct alx_hw *hw = &alx->hw; int err;
+ mutex_lock(&alx->mtx); alx_reset_phy(hw);
- if (!netif_running(alx->dev)) - return 0; + if (!netif_running(alx->dev)) { + err = 0; + goto unlock; + }
- rtnl_lock(); err = __alx_open(alx, true); - rtnl_unlock(); if (err) - return err; + goto unlock;
netif_device_attach(alx->dev);
- return 0; +unlock: + mutex_unlock(&alx->mtx); + return err; }
static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume); @@@ -1958,7 -1923,7 +1959,7 @@@ static pci_ers_result_t alx_pci_error_d
dev_info(&pdev->dev, "pci error detected\n");
- rtnl_lock(); + mutex_lock(&alx->mtx);
if (netif_running(netdev)) { netif_device_detach(netdev); @@@ -1970,7 -1935,7 +1971,7 @@@ else pci_disable_device(pdev);
- rtnl_unlock(); + mutex_unlock(&alx->mtx);
return rc; } @@@ -1983,7 -1948,7 +1984,7 @@@ static pci_ers_result_t alx_pci_error_s
dev_info(&pdev->dev, "pci error slot reset\n");
- rtnl_lock(); + mutex_lock(&alx->mtx);
if (pci_enable_device(pdev)) { dev_err(&pdev->dev, "Failed to re-enable PCI device after reset\n"); @@@ -1996,7 -1961,7 +1997,7 @@@ if (!alx_reset_mac(hw)) rc = PCI_ERS_RESULT_RECOVERED; out: - rtnl_unlock(); + mutex_unlock(&alx->mtx);
return rc; } @@@ -2008,14 -1973,14 +2009,14 @@@ static void alx_pci_error_resume(struc
dev_info(&pdev->dev, "pci error resume\n");
- rtnl_lock(); + mutex_lock(&alx->mtx);
if (netif_running(netdev)) { alx_activate(alx); netif_device_attach(netdev); }
- rtnl_unlock(); + mutex_unlock(&alx->mtx); }
static const struct pci_error_handlers alx_err_handlers = { diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 6479ceedc352,762113a04dde..9a2b166d651e --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@@ -3894,6 -3894,7 +3894,6 @@@ static const struct net_device_ops cxgb .ndo_set_vf_vlan = cxgb4_mgmt_set_vf_vlan, .ndo_set_vf_link_state = cxgb4_mgmt_set_vf_link_state, }; -#endif
static void cxgb4_mgmt_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) @@@ -3908,7 -3909,6 +3908,7 @@@ static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = { .get_drvinfo = cxgb4_mgmt_get_drvinfo, }; +#endif
static void notify_fatal_err(struct work_struct *work) { @@@ -4424,10 -4424,8 +4424,8 @@@ static int adap_init0_phy(struct adapte
/* Load PHY Firmware onto adapter. */ - spin_lock_bh(&adap->win0_lock); ret = t4_load_phy_fw(adap, MEMWIN_NIC, phy_info->phy_fw_version, (u8 *)phyf->data, phyf->size); - spin_unlock_bh(&adap->win0_lock); if (ret < 0) dev_err(adap->pdev_dev, "PHY Firmware transfer error %d\n", -ret); diff --combined drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 9e3ea5f7be2e,a0555f4d76fc..6606fb8b3e42 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@@ -3060,16 -3060,19 +3060,19 @@@ int t4_read_flash(struct adapter *adapt * @addr: the start address to write * @n: length of data to write in bytes * @data: the data to write + * @byte_oriented: whether to store data as bytes or as words * * Writes up to a page of data (256 bytes) to the serial flash starting * at the given address. All the data must be written to the same page. + * If @byte_oriented is set the write data is stored as byte stream + * (i.e. matches what on disk), otherwise in big-endian. */ static int t4_write_flash(struct adapter *adapter, unsigned int addr, - unsigned int n, const u8 *data) + unsigned int n, const u8 *data, bool byte_oriented) { - int ret; - u32 buf[64]; unsigned int i, c, left, val, offset = addr & 0xff; + u32 buf[64]; + int ret;
if (addr >= adapter->params.sf_size || offset + n > SF_PAGE_SIZE) return -EINVAL; @@@ -3080,10 -3083,14 +3083,14 @@@ (ret = sf1_write(adapter, 4, 1, 1, val)) != 0) goto unlock;
- for (left = n; left; left -= c) { + for (left = n; left; left -= c, data += c) { c = min(left, 4U); - for (val = 0, i = 0; i < c; ++i) - val = (val << 8) + *data++; + for (val = 0, i = 0; i < c; ++i) { + if (byte_oriented) + val = (val << 8) + data[i]; + else + val = (val << 8) + data[c - i - 1]; + }
ret = sf1_write(adapter, c, c != left, 1, val); if (ret) @@@ -3096,7 -3103,8 +3103,8 @@@ t4_write_reg(adapter, SF_OP_A, 0); /* unlock SF */
/* Read the page to verify the write succeeded */ - ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, 1); + ret = t4_read_flash(adapter, addr & ~0xff, ARRAY_SIZE(buf), buf, + byte_oriented); if (ret) return ret;
@@@ -3692,7 -3700,7 +3700,7 @@@ int t4_load_fw(struct adapter *adap, co */ memcpy(first_page, fw_data, SF_PAGE_SIZE); ((struct fw_hdr *)first_page)->fw_ver = cpu_to_be32(0xffffffff); - ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page); + ret = t4_write_flash(adap, fw_start, SF_PAGE_SIZE, first_page, true); if (ret) goto out;
@@@ -3700,14 -3708,14 +3708,14 @@@ for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { addr += SF_PAGE_SIZE; fw_data += SF_PAGE_SIZE; - ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data); + ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, fw_data, true); if (ret) goto out; }
- ret = t4_write_flash(adap, - fw_start + offsetof(struct fw_hdr, fw_ver), - sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver); + ret = t4_write_flash(adap, fw_start + offsetof(struct fw_hdr, fw_ver), + sizeof(hdr->fw_ver), (const u8 *)&hdr->fw_ver, + true); out: if (ret) dev_err(adap->pdev_dev, "firmware download failed, error %d\n", @@@ -3812,9 -3820,11 +3820,11 @@@ int t4_load_phy_fw(struct adapter *adap /* Copy the supplied PHY Firmware image to the adapter memory location * allocated by the adapter firmware. */ + spin_lock_bh(&adap->win0_lock); ret = t4_memory_rw(adap, win, mtype, maddr, phy_fw_size, (__be32 *)phy_fw_data, T4_MEMORY_WRITE); + spin_unlock_bh(&adap->win0_lock); if (ret) return ret;
@@@ -6983,7 -6993,7 +6993,7 @@@ int t4_fw_bye(struct adapter *adap, uns }
/** - * t4_init_cmd - ask FW to initialize the device + * t4_early_init - ask FW to initialize the device * @adap: the adapter * @mbox: mailbox to use for the FW command * @@@ -7782,6 -7792,7 +7792,6 @@@ int t4_free_encap_mac_filt(struct adapt int idx, bool sleep_ok) { struct fw_vi_mac_exact *p; - u8 addr[] = {0, 0, 0, 0, 0, 0}; struct fw_vi_mac_cmd c; int ret = 0; u32 exact; @@@ -7798,7 -7809,7 +7808,7 @@@ p = c.u.exact; p->valid_to_idx = cpu_to_be16(FW_VI_MAC_CMD_VALID_F | FW_VI_MAC_CMD_IDX_V(idx)); - memcpy(p->macaddr, addr, sizeof(p->macaddr)); + eth_zero_addr(p->macaddr); ret = t4_wr_mbox_meat(adap, adap->mbox, &c, sizeof(c), &c, sleep_ok); return ret; } @@@ -10207,7 -10218,7 +10217,7 @@@ int t4_load_cfg(struct adapter *adap, c n = size - i; else n = SF_PAGE_SIZE; - ret = t4_write_flash(adap, addr, n, cfg_data); + ret = t4_write_flash(adap, addr, n, cfg_data, true); if (ret) goto out;
@@@ -10223,7 -10234,7 +10233,7 @@@ out }
/** - * t4_set_vf_mac - Set MAC address for the specified VF + * t4_set_vf_mac_acl - Set MAC address for the specified VF * @adapter: The adapter * @vf: one of the VFs instantiated by the specified PF * @naddr: the number of MAC addresses @@@ -10676,13 -10687,14 +10686,14 @@@ int t4_load_boot(struct adapter *adap, for (size -= SF_PAGE_SIZE; size; size -= SF_PAGE_SIZE) { addr += SF_PAGE_SIZE; boot_data += SF_PAGE_SIZE; - ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data); + ret = t4_write_flash(adap, addr, SF_PAGE_SIZE, boot_data, + false); if (ret) goto out; }
ret = t4_write_flash(adap, boot_sector, SF_PAGE_SIZE, - (const u8 *)header); + (const u8 *)header, false);
out: if (ret) @@@ -10757,7 -10769,7 +10768,7 @@@ int t4_load_bootcfg(struct adapter *ada for (i = 0; i < size; i += SF_PAGE_SIZE) { n = min_t(u32, size - i, SF_PAGE_SIZE);
- ret = t4_write_flash(adap, addr, n, cfg_data); + ret = t4_write_flash(adap, addr, n, cfg_data, false); if (ret) goto out;
@@@ -10769,7 -10781,8 +10780,8 @@@ for (i = 0; i < npad; i++) { u8 data = 0;
- ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data); + ret = t4_write_flash(adap, cfg_addr + size + i, 1, &data, + false); if (ret) goto out; } diff --combined drivers/net/ethernet/intel/ice/ice_lib.c index cb858be8f4de,27f9dac8719c..dde9802c6c72 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@@ -169,13 -169,12 +169,13 @@@ static void ice_vsi_set_num_qs(struct i
switch (vsi->type) { case ICE_VSI_PF: - vsi->alloc_txq = min3(pf->num_lan_msix, - ice_get_avail_txq_count(pf), - (u16)num_online_cpus()); if (vsi->req_txq) { vsi->alloc_txq = vsi->req_txq; vsi->num_txq = vsi->req_txq; + } else { + vsi->alloc_txq = min3(pf->num_lan_msix, + ice_get_avail_txq_count(pf), + (u16)num_online_cpus()); }
pf->num_lan_tx = vsi->alloc_txq; @@@ -184,13 -183,12 +184,13 @@@ if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { vsi->alloc_rxq = 1; } else { - vsi->alloc_rxq = min3(pf->num_lan_msix, - ice_get_avail_rxq_count(pf), - (u16)num_online_cpus()); if (vsi->req_rxq) { vsi->alloc_rxq = vsi->req_rxq; vsi->num_rxq = vsi->req_rxq; + } else { + vsi->alloc_rxq = min3(pf->num_lan_msix, + ice_get_avail_rxq_count(pf), + (u16)num_online_cpus()); } }
@@@ -630,17 -628,6 +630,17 @@@ bool ice_is_safe_mode(struct ice_pf *pf return !test_bit(ICE_FLAG_ADV_FEATURES, pf->flags); }
+/** + * ice_is_aux_ena + * @pf: pointer to the PF struct + * + * returns true if AUX devices/drivers are supported, false otherwise + */ +bool ice_is_aux_ena(struct ice_pf *pf) +{ + return test_bit(ICE_FLAG_AUX_ENA, pf->flags); +} + /** * ice_vsi_clean_rss_flow_fld - Delete RSS configuration * @vsi: the VSI being cleaned up @@@ -1205,11 -1192,11 +1205,11 @@@ static int ice_vsi_setup_vector_base(st num_q_vectors = vsi->num_q_vectors; /* reserve slots from OS requested IRQs */ if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { - struct ice_vf *vf; int i;
ice_for_each_vf(pf, i) { - vf = &pf->vf[i]; + struct ice_vf *vf = &pf->vf[i]; + if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) { base = pf->vsi[vf->ctrl_vsi_idx]->base_vector; break; @@@ -1298,7 -1285,6 +1298,7 @@@ static int ice_vsi_alloc_rings(struct i ring->reg_idx = vsi->txq_map[i]; ring->ring_active = false; ring->vsi = vsi; + ring->tx_tstamps = &pf->ptp.port.tx; ring->dev = dev; ring->count = vsi->num_tx_desc; WRITE_ONCE(vsi->tx_rings[i], ring); @@@ -1676,11 -1662,9 +1676,11 @@@ void ice_vsi_cfg_frame_size(struct ice_ * @pf_q: index of the Rx queue in the PF's queue space * @rxdid: flexible descriptor RXDID * @prio: priority for the RXDID for this queue + * @ena_ts: true to enable timestamp and false to disable timestamp */ void -ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio) +ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio, + bool ena_ts) { int regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
@@@ -1695,40 -1679,9 +1695,40 @@@ regval |= (prio << QRXFLXP_CNTXT_RXDID_PRIO_S) & QRXFLXP_CNTXT_RXDID_PRIO_M;
+ if (ena_ts) + /* Enable TimeSync on this queue */ + regval |= QRXFLXP_CNTXT_TS_M; + wr32(hw, QRXFLXP_CNTXT(pf_q), regval); }
+int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx) +{ + if (q_idx >= vsi->num_rxq) + return -EINVAL; + + return ice_vsi_cfg_rxq(vsi->rx_rings[q_idx]); +} + +int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_ring **tx_rings, u16 q_idx) +{ + struct ice_aqc_add_tx_qgrp *qg_buf; + int err; + + if (q_idx >= vsi->alloc_txq || !tx_rings || !tx_rings[q_idx]) + return -EINVAL; + + qg_buf = kzalloc(struct_size(qg_buf, txqs, 1), GFP_KERNEL); + if (!qg_buf) + return -ENOMEM; + + qg_buf->num_txqs = 1; + + err = ice_vsi_cfg_txq(vsi, tx_rings[q_idx], qg_buf); + kfree(qg_buf); + return err; +} + /** * ice_vsi_cfg_rxqs - Configure the VSI for Rx * @vsi: the VSI being configured @@@ -1746,11 -1699,15 +1746,11 @@@ int ice_vsi_cfg_rxqs(struct ice_vsi *vs ice_vsi_cfg_frame_size(vsi); setup_rings: /* set up individual rings */ - for (i = 0; i < vsi->num_rxq; i++) { - int err; + ice_for_each_rxq(vsi, i) { + int err = ice_vsi_cfg_rxq(vsi->rx_rings[i]);
- err = ice_setup_rx_ctx(vsi->rx_rings[i]); - if (err) { - dev_err(ice_pf_to_dev(vsi->back), "ice_setup_rx_ctx failed for RxQ %d, err %d\n", - i, err); + if (err) return err; - } }
return 0; @@@ -1760,12 -1717,13 +1760,13 @@@ * ice_vsi_cfg_txqs - Configure the VSI for Tx * @vsi: the VSI being configured * @rings: Tx ring array to be configured + * @count: number of Tx ring array elements * * Return 0 on success and a negative value on error * Configure the Tx VSI for operation. */ static int - ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings) + ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, u16 count) { struct ice_aqc_add_tx_qgrp *qg_buf; u16 q_idx = 0; @@@ -1777,7 -1735,7 +1778,7 @@@
qg_buf->num_txqs = 1;
- for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) { + for (q_idx = 0; q_idx < count; q_idx++) { err = ice_vsi_cfg_txq(vsi, rings[q_idx], qg_buf); if (err) goto err_cfg_txqs; @@@ -1797,7 -1755,7 +1798,7 @@@ err_cfg_txqs */ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi) { - return ice_vsi_cfg_txqs(vsi, vsi->tx_rings); + return ice_vsi_cfg_txqs(vsi, vsi->tx_rings, vsi->num_txq); }
/** @@@ -1812,7 -1770,7 +1813,7 @@@ int ice_vsi_cfg_xdp_txqs(struct ice_vs int ret; int i;
- ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings); + ret = ice_vsi_cfg_txqs(vsi, vsi->xdp_rings, vsi->num_xdp_txq); if (ret) return ret;
@@@ -2052,17 -2010,18 +2053,18 @@@ int ice_vsi_stop_all_rx_rings(struct ic * @rst_src: reset source * @rel_vmvf_num: Relative ID of VF/VM * @rings: Tx ring array to be stopped + * @count: number of Tx ring array elements */ static int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, - u16 rel_vmvf_num, struct ice_ring **rings) + u16 rel_vmvf_num, struct ice_ring **rings, u16 count) { u16 q_idx;
if (vsi->num_txq > ICE_LAN_TXQ_MAX_QDIS) return -EINVAL;
- for (q_idx = 0; q_idx < vsi->num_txq; q_idx++) { + for (q_idx = 0; q_idx < count; q_idx++) { struct ice_txq_meta txq_meta = { }; int status;
@@@ -2090,7 -2049,7 +2092,7 @@@ in ice_vsi_stop_lan_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, u16 rel_vmvf_num) { - return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings); + return ice_vsi_stop_tx_rings(vsi, rst_src, rel_vmvf_num, vsi->tx_rings, vsi->num_txq); }
/** @@@ -2099,7 -2058,7 +2101,7 @@@ */ int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi) { - return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings); + return ice_vsi_stop_tx_rings(vsi, ICE_NO_RESET, 0, vsi->xdp_rings, vsi->num_xdp_txq); }
/** @@@ -2258,7 -2217,7 +2260,7 @@@ void ice_cfg_sw_lldp(struct ice_vsi *vs }
if (status) - dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n", + dev_dbg(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n", create ? "adding" : "removing", tx ? "TX" : "RX", vsi->vsi_num, ice_stat_str(status)); } @@@ -2873,11 -2832,11 +2875,11 @@@ int ice_vsi_release(struct ice_vsi *vsi * cleared in the same manner. */ if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) { - struct ice_vf *vf; int i;
ice_for_each_vf(pf, i) { - vf = &pf->vf[i]; + struct ice_vf *vf = &pf->vf[i]; + if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) break; } @@@ -3237,34 -3196,6 +3239,34 @@@ bool ice_is_reset_in_progress(unsigned test_bit(ICE_GLOBR_REQ, state); }
+/** + * ice_wait_for_reset - Wait for driver to finish reset and rebuild + * @pf: pointer to the PF structure + * @timeout: length of time to wait, in jiffies + * + * Wait (sleep) for a short time until the driver finishes cleaning up from + * a device reset. The caller must be able to sleep. Use this to delay + * operations that could fail while the driver is cleaning up after a device + * reset. + * + * Returns 0 on success, -EBUSY if the reset is not finished within the + * timeout, and -ERESTARTSYS if the thread was interrupted. + */ +int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout) +{ + long ret; + + ret = wait_event_interruptible_timeout(pf->reset_wait_queue, + !ice_is_reset_in_progress(pf->state), + timeout); + if (ret < 0) + return ret; + else if (!ret) + return -EBUSY; + else + return 0; +} + #ifdef CONFIG_DCB /** * ice_vsi_update_q_map - update our copy of the VSI info with new queue map @@@ -3399,22 -3330,13 +3401,22 @@@ int ice_status_to_errno(enum ice_statu case ICE_ERR_DOES_NOT_EXIST: return -ENOENT; case ICE_ERR_OUT_OF_RANGE: - return -ENOTTY; + case ICE_ERR_AQ_ERROR: + case ICE_ERR_AQ_TIMEOUT: + case ICE_ERR_AQ_EMPTY: + case ICE_ERR_AQ_FW_CRITICAL: + return -EIO; case ICE_ERR_PARAM: + case ICE_ERR_INVAL_SIZE: return -EINVAL; case ICE_ERR_NO_MEMORY: return -ENOMEM; case ICE_ERR_MAX_LIMIT: return -EAGAIN; + case ICE_ERR_RESET_ONGOING: + return -EBUSY; + case ICE_ERR_AQ_FULL: + return -ENOSPC; default: return -EINVAL; } diff --combined drivers/net/ethernet/intel/ice/ice_main.c index 5ca6c0356499,0eb2307325d3..5c3ea504770a --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@@ -35,8 -35,6 +35,8 @@@ MODULE_PARM_DESC(debug, "netif level (0 MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); #endif /* !CONFIG_DYNAMIC_DEBUG */
+static DEFINE_IDA(ice_aux_ida); + static struct workqueue_struct *ice_wq; static const struct net_device_ops ice_netdev_safe_mode_ops; static const struct net_device_ops ice_netdev_ops; @@@ -456,8 -454,6 +456,8 @@@ ice_prepare_for_reset(struct ice_pf *pf if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return;
+ ice_unplug_aux_dev(pf); + /* Notify VFs of impending reset */ if (ice_check_sq_alive(hw, &hw->mailboxq)) ice_vc_notify_reset(pf); @@@ -471,9 -467,6 +471,9 @@@ /* disable the VSIs and their queues that are not already DOWN */ ice_pf_dis_all_vsi(pf, false);
+ if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_release(pf); + if (hw->port_info) ice_sched_clear_port(hw->port_info);
@@@ -506,7 -499,6 +506,7 @@@ static void ice_do_reset(struct ice_pf clear_bit(ICE_PFR_REQ, pf->state); clear_bit(ICE_CORER_REQ, pf->state); clear_bit(ICE_GLOBR_REQ, pf->state); + wake_up(&pf->reset_wait_queue); return; }
@@@ -519,7 -511,6 +519,7 @@@ ice_rebuild(pf, reset_type); clear_bit(ICE_PREPARED_FOR_RESET, pf->state); clear_bit(ICE_PFR_REQ, pf->state); + wake_up(&pf->reset_wait_queue); ice_reset_all_vfs(pf, true); } } @@@ -570,7 -561,6 +570,7 @@@ static void ice_reset_subtask(struct ic clear_bit(ICE_PFR_REQ, pf->state); clear_bit(ICE_CORER_REQ, pf->state); clear_bit(ICE_GLOBR_REQ, pf->state); + wake_up(&pf->reset_wait_queue); ice_reset_all_vfs(pf, true); }
@@@ -867,38 -857,6 +867,38 @@@ static void ice_set_dflt_mib(struct ice kfree(lldpmib); }
+/** + * ice_check_module_power + * @pf: pointer to PF struct + * @link_cfg_err: bitmap from the link info structure + * + * check module power level returned by a previous call to aq_get_link_info + * and print error messages if module power level is not supported + */ +static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err) +{ + /* if module power level is supported, clear the flag */ + if (!(link_cfg_err & (ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT | + ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED))) { + clear_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags); + return; + } + + /* if ICE_FLAG_MOD_POWER_UNSUPPORTED was previously set and the + * above block didn't clear this bit, there's nothing to do + */ + if (test_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags)) + return; + + if (link_cfg_err & ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT) { + dev_err(ice_pf_to_dev(pf), "The installed module is incompatible with the device's NVM image. Cannot start link\n"); + set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags); + } else if (link_cfg_err & ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED) { + dev_err(ice_pf_to_dev(pf), "The module's power requirements exceed the device's power supply. Cannot start link\n"); + set_bit(ICE_FLAG_MOD_POWER_UNSUPPORTED, pf->flags); + } +} + /** * ice_link_event - process the link event * @pf: PF that the link event is associated with @@@ -934,8 -892,6 +934,8 @@@ ice_link_event(struct ice_pf *pf, struc pi->lport, ice_stat_str(status), ice_aq_str(pi->hw->adminq.sq_last_status));
+ ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + /* Check if the link state is up after updating link info, and treat * this event as an UP event since the link is actually UP now. */ @@@ -1234,10 -1190,6 +1234,10 @@@ static int __ice_clean_ctrlq(struct ice cq = &hw->adminq; qtype = "Admin"; break; + case ICE_CTL_Q_SB: + cq = &hw->sbq; + qtype = "Sideband"; + break; case ICE_CTL_Q_MAILBOX: cq = &hw->mailboxq; qtype = "Mailbox"; @@@ -1411,34 -1363,6 +1411,34 @@@ static void ice_clean_mailboxq_subtask( ice_flush(hw); }
+/** + * ice_clean_sbq_subtask - clean the Sideband Queue rings + * @pf: board private structure + */ +static void ice_clean_sbq_subtask(struct ice_pf *pf) +{ + struct ice_hw *hw = &pf->hw; + + /* Nothing to do here if sideband queue is not supported */ + if (!ice_is_sbq_supported(hw)) { + clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state); + return; + } + + if (!test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state)) + return; + + if (__ice_clean_ctrlq(pf, ICE_CTL_Q_SB)) + return; + + clear_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state); + + if (ice_ctrlq_pending(hw, &hw->sbq)) + __ice_clean_ctrlq(pf, ICE_CTL_Q_SB); + + ice_flush(hw); +} + /** * ice_service_task_schedule - schedule the service task to wake up * @pf: board private structure @@@ -2082,8 -2006,6 +2082,8 @@@ static void ice_check_media_subtask(str if (err) return;
+ ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) ice_init_phy_user_cfg(pi); @@@ -2141,7 -2063,6 +2141,7 @@@ static void ice_service_task(struct wor
ice_process_vflr_event(pf); ice_clean_mailboxq_subtask(pf); + ice_clean_sbq_subtask(pf); ice_sync_arfs_fltrs(pf); ice_flush_fdir_ctx(pf);
@@@ -2157,7 -2078,6 +2157,7 @@@ test_bit(ICE_VFLR_EVENT_PENDING, pf->state) || test_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state) || test_bit(ICE_FD_VF_FLUSH_CTX, pf->state) || + test_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state) || test_bit(ICE_ADMINQ_EVENT_PENDING, pf->state)) mod_timer(&pf->serv_tmr, jiffies); } @@@ -2176,10 -2096,6 +2176,10 @@@ static void ice_set_ctrlq_len(struct ic hw->mailboxq.num_sq_entries = ICE_MBXSQ_LEN; hw->mailboxq.rq_buf_size = ICE_MBXQ_MAX_BUF_LEN; hw->mailboxq.sq_buf_size = ICE_MBXQ_MAX_BUF_LEN; + hw->sbq.num_rq_entries = ICE_SBQ_LEN; + hw->sbq.num_sq_entries = ICE_SBQ_LEN; + hw->sbq.rq_buf_size = ICE_SBQ_MAX_BUF_LEN; + hw->sbq.sq_buf_size = ICE_SBQ_MAX_BUF_LEN; }
/** @@@ -2202,8 -2118,6 +2202,8 @@@ int ice_schedule_reset(struct ice_pf *p return -EBUSY; }
+ ice_unplug_aux_dev(pf); + switch (reset) { case ICE_RESET_PFR: set_bit(ICE_PFR_REQ, pf->state); @@@ -2641,6 -2555,20 +2641,20 @@@ ice_xdp_setup_prog(struct ice_vsi *vsi return (ret || xdp_ring_err) ? -ENOMEM : 0; }
+ /** + * ice_xdp_safe_mode - XDP handler for safe mode + * @dev: netdevice + * @xdp: XDP command + */ + static int ice_xdp_safe_mode(struct net_device __always_unused *dev, + struct netdev_bpf *xdp) + { + NL_SET_ERR_MSG_MOD(xdp->extack, + "Please provide working DDP firmware package in order to use XDP\n" + "Refer to Documentation/networking/device_drivers/ethernet/intel/ice.rst"); + return -EOPNOTSUPP; + } + /** * ice_xdp - implements XDP handler * @dev: netdevice @@@ -2694,7 -2622,6 +2708,7 @@@ static void ice_ena_misc_vector(struct PFINT_OICR_PCI_EXCEPTION_M | PFINT_OICR_VFLR_M | PFINT_OICR_HMC_ERR_M | + PFINT_OICR_PE_PUSH_M | PFINT_OICR_PE_CRITERR_M);
wr32(hw, PFINT_OICR_ENA, val); @@@ -2720,7 -2647,6 +2734,7 @@@ static irqreturn_t ice_misc_intr(int __ dev = ice_pf_to_dev(pf); set_bit(ICE_ADMINQ_EVENT_PENDING, pf->state); set_bit(ICE_MAILBOXQ_EVENT_PENDING, pf->state); + set_bit(ICE_SIDEBANDQ_EVENT_PENDING, pf->state);
oicr = rd32(hw, PFINT_OICR); ena_mask = rd32(hw, PFINT_OICR_ENA); @@@ -2766,6 -2692,8 +2780,6 @@@
/* If a reset cycle isn't already in progress, we set a bit in * pf->state so that the service task can start a reset/rebuild. - * We also make note of which reset happened so that peer - * devices/drivers can be informed. */ if (!test_and_set_bit(ICE_RESET_OICR_RECV, pf->state)) { if (reset == ICE_RESET_CORER) @@@ -2792,24 -2720,11 +2806,24 @@@ } }
- if (oicr & PFINT_OICR_HMC_ERR_M) { - ena_mask &= ~PFINT_OICR_HMC_ERR_M; - dev_dbg(dev, "HMC Error interrupt - info 0x%x, data 0x%x\n", - rd32(hw, PFHMC_ERRORINFO), - rd32(hw, PFHMC_ERRORDATA)); + if (oicr & PFINT_OICR_TSYN_TX_M) { + ena_mask &= ~PFINT_OICR_TSYN_TX_M; + ice_ptp_process_ts(pf); + } + +#define ICE_AUX_CRIT_ERR (PFINT_OICR_PE_CRITERR_M | PFINT_OICR_HMC_ERR_M | PFINT_OICR_PE_PUSH_M) + if (oicr & ICE_AUX_CRIT_ERR) { + struct iidc_event *event; + + ena_mask &= ~ICE_AUX_CRIT_ERR; + event = kzalloc(sizeof(*event), GFP_KERNEL); + if (event) { + set_bit(IIDC_EVENT_CRIT_ERR, event->type); + /* report the entire OICR value to AUX driver */ + event->reg = oicr; + ice_send_event_to_aux(pf, event); + kfree(event); + } }
/* Report any remaining unexpected interrupts */ @@@ -2819,7 -2734,8 +2833,7 @@@ /* If a critical error is pending there is no choice but to * reset the device. */ - if (oicr & (PFINT_OICR_PE_CRITERR_M | - PFINT_OICR_PCI_EXCEPTION_M | + if (oicr & (PFINT_OICR_PCI_EXCEPTION_M | PFINT_OICR_ECC_ERR_M)) { set_bit(ICE_PFR_REQ, pf->state); ice_service_task_schedule(pf); @@@ -2847,9 -2763,6 +2861,9 @@@ static void ice_dis_ctrlq_interrupts(st wr32(hw, PFINT_MBX_CTL, rd32(hw, PFINT_MBX_CTL) & ~PFINT_MBX_CTL_CAUSE_ENA_M);
+ wr32(hw, PFINT_SB_CTL, + rd32(hw, PFINT_SB_CTL) & ~PFINT_SB_CTL_CAUSE_ENA_M); + /* disable Control queue Interrupt causes */ wr32(hw, PFINT_OICR_CTL, rd32(hw, PFINT_OICR_CTL) & ~PFINT_OICR_CTL_CAUSE_ENA_M); @@@ -2904,11 -2817,6 +2918,11 @@@ static void ice_ena_ctrlq_interrupts(st PFINT_MBX_CTL_CAUSE_ENA_M); wr32(hw, PFINT_MBX_CTL, val);
+ /* This enables Sideband queue Interrupt causes */ + val = ((reg_idx & PFINT_SB_CTL_MSIX_INDX_M) | + PFINT_SB_CTL_CAUSE_ENA_M); + wr32(hw, PFINT_SB_CTL, val); + ice_flush(hw); }
@@@ -3078,6 -2986,7 +3092,6 @@@ static void ice_set_netdev_features(str */ static int ice_cfg_netdev(struct ice_vsi *vsi) { - struct ice_pf *pf = vsi->back; struct ice_netdev_priv *np; struct net_device *netdev; u8 mac_addr[ETH_ALEN]; @@@ -3097,7 -3006,7 +3111,7 @@@ ice_set_ops(netdev);
if (vsi->type == ICE_VSI_PF) { - SET_NETDEV_DEV(netdev, ice_pf_to_dev(pf)); + SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back)); ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr); ether_addr_copy(netdev->dev_addr, mac_addr); ether_addr_copy(netdev->perm_addr, mac_addr); @@@ -3371,9 -3280,6 +3385,9 @@@ static void ice_deinit_pf(struct ice_p bitmap_free(pf->avail_rxqs); pf->avail_rxqs = NULL; } + + if (pf->ptp.clock) + ptp_clock_unregister(pf->ptp.clock); }
/** @@@ -3384,12 -3290,6 +3398,12 @@@ static void ice_set_pf_caps(struct ice_ { struct ice_hw_func_caps *func_caps = &pf->hw.func_caps;
+ clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); + clear_bit(ICE_FLAG_AUX_ENA, pf->flags); + if (func_caps->common_cap.rdma) { + set_bit(ICE_FLAG_RDMA_ENA, pf->flags); + set_bit(ICE_FLAG_AUX_ENA, pf->flags); + } clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); if (func_caps->common_cap.dcb) set_bit(ICE_FLAG_DCB_CAPABLE, pf->flags); @@@ -3420,10 -3320,6 +3434,10 @@@ func_caps->fd_fltr_best_effort); }
+ clear_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); + if (func_caps->common_cap.ieee_1588) + set_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags); + pf->max_pf_txqs = func_caps->common_cap.num_txq; pf->max_pf_rxqs = func_caps->common_cap.num_rxq; } @@@ -3443,8 -3339,6 +3457,8 @@@ static int ice_init_pf(struct ice_pf *p spin_lock_init(&pf->aq_wait_lock); init_waitqueue_head(&pf->aq_wait_queue);
+ init_waitqueue_head(&pf->reset_wait_queue); + /* setup service timer and periodic service task */ timer_setup(&pf->serv_tmr, ice_service_timer, 0); pf->serv_tmr_period = HZ; @@@ -3475,12 -3369,11 +3489,12 @@@ */ static int ice_ena_msix_range(struct ice_pf *pf) { - int v_left, v_actual, v_other, v_budget = 0; + int num_cpus, v_left, v_actual, v_other, v_budget = 0; struct device *dev = ice_pf_to_dev(pf); int needed, err, i;
v_left = pf->hw.func_caps.common_cap.num_msix_vectors; + num_cpus = num_online_cpus();
/* reserve for LAN miscellaneous handler */ needed = ICE_MIN_LAN_OICR_MSIX; @@@ -3502,23 -3395,13 +3516,23 @@@ v_other = v_budget;
/* reserve vectors for LAN traffic */ - needed = min_t(int, num_online_cpus(), v_left); + needed = num_cpus; if (v_left < needed) goto no_hw_vecs_left_err; pf->num_lan_msix = needed; v_budget += needed; v_left -= needed;
+ /* reserve vectors for RDMA auxiliary driver */ + if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) { + needed = num_cpus + ICE_RDMA_NUM_AEQ_MSIX; + if (v_left < needed) + goto no_hw_vecs_left_err; + pf->num_rdma_msix = needed; + v_budget += needed; + v_left -= needed; + } + pf->msix_entries = devm_kcalloc(dev, v_budget, sizeof(*pf->msix_entries), GFP_KERNEL); if (!pf->msix_entries) { @@@ -3548,46 -3431,16 +3562,46 @@@ err = -ERANGE; goto msix_err; } else { - int v_traffic = v_actual - v_other; + int v_remain = v_actual - v_other; + int v_rdma = 0, v_min_rdma = 0; + + if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) { + /* Need at least 1 interrupt in addition to + * AEQ MSIX + */ + v_rdma = ICE_RDMA_NUM_AEQ_MSIX + 1; + v_min_rdma = ICE_MIN_RDMA_MSIX; + }
if (v_actual == ICE_MIN_MSIX || - v_traffic < ICE_MIN_LAN_TXRX_MSIX) + v_remain < ICE_MIN_LAN_TXRX_MSIX + v_min_rdma) { + dev_warn(dev, "Not enough MSI-X vectors to support RDMA.\n"); + clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); + + pf->num_rdma_msix = 0; pf->num_lan_msix = ICE_MIN_LAN_TXRX_MSIX; - else - pf->num_lan_msix = v_traffic; + } else if ((v_remain < ICE_MIN_LAN_TXRX_MSIX + v_rdma) || + (v_remain - v_rdma < v_rdma)) { + /* Support minimum RDMA and give remaining + * vectors to LAN MSIX + */ + pf->num_rdma_msix = v_min_rdma; + pf->num_lan_msix = v_remain - v_min_rdma; + } else { + /* Split remaining MSIX with RDMA after + * accounting for AEQ MSIX + */ + pf->num_rdma_msix = (v_remain - ICE_RDMA_NUM_AEQ_MSIX) / 2 + + ICE_RDMA_NUM_AEQ_MSIX; + pf->num_lan_msix = v_remain - pf->num_rdma_msix; + }
dev_notice(dev, "Enabled %d MSI-X vectors for LAN traffic.\n", pf->num_lan_msix); + + if (test_bit(ICE_FLAG_RDMA_ENA, pf->flags)) + dev_notice(dev, "Enabled %d MSI-X vectors for RDMA.\n", + pf->num_rdma_msix); } }
@@@ -3602,7 -3455,6 +3616,7 @@@ no_hw_vecs_left_err needed, v_left); err = -ERANGE; exit_err: + pf->num_rdma_msix = 0; pf->num_lan_msix = 0; return err; } @@@ -4366,8 -4218,6 +4380,8 @@@ ice_probe(struct pci_dev *pdev, const s
ice_init_link_dflt_override(pf->hw.port_info);
+ ice_check_module_power(pf, pf->hw.port_info->phy.link_info.link_cfg_err); + /* if media available, initialize PHY settings */ if (pf->hw.port_info->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { @@@ -4406,8 -4256,6 +4420,8 @@@ }
/* initialize DDP driven features */ + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_init(pf);
/* Note: Flow director init failure is non-fatal to load */ if (ice_init_fdir(pf)) @@@ -4434,29 -4282,8 +4448,29 @@@ probe_done
/* ready to go, so clear down state bit */ clear_bit(ICE_DOWN, pf->state); + if (ice_is_aux_ena(pf)) { + pf->aux_idx = ida_alloc(&ice_aux_ida, GFP_KERNEL); + if (pf->aux_idx < 0) { + dev_err(dev, "Failed to allocate device ID for AUX driver\n"); + err = -ENOMEM; + goto err_netdev_reg; + } + + err = ice_init_rdma(pf); + if (err) { + dev_err(dev, "Failed to initialize RDMA: %d\n", err); + err = -EIO; + goto err_init_aux_unroll; + } + } else { + dev_warn(dev, "RDMA is not supported on this device\n"); + } + return 0;
+err_init_aux_unroll: + pf->adev = NULL; + ida_free(&ice_aux_ida, pf->aux_idx); err_netdev_reg: err_send_version_unroll: ice_vsi_release_all(pf); @@@ -4566,17 -4393,13 +4580,17 @@@ static void ice_remove(struct pci_dev * ice_free_vfs(pf); }
- set_bit(ICE_DOWN, pf->state); ice_service_task_stop(pf);
ice_aq_cancel_waiting_tasks(pf); + ice_unplug_aux_dev(pf); + ida_free(&ice_aux_ida, pf->aux_idx); + set_bit(ICE_DOWN, pf->state);
mutex_destroy(&(&pf->hw)->fdir_fltr_lock); ice_deinit_lag(pf); + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_release(pf); if (!ice_is_safe_mode(pf)) ice_remove_arfs(pf); ice_setup_mc_magic_wake(pf); @@@ -4729,8 -4552,6 +4743,8 @@@ static int __maybe_unused ice_suspend(s */ disabled = ice_service_task_stop(pf);
+ ice_unplug_aux_dev(pf); + /* Already suspended?, then there is nothing to do */ if (test_and_set_bit(ICE_SUSPENDED, pf->state)) { if (!disabled) @@@ -5630,6 -5451,7 +5644,6 @@@ ice_update_vsi_tx_ring_stats(struct ice static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) { struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats; - struct ice_ring *ring; u64 pkts, bytes; int i;
@@@ -5653,8 -5475,7 +5667,8 @@@
/* update Rx rings counters */ ice_for_each_rxq(vsi, i) { - ring = READ_ONCE(vsi->rx_rings[i]); + struct ice_ring *ring = READ_ONCE(vsi->rx_rings[i]); + ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes); vsi_stats->rx_packets += pkts; vsi_stats->rx_bytes += bytes; @@@ -6321,12 -6142,6 +6335,12 @@@ static void ice_rebuild(struct ice_pf *
ice_clear_pxe_mode(hw);
+ ret = ice_init_nvm(hw); + if (ret) { + dev_err(dev, "ice_init_nvm failed %s\n", ice_stat_str(ret)); + goto err_init_ctrlq; + } + ret = ice_get_caps(hw); if (ret) { dev_err(dev, "ice_get_caps failed %s\n", ice_stat_str(ret)); @@@ -6368,13 -6183,6 +6382,13 @@@ if (test_bit(ICE_FLAG_DCB_ENA, pf->flags)) ice_dcb_rebuild(pf);
+ /* If the PF previously had enabled PTP, PTP init needs to happen before + * the VSI rebuild. If not, this causes the PTP link status events to + * fail. + */ + if (test_bit(ICE_FLAG_PTP_SUPPORTED, pf->flags)) + ice_ptp_init(pf); + /* rebuild PF VSI */ err = ice_vsi_rebuild_by_type(pf, ICE_VSI_PF); if (err) { @@@ -6414,8 -6222,6 +6428,8 @@@
/* if we get here, reset flow is successful */ clear_bit(ICE_RESET_FAILED, pf->state); + + ice_plug_aux_dev(pf); return;
err_vsi_rebuild: @@@ -6454,9 -6260,7 +6468,9 @@@ static int ice_change_mtu(struct net_de struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; + struct iidc_event *event; u8 count = 0; + int err = 0;
if (new_mtu == (int)netdev->mtu) { netdev_warn(netdev, "MTU is already %u\n", netdev->mtu); @@@ -6489,59 -6293,27 +6503,59 @@@ return -EBUSY; }
+ event = kzalloc(sizeof(*event), GFP_KERNEL); + if (!event) + return -ENOMEM; + + set_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type); + ice_send_event_to_aux(pf, event); + clear_bit(IIDC_EVENT_BEFORE_MTU_CHANGE, event->type); + netdev->mtu = (unsigned int)new_mtu;
/* if VSI is up, bring it down and then back up */ if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) { - int err; - err = ice_down(vsi); if (err) { netdev_err(netdev, "change MTU if_down err %d\n", err); - return err; + goto event_after; }
err = ice_up(vsi); if (err) { netdev_err(netdev, "change MTU if_up err %d\n", err); - return err; + goto event_after; } }
netdev_dbg(netdev, "changed MTU to %d\n", new_mtu); - return 0; +event_after: + set_bit(IIDC_EVENT_AFTER_MTU_CHANGE, event->type); + ice_send_event_to_aux(pf, event); + kfree(event); + + return err; +} + +/** + * ice_do_ioctl - Access the hwtstamp interface + * @netdev: network interface device structure + * @ifr: interface request data + * @cmd: ioctl command + */ +static int ice_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + + switch (cmd) { + case SIOCGHWTSTAMP: + return ice_ptp_get_ts_config(pf, ifr); + case SIOCSHWTSTAMP: + return ice_ptp_set_ts_config(pf, ifr); + default: + return -EOPNOTSUPP; + } }
/** @@@ -7060,8 -6832,6 +7074,8 @@@ int ice_open_internal(struct net_devic return -EIO; }
+ ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + /* Set PHY if there is media, otherwise, turn off PHY */ if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { clear_bit(ICE_FLAG_NO_MEDIA, pf->flags); @@@ -7181,6 -6951,7 +7195,7 @@@ static const struct net_device_ops ice_ .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, .ndo_tx_timeout = ice_tx_timeout, + .ndo_bpf = ice_xdp_safe_mode, };
static const struct net_device_ops ice_netdev_ops = { @@@ -7194,7 -6965,6 +7209,7 @@@ .ndo_change_mtu = ice_change_mtu, .ndo_get_stats64 = ice_get_stats64, .ndo_set_tx_maxrate = ice_set_tx_maxrate, + .ndo_do_ioctl = ice_do_ioctl, .ndo_set_vf_spoofchk = ice_set_vf_spoofchk, .ndo_set_vf_mac = ice_set_vf_mac, .ndo_get_vf_config = ice_get_vf_cfg, diff --combined drivers/net/ethernet/lantiq_xrx200.c index 27df06ed355e,21ef2f128070..fb78f17d734f --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@@ -154,6 -154,7 +154,7 @@@ static int xrx200_close(struct net_devi
static int xrx200_alloc_skb(struct xrx200_chan *ch) { + struct sk_buff *skb = ch->skb[ch->dma.desc]; dma_addr_t mapping; int ret = 0;
@@@ -168,6 -169,7 +169,7 @@@ XRX200_DMA_DATA_LEN, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(ch->priv->dev, mapping))) { dev_kfree_skb_any(ch->skb[ch->dma.desc]); + ch->skb[ch->dma.desc] = skb; ret = -ENOMEM; goto skip; } @@@ -198,7 -200,6 +200,6 @@@ static int xrx200_hw_receive(struct xrx ch->dma.desc %= LTQ_DESC_NUM;
if (ret) { - ch->skb[ch->dma.desc] = skb; net_dev->stats.rx_dropped++; netdev_err(net_dev, "failed to allocate new rx buffer\n"); return ret; @@@ -352,8 -353,8 +353,8 @@@ static irqreturn_t xrx200_dma_irq(int i struct xrx200_chan *ch = ptr;
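Editor's note: the xrx200 fix above keeps the old skb pointer in the ring slot when the replacement cannot be DMA-mapped, instead of restoring it later in the caller. A self-contained sketch of that "only swap the buffer once the new one is ready" pattern (plain malloc stands in for skb allocation and a flag simulates the mapping failure):

#include <stdio.h>
#include <stdlib.h>

struct ring_slot { void *buf; };

/* Stand-in for dma_map_single(); 'fail' forces the error path. */
static int map_buf(void *buf, int fail)
{
	(void)buf;
	return fail ? -1 : 0;
}

static int refill_slot(struct ring_slot *slot, int simulate_map_failure)
{
	void *old = slot->buf;
	void *new_buf = malloc(2048);

	if (!new_buf)
		return -1;			/* old buffer stays in place */

	slot->buf = new_buf;
	if (map_buf(new_buf, simulate_map_failure)) {
		free(new_buf);
		slot->buf = old;		/* restore the previous buffer */
		return -1;
	}

	free(old);	/* simplification: the driver hands the old skb to the stack */
	return 0;
}

int main(void)
{
	struct ring_slot slot = { .buf = malloc(2048) };

	printf("refill ok:   %d\n", refill_slot(&slot, 0));
	printf("refill fail: %d (slot still usable: %s)\n",
	       refill_slot(&slot, 1), slot.buf ? "yes" : "no");
	free(slot.buf);
	return 0;
}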
if (napi_schedule_prep(&ch->napi)) { - __napi_schedule(&ch->napi); ltq_dma_disable_irq(&ch->dma); + __napi_schedule(&ch->napi); }
ltq_dma_ack_irq(&ch->dma); @@@ -436,6 -437,7 +437,6 @@@ static int xrx200_probe(struct platform { struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; - struct resource *res; struct xrx200_priv *priv; struct net_device *net_dev; int err; @@@ -455,7 -457,13 +456,7 @@@ net_dev->max_mtu = XRX200_DMA_DATA_LEN;
/* load the memory ranges */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(dev, "failed to get resources\n"); - return -ENOENT; - } - - priv->pmac_reg = devm_ioremap_resource(dev, res); + priv->pmac_reg = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); if (IS_ERR(priv->pmac_reg)) return PTR_ERR(priv->pmac_reg);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index f0b98f5b2a92,85eaadc989df..059799e4f483 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@@ -94,13 -94,9 +94,9 @@@ void mlx5e_rep_update_flows(struct mlx5
ASSERT_RTNL();
- /* wait for encap to be fully initialized */ - wait_for_completion(&e->res_ready); - mutex_lock(&esw->offloads.encap_tbl_lock); encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID); - if (e->compl_result < 0 || (encap_connected == neigh_connected && - ether_addr_equal(e->h_dest, ha))) + if (encap_connected == neigh_connected && ether_addr_equal(e->h_dest, ha)) goto unlock;
mlx5e_take_all_encap_flows(e, &flow_list); @@@ -617,7 -613,7 +613,7 @@@ static bool mlx5e_restore_skb(struct sk struct mlx5e_tc_update_priv *tc_priv) { struct mlx5e_priv *priv = netdev_priv(skb->dev); - u32 tunnel_id = reg_c1 >> ESW_TUN_OFFSET; + u32 tunnel_id = (reg_c1 >> ESW_TUN_OFFSET) & TUNNEL_ID_MASK;
if (chain) { struct mlx5_rep_uplink_priv *uplink_priv; diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 0dfd51d2d178,490131e06efb..2e846b741280 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@@ -120,7 -120,6 +120,7 @@@ void mlx5e_tc_encap_flows_add(struct ml struct list_head *flow_list) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_pkt_reformat_params reformat_params; struct mlx5_esw_flow_attr *esw_attr; struct mlx5_flow_handle *rule; struct mlx5_flow_attr *attr; @@@ -131,12 -130,9 +131,12 @@@ if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE) return;
+ memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = e->reformat_type; + reformat_params.size = e->encap_size; + reformat_params.data = e->encap_header; e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, - e->reformat_type, - e->encap_size, e->encap_header, + &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(e->pkt_reformat)) { mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n", @@@ -255,9 -251,12 +255,12 @@@ static void mlx5e_take_all_route_decap_ mlx5e_take_tmp_flow(flow, flow_list, 0); }
+ typedef bool (match_cb)(struct mlx5e_encap_entry *); + static struct mlx5e_encap_entry * - mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, - struct mlx5e_encap_entry *e) + mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e, + match_cb match) { struct mlx5e_encap_entry *next = NULL;
@@@ -292,7 -291,7 +295,7 @@@ retry /* wait for encap to be fully initialized */ wait_for_completion(&next->res_ready); /* continue searching if encap entry is not in valid state after completion */ - if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) { + if (!match(next)) { e = next; goto retry; } @@@ -300,6 -299,30 +303,30 @@@ return next; }
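Editor's note: the refactor above turns the hard-coded MLX5_ENCAP_ENTRY_VALID check into a match_cb predicate, and the mlx5e_encap_valid()/mlx5e_encap_initialized() wrappers introduced just below pick which condition the walk uses. A small self-contained sketch of the same callback-selected walk, with hypothetical entry and predicate names:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct entry { int flags; int compl_result; };

#define ENTRY_VALID	0x1

typedef bool (match_cb)(const struct entry *);

static bool entry_valid(const struct entry *e)
{
	return e->flags & ENTRY_VALID;
}

static bool entry_initialized(const struct entry *e)
{
	return e->compl_result >= 0;
}

/* One walker; behaviour is chosen by the predicate the caller passes in. */
static const struct entry *next_matching(const struct entry *arr, size_t n,
					 size_t start, match_cb *match)
{
	for (size_t i = start; i < n; i++)
		if (match(&arr[i]))
			return &arr[i];
	return NULL;
}

int main(void)
{
	struct entry e[] = { { 0, -1 }, { 0, 0 }, { ENTRY_VALID, 0 } };

	printf("first initialized: index %td\n",
	       next_matching(e, 3, 0, entry_initialized) - e);
	printf("first valid:       index %td\n",
	       next_matching(e, 3, 0, entry_valid) - e);
	return 0;
}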
+ static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e) + { + return e->flags & MLX5_ENCAP_ENTRY_VALID; + } + + static struct mlx5e_encap_entry * + mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) + { + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid); + } + + static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e) + { + return e->compl_result >= 0; + } + + struct mlx5e_encap_entry * + mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e) + { + return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized); + } + void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe) { struct mlx5e_neigh *m_neigh = &nhe->m_neigh; @@@ -816,7 -839,6 +843,7 @@@ int mlx5e_attach_decap(struct mlx5e_pri { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr; + struct mlx5_pkt_reformat_params reformat_params; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_decap_entry *d; struct mlx5e_decap_key key; @@@ -858,12 -880,10 +885,12 @@@ hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key); mutex_unlock(&esw->offloads.decap_tbl_lock);
+ memset(&reformat_params, 0, sizeof(reformat_params)); + reformat_params.type = MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2; + reformat_params.size = sizeof(parse_attr->eth); + reformat_params.data = &parse_attr->eth; d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, - MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2, - sizeof(parse_attr->eth), - &parse_attr->eth, + &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(d->pkt_reformat)) { err = PTR_ERR(d->pkt_reformat); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 930b225dfe77,d26b8ed51195..7d732fac09f0 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@@ -91,16 -91,12 +91,16 @@@ void mlx5e_update_carrier(struct mlx5e_ { struct mlx5_core_dev *mdev = priv->mdev; u8 port_state; + bool up;
port_state = mlx5_query_vport_state(mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0);
- if (port_state == VPORT_STATE_UP) { + up = port_state == VPORT_STATE_UP; + if (up == netif_carrier_ok(priv->netdev)) + netif_carrier_event(priv->netdev); + if (up) { netdev_info(priv->netdev, "Link up\n"); netif_carrier_on(priv->netdev); } else { @@@ -857,7 -853,7 +857,7 @@@ int mlx5e_open_rq(struct mlx5e_params * if (err) goto err_destroy_rq;
- if (mlx5e_is_tls_on(rq->priv) && !mlx5_accel_is_ktls_device(mdev)) + if (mlx5e_is_tls_on(rq->priv) && !mlx5e_accel_is_ktls_device(mdev)) __set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */
if (MLX5_CAP_ETH(mdev, cqe_checksum_full)) @@@ -2709,8 -2705,6 +2709,6 @@@ static int mlx5e_update_netdev_queues(s nch = priv->channels.params.num_channels; ntc = priv->channels.params.num_tc; num_rxqs = nch * priv->profile->rq_groups; - if (priv->channels.params.ptp_rx) - num_rxqs++;
mlx5e_netdev_set_tcs(netdev, nch, ntc);
@@@ -4667,10 -4661,12 +4665,10 @@@ void mlx5e_build_nic_params(struct mlx5 params->log_sq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, - MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
/* XDP SQ */ - MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, - MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe)); + MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE, mlx5e_tx_mpwqe_supported(mdev));
/* set CQE compression */ params->rx_cqe_compress_def = false; @@@ -4826,22 -4822,15 +4824,15 @@@ static void mlx5e_build_nic_netdev(stru }
if (mlx5_vxlan_allowed(mdev->vxlan) || mlx5_geneve_tx_allowed(mdev)) { - netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM; - netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM; - netdev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM; - netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_UDP_TUNNEL_CSUM; + netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->hw_enc_features |= NETIF_F_GSO_UDP_TUNNEL; + netdev->vlan_features |= NETIF_F_GSO_UDP_TUNNEL; }
if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_GRE)) { - netdev->hw_features |= NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM; - netdev->hw_enc_features |= NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM; - netdev->gso_partial_features |= NETIF_F_GSO_GRE | - NETIF_F_GSO_GRE_CSUM; + netdev->hw_features |= NETIF_F_GSO_GRE; + netdev->hw_enc_features |= NETIF_F_GSO_GRE; + netdev->gso_partial_features |= NETIF_F_GSO_GRE; }
if (mlx5e_tunnel_proto_supported_tx(mdev, IPPROTO_IPIP)) { @@@ -5114,7 -5103,7 +5105,7 @@@ static void mlx5e_nic_enable(struct mlx mlx5e_set_netdev_mtu_boundaries(priv); mlx5e_set_dev_port_mtu(priv);
- mlx5_lag_add(mdev, netdev); + mlx5_lag_add_netdev(mdev, netdev);
mlx5e_enable_async_events(priv); mlx5e_enable_blocking_events(priv); @@@ -5162,7 -5151,7 +5153,7 @@@ static void mlx5e_nic_disable(struct ml priv->en_trap = NULL; } mlx5e_disable_async_events(priv); - mlx5_lag_remove(mdev); + mlx5_lag_remove_netdev(mdev, priv->netdev); mlx5_vxlan_reset_to_default(mdev->vxlan); }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index cf4558e12325,d4b0f270b6bb..8d84d0712c20 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@@ -83,17 -83,17 +83,17 @@@ struct mlx5e_tc_attr_to_reg_mapping mlx [CHAIN_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, .moffset = 0, - .mlen = 2, + .mlen = 16, }, [VPORT_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0, - .moffset = 2, - .mlen = 2, + .moffset = 16, + .mlen = 16, }, [TUNNEL_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1, - .moffset = 1, - .mlen = ((ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS) / 8), + .moffset = 8, + .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS, .soffset = MLX5_BYTE_OFF(fte_match_param, misc_parameters_2.metadata_reg_c_1), }, @@@ -110,7 -110,7 +110,7 @@@ [NIC_CHAIN_TO_REG] = { .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B, .moffset = 0, - .mlen = 2, + .mlen = 16, }, [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct, }; @@@ -128,46 -128,23 +128,46 @@@ static void mlx5e_put_flow_tunnel_id(st void mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec, enum mlx5e_tc_attr_to_reg type, - u32 data, + u32 val, u32 mask) { + void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval; int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; - void *headers_c = spec->match_criteria; - void *headers_v = spec->match_value; - void *fmask, *fval; + u32 max_mask = GENMASK(match_len - 1, 0); + __be32 curr_mask_be, curr_val_be; + u32 curr_mask, curr_val;
fmask = headers_c + soffset; fval = headers_v + soffset;
- mask = (__force u32)(cpu_to_be32(mask)) >> (32 - (match_len * 8)); - data = (__force u32)(cpu_to_be32(data)) >> (32 - (match_len * 8)); + memcpy(&curr_mask_be, fmask, 4); + memcpy(&curr_val_be, fval, 4); + + curr_mask = be32_to_cpu(curr_mask_be); + curr_val = be32_to_cpu(curr_val_be); + + //move to correct offset + WARN_ON(mask > max_mask); + mask <<= moffset; + val <<= moffset; + max_mask <<= moffset; + + //zero val and mask + curr_mask &= ~max_mask; + curr_val &= ~max_mask;
- memcpy(fmask, &mask, match_len); - memcpy(fval, &data, match_len); + //add current to mask + curr_mask |= mask; + curr_val |= val; + + //back to be32 and write + curr_mask_be = cpu_to_be32(curr_mask); + curr_val_be = cpu_to_be32(curr_val); + + memcpy(fmask, &curr_mask_be, 4); + memcpy(fval, &curr_val_be, 4);
spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; } @@@ -175,28 -152,23 +175,28 @@@ void mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec, enum mlx5e_tc_attr_to_reg type, - u32 *data, + u32 *val, u32 *mask) { + void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval; int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset; + int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset; int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen; - void *headers_c = spec->match_criteria; - void *headers_v = spec->match_value; - void *fmask, *fval; + u32 max_mask = GENMASK(match_len - 1, 0); + __be32 curr_mask_be, curr_val_be; + u32 curr_mask, curr_val;
fmask = headers_c + soffset; fval = headers_v + soffset;
- memcpy(mask, fmask, match_len); - memcpy(data, fval, match_len); + memcpy(&curr_mask_be, fmask, 4); + memcpy(&curr_val_be, fval, 4); + + curr_mask = be32_to_cpu(curr_mask_be); + curr_val = be32_to_cpu(curr_val_be);
- *mask = be32_to_cpu((__force __be32)(*mask << (32 - (match_len * 8)))); - *data = be32_to_cpu((__force __be32)(*data << (32 - (match_len * 8)))); + *mask = (curr_mask >> moffset) & max_mask; + *val = (curr_val >> moffset) & max_mask; }
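Editor's note: with moffset/mlen expressed in bits, CHAIN_TO_REG and VPORT_TO_REG now share the same 32-bit REG_C_0 match word, so a write must preserve the other field instead of memcpy'ing match_len bytes over it. A minimal userspace sketch of that big-endian read-modify-write, assuming nothing about the real mlx5 structures (the two fields and their offsets are just examples):

#include <arpa/inet.h>	/* htonl/ntohl */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void field_set(uint8_t *word, int moffset, int mlen, uint32_t val)
{
	uint32_t max_mask = mlen == 32 ? 0xffffffffu : (1u << mlen) - 1;
	uint32_t cur;

	memcpy(&cur, word, 4);
	cur = ntohl(cur);
	cur &= ~(max_mask << moffset);		/* clear only this field */
	cur |= (val & max_mask) << moffset;	/* merge the new value   */
	cur = htonl(cur);
	memcpy(word, &cur, 4);
}

static uint32_t field_get(const uint8_t *word, int moffset, int mlen)
{
	uint32_t max_mask = mlen == 32 ? 0xffffffffu : (1u << mlen) - 1;
	uint32_t cur;

	memcpy(&cur, word, 4);
	return (ntohl(cur) >> moffset) & max_mask;
}

int main(void)
{
	uint8_t reg_c0[4] = { 0 };

	field_set(reg_c0, 0, 16, 0x1234);	/* e.g. chain in bits 0..15  */
	field_set(reg_c0, 16, 16, 0xabcd);	/* e.g. vport in bits 16..31 */
	printf("low=0x%x high=0x%x\n",
	       field_get(reg_c0, 0, 16), field_get(reg_c0, 16, 16));
	return 0;
}

The same bit-based convention is what allows the modify-header helpers further down to program the firmware offset/length fields directly, with the 5-bit length value 0 standing for a full 32-bit write.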
int @@@ -220,13 -192,13 +220,13 @@@ mlx5e_tc_match_to_reg_set_and_get_id(st (mod_hdr_acts->num_actions * MLX5_MH_ACT_SZ);
/* Firmware has 5bit length field and 0 means 32bits */ - if (mlen == 4) + if (mlen == 32) mlen = 0;
MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, modact, field, mfield); - MLX5_SET(set_action_in, modact, offset, moffset * 8); - MLX5_SET(set_action_in, modact, length, mlen * 8); + MLX5_SET(set_action_in, modact, offset, moffset); + MLX5_SET(set_action_in, modact, length, mlen); MLX5_SET(set_action_in, modact, data, data); err = mod_hdr_acts->num_actions; mod_hdr_acts->num_actions++; @@@ -324,13 -296,13 +324,13 @@@ void mlx5e_tc_match_to_reg_mod_hdr_chan modact = mod_hdr_acts->actions + (act_id * MLX5_MH_ACT_SZ);
/* Firmware has 5bit length field and 0 means 32bits */ - if (mlen == 4) + if (mlen == 32) mlen = 0;
MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, modact, field, mfield); - MLX5_SET(set_action_in, modact, offset, moffset * 8); - MLX5_SET(set_action_in, modact, length, mlen * 8); + MLX5_SET(set_action_in, modact, offset, moffset); + MLX5_SET(set_action_in, modact, length, mlen); MLX5_SET(set_action_in, modact, data, data); }
@@@ -4793,7 -4765,7 +4793,7 @@@ static void mlx5e_tc_hairpin_update_dea list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { wait_for_completion(&hpe->res_ready); if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) - hpe->hp->pair->peer_gone = true; + mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
mlx5e_hairpin_put(priv, hpe); } @@@ -5133,7 -5105,7 +5133,7 @@@ bool mlx5e_tc_update_skb(struct mlx5_cq
tc_skb_ext->chain = chain;
- zone_restore_id = (reg_b >> REG_MAPPING_SHIFT(NIC_ZONE_RESTORE_TO_REG)) & + zone_restore_id = (reg_b >> REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) & ESW_ZONE_ID_MASK;
if (!mlx5e_tc_ct_restore_flow(tc->ct, skb, diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tc.h index 721093b55acc,17027536efba..f7cbeb0b66d2 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h @@@ -129,7 -129,7 +129,7 @@@ struct tunnel_match_enc_opts */ #define TUNNEL_INFO_BITS 12 #define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0) -#define ENC_OPTS_BITS 12 +#define ENC_OPTS_BITS 11 #define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0) #define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS) #define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0) @@@ -178,6 -178,9 +178,9 @@@ void mlx5e_take_all_encap_flows(struct void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list);
struct mlx5e_neigh_hash_entry; + struct mlx5e_encap_entry * + mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe, + struct mlx5e_encap_entry *e); void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe);
void mlx5e_tc_reoffload_flows_work(struct work_struct *work); @@@ -198,10 -201,10 +201,10 @@@ enum mlx5e_tc_attr_to_reg
struct mlx5e_tc_attr_to_reg_mapping { int mfield; /* rewrite field */ - int moffset; /* offset of mfield */ - int mlen; /* bytes to rewrite/match */ + int moffset; /* bit offset of mfield */ + int mlen; /* bits to rewrite/match */
- int soffset; /* offset of spec for match */ + int soffset; /* byte offset of spec for match */ };
extern struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[]; diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 669ff58107e4,320fe0cda917..c63d78eda606 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@@ -32,7 -32,6 +32,6 @@@
#include <linux/tcp.h> #include <linux/if_vlan.h> - #include <linux/ptp_classify.h> #include <net/geneve.h> #include <net/dsfield.h> #include "en.h" @@@ -67,24 -66,6 +66,6 @@@ static inline int mlx5e_get_dscp_up(str } #endif
- static bool mlx5e_use_ptpsq(struct sk_buff *skb) - { - struct flow_keys fk; - - if (!skb_flow_dissect_flow_keys(skb, &fk, 0)) - return false; - - if (fk.basic.n_proto == htons(ETH_P_1588)) - return true; - - if (fk.basic.n_proto != htons(ETH_P_IP) && - fk.basic.n_proto != htons(ETH_P_IPV6)) - return false; - - return (fk.basic.ip_proto == IPPROTO_UDP && - fk.ports.dst == htons(PTP_EV_PORT)); - } - static u16 mlx5e_select_ptpsq(struct net_device *dev, struct sk_buff *skb) { struct mlx5e_priv *priv = netdev_priv(dev); @@@ -145,9 -126,9 +126,9 @@@ u16 mlx5e_select_queue(struct net_devic }
ptp_channel = READ_ONCE(priv->channels.ptp); - if (unlikely(ptp_channel) && - test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) && - mlx5e_use_ptpsq(skb)) + if (unlikely(ptp_channel && + test_bit(MLX5E_PTP_STATE_TX, ptp_channel->state) && + mlx5e_use_ptpsq(skb))) return mlx5e_select_ptpsq(dev, skb);
txq_ix = netdev_pick_tx(dev, skb, NULL); @@@ -706,12 -687,16 +687,12 @@@ void mlx5e_tx_mpwqe_ensure_complete(str mlx5e_tx_mpwqe_session_complete(sq); }
-static bool mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, +static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, struct mlx5_wqe_eth_seg *eseg, u16 ihs) { - if (unlikely(!mlx5e_accel_tx_eseg(priv, skb, eseg, ihs))) - return false; - + mlx5e_accel_tx_eseg(priv, skb, eseg, ihs); mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg); - - return true; }
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) @@@ -740,7 -725,10 +721,7 @@@ if (mlx5e_tx_skb_supports_mpwqe(skb, &attr)) { struct mlx5_wqe_eth_seg eseg = {};
- if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, - attr.ihs))) - return NETDEV_TX_OK; - + mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &eseg, attr.ihs); mlx5e_sq_xmit_mpwqe(sq, skb, &eseg, netdev_xmit_more()); return NETDEV_TX_OK; } @@@ -755,7 -743,9 +736,7 @@@ /* May update the WQE, but may not post other WQEs. */ mlx5e_accel_tx_finish(sq, wqe, &accel, (struct mlx5_wqe_inline_seg *)(wqe->data + wqe_attr.ds_cnt_inl)); - if (unlikely(!mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs))) - return NETDEV_TX_OK; - + mlx5e_txwqe_build_eseg(priv, sq, skb, &accel, &wqe->eth, attr.ihs); mlx5e_sq_xmit_wqe(sq, skb, &attr, &wqe_attr, wqe, pi, netdev_xmit_more());
return NETDEV_TX_OK; diff --combined drivers/net/ethernet/mellanox/mlx5/core/eq.c index 7e5b3826eae5,940333410267..c7efd177da1f --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@@ -1,6 -1,33 +1,6 @@@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the - * OpenIB.org BSD license below: - * - * Redistribution and use in source and binary forms, with or - * without modification, are permitted provided that the following - * conditions are met: - * - * - Redistributions of source code must retain the above - * copyright notice, this list of conditions and the following - * disclaimer. - * - * - Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials - * provided with the distribution. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. + * Copyright (c) 2013-2021, Mellanox Technologies inc. All rights reserved. */
#include <linux/interrupt.h> @@@ -18,7 -45,6 +18,7 @@@ #include "eswitch.h" #include "lib/clock.h" #include "diag/fw_tracer.h" +#include "mlx5_irq.h"
enum { MLX5_EQE_OWNER_INIT_VAL = 0x1, @@@ -58,9 -84,6 +58,9 @@@ struct mlx5_eq_table struct mutex lock; /* sync async eqs creations */ int num_comp_eqs; struct mlx5_irq_table *irq_table; +#ifdef CONFIG_RFS_ACCEL + struct cpu_rmap *rmap; +#endif };
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \ @@@ -113,7 -136,7 +113,7 @@@ static int mlx5_eq_comp_int(struct noti
eqe = next_eqe_sw(eq); if (!eqe) - return 0; + goto out;
do { struct mlx5_core_cq *cq; @@@ -138,6 -161,8 +138,8 @@@ ++eq->cons_index;
} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); + + out: eq_update_ci(eq, 1);
if (cqn != -1) @@@ -225,9 -250,9 +227,9 @@@ static int mlx5_eq_async_int(struct not ++eq->cons_index;
} while ((++num_eqes < MLX5_EQ_POLLING_BUDGET) && (eqe = next_eqe_sw(eq))); - eq_update_ci(eq, 1);
out: + eq_update_ci(eq, 1); mlx5_eq_async_int_unlock(eq_async, recovery, &flags);
return unlikely(recovery) ? num_eqes : 0; @@@ -263,7 -288,7 +265,7 @@@ create_map_eq(struct mlx5_core_dev *dev u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; u8 log_eq_stride = ilog2(MLX5_EQE_SIZE); struct mlx5_priv *priv = &dev->priv; - u8 vecidx = param->irq_index; + u16 vecidx = param->irq_index; __be64 *pas; void *eqc; int inlen; @@@ -286,20 -311,13 +288,20 @@@ mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc); init_eq_buf(eq);
+ eq->irq = mlx5_irq_request(dev, vecidx, param->affinity); + if (IS_ERR(eq->irq)) { + err = PTR_ERR(eq->irq); + goto err_buf; + } + + vecidx = mlx5_irq_get_index(eq->irq); inlen = MLX5_ST_SZ_BYTES(create_eq_in) + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
in = kvzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; - goto err_buf; + goto err_irq; }
pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); @@@ -343,8 -361,6 +345,8 @@@ err_eq err_in: kvfree(in);
+err_irq: + mlx5_irq_release(eq->irq); err_buf: mlx5_frag_buf_free(dev, &eq->frag_buf); return err; @@@ -363,9 -379,10 +365,9 @@@ int mlx5_eq_enable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct notifier_block *nb) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; int err;
- err = mlx5_irq_attach_nb(eq_table->irq_table, eq->vecidx, nb); + err = mlx5_irq_attach_nb(eq->irq, nb); if (!err) eq_update_ci(eq, 1);
@@@ -384,7 -401,9 +386,7 @@@ EXPORT_SYMBOL(mlx5_eq_enable) void mlx5_eq_disable(struct mlx5_core_dev *dev, struct mlx5_eq *eq, struct notifier_block *nb) { - struct mlx5_eq_table *eq_table = dev->priv.eq_table; - - mlx5_irq_detach_nb(eq_table->irq_table, eq->vecidx, nb); + mlx5_irq_detach_nb(eq->irq, nb); } EXPORT_SYMBOL(mlx5_eq_disable);
@@@ -398,9 -417,10 +400,9 @@@ static int destroy_unmap_eq(struct mlx5 if (err) mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n", eq->eqn); - synchronize_irq(eq->irqn); + mlx5_irq_release(eq->irq);
mlx5_frag_buf_free(dev, &eq->frag_buf); - return err; }
@@@ -472,7 -492,14 +474,7 @@@ static int create_async_eq(struct mlx5_ int err;
mutex_lock(&eq_table->lock); - /* Async EQs must share irq index 0 */ - if (param->irq_index != 0) { - err = -EINVAL; - goto unlock; - } - err = create_map_eq(dev, eq, param); -unlock: mutex_unlock(&eq_table->lock); return err; } @@@ -591,11 -618,8 +593,11 @@@ setup_async_eq(struct mlx5_core_dev *de
eq->irq_nb.notifier_call = mlx5_eq_async_int; spin_lock_init(&eq->lock); + if (!zalloc_cpumask_var(¶m->affinity, GFP_KERNEL)) + return -ENOMEM;
err = create_async_eq(dev, &eq->core, param); + free_cpumask_var(param->affinity); if (err) { mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err); return err; @@@ -630,6 -654,7 +632,6 @@@ static int create_async_eqs(struct mlx5 mlx5_eq_notifier_register(dev, &table->cq_err_nb);
param = (struct mlx5_eq_param) { - .irq_index = 0, .nent = MLX5_NUM_CMD_EQE, .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD, }; @@@ -642,6 -667,7 +644,6 @@@ mlx5_cmd_allowed_opcode(dev, CMD_ALLOWED_OPCODE_ALL);
param = (struct mlx5_eq_param) { - .irq_index = 0, .nent = MLX5_NUM_ASYNC_EQE, };
@@@ -651,6 -677,7 +653,6 @@@ goto err2;
param = (struct mlx5_eq_param) { - .irq_index = 0, .nent = /* TODO: sriov max_vf + */ 1, .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST, }; @@@ -710,9 -737,6 +712,9 @@@ mlx5_eq_create_generic(struct mlx5_core struct mlx5_eq *eq = kvzalloc(sizeof(*eq), GFP_KERNEL); int err;
+ if (!param->affinity) + return ERR_PTR(-EINVAL); + if (!eq) return ERR_PTR(-ENOMEM);
@@@ -823,21 -847,16 +825,21 @@@ static int create_comp_eqs(struct mlx5_ .irq_index = vecidx, .nent = nent, }; - err = create_map_eq(dev, &eq->core, ¶m); - if (err) { - kfree(eq); - goto clean; + + if (!zalloc_cpumask_var(¶m.affinity, GFP_KERNEL)) { + err = -ENOMEM; + goto clean_eq; } + cpumask_set_cpu(cpumask_local_spread(i, dev->priv.numa_node), + param.affinity); + err = create_map_eq(dev, &eq->core, ¶m); + free_cpumask_var(param.affinity); + if (err) + goto clean_eq; err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb); if (err) { destroy_unmap_eq(dev, &eq->core); - kfree(eq); - goto clean; + goto clean_eq; }
mlx5_core_dbg(dev, "allocated completion EQN %d\n", eq->core.eqn); @@@ -846,8 -865,7 +848,8 @@@ }
return 0; - +clean_eq: + kfree(eq); clean: destroy_comp_eqs(dev); return err; @@@ -883,23 -901,17 +885,23 @@@ EXPORT_SYMBOL(mlx5_comp_vectors_count) struct cpumask * mlx5_comp_irq_get_affinity_mask(struct mlx5_core_dev *dev, int vector) { - int vecidx = vector + MLX5_IRQ_VEC_COMP_BASE; + struct mlx5_eq_table *table = dev->priv.eq_table; + struct mlx5_eq_comp *eq, *n; + int i = 0;
- return mlx5_irq_get_affinity_mask(dev->priv.eq_table->irq_table, - vecidx); + list_for_each_entry_safe(eq, n, &table->comp_eqs_list, list) { + if (i++ == vector) + break; + } + + return mlx5_irq_get_affinity_mask(eq->core.irq); } EXPORT_SYMBOL(mlx5_comp_irq_get_affinity_mask);
#ifdef CONFIG_RFS_ACCEL struct cpu_rmap *mlx5_eq_table_get_rmap(struct mlx5_core_dev *dev) { - return mlx5_irq_get_rmap(dev->priv.eq_table->irq_table); + return dev->priv.eq_table->rmap; } #endif
@@@ -916,57 -928,12 +918,57 @@@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(s return ERR_PTR(-ENOENT); }
+static void clear_rmap(struct mlx5_core_dev *dev) +{ +#ifdef CONFIG_RFS_ACCEL + struct mlx5_eq_table *eq_table = dev->priv.eq_table; + + free_irq_cpu_rmap(eq_table->rmap); +#endif +} + +static int set_rmap(struct mlx5_core_dev *mdev) +{ + int err = 0; +#ifdef CONFIG_RFS_ACCEL + struct mlx5_eq_table *eq_table = mdev->priv.eq_table; + int vecidx; + + eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs); + if (!eq_table->rmap) { + err = -ENOMEM; + mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err); + goto err_out; + } + + vecidx = MLX5_IRQ_VEC_COMP_BASE; + for (; vecidx < eq_table->num_comp_eqs + MLX5_IRQ_VEC_COMP_BASE; + vecidx++) { + err = irq_cpu_rmap_add(eq_table->rmap, + pci_irq_vector(mdev->pdev, vecidx)); + if (err) { + mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d", + err); + goto err_irq_cpu_rmap_add; + } + } + return 0; + +err_irq_cpu_rmap_add: + clear_rmap(mdev); +err_out: +#endif + return err; +} + /* This function should only be called after mlx5_cmd_force_teardown_hca */ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev) { struct mlx5_eq_table *table = dev->priv.eq_table;
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */ + if (!mlx5_core_is_sf(dev)) + clear_rmap(dev); mlx5_irq_table_destroy(dev); mutex_unlock(&table->lock); } @@@ -983,19 -950,12 +985,19 @@@ int mlx5_eq_table_create(struct mlx5_co int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ? MLX5_CAP_GEN(dev, max_num_eqs) : 1 << MLX5_CAP_GEN(dev, log_max_eq); + int max_eqs_sf; int err;
eq_table->num_comp_eqs = min_t(int, - mlx5_irq_get_num_comp(eq_table->irq_table), + mlx5_irq_table_get_num_comp(eq_table->irq_table), num_eqs - MLX5_MAX_ASYNC_EQS); + if (mlx5_core_is_sf(dev)) { + max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF, + mlx5_irq_table_get_sfs_vec(eq_table->irq_table)); + eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs, + max_eqs_sf); + }
err = create_async_eqs(dev); if (err) { @@@ -1003,18 -963,6 +1005,18 @@@ goto err_async_eqs; }
+ if (!mlx5_core_is_sf(dev)) { + /* rmap is a mapping between irq number and queue number. + * each irq can be assign only to a single rmap. + * since SFs share IRQs, rmap mapping cannot function correctly + * for irqs that are shared for different core/netdev RX rings. + * Hence we don't allow netdev rmap for SFs + */ + err = set_rmap(dev); + if (err) + goto err_rmap; + } + err = create_comp_eqs(dev); if (err) { mlx5_core_err(dev, "Failed to create completion EQs\n"); @@@ -1023,9 -971,6 +1025,9 @@@
return 0; err_comp_eqs: + if (!mlx5_core_is_sf(dev)) + clear_rmap(dev); +err_rmap: destroy_async_eqs(dev); err_async_eqs: return err; @@@ -1033,8 -978,6 +1035,8 @@@
void mlx5_eq_table_destroy(struct mlx5_core_dev *dev) { + if (!mlx5_core_is_sf(dev)) + clear_rmap(dev); destroy_comp_eqs(dev); destroy_async_eqs(dev); } diff --combined drivers/net/ethernet/mellanox/mlx5/core/main.c index 390b1d3a6fde,0d0f63a27aba..eb1b316560a8 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@@ -76,7 -76,6 +76,7 @@@ #include "sf/vhca_event.h" #include "sf/dev/dev.h" #include "sf/sf.h" +#include "mlx5_irq.h"
MODULE_AUTHOR("Eli Cohen eli@mellanox.com"); MODULE_DESCRIPTION("Mellanox 5th generation network adapters (ConnectX series) core driver"); @@@ -1162,7 -1161,7 +1162,7 @@@ static int mlx5_load(struct mlx5_core_d err = mlx5_core_set_hca_defaults(dev); if (err) { mlx5_core_err(dev, "Failed to set hca defaults\n"); - goto err_sriov; + goto err_set_hca; }
mlx5_vhca_event_start(dev); @@@ -1186,7 -1185,6 +1186,7 @@@ }
mlx5_sf_dev_table_create(dev); + mlx5_lag_add_mdev(dev);
return 0;
@@@ -1196,6 -1194,7 +1196,7 @@@ err_ec mlx5_sf_hw_table_destroy(dev); err_vhca: mlx5_vhca_event_stop(dev); + err_set_hca: mlx5_cleanup_fs(dev); err_fs: mlx5_accel_tls_cleanup(dev); @@@ -1221,7 -1220,6 +1222,7 @@@ err_irq_table
static void mlx5_unload(struct mlx5_core_dev *dev) { + mlx5_lag_remove_mdev(dev); mlx5_sf_dev_table_destroy(dev); mlx5_sriov_detach(dev); mlx5_ec_cleanup(dev); diff --combined drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index 42668de01abc,7466f016375c..4aaca8eb7597 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@@ -116,8 -116,6 +116,8 @@@ enum DR_STE_V1_ACTION_MDFY_FLD_IPV6_SRC_OUT_3 = 0x4f, DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_0 = 0x5e, DR_STE_V1_ACTION_MDFY_FLD_TCP_MISC_1 = 0x5f, + DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0 = 0x6f, + DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1 = 0x70, DR_STE_V1_ACTION_MDFY_FLD_METADATA_2_CQE = 0x7b, DR_STE_V1_ACTION_MDFY_FLD_GNRL_PURPOSE = 0x7c, DR_STE_V1_ACTION_MDFY_FLD_REGISTER_2 = 0x8c, @@@ -248,12 -246,6 +248,12 @@@ static const struct mlx5dr_ste_action_m [MLX5_ACTION_IN_FIELD_OUT_FIRST_VID] = { .hw_field = DR_STE_V1_ACTION_MDFY_FLD_L2_OUT_2, .start = 0, .end = 15, }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_31_0] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_1, .start = 0, .end = 31, + }, + [MLX5_ACTION_IN_FIELD_OUT_EMD_47_32] = { + .hw_field = DR_STE_V1_ACTION_MDFY_FLD_CFG_HDR_0_0, .start = 0, .end = 15, + }, };
static void dr_ste_v1_set_entry_type(u8 *hw_ste_p, u8 entry_type) @@@ -369,8 -361,8 +369,8 @@@ static void dr_ste_v1_set_reparse(u8 *h MLX5_SET(ste_match_bwc_v1, hw_ste_p, reparse, 1); }
-static void dr_ste_v1_set_tx_encap(u8 *hw_ste_p, u8 *d_action, - u32 reformat_id, int size) +static void dr_ste_v1_set_encap(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, int size) { MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); @@@ -382,26 -374,6 +382,26 @@@ dr_ste_v1_set_reparse(hw_ste_p); }
+static void dr_ste_v1_set_insert_hdr(u8 *hw_ste_p, u8 *d_action, + u32 reformat_id, + u8 anchor, u8 offset, + int size) +{ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, + action_id, DR_STE_V1_ACTION_ID_INSERT_POINTER); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_anchor, anchor); + + /* The hardware expects here size and offset in words (2 byte) */ + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, size, size / 2); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, start_offset, offset / 2); + + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, pointer, reformat_id); + MLX5_SET(ste_double_action_insert_with_ptr_v1, d_action, attributes, + DR_STE_V1_ACTION_INSERT_PTR_ATTR_NONE); + + dr_ste_v1_set_reparse(hw_ste_p); +} + static void dr_ste_v1_set_tx_push_vlan(u8 *hw_ste_p, u8 *d_action, u32 vlan_hdr) { @@@ -429,11 -401,11 +429,11 @@@ static void dr_ste_v1_set_rx_pop_vlan(u dr_ste_v1_set_reparse(hw_ste_p); }
-static void dr_ste_v1_set_tx_encap_l3(u8 *hw_ste_p, - u8 *frst_s_action, - u8 *scnd_d_action, - u32 reformat_id, - int size) +static void dr_ste_v1_set_encap_l3(u8 *hw_ste_p, + u8 *frst_s_action, + u8 *scnd_d_action, + u32 reformat_id, + int size) { /* Remove L2 headers */ MLX5_SET(ste_single_action_remove_header_v1, frst_s_action, action_id, @@@ -547,9 -519,9 +547,9 @@@ static void dr_ste_v1_set_actions_tx(st action_sz = DR_STE_ACTION_TRIPLE_SZ; allow_encap = true; } - dr_ste_v1_set_tx_encap(last_ste, action, - attr->reformat_id, - attr->reformat_size); + dr_ste_v1_set_encap(last_ste, action, + attr->reformat.id, + attr->reformat.size); action_sz -= DR_STE_ACTION_DOUBLE_SZ; action += DR_STE_ACTION_DOUBLE_SZ; } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) { @@@ -560,25 -532,12 +560,25 @@@ action_sz = DR_STE_ACTION_TRIPLE_SZ; d_action = action + DR_STE_ACTION_SINGLE_SZ;
- dr_ste_v1_set_tx_encap_l3(last_ste, - action, d_action, - attr->reformat_id, - attr->reformat_size); + dr_ste_v1_set_encap_l3(last_ste, + action, d_action, + attr->reformat.id, + attr->reformat.size); action_sz -= DR_STE_ACTION_TRIPLE_SZ; action += DR_STE_ACTION_TRIPLE_SZ; + } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) { + if (!allow_encap || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; }
dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); @@@ -657,9 -616,7 +657,9 @@@ static void dr_ste_v1_set_actions_rx(st }
if (action_type_set[DR_ACTION_TYP_CTR]) { - /* Counter action set after decap to exclude decaped header */ + /* Counter action set after decap and before insert_hdr + * to exclude decaped / encaped header respectively. + */ if (!allow_ctr) { dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); @@@ -670,52 -627,6 +670,52 @@@ dr_ste_v1_set_counter_id(last_ste, attr->ctr_id); }
+ if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L2]) { + if (action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_encap(last_ste, action, + attr->reformat.id, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_modify_hdr = false; + } else if (action_type_set[DR_ACTION_TYP_L2_TO_TNL_L3]) { + u8 *d_action; + + if (action_sz < DR_STE_ACTION_TRIPLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + + d_action = action + DR_STE_ACTION_SINGLE_SZ; + + dr_ste_v1_set_encap_l3(last_ste, + action, d_action, + attr->reformat.id, + attr->reformat.size); + action_sz -= DR_STE_ACTION_TRIPLE_SZ; + allow_modify_hdr = false; + } else if (action_type_set[DR_ACTION_TYP_INSERT_HDR]) { + /* Modify header, decap, and encap must use different STEs */ + if (!allow_modify_hdr || action_sz < DR_STE_ACTION_DOUBLE_SZ) { + dr_ste_v1_arr_init_next_match(&last_ste, added_stes, attr->gvmi); + action = MLX5_ADDR_OF(ste_mask_and_match_v1, last_ste, action); + action_sz = DR_STE_ACTION_TRIPLE_SZ; + } + dr_ste_v1_set_insert_hdr(last_ste, action, + attr->reformat.id, + attr->reformat.param_0, + attr->reformat.param_1, + attr->reformat.size); + action_sz -= DR_STE_ACTION_DOUBLE_SZ; + action += DR_STE_ACTION_DOUBLE_SZ; + allow_modify_hdr = false; + } + dr_ste_v1_set_hit_gvmi(last_ste, attr->hit_gvmi); dr_ste_v1_set_hit_addr(last_ste, attr->final_icm_addr, 1); } @@@ -783,7 -694,11 +783,11 @@@ static int dr_ste_v1_set_action_decap_l if (hw_action_sz / DR_STE_ACTION_DOUBLE_SZ < DR_STE_DECAP_L3_ACTION_NUM) return -EINVAL;
- memcpy(padded_data, data, data_sz); + inline_data_sz = + MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data); + + /* Add an alignment padding */ + memcpy(padded_data + data_sz % inline_data_sz, data, data_sz);
/* Remove L2L3 outer headers */ MLX5_SET(ste_single_action_remove_header_v1, hw_action, action_id, @@@ -795,32 -710,34 +799,34 @@@ hw_action += DR_STE_ACTION_DOUBLE_SZ; used_actions++; /* Remove and NOP are a single double action */
- inline_data_sz = - MLX5_FLD_SZ_BYTES(ste_double_action_insert_with_inline_v1, inline_data); + /* Point to the last dword of the header */ + data_ptr += (data_sz / inline_data_sz) * inline_data_sz;
- /* Add the new header inline + 2 extra bytes */ + /* Add the new header using inline action 4Byte at a time, the header + * is added in reversed order to the beginning of the packet to avoid + * incorrect parsing by the HW. Since header is 14B or 18B an extra + * two bytes are padded and later removed. + */ for (i = 0; i < data_sz / inline_data_sz + 1; i++) { void *addr_inline;
MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, action_id, DR_STE_V1_ACTION_ID_INSERT_INLINE); /* The hardware expects here offset to words (2 bytes) */ - MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, - i * 2); + MLX5_SET(ste_double_action_insert_with_inline_v1, hw_action, start_offset, 0);
/* Copy bytes one by one to avoid endianness problem */ addr_inline = MLX5_ADDR_OF(ste_double_action_insert_with_inline_v1, hw_action, inline_data); - memcpy(addr_inline, data_ptr, inline_data_sz); + memcpy(addr_inline, data_ptr - i * inline_data_sz, inline_data_sz); hw_action += DR_STE_ACTION_DOUBLE_SZ; - data_ptr += inline_data_sz; used_actions++; }
- /* Remove 2 extra bytes */ + /* Remove first 2 extra bytes */ MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, action_id, DR_STE_V1_ACTION_ID_REMOVE_BY_SIZE); - MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, data_sz / 2); + MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, start_offset, 0); /* The hardware expects here size in words (2 bytes) */ MLX5_SET(ste_single_action_remove_header_size_v1, hw_action, remove_size, 1); used_actions++; @@@ -1954,7 -1871,6 +1960,7 @@@ struct mlx5dr_ste_ctx ste_ctx_v1 = .set_byte_mask = &dr_ste_v1_set_byte_mask, .get_byte_mask = &dr_ste_v1_get_byte_mask, /* Actions */ + .actions_caps = DR_STE_CTX_ACTION_CAP_RX_ENCAP, .set_actions_rx = &dr_ste_v1_set_actions_rx, .set_actions_tx = &dr_ste_v1_set_actions_tx, .modify_field_arr_sz = ARRAY_SIZE(dr_ste_v1_action_modify_field_arr), diff --combined drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h index 0e2b73731117,9737565cd8d4..b2aa6c93c3a1 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h @@@ -26,7 -26,6 +26,7 @@@ enum mlx5dr_action_reformat_type DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L2, DR_ACTION_REFORMAT_TYP_TNL_L3_TO_L2, DR_ACTION_REFORMAT_TYP_L2_TO_TNL_L3, + DR_ACTION_REFORMAT_TYP_INSERT_HDR, };
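Editor's note: the decap-L3 rework above front-pads the new L2 header to a 4-byte multiple, emits the inline dwords last-first (because every inline insert lands at packet offset 0, each later action ends up in front of the earlier ones), and finally removes the padding word. A userspace simulation of that sequence, with made-up header contents and buffer sizes, shows the header comes out intact:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CHUNK 4

int main(void)
{
	uint8_t hdr[14];			/* 14-byte L2 header, illustrative */
	for (int i = 0; i < 14; i++)
		hdr[i] = (uint8_t)i;

	int data_sz = sizeof(hdr);
	int pad = data_sz % CHUNK;		/* 2 bytes of front padding */
	uint8_t padded[32] = { 0 };
	memcpy(padded + pad, hdr, data_sz);

	/* Each "insert at offset 0" prepends one dword, so the dwords are
	 * emitted from the last one back to the first. */
	uint8_t pkt[64] = { 0 };
	int pkt_len = 0;
	uint8_t *last = padded + (data_sz / CHUNK) * CHUNK;

	for (int i = 0; i < data_sz / CHUNK + 1; i++) {
		memmove(pkt + CHUNK, pkt, pkt_len);	/* make room at offset 0 */
		memcpy(pkt, last - i * CHUNK, CHUNK);	/* prepend one dword */
		pkt_len += CHUNK;
	}

	/* Drop the front padding, as the final remove-by-size action does. */
	memmove(pkt, pkt + pad, pkt_len - pad);
	pkt_len -= pad;

	printf("reconstructed header matches: %s\n",
	       memcmp(pkt, hdr, data_sz) == 0 ? "yes" : "no");
	return 0;
}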
struct mlx5dr_match_parameters { @@@ -106,8 -105,6 +106,8 @@@ mlx5dr_action_create_flow_counter(u32 c struct mlx5dr_action * mlx5dr_action_create_packet_reformat(struct mlx5dr_domain *dmn, enum mlx5dr_action_reformat_type reformat_type, + u8 reformat_param_0, + u8 reformat_param_1, size_t data_sz, void *data);
@@@ -127,10 -124,11 +127,11 @@@ int mlx5dr_action_destroy(struct mlx5dr static inline bool mlx5dr_is_supported(struct mlx5_core_dev *dev) { - return MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || - (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && - (MLX5_CAP_GEN(dev, steering_format_version) <= - MLX5_STEERING_FORMAT_CONNECTX_6DX)); + return MLX5_CAP_GEN(dev, roce) && + (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner) || + (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, sw_owner_v2) && + (MLX5_CAP_GEN(dev, steering_format_version) <= + MLX5_STEERING_FORMAT_CONNECTX_6DX))); }
/* buddy functions & structure */ diff --combined drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 677a53f65008,85f0ce285146..0998dcc9cac0 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@@ -149,27 -149,22 +149,27 @@@ mlxsw_thermal_module_trips_reset(struc
static int mlxsw_thermal_module_trips_update(struct device *dev, struct mlxsw_core *core, - struct mlxsw_thermal_module *tz) + struct mlxsw_thermal_module *tz, + int crit_temp, int emerg_temp) { - int crit_temp, emerg_temp; int err;
- err = mlxsw_env_module_temp_thresholds_get(core, tz->module, - SFP_TEMP_HIGH_WARN, - &crit_temp); - if (err) - return err; + /* Do not try to query temperature thresholds directly from the module's + * EEPROM if we got valid thresholds from MTMP. + */ + if (!emerg_temp || !crit_temp) { + err = mlxsw_env_module_temp_thresholds_get(core, tz->module, + SFP_TEMP_HIGH_WARN, + &crit_temp); + if (err) + return err;
- err = mlxsw_env_module_temp_thresholds_get(core, tz->module, - SFP_TEMP_HIGH_ALARM, - &emerg_temp); - if (err) - return err; + err = mlxsw_env_module_temp_thresholds_get(core, tz->module, + SFP_TEMP_HIGH_ALARM, + &emerg_temp); + if (err) + return err; + }
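Editor's note: the guard above only falls back to the slow module-EEPROM queries when MTMP did not report usable thresholds. A trivial sketch of that fallback, with made-up millidegree values standing in for the real sensor data:

#include <stdio.h>

/* Stand-in for the module-EEPROM threshold query. */
static int eeprom_threshold(int alarm)
{
	return alarm ? 75000 : 70000;
}

/* Thresholds already reported by MTMP are used as-is; the EEPROM is only
 * read when either value is missing (zero). */
static void resolve_thresholds(int *crit, int *emerg)
{
	if (!*crit || !*emerg) {
		*crit  = eeprom_threshold(0);
		*emerg = eeprom_threshold(1);
	}
}

int main(void)
{
	int crit = 0, emerg = 0;

	resolve_thresholds(&crit, &emerg);
	printf("fallback:  crit=%d emerg=%d\n", crit, emerg);

	crit = 68000;
	emerg = 73000;
	resolve_thresholds(&crit, &emerg);
	printf("from MTMP: crit=%d emerg=%d\n", crit, emerg);
	return 0;
}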
if (crit_temp > emerg_temp) { dev_warn(dev, "%s : Critical threshold %d is above emergency threshold %d\n", @@@ -286,7 -281,7 +286,7 @@@ static int mlxsw_thermal_get_temp(struc dev_err(dev, "Failed to query temp sensor\n"); return err; } - mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); if (temp > 0) mlxsw_thermal_tz_score_update(thermal, tzdev, thermal->trips, temp); @@@ -425,57 -420,36 +425,57 @@@ static int mlxsw_thermal_module_unbind( return err; }
-static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, - int *p_temp) +static void +mlxsw_thermal_module_temp_and_thresholds_get(struct mlxsw_core *core, + u16 sensor_index, int *p_temp, + int *p_crit_temp, + int *p_emerg_temp) { - struct mlxsw_thermal_module *tz = tzdev->devdata; - struct mlxsw_thermal *thermal = tz->parent; - struct device *dev = thermal->bus_info->dev; char mtmp_pl[MLXSW_REG_MTMP_LEN]; - int temp; int err;
- /* Read module temperature. */ - mlxsw_reg_mtmp_pack(mtmp_pl, MLXSW_REG_MTMP_MODULE_INDEX_MIN + - tz->module, false, false); - err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl); + /* Read module temperature and thresholds. */ + mlxsw_reg_mtmp_pack(mtmp_pl, sensor_index, false, false); + err = mlxsw_reg_query(core, MLXSW_REG(mtmp), mtmp_pl); if (err) { - /* Do not return error - in case of broken module's sensor - * it will cause error message flooding. + /* Set temperature and thresholds to zero to avoid passing + * uninitialized data back to the caller. */ - temp = 0; - *p_temp = (int) temp; - return 0; + *p_temp = 0; + *p_crit_temp = 0; + *p_emerg_temp = 0; + + return; } - mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + mlxsw_reg_mtmp_unpack(mtmp_pl, p_temp, NULL, p_crit_temp, p_emerg_temp, + NULL); +} + +static int mlxsw_thermal_module_temp_get(struct thermal_zone_device *tzdev, + int *p_temp) +{ + struct mlxsw_thermal_module *tz = tzdev->devdata; + struct mlxsw_thermal *thermal = tz->parent; + int temp, crit_temp, emerg_temp; + struct device *dev; + u16 sensor_index; + int err; + + dev = thermal->bus_info->dev; + sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + tz->module; + + /* Read module temperature and thresholds. */ + mlxsw_thermal_module_temp_and_thresholds_get(thermal->core, + sensor_index, &temp, + &crit_temp, &emerg_temp); *p_temp = temp;
if (!temp) return 0;
/* Update trip points. */ - err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz); + err = mlxsw_thermal_module_trips_update(dev, thermal->core, tz, + crit_temp, emerg_temp); if (!err && temp > 0) mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
@@@ -586,7 -560,7 +586,7 @@@ static int mlxsw_thermal_gearbox_temp_g if (err) return err;
- mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL); + mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL, NULL, NULL); if (temp > 0) mlxsw_thermal_tz_score_update(thermal, tzdev, tz->trips, temp);
@@@ -719,7 -693,8 +719,8 @@@ mlxsw_thermal_module_tz_init(struct mlx MLXSW_THERMAL_TRIP_MASK, module_tz, &mlxsw_thermal_module_ops, - NULL, 0, 0); + NULL, 0, + module_tz->parent->polling_delay); if (IS_ERR(module_tz->tzdev)) { err = PTR_ERR(module_tz->tzdev); return err; @@@ -742,10 -717,7 +743,10 @@@ mlxsw_thermal_module_init(struct devic struct mlxsw_thermal *thermal, u8 module) { struct mlxsw_thermal_module *module_tz; + int dummy_temp, crit_temp, emerg_temp; + u16 sensor_index;
+ sensor_index = MLXSW_REG_MTMP_MODULE_INDEX_MIN + module; module_tz = &thermal->tz_module_arr[module]; /* Skip if parent is already set (case of port split). */ if (module_tz->parent) @@@ -756,12 -728,8 +757,12 @@@ sizeof(thermal->trips)); /* Initialize all trip point. */ mlxsw_thermal_module_trips_reset(module_tz); + /* Read module temperature and thresholds. */ + mlxsw_thermal_module_temp_and_thresholds_get(core, sensor_index, &dummy_temp, + &crit_temp, &emerg_temp); /* Update trip point according to the module data. */ - return mlxsw_thermal_module_trips_update(dev, core, module_tz); + return mlxsw_thermal_module_trips_update(dev, core, module_tz, + crit_temp, emerg_temp); }
static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz) @@@ -848,7 -816,8 +849,8 @@@ mlxsw_thermal_gearbox_tz_init(struct ml MLXSW_THERMAL_TRIP_MASK, gearbox_tz, &mlxsw_thermal_gearbox_ops, - NULL, 0, 0); + NULL, 0, + gearbox_tz->parent->polling_delay); if (IS_ERR(gearbox_tz->tzdev)) return PTR_ERR(gearbox_tz->tzdev);
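Two things change in core_thermal.c: the trip-point update now consumes the critical and emergency thresholds reported by MTMP and only falls back to querying the module EEPROM when MTMP returned zeroes, and the module/gearbox thermal zones are registered with the parent's polling interval instead of 0, so the thermal core keeps polling them on its own. For orientation, a minimal sketch of the registration call with the two delay arguments labelled; everything except the thermal core API itself (my_zone, my_ops, my_data, num_trips, trip_mask, polling_delay) is a placeholder:

    struct thermal_zone_device *tzdev;

    tzdev = thermal_zone_device_register("my_zone",
                                         num_trips,      /* number of trip points */
                                         trip_mask,      /* bitmap of writable trips */
                                         my_data,        /* handed back to my_ops callbacks */
                                         &my_ops,
                                         NULL,           /* no thermal_zone_params */
                                         0,              /* passive_delay in ms, unused here */
                                         polling_delay); /* poll interval in ms; 0 disables polling */
    if (IS_ERR(tzdev))
            return PTR_ERR(tzdev);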
diff --combined drivers/net/ethernet/mellanox/mlxsw/reg.h index 5304309ecb9d,2bc5a9003c6d..93f1db3927af --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@@ -3907,7 -3907,7 +3907,7 @@@ MLXSW_ITEM32(reg, qeec, max_shaper_bs, #define MLXSW_REG_QEEC_HIGHEST_SHAPER_BS 25 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP1 5 #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP2 11 - #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 5 + #define MLXSW_REG_QEEC_LOWEST_SHAPER_BS_SP3 11
static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port, enum mlxsw_reg_qeec_hr hr, u8 index, @@@ -8305,8 -8305,6 +8305,8 @@@ enum MLXSW_REG_RECR2_TCP_UDP_EN_IPV4 = 7, /* Enable TCP/UDP header fields if packet is IPv6 */ MLXSW_REG_RECR2_TCP_UDP_EN_IPV6 = 8, + + __MLXSW_REG_RECR2_HEADER_CNT, };
/* reg_recr2_outer_header_enables @@@ -8341,8 -8339,6 +8341,8 @@@ enum MLXSW_REG_RECR2_TCP_UDP_SPORT = 74, /* TCP/UDP Destination Port */ MLXSW_REG_RECR2_TCP_UDP_DPORT = 75, + + __MLXSW_REG_RECR2_FIELD_CNT, };
/* reg_recr2_outer_header_fields_enable @@@ -8351,47 -8347,47 +8351,47 @@@ */ MLXSW_ITEM_BIT_ARRAY(reg, recr2, outer_header_fields_enable, 0x14, 0x14, 1);
-static inline void mlxsw_reg_recr2_ipv4_sip_enable(char *payload) -{ - int i; - - for (i = MLXSW_REG_RECR2_IPV4_SIP0; i <= MLXSW_REG_RECR2_IPV4_SIP3; i++) - mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, - true); -} - -static inline void mlxsw_reg_recr2_ipv4_dip_enable(char *payload) -{ - int i; - - for (i = MLXSW_REG_RECR2_IPV4_DIP0; i <= MLXSW_REG_RECR2_IPV4_DIP3; i++) - mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, - true); -} - -static inline void mlxsw_reg_recr2_ipv6_sip_enable(char *payload) -{ - int i = MLXSW_REG_RECR2_IPV6_SIP0_7; - - mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true); - - i = MLXSW_REG_RECR2_IPV6_SIP8; - for (; i <= MLXSW_REG_RECR2_IPV6_SIP15; i++) - mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, - true); -} - -static inline void mlxsw_reg_recr2_ipv6_dip_enable(char *payload) -{ - int i = MLXSW_REG_RECR2_IPV6_DIP0_7; +/* reg_recr2_inner_header_enables + * Bit mask where each bit enables a specific inner layer to be included in the + * hash calculation. Same values as reg_recr2_outer_header_enables. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, inner_header_enables, 0x2C, 0x04, 1);
- mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, true); +enum { + /* Inner IPv4 Source IP */ + MLXSW_REG_RECR2_INNER_IPV4_SIP0 = 3, + MLXSW_REG_RECR2_INNER_IPV4_SIP3 = 6, + /* Inner IPv4 Destination IP */ + MLXSW_REG_RECR2_INNER_IPV4_DIP0 = 7, + MLXSW_REG_RECR2_INNER_IPV4_DIP3 = 10, + /* Inner IP Protocol */ + MLXSW_REG_RECR2_INNER_IPV4_PROTOCOL = 11, + /* Inner IPv6 Source IP */ + MLXSW_REG_RECR2_INNER_IPV6_SIP0_7 = 12, + MLXSW_REG_RECR2_INNER_IPV6_SIP8 = 20, + MLXSW_REG_RECR2_INNER_IPV6_SIP15 = 27, + /* Inner IPv6 Destination IP */ + MLXSW_REG_RECR2_INNER_IPV6_DIP0_7 = 28, + MLXSW_REG_RECR2_INNER_IPV6_DIP8 = 36, + MLXSW_REG_RECR2_INNER_IPV6_DIP15 = 43, + /* Inner IPv6 Next Header */ + MLXSW_REG_RECR2_INNER_IPV6_NEXT_HEADER = 44, + /* Inner IPv6 Flow Label */ + MLXSW_REG_RECR2_INNER_IPV6_FLOW_LABEL = 45, + /* Inner TCP/UDP Source Port */ + MLXSW_REG_RECR2_INNER_TCP_UDP_SPORT = 46, + /* Inner TCP/UDP Destination Port */ + MLXSW_REG_RECR2_INNER_TCP_UDP_DPORT = 47, + + __MLXSW_REG_RECR2_INNER_FIELD_CNT, +};
- i = MLXSW_REG_RECR2_IPV6_DIP8; - for (; i <= MLXSW_REG_RECR2_IPV6_DIP15; i++) - mlxsw_reg_recr2_outer_header_fields_enable_set(payload, i, - true); -} +/* reg_recr2_inner_header_fields_enable + * Inner packet fields to enable for ECMP hash subject to inner_header_enables. + * Access: RW + */ +MLXSW_ITEM_BIT_ARRAY(reg, recr2, inner_header_fields_enable, 0x30, 0x08, 1);
static inline void mlxsw_reg_recr2_pack(char *payload, u32 seed) { @@@ -9463,14 -9459,6 +9463,14 @@@ MLXSW_ITEM32(reg, mtmp, sensor_index, 0 ((s16)((GENMASK(15, 0) + (v_) + 1) \ * 125)); })
+/* reg_mtmp_max_operational_temperature + * The highest temperature in the nominal operational range. Reading is in + * 0.125 Celsius degrees units. + * In case of module this is SFF critical temperature threshold. + * Access: RO + */ +MLXSW_ITEM32(reg, mtmp, max_operational_temperature, 0x04, 16, 16); + /* reg_mtmp_temperature * Temperature reading from the sensor. Reading is in 0.125 Celsius * degrees units. @@@ -9549,9 -9537,7 +9549,9 @@@ static inline void mlxsw_reg_mtmp_pack( }
static inline void mlxsw_reg_mtmp_unpack(char *payload, int *p_temp, - int *p_max_temp, char *sensor_name) + int *p_max_temp, int *p_temp_hi, + int *p_max_oper_temp, + char *sensor_name) { s16 temp;
@@@ -9563,14 -9549,6 +9563,14 @@@ temp = mlxsw_reg_mtmp_max_temperature_get(payload); *p_max_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); } + if (p_temp_hi) { + temp = mlxsw_reg_mtmp_temperature_threshold_hi_get(payload); + *p_temp_hi = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } + if (p_max_oper_temp) { + temp = mlxsw_reg_mtmp_max_operational_temperature_get(payload); + *p_max_oper_temp = MLXSW_REG_MTMP_TEMP_TO_MC(temp); + } if (sensor_name) mlxsw_reg_mtmp_sensor_name_memcpy_from(payload, sensor_name); } diff --combined drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 14282472c7a6,3beafc60747e..b307264e59cf --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@@ -319,8 -319,10 +319,8 @@@ int qlcnic_read_mac_addr(struct qlcnic_ static void qlcnic_delete_adapter_mac(struct qlcnic_adapter *adapter) { struct qlcnic_mac_vlan_list *cur; - struct list_head *head;
- list_for_each(head, &adapter->mac_list) { - cur = list_entry(head, struct qlcnic_mac_vlan_list, list); + list_for_each_entry(cur, &adapter->mac_list, list) { if (ether_addr_equal_unaligned(adapter->mac_addr, cur->mac_addr)) { qlcnic_sre_macaddr_change(adapter, cur->mac_addr, 0, QLCNIC_MAC_DEL); @@@ -2688,6 -2690,7 +2688,7 @@@ err_out_free_hw_res kfree(ahw);
err_out_free_res: + pci_disable_pcie_error_reporting(pdev); pci_release_regions(pdev);
err_out_disable_pdev: @@@ -3341,6 -3344,9 +3342,6 @@@ qlcnic_can_start_firmware(struct qlcnic do { msleep(1000); prev_state = QLC_SHARED_REG_RD32(adapter, QLCNIC_CRB_DEV_STATE); - - if (prev_state == QLCNIC_DEV_QUISCENT) - continue; } while ((prev_state != QLCNIC_DEV_READY) && --dev_init_timeo);
if (!dev_init_timeo) { diff --combined drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index 6556b5381ce8,ab1e0fcccabb..13d8eb43a485 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@@ -126,24 -126,24 +126,24 @@@ static void rmnet_get_stats64(struct ne struct rtnl_link_stats64 *s) { struct rmnet_priv *priv = netdev_priv(dev); - struct rmnet_vnd_stats total_stats; + struct rmnet_vnd_stats total_stats = { }; struct rmnet_pcpu_stats *pcpu_ptr; + struct rmnet_vnd_stats snapshot; unsigned int cpu, start;
- memset(&total_stats, 0, sizeof(struct rmnet_vnd_stats)); - for_each_possible_cpu(cpu) { pcpu_ptr = per_cpu_ptr(priv->pcpu_stats, cpu);
do { start = u64_stats_fetch_begin_irq(&pcpu_ptr->syncp); - total_stats.rx_pkts += pcpu_ptr->stats.rx_pkts; - total_stats.rx_bytes += pcpu_ptr->stats.rx_bytes; - total_stats.tx_pkts += pcpu_ptr->stats.tx_pkts; - total_stats.tx_bytes += pcpu_ptr->stats.tx_bytes; + snapshot = pcpu_ptr->stats; /* struct assignment */ } while (u64_stats_fetch_retry_irq(&pcpu_ptr->syncp, start));
- total_stats.tx_drops += pcpu_ptr->stats.tx_drops; + total_stats.rx_pkts += snapshot.rx_pkts; + total_stats.rx_bytes += snapshot.rx_bytes; + total_stats.tx_pkts += snapshot.tx_pkts; + total_stats.tx_bytes += snapshot.tx_bytes; + total_stats.tx_drops += snapshot.tx_drops; }
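rmnet_get_stats64() now copies each CPU's counter block into a local snapshot inside the u64_stats retry loop and folds it into the totals only after the fetch is known to be consistent. With the old layout the additions sat inside the retry loop, so a retried fetch added the same CPU's counters twice, and tx_drops was summed outside the seqcount protection entirely. A minimal sketch of the retry-safe pattern, assuming a driver whose per-CPU stats are guarded by a struct u64_stats_sync (all names below are illustrative):

    struct my_stats { u64 rx_pkts, tx_drops; };

    struct my_pcpu_stats {
            struct my_stats stats;
            struct u64_stats_sync syncp;
    };

    static void my_fold_stats(struct my_pcpu_stats __percpu *pcpu,
                              struct my_stats *total)
    {
            struct my_stats snapshot;
            unsigned int cpu, start;

            for_each_possible_cpu(cpu) {
                    struct my_pcpu_stats *p = per_cpu_ptr(pcpu, cpu);

                    do {
                            start = u64_stats_fetch_begin_irq(&p->syncp);
                            snapshot = p->stats;    /* struct copy; harmless to redo on retry */
                    } while (u64_stats_fetch_retry_irq(&p->syncp, start));

                    /* accumulate exactly once per CPU, outside the retry loop */
                    total->rx_pkts  += snapshot.rx_pkts;
                    total->tx_drops += snapshot.tx_drops;
            }
    }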
s->rx_packets = total_stats.rx_pkts; @@@ -166,7 -166,6 +166,7 @@@ static const struct net_device_ops rmne
static const char rmnet_gstrings_stats[][ETH_GSTRING_LEN] = { "Checksum ok", + "Bad IPv4 header checksum", "Checksum valid bit not set", "Checksum validation failed", "Checksum error bad buffer", @@@ -175,7 -174,6 +175,7 @@@ "Checksum skipped on ip fragment", "Checksum skipped", "Checksum computed in software", + "Checksum computed in hardware", };
static void rmnet_get_strings(struct net_device *dev, u32 stringset, u8 *buf) @@@ -356,4 -354,4 +356,4 @@@ int rmnet_vnd_update_dev_mtu(struct rmn }
return 0; - } + } diff --combined drivers/net/ethernet/realtek/r8169_main.c index 6a9fe9f7e0be,2ee72dc431cd..f744557c33a3 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@@ -34,6 -34,8 +34,6 @@@ #include "r8169.h" #include "r8169_firmware.h"
-#define MODULENAME "r8169" - #define FIRMWARE_8168D_1 "rtl_nic/rtl8168d-1.fw" #define FIRMWARE_8168D_2 "rtl_nic/rtl8168d-2.fw" #define FIRMWARE_8168E_1 "rtl_nic/rtl8168e-1.fw" @@@ -1452,7 -1454,7 +1452,7 @@@ static void rtl8169_get_drvinfo(struct struct rtl8169_private *tp = netdev_priv(dev); struct rtl_fw *rtl_fw = tp->rtl_fw;
- strlcpy(info->driver, MODULENAME, sizeof(info->driver)); + strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); strlcpy(info->bus_info, pci_name(tp->pci_dev), sizeof(info->bus_info)); BUILD_BUG_ON(sizeof(info->fw_version) < sizeof(rtl_fw->version)); if (rtl_fw) @@@ -1669,7 -1671,7 +1669,7 @@@ static void rtl8169_get_strings(struct { switch(stringset) { case ETH_SS_STATS: - memcpy(data, *rtl8169_gstrings, sizeof(rtl8169_gstrings)); + memcpy(data, rtl8169_gstrings, sizeof(rtl8169_gstrings)); break; } } @@@ -3508,6 -3510,7 +3508,6 @@@ static void rtl_hw_start_8106(struct rt rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000);
rtl_pcie_state_l2l3_disable(tp); - rtl_hw_aspm_clkreq_enable(tp, true); }
DECLARE_RTL_COND(rtl_mac_ocp_e00e_cond) @@@ -4114,7 -4117,6 +4114,7 @@@ static unsigned int rtl_quirk_packet_pa case RTL_GIGA_MAC_VER_61: case RTL_GIGA_MAC_VER_63: padto = max_t(unsigned int, padto, ETH_ZLEN); + break; default: break; } @@@ -5303,7 -5305,7 +5303,7 @@@ static int rtl_init_one(struct pci_dev return -ENODEV; }
- rc = pcim_iomap_regions(pdev, BIT(region), MODULENAME); + rc = pcim_iomap_regions(pdev, BIT(region), KBUILD_MODNAME); if (rc < 0) { dev_err(&pdev->dev, "cannot remap MMIO, aborting\n"); return rc; @@@ -5438,7 -5440,7 +5438,7 @@@ }
static struct pci_driver rtl8169_pci_driver = { - .name = MODULENAME, + .name = KBUILD_MODNAME, .id_table = rtl8169_pci_tbl, .probe = rtl_init_one, .remove = rtl_remove_one, diff --combined drivers/net/ethernet/renesas/sh_eth.c index 177523be4fb6,713d3629b4c1..840478692a37 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@@ -2287,7 -2287,7 +2287,7 @@@ static void sh_eth_get_strings(struct n { switch (stringset) { case ETH_SS_STATS: - memcpy(data, *sh_eth_gstrings_stats, + memcpy(data, sh_eth_gstrings_stats, sizeof(sh_eth_gstrings_stats)); break; } @@@ -3225,6 -3225,9 +3225,6 @@@ static int sh_eth_drv_probe(struct plat struct net_device *ndev; int ret;
- /* get base addr */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ndev = alloc_etherdev(sizeof(struct sh_eth_private)); if (!ndev) return -ENOMEM; @@@ -3242,7 -3245,7 +3242,7 @@@ mdp = netdev_priv(ndev); mdp->num_tx_ring = TX_RING_SIZE; mdp->num_rx_ring = RX_RING_SIZE; - mdp->addr = devm_ioremap_resource(&pdev->dev, res); + mdp->addr = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(mdp->addr)) { ret = PTR_ERR(mdp->addr); goto out_release; diff --combined drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index d8ae58bdbbe3,a696ada013eb..072eff8079d0 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@@ -230,6 -230,8 +230,6 @@@ static int stmmac_mtl_setup(struct plat plat->tx_sched_algorithm = MTL_TX_ALGORITHM_WFQ; else if (of_property_read_bool(tx_node, "snps,tx-sched-dwrr")) plat->tx_sched_algorithm = MTL_TX_ALGORITHM_DWRR; - else if (of_property_read_bool(tx_node, "snps,tx-sched-sp")) - plat->tx_sched_algorithm = MTL_TX_ALGORITHM_SP; else plat->tx_sched_algorithm = MTL_TX_ALGORITHM_SP;
@@@ -600,13 -602,6 +600,13 @@@ stmmac_probe_config_dt(struct platform_ goto error_hw_init; }
+ plat->stmmac_ahb_rst = devm_reset_control_get_optional_shared( + &pdev->dev, "ahb"); + if (IS_ERR(plat->stmmac_ahb_rst)) { + ret = plat->stmmac_ahb_rst; + goto error_hw_init; + } + return plat;
error_hw_init: @@@ -627,6 -622,8 +627,8 @@@ error_pclk_get void stmmac_remove_config_dt(struct platform_device *pdev, struct plat_stmmacenet_data *plat) { + clk_disable_unprepare(plat->stmmac_clk); + clk_disable_unprepare(plat->pclk); of_node_put(plat->phy_node); of_node_put(plat->mdio_node); } diff --combined drivers/net/hamradio/mkiss.c index 9933c87c1327,7685a1721597..b99128669bc8 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@@ -276,7 -276,7 +276,7 @@@ static void ax_bump(struct mkiss *ax */ *ax->rbuff &= ~0x20; } - } + }
count = ax->rcount;
@@@ -501,7 -501,7 +501,7 @@@ static void ax_encaps(struct net_devic default: count = kiss_esc(p, ax->xbuff, len); } - } + } spin_unlock_bh(&ax->buflock);
set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); @@@ -799,6 -799,7 +799,7 @@@ static void mkiss_close(struct tty_stru ax->tty = NULL;
unregister_netdev(ax->dev); + free_netdev(ax->dev); }
/* Perform I/O control on an active ax25 channel. */ @@@ -815,7 -816,7 +816,7 @@@ static int mkiss_ioctl(struct tty_struc dev = ax->dev;
switch (cmd) { - case SIOCGIFNAME: + case SIOCGIFNAME: err = copy_to_user((void __user *) arg, ax->dev->name, strlen(ax->dev->name) + 1) ? -EFAULT : 0; break; diff --combined drivers/net/mhi/net.c index 832d9de42f62,b806f2f8f859..6aa753387372 --- a/drivers/net/mhi/net.c +++ b/drivers/net/mhi/net.c @@@ -11,7 -11,6 +11,7 @@@ #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/u64_stats_sync.h> +#include <linux/wwan.h>
#include "mhi.h"
@@@ -19,12 -18,6 +19,12 @@@ #define MHI_NET_MAX_MTU 0xffff #define MHI_NET_DEFAULT_MTU 0x4000
+/* When set to false, the default netdev (link 0) is not created, and it's up + * to user to create the link (via wwan rtnetlink). + */ +static bool create_default_iface = true; +module_param(create_default_iface, bool, 0); + struct mhi_device_info { const char *netname; const struct mhi_net_proto *proto; @@@ -56,7 -49,7 +56,7 @@@ static int mhi_ndo_stop(struct net_devi return 0; }
- static int mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) + static netdev_tx_t mhi_ndo_xmit(struct sk_buff *skb, struct net_device *ndev) { struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); const struct mhi_net_proto *proto = mhi_netdev->proto; @@@ -302,33 -295,32 +302,33 @@@ static void mhi_net_rx_refill_work(stru schedule_delayed_work(&mhi_netdev->rx_refill, HZ / 2); }
-static struct device_type wwan_type = { - .name = "wwan", -}; - -static int mhi_net_probe(struct mhi_device *mhi_dev, - const struct mhi_device_id *id) +static int mhi_net_newlink(void *ctxt, struct net_device *ndev, u32 if_id, + struct netlink_ext_ack *extack) { - const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data; - struct device *dev = &mhi_dev->dev; + const struct mhi_device_info *info; + struct mhi_device *mhi_dev = ctxt; struct mhi_net_dev *mhi_netdev; - struct net_device *ndev; int err;
- ndev = alloc_netdev(sizeof(*mhi_netdev), info->netname, - NET_NAME_PREDICTABLE, mhi_net_setup); - if (!ndev) - return -ENOMEM; + info = (struct mhi_device_info *)mhi_dev->id->driver_data; + + /* For now we only support one link (link context 0), driver must be + * reworked to break 1:1 relationship for net MBIM and to forward setup + * call to rmnet(QMAP) otherwise. + */ + if (if_id != 0) + return -EINVAL; + + if (dev_get_drvdata(&mhi_dev->dev)) + return -EBUSY;
mhi_netdev = netdev_priv(ndev); - dev_set_drvdata(dev, mhi_netdev); + + dev_set_drvdata(&mhi_dev->dev, mhi_netdev); mhi_netdev->ndev = ndev; mhi_netdev->mdev = mhi_dev; mhi_netdev->skbagg_head = NULL; mhi_netdev->proto = info->proto; - SET_NETDEV_DEV(ndev, &mhi_dev->dev); - SET_NETDEV_DEVTYPE(ndev, &wwan_type);
INIT_DELAYED_WORK(&mhi_netdev->rx_refill, mhi_net_rx_refill_work); u64_stats_init(&mhi_netdev->stats.rx_syncp); @@@ -342,10 -334,7 +342,10 @@@ /* Number of transfer descriptors determines size of the queue */ mhi_netdev->rx_queue_sz = mhi_get_free_desc_count(mhi_dev, DMA_FROM_DEVICE);
- err = register_netdev(ndev); + if (extack) + err = register_netdevice(ndev); + else + err = register_netdev(ndev); if (err) goto out_err;
@@@ -358,89 -347,23 +358,89 @@@ return 0;
out_err_proto: - unregister_netdev(ndev); + unregister_netdevice(ndev); out_err: free_netdev(ndev); return err; }
-static void mhi_net_remove(struct mhi_device *mhi_dev) +static void mhi_net_dellink(void *ctxt, struct net_device *ndev, + struct list_head *head) { - struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); + struct mhi_net_dev *mhi_netdev = netdev_priv(ndev); + struct mhi_device *mhi_dev = ctxt;
- unregister_netdev(mhi_netdev->ndev); + if (head) + unregister_netdevice_queue(ndev, head); + else + unregister_netdev(ndev);
- mhi_unprepare_from_transfer(mhi_netdev->mdev); + mhi_unprepare_from_transfer(mhi_dev);
kfree_skb(mhi_netdev->skbagg_head);
- free_netdev(mhi_netdev->ndev); + dev_set_drvdata(&mhi_dev->dev, NULL); +} + +static const struct wwan_ops mhi_wwan_ops = { + .owner = THIS_MODULE, + .priv_size = sizeof(struct mhi_net_dev), + .setup = mhi_net_setup, + .newlink = mhi_net_newlink, + .dellink = mhi_net_dellink, +}; + +static int mhi_net_probe(struct mhi_device *mhi_dev, + const struct mhi_device_id *id) +{ + const struct mhi_device_info *info = (struct mhi_device_info *)id->driver_data; + struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; + struct net_device *ndev; + int err; + + err = wwan_register_ops(&cntrl->mhi_dev->dev, &mhi_wwan_ops, mhi_dev); + if (err) + return err; + + if (!create_default_iface) + return 0; + + /* Create a default interface which is used as either RMNET real-dev, + * MBIM link 0 or ip link 0) + */ + ndev = alloc_netdev(sizeof(struct mhi_net_dev), info->netname, + NET_NAME_PREDICTABLE, mhi_net_setup); + if (!ndev) { + err = -ENOMEM; + goto err_unregister; + } + + SET_NETDEV_DEV(ndev, &mhi_dev->dev); + + err = mhi_net_newlink(mhi_dev, ndev, 0, NULL); + if (err) + goto err_release; + + return 0; + +err_release: + free_netdev(ndev); +err_unregister: + wwan_unregister_ops(&cntrl->mhi_dev->dev); + + return err; +} + +static void mhi_net_remove(struct mhi_device *mhi_dev) +{ + struct mhi_net_dev *mhi_netdev = dev_get_drvdata(&mhi_dev->dev); + struct mhi_controller *cntrl = mhi_dev->mhi_cntrl; + + /* rtnetlink takes care of removing remaining links */ + wwan_unregister_ops(&cntrl->mhi_dev->dev); + + if (create_default_iface) + mhi_net_dellink(mhi_dev, mhi_netdev->ndev, NULL); }
static const struct mhi_device_info mhi_hwip0 = { diff --combined drivers/net/usb/cdc_ncm.c index c67f11e0e9a7,df0d1837e4ed..24753a4da7e6 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@@ -192,8 -192,7 +192,8 @@@ static u32 cdc_ncm_check_tx_max(struct return val; }
-static ssize_t cdc_ncm_show_min_tx_pkt(struct device *d, struct device_attribute *attr, char *buf) +static ssize_t min_tx_pkt_show(struct device *d, + struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -201,8 -200,7 +201,8 @@@ return sprintf(buf, "%u\n", ctx->min_tx_pkt); }
-static ssize_t cdc_ncm_show_rx_max(struct device *d, struct device_attribute *attr, char *buf) +static ssize_t rx_max_show(struct device *d, + struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -210,8 -208,7 +210,8 @@@ return sprintf(buf, "%u\n", ctx->rx_max); }
-static ssize_t cdc_ncm_show_tx_max(struct device *d, struct device_attribute *attr, char *buf) +static ssize_t tx_max_show(struct device *d, + struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -219,8 -216,7 +219,8 @@@ return sprintf(buf, "%u\n", ctx->tx_max); }
-static ssize_t cdc_ncm_show_tx_timer_usecs(struct device *d, struct device_attribute *attr, char *buf) +static ssize_t tx_timer_usecs_show(struct device *d, + struct device_attribute *attr, char *buf) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -228,9 -224,7 +228,9 @@@ return sprintf(buf, "%u\n", ctx->timer_interval / (u32)NSEC_PER_USEC); }
-static ssize_t cdc_ncm_store_min_tx_pkt(struct device *d, struct device_attribute *attr, const char *buf, size_t len) +static ssize_t min_tx_pkt_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -244,9 -238,7 +244,9 @@@ return len; }
-static ssize_t cdc_ncm_store_rx_max(struct device *d, struct device_attribute *attr, const char *buf, size_t len) +static ssize_t rx_max_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -259,9 -251,7 +259,9 @@@ return len; }
-static ssize_t cdc_ncm_store_tx_max(struct device *d, struct device_attribute *attr, const char *buf, size_t len) +static ssize_t tx_max_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -274,9 -264,7 +274,9 @@@ return len; }
-static ssize_t cdc_ncm_store_tx_timer_usecs(struct device *d, struct device_attribute *attr, const char *buf, size_t len) +static ssize_t tx_timer_usecs_store(struct device *d, + struct device_attribute *attr, + const char *buf, size_t len) { struct usbnet *dev = netdev_priv(to_net_dev(d)); struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; @@@ -297,10 -285,10 +297,10 @@@ return len; }
-static DEVICE_ATTR(min_tx_pkt, 0644, cdc_ncm_show_min_tx_pkt, cdc_ncm_store_min_tx_pkt); -static DEVICE_ATTR(rx_max, 0644, cdc_ncm_show_rx_max, cdc_ncm_store_rx_max); -static DEVICE_ATTR(tx_max, 0644, cdc_ncm_show_tx_max, cdc_ncm_store_tx_max); -static DEVICE_ATTR(tx_timer_usecs, 0644, cdc_ncm_show_tx_timer_usecs, cdc_ncm_store_tx_timer_usecs); +static DEVICE_ATTR_RW(min_tx_pkt); +static DEVICE_ATTR_RW(rx_max); +static DEVICE_ATTR_RW(tx_max); +static DEVICE_ATTR_RW(tx_timer_usecs);
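The cdc_ncm sysfs handlers are renamed because DEVICE_ATTR_RW(name) builds the attribute by token pasting and therefore expects callbacks named <name>_show and <name>_store. Roughly, the macro expands along these lines (the 0644 mode and field layout shown here are a sketch of the convention, not the literal header text):

    static DEVICE_ATTR_RW(rx_max);

    /* is approximately: */
    static struct device_attribute dev_attr_rx_max = {
            .attr  = { .name = "rx_max", .mode = 0644 },
            .show  = rx_max_show,
            .store = rx_max_store,
    };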
static ssize_t ndp_to_end_show(struct device *d, struct device_attribute *attr, char *buf) { @@@ -640,7 -628,7 +640,7 @@@ out /* set MTU to max supported by the device if necessary */ dev->net->mtu = min_t(int, dev->net->mtu, ctx->max_datagram_size - cdc_ncm_eth_hlen(dev));
- /* do not exceed operater preferred MTU */ + /* do not exceed operator preferred MTU */ if (ctx->mbim_extended_desc) { mbim_mtu = le16_to_cpu(ctx->mbim_extended_desc->wMTU); if (mbim_mtu != 0 && mbim_mtu < dev->net->mtu) @@@ -697,7 -685,7 +697,7 @@@ static int cdc_ncm_setup(struct usbnet struct cdc_ncm_ctx *ctx = (struct cdc_ncm_ctx *)dev->data[0]; u32 def_rx, def_tx;
- /* be conservative when selecting intial buffer size to + /* be conservative when selecting initial buffer size to * increase the number of hosts this will work for */ def_rx = min_t(u32, CDC_NCM_NTB_DEF_SIZE_RX, @@@ -1892,7 -1880,7 +1892,7 @@@ static void cdc_ncm_status(struct usbne static const struct driver_info cdc_ncm_info = { .description = "CDC NCM", .flags = FLAG_POINTTOPOINT | FLAG_NO_SETINT | FLAG_MULTI_PACKET - | FLAG_LINK_INTR, + | FLAG_LINK_INTR | FLAG_ETHER, .bind = cdc_ncm_bind, .unbind = cdc_ncm_unbind, .manage_power = usbnet_manage_power, diff --combined drivers/net/usb/qmi_wwan.c index db157f21a322,bc55ec739af9..6a2e4f884b12 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@@ -575,7 -575,7 +575,7 @@@ static int qmi_wwan_rx_fixup(struct usb
if (info->flags & QMI_WWAN_FLAG_PASS_THROUGH) { skb->protocol = htons(ETH_P_MAP); - return (netif_rx(skb) == NET_RX_SUCCESS); + return 1; }
switch (skb->data[0] & 0xf0) { @@@ -710,8 -710,7 +710,8 @@@ static int qmi_wwan_register_subdriver(
/* register subdriver */ subdriver = usb_cdc_wdm_register(info->control, &dev->status->desc, - 4096, &qmi_wwan_cdc_wdm_manage_power); + 4096, WWAN_PORT_QMI, + &qmi_wwan_cdc_wdm_manage_power); if (IS_ERR(subdriver)) { dev_err(&info->control->dev, "subdriver registration failed\n"); rv = PTR_ERR(subdriver); diff --combined drivers/net/usb/r8152.c index 62cd48dc2878,e25bfb7021ed..1692d3b1b6e1 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@@ -931,8 -931,6 +931,8 @@@ struct r8152 u32 rx_pending; u32 fc_pause_on, fc_pause_off;
+ unsigned int pipe_in, pipe_out, pipe_intr, pipe_ctrl_in, pipe_ctrl_out; + u32 support_2500full:1; u32 lenovo_macpassthru:1; u32 dell_tb_rx_agg_bug:1; @@@ -1200,7 -1198,7 +1200,7 @@@ int get_registers(struct r8152 *tp, u1 if (!tmp) return -ENOMEM;
- ret = usb_control_msg(tp->udev, usb_rcvctrlpipe(tp->udev, 0), + ret = usb_control_msg(tp->udev, tp->pipe_ctrl_in, RTL8152_REQ_GET_REGS, RTL8152_REQT_READ, value, index, tmp, size, 500); if (ret < 0) @@@ -1223,7 -1221,7 +1223,7 @@@ int set_registers(struct r8152 *tp, u1 if (!tmp) return -ENOMEM;
- ret = usb_control_msg(tp->udev, usb_sndctrlpipe(tp->udev, 0), + ret = usb_control_msg(tp->udev, tp->pipe_ctrl_out, RTL8152_REQ_SET_REGS, RTL8152_REQT_WRITE, value, index, tmp, size, 500);
@@@ -2043,7 -2041,7 +2043,7 @@@ static int alloc_all_mem(struct r8152 * goto err1;
tp->intr_interval = (int)ep_intr->desc.bInterval; - usb_fill_int_urb(tp->intr_urb, tp->udev, usb_rcvintpipe(tp->udev, 3), + usb_fill_int_urb(tp->intr_urb, tp->udev, tp->pipe_intr, tp->intr_buff, INTBUFSIZE, intr_callback, tp, tp->intr_interval);
@@@ -2307,7 -2305,7 +2307,7 @@@ static int r8152_tx_agg_fill(struct r81 if (ret < 0) goto out_tx_fill;
- usb_fill_bulk_urb(agg->urb, tp->udev, usb_sndbulkpipe(tp->udev, 2), + usb_fill_bulk_urb(agg->urb, tp->udev, tp->pipe_out, agg->head, (int)(tx_data - (u8 *)agg->head), (usb_complete_t)write_bulk_callback, agg);
@@@ -2447,7 -2445,7 +2447,7 @@@ static int rx_bottom(struct r8152 *tp, unsigned int pkt_len, rx_frag_head_sz; struct sk_buff *skb;
- /* limite the skb numbers for rx_queue */ + /* limit the skb numbers for rx_queue */ if (unlikely(skb_queue_len(&tp->rx_queue) >= 1000)) break;
@@@ -2622,7 -2620,7 +2622,7 @@@ int r8152_submit_rx(struct r8152 *tp, s !test_bit(WORK_ENABLE, &tp->flags) || !netif_carrier_ok(tp->netdev)) return 0;
- usb_fill_bulk_urb(agg->urb, tp->udev, usb_rcvbulkpipe(tp->udev, 1), + usb_fill_bulk_urb(agg->urb, tp->udev, tp->pipe_in, agg->buffer, tp->rx_buf_sz, (usb_complete_t)read_bulk_callback, agg);
@@@ -8213,7 -8211,7 +8213,7 @@@ static int rtl8152_post_reset(struct us if (!tp) return 0;
- /* reset the MAC adddress in case of policy change */ + /* reset the MAC address in case of policy change */ if (determine_ethernet_addr(tp, &sa) >= 0) { rtnl_lock(); dev_set_mac_address (tp->netdev, &sa, NULL); @@@ -8680,7 -8678,7 +8680,7 @@@ static void rtl8152_get_strings(struct { switch (stringset) { case ETH_SS_STATS: - memcpy(data, *rtl8152_gstrings, sizeof(rtl8152_gstrings)); + memcpy(data, rtl8152_gstrings, sizeof(rtl8152_gstrings)); break; } } @@@ -8969,79 -8967,6 +8969,79 @@@ static int rtl8152_set_ringparam(struc return 0; }
+static void rtl8152_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) +{ + struct r8152 *tp = netdev_priv(netdev); + u16 bmcr, lcladv, rmtadv; + u8 cap; + + if (usb_autopm_get_interface(tp->intf) < 0) + return; + + mutex_lock(&tp->control); + + bmcr = r8152_mdio_read(tp, MII_BMCR); + lcladv = r8152_mdio_read(tp, MII_ADVERTISE); + rmtadv = r8152_mdio_read(tp, MII_LPA); + + mutex_unlock(&tp->control); + + usb_autopm_put_interface(tp->intf); + + if (!(bmcr & BMCR_ANENABLE)) { + pause->autoneg = 0; + pause->rx_pause = 0; + pause->tx_pause = 0; + return; + } + + pause->autoneg = 1; + + cap = mii_resolve_flowctrl_fdx(lcladv, rmtadv); + + if (cap & FLOW_CTRL_RX) + pause->rx_pause = 1; + + if (cap & FLOW_CTRL_TX) + pause->tx_pause = 1; +} + +static int rtl8152_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) +{ + struct r8152 *tp = netdev_priv(netdev); + u16 old, new1; + u8 cap = 0; + int ret; + + ret = usb_autopm_get_interface(tp->intf); + if (ret < 0) + return ret; + + mutex_lock(&tp->control); + + if (pause->autoneg && !(r8152_mdio_read(tp, MII_BMCR) & BMCR_ANENABLE)) { + ret = -EINVAL; + goto out; + } + + if (pause->rx_pause) + cap |= FLOW_CTRL_RX; + + if (pause->tx_pause) + cap |= FLOW_CTRL_TX; + + old = r8152_mdio_read(tp, MII_ADVERTISE); + new1 = (old & ~(ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM)) | mii_advertise_flowctrl(cap); + if (old != new1) + r8152_mdio_write(tp, MII_ADVERTISE, new1); + +out: + mutex_unlock(&tp->control); + usb_autopm_put_interface(tp->intf); + + return ret; +} + static const struct ethtool_ops ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = rtl8152_get_drvinfo, @@@ -9064,8 -8989,6 +9064,8 @@@ .set_tunable = rtl8152_set_tunable, .get_ringparam = rtl8152_get_ringparam, .set_ringparam = rtl8152_set_ringparam, + .get_pauseparam = rtl8152_get_pauseparam, + .set_pauseparam = rtl8152_set_pauseparam, };
static int rtl8152_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) @@@ -9509,12 -9432,6 +9509,12 @@@ static int rtl8152_probe(struct usb_int tp->intf = intf; tp->version = version;
+ tp->pipe_ctrl_in = usb_rcvctrlpipe(udev, 0); + tp->pipe_ctrl_out = usb_sndctrlpipe(udev, 0); + tp->pipe_in = usb_rcvbulkpipe(udev, 1); + tp->pipe_out = usb_sndbulkpipe(udev, 2); + tp->pipe_intr = usb_rcvintpipe(udev, 3); + switch (version) { case RTL_VER_01: case RTL_VER_02: diff --combined drivers/net/vrf.c index 07eaef5e73c2,28a6c4cfe9b8..452822f88214 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@@ -274,7 -274,7 +274,7 @@@ vrf_map_register_dev(struct net_device int res;
/* we pre-allocate elements used in the spin-locked section (so that we - * keep the spinlock as short as possibile). + * keep the spinlock as short as possible). */ new_me = vrf_map_elem_alloc(GFP_KERNEL); if (!new_me) @@@ -1183,9 -1183,6 +1183,6 @@@ static int vrf_dev_init(struct net_devi
dev->flags = IFF_MASTER | IFF_NOARP;
- /* MTU is irrelevant for VRF device; set to 64k similar to lo */ - dev->mtu = 64 * 1024; - /* similarly, oper state is irrelevant; set to up to avoid confusion */ dev->operstate = IF_OPER_UP; netdev_lockdep_set_classes(dev); @@@ -1685,7 -1682,8 +1682,8 @@@ static void vrf_setup(struct net_devic * which breaks networking. */ dev->min_mtu = IPV6_MIN_MTU; - dev->max_mtu = ETH_MAX_MTU; + dev->max_mtu = IP6_MAX_MTU; + dev->mtu = dev->max_mtu; }
static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], diff --combined drivers/ptp/ptp_clock.c index a780435331c8,21c4c34c52d8..841d8900504d --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@@ -63,6 -63,27 +63,6 @@@ static void enqueue_external_timestamp( spin_unlock_irqrestore(&queue->lock, flags); }
-long scaled_ppm_to_ppb(long ppm) -{ - /* - * The 'freq' field in the 'struct timex' is in parts per - * million, but with a 16 bit binary fractional field. - * - * We want to calculate - * - * ppb = scaled_ppm * 1000 / 2^16 - * - * which simplifies to - * - * ppb = scaled_ppm * 125 / 2^13 - */ - s64 ppb = 1 + ppm; - ppb *= 125; - ppb >>= 13; - return (long) ppb; -} -EXPORT_SYMBOL(scaled_ppm_to_ppb); - /* posix clock implementation */
static int ptp_clock_getres(struct posix_clock *pc, struct timespec64 *tp) @@@ -117,7 -138,7 +117,7 @@@ static int ptp_clock_adjtime(struct pos delta = ktime_to_ns(kt); err = ops->adjtime(ops, delta); } else if (tx->modes & ADJ_FREQUENCY) { - s32 ppb = scaled_ppm_to_ppb(tx->freq); + long ppb = scaled_ppm_to_ppb(tx->freq); if (ppb > ops->max_adj || ppb < -ops->max_adj) return -ERANGE; if (ops->adjfine) diff --combined include/linux/device.h index a1e7cab2c7bf,f1a00040fa53..8f0ec3081a24 --- a/include/linux/device.h +++ b/include/linux/device.h @@@ -570,7 -570,7 +570,7 @@@ struct device * @flags: Link flags. * @rpm_active: Whether or not the consumer device is runtime-PM-active. * @kref: Count repeated addition of the same link. - * @rcu_head: An RCU head to use for deferred execution of SRCU callbacks. + * @rm_work: Work structure used for removing the link. * @supplier_preactivated: Supplier has been made active before consumer probe. */ struct device_link { @@@ -583,9 -583,7 +583,7 @@@ u32 flags; refcount_t rpm_active; struct kref kref; - #ifdef CONFIG_SRCU - struct rcu_head rcu_head; - #endif + struct work_struct rm_work; bool supplier_preactivated; /* Owned by consumer probe. */ };
@@@ -819,7 -817,6 +817,7 @@@ int device_online(struct device *dev) void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode); void device_set_of_node_from_dev(struct device *dev, const struct device *dev2); +void device_set_node(struct device *dev, struct fwnode_handle *fwnode);
static inline int dev_num_vf(struct device *dev) { diff --combined include/linux/mlx5/driver.h index f90f84061438,f8902bcd91e2..1efe37466969 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@@ -542,6 -542,10 +542,10 @@@ struct mlx5_core_roce enum { MLX5_PRIV_FLAGS_DISABLE_IB_ADEV = 1 << 0, MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV = 1 << 1, + /* Set during device detach to block any further devices + * creation/deletion on drivers rescan. Unset during device attach. + */ + MLX5_PRIV_FLAGS_DETACH = 1 << 2, };
struct mlx5_adev { @@@ -550,7 -554,6 +554,7 @@@ int idx; };
+struct mlx5_ft_pool; struct mlx5_priv { /* IRQ table valid only for real pci devices PF or VF */ struct mlx5_irq_table *irq_table; @@@ -603,7 -606,6 +607,7 @@@ struct mlx5_core_roce roce; struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; + struct mlx5_ft_pool *ft_pool;
struct mlx5_bfreg_data bfregs; struct mlx5_uars_page *uar; diff --combined include/linux/mm.h index a0434e8c2617,8ae31622deef..6cf4c6842ff0 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@@ -1668,11 -1668,10 +1668,11 @@@ struct address_space *page_mapping(stru static inline bool page_is_pfmemalloc(const struct page *page) { /* - * Page index cannot be this large so this must be - * a pfmemalloc page. + * lru.next has bit 1 set if the page is allocated from the + * pfmemalloc reserves. Callers may simply overwrite it if + * they do not need to preserve that information. */ - return page->index == -1UL; + return (uintptr_t)page->lru.next & BIT(1); }
/* @@@ -1681,12 -1680,12 +1681,12 @@@ */ static inline void set_page_pfmemalloc(struct page *page) { - page->index = -1UL; + page->lru.next = (void *)BIT(1); }
static inline void clear_page_pfmemalloc(struct page *page) { - page->index = 0; + page->lru.next = NULL; }
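The pfmemalloc marker moves from overloading page->index to setting bit 1 of page->lru.next, which is safe because a genuine list pointer is at least word aligned and so never has its two low bits set; callers that reuse the field can simply overwrite it. The same low-bit tagging trick in ordinary C, as a small self-contained demo (names are illustrative only):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    struct node { struct node *next; };

    /* Aligned pointers have their low bits clear, so bit 1 can carry a flag. */
    static struct node *tag(struct node *p)    { return (struct node *)((uintptr_t)p | 2); }
    static int is_tagged(const struct node *p) { return ((uintptr_t)p & 2) != 0; }
    static struct node *untag(struct node *p)  { return (struct node *)((uintptr_t)p & ~(uintptr_t)2); }

    int main(void)
    {
            struct node n, m = { .next = &n };

            assert(!is_tagged(m.next));
            m.next = tag(m.next);                   /* mark the entry */
            assert(is_tagged(m.next));
            assert(untag(m.next) == &n);            /* original pointer is recoverable */
            printf("tagged %p, recovered %p\n", (void *)m.next, (void *)untag(m.next));
            return 0;
    }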
/* @@@ -1720,6 -1719,7 +1720,7 @@@ struct zap_details struct address_space *check_mapping; /* Check page->mapping if set */ pgoff_t first_index; /* Lowest page->index to unmap */ pgoff_t last_index; /* Highest page->index to unmap */ + struct page *single_page; /* Locked page to be unmapped */ };
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, @@@ -1767,6 -1767,7 +1768,7 @@@ extern vm_fault_t handle_mm_fault(struc extern int fixup_user_fault(struct mm_struct *mm, unsigned long address, unsigned int fault_flags, bool *unlocked); + void unmap_mapping_page(struct page *page); void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows); void unmap_mapping_range(struct address_space *mapping, @@@ -1787,6 -1788,7 +1789,7 @@@ static inline int fixup_user_fault(stru BUG(); return -EFAULT; } + static inline void unmap_mapping_page(struct page *page) { } static inline void unmap_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t nr, bool even_cows) { } static inline void unmap_mapping_range(struct address_space *mapping, diff --combined include/linux/mm_types.h index ed6862eacb52,8f0fb62e8975..862f88a8c28a --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@@ -96,13 -96,6 +96,13 @@@ struct page unsigned long private; }; struct { /* page_pool used by netstack */ + /** + * @pp_magic: magic value to avoid recycling non + * page_pool allocated pages. + */ + unsigned long pp_magic; + struct page_pool *pp; + unsigned long _pp_mapping_pad; /** * @dma_addr: might require a 64-bit value on * 32-bit architectures. @@@ -452,13 -445,6 +452,6 @@@ struct mm_struct */ atomic_t has_pinned;
- /** - * @write_protect_seq: Locked when any thread is write - * protecting pages mapped by this mm to enforce a later COW, - * for instance during page table copying for fork(). - */ - seqcount_t write_protect_seq; - #ifdef CONFIG_MMU atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif @@@ -467,6 -453,18 +460,18 @@@ spinlock_t page_table_lock; /* Protects page tables and some * counters */ + /* + * With some kernel config, the current mmap_lock's offset + * inside 'mm_struct' is at 0x120, which is very optimal, as + * its two hot fields 'count' and 'owner' sit in 2 different + * cachelines, and when mmap_lock is highly contended, both + * of the 2 fields will be accessed frequently, current layout + * will help to reduce cache bouncing. + * + * So please be careful with adding new fields before + * mmap_lock, which can easily push the 2 fields into one + * cacheline. + */ struct rw_semaphore mmap_lock;
struct list_head mmlist; /* List of maybe swapped mm's. These @@@ -487,7 -485,15 +492,15 @@@ unsigned long stack_vm; /* VM_STACK */ unsigned long def_flags;
+ /** + * @write_protect_seq: Locked when any thread is write + * protecting pages mapped by this mm to enforce a later COW, + * for instance during page table copying for fork(). + */ + seqcount_t write_protect_seq; + spinlock_t arg_lock; /* protect the below fields */ + unsigned long start_code, end_code, start_data, end_data; unsigned long start_brk, brk, start_stack; unsigned long arg_start, arg_end, env_start, env_end; diff --combined include/linux/ptp_clock_kernel.h index a311bddd9e85,51d7f1b8b32a..aba237c0b3a2 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@@ -186,32 -186,6 +186,32 @@@ struct ptp_clock_event }; };
+/** + * scaled_ppm_to_ppb() - convert scaled ppm to ppb + * + * @ppm: Parts per million, but with a 16 bit binary fractional field + */ - static inline s32 scaled_ppm_to_ppb(long ppm) ++static inline long scaled_ppm_to_ppb(long ppm) +{ + /* + * The 'freq' field in the 'struct timex' is in parts per + * million, but with a 16 bit binary fractional field. + * + * We want to calculate + * + * ppb = scaled_ppm * 1000 / 2^16 + * + * which simplifies to + * + * ppb = scaled_ppm * 125 / 2^13 + */ + s64 ppb = 1 + ppm; + + ppb *= 125; + ppb >>= 13; - return (s32)ppb; ++ return (long)ppb; +} + #if IS_REACHABLE(CONFIG_PTP_1588_CLOCK)
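The helper becomes a static inline defined ahead of the IS_REACHABLE(CONFIG_PTP_1588_CLOCK) block, so it is usable whether or not the PTP core is reachable. The arithmetic itself: scaled ppm carries a 16-bit binary fraction, so one whole ppm is 65536 scaled units, and since 1 ppm = 1000 ppb the factor 1000/2^16 reduces to the 125/2^13 used above. A self-contained userspace check of the formula:

    #include <stdint.h>
    #include <stdio.h>

    /* Same arithmetic as the kernel helper: ppb = scaled_ppm * 125 / 2^13 */
    static long scaled_ppm_to_ppb(long ppm)
    {
            int64_t ppb = 1 + ppm;

            ppb *= 125;
            ppb >>= 13;
            return (long)ppb;
    }

    int main(void)
    {
            printf("%ld\n", scaled_ppm_to_ppb(65536));       /* 1 ppm   -> 1000 ppb */
            printf("%ld\n", scaled_ppm_to_ppb(512 * 65536)); /* 512 ppm -> 512000 ppb */
            return 0;
    }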
/** @@@ -255,6 -229,14 +255,6 @@@ extern void ptp_clock_event(struct ptp_
extern int ptp_clock_index(struct ptp_clock *ptp);
-/** - * scaled_ppm_to_ppb() - convert scaled ppm to ppb - * - * @ppm: Parts per million, but with a 16 bit binary fractional field - */ - -extern long scaled_ppm_to_ppb(long ppm); - /** * ptp_find_pin() - obtain the pin index of a given auxiliary function * diff --combined include/net/net_namespace.h index befc5b93f311,bdc0459a595e..12cf6d7ea62c --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@@ -32,7 -32,6 +32,7 @@@ #include <net/netns/mpls.h> #include <net/netns/can.h> #include <net/netns/xdp.h> +#include <net/netns/smc.h> #include <net/netns/bpf.h> #include <linux/ns_common.h> #include <linux/idr.h> @@@ -171,9 -170,6 +171,9 @@@ struct net struct sock *crypto_nlsk; #endif struct sock *diag_nlsk; +#if IS_ENABLED(CONFIG_SMC) + struct netns_smc smc; +#endif } __randomize_layout;
#include <linux/seq_file_net.h> @@@ -188,6 -184,9 +188,9 @@@ struct net *copy_net_ns(unsigned long f void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid);
void net_ns_barrier(void); + + struct ns_common *get_net_ns(struct ns_common *ns); + struct net *get_net_ns_by_fd(int fd); #else /* CONFIG_NET_NS */ #include <linux/sched.h> #include <linux/nsproxy.h> @@@ -207,13 -206,22 +210,22 @@@ static inline void net_ns_get_ownership }
static inline void net_ns_barrier(void) {} + + static inline struct ns_common *get_net_ns(struct ns_common *ns) + { + return ERR_PTR(-EINVAL); + } + + static inline struct net *get_net_ns_by_fd(int fd) + { + return ERR_PTR(-EINVAL); + } #endif /* CONFIG_NET_NS */
extern struct list_head net_namespace_list;
struct net *get_net_ns_by_pid(pid_t pid); - struct net *get_net_ns_by_fd(int fd);
#ifdef CONFIG_SYSCTL void ipx_register_sysctl(void); diff --combined include/net/sock.h index 9b341c2c924f,7a7058f4f265..ced2fc965ec7 --- a/include/net/sock.h +++ b/include/net/sock.h @@@ -1934,7 -1934,8 +1934,8 @@@ static inline u32 net_tx_rndhash(void
static inline void sk_set_txhash(struct sock *sk) { - sk->sk_txhash = net_tx_rndhash(); + /* This pairs with READ_ONCE() in skb_set_hash_from_sk() */ + WRITE_ONCE(sk->sk_txhash, net_tx_rndhash()); }
static inline bool sk_rethink_txhash(struct sock *sk) @@@ -2206,9 -2207,12 +2207,12 @@@ static inline void sock_poll_wait(struc
static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk) { - if (sk->sk_txhash) { + /* This pairs with WRITE_ONCE() in sk_set_txhash() */ + u32 txhash = READ_ONCE(sk->sk_txhash); + + if (txhash) { skb->l4_hash = 1; - skb->hash = sk->sk_txhash; + skb->hash = txhash; } }
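sk_txhash is updated from one context and read locklessly in the transmit path, so both sides of the access are annotated: the writer uses WRITE_ONCE() and the reader loads the value once with READ_ONCE() into a local, then uses that local for both the test and the hash assignment so the two uses cannot see different values. The shape of the pattern, reduced to a sketch with made-up names:

    struct flow {
            u32 hash;       /* written and read without a lock */
    };

    static void flow_set_hash(struct flow *f, u32 v)
    {
            WRITE_ONCE(f->hash, v);         /* pairs with READ_ONCE() below */
    }

    static void flow_apply_hash(const struct flow *f, u32 *out, bool *valid)
    {
            u32 h = READ_ONCE(f->hash);     /* load once, reuse the local */

            *valid = h != 0;
            if (h)
                    *out = h;
    }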
@@@ -2266,8 -2270,13 +2270,13 @@@ struct sk_buff *sock_dequeue_err_skb(st static inline int sock_error(struct sock *sk) { int err; - if (likely(!sk->sk_err)) + + /* Avoid an atomic operation for the common case. + * This is racy since another cpu/thread can change sk_err under us. + */ + if (likely(data_race(!sk->sk_err))) return 0; + err = xchg(&sk->sk_err, 0); return -err; } @@@ -2743,9 -2752,6 +2752,9 @@@ static inline bool sk_dev_equal_l3scope void sock_def_readable(struct sock *sk);
int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk); +void sock_set_timestamp(struct sock *sk, int optname, bool valbool); +int sock_set_timestamping(struct sock *sk, int optname, int val); + void sock_enable_timestamps(struct sock *sk); void sock_no_linger(struct sock *sk); void sock_set_keepalive(struct sock *sk); diff --combined kernel/bpf/verifier.c index b7d51fc937c7,c6a27574242d..e04e33893cff --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@@ -47,7 -47,7 +47,7 @@@ static const struct bpf_verifier_ops * * - unreachable insns exist (shouldn't be a forest. program = one function) * - out of bounds or malformed jumps * The second pass is all possible path descent from the 1st insn. - * Since it's analyzing all pathes through the program, the length of the + * Since it's analyzing all paths through the program, the length of the * analysis is limited to 64k insn, which may be hit even if total number of * insn is less then 4K, but there are too many branches that change stack/regs. * Number of 'branches to be analyzed' is limited to 1k @@@ -132,7 -132,7 +132,7 @@@ * If it's ok, then verifier allows this BPF_CALL insn and looks at * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function - * returns ether pointer to map value or NULL. + * returns either pointer to map value or NULL. * * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off' * insn, the register holding that pointer in the true branch changes state to @@@ -737,104 -737,81 +737,104 @@@ static void print_verifier_state(struc verbose(env, "\n"); }
-#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \ -static int copy_##NAME##_state(struct bpf_func_state *dst, \ - const struct bpf_func_state *src) \ -{ \ - if (!src->FIELD) \ - return 0; \ - if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \ - /* internal bug, make state invalid to reject the program */ \ - memset(dst, 0, sizeof(*dst)); \ - return -EFAULT; \ - } \ - memcpy(dst->FIELD, src->FIELD, \ - sizeof(*src->FIELD) * (src->COUNT / SIZE)); \ - return 0; \ -} -/* copy_reference_state() */ -COPY_STATE_FN(reference, acquired_refs, refs, 1) -/* copy_stack_state() */ -COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) -#undef COPY_STATE_FN - -#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \ -static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \ - bool copy_old) \ -{ \ - u32 old_size = state->COUNT; \ - struct bpf_##NAME##_state *new_##FIELD; \ - int slot = size / SIZE; \ - \ - if (size <= old_size || !size) { \ - if (copy_old) \ - return 0; \ - state->COUNT = slot * SIZE; \ - if (!size && old_size) { \ - kfree(state->FIELD); \ - state->FIELD = NULL; \ - } \ - return 0; \ - } \ - new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \ - GFP_KERNEL); \ - if (!new_##FIELD) \ - return -ENOMEM; \ - if (copy_old) { \ - if (state->FIELD) \ - memcpy(new_##FIELD, state->FIELD, \ - sizeof(*new_##FIELD) * (old_size / SIZE)); \ - memset(new_##FIELD + old_size / SIZE, 0, \ - sizeof(*new_##FIELD) * (size - old_size) / SIZE); \ - } \ - state->COUNT = slot * SIZE; \ - kfree(state->FIELD); \ - state->FIELD = new_##FIELD; \ - return 0; \ -} -/* realloc_reference_state() */ -REALLOC_STATE_FN(reference, acquired_refs, refs, 1) -/* realloc_stack_state() */ -REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) -#undef REALLOC_STATE_FN - -/* do_check() starts with zero-sized stack in struct bpf_verifier_state to - * make it consume minimal amount of memory. check_stack_write() access from - * the program calls into realloc_func_state() to grow the stack size. - * Note there is a non-zero 'parent' pointer inside bpf_verifier_state - * which realloc_stack_state() copies over. It points to previous - * bpf_verifier_state which is never reallocated. +/* copy array src of length n * size bytes to dst. dst is reallocated if it's too + * small to hold src. This is different from krealloc since we don't want to preserve + * the contents of dst. + * + * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could + * not be allocated. */ -static int realloc_func_state(struct bpf_func_state *state, int stack_size, - int refs_size, bool copy_old) +static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags) { - int err = realloc_reference_state(state, refs_size, copy_old); - if (err) - return err; - return realloc_stack_state(state, stack_size, copy_old); + size_t bytes; + + if (ZERO_OR_NULL_PTR(src)) + goto out; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + + if (ksize(dst) < bytes) { + kfree(dst); + dst = kmalloc_track_caller(bytes, flags); + if (!dst) + return NULL; + } + + memcpy(dst, src, bytes); +out: + return dst ? dst : ZERO_SIZE_PTR; +} + +/* resize an array from old_n items to new_n items. the array is reallocated if it's too + * small to hold new_n items. new items are zeroed out if the array grows. + * + * Contrary to krealloc_array, does not free arr if new_n is zero. 
+ */ +static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size) +{ + if (!new_n || old_n == new_n) + goto out; + + arr = krealloc_array(arr, new_n, size, GFP_KERNEL); + if (!arr) + return NULL; + + if (new_n > old_n) + memset(arr + old_n * size, 0, (new_n - old_n) * size); + +out: + return arr ? arr : ZERO_SIZE_PTR; +} + +static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src) +{ + dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs, + sizeof(struct bpf_reference_state), GFP_KERNEL); + if (!dst->refs) + return -ENOMEM; + + dst->acquired_refs = src->acquired_refs; + return 0; +} + +static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src) +{ + size_t n = src->allocated_stack / BPF_REG_SIZE; + + dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state), + GFP_KERNEL); + if (!dst->stack) + return -ENOMEM; + + dst->allocated_stack = src->allocated_stack; + return 0; +} + +static int resize_reference_state(struct bpf_func_state *state, size_t n) +{ + state->refs = realloc_array(state->refs, state->acquired_refs, n, + sizeof(struct bpf_reference_state)); + if (!state->refs) + return -ENOMEM; + + state->acquired_refs = n; + return 0; +} + +static int grow_stack_state(struct bpf_func_state *state, int size) +{ + size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE; + + if (old_n >= n) + return 0; + + state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state)); + if (!state->stack) + return -ENOMEM; + + state->allocated_stack = size; + return 0; }
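The verifier refactor replaces the COPY_STATE_FN()/REALLOC_STATE_FN() macro pairs with plain helpers. copy_array() intentionally avoids krealloc(): the destination's old contents are about to be overwritten anyway, so if the existing allocation (as reported by ksize()) is already large enough it is reused as-is, and otherwise it is freed and a fresh buffer is allocated, skipping the copy-old-data step krealloc() would perform. A userspace analogue of the idea, with malloc_usable_size() standing in for ksize() and the ZERO_SIZE_PTR handling left out:

    #include <malloc.h>
    #include <stdlib.h>
    #include <string.h>

    /* Reuse dst when it is already big enough; otherwise reallocate without
     * preserving its old contents (unlike realloc()).
     */
    static void *copy_array(void *dst, const void *src, size_t n, size_t size)
    {
            size_t bytes;

            if (!src || !n)
                    return dst;
            if (__builtin_mul_overflow(n, size, &bytes))
                    return NULL;
            if (malloc_usable_size(dst) < bytes) {
                    free(dst);
                    dst = malloc(bytes);
                    if (!dst)
                            return NULL;
            }
            return memcpy(dst, src, bytes);
    }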
/* Acquire a pointer id from the env and update the state->refs to include @@@ -848,7 -825,7 +848,7 @@@ static int acquire_reference_state(stru int new_ofs = state->acquired_refs; int id, err;
- err = realloc_reference_state(state, state->acquired_refs + 1, true); + err = resize_reference_state(state, state->acquired_refs + 1); if (err) return err; id = ++env->id_gen; @@@ -877,6 -854,18 +877,6 @@@ static int release_reference_state(stru return -EINVAL; }
-static int transfer_reference_state(struct bpf_func_state *dst, - struct bpf_func_state *src) -{ - int err = realloc_reference_state(dst, src->acquired_refs, false); - if (err) - return err; - err = copy_reference_state(dst, src); - if (err) - return err; - return 0; -} - static void free_func_state(struct bpf_func_state *state) { if (!state) @@@ -915,6 -904,10 +915,6 @@@ static int copy_func_state(struct bpf_f { int err;
- err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs, - false); - if (err) - return err; memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs)); err = copy_reference_state(dst, src); if (err) @@@ -926,13 -919,16 +926,13 @@@ static int copy_verifier_state(struct b const struct bpf_verifier_state *src) { struct bpf_func_state *dst; - u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt; int i, err;
- if (dst_state->jmp_history_cnt < src->jmp_history_cnt) { - kfree(dst_state->jmp_history); - dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER); - if (!dst_state->jmp_history) - return -ENOMEM; - } - memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz); + dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history, + src->jmp_history_cnt, sizeof(struct bpf_idx_pair), + GFP_USER); + if (!dst_state->jmp_history) + return -ENOMEM; dst_state->jmp_history_cnt = src->jmp_history_cnt;
/* if dst has more stack frames then src frame, free them */ @@@ -2594,7 -2590,8 +2594,7 @@@ static int check_stack_write_fixed_off( u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg; struct bpf_reg_state *reg = NULL;
- err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), - state->acquired_refs, true); + err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE)); if (err) return err; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, @@@ -2616,7 -2613,7 +2616,7 @@@ if (dst_reg != BPF_REG_FP) { /* The backtracking logic can only recognize explicit * stack slot address like [fp - 8]. Other spill of - * scalar via different register has to be conervative. + * scalar via different register has to be conservative. * Backtrack from here and mark all registers as precise * that contributed into 'reg' being a constant. */ @@@ -2756,7 -2753,8 +2756,7 @@@ static int check_stack_write_var_off(st if (value_reg && register_is_null(value_reg)) writing_zero = true;
- err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE), - state->acquired_refs, true); + err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE)); if (err) return err;
@@@ -5631,7 -5629,7 +5631,7 @@@ static int __check_func_call(struct bpf subprog /* subprog number within this prog */);
/* Transfer references to the callee */ - err = transfer_reference_state(callee, caller); + err = copy_reference_state(callee, caller); if (err) return err;
@@@ -5782,7 -5780,7 +5782,7 @@@ static int prepare_func_exit(struct bpf }
/* Transfer references to the caller */ - err = transfer_reference_state(caller, callee); + err = copy_reference_state(caller, callee); if (err) return err;
@@@ -6485,6 -6483,27 +6485,27 @@@ struct bpf_sanitize_info bool mask_to_left; };
+ static struct bpf_verifier_state * + sanitize_speculative_path(struct bpf_verifier_env *env, + const struct bpf_insn *insn, + u32 next_idx, u32 curr_idx) + { + struct bpf_verifier_state *branch; + struct bpf_reg_state *regs; + + branch = push_stack(env, next_idx, curr_idx, true); + if (branch && insn) { + regs = branch->frame[branch->curframe]->regs; + if (BPF_SRC(insn->code) == BPF_K) { + mark_reg_unknown(env, regs, insn->dst_reg); + } else if (BPF_SRC(insn->code) == BPF_X) { + mark_reg_unknown(env, regs, insn->dst_reg); + mark_reg_unknown(env, regs, insn->src_reg); + } + } + return branch; + } + static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, @@@ -6568,12 -6587,26 +6589,26 @@@ do_sim tmp = *dst_reg; *dst_reg = *ptr_reg; } - ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); + ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, + env->insn_idx); if (!ptr_is_dst_reg && ret) *dst_reg = tmp; return !ret ? REASON_STACK : 0; }
+ static void sanitize_mark_insn_seen(struct bpf_verifier_env *env) + { + struct bpf_verifier_state *vstate = env->cur_state; + + /* If we simulate paths under speculation, we don't update the + * insn as 'seen' such that when we verify unreachable paths in + * the non-speculative domain, sanitize_dead_code() can still + * rewrite/sanitize them. + */ + if (!vstate->speculative) + env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; + } + static int sanitize_err(struct bpf_verifier_env *env, const struct bpf_insn *insn, int reason, const struct bpf_reg_state *off_reg, @@@ -8752,14 -8785,28 +8787,28 @@@ static int check_cond_jmp_op(struct bpf if (err) return err; } + if (pred == 1) { - /* only follow the goto, ignore fall-through */ + /* Only follow the goto, ignore fall-through. If needed, push + * the fall-through branch for simulation under speculative + * execution. + */ + if (!env->bypass_spec_v1 && + !sanitize_speculative_path(env, insn, *insn_idx + 1, + *insn_idx)) + return -EFAULT; *insn_idx += insn->off; return 0; } else if (pred == 0) { - /* only follow fall-through branch, since - * that's where the program will go + /* Only follow the fall-through branch, since that's where the + * program will go. If needed, push the goto branch for + * simulation under speculative execution. */ + if (!env->bypass_spec_v1 && + !sanitize_speculative_path(env, insn, + *insn_idx + insn->off + 1, + *insn_idx)) + return -EFAULT; return 0; }
@@@ -8921,14 -8968,12 +8970,14 @@@ static int check_ld_imm(struct bpf_veri mark_reg_known_zero(env, regs, insn->dst_reg); dst_reg->map_ptr = map;
- if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) { + if (insn->src_reg == BPF_PSEUDO_MAP_VALUE || + insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) { dst_reg->type = PTR_TO_MAP_VALUE; dst_reg->off = aux->map_off; if (map_value_has_spin_lock(map)) dst_reg->id = ++env->id_gen; - } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) { + } else if (insn->src_reg == BPF_PSEUDO_MAP_FD || + insn->src_reg == BPF_PSEUDO_MAP_IDX) { dst_reg->type = CONST_PTR_TO_MAP; } else { verbose(env, "bpf verifier is misconfigured\n"); @@@ -9059,7 -9104,7 +9108,7 @@@ static int check_return_code(struct bpf !prog->aux->attach_func_proto->type) return 0;
- /* eBPF calling convetion is such that R0 is used + /* eBPF calling convention is such that R0 is used * to return the value from eBPF program. * Make sure that it's readable at this time * of bpf_exit, which means that program wrote @@@ -9444,7 -9489,7 +9493,7 @@@ static int check_abnormal_return(struc
static int check_btf_func(struct bpf_verifier_env *env, const union bpf_attr *attr, - union bpf_attr __user *uattr) + bpfptr_t uattr) { const struct btf_type *type, *func_proto, *ret_type; u32 i, nfuncs, urec_size, min_size; @@@ -9453,7 -9498,7 +9502,7 @@@ struct bpf_func_info_aux *info_aux = NULL; struct bpf_prog *prog; const struct btf *btf; - void __user *urecord; + bpfptr_t urecord; u32 prev_offset = 0; bool scalar_return; int ret = -ENOMEM; @@@ -9481,7 -9526,7 +9530,7 @@@ prog = env->prog; btf = prog->aux->btf;
- urecord = u64_to_user_ptr(attr->func_info); + urecord = make_bpfptr(attr->func_info, uattr.is_kernel); min_size = min_t(u32, krec_size, urec_size);
krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); @@@ -9499,15 -9544,13 +9548,15 @@@ /* set the size kernel expects so loader can zero * out the rest of the record. */ - if (put_user(min_size, &uattr->func_info_rec_size)) + if (copy_to_bpfptr_offset(uattr, + offsetof(union bpf_attr, func_info_rec_size), + &min_size, sizeof(min_size))) ret = -EFAULT; } goto err_free; }
- if (copy_from_user(&krecord[i], urecord, min_size)) { + if (copy_from_bpfptr(&krecord[i], urecord, min_size)) { ret = -EFAULT; goto err_free; } @@@ -9559,7 -9602,7 +9608,7 @@@ }
prev_offset = krecord[i].insn_off; - urecord += urec_size; + bpfptr_add(&urecord, urec_size); }
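check_btf_func() (and check_btf_line() below) now takes a bpfptr_t, a pointer tagged with whether it refers to kernel or user memory, so the same loader path can serve both the classic syscall and programs loaded from a BPF_PROG_TYPE_SYSCALL program. A user-space sketch of such a tagged pointer is shown here; the type name and helpers are invented, and memcpy() merely stands in for copy_from_user().

/* User-space sketch of a "kernel or user" tagged pointer, in the spirit
 * of bpfptr_t/sockptr_t. memcpy() stands in for copy_from_user(); the
 * real helpers obviously differ.
 */
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>

typedef struct {
	void *ptr;
	bool is_kernel;
} myptr_t;

static myptr_t make_myptr(void *p, bool is_kernel)
{
	return (myptr_t){ .ptr = p, .is_kernel = is_kernel };
}

static int copy_from_myptr(void *dst, myptr_t src, size_t len)
{
	/* a real implementation would use copy_from_user() for the
	 * !is_kernel case; this sketch only models the dispatch
	 */
	memcpy(dst, src.ptr, len);
	return 0;
}

static void myptr_add(myptr_t *p, size_t off)
{
	p->ptr = (char *)p->ptr + off;
}

int main(void)
{
	uint32_t recs[3] = { 10, 20, 30 };
	myptr_t cur = make_myptr(recs, true);
	uint32_t val;

	for (int i = 0; i < 3; i++) {
		copy_from_myptr(&val, cur, sizeof(val));
		printf("record %d: %u\n", i, val);
		myptr_add(&cur, sizeof(val));	/* walk the record array */
	}
	return 0;
}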
prog->aux->func_info = krecord; @@@ -9591,14 -9634,14 +9640,14 @@@ static void adjust_btf_func(struct bpf_
static int check_btf_line(struct bpf_verifier_env *env, const union bpf_attr *attr, - union bpf_attr __user *uattr) + bpfptr_t uattr) { u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0; struct bpf_subprog_info *sub; struct bpf_line_info *linfo; struct bpf_prog *prog; const struct btf *btf; - void __user *ulinfo; + bpfptr_t ulinfo; int err;
nr_linfo = attr->line_info_cnt; @@@ -9624,7 -9667,7 +9673,7 @@@
s = 0; sub = env->subprog_info; - ulinfo = u64_to_user_ptr(attr->line_info); + ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel); expected_size = sizeof(struct bpf_line_info); ncopy = min_t(u32, expected_size, rec_size); for (i = 0; i < nr_linfo; i++) { @@@ -9632,15 -9675,14 +9681,15 @@@ if (err) { if (err == -E2BIG) { verbose(env, "nonzero tailing record in line_info"); - if (put_user(expected_size, - &uattr->line_info_rec_size)) + if (copy_to_bpfptr_offset(uattr, + offsetof(union bpf_attr, line_info_rec_size), + &expected_size, sizeof(expected_size))) err = -EFAULT; } goto err_free; }
- if (copy_from_user(&linfo[i], ulinfo, ncopy)) { + if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) { err = -EFAULT; goto err_free; } @@@ -9692,7 -9734,7 +9741,7 @@@ }
prev_offset = linfo[i].insn_off; - ulinfo += rec_size; + bpfptr_add(&ulinfo, rec_size); }
if (s != env->subprog_cnt) { @@@ -9714,7 -9756,7 +9763,7 @@@ err_free
static int check_btf_info(struct bpf_verifier_env *env, const union bpf_attr *attr, - union bpf_attr __user *uattr) + bpfptr_t uattr) { struct btf *btf; int err; @@@ -9759,6 -9801,13 +9808,6 @@@ static bool range_within(struct bpf_reg old->s32_max_value >= cur->s32_max_value; }
-/* Maximum number of register states that can exist at once */ -#define ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) -struct idpair { - u32 old; - u32 cur; -}; - /* If in the old state two registers had the same id, then they need to have * the same id in the new state as well. But that id could be different from * the old state, so we need to track the mapping from old to new ids. @@@ -9769,11 -9818,11 +9818,11 @@@ * So we look through our idmap to see if this old id has been seen before. If * so, we require the new id to match; otherwise, we add the id pair to the map. */ -static bool check_ids(u32 old_id, u32 cur_id, struct idpair *idmap) +static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap) { unsigned int i;
- for (i = 0; i < ID_MAP_SIZE; i++) { + for (i = 0; i < BPF_ID_MAP_SIZE; i++) { if (!idmap[i].old) { /* Reached an empty slot; haven't seen this id before */ idmap[i].old = old_id; @@@ -9850,7 -9899,7 +9899,7 @@@ static void clean_verifier_state(struc * Since the verifier pushes the branch states as it sees them while exploring * the program the condition of walking the branch instruction for the second * time means that all states below this branch were already explored and - * their final liveness markes are already propagated. + * their final liveness marks are already propagated. * Hence when the verifier completes the search of state list in is_state_visited() * we can call this clean_live_states() function to mark all liveness states * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state' @@@ -9886,7 -9935,7 +9935,7 @@@ next
/* Returns true if (rold safe implies rcur safe) */ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, - struct idpair *idmap) + struct bpf_id_pair *idmap) { bool equal;
@@@ -10004,7 -10053,7 +10053,7 @@@
static bool stacksafe(struct bpf_func_state *old, struct bpf_func_state *cur, - struct idpair *idmap) + struct bpf_id_pair *idmap) { int i, spi;
@@@ -10101,23 -10150,32 +10150,23 @@@ static bool refsafe(struct bpf_func_sta * whereas register type in current state is meaningful, it means that * the current state will reach 'bpf_exit' instruction safely */ -static bool func_states_equal(struct bpf_func_state *old, +static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, struct bpf_func_state *cur) { - struct idpair *idmap; - bool ret = false; int i;
- idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL); - /* If we failed to allocate the idmap, just say it's not safe */ - if (!idmap) - return false; - - for (i = 0; i < MAX_BPF_REG; i++) { - if (!regsafe(&old->regs[i], &cur->regs[i], idmap)) - goto out_free; - } + memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch)); + for (i = 0; i < MAX_BPF_REG; i++) + if (!regsafe(&old->regs[i], &cur->regs[i], env->idmap_scratch)) + return false;
- if (!stacksafe(old, cur, idmap)) - goto out_free; + if (!stacksafe(old, cur, env->idmap_scratch)) + return false;
if (!refsafe(old, cur)) - goto out_free; - ret = true; -out_free: - kfree(idmap); - return ret; + return false; + + return true; }
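The rewrite above drops the per-comparison kcalloc() of the id map: func_states_equal() now just zeroes env->idmap_scratch and passes it to regsafe()/stacksafe(), while check_ids() records old-to-new id pairs in that table and rejects inconsistent mappings. A stand-alone sketch of the id-mapping check, with an illustrative table size rather than the real BPF_ID_MAP_SIZE:

/* Stand-alone sketch of the old->new register-id mapping check.
 * The table size and struct layout are illustrative only.
 */
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>

#define ID_MAP_SIZE 16

struct id_pair {
	uint32_t old;
	uint32_t cur;
};

static struct id_pair idmap_scratch[ID_MAP_SIZE];

static bool check_ids(uint32_t old_id, uint32_t cur_id, struct id_pair *idmap)
{
	for (unsigned int i = 0; i < ID_MAP_SIZE; i++) {
		if (!idmap[i].old) {
			/* empty slot: first time we see this old id */
			idmap[i].old = old_id;
			idmap[i].cur = cur_id;
			return true;
		}
		if (idmap[i].old == old_id)
			return idmap[i].cur == cur_id;
	}
	/* table full: be conservative and report a mismatch */
	return false;
}

int main(void)
{
	/* reuse the scratch table across comparisons by zeroing it,
	 * instead of allocating a fresh one for every comparison
	 */
	memset(idmap_scratch, 0, sizeof(idmap_scratch));

	printf("%d\n", check_ids(1, 7, idmap_scratch));	/* 1: new pair */
	printf("%d\n", check_ids(1, 7, idmap_scratch));	/* 1: consistent */
	printf("%d\n", check_ids(1, 9, idmap_scratch));	/* 0: conflicts */
	return 0;
}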
static bool states_equal(struct bpf_verifier_env *env, @@@ -10144,7 -10202,7 +10193,7 @@@ for (i = 0; i <= old->curframe; i++) { if (old->frame[i]->callsite != cur->frame[i]->callsite) return false; - if (!func_states_equal(old->frame[i], cur->frame[i])) + if (!func_states_equal(env, old->frame[i], cur->frame[i])) return false; } return true; @@@ -10621,7 -10679,7 +10670,7 @@@ static int do_check(struct bpf_verifier }
regs = cur_regs(env); - env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; + sanitize_mark_insn_seen(env); prev_insn_idx = env->insn_idx;
if (class == BPF_ALU || class == BPF_ALU64) { @@@ -10848,7 -10906,7 +10897,7 @@@ process_bpf_exit return err;
env->insn_idx++; - env->insn_aux_data[env->insn_idx].seen = env->pass_cnt; + sanitize_mark_insn_seen(env); } else { verbose(env, "invalid BPF_LD mode\n"); return -EINVAL; @@@ -11181,7 -11239,6 +11230,7 @@@ static int resolve_pseudo_ldimm64(struc struct bpf_map *map; struct fd f; u64 addr; + u32 fd;
if (i == insn_cnt - 1 || insn[1].code != 0 || insn[1].dst_reg != 0 || insn[1].src_reg != 0 || @@@ -11211,38 -11268,16 +11260,38 @@@ /* In final convert_pseudo_ld_imm64() step, this is * converted into regular 64-bit imm load insn. */ - if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && - insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) || - (insn[0].src_reg == BPF_PSEUDO_MAP_FD && - insn[1].imm != 0)) { - verbose(env, - "unrecognized bpf_ld_imm64 insn\n"); + switch (insn[0].src_reg) { + case BPF_PSEUDO_MAP_VALUE: + case BPF_PSEUDO_MAP_IDX_VALUE: + break; + case BPF_PSEUDO_MAP_FD: + case BPF_PSEUDO_MAP_IDX: + if (insn[1].imm == 0) + break; + fallthrough; + default: + verbose(env, "unrecognized bpf_ld_imm64 insn\n"); return -EINVAL; }
- f = fdget(insn[0].imm); + switch (insn[0].src_reg) { + case BPF_PSEUDO_MAP_IDX_VALUE: + case BPF_PSEUDO_MAP_IDX: + if (bpfptr_is_null(env->fd_array)) { + verbose(env, "fd_idx without fd_array is invalid\n"); + return -EPROTO; + } + if (copy_from_bpfptr_offset(&fd, env->fd_array, + insn[0].imm * sizeof(fd), + sizeof(fd))) + return -EFAULT; + break; + default: + fd = insn[0].imm; + break; + } + + f = fdget(fd); map = __bpf_map_get(f); if (IS_ERR(map)) { verbose(env, "fd %d is not pointing to valid bpf_map\n", @@@ -11257,8 -11292,7 +11306,8 @@@ }
aux = &env->insn_aux_data[i]; - if (insn->src_reg == BPF_PSEUDO_MAP_FD) { + if (insn[0].src_reg == BPF_PSEUDO_MAP_FD || + insn[0].src_reg == BPF_PSEUDO_MAP_IDX) { addr = (unsigned long)map; } else { u32 off = insn[1].imm; @@@ -11381,6 -11415,7 +11430,7 @@@ static int adjust_insn_aux_data(struct { struct bpf_insn_aux_data *new_data, *old_data = env->insn_aux_data; struct bpf_insn *insn = new_prog->insnsi; + u32 old_seen = old_data[off].seen; u32 prog_len; int i;
@@@ -11401,7 -11436,8 +11451,8 @@@ memcpy(new_data + off + cnt - 1, old_data + off, sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1)); for (i = off; i < off + cnt - 1; i++) { - new_data[i].seen = env->pass_cnt; + /* Expand insni[off]'s seen count to the patched range. */ + new_data[i].seen = old_seen; new_data[i].zext_dst = insn_has_def32(env, insn + i); } env->insn_aux_data = new_data; @@@ -12470,7 -12506,7 +12521,7 @@@ static int do_misc_fixups(struct bpf_ve prog->aux->max_pkt_offset = MAX_PACKET_OFF;
/* mark bpf_tail_call as different opcode to avoid - * conditional branch in the interpeter for every normal + * conditional branch in the interpreter for every normal * call and to prevent accidental JITing by JIT compiler * that doesn't support bpf_tail_call yet */ @@@ -12725,6 -12761,9 +12776,9 @@@ static void free_states(struct bpf_veri * insn_aux_data was touched. These variables are compared to clear temporary * data from failed pass. For testing and experiments do_check_common() can be * run multiple times even when prior attempt to verify is unsuccessful. + * + * Note that special handling is needed on !env->bypass_spec_v1 if this is + * ever called outside of error path with subsequent program rejection. */ static void sanitize_insn_aux_data(struct bpf_verifier_env *env) { @@@ -13242,14 -13281,6 +13296,14 @@@ static int check_attach_btf_id(struct b int ret; u64 key;
+ if (prog->type == BPF_PROG_TYPE_SYSCALL) { + if (prog->aux->sleepable) + /* attach_btf_id checked to be zero already */ + return 0; + verbose(env, "Syscall programs can only be sleepable\n"); + return -EINVAL; + } + if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING && prog->type != BPF_PROG_TYPE_LSM) { verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n"); @@@ -13324,7 -13355,8 +13378,7 @@@ struct btf *bpf_get_btf_vmlinux(void return btf_vmlinux; }
-int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, - union bpf_attr __user *uattr) +int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr) { u64 start_time = ktime_get_ns(); struct bpf_verifier_env *env; @@@ -13354,7 -13386,6 +13408,7 @@@ env->insn_aux_data[i].orig_idx = i; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; + env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel); is_priv = bpf_capable();
bpf_get_btf_vmlinux(); diff --combined net/batman-adv/bat_iv_ogm.c index 680def809838,fc8be49010b9..12022378f892 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@@ -409,8 -409,10 +409,10 @@@ static void batadv_iv_ogm_emit(struct b if (WARN_ON(!forw_packet->if_outgoing)) return;
- if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface)) + if (forw_packet->if_outgoing->soft_iface != soft_iface) { + pr_warn("%s: soft interface switch for queued OGM\n", __func__); return; + }
if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) return; @@@ -1849,8 -1851,6 +1851,8 @@@ batadv_iv_ogm_orig_dump_subentry(struc orig_node->orig) || nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, neigh_node->addr) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + neigh_node->if_incoming->net_dev->name) || nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, neigh_node->if_incoming->net_dev->ifindex) || nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) || @@@ -2080,8 -2080,6 +2082,8 @@@ batadv_iv_ogm_neigh_dump_neigh(struct s
if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, hardif_neigh->addr) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + hardif_neigh->if_incoming->net_dev->name) || nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, hardif_neigh->if_incoming->net_dev->ifindex) || nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, @@@ -2463,8 -2461,6 +2465,8 @@@ static int batadv_iv_gw_dump_entry(stru router->addr) || nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, router->if_incoming->net_dev->name) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + router->if_incoming->net_dev->ifindex) || nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN, gw_node->bandwidth_down) || nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, diff --combined net/bluetooth/smp.c index 93144e0c7efa,7dd51da73845..4d93c6c32a71 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@@ -40,7 -40,7 +40,7 @@@ ((struct smp_dev *)((struct l2cap_chan *)((hdev)->smp_data))->data)
/* Low-level debug macros to be used for stuff that we don't want - * accidentially in dmesg, i.e. the values of the various crypto keys + * accidentally in dmesg, i.e. the values of the various crypto keys * and the inputs & outputs of crypto functions. */ #ifdef DEBUG @@@ -560,7 -560,7 +560,7 @@@ int smp_generate_oob(struct hci_dev *hd return err;
/* This is unlikely, but we need to check that - * we didn't accidentially generate a debug key. + * we didn't accidentally generate a debug key. */ if (crypto_memneq(smp->local_pk, debug_pk, 64)) break; @@@ -1902,7 -1902,7 +1902,7 @@@ static u8 sc_send_public_key(struct smp return SMP_UNSPECIFIED;
/* This is unlikely, but we need to check that - * we didn't accidentially generate a debug key. + * we didn't accidentally generate a debug key. */ if (crypto_memneq(smp->local_pk, debug_pk, 64)) break; @@@ -3229,7 -3229,7 +3229,7 @@@ static inline struct l2cap_chan *smp_ne { struct l2cap_chan *chan;
- bt_dev_dbg(pchan->conn->hcon->hdev, "pchan %p", pchan); + BT_DBG("pchan %p", pchan);
chan = l2cap_chan_create(); if (!chan) @@@ -3250,7 -3250,7 +3250,7 @@@ */ atomic_set(&chan->nesting, L2CAP_NESTING_SMP);
- bt_dev_dbg(pchan->conn->hcon->hdev, "created chan %p", chan); + BT_DBG("created chan %p", chan);
return chan; } @@@ -3354,7 -3354,7 +3354,7 @@@ static void smp_del_chan(struct l2cap_c { struct smp_dev *smp;
- bt_dev_dbg(chan->conn->hcon->hdev, "chan %p", chan); + BT_DBG("chan %p", chan);
smp = chan->data; if (smp) { diff --combined net/bridge/br_private.h index ec661130c2d0,e013d33f1c7c..a684d0cfc58c --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@@ -90,8 -90,8 +90,8 @@@ struct bridge_mcast_stats #endif
struct br_tunnel_info { - __be64 tunnel_id; - struct metadata_dst *tunnel_dst; + __be64 tunnel_id; + struct metadata_dst __rcu *tunnel_dst; };
/* private vlan flags */ @@@ -307,18 -307,16 +307,18 @@@ struct net_bridge_port
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING struct bridge_mcast_own_query ip4_own_query; + struct timer_list ip4_mc_router_timer; + struct hlist_node ip4_rlist; #if IS_ENABLED(CONFIG_IPV6) struct bridge_mcast_own_query ip6_own_query; + struct timer_list ip6_mc_router_timer; + struct hlist_node ip6_rlist; #endif /* IS_ENABLED(CONFIG_IPV6) */ u32 multicast_eht_hosts_limit; u32 multicast_eht_hosts_cnt; unsigned char multicast_router; struct bridge_mcast_stats __percpu *mcast_stats; - struct timer_list multicast_router_timer; struct hlist_head mglist; - struct hlist_node rlist; #endif
#ifdef CONFIG_SYSFS @@@ -451,16 -449,14 +451,16 @@@ struct net_bridge
struct hlist_head mcast_gc_list; struct hlist_head mdb_list; - struct hlist_head router_list;
- struct timer_list multicast_router_timer; + struct hlist_head ip4_mc_router_list; + struct timer_list ip4_mc_router_timer; struct bridge_mcast_other_query ip4_other_query; struct bridge_mcast_own_query ip4_own_query; struct bridge_mcast_querier ip4_querier; struct bridge_mcast_stats __percpu *mcast_stats; #if IS_ENABLED(CONFIG_IPV6) + struct hlist_head ip6_mc_router_list; + struct timer_list ip6_mc_router_timer; struct bridge_mcast_other_query ip6_other_query; struct bridge_mcast_own_query ip6_own_query; struct bridge_mcast_querier ip6_querier; @@@ -868,58 -864,11 +868,58 @@@ static inline bool br_group_is_l2(cons #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
-static inline bool br_multicast_is_router(struct net_bridge *br) +static inline struct hlist_node * +br_multicast_get_first_rport_node(struct net_bridge *b, struct sk_buff *skb) { +#if IS_ENABLED(CONFIG_IPV6) + if (skb->protocol == htons(ETH_P_IPV6)) + return rcu_dereference(hlist_first_rcu(&b->ip6_mc_router_list)); +#endif + return rcu_dereference(hlist_first_rcu(&b->ip4_mc_router_list)); +} + +static inline struct net_bridge_port * +br_multicast_rport_from_node_skb(struct hlist_node *rp, struct sk_buff *skb) { +#if IS_ENABLED(CONFIG_IPV6) + if (skb->protocol == htons(ETH_P_IPV6)) + return hlist_entry_safe(rp, struct net_bridge_port, ip6_rlist); +#endif + return hlist_entry_safe(rp, struct net_bridge_port, ip4_rlist); +} + +static inline bool br_ip4_multicast_is_router(struct net_bridge *br) +{ + return timer_pending(&br->ip4_mc_router_timer); +} + +static inline bool br_ip6_multicast_is_router(struct net_bridge *br) { - return br->multicast_router == 2 || - (br->multicast_router == 1 && - timer_pending(&br->multicast_router_timer)); +#if IS_ENABLED(CONFIG_IPV6) + return timer_pending(&br->ip6_mc_router_timer); +#else + return false; +#endif +} + +static inline bool +br_multicast_is_router(struct net_bridge *br, struct sk_buff *skb) +{ + switch (br->multicast_router) { + case MDB_RTR_TYPE_PERM: + return true; + case MDB_RTR_TYPE_TEMP_QUERY: + if (skb) { + if (skb->protocol == htons(ETH_P_IP)) + return br_ip4_multicast_is_router(br); + else if (skb->protocol == htons(ETH_P_IPV6)) + return br_ip6_multicast_is_router(br); + } else { + return br_ip4_multicast_is_router(br) || + br_ip6_multicast_is_router(br); + } + fallthrough; + default: + return false; + } }
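The bridge now keeps separate IPv4 and IPv6 multicast-router lists and timers, and br_multicast_is_router() picks the family from the skb when a packet is available. A tiny sketch of that dispatch; the field and enum names are invented for illustration and do not match struct net_bridge.

/* Tiny sketch of the per-family router check; names are made up. */
#include <stdio.h>
#include <stdbool.h>

enum rtr_mode { RTR_DISABLED, RTR_TEMP_QUERY, RTR_PERM };

struct toy_bridge {
	enum rtr_mode mode;
	bool ip4_timer_pending;
	bool ip6_timer_pending;
};

static bool is_router(const struct toy_bridge *br, int eth_proto /* 0x0800, 0x86DD or 0 */)
{
	switch (br->mode) {
	case RTR_PERM:
		return true;
	case RTR_TEMP_QUERY:
		if (eth_proto == 0x0800)
			return br->ip4_timer_pending;
		if (eth_proto == 0x86DD)
			return br->ip6_timer_pending;
		/* no packet context: a router for either family counts */
		return br->ip4_timer_pending || br->ip6_timer_pending;
	default:
		return false;
	}
}

int main(void)
{
	struct toy_bridge br = { RTR_TEMP_QUERY, true, false };

	printf("IPv4: %d, IPv6: %d, any: %d\n",
	       is_router(&br, 0x0800), is_router(&br, 0x86DD), is_router(&br, 0));
	return 0;
}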
static inline bool @@@ -1068,8 -1017,7 +1068,8 @@@ static inline void br_multicast_flood(s { }
-static inline bool br_multicast_is_router(struct net_bridge *br) +static inline bool br_multicast_is_router(struct net_bridge *br, + struct sk_buff *skb) { return false; } diff --combined net/can/isotp.c index f995eaef5d7b,be6183f8ca11..bd49299319a1 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@@ -143,10 -143,14 +143,14 @@@ struct isotp_sock u32 force_tx_stmin; u32 force_rx_stmin; struct tpcon rx, tx; - struct notifier_block notifier; + struct list_head notifier; wait_queue_head_t wait; };
+ static LIST_HEAD(isotp_notifier_list); + static DEFINE_SPINLOCK(isotp_notifier_lock); + static struct isotp_sock *isotp_busy_notifier; + static inline struct isotp_sock *isotp_sk(const struct sock *sk) { return (struct isotp_sock *)sk; @@@ -221,8 -225,8 +225,8 @@@ static int isotp_send_fc(struct sock *s
can_send_ret = can_send(nskb, 1); if (can_send_ret) - pr_notice_once("can-isotp: %s: can_send_ret %d\n", - __func__, can_send_ret); + pr_notice_once("can-isotp: %s: can_send_ret %pe\n", + __func__, ERR_PTR(can_send_ret));
dev_put(dev);
@@@ -797,12 -801,10 +801,12 @@@ isotp_tx_burst can_skb_set_owner(skb, sk);
can_send_ret = can_send(skb, 1); - if (can_send_ret) - pr_notice_once("can-isotp: %s: can_send_ret %d\n", - __func__, can_send_ret); - + if (can_send_ret) { + pr_notice_once("can-isotp: %s: can_send_ret %pe\n", + __func__, ERR_PTR(can_send_ret)); + if (can_send_ret == -ENOBUFS) + pr_notice_once("can-isotp: tx queue is full, increasing txqueuelen may prevent this error\n"); + } if (so->tx.idx >= so->tx.len) { /* we are done */ so->tx.state = ISOTP_IDLE; @@@ -948,8 -950,8 +952,8 @@@ static int isotp_sendmsg(struct socket err = can_send(skb, 1); dev_put(dev); if (err) { - pr_notice_once("can-isotp: %s: can_send_ret %d\n", - __func__, err); + pr_notice_once("can-isotp: %s: can_send_ret %pe\n", + __func__, ERR_PTR(err)); return err; }
@@@ -1015,7 -1017,14 +1019,14 @@@ static int isotp_release(struct socket /* wait for complete transmission of current pdu */ wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
- unregister_netdevice_notifier(&so->notifier); + spin_lock(&isotp_notifier_lock); + while (isotp_busy_notifier == so) { + spin_unlock(&isotp_notifier_lock); + schedule_timeout_uninterruptible(1); + spin_lock(&isotp_notifier_lock); + } + list_del(&so->notifier); + spin_unlock(&isotp_notifier_lock);
lock_sock(sk);
@@@ -1319,21 -1328,16 +1330,16 @@@ static int isotp_getsockopt(struct sock return 0; }
- static int isotp_notifier(struct notifier_block *nb, unsigned long msg, - void *ptr) + static void isotp_notify(struct isotp_sock *so, unsigned long msg, + struct net_device *dev) { - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct isotp_sock *so = container_of(nb, struct isotp_sock, notifier); struct sock *sk = &so->sk;
if (!net_eq(dev_net(dev), sock_net(sk))) - return NOTIFY_DONE; - - if (dev->type != ARPHRD_CAN) - return NOTIFY_DONE; + return;
if (so->ifindex != dev->ifindex) - return NOTIFY_DONE; + return;
switch (msg) { case NETDEV_UNREGISTER: @@@ -1359,7 -1363,28 +1365,28 @@@ sk->sk_error_report(sk); break; } + }
+ static int isotp_notifier(struct notifier_block *nb, unsigned long msg, + void *ptr) + { + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + + if (dev->type != ARPHRD_CAN) + return NOTIFY_DONE; + if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN) + return NOTIFY_DONE; + if (unlikely(isotp_busy_notifier)) /* Check for reentrant bug. */ + return NOTIFY_DONE; + + spin_lock(&isotp_notifier_lock); + list_for_each_entry(isotp_busy_notifier, &isotp_notifier_list, notifier) { + spin_unlock(&isotp_notifier_lock); + isotp_notify(isotp_busy_notifier, msg, dev); + spin_lock(&isotp_notifier_lock); + } + isotp_busy_notifier = NULL; + spin_unlock(&isotp_notifier_lock); return NOTIFY_DONE; }
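Instead of one netdevice notifier per socket, isotp now registers a single module-wide notifier that walks a socket list, dropping the spinlock around each callback and parking a 'busy' cursor so isotp_release() can wait until its socket is no longer being notified. A simplified, single-threaded sketch of that walking pattern, with a pthread mutex standing in for the spinlock and invented names throughout:

/* Simplified sketch of "walk subscribers with the lock dropped around
 * each callback, tracked by a busy cursor". Not the kernel code.
 */
#include <stdio.h>
#include <pthread.h>

struct sub {
	struct sub *next;
	const char *name;
	void (*cb)(struct sub *s, unsigned long event);
};

static struct sub *subs;
static struct sub *busy_sub;
static pthread_mutex_t subs_lock = PTHREAD_MUTEX_INITIALIZER;

static void dispatch(unsigned long event)
{
	pthread_mutex_lock(&subs_lock);
	for (busy_sub = subs; busy_sub; busy_sub = busy_sub->next) {
		struct sub *s = busy_sub;

		pthread_mutex_unlock(&subs_lock);
		s->cb(s, event);		/* may sleep in real code */
		pthread_mutex_lock(&subs_lock);
	}
	busy_sub = NULL;
	pthread_mutex_unlock(&subs_lock);
}

static void print_cb(struct sub *s, unsigned long event)
{
	printf("%s got event %lu\n", s->name, event);
}

int main(void)
{
	struct sub a = { NULL, "sock-a", print_cb };
	struct sub b = { &a, "sock-b", print_cb };

	subs = &b;			/* list: b -> a */
	dispatch(42);
	return 0;
}

In the real code, the removal path keeps releasing the lock and rescheduling while the busy cursor still points at the socket being torn down, which is exactly the wait loop visible in isotp_release() above.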
@@@ -1396,8 -1421,9 +1423,9 @@@ static int isotp_init(struct sock *sk
init_waitqueue_head(&so->wait);
- so->notifier.notifier_call = isotp_notifier; - register_netdevice_notifier(&so->notifier); + spin_lock(&isotp_notifier_lock); + list_add_tail(&so->notifier, &isotp_notifier_list); + spin_unlock(&isotp_notifier_lock);
return 0; } @@@ -1444,6 -1470,10 +1472,10 @@@ static const struct can_proto isotp_can .prot = &isotp_proto, };
+ static struct notifier_block canisotp_notifier = { + .notifier_call = isotp_notifier + }; + static __init int isotp_module_init(void) { int err; @@@ -1452,7 -1482,9 +1484,9 @@@
err = can_proto_register(&isotp_can_proto); if (err < 0) - pr_err("can: registration of isotp protocol failed\n"); + pr_err("can: registration of isotp protocol failed %pe\n", ERR_PTR(err)); + else + register_netdevice_notifier(&canisotp_notifier);
return err; } @@@ -1460,6 -1492,7 +1494,7 @@@ static __exit void isotp_module_exit(void) { can_proto_unregister(&isotp_can_proto); + unregister_netdevice_notifier(&canisotp_notifier); }
module_init(isotp_module_init); diff --combined net/core/neighbour.c index 2b2f333bcdfe,bf774575ad71..53e85c70c6e5 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@@ -238,6 -238,7 +238,7 @@@ static int neigh_forced_gc(struct neigh
write_lock(&n->lock); if ((n->nud_state == NUD_FAILED) || + (n->nud_state == NUD_NOARP) || (tbl->is_multicast && tbl->is_multicast(n->primary_key)) || time_after(tref, n->updated)) @@@ -3141,7 -3142,7 +3142,7 @@@ static struct pneigh_entry *pneigh_get_ struct net *net = seq_file_net(seq); struct neigh_table *tbl = state->tbl; struct pneigh_entry *pn = NULL; - int bucket = state->bucket; + int bucket;
state->flags |= NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { diff --combined net/core/rtnetlink.c index 5baa86bca876,ec931b080156..745965e49f78 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@@ -9,7 -9,7 +9,7 @@@ * Authors: Alexey Kuznetsov, kuznet@ms2.inr.ac.ru * * Fixes: - * Vitaly E. Lavrov RTA_OK arithmetics was wrong. + * Vitaly E. Lavrov RTA_OK arithmetic was wrong. */
#include <linux/bitops.h> @@@ -234,7 -234,7 +234,7 @@@ unlock * @msgtype: rtnetlink message type * @doit: Function pointer called for each request message * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions + * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions * * Like rtnl_register, but for use by removable modules. */ @@@ -254,7 -254,7 +254,7 @@@ EXPORT_SYMBOL_GPL(rtnl_register_module) * @msgtype: rtnetlink message type * @doit: Function pointer called for each request message * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions + * @flags: rtnl_link_flags to modify behaviour of doit/dumpit functions * * Registers the specified function pointers (at least one of them has * to be non-NULL) to be called whenever a request message for the @@@ -376,12 -376,12 +376,12 @@@ int __rtnl_link_register(struct rtnl_li if (rtnl_link_ops_get(ops->kind)) return -EEXIST;
- /* The check for setup is here because if ops + /* The check for alloc/setup is here because if ops * does not have that filled up, it is not possible * to use the ops for creating device. So do not * fill up dellink as well. That disables rtnl_dellink. */ - if (ops->setup && !ops->dellink) + if ((ops->alloc || ops->setup) && !ops->dellink) ops->dellink = unregister_netdevice_queue;
list_add_tail(&ops->list, &link_ops); @@@ -543,9 -543,7 +543,9 @@@ static const struct rtnl_af_ops *rtnl_a { const struct rtnl_af_ops *ops;
- list_for_each_entry_rcu(ops, &rtnl_af_ops, list) { + ASSERT_RTNL(); + + list_for_each_entry(ops, &rtnl_af_ops, list) { if (ops->family == family) return ops; } @@@ -1821,16 -1819,6 +1821,16 @@@ static int rtnl_fill_ifinfo(struct sk_b if (rtnl_fill_prop_list(skb, dev)) goto nla_put_failure;
+ if (dev->dev.parent && + nla_put_string(skb, IFLA_PARENT_DEV_NAME, + dev_name(dev->dev.parent))) + goto nla_put_failure; + + if (dev->dev.parent && dev->dev.parent->bus && + nla_put_string(skb, IFLA_PARENT_DEV_BUS_NAME, + dev->dev.parent->bus->name)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0;
@@@ -1890,7 -1878,6 +1890,7 @@@ static const struct nla_policy ifla_pol [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT }, [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED }, [IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1), + [IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING }, };
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@@ -2287,18 -2274,27 +2287,18 @@@ static int validate_linkmsg(struct net_ nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { const struct rtnl_af_ops *af_ops;
- rcu_read_lock(); af_ops = rtnl_af_lookup(nla_type(af)); - if (!af_ops) { - rcu_read_unlock(); + if (!af_ops) return -EAFNOSUPPORT; - }
- if (!af_ops->set_link_af) { - rcu_read_unlock(); + if (!af_ops->set_link_af) return -EOPNOTSUPP; - }
if (af_ops->validate_link_af) { err = af_ops->validate_link_af(dev, af); - if (err < 0) { - rcu_read_unlock(); + if (err < 0) return err; - } } - - rcu_read_unlock(); } }
@@@ -2578,7 -2574,7 +2578,7 @@@ static int do_set_proto_down(struct net if (nl_proto_down) { proto_down = nla_get_u8(nl_proto_down);
- /* Dont turn off protodown if there are active reasons */ + /* Don't turn off protodown if there are active reasons */ if (!proto_down && dev->proto_down_reason) { NL_SET_ERR_MSG(extack, "Cannot clear protodown, active reasons"); return -EBUSY; @@@ -2872,12 -2868,17 +2872,12 @@@ static int do_setlink(const struct sk_b nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { const struct rtnl_af_ops *af_ops;
- rcu_read_lock(); - BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
err = af_ops->set_link_af(dev, af, extack); - if (err < 0) { - rcu_read_unlock(); + if (err < 0) goto errout; - }
- rcu_read_unlock(); status |= DO_SETLINK_NOTIFY; } } @@@ -3176,17 -3177,8 +3176,17 @@@ struct net_device *rtnl_create_link(str return ERR_PTR(-EINVAL); }
- dev = alloc_netdev_mqs(ops->priv_size, ifname, name_assign_type, - ops->setup, num_tx_queues, num_rx_queues); + if (ops->alloc) { + dev = ops->alloc(tb, ifname, name_assign_type, + num_tx_queues, num_rx_queues); + if (IS_ERR(dev)) + return dev; + } else { + dev = alloc_netdev_mqs(ops->priv_size, ifname, + name_assign_type, ops->setup, + num_tx_queues, num_rx_queues); + } + if (!dev) return ERR_PTR(-ENOMEM);
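rtnl_create_link() now lets a link type supply its own allocator (ops->alloc) and only falls back to alloc_netdev_mqs()/ops->setup when it does not, which is also why __rtnl_link_register() and the replay path accept either hook. A small sketch of that "custom allocator with generic fallback" shape, using invented types:

/* Sketch of "use the type's own allocator if it provides one, otherwise
 * fall back to the generic constructor". Types are illustrative.
 */
#include <stdio.h>
#include <stdlib.h>

struct dev {
	char name[16];
};

struct link_ops {
	struct dev *(*alloc)(const char *name);	/* optional */
	void (*setup)(struct dev *d);		/* used by the fallback */
};

static struct dev *generic_alloc(const char *name, void (*setup)(struct dev *))
{
	struct dev *d = calloc(1, sizeof(*d));

	if (!d)
		return NULL;
	snprintf(d->name, sizeof(d->name), "%s", name);
	if (setup)
		setup(d);
	return d;
}

static struct dev *create_link(const struct link_ops *ops, const char *name)
{
	if (ops->alloc)
		return ops->alloc(name);	/* driver-specific allocation */
	return generic_alloc(name, ops->setup);
}

static void toy_setup(struct dev *d) { printf("setup %s\n", d->name); }

int main(void)
{
	struct link_ops ops = { .alloc = NULL, .setup = toy_setup };
	struct dev *d = create_link(&ops, "dummy0");

	if (d) {
		printf("created %s\n", d->name);
		free(d);
	}
	return 0;
}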
@@@ -3419,7 -3411,7 +3419,7 @@@ replay return -EOPNOTSUPP; }
- if (!ops->setup) + if (!ops->alloc && !ops->setup) return -EOPNOTSUPP;
if (!ifname[0]) { @@@ -4850,10 -4842,12 +4850,12 @@@ static int rtnl_bridge_notify(struct ne if (err < 0) goto errout;
- if (!skb->len) { - err = -EINVAL; + /* Notification info is only filled for bridge ports, not the bridge + * device itself. Therefore, a zero notification length is valid and + * should not result in an error. + */ + if (!skb->len) goto errout; - }
rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); return 0; diff --combined net/core/skbuff.c index a0b1d4847efe,bbc3b4b62032..2531ac4ffa69 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@@ -70,7 -70,6 +70,7 @@@ #include <net/xfrm.h> #include <net/mpls.h> #include <net/mptcp.h> +#include <net/page_pool.h>
#include <linux/uaccess.h> #include <trace/events/skb.h> @@@ -646,13 -645,10 +646,13 @@@ static void skb_free_head(struct sk_buf { unsigned char *head = skb->head;
- if (skb->head_frag) + if (skb->head_frag) { + if (skb_pp_recycle(skb, head)) + return; skb_free_frag(head); - else + } else { kfree(head); + } }
static void skb_release_data(struct sk_buff *skb) @@@ -668,7 -664,7 +668,7 @@@ skb_zcopy_clear(skb, true);
for (i = 0; i < shinfo->nr_frags; i++) - __skb_frag_unref(&shinfo->frags[i]); + __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
if (shinfo->frag_list) kfree_skb_list(shinfo->frag_list); @@@ -1050,7 -1046,6 +1050,7 @@@ static struct sk_buff *__skb_clone(stru n->nohdr = 0; n->peeked = 0; C(pfmemalloc); + C(pp_recycle); n->destructor = NULL; C(tail); C(end); @@@ -1258,6 -1253,7 +1258,7 @@@ static void __msg_zerocopy_callback(str struct sock *sk = skb->sk; struct sk_buff_head *q; unsigned long flags; + bool is_zerocopy; u32 lo, hi; u16 len;
@@@ -1272,6 -1268,7 +1273,7 @@@ len = uarg->len; lo = uarg->id; hi = uarg->id + len - 1; + is_zerocopy = uarg->zerocopy;
serr = SKB_EXT_ERR(skb); memset(serr, 0, sizeof(*serr)); @@@ -1279,7 -1276,7 +1281,7 @@@ serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY; serr->ee.ee_data = hi; serr->ee.ee_info = lo; - if (!uarg->zerocopy) + if (!is_zerocopy) serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED;
q = &sk->sk_error_queue; @@@ -3500,7 -3497,7 +3502,7 @@@ int skb_shift(struct sk_buff *tgt, stru fragto = &skb_shinfo(tgt)->frags[merge];
skb_frag_size_add(fragto, skb_frag_size(fragfrom)); - __skb_frag_unref(fragfrom); + __skb_frag_unref(fragfrom, skb->pp_recycle); }
/* Reposition in the original skb */ @@@ -5290,13 -5287,6 +5292,13 @@@ bool skb_try_coalesce(struct sk_buff *t if (skb_cloned(to)) return false;
+ /* The page pool signature of struct page will eventually figure out + * which pages can be recycled or not but for now let's prohibit slab + * allocated and page_pool allocated SKBs from being coalesced. + */ + if (to->pp_recycle != from->pp_recycle) + return false; + if (len <= skb_tailroom(to)) { if (len) BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); diff --combined net/ipv4/af_inet.c index 750f388a4a68,2f94d221c00e..54648181dd56 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@@ -318,7 -318,7 +318,7 @@@ lookup_protocol
WARN_ON(!answer_prot->slab);
- err = -ENOBUFS; + err = -ENOMEM; sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; @@@ -575,7 -575,7 +575,7 @@@ int inet_dgram_connect(struct socket *s return err; }
- if (!inet_sk(sk)->inet_num && inet_autobind(sk)) + if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->connect(sk, uaddr, addr_len); } @@@ -803,7 -803,7 +803,7 @@@ int inet_send_prepare(struct sock *sk sock_rps_record_flow(sk);
/* We may need to bind the socket. */ - if (!inet_sk(sk)->inet_num && !sk->sk_prot->no_autobind && + if (data_race(!inet_sk(sk)->inet_num) && !sk->sk_prot->no_autobind && inet_autobind(sk)) return -EAGAIN;
@@@ -1720,6 -1720,7 +1720,6 @@@ EXPORT_SYMBOL_GPL(snmp_fold_field64) #ifdef CONFIG_IP_MULTICAST static const struct net_protocol igmp_protocol = { .handler = igmp_rcv, - .netns_ok = 1, }; #endif
@@@ -1732,6 -1733,7 +1732,6 @@@ static struct net_protocol tcp_protoco .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, .no_policy = 1, - .netns_ok = 1, .icmp_strict_tag_validation = 1, };
@@@ -1744,12 -1746,14 +1744,12 @@@ static struct net_protocol udp_protoco .handler = udp_rcv, .err_handler = udp_err, .no_policy = 1, - .netns_ok = 1, };
static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .err_handler = icmp_err, .no_policy = 1, - .netns_ok = 1, };
static __net_init int ipv4_mib_init_net(struct net *net) diff --combined net/ipv4/cipso_ipv4.c index d6e3a92841e3,e0480c6cebaa..099259fc826a --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@@ -187,7 -187,8 +187,7 @@@ static int __init cipso_v4_cache_init(v * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache * * Description: - * Invalidates and frees any entries in the CIPSO cache. Returns zero on - * success and negative values on failure. + * Invalidates and frees any entries in the CIPSO cache. * */ void cipso_v4_cache_invalidate(void) @@@ -471,6 -472,7 +471,7 @@@ void cipso_v4_doi_free(struct cipso_v4_ kfree(doi_def->map.std->lvl.local); kfree(doi_def->map.std->cat.cipso); kfree(doi_def->map.std->cat.local); + kfree(doi_def->map.std); break; } kfree(doi_def); diff --combined net/ipv4/devinet.c index 50deeff48c8b,1c6429c353a9..73721a4448bd --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@@ -1955,7 -1955,7 +1955,7 @@@ static int inet_validate_link_af(const struct nlattr *a, *tb[IFLA_INET_MAX+1]; int err, rem;
- if (dev && !__in_dev_get_rcu(dev)) + if (dev && !__in_dev_get_rtnl(dev)) return -EAFNOSUPPORT;
err = nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, @@@ -1981,7 -1981,7 +1981,7 @@@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla, struct netlink_ext_ack *extack) { - struct in_device *in_dev = __in_dev_get_rcu(dev); + struct in_device *in_dev = __in_dev_get_rtnl(dev); struct nlattr *a, *tb[IFLA_INET_MAX+1]; int rem;
@@@ -1989,7 -1989,7 +1989,7 @@@ return -EAFNOSUPPORT;
if (nla_parse_nested_deprecated(tb, IFLA_INET_MAX, nla, NULL, NULL) < 0) - BUG(); + return -EINVAL;
if (tb[IFLA_INET_CONF]) { nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) diff --combined net/ipv4/icmp.c index 2e09d62d59e3,752e392083e6..0a57f1892e7e --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@@ -759,6 -759,13 +759,13 @@@ void __icmp_send(struct sk_buff *skb_in icmp_param.data_len = room; icmp_param.head_len = sizeof(struct icmphdr);
+ /* if we don't have a source address at this point, fall back to the + * dummy address instead of sending out a packet with a source address + * of 0.0.0.0 + */ + if (!fl4.saddr) + fl4.saddr = htonl(INADDR_DUMMY); + icmp_push_reply(&icmp_param, &fl4, &ipc, &rt); ende: ip_rt_put(rt); @@@ -1059,7 -1066,7 +1066,7 @@@ static bool icmp_echo(struct sk_buff *s if (ident_len != sizeof(iio->ident.addr.ctype3_hdr) + sizeof(struct in_addr)) goto send_mal_query; - dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr.s_addr); + dev = ip_dev_find(net, iio->ident.addr.ip_addr.ipv4_addr); break; #if IS_ENABLED(CONFIG_IPV6) case ICMP_AFI_IP6: diff --combined net/ipv4/route.c index a4c477475f4c,6a36ac98476f..66aacb939d3e --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@@ -1906,128 -1906,13 +1906,128 @@@ out hash_keys->addrs.v4addrs.dst = key_iph->daddr; }
+static u32 fib_multipath_custom_hash_outer(const struct net *net, + const struct sk_buff *skb, + bool *p_has_inner) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys keys, hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); + + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); + return flow_hash_from_keys(&hash_keys); +} + +static u32 fib_multipath_custom_hash_inner(const struct net *net, + const struct sk_buff *skb, + bool has_inner) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys keys, hash_keys; + + /* We assume the packet carries an encapsulation, but if none was + * encountered during dissection of the outer flow, then there is no + * point in calling the flow dissector again. + */ + if (!has_inner) + return 0; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, 0); + + if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) + return 0; + + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) + hash_keys.tags.flow_label = keys.tags.flow_label; + } + + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + return flow_hash_from_keys(&hash_keys); +} + +static u32 fib_multipath_custom_hash_skb(const struct net *net, + const struct sk_buff *skb) +{ + u32 mhash, mhash_inner; + bool has_inner = true; + + mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner); + mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner); + + return jhash_2words(mhash, mhash_inner, 0); +} + +static u32 fib_multipath_custom_hash_fl4(const struct net *net, + const struct flowi4 *fl4) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys hash_keys; + + if (!(hash_fields & 
FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v4addrs.src = fl4->saddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v4addrs.dst = fl4->daddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = fl4->flowi4_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = fl4->fl4_sport; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = fl4->fl4_dport; + + return flow_hash_from_keys(&hash_keys); +} + /* if skb is set it will be used and fl4 can be NULL */ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys) { u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0; struct flow_keys hash_keys; - u32 mhash; + u32 mhash = 0;
switch (net->ipv4.sysctl_fib_multipath_hash_policy) { case 0: @@@ -2039,7 -1924,6 +2039,7 @@@ hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } + mhash = flow_hash_from_keys(&hash_keys); break; case 1: /* skb is currently provided only when forwarding */ @@@ -2073,7 -1957,6 +2073,7 @@@ hash_keys.ports.dst = fl4->fl4_dport; hash_keys.basic.ip_proto = fl4->flowi4_proto; } + mhash = flow_hash_from_keys(&hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); @@@ -2104,15 -1987,9 +2104,15 @@@ hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } + mhash = flow_hash_from_keys(&hash_keys); + break; + case 3: + if (skb) + mhash = fib_multipath_custom_hash_skb(net, skb); + else + mhash = fib_multipath_custom_hash_fl4(net, fl4); break; } - mhash = flow_hash_from_keys(&hash_keys);
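Policy 3 above introduces the custom multipath hash: a sysctl bitmask selects which outer and inner header fields are copied into the flow keys before hashing, and the outer and inner hashes are then combined with jhash_2words(). A self-contained sketch of hashing only the selected fields; the bit values and the mixing function are placeholders for the real FIB_MULTIPATH_HASH_FIELD_* flags and flow_hash_from_keys().

/* Sketch of hashing only administrator-selected fields. The bit values
 * and the mixing function are illustrative, not the kernel's.
 */
#include <stdio.h>
#include <stdint.h>

#define HASH_FIELD_SRC_IP	(1U << 0)
#define HASH_FIELD_DST_IP	(1U << 1)
#define HASH_FIELD_IP_PROTO	(1U << 2)
#define HASH_FIELD_SRC_PORT	(1U << 3)
#define HASH_FIELD_DST_PORT	(1U << 4)

struct flow {
	uint32_t saddr, daddr;
	uint16_t sport, dport;
	uint8_t proto;
};

static uint32_t mix(uint32_t h, uint32_t v)
{
	h ^= v;
	h *= 0x9e3779b1u;	/* stand-in for flow_hash_from_keys() */
	return h ^ (h >> 16);
}

static uint32_t custom_hash(const struct flow *fl, uint32_t fields)
{
	uint32_t h = 0;

	if (fields & HASH_FIELD_SRC_IP)
		h = mix(h, fl->saddr);
	if (fields & HASH_FIELD_DST_IP)
		h = mix(h, fl->daddr);
	if (fields & HASH_FIELD_IP_PROTO)
		h = mix(h, fl->proto);
	if (fields & HASH_FIELD_SRC_PORT)
		h = mix(h, fl->sport);
	if (fields & HASH_FIELD_DST_PORT)
		h = mix(h, fl->dport);
	return h;
}

int main(void)
{
	struct flow fl = { 0x0a000001, 0x0a000002, 12345, 80, 6 };

	/* hash on addresses only: ports do not perturb the path choice */
	printf("%08x\n", custom_hash(&fl, HASH_FIELD_SRC_IP | HASH_FIELD_DST_IP));
	printf("%08x\n", custom_hash(&fl, HASH_FIELD_SRC_IP | HASH_FIELD_DST_IP |
					  HASH_FIELD_SRC_PORT | HASH_FIELD_DST_PORT));
	return 0;
}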
if (multipath_hash) mhash = jhash_2words(mhash, multipath_hash, 0); @@@ -2179,6 -2056,19 +2179,19 @@@ martian_source return err; }
+ /* get device for dst_alloc with local routes */ + static struct net_device *ip_rt_get_dev(struct net *net, + const struct fib_result *res) + { + struct fib_nh_common *nhc = res->fi ? res->nhc : NULL; + struct net_device *dev = NULL; + + if (nhc) + dev = l3mdev_master_dev_rcu(nhc->nhc_dev); + + return dev ? : net->loopback_dev; + } + /* * NOTE. We drop all the packets that has local source * addresses, because every properly looped back packet @@@ -2335,7 -2225,7 +2348,7 @@@ local_input } }
- rth = rt_dst_alloc(l3mdev_master_dev_rcu(dev) ? : net->loopback_dev, + rth = rt_dst_alloc(ip_rt_get_dev(net, res), flags | RTCF_LOCAL, res->type, IN_DEV_ORCONF(in_dev, NOPOLICY), false); if (!rth) diff --combined net/ipv6/addrconf.c index 048570900fdf,701eb82acd1c..3bf685fe64b9 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@@ -5827,7 -5827,7 +5827,7 @@@ static int inet6_set_link_af(struct net return -EAFNOSUPPORT;
if (nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla, NULL, NULL) < 0) - BUG(); + return -EINVAL;
if (tb[IFLA_INET6_TOKEN]) { err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]), @@@ -6903,10 -6903,10 +6903,10 @@@ static const struct ctl_table addrconf_ .proc_handler = proc_dointvec, }, { - .procname = "addr_gen_mode", - .data = &ipv6_devconf.addr_gen_mode, - .maxlen = sizeof(int), - .mode = 0644, + .procname = "addr_gen_mode", + .data = &ipv6_devconf.addr_gen_mode, + .maxlen = sizeof(int), + .mode = 0644, .proc_handler = addrconf_sysctl_addr_gen_mode, }, { diff --combined net/mptcp/options.c index 1aec01686c1a,9b263f27ce9b..25189595ed1d --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@@ -44,20 -44,7 +44,20 @@@ static void mptcp_parse_option(const st else expected_opsize = TCPOLEN_MPTCP_MPC_SYN; } - if (opsize != expected_opsize) + + /* Cfr RFC 8684 Section 3.3.0: + * If a checksum is present but its use had + * not been negotiated in the MP_CAPABLE handshake, the receiver MUST + * close the subflow with a RST, as it is not behaving as negotiated. + * If a checksum is not present when its use has been negotiated, the + * receiver MUST close the subflow with a RST, as it is considered + * broken + * We parse even option with mismatching csum presence, so that + * later in subflow_data_ready we can trigger the reset. + */ + if (opsize != expected_opsize && + (expected_opsize != TCPOLEN_MPTCP_MPC_ACK_DATA || + opsize != TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM)) break;
/* try to be gentle vs future versions on the initial syn */ @@@ -79,9 -66,16 +79,9 @@@ * host requires the use of checksums, checksums MUST be used. * In other words, the only way for checksums not to be used * is if both hosts in their SYNs set A=0." - * - * Section 3.3.0: - * "If a checksum is not present when its use has been - * negotiated, the receiver MUST close the subflow with a RST as - * it is considered broken." - * - * We don't implement DSS checksum - fall back to TCP. */ if (flags & MPTCP_CAP_CHECKSUM_REQD) - break; + mp_opt->csum_reqd = 1;
mp_opt->mp_capable = 1; if (opsize >= TCPOLEN_MPTCP_MPC_SYNACK) { @@@ -92,7 -86,7 +92,7 @@@ mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; } - if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA) { + if (opsize >= TCPOLEN_MPTCP_MPC_ACK_DATA) { /* Section 3.1.: * "the data parameters in a MP_CAPABLE are semantically * equivalent to those in a DSS option and can be used @@@ -104,14 -98,9 +104,14 @@@ mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2; } - pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d", + if (opsize == TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM) { + mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + mp_opt->csum_reqd = 1; + ptr += 2; + } + pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u", version, flags, opsize, mp_opt->sndr_key, - mp_opt->rcvr_key, mp_opt->data_len); + mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum); break;
case MPTCPOPT_MP_JOIN: @@@ -182,8 -171,10 +182,8 @@@ expected_opsize += TCPOLEN_MPTCP_DSS_MAP32; }
- /* RFC 6824, Section 3.3: - * If a checksum is present, but its use had - * not been negotiated in the MP_CAPABLE handshake, - * the checksum field MUST be ignored. + /* Always parse any csum presence combination, we will enforce + * RFC 8684 Section 3.3.0 checks later in subflow_data_ready */ if (opsize != expected_opsize && opsize != expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) @@@ -218,15 -209,9 +218,15 @@@ mp_opt->data_len = get_unaligned_be16(ptr); ptr += 2;
- pr_debug("data_seq=%llu subflow_seq=%u data_len=%u", + if (opsize == expected_opsize + TCPOLEN_MPTCP_DSS_CHECKSUM) { + mp_opt->csum_reqd = 1; + mp_opt->csum = (__force __sum16)get_unaligned_be16(ptr); + ptr += 2; + } + + pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", mp_opt->data_seq, mp_opt->subflow_seq, - mp_opt->data_len); + mp_opt->data_len, mp_opt->csum_reqd, mp_opt->csum); }
break; @@@ -338,12 -323,9 +338,12 @@@ } }
-void mptcp_get_options(const struct sk_buff *skb, +void mptcp_get_options(const struct sock *sk, + const struct sk_buff *skb, struct mptcp_options_received *mp_opt) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); const struct tcphdr *th = tcp_hdr(skb); const unsigned char *ptr; int length; @@@ -359,7 -341,6 +359,7 @@@ mp_opt->dss = 0; mp_opt->mp_prio = 0; mp_opt->reset = 0; + mp_opt->csum_reqd = READ_ONCE(msk->csum_enabled);
length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@@ -375,6 -356,8 +375,8 @@@ length--; continue; default: + if (length < 2) + return; opsize = *ptr++; if (opsize < 2) /* "silly options" */ return; @@@ -399,7 -382,6 +401,7 @@@ bool mptcp_syn_options(struct sock *sk subflow->snd_isn = TCP_SKB_CB(skb)->end_seq; if (subflow->request_mptcp) { opts->suboptions = OPTION_MPTCP_MPC_SYN; + opts->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk)); *size = TCPOLEN_MPTCP_MPC_SYN; return true; } else if (subflow->request_join) { @@@ -455,10 -437,8 +457,10 @@@ static bool mptcp_established_options_m struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); struct mptcp_ext *mpext; unsigned int data_len; + u8 len;
/* When skb is not available, we better over-estimate the emitted * options len. A full DSS option (28 bytes) is longer than @@@ -487,26 -467,16 +489,26 @@@ opts->suboptions = OPTION_MPTCP_MPC_ACK; opts->sndr_key = subflow->local_key; opts->rcvr_key = subflow->remote_key; + opts->csum_reqd = READ_ONCE(msk->csum_enabled);
/* Section 3.1. * The MP_CAPABLE option is carried on the SYN, SYN/ACK, and ACK * packets that start the first subflow of an MPTCP connection, * as well as the first packet that carries data */ - if (data_len > 0) - *size = ALIGN(TCPOLEN_MPTCP_MPC_ACK_DATA, 4); - else + if (data_len > 0) { + len = TCPOLEN_MPTCP_MPC_ACK_DATA; + if (opts->csum_reqd) { + /* we need to propagate more info to csum the pseudo hdr */ + opts->ext_copy.data_seq = mpext->data_seq; + opts->ext_copy.subflow_seq = mpext->subflow_seq; + opts->ext_copy.csum = mpext->csum; + len += TCPOLEN_MPTCP_DSS_CHECKSUM; + } + *size = ALIGN(len, 4); + } else { *size = TCPOLEN_MPTCP_MPC_ACK; + }
pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d", subflow, subflow->local_key, subflow->remote_key, @@@ -567,21 -537,18 +569,21 @@@ static bool mptcp_established_options_d bool ret = false; u64 ack_seq;
+ opts->csum_reqd = READ_ONCE(msk->csum_enabled); mpext = skb ? mptcp_get_ext(skb) : NULL;
if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) { - unsigned int map_size; + unsigned int map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64;
- map_size = TCPOLEN_MPTCP_DSS_BASE + TCPOLEN_MPTCP_DSS_MAP64; + if (mpext) { + if (opts->csum_reqd) + map_size += TCPOLEN_MPTCP_DSS_CHECKSUM;
- remaining -= map_size; - dss_size = map_size; - if (mpext) opts->ext_copy = *mpext; + }
+ remaining -= map_size; + dss_size = map_size; if (skb && snd_data_fin_enable) mptcp_write_data_fin(subflow, skb, &opts->ext_copy); ret = true; @@@ -824,7 -791,6 +826,7 @@@ bool mptcp_synack_options(const struct if (subflow_req->mp_capable) { opts->suboptions = OPTION_MPTCP_MPC_SYNACK; opts->sndr_key = subflow_req->local_key; + opts->csum_reqd = subflow_req->csum_reqd; *size = TCPOLEN_MPTCP_MPC_SYNACK; pr_debug("subflow_req=%p, local_key=%llu", subflow_req, subflow_req->local_key); @@@ -1043,7 -1009,7 +1045,7 @@@ void mptcp_incoming_options(struct soc return; }
- mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return;
@@@ -1135,10 -1101,6 +1137,10 @@@ } mpext->data_len = mp_opt.data_len; mpext->use_map = 1; + mpext->csum_reqd = mp_opt.csum_reqd; + + if (mpext->csum_reqd) + mpext->csum = mp_opt.csum; } }
@@@ -1158,50 -1120,25 +1160,50 @@@ static void mptcp_set_rwin(const struc WRITE_ONCE(msk->rcv_wnd_sent, ack_seq); }
+static u16 mptcp_make_csum(const struct mptcp_ext *mpext) +{ + struct csum_pseudo_header header; + __wsum csum; + + /* cfr RFC 8684 3.3.1.: + * the data sequence number used in the pseudo-header is + * always the 64-bit value, irrespective of what length is used in the + * DSS option itself. + */ + header.data_seq = cpu_to_be64(mpext->data_seq); + header.subflow_seq = htonl(mpext->subflow_seq); + header.data_len = htons(mpext->data_len); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), ~csum_unfold(mpext->csum)); + return (__force u16)csum_fold(csum); +} + void mptcp_write_options(__be32 *ptr, const struct tcp_sock *tp, struct mptcp_out_options *opts) { if ((OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions) { - u8 len; + u8 len, flag = MPTCP_CAP_HMAC_SHA256;
- if (OPTION_MPTCP_MPC_SYN & opts->suboptions) + if (OPTION_MPTCP_MPC_SYN & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYN; - else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) + } else if (OPTION_MPTCP_MPC_SYNACK & opts->suboptions) { len = TCPOLEN_MPTCP_MPC_SYNACK; - else if (opts->ext_copy.data_len) + } else if (opts->ext_copy.data_len) { len = TCPOLEN_MPTCP_MPC_ACK_DATA; - else + if (opts->csum_reqd) + len += TCPOLEN_MPTCP_DSS_CHECKSUM; + } else { len = TCPOLEN_MPTCP_MPC_ACK; + } + + if (opts->csum_reqd) + flag |= MPTCP_CAP_CHECKSUM_REQD;
*ptr++ = mptcp_option(MPTCPOPT_MP_CAPABLE, len, MPTCP_SUPPORTED_VERSION, - MPTCP_CAP_HMAC_SHA256); + flag);
if (!((OPTION_MPTCP_MPC_SYNACK | OPTION_MPTCP_MPC_ACK) & opts->suboptions)) @@@ -1217,13 -1154,8 +1219,13 @@@ if (!opts->ext_copy.data_len) goto mp_capable_done;
- put_unaligned_be32(opts->ext_copy.data_len << 16 | - TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + if (opts->csum_reqd) { + put_unaligned_be32(opts->ext_copy.data_len << 16 | + mptcp_make_csum(&opts->ext_copy), ptr); + } else { + put_unaligned_be32(opts->ext_copy.data_len << 16 | + TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + } ptr += 1; }
@@@ -1375,9 -1307,6 +1377,9 @@@ mp_capable_done flags |= MPTCP_DSS_HAS_MAP | MPTCP_DSS_DSN64; if (mpext->data_fin) flags |= MPTCP_DSS_DATA_FIN; + + if (opts->csum_reqd) + len += TCPOLEN_MPTCP_DSS_CHECKSUM; }
*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags); @@@ -1397,13 -1326,8 +1399,13 @@@ ptr += 2; put_unaligned_be32(mpext->subflow_seq, ptr); ptr += 1; - put_unaligned_be32(mpext->data_len << 16 | - TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + if (opts->csum_reqd) { + put_unaligned_be32(mpext->data_len << 16 | + mptcp_make_csum(mpext), ptr); + } else { + put_unaligned_be32(mpext->data_len << 16 | + TCPOPT_NOP << 8 | TCPOPT_NOP, ptr); + } } }
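mptcp_make_csum() folds a standard Internet (one's-complement) checksum over a pseudo-header carrying the full 64-bit data sequence number, the subflow sequence number, the data length and a zero checksum field, seeded with the data checksum already stored in the extension. A stand-alone sketch of that fold; the struct layout and the zero seed are illustrative, and byte-order handling is glossed over.

/* Stand-alone one's-complement checksum over a DSS-style pseudo-header.
 * This is a sketch, not the kernel's csum_partial()/csum_fold().
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct pseudo_hdr {
	uint64_t data_seq;	/* always the full 64-bit DSN */
	uint32_t subflow_seq;
	uint16_t data_len;
	uint16_t csum;		/* zero while the header is summed */
};

/* plain one's-complement sum with end-around carry, folded to 16 bits */
static uint16_t csum16(const void *buf, size_t len, uint32_t seed)
{
	const uint8_t *p = buf;
	uint32_t sum = seed;

	while (len > 1) {
		sum += (uint32_t)p[0] << 8 | p[1];
		p += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	struct pseudo_hdr hdr;

	/* byte order is glossed over; the real pseudo-header is built in
	 * network byte order, and the seed is the payload data checksum
	 */
	memset(&hdr, 0, sizeof(hdr));
	hdr.data_seq = 1000;
	hdr.subflow_seq = 1;
	hdr.data_len = 100;

	printf("pseudo-header csum: 0x%04x\n", csum16(&hdr, sizeof(hdr), 0));
	return 0;
}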
diff --combined net/mptcp/protocol.c index 42fc7187beee,632350018fb6..b5f2f504b85b --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@@ -39,15 -39,10 +39,15 @@@ struct mptcp_skb_cb u64 map_seq; u64 end_seq; u32 offset; + u8 has_rxtstamp:1; };
#define MPTCP_SKB_CB(__skb) ((struct mptcp_skb_cb *)&((__skb)->cb[0]))
+enum { + MPTCP_CMSG_TS = BIT(0), +}; + static struct percpu_counter mptcp_sockets_allocated;
static void __mptcp_destroy_sock(struct sock *sk); @@@ -277,7 -272,6 +277,7 @@@ static bool __mptcp_move_skb(struct mpt struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = (struct sock *)msk; struct sk_buff *tail; + bool has_rxtstamp;
__skb_unlink(skb, &ssk->sk_receive_queue);
@@@ -286,15 -280,15 +286,17 @@@
/* try to fetch required memory from subflow */ if (!sk_rmem_schedule(sk, skb, skb->truesize)) { - if (ssk->sk_forward_alloc < skb->truesize) - goto drop; - __sk_mem_reclaim(ssk, skb->truesize); - if (!sk_rmem_schedule(sk, skb, skb->truesize)) + int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT; + + if (ssk->sk_forward_alloc < amount) goto drop; + + ssk->sk_forward_alloc -= amount; + sk->sk_forward_alloc += amount; }
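The accounting fix above moves memory from the subflow to the msk in whole page quanta: sk_mem_pages() rounds the skb's truesize up to pages, and that page count shifted by SK_MEM_QUANTUM_SHIFT is what gets subtracted from ssk->sk_forward_alloc and added to sk->sk_forward_alloc. A worked example, assuming 4 KiB pages (SK_MEM_QUANTUM == PAGE_SIZE); the truesize figure is made up.

/* Worked example of the page-quantum accounting; not kernel code. */
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1 << PAGE_SHIFT)

static int mem_pages(int bytes)
{
	return (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT;	/* round up */
}

int main(void)
{
	int truesize = 1500 + 320;			/* payload plus skb overhead, say */
	int amount = mem_pages(truesize) << PAGE_SHIFT;	/* charge whole pages */

	printf("truesize %d -> charge %d bytes (%d page(s))\n",
	       truesize, amount, mem_pages(truesize));
	return 0;
}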
+ has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; + /* the skb map_seq accounts for the skb offset: * mptcp_subflow_get_mapped_dsn() is based on the current tp->copied_seq * value @@@ -302,7 -296,6 +304,7 @@@ MPTCP_SKB_CB(skb)->map_seq = mptcp_subflow_get_mapped_dsn(subflow); MPTCP_SKB_CB(skb)->end_seq = MPTCP_SKB_CB(skb)->map_seq + copy_len; MPTCP_SKB_CB(skb)->offset = offset; + MPTCP_SKB_CB(skb)->has_rxtstamp = has_rxtstamp;
if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) { /* in sequence */ @@@ -677,18 -670,22 +679,22 @@@ static bool __mptcp_ofo_queue(struct mp /* In most cases we will be able to lock the mptcp socket. If its already * owned, we need to defer to the work queue to avoid ABBA deadlock. */ - static void move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) + static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) { struct sock *sk = (struct sock *)msk; unsigned int moved = 0;
if (inet_sk_state_load(sk) == TCP_CLOSE) - return; - - mptcp_data_lock(sk); + return false;
__mptcp_move_skbs_from_subflow(msk, ssk, &moved); __mptcp_ofo_queue(msk); + if (unlikely(ssk->sk_err)) { + if (!sock_owned_by_user(sk)) + __mptcp_error_report(sk); + else + set_bit(MPTCP_ERROR_REPORT, &msk->flags); + }
/* If the moves have caught up with the DATA_FIN sequence number * it's time to ack the DATA_FIN and change socket state, but @@@ -697,7 -694,7 +703,7 @@@ */ if (mptcp_pending_data_fin(sk, NULL)) mptcp_schedule_work(sk); - mptcp_data_unlock(sk); + return moved > 0; }
void mptcp_data_ready(struct sock *sk, struct sock *ssk) @@@ -705,7 -702,6 +711,6 @@@ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct mptcp_sock *msk = mptcp_sk(sk); int sk_rbuf, ssk_rbuf; - bool wake;
/* The peer can send data while we are shutting down this * subflow at msk destruction time, but we must avoid enqueuing @@@ -714,28 -710,22 +719,22 @@@ if (unlikely(subflow->disposable)) return;
- /* move_skbs_to_msk below can legitly clear the data_avail flag, - * but we will need later to properly woke the reader, cache its - * value - */ - wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL; - if (wake) - set_bit(MPTCP_DATA_READY, &msk->flags); - ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf); sk_rbuf = READ_ONCE(sk->sk_rcvbuf); if (unlikely(ssk_rbuf > sk_rbuf)) sk_rbuf = ssk_rbuf;
- /* over limit? can't append more skbs to msk */ + /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) - goto wake; - - move_skbs_to_msk(msk, ssk); + return;
- wake: - if (wake) + /* Wake-up the reader only for in-sequence data */ + mptcp_data_lock(sk); + if (move_skbs_to_msk(msk, ssk)) { + set_bit(MPTCP_DATA_READY, &msk->flags); sk->sk_data_ready(sk); + } + mptcp_data_unlock(sk); }
static bool mptcp_do_flush_join_list(struct mptcp_sock *msk) @@@ -867,7 -857,7 +866,7 @@@ static struct sock *mptcp_subflow_recv_ sock_owned_by_me(sk);
mptcp_for_each_subflow(msk, subflow) { - if (subflow->data_avail) + if (READ_ONCE(subflow->data_avail)) return mptcp_subflow_tcp_sock(subflow); }
@@@ -1308,18 -1298,6 +1307,18 @@@ static bool mptcp_alloc_tx_skb(struct s return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation); }
+/* note: this always recompute the csum on the whole skb, even + * if we just appended a single frag. More status info needed + */ +static void mptcp_update_data_checksum(struct sk_buff *skb, int added) +{ + struct mptcp_ext *mpext = mptcp_get_ext(skb); + __wsum csum = ~csum_unfold(mpext->csum); + int offset = skb->len - added; + + mpext->csum = csum_fold(csum_block_add(csum, skb_checksum(skb, offset, added, 0), offset)); +} + static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, struct mptcp_data_frag *dfrag, struct mptcp_sendmsg_info *info) @@@ -1414,14 -1392,10 +1413,14 @@@ if (zero_window_probe) { mptcp_subflow_ctx(ssk)->rel_write_seq += ret; mpext->frozen = 1; - ret = 0; + if (READ_ONCE(msk->csum_enabled)) + mptcp_update_data_checksum(tail, ret); tcp_push_pending_frames(ssk); + return 0; } out: + if (READ_ONCE(msk->csum_enabled)) + mptcp_update_data_checksum(tail, ret); mptcp_subflow_ctx(ssk)->rel_write_seq += ret; return ret; } @@@ -1796,9 -1770,7 +1795,9 @@@ static void mptcp_wait_data(struct soc
static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, struct msghdr *msg, - size_t len, int flags) + size_t len, int flags, + struct scm_timestamping_internal *tss, + int *cmsg_flags) { struct sk_buff *skb, *tmp; int copied = 0; @@@ -1818,11 -1790,6 +1817,11 @@@ } }
+ if (MPTCP_SKB_CB(skb)->has_rxtstamp) { + tcp_update_recv_tstamps(skb, tss); + *cmsg_flags |= MPTCP_CMSG_TS; + } + copied += count;
if (count < data_len) { @@@ -1987,6 -1954,9 +1986,9 @@@ static bool __mptcp_move_skbs(struct mp done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved); mptcp_data_unlock(sk); tcp_cleanup_rbuf(ssk, moved); + + if (unlikely(ssk->sk_err)) + __mptcp_error_report(sk); unlock_sock_fast(ssk, slowpath); } while (!done);
@@@ -2010,8 -1980,7 +2012,8 @@@ static int mptcp_recvmsg(struct sock *s int nonblock, int flags, int *addr_len) { struct mptcp_sock *msk = mptcp_sk(sk); - int copied = 0; + struct scm_timestamping_internal tss; + int copied = 0, cmsg_flags = 0; int target; long timeo;
@@@ -2033,7 -2002,7 +2035,7 @@@ while (copied < len) { int bytes_read;
- bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags); + bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags); if (unlikely(bytes_read < 0)) { if (!copied) copied = bytes_read; @@@ -2114,11 -2083,6 +2116,11 @@@ set_bit(MPTCP_DATA_READY, &msk->flags); } out_err: + if (cmsg_flags && copied >= 0) { + if (cmsg_flags & MPTCP_CMSG_TS) + tcp_recv_timestamp(msg, sk, &tss); + } + pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d", msk, test_bit(MPTCP_DATA_READY, &msk->flags), skb_queue_empty_lockless(&sk->sk_receive_queue), copied); @@@ -2375,8 -2339,8 +2377,8 @@@ static void __mptcp_retrans(struct soc
/* limit retransmission to the bytes already sent on some subflows */ info.sent = 0; - info.limit = dfrag->already_sent; - while (info.sent < dfrag->already_sent) { + info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent; + while (info.sent < info.limit) { if (!mptcp_alloc_tx_skb(sk, ssk)) break;
@@@ -2388,11 -2352,9 +2390,11 @@@ copied += ret; info.sent += ret; } - if (copied) + if (copied) { + dfrag->already_sent = max(dfrag->already_sent, info.sent); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); + }
mptcp_set_timeout(sk, ssk); release_sock(ssk); @@@ -2471,7 -2433,6 +2473,7 @@@ static int __mptcp_init_sock(struct soc msk->ack_hint = NULL; msk->first = NULL; inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; + WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
mptcp_pm_data_init(msk);
@@@ -2812,8 -2773,6 +2814,8 @@@ struct sock *mptcp_sk_clone(const struc msk->token = subflow_req->token; msk->subflow = NULL; WRITE_ONCE(msk->fully_established, false); + if (mp_opt->csum_reqd) + WRITE_ONCE(msk->csum_enabled, true);
msk->write_seq = subflow_req->idsn + 1; msk->snd_nxt = msk->write_seq; diff --combined net/mptcp/protocol.h index 16e50caf200e,385796f0ef19..160d716ebc2b --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@@ -68,8 -68,6 +68,8 @@@ #define TCPOLEN_MPTCP_FASTCLOSE 12 #define TCPOLEN_MPTCP_RST 4
+#define TCPOLEN_MPTCP_MPC_ACK_DATA_CSUM (TCPOLEN_MPTCP_DSS_CHECKSUM + TCPOLEN_MPTCP_MPC_ACK_DATA) + /* MPTCP MP_JOIN flags */ #define MPTCPOPT_BACKUP BIT(0) #define MPTCPOPT_HMAC_LEN 20 @@@ -126,7 -124,6 +126,7 @@@ struct mptcp_options_received u64 data_seq; u32 subflow_seq; u16 data_len; + __sum16 csum; u16 mp_capable : 1, mp_join : 1, fastclose : 1, @@@ -136,7 -133,6 +136,7 @@@ rm_addr : 1, mp_prio : 1, echo : 1, + csum_reqd : 1, backup : 1; u32 token; u32 nonce; @@@ -238,7 -234,6 +238,7 @@@ struct mptcp_sock bool snd_data_fin_enable; bool rcv_fastclose; bool use_64bit_ack; /* Set when we received a 64-bit DSN */ + bool csum_enabled; spinlock_t join_list_lock; struct sock *ack_hint; struct work_struct work; @@@ -340,19 -335,11 +340,19 @@@ static inline struct mptcp_data_frag *m return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); }
+struct csum_pseudo_header { + __be64 data_seq; + __be32 subflow_seq; + __be16 data_len; + __sum16 csum; +}; + struct mptcp_subflow_request_sock { struct tcp_request_sock sk; u16 mp_capable : 1, mp_join : 1, - backup : 1; + backup : 1, + csum_reqd : 1; u8 local_id; u8 remote_id; u64 local_key; @@@ -375,7 -362,6 +375,6 @@@ mptcp_subflow_rsk(const struct request_ enum mptcp_data_avail { MPTCP_SUBFLOW_NODATA, MPTCP_SUBFLOW_DATA_AVAIL, - MPTCP_SUBFLOW_OOO_DATA };
struct mptcp_delegated_action { @@@ -400,8 -386,6 +399,8 @@@ struct mptcp_subflow_context u32 map_subflow_seq; u32 ssn_offset; u32 map_data_len; + __wsum map_data_csum; + u32 map_csum_len; u32 request_mptcp : 1, /* send MP_CAPABLE */ request_join : 1, /* send MP_JOIN */ request_bkup : 1, @@@ -411,8 -395,6 +410,8 @@@ pm_notified : 1, /* PM hook called for established status */ conn_finished : 1, map_valid : 1, + map_csum_reqd : 1, + map_data_fin : 1, mpc_map : 1, backup : 1, send_mp_prio : 1, @@@ -542,7 -524,6 +541,7 @@@ static inline void mptcp_subflow_delega
int mptcp_is_enabled(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net); +int mptcp_is_checksum_enabled(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool mptcp_subflow_data_available(struct sock *sk); @@@ -594,8 -575,7 +593,8 @@@ int __init mptcp_proto_v6_init(void) struct sock *mptcp_sk_clone(const struct sock *sk, const struct mptcp_options_received *mp_opt, struct request_sock *req); -void mptcp_get_options(const struct sk_buff *skb, +void mptcp_get_options(const struct sock *sk, + const struct sk_buff *skb, struct mptcp_options_received *mp_opt);
void mptcp_finish_connect(struct sock *sk); @@@ -646,8 -626,6 +645,8 @@@ static inline void mptcp_write_space(st
void mptcp_destroy_common(struct mptcp_sock *msk);
+#define MPTCP_TOKEN_MAX_RETRIES 4 + void __init mptcp_token_init(void); static inline void mptcp_token_init_request(struct request_sock *req) { diff --combined net/mptcp/subflow.c index 6b1cd4257edf,be1de4084196..8976ff586b87 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@@ -108,7 -108,6 +108,7 @@@ static void subflow_init_req(struct req
subflow_req->mp_capable = 0; subflow_req->mp_join = 0; + subflow_req->csum_reqd = mptcp_is_checksum_enabled(sock_net(sk_listener)); subflow_req->msk = NULL; mptcp_token_init_request(req); } @@@ -151,7 -150,7 +151,7 @@@ static int subflow_check_req(struct req return -EINVAL; #endif
- mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); @@@ -163,7 -162,7 +163,7 @@@ }
if (mp_opt.mp_capable && listener->request_mptcp) { - int err, retries = 4; + int err, retries = MPTCP_TOKEN_MAX_RETRIES;
subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; again: @@@ -248,7 -247,7 +248,7 @@@ int mptcp_subflow_init_cookie_req(struc int err;
subflow_init_req(req, sk_listener); - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk_listener, skb, &mp_opt);
if (mp_opt.mp_capable && mp_opt.mp_join) return -EINVAL; @@@ -395,7 -394,7 +395,7 @@@ static void subflow_finish_connect(stru subflow->ssn_offset = TCP_SKB_CB(skb)->seq; pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
- mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (subflow->request_mptcp) { if (!mp_opt.mp_capable) { MPTCP_INC_STATS(sock_net(sk), @@@ -405,8 -404,6 +405,8 @@@ goto fallback; }
+ if (mp_opt.csum_reqd) + WRITE_ONCE(mptcp_sk(parent)->csum_enabled, true); subflow->mp_capable = 1; subflow->can_ack = 1; subflow->remote_key = mp_opt.sndr_key; @@@ -433,15 -430,15 +433,15 @@@ goto do_reset; }
+ if (!mptcp_finish_join(sk)) + goto do_reset; + subflow_generate_hmac(subflow->local_key, subflow->remote_key, subflow->local_nonce, subflow->remote_nonce, hmac); memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN);
- if (!mptcp_finish_join(sk)) - goto do_reset; - subflow->mp_join = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
@@@ -641,7 -638,7 +641,7 @@@ static struct sock *subflow_syn_recv_so * reordered MPC will cause fallback, but we don't have other * options. */ - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_capable) { fallback = true; goto create_child; @@@ -651,7 -648,7 +651,7 @@@ if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { - mptcp_get_options(skb, &mp_opt); + mptcp_get_options(sk, skb, &mp_opt); if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) || !mptcp_can_accept_new_subflow(subflow_req->msk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); @@@ -787,10 -784,10 +787,10 @@@ static u64 expand_seq(u64 old_seq, u16 return seq | ((old_seq + old_data_len + 1) & GENMASK_ULL(63, 32)); }
- static void warn_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) + static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn) { - WARN_ONCE(1, "Bad mapping: ssn=%d map_seq=%d map_data_len=%d", - ssn, subflow->map_subflow_seq, subflow->map_data_len); + pr_debug("Bad mapping: ssn=%d map_seq=%d map_data_len=%d", + ssn, subflow->map_subflow_seq, subflow->map_data_len); }
static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb) @@@ -815,104 -812,22 +815,104 @@@ static bool validate_mapping(struct soc /* Mapping covers data later in the subflow stream, * currently unsupported. */ - warn_bad_map(subflow, ssn); + dbg_bad_map(subflow, ssn); return false; } if (unlikely(!before(ssn, subflow->map_subflow_seq + subflow->map_data_len))) { /* Mapping does covers past subflow data, invalid */ - warn_bad_map(subflow, ssn + skb->len); + dbg_bad_map(subflow, ssn); return false; } return true; }
+static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *skb, + bool csum_reqd) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct csum_pseudo_header header; + u32 offset, seq, delta; + __wsum csum; + int len; + + if (!csum_reqd) + return MAPPING_OK; + + /* mapping already validated on previous traversal */ + if (subflow->map_csum_len == subflow->map_data_len) + return MAPPING_OK; + + /* traverse the receive queue, ensuring it contains a full + * DSS mapping and accumulating the related csum. + * Preserve the accoumlate csum across multiple calls, to compute + * the csum only once + */ + delta = subflow->map_data_len - subflow->map_csum_len; + for (;;) { + seq = tcp_sk(ssk)->copied_seq + subflow->map_csum_len; + offset = seq - TCP_SKB_CB(skb)->seq; + + /* if the current skb has not been accounted yet, csum its contents + * up to the amount covered by the current DSS + */ + if (offset < skb->len) { + __wsum csum; + + len = min(skb->len - offset, delta); + csum = skb_checksum(skb, offset, len, 0); + subflow->map_data_csum = csum_block_add(subflow->map_data_csum, csum, + subflow->map_csum_len); + + delta -= len; + subflow->map_csum_len += len; + } + if (delta == 0) + break; + + if (skb_queue_is_last(&ssk->sk_receive_queue, skb)) { + /* if this subflow is closed, the partial mapping + * will be never completed; flush the pending skbs, so + * that subflow_sched_work_if_closed() can kick in + */ + if (unlikely(ssk->sk_state == TCP_CLOSE)) + while ((skb = skb_peek(&ssk->sk_receive_queue))) + sk_eat_skb(ssk, skb); + + /* not enough data to validate the csum */ + return MAPPING_EMPTY; + } + + /* the DSS mapping for next skbs will be validated later, + * when a get_mapping_status call will process such skb + */ + skb = skb->next; + } + + /* note that 'map_data_len' accounts only for the carried data, does + * not include the eventual seq increment due to the data fin, + * while the pseudo header requires the original DSS data len, + * including that + */ + header.data_seq = cpu_to_be64(subflow->map_seq); + header.subflow_seq = htonl(subflow->map_subflow_seq); + header.data_len = htons(subflow->map_data_len + subflow->map_data_fin); + header.csum = 0; + + csum = csum_partial(&header, sizeof(header), subflow->map_data_csum); + if (unlikely(csum_fold(csum))) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR); + return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY; + } + + return MAPPING_OK; +} + static enum mapping_status get_mapping_status(struct sock *ssk, struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + bool csum_reqd = READ_ONCE(msk->csum_enabled); struct mptcp_ext *mpext; struct sk_buff *skb; u16 data_len; @@@ -1005,10 -920,9 +1005,10 @@@ /* Allow replacing only with an identical map */ if (subflow->map_seq == map_seq && subflow->map_subflow_seq == mpext->subflow_seq && - subflow->map_data_len == data_len) { + subflow->map_data_len == data_len && + subflow->map_csum_reqd == mpext->csum_reqd) { skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + goto validate_csum; }
/* If this skb data are fully covered by the current mapping, @@@ -1020,27 -934,17 +1020,27 @@@ }
/* will validate the next map after consuming the current one */ - return MAPPING_OK; + goto validate_csum; }
subflow->map_seq = map_seq; subflow->map_subflow_seq = mpext->subflow_seq; subflow->map_data_len = data_len; subflow->map_valid = 1; + subflow->map_data_fin = mpext->data_fin; subflow->mpc_map = mpext->mpc_map; - pr_debug("new map seq=%llu subflow_seq=%u data_len=%u", + subflow->map_csum_reqd = mpext->csum_reqd; + subflow->map_csum_len = 0; + subflow->map_data_csum = csum_unfold(mpext->csum); + + /* Cfr RFC 8684 Section 3.3.0 */ + if (unlikely(subflow->map_csum_reqd != csum_reqd)) + return MAPPING_INVALID; + + pr_debug("new map seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", subflow->map_seq, subflow->map_subflow_seq, - subflow->map_data_len); + subflow->map_data_len, subflow->map_csum_reqd, + subflow->map_data_csum);
validate_seq: /* we revalidate valid mapping on new skb, because we must ensure @@@ -1050,9 -954,7 +1050,9 @@@ return MAPPING_INVALID;
skb_ext_del(skb, SKB_EXT_MPTCP); - return MAPPING_OK; + +validate_csum: + return validate_data_csum(ssk, skb, csum_reqd); }
static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, @@@ -1098,7 -1000,7 +1098,7 @@@ static bool subflow_check_data_avail(st struct sk_buff *skb;
if (!skb_peek(&ssk->sk_receive_queue)) - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0); if (subflow->data_avail) return true;
@@@ -1137,18 -1039,13 +1137,13 @@@ ack_seq = mptcp_subflow_get_mapped_dsn(subflow); pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack, ack_seq); - if (ack_seq == old_ack) { - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; - break; - } else if (after64(ack_seq, old_ack)) { - subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA; - break; + if (unlikely(before64(ack_seq, old_ack))) { + mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); + continue; }
- /* only accept in-sequence mapping. Old values are spurious - * retransmission - */ - mptcp_subflow_discard_data(ssk, skb, old_ack - ack_seq); + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); + break; } return true;
@@@ -1163,12 -1060,11 +1158,11 @@@ fallback * subflow_error_report() will introduce the appropriate barriers */ ssk->sk_err = EBADMSG; - ssk->sk_error_report(ssk); tcp_set_state(ssk, TCP_CLOSE); subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMPTCP; tcp_send_active_reset(ssk, GFP_ATOMIC); - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0); return false; }
@@@ -1178,7 -1074,7 +1172,7 @@@ subflow->map_seq = READ_ONCE(msk->ack_seq); subflow->map_data_len = skb->len; subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - subflow->ssn_offset; - subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL; + WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL); return true; }
@@@ -1190,7 -1086,7 +1184,7 @@@ bool mptcp_subflow_data_available(struc if (subflow->map_valid && mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) { subflow->map_valid = 0; - subflow->data_avail = 0; + WRITE_ONCE(subflow->data_avail, 0);
pr_debug("Done with mapping: seq=%u data_len=%u", subflow->map_subflow_seq, @@@ -1218,41 -1114,6 +1212,6 @@@ void mptcp_space(const struct sock *ssk *full_space = tcp_full_space(sk); }
- static void subflow_data_ready(struct sock *sk) - { - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - u16 state = 1 << inet_sk_state_load(sk); - struct sock *parent = subflow->conn; - struct mptcp_sock *msk; - - msk = mptcp_sk(parent); - if (state & TCPF_LISTEN) { - /* MPJ subflow are removed from accept queue before reaching here, - * avoid stray wakeups - */ - if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) - return; - - set_bit(MPTCP_DATA_READY, &msk->flags); - parent->sk_data_ready(parent); - return; - } - - WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && - !subflow->mp_join && !(state & TCPF_CLOSE)); - - if (mptcp_subflow_data_available(sk)) - mptcp_data_ready(parent, sk); - } - - static void subflow_write_space(struct sock *ssk) - { - struct sock *sk = mptcp_subflow_ctx(ssk)->conn; - - mptcp_propagate_sndbuf(sk, ssk); - mptcp_write_space(sk); - } - void __mptcp_error_report(struct sock *sk) { struct mptcp_subflow_context *subflow; @@@ -1293,6 -1154,43 +1252,43 @@@ static void subflow_error_report(struc mptcp_data_unlock(sk); }
+ static void subflow_data_ready(struct sock *sk) + { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + u16 state = 1 << inet_sk_state_load(sk); + struct sock *parent = subflow->conn; + struct mptcp_sock *msk; + + msk = mptcp_sk(parent); + if (state & TCPF_LISTEN) { + /* MPJ subflow are removed from accept queue before reaching here, + * avoid stray wakeups + */ + if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) + return; + + set_bit(MPTCP_DATA_READY, &msk->flags); + parent->sk_data_ready(parent); + return; + } + + WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && + !subflow->mp_join && !(state & TCPF_CLOSE)); + + if (mptcp_subflow_data_available(sk)) + mptcp_data_ready(parent, sk); + else if (unlikely(sk->sk_err)) + subflow_error_report(sk); + } + + static void subflow_write_space(struct sock *ssk) + { + struct sock *sk = mptcp_subflow_ctx(ssk)->conn; + + mptcp_propagate_sndbuf(sk, ssk); + mptcp_write_space(sk); + } + static struct inet_connection_sock_af_ops * subflow_default_af_ops(struct sock *sk) { @@@ -1603,6 -1501,8 +1599,8 @@@ static void subflow_state_change(struc */ if (mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); + else if (unlikely(sk->sk_err)) + subflow_error_report(sk);
subflow_sched_work_if_closed(mptcp_sk(parent), sk);
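For context on validate_data_csum() above: the receive path cannot assume a DSS mapping is covered by a single skb, so it accumulates the checksum incrementally, skb by skb, carrying the running sum in map_data_csum and relying on csum_block_add() to keep byte alignment right when a chunk starts at an odd offset. The sketch below demonstrates that property in plain userspace C; it is illustrative only, the helpers are invented, and the split points merely stand in for skb boundaries.

    /* Userspace sketch: a 16-bit one's-complement sum can be built over
     * arbitrary chunks, provided a chunk starting at an odd offset has its
     * partial sum byte-swapped before being added (the csum_block_add() idea).
     * Not the kernel code; names and data are made up for illustration.
     */
    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    /* One's-complement sum of a byte range, as if it started on a 16-bit
     * boundary; carries folded into 16 bits, not inverted. */
    static uint16_t ocsum(const uint8_t *buf, size_t len)
    {
        uint32_t sum = 0;
        size_t i;

        for (i = 0; i + 1 < len; i += 2)
            sum += (uint32_t)buf[i] << 8 | buf[i + 1];
        if (len & 1)
            sum += (uint32_t)buf[len - 1] << 8;
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return (uint16_t)sum;
    }

    /* Add a chunk's partial sum into the running sum, honouring the chunk's
     * offset within the overall mapping. */
    static uint16_t block_add(uint16_t sum, uint16_t part, size_t offset)
    {
        uint32_t acc;

        if (offset & 1)                 /* re-align to a 16-bit boundary */
            part = (uint16_t)(part << 8 | part >> 8);
        acc = (uint32_t)sum + part;
        return (uint16_t)((acc & 0xffff) + (acc >> 16));
    }

    int main(void)
    {
        const uint8_t data[] = "mptcp data spread across several skbs";
        /* Arbitrary split points, standing in for skb boundaries. */
        const size_t cuts[] = { 0, 5, 6, 19, sizeof(data) };
        uint16_t whole = ocsum(data, sizeof(data));
        uint16_t pieces = 0;
        size_t i;

        for (i = 0; i + 1 < sizeof(cuts) / sizeof(cuts[0]); i++) {
            size_t off = cuts[i], len = cuts[i + 1] - cuts[i];

            pieces = block_add(pieces, ocsum(data + off, len), off);
        }

        assert(pieces == whole);
        printf("single pass 0x%04x == chunked 0x%04x\n", whole, pieces);
        return 0;
    }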
diff --combined net/netfilter/nf_tables_api.c index f20f6ae0e215,bf4d6ec9fc55..d6214242fe7f --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@@ -862,9 -862,10 +862,9 @@@ static int nft_netlink_dump_start_rcu(s static int nf_tables_gettable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; const struct nft_table *table; struct net *net = info->net; struct sk_buff *skb2; @@@ -1067,9 -1068,10 +1067,9 @@@ static int nf_tables_newtable(struct sk const struct nlattr * const nla[]) { struct nftables_pernet *nft_net = nft_pernet(info->net); - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; @@@ -1261,9 -1263,10 +1261,9 @@@ out static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; @@@ -1633,9 -1636,10 +1633,9 @@@ done static int nf_tables_getchain(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; const struct nft_chain *chain; struct net *net = info->net; struct nft_table *table; @@@ -2011,12 -2015,11 +2011,12 @@@ static void nft_basechain_hook_init(str const struct nft_chain_hook *hook, struct nft_chain *chain) { - ops->pf = family; - ops->hooknum = hook->num; - ops->priority = hook->priority; - ops->priv = chain; - ops->hook = hook->type->hooks[ops->hooknum]; + ops->pf = family; + ops->hooknum = hook->num; + ops->priority = hook->priority; + ops->priv = chain; + ops->hook = hook->type->hooks[ops->hooknum]; + ops->hook_ops_type = NF_HOOK_OP_NF_TABLES; }
static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, @@@ -2368,9 -2371,10 +2368,9 @@@ static int nf_tables_newchain(struct sk const struct nlattr * const nla[]) { struct nftables_pernet *nft_net = nft_pernet(info->net); - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct nft_chain *chain = NULL; struct net *net = info->net; const struct nlattr *attr; @@@ -2465,9 -2469,10 +2465,9 @@@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; @@@ -3091,9 -3096,10 +3091,9 @@@ static int nf_tables_dump_rules_done(st static int nf_tables_getrule(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; const struct nft_chain *chain; const struct nft_rule *rule; struct net *net = info->net; @@@ -3231,12 -3237,13 +3231,12 @@@ static int nf_tables_newrule(struct sk_ const struct nlattr * const nla[]) { struct nftables_pernet *nft_net = nft_pernet(info->net); - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; unsigned int size, i, n, ulen = 0, usize = 0; u8 genmask = nft_genmask_next(info->net); struct nft_rule *rule, *old_rule = NULL; struct nft_expr_info *expr_info = NULL; - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; struct nft_flow_rule *flow; struct nft_userdata *udata; @@@ -3470,15 -3477,15 +3470,15 @@@ static struct nft_rule *nft_rule_lookup static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; - int family = nfmsg->nfgen_family, err = 0; u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; struct nft_chain *chain = NULL; struct net *net = info->net; struct nft_table *table; struct nft_rule *rule; struct nft_ctx ctx; + int err = 0;
table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask, NETLINK_CB(skb).portid); @@@ -3658,6 -3665,30 +3658,6 @@@ static const struct nla_policy nft_set_ [NFTA_SET_DESC_CONCAT] = { .type = NLA_NESTED }, };
-static int nft_ctx_init_from_setattr(struct nft_ctx *ctx, struct net *net, - const struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nla[], - struct netlink_ext_ack *extack, - u8 genmask, u32 nlpid) -{ - const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - int family = nfmsg->nfgen_family; - struct nft_table *table = NULL; - - if (nla[NFTA_SET_TABLE] != NULL) { - table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, - genmask, nlpid); - if (IS_ERR(table)) { - NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]); - return PTR_ERR(table); - } - } - - nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); - return 0; -} - static struct nft_set *nft_set_lookup(const struct nft_table *table, const struct nlattr *nla, u8 genmask) { @@@ -4037,26 -4068,20 +4037,26 @@@ static int nf_tables_dump_sets_done(str static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); + u8 family = info->nfmsg->nfgen_family; + struct nft_table *table = NULL; struct net *net = info->net; const struct nft_set *set; struct sk_buff *skb2; struct nft_ctx ctx; int err;
- /* Verify existence before starting dump */ - err = nft_ctx_init_from_setattr(&ctx, net, skb, info->nlh, nla, extack, - genmask, 0); - if (err < 0) - return err; + if (nla[NFTA_SET_TABLE]) { + table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, + genmask, 0); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]); + return PTR_ERR(table); + } + } + + nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { @@@ -4071,12 -4096,12 +4071,12 @@@ }
/* Only accept unspec with dump */ - if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + if (info->nfmsg->nfgen_family == NFPROTO_UNSPEC) return -EAFNOSUPPORT; if (!nla[NFTA_SET_TABLE]) return -EINVAL;
- set = nft_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); + set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) return PTR_ERR(set);
@@@ -4164,10 -4189,11 +4164,10 @@@ static int nf_tables_set_desc_parse(str static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); u32 ktype, dtype, flags, policy, gc_int, objtype; struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; const struct nft_set_ops *ops; struct nft_expr *expr = NULL; struct net *net = info->net; @@@ -4338,13 -4364,45 +4338,45 @@@ err = nf_tables_set_alloc_name(&ctx, set, name); kfree(name); if (err < 0) - goto err_set_alloc_name; + goto err_set_name; + + udata = NULL; + if (udlen) { + udata = set->data + size; + nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen); + } + + INIT_LIST_HEAD(&set->bindings); + INIT_LIST_HEAD(&set->catchall_list); + set->table = table; + write_pnet(&set->net, net); + set->ops = ops; + set->ktype = ktype; + set->klen = desc.klen; + set->dtype = dtype; + set->objtype = objtype; + set->dlen = desc.dlen; + set->flags = flags; + set->size = desc.size; + set->policy = policy; + set->udlen = udlen; + set->udata = udata; + set->timeout = timeout; + set->gc_int = gc_int; + + set->field_count = desc.field_count; + for (i = 0; i < desc.field_count; i++) + set->field_len[i] = desc.field_len[i]; + + err = ops->init(set, &desc, nla); + if (err < 0) + goto err_set_init;
if (nla[NFTA_SET_EXPR]) { expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]); if (IS_ERR(expr)) { err = PTR_ERR(expr); - goto err_set_alloc_name; + goto err_set_expr_alloc; } set->exprs[0] = expr; set->num_exprs++; @@@ -4355,75 -4413,44 +4387,44 @@@
if (!(flags & NFT_SET_EXPR)) { err = -EINVAL; - goto err_set_alloc_name; + goto err_set_expr_alloc; } i = 0; nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) { if (i == NFT_SET_EXPR_MAX) { err = -E2BIG; - goto err_set_init; + goto err_set_expr_alloc; } if (nla_type(tmp) != NFTA_LIST_ELEM) { err = -EINVAL; - goto err_set_init; + goto err_set_expr_alloc; } expr = nft_set_elem_expr_alloc(&ctx, set, tmp); if (IS_ERR(expr)) { err = PTR_ERR(expr); - goto err_set_init; + goto err_set_expr_alloc; } set->exprs[i++] = expr; set->num_exprs++; } }
- udata = NULL; - if (udlen) { - udata = set->data + size; - nla_memcpy(udata, nla[NFTA_SET_USERDATA], udlen); - } - - INIT_LIST_HEAD(&set->bindings); - INIT_LIST_HEAD(&set->catchall_list); - set->table = table; - write_pnet(&set->net, net); - set->ops = ops; - set->ktype = ktype; - set->klen = desc.klen; - set->dtype = dtype; - set->objtype = objtype; - set->dlen = desc.dlen; - set->flags = flags; - set->size = desc.size; - set->policy = policy; - set->udlen = udlen; - set->udata = udata; - set->timeout = timeout; - set->gc_int = gc_int; set->handle = nf_tables_alloc_handle(table);
- set->field_count = desc.field_count; - for (i = 0; i < desc.field_count; i++) - set->field_len[i] = desc.field_len[i]; - - err = ops->init(set, &desc, nla); - if (err < 0) - goto err_set_init; - err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set); if (err < 0) - goto err_set_trans; + goto err_set_expr_alloc;
list_add_tail_rcu(&set->list, &table->sets); table->use++; return 0;
- err_set_trans: - ops->destroy(set); - err_set_init: + err_set_expr_alloc: for (i = 0; i < set->num_exprs; i++) nft_expr_destroy(&ctx, set->exprs[i]); - err_set_alloc_name: + + ops->destroy(set); + err_set_init: kfree(set->name); err_set_name: kvfree(set); @@@ -4467,31 -4494,31 +4468,31 @@@ static void nft_set_destroy(const struc static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; + struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; - int err;
- if (nfmsg->nfgen_family == NFPROTO_UNSPEC) + if (info->nfmsg->nfgen_family == NFPROTO_UNSPEC) return -EAFNOSUPPORT; - if (nla[NFTA_SET_TABLE] == NULL) - return -EINVAL;
- err = nft_ctx_init_from_setattr(&ctx, net, skb, info->nlh, nla, extack, - genmask, NETLINK_CB(skb).portid); - if (err < 0) - return err; + table = nft_table_lookup(net, nla[NFTA_SET_TABLE], family, + genmask, NETLINK_CB(skb).portid); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_TABLE]); + return PTR_ERR(table); + }
if (nla[NFTA_SET_HANDLE]) { attr = nla[NFTA_SET_HANDLE]; - set = nft_set_lookup_byhandle(ctx.table, attr, genmask); + set = nft_set_lookup_byhandle(table, attr, genmask); } else { attr = nla[NFTA_SET_NAME]; - set = nft_set_lookup(ctx.table, attr, genmask); + set = nft_set_lookup(table, attr, genmask); }
if (IS_ERR(set)) { @@@ -4505,8 -4532,6 +4506,8 @@@ return -EBUSY; }
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + return nft_delset(&ctx, set); }
@@@ -4708,6 -4733,28 +4709,6 @@@ static const struct nla_policy nft_set_ [NFTA_SET_ELEM_LIST_SET_ID] = { .type = NLA_U32 }, };
-static int nft_ctx_init_from_elemattr(struct nft_ctx *ctx, struct net *net, - const struct sk_buff *skb, - const struct nlmsghdr *nlh, - const struct nlattr * const nla[], - struct netlink_ext_ack *extack, - u8 genmask, u32 nlpid) -{ - const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - int family = nfmsg->nfgen_family; - struct nft_table *table; - - table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, - genmask, nlpid); - if (IS_ERR(table)) { - NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); - return PTR_ERR(table); - } - - nft_ctx_init(ctx, net, skb, nlh, family, table, NULL, nla); - return 0; -} - static int nft_set_elem_expr_dump(struct sk_buff *skb, const struct nft_set *set, const struct nft_set_ext *ext) @@@ -5165,27 -5212,21 +5166,27 @@@ static int nf_tables_getsetelem(struct { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; + struct nft_table *table; struct nft_set *set; struct nlattr *attr; struct nft_ctx ctx; int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack, - genmask, NETLINK_CB(skb).portid); - if (err < 0) - return err; + table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, + genmask, NETLINK_CB(skb).portid); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); + return PTR_ERR(table); + }
- set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask); + set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set);
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .start = nf_tables_dump_set_start, @@@ -5954,10 -5995,8 +5955,10 @@@ static int nf_tables_newsetelem(struct struct nftables_pernet *nft_net = nft_pernet(info->net); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; + struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; int rem, err; @@@ -5965,14 -6004,12 +5966,14 @@@ if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL) return -EINVAL;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack, - genmask, NETLINK_CB(skb).portid); - if (err < 0) - return err; + table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, + genmask, NETLINK_CB(skb).portid); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); + return PTR_ERR(table); + }
- set = nft_set_lookup_global(net, ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + set = nft_set_lookup_global(net, table, nla[NFTA_SET_ELEM_LIST_SET], nla[NFTA_SET_ELEM_LIST_SET_ID], genmask); if (IS_ERR(set)) return PTR_ERR(set); @@@ -5980,8 -6017,6 +5981,8 @@@ if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) return -EBUSY;
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { err = nft_add_set_elem(&ctx, set, attr, info->nlh->nlmsg_flags); if (err < 0) @@@ -5989,7 -6024,7 +5990,7 @@@ }
if (nft_net->validate_state == NFT_VALIDATE_DO) - return nft_table_validate(net, ctx.table); + return nft_table_validate(net, table);
return 0; } @@@ -6227,29 -6262,23 +6228,29 @@@ static int nf_tables_delsetelem(struct { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; + struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; int rem, err = 0;
- err = nft_ctx_init_from_elemattr(&ctx, net, skb, info->nlh, nla, extack, - genmask, NETLINK_CB(skb).portid); - if (err < 0) - return err; + table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family, + genmask, NETLINK_CB(skb).portid); + if (IS_ERR(table)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]); + return PTR_ERR(table); + }
- set = nft_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], genmask); + set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask); if (IS_ERR(set)) return PTR_ERR(set); if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT) return -EBUSY;
+ nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla); + if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) return nft_set_flush(&ctx, set, genmask);
@@@ -6517,10 -6546,11 +6518,10 @@@ err_free_trans static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; const struct nft_object_type *type; - int family = nfmsg->nfgen_family; struct net *net = info->net; struct nft_table *table; struct nft_object *obj; @@@ -6772,9 -6802,10 +6773,9 @@@ static int nf_tables_dump_obj_done(stru static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; const struct nft_table *table; struct net *net = info->net; struct nft_object *obj; @@@ -6861,9 -6892,10 +6862,9 @@@ static void nft_obj_destroy(const struc static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct net *net = info->net; const struct nlattr *attr; struct nft_table *table; @@@ -7291,11 -7323,12 +7292,11 @@@ static int nf_tables_newflowtable(struc const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; struct nft_flowtable_hook flowtable_hook; u8 genmask = nft_genmask_next(info->net); + u8 family = info->nfmsg->nfgen_family; const struct nf_flowtable_type *type; - int family = nfmsg->nfgen_family; struct nft_flowtable *flowtable; struct nft_hook *hook, *next; struct net *net = info->net; @@@ -7479,9 -7512,10 +7480,9 @@@ static int nf_tables_delflowtable(struc const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_next(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct nft_flowtable *flowtable; struct net *net = info->net; const struct nlattr *attr; @@@ -7673,8 -7707,9 +7674,8 @@@ static int nf_tables_getflowtable(struc const struct nfnl_info *info, const struct nlattr * const nla[]) { - const struct nfgenmsg *nfmsg = nlmsg_data(info->nlh); u8 genmask = nft_genmask_cur(info->net); - int family = nfmsg->nfgen_family; + u8 family = info->nfmsg->nfgen_family; struct nft_flowtable *flowtable; const struct nft_table *table; struct net *net = info->net; diff --combined net/packet/af_packet.c index 71dd6b910f7c,330ba68828e7..77b0cdab3810 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@@ -2683,7 -2683,7 +2683,7 @@@ static int tpacket_snd(struct packet_so } if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); - proto = po->num; + proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@@ -2896,7 -2896,7 +2896,7 @@@ static int packet_snd(struct socket *so
if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); - proto = po->num; + proto = READ_ONCE(po->num); } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@@ -3034,10 -3034,13 +3034,13 @@@ static int packet_sendmsg(struct socke struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk);
- if (po->tx_ring.pg_vec) + /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. + * tpacket_snd() will redo the check safely. + */ + if (data_race(po->tx_ring.pg_vec)) return tpacket_snd(po, msg); - else - return packet_snd(sock, msg, len); + + return packet_snd(sock, msg, len); }
/* @@@ -3168,7 -3171,7 +3171,7 @@@ static int packet_do_bind(struct sock * /* prevents packet_notifier() from calling * register_prot_hook() */ - po->num = 0; + WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, true); rcu_read_lock(); dev_curr = po->prot_hook.dev; @@@ -3178,17 -3181,17 +3181,17 @@@ }
BUG_ON(po->running); - po->num = proto; + WRITE_ONCE(po->num, proto); po->prot_hook.type = proto;
if (unlikely(unlisted)) { dev_put(dev); po->prot_hook.dev = NULL; - po->ifindex = -1; + WRITE_ONCE(po->ifindex, -1); packet_cached_dev_reset(po); } else { po->prot_hook.dev = dev; - po->ifindex = dev ? dev->ifindex : 0; + WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0); packet_cached_dev_assign(po, dev); } } @@@ -3502,7 -3505,7 +3505,7 @@@ static int packet_getname_spkt(struct s uaddr->sa_family = AF_PACKET; memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data)); rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex); + dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); if (dev) strlcpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data)); rcu_read_unlock(); @@@ -3517,16 -3520,18 +3520,18 @@@ static int packet_getname(struct socke struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); + int ifindex;
if (peer) return -EOPNOTSUPP;
+ ifindex = READ_ONCE(po->ifindex); sll->sll_family = AF_PACKET; - sll->sll_ifindex = po->ifindex; - sll->sll_protocol = po->num; + sll->sll_ifindex = ifindex; + sll->sll_protocol = READ_ONCE(po->num); sll->sll_pkttype = 0; rcu_read_lock(); - dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex); + dev = dev_get_by_index_rcu(sock_net(sk), ifindex); if (dev) { sll->sll_hatype = dev->type; sll->sll_halen = dev->addr_len; @@@ -3929,9 -3934,12 +3934,9 @@@ packet_setsockopt(struct socket *sock, return -EFAULT;
lock_sock(sk); - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { - ret = -EBUSY; - } else { + if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec) po->tp_tx_has_off = !!val; - ret = 0; - } + release_sock(sk); return 0; } @@@ -4102,7 -4110,7 +4107,7 @@@ static int packet_notifier(struct notif } if (msg == NETDEV_UNREGISTER) { packet_cached_dev_reset(po); - po->ifindex = -1; + WRITE_ONCE(po->ifindex, -1); if (po->prot_hook.dev) dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; @@@ -4408,7 -4416,7 +4413,7 @@@ static int packet_set_ring(struct sock was_running = po->running; num = po->num; if (was_running) { - po->num = 0; + WRITE_ONCE(po->num, 0); __unregister_prot_hook(sk, false); } spin_unlock(&po->bind_lock); @@@ -4443,7 -4451,7 +4448,7 @@@
spin_lock(&po->bind_lock); if (was_running) { - po->num = num; + WRITE_ONCE(po->num, num); register_prot_hook(sk); } spin_unlock(&po->bind_lock); @@@ -4613,8 -4621,8 +4618,8 @@@ static int packet_seq_show(struct seq_f s, refcount_read(&s->sk_refcnt), s->sk_type, - ntohs(po->num), - po->ifindex, + ntohs(READ_ONCE(po->num)), + READ_ONCE(po->ifindex), po->running, atomic_read(&s->sk_rmem_alloc), from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), diff --combined net/unix/af_unix.c index 4d4f24cbd86b,5d1192ceb139..c9dfec7b71e7 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@@ -535,12 -535,14 +535,14 @@@ static void unix_release_sock(struct so u->path.mnt = NULL; state = sk->sk_state; sk->sk_state = TCP_CLOSE; + + skpair = unix_peer(sk); + unix_peer(sk) = NULL; + unix_state_unlock(sk);
wake_up_interruptible_all(&u->peer_wait);
- skpair = unix_peer(sk); - if (skpair != NULL) { if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) { unix_state_lock(skpair); @@@ -555,7 -557,6 +557,6 @@@
unix_dgram_peer_wake_disconnect(sk, skpair); sock_put(skpair); /* It may now die */ - unix_peer(sk) = NULL; }
/* Try to flush out this socket. Throw out buffers at least */ @@@ -1392,7 -1393,7 +1393,7 @@@ restart
unix_state_unlock(sk);
- /* take ten and and send info to listening sock */ + /* take ten and send info to listening sock */ spin_lock(&other->sk_receive_queue.lock); __skb_queue_tail(&other->sk_receive_queue, skb); spin_unlock(&other->sk_receive_queue.lock); diff --combined tools/testing/selftests/net/mptcp/mptcp_connect.sh index 69351c3eb68c,2b495dc8d78e..2484fb6a9a8d --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@@ -3,7 -3,7 +3,7 @@@
time_start=$(date +%s)
-optstring="S:R:d:e:l:r:h4cm:f:t" +optstring="S:R:d:e:l:r:h4cm:f:tC" ret=0 sin="" sout="" @@@ -22,7 -22,6 +22,7 @@@ sndbuf= rcvbuf=0 options_log=true do_tcp=0 +checksum=false filesize=0
if [ $tc_loss -eq 100 ];then @@@ -48,7 -47,6 +48,7 @@@ usage() echo -e "\t-R: set rcvbuf value (default: use kernel default)" echo -e "\t-m: test mode (poll, sendfile; default: poll)" echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)" + echo -e "\t-C: enable the MPTCP data checksum" }
while getopts "$optstring" option;do @@@ -106,9 -104,6 +106,9 @@@ "t") do_tcp=$((do_tcp+1)) ;; + "C") + checksum=true + ;; "?") usage $0 exit 1 @@@ -202,15 -197,6 +202,12 @@@ ip -net "$ns4" link set ns4eth3 u ip -net "$ns4" route add default via 10.0.3.2 ip -net "$ns4" route add default via dead:beef:3::2
- # use TCP syn cookies, even if no flooding was detected. - ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 - +if $checksum; then + for i in "$ns1" "$ns2" "$ns3" "$ns4";do + ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1 + done +fi + set_ethtool_flags() { local ns="$1" local dev="$2" @@@ -748,6 -734,14 +745,14 @@@ for sender in $ns1 $ns2 $ns3 $ns4;d exit $ret fi
+ # ns1<->ns2 is not subject to reordering/tc delays. Use it to test + # mptcp syncookie support. + if [ $sender = $ns1 ]; then + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2 + else + ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1 + fi + run_tests "$ns2" $sender 10.0.1.2 run_tests "$ns2" $sender dead:beef:1::2 run_tests "$ns2" $sender 10.0.2.1