[linux-next] LinuxNextTracking branch, master, updated. next-20210412

12 Apr 2021

The following commit has been merged in the master branch:
commit 8859a44ea0df92bccdc942ef15781ebbfe0ad9f3
Merge: 6c5e6b4ccc1bb9ac56579a9aed25d517d2318be6 4e04e7513b0fa2fe8966a1c83fb473f1667e2810
Author: Jakub Kicinski <kuba@kernel.org>
Date:   Fri Apr 9 20:46:01 2021 -0700
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Conflicts:
    MAINTAINERS
     - keep Chandrasekar
    drivers/net/ethernet/mellanox/mlx5/core/en_main.c
     - simple fix + trust the code re-added to param.c in -next is fine
    include/linux/bpf.h
     - trivial
    include/linux/ethtool.h
     - trivial, fix kdoc while at it
    include/linux/skmsg.h
     - move to relevant place in tcp.c, comment re-wrapped
    net/core/skmsg.c
     - add the sk = sk // sk = NULL around calls
    net/tipc/crypto.c
     - trivial
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
diff --combined Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml
index 2a3be0f9a1a1,13c26f23a820..2f46e45dcd60

--- a/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml
+++ b/Documentation/devicetree/bindings/net/brcm,bcm4908-enet.yaml
@@@ -22,25 -22,17 +22,25 @@@ properties
      maxItems: 1
interrupts:
 -    description: RX interrupt
 +    minItems: 1
 +    maxItems: 2
 +    items:
 +      - description: RX interrupt
 +      - description: TX interrupt
interrupt-names:
 -    const: rx
 +    minItems: 1
 +    maxItems: 2
 +    items:
 +      - const: rx
 +      - const: tx
required:
    - reg
    - interrupts
    - interrupt-names
- additionalProperties: false
+ unevaluatedProperties: false
examples:
    - |
@@@ -51,7 -43,6 +51,7 @@@
          compatible = "brcm,bcm4908-enet";
          reg = <0x80002000 0x1000>;
-        interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>;
 -        interrupt-names = "rx";
 +        interrupts = <GIC_SPI 86 IRQ_TYPE_LEVEL_HIGH>,
 +                     <GIC_SPI 87 IRQ_TYPE_LEVEL_HIGH>;
 +        interrupt-names = "rx", "tx";
      };
diff --combined Documentation/networking/ethtool-netlink.rst
index fd84f4ed898a,dc03ff884541..ce4a69f8308f
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@@ -208,8 -208,6 +208,8 @@@ Userspace to kernel
    ``ETHTOOL_MSG_CABLE_TEST_ACT``        action start cable test
    ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT``    action start raw TDR cable test
    ``ETHTOOL_MSG_TUNNEL_INFO_GET``       get tunnel offload info
 +  ``ETHTOOL_MSG_FEC_GET``               get FEC settings
 +  ``ETHTOOL_MSG_FEC_SET``               set FEC settings
    ===================================== ================================
Kernel to userspace:
@@@ -244,8 -242,6 +244,8 @@@
    ``ETHTOOL_MSG_CABLE_TEST_NTF``        Cable test results
    ``ETHTOOL_MSG_CABLE_TEST_TDR_NTF``    Cable test TDR results
    ``ETHTOOL_MSG_TUNNEL_INFO_GET_REPLY`` tunnel offload info
 +  ``ETHTOOL_MSG_FEC_GET_REPLY``         FEC settings
 +  ``ETHTOOL_MSG_FEC_NTF``               FEC settings
    ===================================== =================================
``GET`` requests are sent by userspace applications to retrieve device
@@@ -980,9 -976,9 +980,9 @@@ constraints on coalescing parameters an
PAUSE_GET
- ============
+ =========
- Gets channel counts like ``ETHTOOL_GPAUSE`` ioctl request.
+ Gets pause frame settings like ``ETHTOOL_GPAUSEPARAM`` ioctl request.
Request contents:
@@@ -1011,7 -1007,7 +1011,7 @@@ the statistics in the following structu
  Each member has a corresponding attribute defined.
PAUSE_SET
- ============
+ =========
Sets pause parameters like ``ETHTOOL_GPAUSEPARAM`` ioctl request.
@@@ -1028,7 -1024,7 +1028,7 @@@ Request contents
  EEE_GET
  =======
- Gets channel counts like ``ETHTOOL_GEEE`` ioctl request.
+ Gets Energy Efficient Ethernet settings like ``ETHTOOL_GEEE`` ioctl request.
Request contents:
@@@ -1058,7 -1054,7 +1058,7 @@@ first 32 are provided by the ``ethtool_
  EEE_SET
  =======
- Sets pause parameters like ``ETHTOOL_GEEEPARAM`` ioctl request.
+ Sets Energy Efficient Ethernet parameters like ``ETHTOOL_SEEE`` ioctl request.
Request contents:
@@@ -1284,60 -1280,6 +1284,60 @@@ Kernel response contents
  For UDP tunnel table empty ``ETHTOOL_A_TUNNEL_UDP_TABLE_TYPES`` indicates that
  the table contains static entries, hard-coded by the NIC.
+FEC_GET
 +=======
 +
 +Gets FEC configuration and state like ``ETHTOOL_GFECPARAM`` ioctl request.
 +
 +Request contents:
 +
 +  =====================================  ======  ==========================
 +  ``ETHTOOL_A_FEC_HEADER``               nested  request header
 +  =====================================  ======  ==========================
 +
 +Kernel response contents:
 +
 +  =====================================  ======  ==========================
 +  ``ETHTOOL_A_FEC_HEADER``               nested  request header
 +  ``ETHTOOL_A_FEC_MODES``                bitset  configured modes
 +  ``ETHTOOL_A_FEC_AUTO``                 bool    FEC mode auto selection
 +  ``ETHTOOL_A_FEC_ACTIVE``               u32     index of active FEC mode
 +  =====================================  ======  ==========================
 +
 +``ETHTOOL_A_FEC_ACTIVE`` is the bit index of the FEC link mode currently
 +active on the interface. This attribute may not be present if device does
 +not support FEC.
 +
 +``ETHTOOL_A_FEC_MODES`` and ``ETHTOOL_A_FEC_AUTO`` are only meaningful when
 +autonegotiation is disabled. If ``ETHTOOL_A_FEC_AUTO`` is non-zero driver will
 +select the FEC mode automatically based on the parameters of the SFP module.
 +This is equivalent to the ``ETHTOOL_FEC_AUTO`` bit of the ioctl interface.
 +``ETHTOOL_A_FEC_MODES`` carry the current FEC configuration using link mode
 +bits (rather than old ``ETHTOOL_FEC_*`` bits).
 +
 +FEC_SET
 +=======
 +
 +Sets FEC parameters like ``ETHTOOL_SFECPARAM`` ioctl request.
 +
 +Request contents:
 +
 +  =====================================  ======  ==========================
 +  ``ETHTOOL_A_FEC_HEADER``               nested  request header
 +  ``ETHTOOL_A_FEC_MODES``                bitset  configured modes
 +  ``ETHTOOL_A_FEC_AUTO``                 bool    FEC mode auto selection
 +  =====================================  ======  ==========================
 +
 +``FEC_SET`` is only meaningful when autonegotiation is disabled. Otherwise
 +FEC mode is selected as part of autonegotiation.
 +
 +``ETHTOOL_A_FEC_MODES`` selects which FEC mode should be used. It's recommended
 +to set only one bit, if multiple bits are set driver may choose between them
 +in an implementation specific way.
 +
 +``ETHTOOL_A_FEC_AUTO`` requests the driver to choose FEC mode based on SFP
 +module parameters. This does not mean autonegotiation.
 +
  Request translation
  ===================
@@@ -1431,9 -1373,9 +1431,9 @@@ are netlink only
                                        ``ETHTOOL_MSG_LINKMODES_SET``
    ``ETHTOOL_PHY_GTUNABLE``            n/a
    ``ETHTOOL_PHY_STUNABLE``            n/a
 -  ``ETHTOOL_GFECPARAM``               n/a
 -  ``ETHTOOL_SFECPARAM``               n/a
 -  n/a                                 ''ETHTOOL_MSG_CABLE_TEST_ACT''
 -  n/a                                 ''ETHTOOL_MSG_CABLE_TEST_TDR_ACT''
 +  ``ETHTOOL_GFECPARAM``               ``ETHTOOL_MSG_FEC_GET``
 +  ``ETHTOOL_SFECPARAM``               ``ETHTOOL_MSG_FEC_SET``
 +  n/a                                 ``ETHTOOL_MSG_CABLE_TEST_ACT``
 +  n/a                                 ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT``
    n/a                                 ``ETHTOOL_MSG_TUNNEL_INFO_GET``
    =================================== =====================================
diff --combined MAINTAINERS
index 3ea9539821b5,ccd9228350cf..795b9941c151
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -1530,7 -1530,6 +1530,7 @@@ F:	Documentation/devicetree/bindings/dm
  F:	Documentation/devicetree/bindings/i2c/i2c-owl.yaml
  F:	Documentation/devicetree/bindings/interrupt-controller/actions,owl-sirq.yaml
  F:	Documentation/devicetree/bindings/mmc/owl-mmc.yaml
 +F:	Documentation/devicetree/bindings/net/actions,owl-emac.yaml
  F:	Documentation/devicetree/bindings/pinctrl/actions,*
  F:	Documentation/devicetree/bindings/power/actions,owl-sps.txt
  F:	Documentation/devicetree/bindings/timer/actions,owl-timer.txt
@@@ -1543,7 -1542,6 +1543,7 @@@ F:	drivers/dma/owl-dma.
  F:	drivers/i2c/busses/i2c-owl.c
  F:	drivers/irqchip/irq-owl-sirq.c
  F:	drivers/mmc/host/owl-mmc.c
 +F:	drivers/net/ethernet/actions/
  F:	drivers/pinctrl/actions/*
  F:	drivers/soc/actions/
  F:	include/dt-bindings/power/owl-*
@@@ -2491,7 -2489,7 +2491,7 @@@ N:	sc27x
  N:	sc2731
ARM/STI ARCHITECTURE
- M:	Patrice Chotard patrice.chotard@st.com
+ M:	Patrice Chotard patrice.chotard@foss.st.com
  L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:	Maintained
  W:	http://www.stlinux.com
@@@ -2524,7 -2522,7 +2524,7 @@@ F:	include/linux/remoteproc/st_slim_rpr
ARM/STM32 ARCHITECTURE
  M:	Maxime Coquelin mcoquelin.stm32@gmail.com
- M:	Alexandre Torgue alexandre.torgue@st.com
+ M:	Alexandre Torgue alexandre.torgue@foss.st.com
  L:	linux-stm32@st-md-mailman.stormreply.com (moderated for non-subscribers)
  L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:	Maintained
@@@ -3117,7 -3115,7 +3117,7 @@@ C:	irc://irc.oftc.net/bcach
  F:	drivers/md/bcache/
BDISP ST MEDIA DRIVER
- M:	Fabien Dessenne fabien.dessenne@st.com
+ M:	Fabien Dessenne fabien.dessenne@foss.st.com
  L:	linux-media@vger.kernel.org
  S:	Supported
  W:	https://linuxtv.org
@@@ -3235,7 -3233,6 +3235,7 @@@ T:	git git://git.kernel.org/pub/scm/lin
  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
  F:	Documentation/bpf/
  F:	Documentation/networking/filter.rst
 +F:	Documentation/userspace-api/ebpf/
  F:	arch/*/net/*
  F:	include/linux/bpf*
  F:	include/linux/filter.h
@@@ -3250,7 -3247,6 +3250,7 @@@ F:	net/core/filter.
  F:	net/sched/act_bpf.c
  F:	net/sched/cls_bpf.c
  F:	samples/bpf/
 +F:	scripts/bpf_doc.py
  F:	tools/bpf/
  F:	tools/lib/bpf/
  F:	tools/testing/selftests/bpf/
@@@ -3679,7 -3675,7 +3679,7 @@@ M:	bcm-kernel-feedback-list@broadcom.co
  L:	linux-pm@vger.kernel.org
  S:	Maintained
  T:	git git://github.com/broadcom/stblinux.git
- F:	drivers/soc/bcm/bcm-pmb.c
+ F:	drivers/soc/bcm/bcm63xx/bcm-pmb.c
  F:	include/dt-bindings/soc/bcm-pmb.h
BROADCOM SPECIFIC AMBA DRIVER (BCMA)
@@@ -5084,7 -5080,7 +5084,7 @@@ S:	Maintaine
  F:	drivers/platform/x86/dell/dell-wmi.c
DELTA ST MEDIA DRIVER
- M:	Hugues Fruchet hugues.fruchet@st.com
+ M:	Hugues Fruchet hugues.fruchet@foss.st.com
  L:	linux-media@vger.kernel.org
  S:	Supported
  W:	https://linuxtv.org
@@@ -5475,11 -5471,11 +5475,11 @@@ F:	drivers/net/ethernet/freescale/dpaa2
  F:	drivers/net/ethernet/freescale/dpaa2/dpni*
DPAA2 ETHERNET SWITCH DRIVER
 -M:	Ioana Radulescu ruxandra.radulescu@nxp.com
  M:	Ioana Ciornei ioana.ciornei@nxp.com
 -L:	linux-kernel@vger.kernel.org
 +L:	netdev@vger.kernel.org
  S:	Maintained
 -F:	drivers/staging/fsl-dpaa2/ethsw
 +F:	drivers/net/ethernet/freescale/dpaa2/dpaa2-switch*
 +F:	drivers/net/ethernet/freescale/dpaa2/dpsw*
DPT_I2O SCSI RAID DRIVER
  M:	Adaptec OEM Raid Solutions aacraid@microsemi.com
@@@ -6010,7 -6006,6 +6010,6 @@@ F:	drivers/gpu/drm/rockchip
DRM DRIVERS FOR STI
  M:	Benjamin Gaignard benjamin.gaignard@linaro.org
- M:	Vincent Abriou vincent.abriou@st.com
  L:	dri-devel@lists.freedesktop.org
  S:	Maintained
  T:	git git://anongit.freedesktop.org/drm/drm-misc
@@@ -6018,10 -6013,9 +6017,9 @@@ F:	Documentation/devicetree/bindings/di
  F:	drivers/gpu/drm/sti
DRM DRIVERS FOR STM
- M:	Yannick Fertre yannick.fertre@st.com
- M:	Philippe Cornu philippe.cornu@st.com
+ M:	Yannick Fertre yannick.fertre@foss.st.com
+ M:	Philippe Cornu philippe.cornu@foss.st.com
  M:	Benjamin Gaignard benjamin.gaignard@linaro.org
- M:	Vincent Abriou vincent.abriou@st.com
  L:	dri-devel@lists.freedesktop.org
  S:	Maintained
  T:	git git://anongit.freedesktop.org/drm/drm-misc
@@@ -7480,8 -7474,9 +7478,9 @@@ F:	include/uapi/asm-generic
  GENERIC PHY FRAMEWORK
  M:	Kishon Vijay Abraham I kishon@ti.com
  M:	Vinod Koul vkoul@kernel.org
- L:	linux-kernel@vger.kernel.org
+ L:	linux-phy@lists.infradead.org
  S:	Supported
+ Q:	https://patchwork.kernel.org/project/linux-phy/list/
  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git
  F:	Documentation/devicetree/bindings/phy/
  F:	drivers/phy/
@@@ -8234,7 -8229,7 +8233,7 @@@ F:	include/linux/hugetlb.
  F:	mm/hugetlb.c
HVA ST MEDIA DRIVER
- M:	Jean-Christophe Trotin jean-christophe.trotin@st.com
+ M:	Jean-Christophe Trotin jean-christophe.trotin@foss.st.com
  L:	linux-media@vger.kernel.org
  S:	Supported
  W:	https://linuxtv.org
@@@ -10034,7 -10029,6 +10033,6 @@@ F:	scripts/leaking_addresses.p
LED SUBSYSTEM
  M:	Pavel Machek pavel@ucw.cz
- R:	Dan Murphy dmurphy@ti.com
  L:	linux-leds@vger.kernel.org
  S:	Maintained
  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git
@@@ -10695,7 -10689,6 +10693,7 @@@ F:	include/linux/mv643xx.
MARVELL MV88X3310 PHY DRIVER
  M:	Russell King linux@armlinux.org.uk
 +M:	Marek Behun marek.behun@nic.cz
  L:	netdev@vger.kernel.org
  S:	Maintained
  F:	drivers/net/phy/marvell10g.c
@@@ -10911,7 -10904,7 +10909,7 @@@ T:	git git://linuxtv.org/media_tree.gi
  F:	drivers/media/radio/radio-maxiradio*
MCAN MMIO DEVICE DRIVER
 -M:	Pankaj Sharma pankj.sharma@samsung.com
 +M:	Chandrasekar Ramakrishnan rcsekar@samsung.com
  L:	linux-can@vger.kernel.org
  S:	Maintained
  F:	Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
@@@ -11171,7 -11164,7 +11169,7 @@@ T:	git git://linuxtv.org/media_tree.gi
  F:	drivers/media/dvb-frontends/stv6111*
MEDIA DRIVERS FOR STM32 - DCMI
- M:	Hugues Fruchet hugues.fruchet@st.com
+ M:	Hugues Fruchet hugues.fruchet@foss.st.com
  L:	linux-media@vger.kernel.org
  S:	Supported
  T:	git git://linuxtv.org/media_tree.git
@@@ -14857,6 -14850,14 +14855,14 @@@ L:	linux-arm-msm@vger.kernel.or
  S:	Maintained
  F:	drivers/iommu/arm/arm-smmu/qcom_iommu.c
+ QUALCOMM IPC ROUTER (QRTR) DRIVER
+ M:	Manivannan Sadhasivam manivannan.sadhasivam@linaro.org
+ L:	linux-arm-msm@vger.kernel.org
+ S:	Maintained
+ F:	include/trace/events/qrtr.h
+ F:	include/uapi/linux/qrtr.h
+ F:	net/qrtr/
+ 
  QUALCOMM IPCC MAILBOX DRIVER
  M:	Manivannan Sadhasivam manivannan.sadhasivam@linaro.org
  L:	linux-arm-msm@vger.kernel.org
@@@ -15206,6 -15207,7 +15212,7 @@@ F:	fs/reiserfs
  REMOTE PROCESSOR (REMOTEPROC) SUBSYSTEM
  M:	Ohad Ben-Cohen ohad@wizery.com
  M:	Bjorn Andersson bjorn.andersson@linaro.org
+ M:	Mathieu Poirier mathieu.poirier@linaro.org
  L:	linux-remoteproc@vger.kernel.org
  S:	Maintained
  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/andersson/remoteproc.git rproc-next
@@@ -15219,6 -15221,7 +15226,7 @@@ F:	include/linux/remoteproc
  REMOTE PROCESSOR MESSAGING (RPMSG) SUBSYSTEM
  M:	Ohad Ben-Cohen ohad@wizery.com
  M:	Bjorn Andersson bjorn.andersson@linaro.org
+ M:	Mathieu Poirier mathieu.poirier@linaro.org
  L:	linux-remoteproc@vger.kernel.org
  S:	Maintained
  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/andersson/remoteproc.git rpmsg-next
@@@ -15635,8 -15638,8 +15643,8 @@@ F:	Documentation/s390/pci.rs
S390 VFIO AP DRIVER
  M:	Tony Krowiak akrowiak@linux.ibm.com
- M:	Pierre Morel pmorel@linux.ibm.com
  M:	Halil Pasic pasic@linux.ibm.com
+ M:	Jason Herne jjherne@linux.ibm.com
  L:	linux-s390@vger.kernel.org
  S:	Supported
  W:	http://www.ibm.com/developerworks/linux/linux390/
@@@ -15648,6 -15651,7 +15656,7 @@@ F:	drivers/s390/crypto/vfio_ap_private.
  S390 VFIO-CCW DRIVER
  M:	Cornelia Huck cohuck@redhat.com
  M:	Eric Farman farman@linux.ibm.com
+ M:	Matthew Rosato mjrosato@linux.ibm.com
  R:	Halil Pasic pasic@linux.ibm.com
  L:	linux-s390@vger.kernel.org
  L:	kvm@vger.kernel.org
@@@ -15658,6 -15662,7 +15667,7 @@@ F:	include/uapi/linux/vfio_ccw.
S390 VFIO-PCI DRIVER
  M:	Matthew Rosato mjrosato@linux.ibm.com
+ M:	Eric Farman farman@linux.ibm.com
  L:	linux-s390@vger.kernel.org
  L:	kvm@vger.kernel.org
  S:	Supported
@@@ -16944,7 -16949,8 +16954,8 @@@ F:	Documentation/devicetree/bindings/me
  F:	drivers/media/i2c/st-mipid02.c
ST STM32 I2C/SMBUS DRIVER
- M:	Pierre-Yves MORDRET pierre-yves.mordret@st.com
+ M:	Pierre-Yves MORDRET pierre-yves.mordret@foss.st.com
+ M:	Alain Volmat alain.volmat@foss.st.com
  L:	linux-i2c@vger.kernel.org
  S:	Maintained
  F:	drivers/i2c/busses/i2c-stm32*
@@@ -17069,7 -17075,7 +17080,7 @@@ F:	kernel/jump_label.
  F:	kernel/static_call.c
STI AUDIO (ASoC) DRIVERS
- M:	Arnaud Pouliquen arnaud.pouliquen@st.com
+ M:	Arnaud Pouliquen arnaud.pouliquen@foss.st.com
  L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
  S:	Maintained
  F:	Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt
@@@ -17089,15 -17095,15 +17100,15 @@@ T:	git git://linuxtv.org/media_tree.gi
  F:	drivers/media/usb/stk1160/
STM32 AUDIO (ASoC) DRIVERS
- M:	Olivier Moysan olivier.moysan@st.com
- M:	Arnaud Pouliquen arnaud.pouliquen@st.com
+ M:	Olivier Moysan olivier.moysan@foss.st.com
+ M:	Arnaud Pouliquen arnaud.pouliquen@foss.st.com
  L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
  S:	Maintained
  F:	Documentation/devicetree/bindings/iio/adc/st,stm32-*.yaml
  F:	sound/soc/stm/
STM32 TIMER/LPTIMER DRIVERS
- M:	Fabrice Gasnier fabrice.gasnier@st.com
+ M:	Fabrice Gasnier fabrice.gasnier@foss.st.com
  S:	Maintained
  F:	Documentation/ABI/testing/*timer-stm32
  F:	Documentation/devicetree/bindings/*/*stm32-*timer*
@@@ -17107,7 -17113,7 +17118,7 @@@ F:	include/linux/*/stm32-*tim
STMMAC ETHERNET DRIVER
  M:	Giuseppe Cavallaro peppe.cavallaro@st.com
- M:	Alexandre Torgue alexandre.torgue@st.com
+ M:	Alexandre Torgue alexandre.torgue@foss.st.com
  M:	Jose Abreu joabreu@synopsys.com
  L:	netdev@vger.kernel.org
  S:	Supported
@@@ -17849,7 -17855,6 +17860,6 @@@ S:	Maintaine
  F:	drivers/thermal/ti-soc-thermal/
TI BQ27XXX POWER SUPPLY DRIVER
- R:	Dan Murphy dmurphy@ti.com
  F:	drivers/power/supply/bq27xxx_battery.c
  F:	drivers/power/supply/bq27xxx_battery_i2c.c
  F:	include/linux/power/bq27xxx_battery.h
@@@ -17983,6 -17988,12 +17993,6 @@@ L:	alsa-devel@alsa-project.org (moderat
  S:	Odd Fixes
  F:	sound/soc/codecs/tas571x*
-TI TCAN4X5X DEVICE DRIVER
 -L:	linux-can@vger.kernel.org
 -S:	Maintained
 -F:	Documentation/devicetree/bindings/net/can/tcan4x5x.txt
 -F:	drivers/net/can/m_can/tcan4x5x*
 -
  TI TRF7970A NFC DRIVER
  M:	Mark Greer mgreer@animalcreek.com
  L:	linux-wireless@vger.kernel.org
diff --combined arch/x86/net/bpf_jit_comp.c
index 9eead60f0301,7f1b3a862e14..1a467b2a5467
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@@ -1689,7 -1689,16 +1689,16 @@@ emit_jmp
    	}
if (image) {
- 			if (unlikely(proglen + ilen > oldproglen)) {
+ 			/*
+ 			 * When populating the image, assert that:
+ 			 *
+ 			 *  i) We do not write beyond the allocated space, and
+ 			 * ii) addrs[i] did not change from the prior run, in order
+ 			 *     to validate assumptions made for computing branch
+ 			 *     displacements.
+ 			 */
+ 			if (unlikely(proglen + ilen > oldproglen ||
+ 				     proglen + ilen != addrs[i])) {
    			pr_err("bpf_jit: fatal error\n");
    			return -EFAULT;
    		}
@@@ -2346,8 -2355,3 +2355,8 @@@ out
    				   tmp : orig_prog);
    return prog;
  }
 +
 +bool bpf_jit_supports_kfunc_call(void)
 +{
 +	return true;
 +}
diff --combined arch/x86/net/bpf_jit_comp32.c
index 0a7a2870f111,6a99def7d315..3da88ded6ee3
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@@ -1390,19 -1390,6 +1390,19 @@@ static inline void emit_push_r64(const 
    *pprog = prog;
  }
+static void emit_push_r32(const u8 src[], u8 **pprog)
 +{
 +	u8 *prog = *pprog;
 +	int cnt = 0;
 +
 +	/* mov ecx,dword ptr [ebp+off] */
 +	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
 +	/* push ecx */
 +	EMIT1(0x51);
 +
 +	*pprog = prog;
 +}
 +
  static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
  {
    u8 jmp_cond;
@@@ -1472,174 -1459,6 +1472,174 @@@
    return jmp_cond;
  }
+/* i386 kernel compiles with "-mregparm=3".  From gcc document:
 + *
 + * ==== snippet ====
 + * regparm (number)
 + *	On x86-32 targets, the regparm attribute causes the compiler
 + *	to pass arguments number one to (number) if they are of integral
 + *	type in registers EAX, EDX, and ECX instead of on the stack.
 + *	Functions that take a variable number of arguments continue
 + *	to be passed all of their arguments on the stack.
 + * ==== snippet ====
 + *
 + * The first three args of a function will be considered for
 + * putting into the 32bit register EAX, EDX, and ECX.
 + *
 + * Two 32bit registers are used to pass a 64bit arg.
 + *
 + * For example,
 + * void foo(u32 a, u32 b, u32 c, u32 d):
 + *	u32 a: EAX
 + *	u32 b: EDX
 + *	u32 c: ECX
 + *	u32 d: stack
 + *
 + * void foo(u64 a, u32 b, u32 c):
 + *	u64 a: EAX (lo32) EDX (hi32)
 + *	u32 b: ECX
 + *	u32 c: stack
 + *
 + * void foo(u32 a, u64 b, u32 c):
 + *	u32 a: EAX
 + *	u64 b: EDX (lo32) ECX (hi32)
 + *	u32 c: stack
 + *
 + * void foo(u32 a, u32 b, u64 c):
 + *	u32 a: EAX
 + *	u32 b: EDX
 + *	u64 c: stack
 + *
 + * The return value will be stored in the EAX (and EDX for 64bit value).
 + *
 + * For example,
 + * u32 foo(u32 a, u32 b, u32 c):
 + *	return value: EAX
 + *
 + * u64 foo(u32 a, u32 b, u32 c):
 + *	return value: EAX (lo32) EDX (hi32)
 + *
 + * Notes:
 + *	The verifier only accepts function having integer and pointers
 + *	as its args and return value, so it does not have
 + *	struct-by-value.
 + *
 + * emit_kfunc_call() finds out the btf_func_model by calling
 + * bpf_jit_find_kfunc_model().  A btf_func_model
 + * has the details about the number of args, size of each arg,
 + * and the size of the return value.
 + *
 + * It first decides how many args can be passed by EAX, EDX, and ECX.
 + * That will decide what args should be pushed to the stack:
 + * [first_stack_regno, last_stack_regno] are the bpf regnos
 + * that should be pushed to the stack.
 + *
 + * It will first push all args to the stack because the push
 + * will need to use ECX.  Then, it moves
 + * [BPF_REG_1, first_stack_regno) to EAX, EDX, and ECX.
 + *
 + * When emitting a call (0xE8), it needs to figure out
 + * the jmp_offset relative to the jit-insn address immediately
 + * following the call (0xE8) instruction.  At this point, it knows
 + * the end of the jit-insn address after completely translated the
 + * current (BPF_JMP | BPF_CALL) bpf-insn.  It is passed as "end_addr"
 + * to the emit_kfunc_call().  Thus, it can learn the "immediate-follow-call"
 + * address by figuring out how many jit-insn is generated between
 + * the call (0xE8) and the end_addr:
 + *	- 0-1 jit-insn (3 bytes each) to restore the esp pointer if there
 + *	  is arg pushed to the stack.
 + *	- 0-2 jit-insns (3 bytes each) to handle the return value.
 + */
 +static int emit_kfunc_call(const struct bpf_prog *bpf_prog, u8 *end_addr,
 +			   const struct bpf_insn *insn, u8 **pprog)
 +{
 +	const u8 arg_regs[] = { IA32_EAX, IA32_EDX, IA32_ECX };
 +	int i, cnt = 0, first_stack_regno, last_stack_regno;
 +	int free_arg_regs = ARRAY_SIZE(arg_regs);
 +	const struct btf_func_model *fm;
 +	int bytes_in_stack = 0;
 +	const u8 *cur_arg_reg;
 +	u8 *prog = *pprog;
 +	s64 jmp_offset;
 +
 +	fm = bpf_jit_find_kfunc_model(bpf_prog, insn);
 +	if (!fm)
 +		return -EINVAL;
 +
 +	first_stack_regno = BPF_REG_1;
 +	for (i = 0; i < fm->nr_args; i++) {
 +		int regs_needed = fm->arg_size[i] > sizeof(u32) ? 2 : 1;
 +
 +		if (regs_needed > free_arg_regs)
 +			break;
 +
 +		free_arg_regs -= regs_needed;
 +		first_stack_regno++;
 +	}
 +
 +	/* Push the args to the stack */
 +	last_stack_regno = BPF_REG_0 + fm->nr_args;
 +	for (i = last_stack_regno; i >= first_stack_regno; i--) {
 +		if (fm->arg_size[i - 1] > sizeof(u32)) {
 +			emit_push_r64(bpf2ia32[i], &prog);
 +			bytes_in_stack += 8;
 +		} else {
 +			emit_push_r32(bpf2ia32[i], &prog);
 +			bytes_in_stack += 4;
 +		}
 +	}
 +
 +	cur_arg_reg = &arg_regs[0];
 +	for (i = BPF_REG_1; i < first_stack_regno; i++) {
 +		/* mov e[adc]x,dword ptr [ebp+off] */
 +		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
 +		      STACK_VAR(bpf2ia32[i][0]));
 +		if (fm->arg_size[i - 1] > sizeof(u32))
 +			/* mov e[adc]x,dword ptr [ebp+off] */
 +			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
 +			      STACK_VAR(bpf2ia32[i][1]));
 +	}
 +
 +	if (bytes_in_stack)
 +		/* add esp,"bytes_in_stack" */
 +		end_addr -= 3;
 +
 +	/* mov dword ptr [ebp+off],edx */
 +	if (fm->ret_size > sizeof(u32))
 +		end_addr -= 3;
 +
 +	/* mov dword ptr [ebp+off],eax */
 +	if (fm->ret_size)
 +		end_addr -= 3;
 +
 +	jmp_offset = (u8 *)__bpf_call_base + insn->imm - end_addr;
 +	if (!is_simm32(jmp_offset)) {
 +		pr_err("unsupported BPF kernel function jmp_offset:%lld\n",
 +		       jmp_offset);
 +		return -EINVAL;
 +	}
 +
 +	EMIT1_off32(0xE8, jmp_offset);
 +
 +	if (fm->ret_size)
 +		/* mov dword ptr [ebp+off],eax */
 +		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
 +		      STACK_VAR(bpf2ia32[BPF_REG_0][0]));
 +
 +	if (fm->ret_size > sizeof(u32))
 +		/* mov dword ptr [ebp+off],edx */
 +		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
 +		      STACK_VAR(bpf2ia32[BPF_REG_0][1]));
 +
 +	if (bytes_in_stack)
 +		/* add esp,"bytes_in_stack" */
 +		EMIT3(0x83, add_1reg(0xC0, IA32_ESP), bytes_in_stack);
 +
 +	*pprog = prog;
 +
 +	return 0;
 +}
 +
  static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
    	  int oldproglen, struct jit_context *ctx)
  {
@@@ -2069,18 -1888,6 +2069,18 @@@
    		if (insn->src_reg == BPF_PSEUDO_CALL)
    			goto notyet;
+			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
 +				int err;
 +
 +				err = emit_kfunc_call(bpf_prog,
 +						      image + addrs[i],
 +						      insn, &prog);
 +
 +				if (err)
 +					return err;
 +				break;
 +			}
 +
    		func = (u8 *) __bpf_call_base + imm32;
    		jmp_offset = func - (image + addrs[i]);
@@@ -2469,7 -2276,16 +2469,16 @@@ notyet
    	}
if (image) {
- 			if (unlikely(proglen + ilen > oldproglen)) {
+ 			/*
+ 			 * When populating the image, assert that:
+ 			 *
+ 			 *  i) We do not write beyond the allocated space, and
+ 			 * ii) addrs[i] did not change from the prior run, in order
+ 			 *     to validate assumptions made for computing branch
+ 			 *     displacements.
+ 			 */
+ 			if (unlikely(proglen + ilen > oldproglen ||
+ 				     proglen + ilen != addrs[i])) {
    			pr_err("bpf_jit: fatal error\n");
    			return -EFAULT;
    		}
@@@ -2586,8 -2402,3 +2595,8 @@@ out
    				   tmp : orig_prog);
    return prog;
  }
 +
 +bool bpf_jit_supports_kfunc_call(void)
 +{
 +	return true;
 +}
diff --combined drivers/bluetooth/btusb.c
index 192cb8c191bc,5cbfbd948f67..5d603ef39bad
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@@ -399,9 -399,7 +399,9 @@@ static const struct usb_device_id black
/* MediaTek Bluetooth devices */
    { USB_VENDOR_AND_INTERFACE_INFO(0x0e8d, 0xe0, 0x01, 0x01),
 -	  .driver_info = BTUSB_MEDIATEK },
 +	  .driver_info = BTUSB_MEDIATEK |
 +			 BTUSB_WIDEBAND_SPEECH |
 +			 BTUSB_VALID_LE_STATES },
/* Additional MediaTek MT7615E Bluetooth devices */
    { USB_DEVICE(0x13d3, 0x3560), .driver_info = BTUSB_MEDIATEK},
@@@ -457,8 -455,6 +457,8 @@@
    					     BTUSB_WIDEBAND_SPEECH },
    { USB_DEVICE(0x0bda, 0xc123), .driver_info = BTUSB_REALTEK |
    					     BTUSB_WIDEBAND_SPEECH },
 +	{ USB_DEVICE(0x0cb5, 0xc547), .driver_info = BTUSB_REALTEK |
 +						     BTUSB_WIDEBAND_SPEECH },
/* Silicon Wave based devices */
    { USB_DEVICE(0x0c10, 0x0000), .driver_info = BTUSB_SWAVE },
@@@ -2404,7 -2400,7 +2404,7 @@@ static int btusb_send_frame_intel(struc
    return -EILSEQ;
  }
-static bool btusb_setup_intel_new_get_fw_name(struct intel_version *ver,
 +static int btusb_setup_intel_new_get_fw_name(struct intel_version *ver,
    				     struct intel_boot_params *params,
    				     char *fw_name, size_t len,
    				     const char *suffix)
@@@ -2428,10 -2424,9 +2428,10 @@@
    		suffix);
    	break;
    default:
 -		return false;
 +		return -EINVAL;
    }
 -	return true;
 +
 +	return 0;
  }
static void btusb_setup_intel_newgen_get_fw_name(const struct intel_version_tlv *ver_tlv,
@@@ -2449,44 -2444,6 +2449,44 @@@
    	 suffix);
  }
+static int btusb_download_wait(struct hci_dev *hdev, ktime_t calltime, int msec)
 +{
 +	struct btusb_data *data = hci_get_drvdata(hdev);
 +	ktime_t delta, rettime;
 +	unsigned long long duration;
 +	int err;
 +
 +	set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
 +
 +	bt_dev_info(hdev, "Waiting for firmware download to complete");
 +
 +	err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING,
 +				  TASK_INTERRUPTIBLE,
 +				  msecs_to_jiffies(msec));
 +	if (err == -EINTR) {
 +		bt_dev_err(hdev, "Firmware loading interrupted");
 +		return err;
 +	}
 +
 +	if (err) {
 +		bt_dev_err(hdev, "Firmware loading timeout");
 +		return -ETIMEDOUT;
 +	}
 +
 +	if (test_bit(BTUSB_FIRMWARE_FAILED, &data->flags)) {
 +		bt_dev_err(hdev, "Firmware loading failed");
 +		return -ENOEXEC;
 +	}
 +
 +	rettime = ktime_get();
 +	delta = ktime_sub(rettime, calltime);
 +	duration = (unsigned long long)ktime_to_ns(delta) >> 10;
 +
 +	bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration);
 +
 +	return 0;
 +}
 +
  static int btusb_intel_download_firmware_newgen(struct hci_dev *hdev,
    					struct intel_version_tlv *ver,
    					u32 *boot_param)
@@@ -2495,11 -2452,19 +2495,11 @@@
    char fwname[64];
    int err;
    struct btusb_data *data = hci_get_drvdata(hdev);
 +	ktime_t calltime;
if (!ver || !boot_param)
    	return -EINVAL;
-	/* The hardware platform number has a fixed value of 0x37 and
 -	 * for now only accept this single value.
 -	 */
 -	if (INTEL_HW_PLATFORM(ver->cnvi_bt) != 0x37) {
 -		bt_dev_err(hdev, "Unsupported Intel hardware platform (0x%2x)",
 -			   INTEL_HW_PLATFORM(ver->cnvi_bt));
 -		return -EINVAL;
 -	}
 -
    /* The firmware variant determines if the device is in bootloader
     * mode or is running operational firmware. The value 0x03 identifies
     * the bootloader and the value 0x23 identifies the operational
@@@ -2516,6 -2481,50 +2516,6 @@@
    if (ver->img_type == 0x03) {
    	clear_bit(BTUSB_BOOTLOADER, &data->flags);
    	btintel_check_bdaddr(hdev);
 -		return 0;
 -	}
 -
 -	/* Check for supported iBT hardware variants of this firmware
 -	 * loading method.
 -	 *
 -	 * This check has been put in place to ensure correct forward
 -	 * compatibility options when newer hardware variants come along.
 -	 */
 -	switch (INTEL_HW_VARIANT(ver->cnvi_bt)) {
 -	case 0x17:	/* TyP */
 -	case 0x18:	/* Slr */
 -	case 0x19:	/* Slr-F */
 -		break;
 -	default:
 -		bt_dev_err(hdev, "Unsupported Intel hardware variant (0x%x)",
 -			   INTEL_HW_VARIANT(ver->cnvi_bt));
 -		return -EINVAL;
 -	}
 -
 -	/* If the device is not in bootloader mode, then the only possible
 -	 * choice is to return an error and abort the device initialization.
 -	 */
 -	if (ver->img_type != 0x01) {
 -		bt_dev_err(hdev, "Unsupported Intel firmware variant (0x%x)",
 -			   ver->img_type);
 -		return -ENODEV;
 -	}
 -
 -	/* It is required that every single firmware fragment is acknowledged
 -	 * with a command complete event. If the boot parameters indicate
 -	 * that this bootloader does not send them, then abort the setup.
 -	 */
 -	if (ver->limited_cce != 0x00) {
 -		bt_dev_err(hdev, "Unsupported Intel firmware loading method (0x%x)",
 -			   ver->limited_cce);
 -		return -EINVAL;
 -	}
 -
 -	/* Secure boot engine type should be either 1 (ECDSA) or 0 (RSA) */
 -	if (ver->sbe_type > 0x01) {
 -		bt_dev_err(hdev, "Unsupported Intel secure boot engine type (0x%x)",
 -			   ver->sbe_type);
 -		return -EINVAL;
    }
/* If the OTP has no valid Bluetooth device address, then there will
@@@ -2529,8 -2538,7 +2529,8 @@@
    btusb_setup_intel_newgen_get_fw_name(ver, fwname, sizeof(fwname), "sfi");
    err = request_firmware(&fw, fwname, &hdev->dev);
    if (err < 0) {
 -		bt_dev_err(hdev, "Failed to load Intel firmware file (%d)", err);
 +		bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
 +			   fwname, err);
    	return err;
    }
@@@ -2543,28 -2551,22 +2543,28 @@@
    	goto done;
    }
+	calltime = ktime_get();
 +
    set_bit(BTUSB_DOWNLOADING, &data->flags);
/* Start firmware downloading and get boot parameter */
 -	err = btintel_download_firmware_newgen(hdev, fw, boot_param,
 +	err = btintel_download_firmware_newgen(hdev, ver, fw, boot_param,
    				       INTEL_HW_VARIANT(ver->cnvi_bt),
    				       ver->sbe_type);
    if (err < 0) {
 +		if (err == -EALREADY) {
 +			/* Firmware has already been loaded */
 +			set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
 +			err = 0;
 +			goto done;
 +		}
 +
    	/* When FW download fails, send Intel Reset to retry
    	 * FW download.
    	 */
    	btintel_reset_to_bootloader(hdev);
    	goto done;
    }
 -	set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
 -
 -	bt_dev_info(hdev, "Waiting for firmware download to complete");
/* Before switching the device into operational mode and with that
     * booting the loaded firmware, wait for the bootloader notification
@@@ -2577,9 -2579,26 +2577,9 @@@
     * and thus just timeout if that happens and fail the setup
     * of this device.
     */
 -	err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING,
 -				  TASK_INTERRUPTIBLE,
 -				  msecs_to_jiffies(5000));
 -	if (err == -EINTR) {
 -		bt_dev_err(hdev, "Firmware loading interrupted");
 -		goto done;
 -	}
 -
 -	if (err) {
 -		bt_dev_err(hdev, "Firmware loading timeout");
 -		err = -ETIMEDOUT;
 +	err = btusb_download_wait(hdev, calltime, 5000);
 +	if (err == -ETIMEDOUT)
    	btintel_reset_to_bootloader(hdev);
 -		goto done;
 -	}
 -
 -	if (test_bit(BTUSB_FIRMWARE_FAILED, &data->flags)) {
 -		bt_dev_err(hdev, "Firmware loading failed");
 -		err = -ENOEXEC;
 -		goto done;
 -	}
done:
    release_firmware(fw);
@@@ -2595,11 -2614,41 +2595,11 @@@ static int btusb_intel_download_firmwar
    char fwname[64];
    int err;
    struct btusb_data *data = hci_get_drvdata(hdev);
 +	ktime_t calltime;
if (!ver || !params)
    	return -EINVAL;
-	/* The hardware platform number has a fixed value of 0x37 and
 -	 * for now only accept this single value.
 -	 */
 -	if (ver->hw_platform != 0x37) {
 -		bt_dev_err(hdev, "Unsupported Intel hardware platform (%u)",
 -			   ver->hw_platform);
 -		return -EINVAL;
 -	}
 -
 -	/* Check for supported iBT hardware variants of this firmware
 -	 * loading method.
 -	 *
 -	 * This check has been put in place to ensure correct forward
 -	 * compatibility options when newer hardware variants come along.
 -	 */
 -	switch (ver->hw_variant) {
 -	case 0x0b:	/* SfP */
 -	case 0x0c:	/* WsP */
 -	case 0x11:	/* JfP */
 -	case 0x12:	/* ThP */
 -	case 0x13:	/* HrP */
 -	case 0x14:	/* CcP */
 -		break;
 -	default:
 -		bt_dev_err(hdev, "Unsupported Intel hardware variant (%u)",
 -			   ver->hw_variant);
 -		return -EINVAL;
 -	}
 -
 -	btintel_version_info(hdev, ver);
 -
    /* The firmware variant determines if the device is in bootloader
     * mode or is running operational firmware. The value 0x06 identifies
     * the bootloader and the value 0x23 identifies the operational
@@@ -2616,18 -2665,16 +2616,18 @@@
    if (ver->fw_variant == 0x23) {
    	clear_bit(BTUSB_BOOTLOADER, &data->flags);
    	btintel_check_bdaddr(hdev);
 -		return 0;
 -	}
-	/* If the device is not in bootloader mode, then the only possible
 -	 * choice is to return an error and abort the device initialization.
 -	 */
 -	if (ver->fw_variant != 0x06) {
 -		bt_dev_err(hdev, "Unsupported Intel firmware variant (%u)",
 -			   ver->fw_variant);
 -		return -ENODEV;
 +		/* SfP and WsP don't seem to update the firmware version on file
 +		 * so version checking is currently not possible.
 +		 */
 +		switch (ver->hw_variant) {
 +		case 0x0b:	/* SfP */
 +		case 0x0c:	/* WsP */
 +			return 0;
 +		}
 +
 +		/* Proceed to download to check if the version matches */
 +		goto download;
    }
/* Read the secure boot parameters to identify the operating
@@@ -2655,7 -2702,6 +2655,7 @@@
    	set_bit(HCI_QUIRK_INVALID_BDADDR, &hdev->quirks);
    }
+download:
    /* With this Intel bootloader only the hardware variant and device
     * revision information are used to select the right firmware for SfP
     * and WsP.
@@@ -2679,15 -2725,14 +2679,15 @@@
     */
    err = btusb_setup_intel_new_get_fw_name(ver, params, fwname,
    					sizeof(fwname), "sfi");
 -	if (!err) {
 +	if (err < 0) {
    	bt_dev_err(hdev, "Unsupported Intel firmware naming");
    	return -EINVAL;
    }
err = request_firmware(&fw, fwname, &hdev->dev);
    if (err < 0) {
 -		bt_dev_err(hdev, "Failed to load Intel firmware file (%d)", err);
 +		bt_dev_err(hdev, "Failed to load Intel firmware file %s (%d)",
 +			   fwname, err);
    	return err;
    }
@@@ -2700,26 -2745,20 +2700,26 @@@
    	goto done;
    }
+	calltime = ktime_get();
 +
    set_bit(BTUSB_DOWNLOADING, &data->flags);
/* Start firmware downloading and get boot parameter */
 -	err = btintel_download_firmware(hdev, fw, boot_param);
 +	err = btintel_download_firmware(hdev, ver, fw, boot_param);
    if (err < 0) {
 +		if (err == -EALREADY) {
 +			/* Firmware has already been loaded */
 +			set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
 +			err = 0;
 +			goto done;
 +		}
 +
    	/* When FW download fails, send Intel Reset to retry
    	 * FW download.
    	 */
    	btintel_reset_to_bootloader(hdev);
    	goto done;
    }
 -	set_bit(BTUSB_FIRMWARE_LOADED, &data->flags);
 -
 -	bt_dev_info(hdev, "Waiting for firmware download to complete");
/* Before switching the device into operational mode and with that
     * booting the loaded firmware, wait for the bootloader notification
@@@ -2732,74 -2771,29 +2732,74 @@@
     * and thus just timeout if that happens and fail the setup
     * of this device.
     */
 -	err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING,
 +	err = btusb_download_wait(hdev, calltime, 5000);
 +	if (err == -ETIMEDOUT)
 +		btintel_reset_to_bootloader(hdev);
 +
 +done:
 +	release_firmware(fw);
 +	return err;
 +}
 +
 +static int btusb_boot_wait(struct hci_dev *hdev, ktime_t calltime, int msec)
 +{
 +	struct btusb_data *data = hci_get_drvdata(hdev);
 +	ktime_t delta, rettime;
 +	unsigned long long duration;
 +	int err;
 +
 +	bt_dev_info(hdev, "Waiting for device to boot");
 +
 +	err = wait_on_bit_timeout(&data->flags, BTUSB_BOOTING,
    			  TASK_INTERRUPTIBLE,
 -				  msecs_to_jiffies(5000));
 +				  msecs_to_jiffies(msec));
    if (err == -EINTR) {
 -		bt_dev_err(hdev, "Firmware loading interrupted");
 -		goto done;
 +		bt_dev_err(hdev, "Device boot interrupted");
 +		return -EINTR;
    }
if (err) {
 -		bt_dev_err(hdev, "Firmware loading timeout");
 -		err = -ETIMEDOUT;
 -		btintel_reset_to_bootloader(hdev);
 -		goto done;
 +		bt_dev_err(hdev, "Device boot timeout");
 +		return -ETIMEDOUT;
    }
-	if (test_bit(BTUSB_FIRMWARE_FAILED, &data->flags)) {
 -		bt_dev_err(hdev, "Firmware loading failed");
 -		err = -ENOEXEC;
 -		goto done;
 +	rettime = ktime_get();
 +	delta = ktime_sub(rettime, calltime);
 +	duration = (unsigned long long) ktime_to_ns(delta) >> 10;
 +
 +	bt_dev_info(hdev, "Device booted in %llu usecs", duration);
 +
 +	return 0;
 +}
 +
 +static int btusb_intel_boot(struct hci_dev *hdev, u32 boot_addr)
 +{
 +	struct btusb_data *data = hci_get_drvdata(hdev);
 +	ktime_t calltime;
 +	int err;
 +
 +	calltime = ktime_get();
 +
 +	set_bit(BTUSB_BOOTING, &data->flags);
 +
 +	err = btintel_send_intel_reset(hdev, boot_addr);
 +	if (err) {
 +		bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err);
 +		btintel_reset_to_bootloader(hdev);
 +		return err;
    }
-done:
 -	release_firmware(fw);
 +	/* The bootloader will not indicate when the device is ready. This
 +	 * is done by the operational firmware sending bootup notification.
 +	 *
 +	 * Booting into operational firmware should not take longer than
 +	 * 1 second. However if that happens, then just fail the setup
 +	 * since something went wrong.
 +	 */
 +	err = btusb_boot_wait(hdev, calltime, 1000);
 +	if (err == -ETIMEDOUT)
 +		btintel_reset_to_bootloader(hdev);
 +
    return err;
  }
@@@ -2810,6 -2804,8 +2810,6 @@@ static int btusb_setup_intel_new(struc
    struct intel_boot_params params;
    u32 boot_param;
    char ddcname[64];
 -	ktime_t calltime, delta, rettime;
 -	unsigned long long duration;
    int err;
    struct intel_debug_features features;
@@@ -2821,6 -2817,8 +2821,6 @@@
     */
    boot_param = 0x00000000;
-	calltime = ktime_get();
 -
    /* Read the Intel version information to determine if the device
     * is in bootloader mode or if it already has operational firmware
     * loaded.
@@@ -2832,10 -2830,6 +2832,10 @@@
    	return err;
    }
+	err = btintel_version_info(hdev, &ver);
 +	if (err)
 +		return err;
 +
    err = btusb_intel_download_firmware(hdev, &ver, &params, &boot_param);
    if (err)
    	return err;
@@@ -2844,16 -2838,59 +2844,16 @@@
    if (ver.fw_variant == 0x23)
    	goto finish;
-	rettime = ktime_get();
 -	delta = ktime_sub(rettime, calltime);
 -	duration = (unsigned long long) ktime_to_ns(delta) >> 10;
 -
 -	bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration);
 -
 -	calltime = ktime_get();
 -
 -	set_bit(BTUSB_BOOTING, &data->flags);
 -
 -	err = btintel_send_intel_reset(hdev, boot_param);
 -	if (err) {
 -		bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err);
 -		btintel_reset_to_bootloader(hdev);
 +	err = btusb_intel_boot(hdev, boot_param);
 +	if (err)
    	return err;
 -	}
 -
 -	/* The bootloader will not indicate when the device is ready. This
 -	 * is done by the operational firmware sending bootup notification.
 -	 *
 -	 * Booting into operational firmware should not take longer than
 -	 * 1 second. However if that happens, then just fail the setup
 -	 * since something went wrong.
 -	 */
 -	bt_dev_info(hdev, "Waiting for device to boot");
 -
 -	err = wait_on_bit_timeout(&data->flags, BTUSB_BOOTING,
 -				  TASK_INTERRUPTIBLE,
 -				  msecs_to_jiffies(1000));
 -
 -	if (err == -EINTR) {
 -		bt_dev_err(hdev, "Device boot interrupted");
 -		return -EINTR;
 -	}
 -
 -	if (err) {
 -		bt_dev_err(hdev, "Device boot timeout");
 -		btintel_reset_to_bootloader(hdev);
 -		return -ETIMEDOUT;
 -	}
 -
 -	rettime = ktime_get();
 -	delta = ktime_sub(rettime, calltime);
 -	duration = (unsigned long long) ktime_to_ns(delta) >> 10;
 -
 -	bt_dev_info(hdev, "Device booted in %llu usecs", duration);
clear_bit(BTUSB_BOOTLOADER, &data->flags);
err = btusb_setup_intel_new_get_fw_name(&ver, &params, ddcname,
    					sizeof(ddcname), "ddc");
-	if (!err) {
 +	if (err < 0) {
    	bt_dev_err(hdev, "Unsupported Intel firmware naming");
    } else {
    	/* Once the device is running in operational mode, it needs to
@@@ -2910,6 -2947,8 +2910,6 @@@ static int btusb_setup_intel_newgen(str
    struct btusb_data *data = hci_get_drvdata(hdev);
    u32 boot_param;
    char ddcname[64];
 -	ktime_t calltime, delta, rettime;
 -	unsigned long long duration;
    int err;
    struct intel_debug_features features;
    struct intel_version_tlv version;
@@@ -2922,6 -2961,8 +2922,6 @@@
     */
    boot_param = 0x00000000;
-	calltime = ktime_get();
 -
    /* Read the Intel version information to determine if the device
     * is in bootloader mode or if it already has operational firmware
     * loaded.
@@@ -2933,9 -2974,7 +2933,9 @@@
    	return err;
    }
-	btintel_version_info_tlv(hdev, &version);
 +	err = btintel_version_info_tlv(hdev, &version);
 +	if (err)
 +		return err;
err = btusb_intel_download_firmware_newgen(hdev, &version, &boot_param);
    if (err)
@@@ -2945,9 -2984,52 +2945,9 @@@
    if (version.img_type == 0x03)
    	goto finish;
-	rettime = ktime_get();
 -	delta = ktime_sub(rettime, calltime);
 -	duration = (unsigned long long)ktime_to_ns(delta) >> 10;
 -
 -	bt_dev_info(hdev, "Firmware loaded in %llu usecs", duration);
 -
 -	calltime = ktime_get();
 -
 -	set_bit(BTUSB_BOOTING, &data->flags);
 -
 -	err = btintel_send_intel_reset(hdev, boot_param);
 -	if (err) {
 -		bt_dev_err(hdev, "Intel Soft Reset failed (%d)", err);
 -		btintel_reset_to_bootloader(hdev);
 +	err = btusb_intel_boot(hdev, boot_param);
 +	if (err)
    	return err;
 -	}
 -
 -	/* The bootloader will not indicate when the device is ready. This
 -	 * is done by the operational firmware sending bootup notification.
 -	 *
 -	 * Booting into operational firmware should not take longer than
 -	 * 1 second. However if that happens, then just fail the setup
 -	 * since something went wrong.
 -	 */
 -	bt_dev_info(hdev, "Waiting for device to boot");
 -
 -	err = wait_on_bit_timeout(&data->flags, BTUSB_BOOTING,
 -				  TASK_INTERRUPTIBLE,
 -				  msecs_to_jiffies(1000));
 -
 -	if (err == -EINTR) {
 -		bt_dev_err(hdev, "Device boot interrupted");
 -		return -EINTR;
 -	}
 -
 -	if (err) {
 -		bt_dev_err(hdev, "Device boot timeout");
 -		btintel_reset_to_bootloader(hdev);
 -		return -ETIMEDOUT;
 -	}
 -
 -	rettime = ktime_get();
 -	delta = ktime_sub(rettime, calltime);
 -	duration = (unsigned long long)ktime_to_ns(delta) >> 10;
 -
 -	bt_dev_info(hdev, "Device booted in %llu usecs", duration);
clear_bit(BTUSB_BOOTLOADER, &data->flags);
@@@ -3413,7 -3495,7 +3413,7 @@@ static int btusb_mtk_setup_firmware_79x
    fw_ptr = fw->data;
    fw_bin_ptr = fw_ptr;
    globaldesc = (struct btmtk_global_desc *)(fw_ptr + MTK_FW_ROM_PATCH_HEADER_SIZE);
 -	section_num = globaldesc->section_num;
 +	section_num = le32_to_cpu(globaldesc->section_num);
for (i = 0; i < section_num; i++) {
    	first_block = 1;
@@@ -3421,8 -3503,8 +3421,8 @@@
    	sectionmap = (struct btmtk_section_map *)(fw_ptr + MTK_FW_ROM_PATCH_HEADER_SIZE +
    		      MTK_FW_ROM_PATCH_GD_SIZE + MTK_FW_ROM_PATCH_SEC_MAP_SIZE * i);
-		section_offset = sectionmap->secoffset;
 -		dl_size = sectionmap->bin_info_spec.dlsize;
 +		section_offset = le32_to_cpu(sectionmap->secoffset);
 +		dl_size = le32_to_cpu(sectionmap->bin_info_spec.dlsize);
if (dl_size > 0) {
    		retry = 20;
@@@ -3658,7 -3740,7 +3658,7 @@@ static int btusb_mtk_setup(struct hci_d
    int err, status;
    u32 dev_id;
    char fw_bin_name[64];
 -	u32 fw_version;
 +	u32 fw_version = 0;
    u8 param;
calltime = ktime_get();
@@@ -4767,8 -4849,8 +4767,8 @@@ static int btusb_probe(struct usb_inter
    		data->diag = NULL;
    }
- 	if (!enable_autosuspend)
- 		usb_disable_autosuspend(data->udev);
+ 	if (enable_autosuspend)
+ 		usb_enable_autosuspend(data->udev);
err = hci_register_dev(hdev);
    if (err < 0)
@@@ -4828,9 -4910,6 +4828,6 @@@ static void btusb_disconnect(struct usb
    	gpiod_put(data->reset_gpio);
hci_free_dev(hdev);
- 
- 	if (!enable_autosuspend)
- 		usb_enable_autosuspend(data->udev);
  }
#ifdef CONFIG_PM
diff --combined drivers/net/can/spi/mcp251x.c
index 80ab1593ca31,a57da43680d8..492f1bcb0516
--- a/drivers/net/can/spi/mcp251x.c
+++ b/drivers/net/can/spi/mcp251x.c
@@@ -276,7 -276,7 +276,7 @@@ static void mcp251x_clean(struct net_de
    	net->stats.tx_errors++;
    dev_kfree_skb(priv->tx_skb);
    if (priv->tx_len)
 -		can_free_echo_skb(priv->net, 0);
 +		can_free_echo_skb(priv->net, 0, NULL);
    priv->tx_skb = NULL;
    priv->tx_len = 0;
  }
@@@ -314,6 -314,18 +314,18 @@@ static int mcp251x_spi_trans(struct spi
    return ret;
  }
+ static int mcp251x_spi_write(struct spi_device *spi, int len)
+ {
+ 	struct mcp251x_priv *priv = spi_get_drvdata(spi);
+ 	int ret;
+ 
+ 	ret = spi_write(spi, priv->spi_tx_buf, len);
+ 	if (ret)
+ 		dev_err(&spi->dev, "spi write failed: ret = %d\n", ret);
+ 
+ 	return ret;
+ }
+ 
  static u8 mcp251x_read_reg(struct spi_device *spi, u8 reg)
  {
    struct mcp251x_priv *priv = spi_get_drvdata(spi);
@@@ -361,7 -373,7 +373,7 @@@ static void mcp251x_write_reg(struct sp
    priv->spi_tx_buf[1] = reg;
    priv->spi_tx_buf[2] = val;
- 	mcp251x_spi_trans(spi, 3);
+ 	mcp251x_spi_write(spi, 3);
  }
static void mcp251x_write_2regs(struct spi_device *spi, u8 reg, u8 v1, u8 v2)
@@@ -373,7 -385,7 +385,7 @@@
    priv->spi_tx_buf[2] = v1;
    priv->spi_tx_buf[3] = v2;
- 	mcp251x_spi_trans(spi, 4);
+ 	mcp251x_spi_write(spi, 4);
  }
static void mcp251x_write_bits(struct spi_device *spi, u8 reg,
@@@ -386,7 -398,7 +398,7 @@@
    priv->spi_tx_buf[2] = mask;
    priv->spi_tx_buf[3] = val;
- 	mcp251x_spi_trans(spi, 4);
+ 	mcp251x_spi_write(spi, 4);
  }
static u8 mcp251x_read_stat(struct spi_device *spi)
@@@ -618,7 -630,7 +630,7 @@@ static void mcp251x_hw_tx_frame(struct 
    				  buf[i]);
    } else {
    	memcpy(priv->spi_tx_buf, buf, TXBDAT_OFF + len);
- 		mcp251x_spi_trans(spi, TXBDAT_OFF + len);
+ 		mcp251x_spi_write(spi, TXBDAT_OFF + len);
    }
  }
@@@ -650,7 -662,7 +662,7 @@@ static void mcp251x_hw_tx(struct spi_de
/* use INSTRUCTION_RTS, to avoid "repeated frame problem" */
    priv->spi_tx_buf[0] = INSTRUCTION_RTS(1 << tx_buf_idx);
- 	mcp251x_spi_trans(priv->spi, 1);
+ 	mcp251x_spi_write(priv->spi, 1);
  }
static void mcp251x_hw_rx_frame(struct spi_device *spi, u8 *buf,
@@@ -888,7 -900,7 +900,7 @@@ static int mcp251x_hw_reset(struct spi_
    mdelay(MCP251X_OST_DELAY_MS);
priv->spi_tx_buf[0] = INSTRUCTION_RESET;
- 	ret = mcp251x_spi_trans(spi, 1);
+ 	ret = mcp251x_spi_write(spi, 1);
    if (ret)
    	return ret;
diff --combined drivers/net/can/usb/peak_usb/pcan_usb_core.c
index ad006edf474d,28e916a04047..e69b005be068
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@@ -14,7 -14,6 +14,7 @@@
  #include <linux/module.h>
  #include <linux/netdevice.h>
  #include <linux/usb.h>
 +#include <linux/ethtool.h>
#include <linux/can.h>
  #include <linux/can/dev.h>
@@@ -372,7 -371,7 +372,7 @@@ static netdev_tx_t peak_usb_ndo_start_x
err = usb_submit_urb(urb, GFP_ATOMIC);
    if (err) {
 -		can_free_echo_skb(netdev, context->echo_index);
 +		can_free_echo_skb(netdev, context->echo_index, NULL);
usb_unanchor_urb(urb);
@@@ -821,9 -820,6 +821,9 @@@ static int peak_usb_create_dev(const st
netdev->flags |= IFF_ECHO; /* we support local echo */
+	/* add ethtool support */
 +	netdev->ethtool_ops = peak_usb_adapter->ethtool_ops;
 +
    init_usb_anchor(&dev->rx_submitted);
init_usb_anchor(&dev->tx_submitted);
@@@ -861,7 -857,7 +861,7 @@@
    if (dev->adapter->dev_set_bus) {
    	err = dev->adapter->dev_set_bus(dev, 0);
    	if (err)
- 			goto lbl_unregister_candev;
+ 			goto adap_dev_free;
    }
/* get device number early */
@@@ -873,6 -869,10 +873,10 @@@
return 0;
+ adap_dev_free:
+ 	if (dev->adapter->dev_free)
+ 		dev->adapter->dev_free(dev);
+ 
  lbl_unregister_candev:
    unregister_candev(netdev);
diff --combined drivers/net/dsa/lantiq_gswip.c
index 26d0e3bb5dea,bf5c62e5c0b0..314ae78bbdd6
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@@ -1,6 -1,6 +1,6 @@@
  // SPDX-License-Identifier: GPL-2.0
  /*
 - * Lantiq / Intel GSWIP switch driver for VRX200 SoCs
 + * Lantiq / Intel GSWIP switch driver for VRX200, xRX300 and xRX330 SoCs
   *
   * Copyright (C) 2010 Lantiq Deutschland
   * Copyright (C) 2012 John Crispin john@phrozen.org
@@@ -93,14 -93,17 +93,18 @@@
/* GSWIP MII Registers */
  #define GSWIP_MII_CFGp(p)		(0x2 * (p))
+ #define  GSWIP_MII_CFG_RESET		BIT(15)
  #define  GSWIP_MII_CFG_EN		BIT(14)
+ #define  GSWIP_MII_CFG_ISOLATE		BIT(13)
  #define  GSWIP_MII_CFG_LDCLKDIS		BIT(12)
+ #define  GSWIP_MII_CFG_RGMII_IBS	BIT(8)
+ #define  GSWIP_MII_CFG_RMII_CLK		BIT(7)
  #define  GSWIP_MII_CFG_MODE_MIIP	0x0
  #define  GSWIP_MII_CFG_MODE_MIIM	0x1
  #define  GSWIP_MII_CFG_MODE_RMIIP	0x2
  #define  GSWIP_MII_CFG_MODE_RMIIM	0x3
  #define  GSWIP_MII_CFG_MODE_RGMII	0x4
 +#define  GSWIP_MII_CFG_MODE_GMII	0x9
  #define  GSWIP_MII_CFG_MODE_MASK	0xf
  #define  GSWIP_MII_CFG_RATE_M2P5	0x00
  #define  GSWIP_MII_CFG_RATE_M25	0x10
@@@ -191,6 -194,23 +195,23 @@@
  #define GSWIP_PCE_DEFPVID(p)		(0x486 + ((p) * 0xA))
#define GSWIP_MAC_FLEN			0x8C5
+ #define GSWIP_MAC_CTRL_0p(p)		(0x903 + ((p) * 0xC))
+ #define  GSWIP_MAC_CTRL_0_PADEN		BIT(8)
+ #define  GSWIP_MAC_CTRL_0_FCS_EN	BIT(7)
+ #define  GSWIP_MAC_CTRL_0_FCON_MASK	0x0070
+ #define  GSWIP_MAC_CTRL_0_FCON_AUTO	0x0000
+ #define  GSWIP_MAC_CTRL_0_FCON_RX	0x0010
+ #define  GSWIP_MAC_CTRL_0_FCON_TX	0x0020
+ #define  GSWIP_MAC_CTRL_0_FCON_RXTX	0x0030
+ #define  GSWIP_MAC_CTRL_0_FCON_NONE	0x0040
+ #define  GSWIP_MAC_CTRL_0_FDUP_MASK	0x000C
+ #define  GSWIP_MAC_CTRL_0_FDUP_AUTO	0x0000
+ #define  GSWIP_MAC_CTRL_0_FDUP_EN	0x0004
+ #define  GSWIP_MAC_CTRL_0_FDUP_DIS	0x000C
+ #define  GSWIP_MAC_CTRL_0_GMII_MASK	0x0003
+ #define  GSWIP_MAC_CTRL_0_GMII_AUTO	0x0000
+ #define  GSWIP_MAC_CTRL_0_GMII_MII	0x0001
+ #define  GSWIP_MAC_CTRL_0_GMII_RGMII	0x0002
  #define GSWIP_MAC_CTRL_2p(p)		(0x905 + ((p) * 0xC))
  #define GSWIP_MAC_CTRL_2_MLEN		BIT(3) /* Maximum Untagged Frame Lnegth */
@@@ -221,7 -241,6 +242,7 @@@
  struct gswip_hw_info {
    int max_ports;
    int cpu_port;
 +	const struct dsa_switch_ops *ops;
  };
struct xway_gphy_match_data {
@@@ -655,16 -674,13 +676,13 @@@ static int gswip_port_enable(struct dsa
    		  GSWIP_SDMA_PCTRLp(port));
if (!dsa_is_cpu_port(ds, port)) {
- 		u32 macconf = GSWIP_MDIO_PHY_LINK_AUTO |
- 			      GSWIP_MDIO_PHY_SPEED_AUTO |
- 			      GSWIP_MDIO_PHY_FDUP_AUTO |
- 			      GSWIP_MDIO_PHY_FCONTX_AUTO |
- 			      GSWIP_MDIO_PHY_FCONRX_AUTO |
- 			      (phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK);
- 
- 		gswip_mdio_w(priv, macconf, GSWIP_MDIO_PHYp(port));
- 		/* Activate MDIO auto polling */
- 		gswip_mdio_mask(priv, 0, BIT(port), GSWIP_MDIO_MDC_CFG0);
+ 		u32 mdio_phy = 0;
+ 
+ 		if (phydev)
+ 			mdio_phy = phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK;
+ 
+ 		gswip_mdio_mask(priv, GSWIP_MDIO_PHY_ADDR_MASK, mdio_phy,
+ 				GSWIP_MDIO_PHYp(port));
    }
return 0;
@@@ -677,14 -693,6 +695,6 @@@ static void gswip_port_disable(struct d
    if (!dsa_is_user_port(ds, port))
    	return;
- 	if (!dsa_is_cpu_port(ds, port)) {
- 		gswip_mdio_mask(priv, GSWIP_MDIO_PHY_LINK_DOWN,
- 				GSWIP_MDIO_PHY_LINK_MASK,
- 				GSWIP_MDIO_PHYp(port));
- 		/* Deactivate MDIO auto polling */
- 		gswip_mdio_mask(priv, BIT(port), 0, GSWIP_MDIO_MDC_CFG0);
- 	}
- 
    gswip_switch_mask(priv, GSWIP_FDMA_PCTRL_EN, 0,
    		  GSWIP_FDMA_PCTRLp(port));
    gswip_switch_mask(priv, GSWIP_SDMA_PCTRL_EN, 0,
@@@ -796,14 -804,32 +806,32 @@@ static int gswip_setup(struct dsa_switc
    gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP2);
    gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP3);
- 	/* disable PHY auto polling */
+ 	/* Deactivate MDIO PHY auto polling. Some PHYs as the AR8030 have an
+ 	 * interoperability problem with this auto polling mechanism because
+ 	 * their status registers think that the link is in a different state
+ 	 * than it actually is. For the AR8030 it has the BMSR_ESTATEN bit set
+ 	 * as well as ESTATUS_1000_TFULL and ESTATUS_1000_XFULL. This makes the
+ 	 * auto polling state machine consider the link being negotiated with
+ 	 * 1Gbit/s. Since the PHY itself is a Fast Ethernet RMII PHY this leads
+ 	 * to the switch port being completely dead (RX and TX are both not
+ 	 * working).
+ 	 * Also with various other PHY / port combinations (PHY11G GPHY, PHY22F
+ 	 * GPHY, external RGMII PEF7071/7072) any traffic would stop. Sometimes
+ 	 * it would work fine for a few minutes to hours and then stop, on
+ 	 * other devices no traffic could be sent or received at all.
+ 	 * Testing shows that when PHY auto polling is disabled these problems
+ 	 * go away.
+ 	 */
    gswip_mdio_w(priv, 0x0, GSWIP_MDIO_MDC_CFG0);
+ 
    /* Configure the MDIO Clock 2.5 MHz */
    gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1);
- 	/* Disable the xMII link */
+ 	/* Disable the xMII interface and clear its isolation bit */
    for (i = 0; i < priv->hw_info->max_ports; i++)
- 		gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, i);
+ 		gswip_mii_mask_cfg(priv,
+ 				   GSWIP_MII_CFG_EN | GSWIP_MII_CFG_ISOLATE,
+ 				   0, i);
/* enable special tag insertion on cpu port */
    gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN,
@@@ -1386,42 -1412,12 +1414,42 @@@ static int gswip_port_fdb_dump(struct d
    return 0;
  }
-static void gswip_phylink_validate(struct dsa_switch *ds, int port,
 -				   unsigned long *supported,
 -				   struct phylink_link_state *state)
 +static void gswip_phylink_set_capab(unsigned long *supported,
 +				    struct phylink_link_state *state)
  {
    __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+	/* Allow all the expected bits */
 +	phylink_set(mask, Autoneg);
 +	phylink_set_port_modes(mask);
 +	phylink_set(mask, Pause);
 +	phylink_set(mask, Asym_Pause);
 +
 +	/* With the exclusion of MII, Reverse MII and Reduced MII, we
 +	 * support Gigabit, including Half duplex
 +	 */
 +	if (state->interface != PHY_INTERFACE_MODE_MII &&
 +	    state->interface != PHY_INTERFACE_MODE_REVMII &&
 +	    state->interface != PHY_INTERFACE_MODE_RMII) {
 +		phylink_set(mask, 1000baseT_Full);
 +		phylink_set(mask, 1000baseT_Half);
 +	}
 +
 +	phylink_set(mask, 10baseT_Half);
 +	phylink_set(mask, 10baseT_Full);
 +	phylink_set(mask, 100baseT_Half);
 +	phylink_set(mask, 100baseT_Full);
 +
 +	bitmap_and(supported, supported, mask,
 +		   __ETHTOOL_LINK_MODE_MASK_NBITS);
 +	bitmap_and(state->advertising, state->advertising, mask,
 +		   __ETHTOOL_LINK_MODE_MASK_NBITS);
 +}
 +
 +static void gswip_xrx200_phylink_validate(struct dsa_switch *ds, int port,
 +					  unsigned long *supported,
 +					  struct phylink_link_state *state)
 +{
    switch (port) {
    case 0:
    case 1:
@@@ -1448,56 -1444,146 +1476,162 @@@
    	return;
    }
-	/* Allow all the expected bits */
 -	phylink_set(mask, Autoneg);
 -	phylink_set_port_modes(mask);
 -	phylink_set(mask, Pause);
 -	phylink_set(mask, Asym_Pause);
 +	gswip_phylink_set_capab(supported, state);
-	/* With the exclusion of MII, Reverse MII and Reduced MII, we
 -	 * support Gigabit, including Half duplex
 -	 */
 -	if (state->interface != PHY_INTERFACE_MODE_MII &&
 -	    state->interface != PHY_INTERFACE_MODE_REVMII &&
 -	    state->interface != PHY_INTERFACE_MODE_RMII) {
 -		phylink_set(mask, 1000baseT_Full);
 -		phylink_set(mask, 1000baseT_Half);
 +	return;
 +
 +unsupported:
 +	bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
 +	dev_err(ds->dev, "Unsupported interface '%s' for port %d\n",
 +		phy_modes(state->interface), port);
 +}
 +
 +static void gswip_xrx300_phylink_validate(struct dsa_switch *ds, int port,
 +					  unsigned long *supported,
 +					  struct phylink_link_state *state)
 +{
 +	switch (port) {
 +	case 0:
 +		if (!phy_interface_mode_is_rgmii(state->interface) &&
 +		    state->interface != PHY_INTERFACE_MODE_GMII &&
 +		    state->interface != PHY_INTERFACE_MODE_RMII)
 +			goto unsupported;
 +		break;
 +	case 1:
 +	case 2:
 +	case 3:
 +	case 4:
 +		if (state->interface != PHY_INTERFACE_MODE_INTERNAL)
 +			goto unsupported;
 +		break;
 +	case 5:
 +		if (!phy_interface_mode_is_rgmii(state->interface) &&
 +		    state->interface != PHY_INTERFACE_MODE_INTERNAL &&
 +		    state->interface != PHY_INTERFACE_MODE_RMII)
 +			goto unsupported;
 +		break;
 +	default:
 +		bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
 +		dev_err(ds->dev, "Unsupported port: %i\n", port);
 +		return;
    }
-	phylink_set(mask, 10baseT_Half);
 -	phylink_set(mask, 10baseT_Full);
 -	phylink_set(mask, 100baseT_Half);
 -	phylink_set(mask, 100baseT_Full);
 +	gswip_phylink_set_capab(supported, state);
-	bitmap_and(supported, supported, mask,
 -		   __ETHTOOL_LINK_MODE_MASK_NBITS);
 -	bitmap_and(state->advertising, state->advertising, mask,
 -		   __ETHTOOL_LINK_MODE_MASK_NBITS);
    return;
unsupported:
    bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
    dev_err(ds->dev, "Unsupported interface '%s' for port %d\n",
    	phy_modes(state->interface), port);
 -	return;
  }
+ static void gswip_port_set_link(struct gswip_priv *priv, int port, bool link)
+ {
+ 	u32 mdio_phy;
+ 
+ 	if (link)
+ 		mdio_phy = GSWIP_MDIO_PHY_LINK_UP;
+ 	else
+ 		mdio_phy = GSWIP_MDIO_PHY_LINK_DOWN;
+ 
+ 	gswip_mdio_mask(priv, GSWIP_MDIO_PHY_LINK_MASK, mdio_phy,
+ 			GSWIP_MDIO_PHYp(port));
+ }
+ 
+ static void gswip_port_set_speed(struct gswip_priv *priv, int port, int speed,
+ 				 phy_interface_t interface)
+ {
+ 	u32 mdio_phy = 0, mii_cfg = 0, mac_ctrl_0 = 0;
+ 
+ 	switch (speed) {
+ 	case SPEED_10:
+ 		mdio_phy = GSWIP_MDIO_PHY_SPEED_M10;
+ 
+ 		if (interface == PHY_INTERFACE_MODE_RMII)
+ 			mii_cfg = GSWIP_MII_CFG_RATE_M50;
+ 		else
+ 			mii_cfg = GSWIP_MII_CFG_RATE_M2P5;
+ 
+ 		mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_MII;
+ 		break;
+ 
+ 	case SPEED_100:
+ 		mdio_phy = GSWIP_MDIO_PHY_SPEED_M100;
+ 
+ 		if (interface == PHY_INTERFACE_MODE_RMII)
+ 			mii_cfg = GSWIP_MII_CFG_RATE_M50;
+ 		else
+ 			mii_cfg = GSWIP_MII_CFG_RATE_M25;
+ 
+ 		mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_MII;
+ 		break;
+ 
+ 	case SPEED_1000:
+ 		mdio_phy = GSWIP_MDIO_PHY_SPEED_G1;
+ 
+ 		mii_cfg = GSWIP_MII_CFG_RATE_M125;
+ 
+ 		mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_RGMII;
+ 		break;
+ 	}
+ 
+ 	gswip_mdio_mask(priv, GSWIP_MDIO_PHY_SPEED_MASK, mdio_phy,
+ 			GSWIP_MDIO_PHYp(port));
+ 	gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_RATE_MASK, mii_cfg, port);
+ 	gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_GMII_MASK, mac_ctrl_0,
+ 			  GSWIP_MAC_CTRL_0p(port));
+ }
+ 
+ static void gswip_port_set_duplex(struct gswip_priv *priv, int port, int duplex)
+ {
+ 	u32 mac_ctrl_0, mdio_phy;
+ 
+ 	if (duplex == DUPLEX_FULL) {
+ 		mac_ctrl_0 = GSWIP_MAC_CTRL_0_FDUP_EN;
+ 		mdio_phy = GSWIP_MDIO_PHY_FDUP_EN;
+ 	} else {
+ 		mac_ctrl_0 = GSWIP_MAC_CTRL_0_FDUP_DIS;
+ 		mdio_phy = GSWIP_MDIO_PHY_FDUP_DIS;
+ 	}
+ 
+ 	gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_FDUP_MASK, mac_ctrl_0,
+ 			  GSWIP_MAC_CTRL_0p(port));
+ 	gswip_mdio_mask(priv, GSWIP_MDIO_PHY_FDUP_MASK, mdio_phy,
+ 			GSWIP_MDIO_PHYp(port));
+ }
+ 
+ static void gswip_port_set_pause(struct gswip_priv *priv, int port,
+ 				 bool tx_pause, bool rx_pause)
+ {
+ 	u32 mac_ctrl_0, mdio_phy;
+ 
+ 	if (tx_pause && rx_pause) {
+ 		mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_RXTX;
+ 		mdio_phy = GSWIP_MDIO_PHY_FCONTX_EN |
+ 			   GSWIP_MDIO_PHY_FCONRX_EN;
+ 	} else if (tx_pause) {
+ 		mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_TX;
+ 		mdio_phy = GSWIP_MDIO_PHY_FCONTX_EN |
+ 			   GSWIP_MDIO_PHY_FCONRX_DIS;
+ 	} else if (rx_pause) {
+ 		mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_RX;
+ 		mdio_phy = GSWIP_MDIO_PHY_FCONTX_DIS |
+ 			   GSWIP_MDIO_PHY_FCONRX_EN;
+ 	} else {
+ 		mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_NONE;
+ 		mdio_phy = GSWIP_MDIO_PHY_FCONTX_DIS |
+ 			   GSWIP_MDIO_PHY_FCONRX_DIS;
+ 	}
+ 
+ 	gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_FCON_MASK,
+ 			  mac_ctrl_0, GSWIP_MAC_CTRL_0p(port));
+ 	gswip_mdio_mask(priv,
+ 			GSWIP_MDIO_PHY_FCONTX_MASK |
+ 			GSWIP_MDIO_PHY_FCONRX_MASK,
+ 			mdio_phy, GSWIP_MDIO_PHYp(port));
+ }
+ 
  static void gswip_phylink_mac_config(struct dsa_switch *ds, int port,
    			     unsigned int mode,
    			     const struct phylink_link_state *state)
@@@ -1517,6 -1603,9 +1651,9 @@@
    	break;
    case PHY_INTERFACE_MODE_RMII:
    	miicfg |= GSWIP_MII_CFG_MODE_RMIIM;
+ 
+ 		/* Configure the RMII clock as output: */
+ 		miicfg |= GSWIP_MII_CFG_RMII_CLK;
    	break;
    case PHY_INTERFACE_MODE_RGMII:
    case PHY_INTERFACE_MODE_RGMII_ID:
@@@ -1524,15 -1613,16 +1661,19 @@@
    case PHY_INTERFACE_MODE_RGMII_TXID:
    	miicfg |= GSWIP_MII_CFG_MODE_RGMII;
    	break;
 +	case PHY_INTERFACE_MODE_GMII:
 +		miicfg |= GSWIP_MII_CFG_MODE_GMII;
 +		break;
    default:
    	dev_err(ds->dev,
    		"Unsupported interface: %d\n", state->interface);
    	return;
    }
- 	gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_MODE_MASK, miicfg, port);
+ 
+ 	gswip_mii_mask_cfg(priv,
+ 			   GSWIP_MII_CFG_MODE_MASK | GSWIP_MII_CFG_RMII_CLK |
+ 			   GSWIP_MII_CFG_RGMII_IBS | GSWIP_MII_CFG_LDCLKDIS,
+ 			   miicfg, port);
switch (state->interface) {
    case PHY_INTERFACE_MODE_RGMII_ID:
@@@ -1557,6 -1647,9 +1698,9 @@@ static void gswip_phylink_mac_link_down
    struct gswip_priv *priv = ds->priv;
gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, port);
+ 
+ 	if (!dsa_is_cpu_port(ds, port))
+ 		gswip_port_set_link(priv, port, false);
  }
static void gswip_phylink_mac_link_up(struct dsa_switch *ds, int port,
@@@ -1568,6 -1661,13 +1712,13 @@@
  {
    struct gswip_priv *priv = ds->priv;
+ 	if (!dsa_is_cpu_port(ds, port)) {
+ 		gswip_port_set_link(priv, port, true);
+ 		gswip_port_set_speed(priv, port, speed, interface);
+ 		gswip_port_set_duplex(priv, port, duplex);
+ 		gswip_port_set_pause(priv, port, tx_pause, rx_pause);
+ 	}
+ 
    gswip_mii_mask_cfg(priv, 0, GSWIP_MII_CFG_EN, port);
  }
@@@ -1639,7 -1739,7 +1790,7 @@@ static int gswip_get_sset_count(struct 
    return ARRAY_SIZE(gswip_rmon_cnt);
  }
-static const struct dsa_switch_ops gswip_switch_ops = {
 +static const struct dsa_switch_ops gswip_xrx200_switch_ops = {
    .get_tag_protocol	= gswip_get_tag_protocol,
    .setup			= gswip_setup,
    .port_enable		= gswip_port_enable,
@@@ -1654,31 -1754,7 +1805,31 @@@
    .port_fdb_add		= gswip_port_fdb_add,
    .port_fdb_del		= gswip_port_fdb_del,
    .port_fdb_dump		= gswip_port_fdb_dump,
 -	.phylink_validate	= gswip_phylink_validate,
 +	.phylink_validate	= gswip_xrx200_phylink_validate,
 +	.phylink_mac_config	= gswip_phylink_mac_config,
 +	.phylink_mac_link_down	= gswip_phylink_mac_link_down,
 +	.phylink_mac_link_up	= gswip_phylink_mac_link_up,
 +	.get_strings		= gswip_get_strings,
 +	.get_ethtool_stats	= gswip_get_ethtool_stats,
 +	.get_sset_count		= gswip_get_sset_count,
 +};
 +
 +static const struct dsa_switch_ops gswip_xrx300_switch_ops = {
 +	.get_tag_protocol	= gswip_get_tag_protocol,
 +	.setup			= gswip_setup,
 +	.port_enable		= gswip_port_enable,
 +	.port_disable		= gswip_port_disable,
 +	.port_bridge_join	= gswip_port_bridge_join,
 +	.port_bridge_leave	= gswip_port_bridge_leave,
 +	.port_fast_age		= gswip_port_fast_age,
 +	.port_vlan_filtering	= gswip_port_vlan_filtering,
 +	.port_vlan_add		= gswip_port_vlan_add,
 +	.port_vlan_del		= gswip_port_vlan_del,
 +	.port_stp_state_set	= gswip_port_stp_state_set,
 +	.port_fdb_add		= gswip_port_fdb_add,
 +	.port_fdb_del		= gswip_port_fdb_del,
 +	.port_fdb_dump		= gswip_port_fdb_dump,
 +	.phylink_validate	= gswip_xrx300_phylink_validate,
    .phylink_mac_config	= gswip_phylink_mac_config,
    .phylink_mac_link_down	= gswip_phylink_mac_link_down,
    .phylink_mac_link_up	= gswip_phylink_mac_link_up,
@@@ -1907,7 -1983,7 +2058,7 @@@ remove_gphy
  static int gswip_probe(struct platform_device *pdev)
  {
    struct gswip_priv *priv;
 -	struct device_node *mdio_np, *gphy_fw_np;
 +	struct device_node *np, *mdio_np, *gphy_fw_np;
    struct device *dev = &pdev->dev;
    int err;
    int i;
@@@ -1940,28 -2016,10 +2091,28 @@@
    priv->ds->dev = dev;
    priv->ds->num_ports = priv->hw_info->max_ports;
    priv->ds->priv = priv;
 -	priv->ds->ops = &gswip_switch_ops;
 +	priv->ds->ops = priv->hw_info->ops;
    priv->dev = dev;
    version = gswip_switch_r(priv, GSWIP_VERSION);
+	np = dev->of_node;
 +	switch (version) {
 +	case GSWIP_VERSION_2_0:
 +	case GSWIP_VERSION_2_1:
 +		if (!of_device_is_compatible(np, "lantiq,xrx200-gswip"))
 +			return -EINVAL;
 +		break;
 +	case GSWIP_VERSION_2_2:
 +	case GSWIP_VERSION_2_2_ETC:
 +		if (!of_device_is_compatible(np, "lantiq,xrx300-gswip") &&
 +		    !of_device_is_compatible(np, "lantiq,xrx330-gswip"))
 +			return -EINVAL;
 +		break;
 +	default:
 +		dev_err(dev, "unknown GSWIP version: 0x%x", version);
 +		return -ENOENT;
 +	}
 +
    /* bring up the mdio bus */
    gphy_fw_np = of_get_compatible_child(dev->of_node, "lantiq,gphy-fw");
    if (gphy_fw_np) {
@@@ -2039,19 -2097,10 +2190,19 @@@ static int gswip_remove(struct platform
  static const struct gswip_hw_info gswip_xrx200 = {
    .max_ports = 7,
    .cpu_port = 6,
 +	.ops = &gswip_xrx200_switch_ops,
 +};
 +
 +static const struct gswip_hw_info gswip_xrx300 = {
 +	.max_ports = 7,
 +	.cpu_port = 6,
 +	.ops = &gswip_xrx300_switch_ops,
  };
static const struct of_device_id gswip_of_match[] = {
    { .compatible = "lantiq,xrx200-gswip", .data = &gswip_xrx200 },
 +	{ .compatible = "lantiq,xrx300-gswip", .data = &gswip_xrx300 },
 +	{ .compatible = "lantiq,xrx330-gswip", .data = &gswip_xrx300 },
    {},
  };
  MODULE_DEVICE_TABLE(of, gswip_of_match);
diff --combined drivers/net/ethernet/broadcom/bcm4908_enet.c
index cbfed1d1477b,65981931a798..b7afac5c7ca7
--- a/drivers/net/ethernet/broadcom/bcm4908_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm4908_enet.c
@@@ -9,7 -9,6 +9,7 @@@
  #include <linux/interrupt.h>
  #include <linux/module.h>
  #include <linux/of.h>
 +#include <linux/of_net.h>
  #include <linux/platform_device.h>
  #include <linux/slab.h>
  #include <linux/string.h>
@@@ -54,7 -53,6 +54,7 @@@ struct bcm4908_enet_dma_ring 
    int length;
    u16 cfg_block;
    u16 st_ram_block;
 +	struct napi_struct napi;
union {
    	void *cpu_addr;
@@@ -68,8 -66,8 +68,8 @@@
  struct bcm4908_enet {
    struct device *dev;
    struct net_device *netdev;
 -	struct napi_struct napi;
    void __iomem *base;
 +	int irq_tx;
struct bcm4908_enet_dma_ring tx_ring;
    struct bcm4908_enet_dma_ring rx_ring;
@@@ -124,31 -122,24 +124,31 @@@ static void enet_umac_set(struct bcm490
   * Helpers
   */
-static void bcm4908_enet_intrs_on(struct bcm4908_enet *enet)
 +static void bcm4908_enet_set_mtu(struct bcm4908_enet *enet, int mtu)
  {
 -	enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS);
 +	enet_umac_write(enet, UMAC_MAX_FRAME_LEN, mtu + ENET_MAX_ETH_OVERHEAD);
  }
-static void bcm4908_enet_intrs_off(struct bcm4908_enet *enet)
 +/***
 + * DMA ring ops
 + */
 +
 +static void bcm4908_enet_dma_ring_intrs_on(struct bcm4908_enet *enet,
 +					   struct bcm4908_enet_dma_ring *ring)
  {
 -	enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_MASK, 0);
 +	enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, ENET_DMA_INT_DEFAULTS);
  }
-static void bcm4908_enet_intrs_ack(struct bcm4908_enet *enet)
 +static void bcm4908_enet_dma_ring_intrs_off(struct bcm4908_enet *enet,
 +					    struct bcm4908_enet_dma_ring *ring)
  {
 -	enet_write(enet, ENET_DMA_CH_RX_CFG + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS);
 +	enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_MASK, 0);
  }
-static void bcm4908_enet_set_mtu(struct bcm4908_enet *enet, int mtu)
 +static void bcm4908_enet_dma_ring_intrs_ack(struct bcm4908_enet *enet,
 +					    struct bcm4908_enet_dma_ring *ring)
  {
 -	enet_umac_write(enet, UMAC_MAX_FRAME_LEN, mtu + ENET_MAX_ETH_OVERHEAD);
 +	enet_write(enet, ring->cfg_block + ENET_DMA_CH_CFG_INT_STAT, ENET_DMA_INT_DEFAULTS);
  }
/***
@@@ -181,6 -172,7 +181,7 @@@ static int bcm4908_dma_alloc_buf_descs(
err_free_buf_descs:
    dma_free_coherent(dev, size, ring->cpu_addr, ring->dma_addr);
+ 	ring->cpu_addr = NULL;
    return -ENOMEM;
  }
@@@ -422,14 -414,11 +423,14 @@@ static void bcm4908_enet_gmac_init(stru
  static irqreturn_t bcm4908_enet_irq_handler(int irq, void *dev_id)
  {
    struct bcm4908_enet *enet = dev_id;
 +	struct bcm4908_enet_dma_ring *ring;
-	bcm4908_enet_intrs_off(enet);
 -	bcm4908_enet_intrs_ack(enet);
 +	ring = (irq == enet->irq_tx) ? &enet->tx_ring : &enet->rx_ring;
-	napi_schedule(&enet->napi);
 +	bcm4908_enet_dma_ring_intrs_off(enet, ring);
 +	bcm4908_enet_dma_ring_intrs_ack(enet, ring);
 +
 +	napi_schedule(&ring->napi);
return IRQ_HANDLED;
  }
@@@ -437,8 -426,6 +438,8 @@@
  static int bcm4908_enet_open(struct net_device *netdev)
  {
    struct bcm4908_enet *enet = netdev_priv(netdev);
 +	struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring;
 +	struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring;
    struct device *dev = enet->dev;
    int err;
@@@ -448,17 -435,6 +449,17 @@@
    	return err;
    }
+	if (enet->irq_tx > 0) {
 +		err = request_irq(enet->irq_tx, bcm4908_enet_irq_handler, 0,
 +				  "tx", enet);
 +		if (err) {
 +			dev_err(dev, "Failed to request IRQ %d: %d\n",
 +				enet->irq_tx, err);
 +			free_irq(netdev->irq, enet);
 +			return err;
 +		}
 +	}
 +
    bcm4908_enet_gmac_init(enet);
    bcm4908_enet_dma_reset(enet);
    bcm4908_enet_dma_init(enet);
@@@ -467,19 -443,14 +468,19 @@@
enet_set(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_MASTER_EN);
    enet_maskset(enet, ENET_DMA_CONTROLLER_CFG, ENET_DMA_CTRL_CFG_FLOWC_CH1_EN, 0);
 -	bcm4908_enet_dma_rx_ring_enable(enet, &enet->rx_ring);
-	napi_enable(&enet->napi);
 +	if (enet->irq_tx > 0) {
 +		napi_enable(&tx_ring->napi);
 +		bcm4908_enet_dma_ring_intrs_ack(enet, tx_ring);
 +		bcm4908_enet_dma_ring_intrs_on(enet, tx_ring);
 +	}
 +
 +	bcm4908_enet_dma_rx_ring_enable(enet, rx_ring);
 +	napi_enable(&rx_ring->napi);
    netif_carrier_on(netdev);
    netif_start_queue(netdev);
 -
 -	bcm4908_enet_intrs_ack(enet);
 -	bcm4908_enet_intrs_on(enet);
 +	bcm4908_enet_dma_ring_intrs_ack(enet, rx_ring);
 +	bcm4908_enet_dma_ring_intrs_on(enet, rx_ring);
return 0;
  }
@@@ -487,20 -458,16 +488,20 @@@
  static int bcm4908_enet_stop(struct net_device *netdev)
  {
    struct bcm4908_enet *enet = netdev_priv(netdev);
 +	struct bcm4908_enet_dma_ring *tx_ring = &enet->tx_ring;
 +	struct bcm4908_enet_dma_ring *rx_ring = &enet->rx_ring;
netif_stop_queue(netdev);
    netif_carrier_off(netdev);
 -	napi_disable(&enet->napi);
 +	napi_disable(&rx_ring->napi);
 +	napi_disable(&tx_ring->napi);
bcm4908_enet_dma_rx_ring_disable(enet, &enet->rx_ring);
    bcm4908_enet_dma_tx_ring_disable(enet, &enet->tx_ring);
bcm4908_enet_dma_uninit(enet);
+	free_irq(enet->irq_tx, enet);
    free_irq(enet->netdev->irq, enet);
return 0;
@@@ -517,19 -484,25 +518,19 @@@ static int bcm4908_enet_start_xmit(stru
    u32 tmp;
/* Free transmitted skbs */
 -	while (ring->read_idx != ring->write_idx) {
 -		buf_desc = &ring->buf_desc[ring->read_idx];
 -		if (le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN)
 -			break;
 -		slot = &ring->slots[ring->read_idx];
 -
 -		dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE);
 -		dev_kfree_skb(slot->skb);
 -		if (++ring->read_idx == ring->length)
 -			ring->read_idx = 0;
 -	}
 +	if (enet->irq_tx < 0 &&
 +	    !(le32_to_cpu(ring->buf_desc[ring->read_idx].ctl) & DMA_CTL_STATUS_OWN))
 +		napi_schedule(&enet->tx_ring.napi);
/* Don't use the last empty buf descriptor */
    if (ring->read_idx <= ring->write_idx)
    	free_buf_descs = ring->read_idx - ring->write_idx + ring->length;
    else
    	free_buf_descs = ring->read_idx - ring->write_idx;
 -	if (free_buf_descs < 2)
 +	if (free_buf_descs < 2) {
 +		netif_stop_queue(netdev);
    	return NETDEV_TX_BUSY;
 +	}
/* Hardware removes OWN bit after sending data */
    buf_desc = &ring->buf_desc[ring->write_idx];
@@@ -566,10 -539,9 +567,10 @@@
    return NETDEV_TX_OK;
  }
-static int bcm4908_enet_poll(struct napi_struct *napi, int weight)
 +static int bcm4908_enet_poll_rx(struct napi_struct *napi, int weight)
  {
 -	struct bcm4908_enet *enet = container_of(napi, struct bcm4908_enet, napi);
 +	struct bcm4908_enet_dma_ring *rx_ring = container_of(napi, struct bcm4908_enet_dma_ring, napi);
 +	struct bcm4908_enet *enet = container_of(rx_ring, struct bcm4908_enet, rx_ring);
    struct device *dev = enet->dev;
    int handled = 0;
@@@ -618,7 -590,7 +619,7 @@@
if (handled < weight) {
    	napi_complete_done(napi, handled);
 -		bcm4908_enet_intrs_on(enet);
 +		bcm4908_enet_dma_ring_intrs_on(enet, rx_ring);
    }
/* Hardware could disable ring if it run out of descriptors */
@@@ -627,42 -599,6 +628,42 @@@
    return handled;
  }
+static int bcm4908_enet_poll_tx(struct napi_struct *napi, int weight)
 +{
 +	struct bcm4908_enet_dma_ring *tx_ring = container_of(napi, struct bcm4908_enet_dma_ring, napi);
 +	struct bcm4908_enet *enet = container_of(tx_ring, struct bcm4908_enet, tx_ring);
 +	struct bcm4908_enet_dma_ring_bd *buf_desc;
 +	struct bcm4908_enet_dma_ring_slot *slot;
 +	struct device *dev = enet->dev;
 +	unsigned int bytes = 0;
 +	int handled = 0;
 +
 +	while (handled < weight && tx_ring->read_idx != tx_ring->write_idx) {
 +		buf_desc = &tx_ring->buf_desc[tx_ring->read_idx];
 +		if (le32_to_cpu(buf_desc->ctl) & DMA_CTL_STATUS_OWN)
 +			break;
 +		slot = &tx_ring->slots[tx_ring->read_idx];
 +
 +		dma_unmap_single(dev, slot->dma_addr, slot->len, DMA_TO_DEVICE);
 +		dev_kfree_skb(slot->skb);
 +		bytes += slot->len;
 +		if (++tx_ring->read_idx == tx_ring->length)
 +			tx_ring->read_idx = 0;
 +
 +		handled++;
 +	}
 +
 +	if (handled < weight) {
 +		napi_complete_done(napi, handled);
 +		bcm4908_enet_dma_ring_intrs_on(enet, tx_ring);
 +	}
 +
 +	if (netif_queue_stopped(enet->netdev))
 +		netif_wake_queue(enet->netdev);
 +
 +	return handled;
 +}
 +
  static int bcm4908_enet_change_mtu(struct net_device *netdev, int new_mtu)
  {
    struct bcm4908_enet *enet = netdev_priv(netdev);
@@@ -685,7 -621,6 +686,7 @@@ static int bcm4908_enet_probe(struct pl
    struct device *dev = &pdev->dev;
    struct net_device *netdev;
    struct bcm4908_enet *enet;
 +	const u8 *mac;
    int err;
netdev = devm_alloc_etherdev(dev, sizeof(*enet));
@@@ -706,8 -641,6 +707,8 @@@
    if (netdev->irq < 0)
    	return netdev->irq;
+	enet->irq_tx = platform_get_irq_byname(pdev, "tx");
 +
    dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
err = bcm4908_enet_dma_alloc(enet);
@@@ -715,17 -648,12 +716,17 @@@
    	return err;
SET_NETDEV_DEV(netdev, &pdev->dev);
 -	eth_hw_addr_random(netdev);
 +	mac = of_get_mac_address(dev->of_node);
 +	if (!IS_ERR(mac))
 +		ether_addr_copy(netdev->dev_addr, mac);
 +	else
 +		eth_hw_addr_random(netdev);
    netdev->netdev_ops = &bcm4908_enet_netdev_ops;
    netdev->min_mtu = ETH_ZLEN;
    netdev->mtu = ETH_DATA_LEN;
    netdev->max_mtu = ENET_MTU_MAX;
 -	netif_napi_add(netdev, &enet->napi, bcm4908_enet_poll, 64);
 +	netif_tx_napi_add(netdev, &enet->tx_ring.napi, bcm4908_enet_poll_tx, NAPI_POLL_WEIGHT);
 +	netif_napi_add(netdev, &enet->rx_ring.napi, bcm4908_enet_poll_rx, NAPI_POLL_WEIGHT);
err = register_netdev(netdev);
    if (err) {
@@@ -743,8 -671,7 +744,8 @@@ static int bcm4908_enet_remove(struct p
    struct bcm4908_enet *enet = platform_get_drvdata(pdev);
unregister_netdev(enet->netdev);
 -	netif_napi_del(&enet->napi);
 +	netif_napi_del(&enet->rx_ring.napi);
 +	netif_napi_del(&enet->tx_ring.napi);
    bcm4908_enet_dma_free(enet);
return 0;
diff --combined drivers/net/ethernet/cadence/macb_main.c
index f56f3dbbc015,6e5cf490c01d..ffd56a23f8b0
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@@ -694,22 -694,6 +694,22 @@@ static void macb_mac_config(struct phyl
    if (old_ncr ^ ncr)
    	macb_or_gem_writel(bp, NCR, ncr);
+	/* Disable AN for SGMII fixed link configuration, enable otherwise.
 +	 * Must be written after PCSSEL is set in NCFGR,
 +	 * otherwise writes will not take effect.
 +	 */
 +	if (macb_is_gem(bp) && state->interface == PHY_INTERFACE_MODE_SGMII) {
 +		u32 pcsctrl, old_pcsctrl;
 +
 +		old_pcsctrl = gem_readl(bp, PCSCNTRL);
 +		if (mode == MLO_AN_FIXED)
 +			pcsctrl = old_pcsctrl & ~GEM_BIT(PCSAUTONEG);
 +		else
 +			pcsctrl = old_pcsctrl | GEM_BIT(PCSAUTONEG);
 +		if (old_pcsctrl != pcsctrl)
 +			gem_writel(bp, PCSCNTRL, pcsctrl);
 +	}
 +
    spin_unlock_irqrestore(&bp->lock, flags);
  }
@@@ -863,15 -847,6 +863,15 @@@ static int macb_phylink_connect(struct 
    return 0;
  }
+static void macb_get_pcs_fixed_state(struct phylink_config *config,
 +				     struct phylink_link_state *state)
 +{
 +	struct net_device *ndev = to_net_dev(config->dev);
 +	struct macb *bp = netdev_priv(ndev);
 +
 +	state->link = (macb_readl(bp, NSR) & MACB_BIT(NSR_LINK)) != 0;
 +}
 +
  /* based on au1000_eth. c*/
  static int macb_mii_probe(struct net_device *dev)
  {
@@@ -880,11 -855,6 +880,11 @@@
    bp->phylink_config.dev = &dev->dev;
    bp->phylink_config.type = PHYLINK_NETDEV;
+	if (bp->phy_interface == PHY_INTERFACE_MODE_SGMII) {
 +		bp->phylink_config.poll_fixed_state = true;
 +		bp->phylink_config.get_fixed_state = macb_get_pcs_fixed_state;
 +	}
 +
    bp->phylink = phylink_create(&bp->phylink_config, bp->pdev->dev.fwnode,
    			     bp->phy_interface, &macb_phylink_ops);
    if (IS_ERR(bp->phylink)) {
@@@ -3269,6 -3239,9 +3269,9 @@@ static void gem_prog_cmp_regs(struct ma
    bool cmp_b = false;
    bool cmp_c = false;
+ 	if (!macb_is_gem(bp))
+ 		return;
+ 
    tp4sp_v = &(fs->h_u.tcp_ip4_spec);
    tp4sp_m = &(fs->m_u.tcp_ip4_spec);
@@@ -3637,6 -3610,7 +3640,7 @@@ static void macb_restore_features(struc
  {
    struct net_device *netdev = bp->dev;
    netdev_features_t features = netdev->features;
+ 	struct ethtool_rx_fs_item *item;
/* TX checksum offload */
    macb_set_txcsum_feature(bp, features);
@@@ -3645,6 -3619,9 +3649,9 @@@
    macb_set_rxcsum_feature(bp, features);
/* RX Flow Filters */
+ 	list_for_each_entry(item, &bp->rx_fs_list.list, list)
+ 		gem_prog_cmp_regs(bp, &item->fs);
+ 
    macb_set_rxflow_feature(bp, features);
  }
@@@ -3758,15 -3735,17 +3765,15 @@@ static int macb_clk_init(struct platfor
    	*hclk = devm_clk_get(&pdev->dev, "hclk");
    }
-	if (IS_ERR_OR_NULL(*pclk)) {
 -		err = IS_ERR(*pclk) ? PTR_ERR(*pclk) : -ENODEV;
 -		dev_err(&pdev->dev, "failed to get macb_clk (%d)\n", err);
 -		return err;
 -	}
 +	if (IS_ERR_OR_NULL(*pclk))
 +		return dev_err_probe(&pdev->dev,
 +				     IS_ERR(*pclk) ? PTR_ERR(*pclk) : -ENODEV,
 +				     "failed to get pclk\n");
-	if (IS_ERR_OR_NULL(*hclk)) {
 -		err = IS_ERR(*hclk) ? PTR_ERR(*hclk) : -ENODEV;
 -		dev_err(&pdev->dev, "failed to get hclk (%d)\n", err);
 -		return err;
 -	}
 +	if (IS_ERR_OR_NULL(*hclk))
 +		return dev_err_probe(&pdev->dev,
 +				     IS_ERR(*hclk) ? PTR_ERR(*hclk) : -ENODEV,
 +				     "failed to get hclk\n");
*tx_clk = devm_clk_get_optional(&pdev->dev, "tx_clk");
    if (IS_ERR(*tx_clk))
diff --combined drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
index d2ba40c19696,23a2ebdfd503..a7f291c89702
--- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c
@@@ -1794,11 -1794,25 +1794,25 @@@ int cudbg_collect_sge_indirect(struct c
    struct cudbg_buffer temp_buff = { 0 };
    struct sge_qbase_reg_field *sge_qbase;
    struct ireg_buf *ch_sge_dbg;
+ 	u8 padap_running = 0;
    int i, rc;
+ 	u32 size;
- 	rc = cudbg_get_buff(pdbg_init, dbg_buff,
- 			    sizeof(*ch_sge_dbg) * 2 + sizeof(*sge_qbase),
- 			    &temp_buff);
+ 	/* Accessing SGE_QBASE_MAP[0-3] and SGE_QBASE_INDEX regs can
+ 	 * lead to SGE missing doorbells under heavy traffic. So, only
+ 	 * collect them when adapter is idle.
+ 	 */
+ 	for_each_port(padap, i) {
+ 		padap_running = netif_running(padap->port[i]);
+ 		if (padap_running)
+ 			break;
+ 	}
+ 
+ 	size = sizeof(*ch_sge_dbg) * 2;
+ 	if (!padap_running)
+ 		size += sizeof(*sge_qbase);
+ 
+ 	rc = cudbg_get_buff(pdbg_init, dbg_buff, size, &temp_buff);
    if (rc)
    	return rc;
@@@ -1820,7 -1834,8 +1834,8 @@@
    	ch_sge_dbg++;
    }
- 	if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5) {
+ 	if (CHELSIO_CHIP_VERSION(padap->params.chip) > CHELSIO_T5 &&
+ 	    !padap_running) {
    	sge_qbase = (struct sge_qbase_reg_field *)ch_sge_dbg;
    	/* 1 addr reg SGE_QBASE_INDEX and 4 data reg
    	 * SGE_QBASE_MAP[0-3]
@@@ -3536,7 -3551,8 +3551,7 @@@ out
    }
out_free:
 -	if (data)
 -		kvfree(data);
 +	kvfree(data);
#undef QDESC_GET_FLQ
  #undef QDESC_GET_RXQ
diff --combined drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c446b63be503,b0dbe6dcaa7b..1c17fdc780e9
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@@ -62,7 -62,7 +62,7 @@@ static void hclge_sync_vlan_filter(stru
  static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev);
  static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle);
  static void hclge_rfs_filter_expire(struct hclge_dev *hdev);
 -static void hclge_clear_arfs_rules(struct hnae3_handle *handle);
 +static int hclge_clear_arfs_rules(struct hclge_dev *hdev);
  static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
    					   unsigned long *addr);
  static int hclge_set_default_loopback(struct hclge_dev *hdev);
@@@ -70,7 -70,6 +70,7 @@@
  static void hclge_sync_mac_table(struct hclge_dev *hdev);
  static void hclge_restore_hw_table(struct hclge_dev *hdev);
  static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
 +static void hclge_sync_fd_table(struct hclge_dev *hdev);
static struct hnae3_ae_algo ae_algo;
@@@ -385,62 -384,36 +385,62 @@@ static const struct key_info meta_data_
  };
static const struct key_info tuple_key_info[] = {
 -	{ OUTER_DST_MAC, 48},
 -	{ OUTER_SRC_MAC, 48},
 -	{ OUTER_VLAN_TAG_FST, 16},
 -	{ OUTER_VLAN_TAG_SEC, 16},
 -	{ OUTER_ETH_TYPE, 16},
 -	{ OUTER_L2_RSV, 16},
 -	{ OUTER_IP_TOS, 8},
 -	{ OUTER_IP_PROTO, 8},
 -	{ OUTER_SRC_IP, 32},
 -	{ OUTER_DST_IP, 32},
 -	{ OUTER_L3_RSV, 16},
 -	{ OUTER_SRC_PORT, 16},
 -	{ OUTER_DST_PORT, 16},
 -	{ OUTER_L4_RSV, 32},
 -	{ OUTER_TUN_VNI, 24},
 -	{ OUTER_TUN_FLOW_ID, 8},
 -	{ INNER_DST_MAC, 48},
 -	{ INNER_SRC_MAC, 48},
 -	{ INNER_VLAN_TAG_FST, 16},
 -	{ INNER_VLAN_TAG_SEC, 16},
 -	{ INNER_ETH_TYPE, 16},
 -	{ INNER_L2_RSV, 16},
 -	{ INNER_IP_TOS, 8},
 -	{ INNER_IP_PROTO, 8},
 -	{ INNER_SRC_IP, 32},
 -	{ INNER_DST_IP, 32},
 -	{ INNER_L3_RSV, 16},
 -	{ INNER_SRC_PORT, 16},
 -	{ INNER_DST_PORT, 16},
 -	{ INNER_L4_RSV, 32},
 +	{ OUTER_DST_MAC, 48, KEY_OPT_MAC, -1, -1 },
 +	{ OUTER_SRC_MAC, 48, KEY_OPT_MAC, -1, -1 },
 +	{ OUTER_VLAN_TAG_FST, 16, KEY_OPT_LE16, -1, -1 },
 +	{ OUTER_VLAN_TAG_SEC, 16, KEY_OPT_LE16, -1, -1 },
 +	{ OUTER_ETH_TYPE, 16, KEY_OPT_LE16, -1, -1 },
 +	{ OUTER_L2_RSV, 16, KEY_OPT_LE16, -1, -1 },
 +	{ OUTER_IP_TOS, 8, KEY_OPT_U8, -1, -1 },
 +	{ OUTER_IP_PROTO, 8, KEY_OPT_U8, -1, -1 },
 +	{ OUTER_SRC_IP, 32, KEY_OPT_IP, -1, -1 },
 +	{ OUTER_DST_IP, 32, KEY_OPT_IP, -1, -1 },
 +	{ OUTER_L3_RSV, 16, KEY_OPT_LE16, -1, -1 },
 +	{ OUTER_SRC_PORT, 16, KEY_OPT_LE16, -1, -1 },
 +	{ OUTER_DST_PORT, 16, KEY_OPT_LE16, -1, -1 },
 +	{ OUTER_L4_RSV, 32, KEY_OPT_LE32, -1, -1 },
 +	{ OUTER_TUN_VNI, 24, KEY_OPT_VNI, -1, -1 },
 +	{ OUTER_TUN_FLOW_ID, 8, KEY_OPT_U8, -1, -1 },
 +	{ INNER_DST_MAC, 48, KEY_OPT_MAC,
 +	  offsetof(struct hclge_fd_rule, tuples.dst_mac),
 +	  offsetof(struct hclge_fd_rule, tuples_mask.dst_mac) },
 +	{ INNER_SRC_MAC, 48, KEY_OPT_MAC,
 +	  offsetof(struct hclge_fd_rule, tuples.src_mac),
 +	  offsetof(struct hclge_fd_rule, tuples_mask.src_mac) },
 +	{ INNER_VLAN_TAG_FST, 16, KEY_OPT_LE16,
 +	  offsetof(struct hclge_fd_rule, tuples.vlan_tag1),
 +	  offsetof(struct hclge_fd_rule, tuples_mask.vlan_tag1) },
 +	{ INNER_VLAN_TAG_SEC, 16, KEY_OPT_LE16, -1, -1 },
 +	{ INNER_ETH_TYPE, 16, KEY_OPT_LE16,
 +	  offsetof(struct hclge_fd_rule, tuples.ether_proto),
 +	  offsetof(struct hclge_fd_rule, tuples_mask.ether_proto) },
 +	{ INNER_L2_RSV, 16, KEY_OPT_LE16,
 +	  offsetof(struct hclge_fd_rule, tuples.l2_user_def),
 +	  offsetof(struct hclge_fd_rule, tuples_mask.l2_user_def) },
 +	{ INNER_IP_TOS, 8, KEY_OPT_U8,
 +	  offsetof(struct hclge_fd_rule, tuples.ip_tos),
 +	  offsetof(struct hclge_fd_rule, tuples_mask.ip_tos) },
 +	{ INNER_IP_PROTO, 8, KEY_OPT_U8,
 +	  offsetof(struct hclge_fd_rule, tuples.ip_proto),
 +	  offsetof(struct hclge_fd_rule, tuples_mask.ip_proto) },
 +	{ INNER_SRC_IP, 32, KEY_OPT_IP,
 +	  offsetof(struct hclge_fd_rule, tuples.src_ip),
 +	  offsetof(struct hclge_fd_rule, tuples_mask.src_ip) },
 +	{ INNER_DST_IP, 32, KEY_OPT_IP,
 +	  offsetof(struct hclge_fd_rule, tuples.dst_ip),
 +	  offsetof(struct hclge_fd_rule, tuples_mask.dst_ip) },
 +	{ INNER_L3_RSV, 16, KEY_OPT_LE16,
 +	  offsetof(struct hclge_fd_rule, tuples.l3_user_def),
 +	  offsetof(struct hclge_fd_rule, tuples_mask.l3_user_def) },
 +	{ INNER_SRC_PORT, 16, KEY_OPT_LE16,
 +	  offsetof(struct hclge_fd_rule, tuples.src_port),
 +	  offsetof(struct hclge_fd_rule, tuples_mask.src_port) },
 +	{ INNER_DST_PORT, 16, KEY_OPT_LE16,
 +	  offsetof(struct hclge_fd_rule, tuples.dst_port),
 +	  offsetof(struct hclge_fd_rule, tuples_mask.dst_port) },
 +	{ INNER_L4_RSV, 32, KEY_OPT_LE32,
 +	  offsetof(struct hclge_fd_rule, tuples.l4_user_def),
 +	  offsetof(struct hclge_fd_rule, tuples_mask.l4_user_def) },
  };
static int hclge_mac_update_stats_defective(struct hclge_dev *hdev)
@@@ -553,6 -526,7 +553,6 @@@ static int hclge_mac_update_stats(struc
    int ret;
ret = hclge_mac_query_reg_num(hdev, &desc_num);
 -
    /* The firmware supports the new statistics acquisition method */
    if (!ret)
    	ret = hclge_mac_update_stats_complete(hdev, desc_num);
@@@ -777,12 -751,12 +777,12 @@@ static int hclge_get_sset_count(struct 
    	handle->flags |= HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK;
    	handle->flags |= HNAE3_SUPPORT_SERDES_PARALLEL_LOOPBACK;
-		if (hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv &&
 -		    hdev->hw.mac.phydev->drv->set_loopback) {
 +		if ((hdev->hw.mac.phydev && hdev->hw.mac.phydev->drv &&
 +		     hdev->hw.mac.phydev->drv->set_loopback) ||
 +		    hnae3_dev_phy_imp_supported(hdev)) {
    		count += 1;
    		handle->flags |= HNAE3_SUPPORT_PHY_LOOPBACK;
    	}
 -
    } else if (stringset == ETH_SS_STATS) {
    	count = ARRAY_SIZE(g_mac_stats_string) +
    		hclge_tqps_get_sset_count(handle, stringset);
@@@ -1176,10 -1150,8 +1176,10 @@@ static void hclge_parse_fiber_link_mode
    if (hnae3_dev_fec_supported(hdev))
    	hclge_convert_setting_fec(mac);
+	if (hnae3_dev_pause_supported(hdev))
 +		linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
 +
    linkmode_set_bit(ETHTOOL_LINK_MODE_FIBRE_BIT, mac->supported);
 -	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
    linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported);
  }
@@@ -1191,11 -1163,8 +1191,11 @@@ static void hclge_parse_backplane_link_
    hclge_convert_setting_kr(mac, speed_ability);
    if (hnae3_dev_fec_supported(hdev))
    	hclge_convert_setting_fec(mac);
 +
 +	if (hnae3_dev_pause_supported(hdev))
 +		linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
 +
    linkmode_set_bit(ETHTOOL_LINK_MODE_Backplane_BIT, mac->supported);
 -	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, mac->supported);
    linkmode_set_bit(ETHTOOL_LINK_MODE_FEC_NONE_BIT, mac->supported);
  }
@@@ -1224,13 -1193,10 +1224,13 @@@ static void hclge_parse_copper_link_mod
    	linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, supported);
    }
+	if (hnae3_dev_pause_supported(hdev)) {
 +		linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
 +		linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported);
 +	}
 +
    linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, supported);
    linkmode_set_bit(ETHTOOL_LINK_MODE_TP_BIT, supported);
 -	linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, supported);
 -	linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, supported);
  }
static void hclge_parse_link_mode(struct hclge_dev *hdev, u16 speed_ability)
@@@ -1290,6 -1256,9 +1290,6 @@@ static void hclge_parse_cfg(struct hclg
    req = (struct hclge_cfg_param_cmd *)desc[0].data;
/* get the configuration */
 -	cfg->vmdq_vport_num = hnae3_get_field(__le32_to_cpu(req->param[0]),
 -					      HCLGE_CFG_VMDQ_M,
 -					      HCLGE_CFG_VMDQ_S);
    cfg->tc_num = hnae3_get_field(__le32_to_cpu(req->param[0]),
    			      HCLGE_CFG_TC_NUM_M, HCLGE_CFG_TC_NUM_S);
    cfg->tqp_desc_num = hnae3_get_field(__le32_to_cpu(req->param[0]),
@@@ -1506,7 -1475,7 +1506,7 @@@ static void hclge_init_kdump_kernel_con
    	 "Running kdump kernel. Using minimal resources\n");
/* minimal queue pairs equals to the number of vports */
 -	hdev->num_tqps = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
 +	hdev->num_tqps = hdev->num_req_vfs + 1;
    hdev->num_tx_desc = HCLGE_MIN_TX_DESC;
    hdev->num_rx_desc = HCLGE_MIN_RX_DESC;
  }
@@@ -1521,6 -1490,7 +1521,6 @@@ static int hclge_configure(struct hclge
    if (ret)
    	return ret;
-	hdev->num_vmdq_vport = cfg.vmdq_vport_num;
    hdev->base_tqp_pid = 0;
    hdev->vf_rss_size_max = cfg.vf_rss_size_max;
    hdev->pf_rss_size_max = cfg.pf_rss_size_max;
@@@ -1771,7 -1741,7 +1771,7 @@@ static int hclge_map_tqp(struct hclge_d
    struct hclge_vport *vport = hdev->vport;
    u16 i, num_vport;
-	num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
 +	num_vport = hdev->num_req_vfs + 1;
    for (i = 0; i < num_vport; i++)	{
    	int ret;
@@@ -1813,7 -1783,7 +1813,7 @@@ static int hclge_alloc_vport(struct hcl
    int ret;
/* We need to alloc a vport for main NIC of PF */
 -	num_vport = hdev->num_vmdq_vport + hdev->num_req_vfs + 1;
 +	num_vport = hdev->num_req_vfs + 1;
if (hdev->num_tqps < num_vport) {
    	dev_err(&hdev->pdev->dev, "tqps(%u) is less than vports(%d)",
@@@ -2189,6 -2159,7 +2189,6 @@@ static int hclge_only_alloc_priv_buff(s
    		COMPENSATE_HALF_MPS_NUM * half_mps;
    min_rx_priv = round_up(min_rx_priv, HCLGE_BUF_SIZE_UNIT);
    rx_priv = round_down(rx_priv, HCLGE_BUF_SIZE_UNIT);
 -
    if (rx_priv < min_rx_priv)
    	return false;
@@@ -2217,7 -2188,7 +2217,7 @@@
  /* hclge_rx_buffer_calc: calculate the rx private buffer size for all TCs
   * @hdev: pointer to struct hclge_dev
   * @buf_alloc: pointer to buffer calculation data
 - * @return: 0: calculate sucessful, negative: fail
 + * @return: 0: calculate successful, negative: fail
   */
  static int hclge_rx_buffer_calc(struct hclge_dev *hdev,
    			struct hclge_pkt_buf_alloc *buf_alloc)
@@@ -2882,12 -2853,13 +2882,12 @@@ static int hclge_get_mac_phy_link(struc
static void hclge_update_link_status(struct hclge_dev *hdev)
  {
 +	struct hnae3_handle *rhandle = &hdev->vport[0].roce;
 +	struct hnae3_handle *handle = &hdev->vport[0].nic;
    struct hnae3_client *rclient = hdev->roce_client;
    struct hnae3_client *client = hdev->nic_client;
 -	struct hnae3_handle *rhandle;
 -	struct hnae3_handle *handle;
    int state;
    int ret;
 -	int i;
if (!client)
    	return;
@@@ -2902,23 -2874,25 +2902,23 @@@
    }
if (state != hdev->hw.mac.link) {
 -		for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
 -			handle = &hdev->vport[i].nic;
 -			client->ops->link_status_change(handle, state);
 -			hclge_config_mac_tnl_int(hdev, state);
 -			rhandle = &hdev->vport[i].roce;
 -			if (rclient && rclient->ops->link_status_change)
 -				rclient->ops->link_status_change(rhandle,
 -								 state);
 -		}
 +		client->ops->link_status_change(handle, state);
 +		hclge_config_mac_tnl_int(hdev, state);
 +		if (rclient && rclient->ops->link_status_change)
 +			rclient->ops->link_status_change(rhandle, state);
 +
    	hdev->hw.mac.link = state;
    }
clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state);
  }
-static void hclge_update_port_capability(struct hclge_mac *mac)
 +static void hclge_update_port_capability(struct hclge_dev *hdev,
 +					 struct hclge_mac *mac)
  {
 -	/* update fec ability by speed */
 -	hclge_convert_setting_fec(mac);
 +	if (hnae3_dev_fec_supported(hdev))
 +		/* update fec ability by speed */
 +		hclge_convert_setting_fec(mac);
/* firmware can not identify back plane type, the media type
     * read from configuration can help deal it
@@@ -3010,141 -2984,6 +3010,141 @@@ static int hclge_get_sfp_info(struct hc
    return 0;
  }
+/* hclge_get_phy_link_ksettings: query the PHY link settings via firmware cmd
 + * @handle: hnae3 handle used to look up the owning vport and device
 + * @cmd: ethtool link settings filled in from the command response
 + * @return: 0 on success, otherwise the error from hclge_cmd_send()
 + */
 +static int hclge_get_phy_link_ksettings(struct hnae3_handle *handle,
 +					struct ethtool_link_ksettings *cmd)
 +{
 +	struct hclge_vport *vport = hclge_get_vport(handle);
 +	struct hclge_desc desc[HCLGE_PHY_LINK_SETTING_BD_NUM];
 +	struct hclge_phy_link_ksetting_0_cmd *bd0;
 +	struct hclge_phy_link_ksetting_1_cmd *bd1;
 +	struct hclge_dev *hdev = vport->back;
 +	u32 legacy_mask;
 +	int ret;
 +
 +	/* desc[0] and desc[1] use the same opcode; desc[0] is flagged with
 +	 * HCLGE_CMD_FLAG_NEXT before both are sent in one request.
 +	 */
 +	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_PHY_LINK_KSETTING,
 +				   true);
 +	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
 +	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_PHY_LINK_KSETTING,
 +				   true);
 +
 +	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PHY_LINK_SETTING_BD_NUM);
 +	if (ret) {
 +		dev_err(&hdev->pdev->dev,
 +			"failed to get phy link ksetting, ret = %d.\n", ret);
 +		return ret;
 +	}
 +
 +	bd0 = (struct hclge_phy_link_ksetting_0_cmd *)desc[0].data;
 +	bd1 = (struct hclge_phy_link_ksetting_1_cmd *)desc[1].data;
 +
 +	/* the link mode masks are carried as legacy 32-bit words */
 +	legacy_mask = le32_to_cpu(bd0->supported);
 +	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
 +						legacy_mask);
 +	legacy_mask = le32_to_cpu(bd0->advertising);
 +	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
 +						legacy_mask);
 +	legacy_mask = le32_to_cpu(bd0->lp_advertising);
 +	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising,
 +						legacy_mask);
 +
 +	/* scalar settings taken from the first descriptor */
 +	cmd->base.autoneg = bd0->autoneg;
 +	cmd->base.speed = le32_to_cpu(bd0->speed);
 +	cmd->base.duplex = bd0->duplex;
 +	cmd->base.port = bd0->port;
 +	cmd->base.transceiver = bd0->transceiver;
 +	cmd->base.phy_address = bd0->phy_address;
 +	cmd->base.eth_tp_mdix = bd0->eth_tp_mdix;
 +	cmd->base.eth_tp_mdix_ctrl = bd0->eth_tp_mdix_ctrl;
 +
 +	/* master/slave information lives in the second descriptor */
 +	cmd->base.master_slave_cfg = bd1->master_slave_cfg;
 +	cmd->base.master_slave_state = bd1->master_slave_state;
 +
 +	return 0;
 +}
 +
 +/* hclge_set_phy_link_ksettings: write new PHY link settings via firmware cmd
 + * @handle: hnae3 handle used to look up the owning vport and device
 + * @cmd: requested ethtool link settings
 + * @return: 0 on success, -EINVAL for a rejected forced speed/duplex,
 + * otherwise the error from hclge_cmd_send()
 + */
 +static int
 +hclge_set_phy_link_ksettings(struct hnae3_handle *handle,
 +			     const struct ethtool_link_ksettings *cmd)
 +{
 +	struct hclge_vport *vport = hclge_get_vport(handle);
 +	struct hclge_desc desc[HCLGE_PHY_LINK_SETTING_BD_NUM];
 +	struct hclge_phy_link_ksetting_0_cmd *bd0;
 +	struct hclge_phy_link_ksetting_1_cmd *bd1;
 +	struct hclge_dev *hdev = vport->back;
 +	struct hclge_mac *mac = &hdev->hw.mac;
 +	u32 legacy_adv;
 +	int ret;
 +
 +	/* with autoneg disabled only 10M/100M at half or full duplex pass */
 +	if (cmd->base.autoneg == AUTONEG_DISABLE) {
 +		bool speed_ok = cmd->base.speed == SPEED_100 ||
 +				cmd->base.speed == SPEED_10;
 +		bool duplex_ok = cmd->base.duplex == DUPLEX_HALF ||
 +				 cmd->base.duplex == DUPLEX_FULL;
 +
 +		if (!speed_ok || !duplex_ok)
 +			return -EINVAL;
 +	}
 +
 +	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_PHY_LINK_KSETTING,
 +				   false);
 +	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
 +	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_PHY_LINK_KSETTING,
 +				   false);
 +
 +	/* the advertising mask is sent as a legacy 32-bit word */
 +	ethtool_convert_link_mode_to_legacy_u32(&legacy_adv,
 +						cmd->link_modes.advertising);
 +
 +	bd0 = (struct hclge_phy_link_ksetting_0_cmd *)desc[0].data;
 +	bd0->autoneg = cmd->base.autoneg;
 +	bd0->speed = cpu_to_le32(cmd->base.speed);
 +	bd0->duplex = cmd->base.duplex;
 +	bd0->advertising = cpu_to_le32(legacy_adv);
 +	bd0->eth_tp_mdix_ctrl = cmd->base.eth_tp_mdix_ctrl;
 +
 +	bd1 = (struct hclge_phy_link_ksetting_1_cmd *)desc[1].data;
 +	bd1->master_slave_cfg = cmd->base.master_slave_cfg;
 +
 +	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_PHY_LINK_SETTING_BD_NUM);
 +	if (ret) {
 +		dev_err(&hdev->pdev->dev,
 +			"failed to set phy link ksettings, ret = %d.\n", ret);
 +		return ret;
 +	}
 +
 +	/* the command was accepted, so mirror the request in the MAC state */
 +	mac->autoneg = cmd->base.autoneg;
 +	mac->speed = cmd->base.speed;
 +	mac->duplex = cmd->base.duplex;
 +	linkmode_copy(mac->advertising, cmd->link_modes.advertising);
 +
 +	return 0;
 +}
 +
 +/* hclge_update_tp_port_info: refresh cached autoneg/speed/duplex from the PHY
 + * @hdev: pointer to struct hclge_dev
 + * @return: 0 when hnae3_dev_phy_imp_supported() is false or the query
 + * succeeds, otherwise the error from hclge_get_phy_link_ksettings()
 + */
 +static int hclge_update_tp_port_info(struct hclge_dev *hdev)
 +{
 +	struct hclge_mac *mac = &hdev->hw.mac;
 +	struct ethtool_link_ksettings ks;
 +	int ret;
 +
 +	if (!hnae3_dev_phy_imp_supported(hdev))
 +		return 0;
 +
 +	ret = hclge_get_phy_link_ksettings(&hdev->vport->nic, &ks);
 +	if (!ret) {
 +		mac->autoneg = ks.base.autoneg;
 +		mac->speed = ks.base.speed;
 +		mac->duplex = ks.base.duplex;
 +	}
 +
 +	return ret;
 +}
 +
 +/* hclge_tp_port_init: push the cached MAC link settings to the PHY
 + * @hdev: pointer to struct hclge_dev
 + * @return: 0 when hnae3_dev_phy_imp_supported() is false, otherwise the
 + * result of hclge_set_phy_link_ksettings()
 + */
 +static int hclge_tp_port_init(struct hclge_dev *hdev)
 +{
 +	/* hclge_set_phy_link_ksettings() also reads base.eth_tp_mdix_ctrl and
 +	 * base.master_slave_cfg, which are not filled in below. Zero the whole
 +	 * structure so no uninitialized stack contents end up in the command.
 +	 */
 +	struct ethtool_link_ksettings cmd = {};
 +
 +	if (!hnae3_dev_phy_imp_supported(hdev))
 +		return 0;
 +
 +	cmd.base.autoneg = hdev->hw.mac.autoneg;
 +	cmd.base.speed = hdev->hw.mac.speed;
 +	cmd.base.duplex = hdev->hw.mac.duplex;
 +	linkmode_copy(cmd.link_modes.advertising, hdev->hw.mac.advertising);
 +
 +	return hclge_set_phy_link_ksettings(&hdev->vport->nic, &cmd);
 +}
 +
  static int hclge_update_port_info(struct hclge_dev *hdev)
  {
    struct hclge_mac *mac = &hdev->hw.mac;
@@@ -3153,7 -2992,7 +3153,7 @@@
/* get the port info from SFP cmd if not copper port */
    if (mac->media_type == HNAE3_MEDIA_TYPE_COPPER)
 -		return 0;
 +		return hclge_update_tp_port_info(hdev);
/* if IMP does not support get SFP/qSFP info, return directly */
    if (!hdev->support_sfp_query)
@@@ -3173,7 -3012,7 +3173,7 @@@
if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
    	if (mac->speed_type == QUERY_ACTIVE_SPEED) {
 -			hclge_update_port_capability(mac);
 +			hclge_update_port_capability(hdev, mac);
    		return 0;
    	}
    	return hclge_cfg_mac_speed_dup(hdev, mac->speed,
@@@ -3358,7 -3197,7 +3358,7 @@@ static irqreturn_t hclge_misc_irq_handl
    	 * caused this event. Therefore, we will do below for now:
    	 * 1. Assert HNAE3_UNKNOWN_RESET type of reset. This means we
    	 *    have defered type of reset to be used.
 -		 * 2. Schedule the reset serivce task.
 +		 * 2. Schedule the reset service task.
    	 * 3. When service task receives  HNAE3_UNKNOWN_RESET type it
    	 *    will fetch the correct type of reset.  This would be done
    	 *    by first decoding the types of errors.
@@@ -3486,9 -3325,8 +3486,9 @@@ static void hclge_misc_irq_uninit(struc
  int hclge_notify_client(struct hclge_dev *hdev,
    		enum hnae3_reset_notify_type type)
  {
 +	struct hnae3_handle *handle = &hdev->vport[0].nic;
    struct hnae3_client *client = hdev->nic_client;
 -	u16 i;
 +	int ret;
if (!test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state) || !client)
    	return 0;
@@@ -3496,20 -3334,27 +3496,20 @@@
    if (!client->ops->reset_notify)
    	return -EOPNOTSUPP;
-	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
 -		struct hnae3_handle *handle = &hdev->vport[i].nic;
 -		int ret;
 -
 -		ret = client->ops->reset_notify(handle, type);
 -		if (ret) {
 -			dev_err(&hdev->pdev->dev,
 -				"notify nic client failed %d(%d)\n", type, ret);
 -			return ret;
 -		}
 -	}
 +	ret = client->ops->reset_notify(handle, type);
 +	if (ret)
 +		dev_err(&hdev->pdev->dev, "notify nic client failed %d(%d)\n",
 +			type, ret);
-	return 0;
 +	return ret;
  }
static int hclge_notify_roce_client(struct hclge_dev *hdev,
    			    enum hnae3_reset_notify_type type)
  {
 +	struct hnae3_handle *handle = &hdev->vport[0].roce;
    struct hnae3_client *client = hdev->roce_client;
    int ret;
 -	u16 i;
if (!test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state) || !client)
    	return 0;
@@@ -3517,10 -3362,17 +3517,10 @@@
    if (!client->ops->reset_notify)
    	return -EOPNOTSUPP;
-	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
 -		struct hnae3_handle *handle = &hdev->vport[i].roce;
 -
 -		ret = client->ops->reset_notify(handle, type);
 -		if (ret) {
 -			dev_err(&hdev->pdev->dev,
 -				"notify roce client failed %d(%d)",
 -				type, ret);
 -			return ret;
 -		}
 -	}
 +	ret = client->ops->reset_notify(handle, type);
 +	if (ret)
 +		dev_err(&hdev->pdev->dev, "notify roce client failed %d(%d)",
 +			type, ret);
return ret;
  }
@@@ -3588,7 -3440,7 +3588,7 @@@ static int hclge_set_all_vf_rst(struct 
  {
    int i;
-	for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++) {
 +	for (i = HCLGE_VF_VPORT_START_NUM; i < hdev->num_alloc_vport; i++) {
    	struct hclge_vport *vport = &hdev->vport[i];
    	int ret;
@@@ -3669,12 -3521,14 +3669,12 @@@ void hclge_report_hw_error(struct hclge
    		   enum hnae3_hw_error_type type)
  {
    struct hnae3_client *client = hdev->nic_client;
 -	u16 i;
if (!client || !client->ops->process_hw_error ||
        !test_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state))
    	return;
-	for (i = 0; i < hdev->num_vmdq_vport + 1; i++)
 -		client->ops->process_hw_error(&hdev->vport[i].nic, type);
 +	client->ops->process_hw_error(&hdev->vport[0].nic, type);
  }
static void hclge_handle_imp_error(struct hclge_dev *hdev)
@@@ -3940,21 -3794,6 +3940,21 @@@ static bool hclge_reset_err_handle(stru
    return false;
  }
+/* hclge_update_reset_level: promote a pending default reset request
 + * @hdev: pointer to struct hclge_dev
 + *
 + * If default_reset_request holds a reset level, flag that level in
 + * reset_request so it is handled as soon as possible; some errors can
 + * only be fixed by that kind of reset.
 + */
 +static void hclge_update_reset_level(struct hclge_dev *hdev)
 +{
 +	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
 +	enum hnae3_reset_type level;
 +
 +	level = hclge_get_reset_level(ae_dev, &hdev->default_reset_request);
 +	if (level == HNAE3_NONE_RESET)
 +		return;
 +
 +	set_bit(level, &hdev->reset_request);
 +}
 +
  static int hclge_set_rst_done(struct hclge_dev *hdev)
  {
    struct hclge_pf_rst_done_cmd *req;
@@@ -4042,6 -3881,8 +4042,6 @@@ static int hclge_reset_prepare(struct h
static int hclge_reset_rebuild(struct hclge_dev *hdev)
  {
 -	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
 -	enum hnae3_reset_type reset_level;
    int ret;
hdev->rst_stats.hw_reset_done_cnt++;
@@@ -4085,7 -3926,14 +4085,7 @@@
    hdev->rst_stats.reset_done_cnt++;
    clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state);
-	/* if default_reset_request has a higher level reset request,
 -	 * it should be handled as soon as possible. since some errors
 -	 * need this kind of reset to fix.
 -	 */
 -	reset_level = hclge_get_reset_level(ae_dev,
 -					    &hdev->default_reset_request);
 -	if (reset_level != HNAE3_NONE_RESET)
 -		set_bit(reset_level, &hdev->reset_request);
 +	hclge_update_reset_level(hdev);
return 0;
  }
@@@ -4118,7 -3966,6 +4118,6 @@@ static void hclge_reset_event(struct pc
     *    normalcy is to reset.
     * 2. A new reset request from the stack due to timeout
     *
- 	 * For the first case,error event might not have ae handle available.
     * check if this is a new reset request and we are not here just because
     * last reset attempt did not succeed and watchdog hit us again. We will
     * know this if last reset request did not occur very recently (watchdog
@@@ -4128,14 -3975,14 +4127,14 @@@
     * want to make sure we throttle the reset request. Therefore, we will
     * not allow it again before 3*HZ times.
     */
- 	if (!handle)
- 		handle = &hdev->vport[0].nic;
if (time_before(jiffies, (hdev->last_reset_time +
    			  HCLGE_RESET_INTERVAL))) {
    	mod_timer(&hdev->reset_timer, jiffies + HCLGE_RESET_INTERVAL);
    	return;
- 	} else if (hdev->default_reset_request) {
+ 	}
+ 
+ 	if (hdev->default_reset_request) {
    	hdev->reset_level =
    		hclge_get_reset_level(ae_dev,
    				      &hdev->default_reset_request);
@@@ -4247,7 -4094,6 +4246,7 @@@ static void hclge_periodic_service_task
    hclge_update_link_status(hdev);
    hclge_sync_mac_table(hdev);
    hclge_sync_promisc_mode(hdev);
 +	hclge_sync_fd_table(hdev);
if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) {
    	delta = jiffies - hdev->last_serv_processed;
@@@ -4892,44 -4738,58 +4891,44 @@@ int hclge_rss_init_hw(struct hclge_dev
void hclge_rss_indir_init_cfg(struct hclge_dev *hdev)
  {
 -	struct hclge_vport *vport = hdev->vport;
 -	int i, j;
 +	struct hclge_vport *vport = &hdev->vport[0];
 +	int i;
-	for (j = 0; j < hdev->num_vmdq_vport + 1; j++) {
 -		for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++)
 -			vport[j].rss_indirection_tbl[i] =
 -				i % vport[j].alloc_rss_size;
 -	}
 +	for (i = 0; i < hdev->ae_dev->dev_specs.rss_ind_tbl_size; i++)
 +		vport->rss_indirection_tbl[i] = i % vport->alloc_rss_size;
  }
static int hclge_rss_init_cfg(struct hclge_dev *hdev)
  {
    u16 rss_ind_tbl_size = hdev->ae_dev->dev_specs.rss_ind_tbl_size;
 -	int i, rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
 -	struct hclge_vport *vport = hdev->vport;
 +	int rss_algo = HCLGE_RSS_HASH_ALGO_TOEPLITZ;
 +	struct hclge_vport *vport = &hdev->vport[0];
 +	u16 *rss_ind_tbl;
if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
    	rss_algo = HCLGE_RSS_HASH_ALGO_SIMPLE;
-	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
 -		u16 *rss_ind_tbl;
 -
 -		vport[i].rss_tuple_sets.ipv4_tcp_en =
 -			HCLGE_RSS_INPUT_TUPLE_OTHER;
 -		vport[i].rss_tuple_sets.ipv4_udp_en =
 -			HCLGE_RSS_INPUT_TUPLE_OTHER;
 -		vport[i].rss_tuple_sets.ipv4_sctp_en =
 -			HCLGE_RSS_INPUT_TUPLE_SCTP;
 -		vport[i].rss_tuple_sets.ipv4_fragment_en =
 -			HCLGE_RSS_INPUT_TUPLE_OTHER;
 -		vport[i].rss_tuple_sets.ipv6_tcp_en =
 -			HCLGE_RSS_INPUT_TUPLE_OTHER;
 -		vport[i].rss_tuple_sets.ipv6_udp_en =
 -			HCLGE_RSS_INPUT_TUPLE_OTHER;
 -		vport[i].rss_tuple_sets.ipv6_sctp_en =
 -			hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ?
 -			HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT :
 -			HCLGE_RSS_INPUT_TUPLE_SCTP;
 -		vport[i].rss_tuple_sets.ipv6_fragment_en =
 -			HCLGE_RSS_INPUT_TUPLE_OTHER;
 -
 -		vport[i].rss_algo = rss_algo;
 -
 -		rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size,
 -					   sizeof(*rss_ind_tbl), GFP_KERNEL);
 -		if (!rss_ind_tbl)
 -			return -ENOMEM;
 +	vport->rss_tuple_sets.ipv4_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
 +	vport->rss_tuple_sets.ipv4_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
 +	vport->rss_tuple_sets.ipv4_sctp_en = HCLGE_RSS_INPUT_TUPLE_SCTP;
 +	vport->rss_tuple_sets.ipv4_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
 +	vport->rss_tuple_sets.ipv6_tcp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
 +	vport->rss_tuple_sets.ipv6_udp_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
 +	vport->rss_tuple_sets.ipv6_sctp_en =
 +		hdev->ae_dev->dev_version <= HNAE3_DEVICE_VERSION_V2 ?
 +		HCLGE_RSS_INPUT_TUPLE_SCTP_NO_PORT :
 +		HCLGE_RSS_INPUT_TUPLE_SCTP;
 +	vport->rss_tuple_sets.ipv6_fragment_en = HCLGE_RSS_INPUT_TUPLE_OTHER;
 +
 +	vport->rss_algo = rss_algo;
 +
 +	rss_ind_tbl = devm_kcalloc(&hdev->pdev->dev, rss_ind_tbl_size,
 +				   sizeof(*rss_ind_tbl), GFP_KERNEL);
 +	if (!rss_ind_tbl)
 +		return -ENOMEM;
-		vport[i].rss_indirection_tbl = rss_ind_tbl;
 -		memcpy(vport[i].rss_hash_key, hclge_hash_key,
 -		       HCLGE_RSS_KEY_SIZE);
 -	}
 +	vport->rss_indirection_tbl = rss_ind_tbl;
 +	memcpy(vport->rss_hash_key, hclge_hash_key, HCLGE_RSS_KEY_SIZE);
hclge_rss_indir_init_cfg(hdev);
@@@ -5135,285 -4995,6 +5134,285 @@@ static void hclge_request_update_promis
    set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
  }
+/* hclge_sync_fd_state: reset fd_active_type once no rule is left
 + * @hdev: pointer to struct hclge_dev
 + */
 +static void hclge_sync_fd_state(struct hclge_dev *hdev)
 +{
 +	if (!hlist_empty(&hdev->fd_rule_list))
 +		return;
 +
 +	hdev->fd_active_type = HCLGE_FD_RULE_NONE;
 +}
 +
 +/* hclge_fd_inc_rule_cnt: mark a rule location as used and count it
 + * @hdev: pointer to struct hclge_dev
 + * @location: rule location, used as the bit index into fd_bmap
 + *
 + * Nothing changes when the location is already marked in fd_bmap.
 + */
 +static void hclge_fd_inc_rule_cnt(struct hclge_dev *hdev, u16 location)
 +{
 +	if (test_bit(location, hdev->fd_bmap))
 +		return;
 +
 +	set_bit(location, hdev->fd_bmap);
 +	hdev->hclge_fd_rule_num++;
 +}
 +
 +/* hclge_fd_dec_rule_cnt: mark a rule location as free and uncount it
 + * @hdev: pointer to struct hclge_dev
 + * @location: rule location, used as the bit index into fd_bmap
 + *
 + * Nothing changes when the location is not marked in fd_bmap.
 + */
 +static void hclge_fd_dec_rule_cnt(struct hclge_dev *hdev, u16 location)
 +{
 +	if (!test_bit(location, hdev->fd_bmap))
 +		return;
 +
 +	clear_bit(location, hdev->fd_bmap);
 +	hdev->hclge_fd_rule_num--;
 +}
 +
 +/* hclge_fd_free_node: unlink a rule node from its list and free it
 + * @hdev: pointer to struct hclge_dev
 + * @rule: rule node to remove; it is freed and must not be used afterwards
 + */
 +static void hclge_fd_free_node(struct hclge_dev *hdev,
 +			       struct hclge_fd_rule *rule)
 +{
 +	hlist_del(&rule->rule_node);
 +	kfree(rule);
 +	/* fd_rule_list may be empty now, let fd_active_type follow */
 +	hclge_sync_fd_state(hdev);
 +}
 +
 +/* hclge_update_fd_rule_node: apply a state change to an existing rule node
 + * @hdev: pointer to struct hclge_dev
 + * @old_rule: node that is currently linked in the rule list
 + * @new_rule: replacement rule; only used (copied into @old_rule and then
 + * freed) when @state is HCLGE_FD_TO_ADD or HCLGE_FD_ACTIVE
 + * @state: requested state for the rule
 + */
 +static void hclge_update_fd_rule_node(struct hclge_dev *hdev,
 +				      struct hclge_fd_rule *old_rule,
 +				      struct hclge_fd_rule *new_rule,
 +				      enum HCLGE_FD_NODE_STATE state)
 +{
 +	switch (state) {
 +	case HCLGE_FD_TO_ADD:
 +	case HCLGE_FD_ACTIVE:
 +		/* 1) if the new state is TO_ADD, just replace the old rule
 +		 * with the same location, no matter its state, because the
 +		 * new rule will be configured to the hardware.
 +		 * 2) if the new state is ACTIVE, it means the new rule
 +		 * has been configured to the hardware, so just replace
 +		 * the old rule node with the same location.
 +		 * 3) no new node is added to the list, so it's
 +		 * unnecessary to update the rule number and fd_bmap.
 +		 *
 +		 * The list links of old_rule are copied into new_rule first,
 +		 * so the memcpy() below overwrites the rule content while
 +		 * old_rule stays linked at the same position.
 +		 */
 +		new_rule->rule_node.next = old_rule->rule_node.next;
 +		new_rule->rule_node.pprev = old_rule->rule_node.pprev;
 +		memcpy(old_rule, new_rule, sizeof(*old_rule));
 +		kfree(new_rule);
 +		break;
 +	case HCLGE_FD_DELETED:
 +		/* drop the location from fd_bmap/rule count, then free it */
 +		hclge_fd_dec_rule_cnt(hdev, old_rule->location);
 +		hclge_fd_free_node(hdev, old_rule);
 +		break;
 +	case HCLGE_FD_TO_DEL:
 +		/* if new request is TO_DEL, and the old rule exists
 +		 * 1) the state of old rule is TO_DEL, we need do nothing,
 +		 * because we delete rule by location, other rule content
 +		 * is unnecessary.
 +		 * 2) the state of old rule is ACTIVE, we need to change its
 +		 * state to TO_DEL, so the rule will be deleted when periodic
 +		 * task being scheduled.
 +		 * 3) the state of old rule is TO_ADD, it means the rule hasn't
 +		 * been added to hardware, so we just delete the rule node from
 +		 * fd_rule_list directly.
 +		 */
 +		if (old_rule->state == HCLGE_FD_TO_ADD) {
 +			hclge_fd_dec_rule_cnt(hdev, old_rule->location);
 +			hclge_fd_free_node(hdev, old_rule);
 +			return;
 +		}
 +		old_rule->state = HCLGE_FD_TO_DEL;
 +		break;
 +	}
 +}
 +
 +/* hclge_find_fd_rule: look up the rule stored at a given location
 + * @hlist: rule list, kept in ascending order of rule->location
 + * @location: location to search for
 + * @parent: updated to the last visited node whose location is smaller
 + * than @location; left untouched when no such node is visited
 + * @return: the matching rule, or NULL when the location is not in the list
 + */
 +static struct hclge_fd_rule *hclge_find_fd_rule(struct hlist_head *hlist,
 +						u16 location,
 +						struct hclge_fd_rule **parent)
 +{
 +	struct hclge_fd_rule *cur;
 +	struct hlist_node *tmp;
 +
 +	hlist_for_each_entry_safe(cur, tmp, hlist, rule_node) {
 +		/* the first node at or past location settles the search */
 +		if (cur->location >= location)
 +			return cur->location == location ? cur : NULL;
 +
 +		/* remember the predecessor, it is used to keep the nodes
 +		 * in fd_rule_list in ascending order on insertion.
 +		 */
 +		*parent = cur;
 +	}
 +
 +	return NULL;
 +}
 +
 +/* hclge_fd_insert_rule_node: link a rule into the list after its parent
 + * @hlist: rule list, ordered ascending by rule->location
 + * @rule: rule to insert
 + * @parent: node to insert behind, or NULL to insert at the list head
 + */
 +static void hclge_fd_insert_rule_node(struct hlist_head *hlist,
 +				      struct hclge_fd_rule *rule,
 +				      struct hclge_fd_rule *parent)
 +{
 +	INIT_HLIST_NODE(&rule->rule_node);
 +
 +	if (!parent) {
 +		hlist_add_head(&rule->rule_node, hlist);
 +		return;
 +	}
 +
 +	hlist_add_behind(&rule->rule_node, &parent->rule_node);
 +}
 +
 +/* hclge_fd_set_user_def_cmd: send the user-def offset configuration
 + * @hdev: pointer to struct hclge_dev
 + * @cfg: array of user-def configs; entries 0, 1 and 2 fill the ol2, ol3
 + * and ol4 words of the command respectively
 + * @return: 0 on success, otherwise the error from hclge_cmd_send()
 + */
 +static int hclge_fd_set_user_def_cmd(struct hclge_dev *hdev,
 +				     struct hclge_fd_user_def_cfg *cfg)
 +{
 +	struct hclge_fd_user_def_cfg_cmd *req;
 +	u16 ol2 = 0, ol3 = 0, ol4 = 0;
 +	struct hclge_desc desc;
 +	int ret;
 +
 +	/* each word: enable bit set while referenced, plus the offset field */
 +	hnae3_set_bit(ol2, HCLGE_FD_USER_DEF_EN_B, cfg[0].ref_cnt > 0);
 +	hnae3_set_field(ol2, HCLGE_FD_USER_DEF_OFT_M,
 +			HCLGE_FD_USER_DEF_OFT_S, cfg[0].offset);
 +
 +	hnae3_set_bit(ol3, HCLGE_FD_USER_DEF_EN_B, cfg[1].ref_cnt > 0);
 +	hnae3_set_field(ol3, HCLGE_FD_USER_DEF_OFT_M,
 +			HCLGE_FD_USER_DEF_OFT_S, cfg[1].offset);
 +
 +	hnae3_set_bit(ol4, HCLGE_FD_USER_DEF_EN_B, cfg[2].ref_cnt > 0);
 +	hnae3_set_field(ol4, HCLGE_FD_USER_DEF_OFT_M,
 +			HCLGE_FD_USER_DEF_OFT_S, cfg[2].offset);
 +
 +	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_FD_USER_DEF_OP, false);
 +	req = (struct hclge_fd_user_def_cfg_cmd *)desc.data;
 +	req->ol2_cfg = cpu_to_le16(ol2);
 +	req->ol3_cfg = cpu_to_le16(ol3);
 +	req->ol4_cfg = cpu_to_le16(ol4);
 +
 +	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 +	if (ret)
 +		dev_err(&hdev->pdev->dev,
 +			"failed to set fd user def data, ret= %d\n", ret);
 +	return ret;
 +}
 +
 +/* hclge_sync_fd_user_def_cfg: send a pending user-def config change
 + * @hdev: pointer to struct hclge_dev
 + * @locked: true when the caller already holds fd_rule_lock
 + *
 + * Does nothing unless HCLGE_STATE_FD_USER_DEF_CHANGED is set. The bit is
 + * cleared up front and set again if the command fails.
 + */
 +static void hclge_sync_fd_user_def_cfg(struct hclge_dev *hdev, bool locked)
 +{
 +	if (!test_and_clear_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state))
 +		return;
 +
 +	if (!locked)
 +		spin_lock_bh(&hdev->fd_rule_lock);
 +
 +	/* on failure leave the change pending again */
 +	if (hclge_fd_set_user_def_cmd(hdev, hdev->fd_cfg.user_def_cfg))
 +		set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
 +
 +	if (!locked)
 +		spin_unlock_bh(&hdev->fd_rule_lock);
 +}
 +
 +/* hclge_fd_check_user_def_refcnt: check a rule's user-def offset is usable
 + * @hdev: pointer to struct hclge_dev
 + * @rule: rule about to be added; may be NULL
 + * @return: 0 when the rule carries no user-def data or its offset can be
 + * accepted, -ENOSPC when the layer already uses a different offset
 + */
 +static int hclge_fd_check_user_def_refcnt(struct hclge_dev *hdev,
 +					  struct hclge_fd_rule *rule)
 +{
 +	struct hclge_fd_user_def_info *info;
 +	struct hclge_fd_rule *parent = NULL;
 +	struct hclge_fd_user_def_cfg *cfg;
 +	struct hclge_fd_rule *old_rule;
 +
 +	if (!rule || rule->rule_type != HCLGE_FD_EP_ACTIVE ||
 +	    rule->ep.user_def.layer == HCLGE_FD_USER_DEF_NONE)
 +		return 0;
 +
 +	info = &rule->ep.user_def;
 +	/* valid layers start from 1, so subtract 1 to index the cfg array */
 +	cfg = &hdev->fd_cfg.user_def_cfg[info->layer - 1];
 +
 +	/* layer unused so far, or already configured with this offset */
 +	if (!cfg->ref_cnt || cfg->offset == info->offset)
 +		return 0;
 +
 +	/* with at most one reference, still accept the rule if the rule
 +	 * already stored at the same location uses the same layer.
 +	 */
 +	if (cfg->ref_cnt <= 1) {
 +		old_rule = hclge_find_fd_rule(&hdev->fd_rule_list,
 +					      rule->location, &parent);
 +		if (old_rule && old_rule->ep.user_def.layer == info->layer)
 +			return 0;
 +	}
 +
 +	dev_err(&hdev->pdev->dev,
 +		"No available offset for layer%d fd rule, each layer only support one user def offset.\n",
 +		info->layer + 1);
 +	return -ENOSPC;
 +}
 +
 +/* hclge_fd_inc_user_def_refcnt: take a reference on a rule's user-def layer
 + * @hdev: pointer to struct hclge_dev
 + * @rule: rule whose user-def layer is referenced; may be NULL
 + *
 + * The first reference records the rule's offset for the layer and sets
 + * HCLGE_STATE_FD_USER_DEF_CHANGED.
 + */
 +static void hclge_fd_inc_user_def_refcnt(struct hclge_dev *hdev,
 +					 struct hclge_fd_rule *rule)
 +{
 +	struct hclge_fd_user_def_info *info;
 +	struct hclge_fd_user_def_cfg *cfg;
 +
 +	if (!rule)
 +		return;
 +	if (rule->rule_type != HCLGE_FD_EP_ACTIVE)
 +		return;
 +
 +	info = &rule->ep.user_def;
 +	if (info->layer == HCLGE_FD_USER_DEF_NONE)
 +		return;
 +
 +	cfg = &hdev->fd_cfg.user_def_cfg[info->layer - 1];
 +	if (!cfg->ref_cnt) {
 +		cfg->offset = info->offset;
 +		set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
 +	}
 +	cfg->ref_cnt++;
 +}
 +
 +/* hclge_fd_dec_user_def_refcnt: drop a reference on a rule's user-def layer
 + * @hdev: pointer to struct hclge_dev
 + * @rule: rule whose user-def layer is released; may be NULL
 + *
 + * When the last reference goes away the layer's offset is cleared and
 + * HCLGE_STATE_FD_USER_DEF_CHANGED is set.
 + */
 +static void hclge_fd_dec_user_def_refcnt(struct hclge_dev *hdev,
 +					 struct hclge_fd_rule *rule)
 +{
 +	struct hclge_fd_user_def_info *info;
 +	struct hclge_fd_user_def_cfg *cfg;
 +
 +	if (!rule)
 +		return;
 +	if (rule->rule_type != HCLGE_FD_EP_ACTIVE)
 +		return;
 +
 +	info = &rule->ep.user_def;
 +	if (info->layer == HCLGE_FD_USER_DEF_NONE)
 +		return;
 +
 +	cfg = &hdev->fd_cfg.user_def_cfg[info->layer - 1];
 +	/* nothing to release */
 +	if (!cfg->ref_cnt)
 +		return;
 +
 +	/* still referenced by another rule */
 +	if (--cfg->ref_cnt)
 +		return;
 +
 +	cfg->offset = 0;
 +	set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
 +}
 +
 +/* hclge_update_fd_list: add, replace or delete the rule at a location
 + * @hdev: pointer to struct hclge_dev
 + * @state: requested state for the rule at @location
 + * @location: rule location to operate on
 + * @new_rule: rule to store for add/replace requests; it is either linked
 + * into the list or handed to hclge_update_fd_rule_node()
 + *
 + * NOTE(review): hclge_sync_fd_user_def_cfg() is called with locked == true,
 + * so the caller presumably holds hdev->fd_rule_lock - confirm at call sites.
 + */
 +static void hclge_update_fd_list(struct hclge_dev *hdev,
 +				 enum HCLGE_FD_NODE_STATE state, u16 location,
 +				 struct hclge_fd_rule *new_rule)
 +{
 +	struct hlist_head *hlist = &hdev->fd_rule_list;
 +	struct hclge_fd_rule *fd_rule, *parent = NULL;
 +
 +	fd_rule = hclge_find_fd_rule(hlist, location, &parent);
 +	if (fd_rule) {
 +		/* a rule already sits at this location: release its user-def
 +		 * reference, take one for the new rule only when it is
 +		 * ACTIVE, then update the existing node in place.
 +		 */
 +		hclge_fd_dec_user_def_refcnt(hdev, fd_rule);
 +		if (state == HCLGE_FD_ACTIVE)
 +			hclge_fd_inc_user_def_refcnt(hdev, new_rule);
 +		hclge_sync_fd_user_def_cfg(hdev, true);
 +
 +		hclge_update_fd_rule_node(hdev, fd_rule, new_rule, state);
 +		return;
 +	}
 +
 +	/* it's unlikely to fail here, because we have checked the rule
 +	 * exist before.
 +	 */
 +	if (unlikely(state == HCLGE_FD_TO_DEL || state == HCLGE_FD_DELETED)) {
 +		dev_warn(&hdev->pdev->dev,
 +			 "failed to delete fd rule %u, it's inexistent\n",
 +			 location);
 +		return;
 +	}
 +
 +	/* no rule at this location yet: link the new one in after parent */
 +	hclge_fd_inc_user_def_refcnt(hdev, new_rule);
 +	hclge_sync_fd_user_def_cfg(hdev, true);
 +
 +	hclge_fd_insert_rule_node(hlist, new_rule, parent);
 +	hclge_fd_inc_rule_cnt(hdev, new_rule->location);
 +
 +	/* a TO_ADD rule is flagged via HCLGE_STATE_FD_TBL_CHANGED and the
 +	 * service task is scheduled right away
 +	 */
 +	if (state == HCLGE_FD_TO_ADD) {
 +		set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
 +		hclge_task_schedule(hdev, 0);
 +	}
 +}
 +
  static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode)
  {
    struct hclge_get_fd_mode_cmd *req;
@@@ -5492,17 -5073,6 +5491,17 @@@ static int hclge_set_fd_key_config(stru
    return ret;
  }
+/* hclge_fd_disable_user_def: clear all user-def configs and send them
 + * @hdev: pointer to struct hclge_dev
 + */
 +static void hclge_fd_disable_user_def(struct hclge_dev *hdev)
 +{
 +	struct hclge_fd_user_def_cfg *user_def = hdev->fd_cfg.user_def_cfg;
 +
 +	/* zero the whole cached array while holding the rule lock */
 +	spin_lock_bh(&hdev->fd_rule_lock);
 +	memset(user_def, 0, sizeof(hdev->fd_cfg.user_def_cfg));
 +	spin_unlock_bh(&hdev->fd_rule_lock);
 +
 +	/* send the zeroed configuration; the return value is not checked */
 +	hclge_fd_set_user_def_cmd(hdev, user_def);
 +}
 +
  static int hclge_init_fd_config(struct hclge_dev *hdev)
  {
  #define LOW_2_WORDS		0x03
@@@ -5543,12 -5113,9 +5542,12 @@@
    			BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
/* If use max 400bit key, we can support tuples for ether type */
 -	if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1)
 +	if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1) {
    	key_cfg->tuple_active |=
    			BIT(INNER_DST_MAC) | BIT(INNER_SRC_MAC);
 +		if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V3)
 +			key_cfg->tuple_active |= HCLGE_FD_TUPLE_USER_DEF_TUPLES;
 +	}
/* roce_type is used to filter roce frames
     * dst_vport is used to specify the rule
@@@ -5657,57 -5224,96 +5656,57 @@@ static int hclge_fd_ad_config(struct hc
  static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y,
    			   struct hclge_fd_rule *rule)
  {
 +	int offset, moffset, ip_offset;
 +	enum HCLGE_FD_KEY_OPT key_opt;
    u16 tmp_x_s, tmp_y_s;
    u32 tmp_x_l, tmp_y_l;
 +	u8 *p = (u8 *)rule;
    int i;
-	if (rule->unused_tuple & tuple_bit)
 +	if (rule->unused_tuple & BIT(tuple_bit))
    	return true;
-	switch (tuple_bit) {
 -	case BIT(INNER_DST_MAC):
 -		for (i = 0; i < ETH_ALEN; i++) {
 -			calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i],
 -			       rule->tuples_mask.dst_mac[i]);
 -			calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i],
 -			       rule->tuples_mask.dst_mac[i]);
 -		}
 +	key_opt = tuple_key_info[tuple_bit].key_opt;
 +	offset = tuple_key_info[tuple_bit].offset;
 +	moffset = tuple_key_info[tuple_bit].moffset;
-		return true;
 -	case BIT(INNER_SRC_MAC):
 -		for (i = 0; i < ETH_ALEN; i++) {
 -			calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i],
 -			       rule->tuples_mask.src_mac[i]);
 -			calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i],
 -			       rule->tuples_mask.src_mac[i]);
 -		}
 +	switch (key_opt) {
 +	case KEY_OPT_U8:
 +		calc_x(*key_x, p[offset], p[moffset]);
 +		calc_y(*key_y, p[offset], p[moffset]);
return true;
 -	case BIT(INNER_VLAN_TAG_FST):
 -		calc_x(tmp_x_s, rule->tuples.vlan_tag1,
 -		       rule->tuples_mask.vlan_tag1);
 -		calc_y(tmp_y_s, rule->tuples.vlan_tag1,
 -		       rule->tuples_mask.vlan_tag1);
 +	case KEY_OPT_LE16:
 +		calc_x(tmp_x_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset]));
 +		calc_y(tmp_y_s, *(u16 *)(&p[offset]), *(u16 *)(&p[moffset]));
    	*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
    	*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
return true;
 -	case BIT(INNER_ETH_TYPE):
 -		calc_x(tmp_x_s, rule->tuples.ether_proto,
 -		       rule->tuples_mask.ether_proto);
 -		calc_y(tmp_y_s, rule->tuples.ether_proto,
 -		       rule->tuples_mask.ether_proto);
 -		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
 -		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
 -
 -		return true;
 -	case BIT(INNER_IP_TOS):
 -		calc_x(*key_x, rule->tuples.ip_tos, rule->tuples_mask.ip_tos);
 -		calc_y(*key_y, rule->tuples.ip_tos, rule->tuples_mask.ip_tos);
 -
 -		return true;
 -	case BIT(INNER_IP_PROTO):
 -		calc_x(*key_x, rule->tuples.ip_proto,
 -		       rule->tuples_mask.ip_proto);
 -		calc_y(*key_y, rule->tuples.ip_proto,
 -		       rule->tuples_mask.ip_proto);
 -
 -		return true;
 -	case BIT(INNER_SRC_IP):
 -		calc_x(tmp_x_l, rule->tuples.src_ip[IPV4_INDEX],
 -		       rule->tuples_mask.src_ip[IPV4_INDEX]);
 -		calc_y(tmp_y_l, rule->tuples.src_ip[IPV4_INDEX],
 -		       rule->tuples_mask.src_ip[IPV4_INDEX]);
 -		*(__le32 *)key_x = cpu_to_le32(tmp_x_l);
 -		*(__le32 *)key_y = cpu_to_le32(tmp_y_l);
 -
 -		return true;
 -	case BIT(INNER_DST_IP):
 -		calc_x(tmp_x_l, rule->tuples.dst_ip[IPV4_INDEX],
 -		       rule->tuples_mask.dst_ip[IPV4_INDEX]);
 -		calc_y(tmp_y_l, rule->tuples.dst_ip[IPV4_INDEX],
 -		       rule->tuples_mask.dst_ip[IPV4_INDEX]);
 +	case KEY_OPT_LE32:
 +		calc_x(tmp_x_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset]));
 +		calc_y(tmp_y_l, *(u32 *)(&p[offset]), *(u32 *)(&p[moffset]));
    	*(__le32 *)key_x = cpu_to_le32(tmp_x_l);
    	*(__le32 *)key_y = cpu_to_le32(tmp_y_l);
return true;
 -	case BIT(INNER_SRC_PORT):
 -		calc_x(tmp_x_s, rule->tuples.src_port,
 -		       rule->tuples_mask.src_port);
 -		calc_y(tmp_y_s, rule->tuples.src_port,
 -		       rule->tuples_mask.src_port);
 -		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
 -		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
 +	case KEY_OPT_MAC:
 +		for (i = 0; i < ETH_ALEN; i++) {
 +			calc_x(key_x[ETH_ALEN - 1 - i], p[offset + i],
 +			       p[moffset + i]);
 +			calc_y(key_y[ETH_ALEN - 1 - i], p[offset + i],
 +			       p[moffset + i]);
 +		}
return true;
 -	case BIT(INNER_DST_PORT):
 -		calc_x(tmp_x_s, rule->tuples.dst_port,
 -		       rule->tuples_mask.dst_port);
 -		calc_y(tmp_y_s, rule->tuples.dst_port,
 -		       rule->tuples_mask.dst_port);
 -		*(__le16 *)key_x = cpu_to_le16(tmp_x_s);
 -		*(__le16 *)key_y = cpu_to_le16(tmp_y_s);
 +	case KEY_OPT_IP:
 +		ip_offset = IPV4_INDEX * sizeof(u32);
 +		calc_x(tmp_x_l, *(u32 *)(&p[offset + ip_offset]),
 +		       *(u32 *)(&p[moffset + ip_offset]));
 +		calc_y(tmp_y_l, *(u32 *)(&p[offset + ip_offset]),
 +		       *(u32 *)(&p[moffset + ip_offset]));
 +		*(__le32 *)key_x = cpu_to_le32(tmp_x_l);
 +		*(__le32 *)key_y = cpu_to_le32(tmp_y_l);
return true;
    default:
@@@ -5795,12 -5401,12 +5794,12 @@@ static int hclge_config_key(struct hclg
for (i = 0 ; i < MAX_TUPLE; i++) {
    	bool tuple_valid;
 -		u32 check_tuple;
tuple_size = tuple_key_info[i].key_length / 8;
 -		check_tuple = key_cfg->tuple_active & BIT(i);
 +		if (!(key_cfg->tuple_active & BIT(i)))
 +			continue;
-		tuple_valid = hclge_fd_convert_tuple(check_tuple, cur_key_x,
 +		tuple_valid = hclge_fd_convert_tuple(i, cur_key_x,
    					     cur_key_y, rule);
    	if (tuple_valid) {
    		cur_key_x += tuple_size;
@@@ -5931,7 -5537,8 +5930,7 @@@ static int hclge_fd_check_tcpip6_tuple(
    if (!spec || !unused_tuple)
    	return -EINVAL;
-	*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
 -		BIT(INNER_IP_TOS);
 +	*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC);
/* check whether src/dst ip address used */
    if (ipv6_addr_any((struct in6_addr *)spec->ip6src))
@@@ -5946,8 -5553,8 +5945,8 @@@
    if (!spec->pdst)
    	*unused_tuple |= BIT(INNER_DST_PORT);
-	if (spec->tclass)
 -		return -EOPNOTSUPP;
 +	if (!spec->tclass)
 +		*unused_tuple |= BIT(INNER_IP_TOS);
return 0;
  }
@@@ -5959,7 -5566,7 +5958,7 @@@ static int hclge_fd_check_ip6_tuple(str
    	return -EINVAL;
*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
 -		BIT(INNER_IP_TOS) | BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
 +			BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
/* check whether src/dst ip address used */
    if (ipv6_addr_any((struct in6_addr *)spec->ip6src))
@@@ -5971,8 -5578,8 +5970,8 @@@
    if (!spec->l4_proto)
    	*unused_tuple |= BIT(INNER_IP_PROTO);
-	if (spec->tclass)
 -		return -EOPNOTSUPP;
 +	if (!spec->tclass)
 +		*unused_tuple |= BIT(INNER_IP_TOS);
if (spec->l4_4_bytes)
    	return -EOPNOTSUPP;
@@@ -6042,98 -5649,9 +6041,98 @@@ static int hclge_fd_check_ext_tuple(str
    return 0;
  }
+static int hclge_fd_get_user_def_layer(u32 flow_type, u32 *unused_tuple,
 +				       struct hclge_fd_user_def_info *info)
 +{
 +	switch (flow_type) {
 +	case ETHER_FLOW:
 +		info->layer = HCLGE_FD_USER_DEF_L2;
 +		*unused_tuple &= ~BIT(INNER_L2_RSV);
 +		break;
 +	case IP_USER_FLOW:
 +	case IPV6_USER_FLOW:
 +		info->layer = HCLGE_FD_USER_DEF_L3;
 +		*unused_tuple &= ~BIT(INNER_L3_RSV);
 +		break;
 +	case TCP_V4_FLOW:
 +	case UDP_V4_FLOW:
 +	case TCP_V6_FLOW:
 +	case UDP_V6_FLOW:
 +		info->layer = HCLGE_FD_USER_DEF_L4;
 +		*unused_tuple &= ~BIT(INNER_L4_RSV);
 +		break;
 +	default:
 +		return -EOPNOTSUPP;
 +	}
 +
 +	return 0;
 +}
 +
 +static bool hclge_fd_is_user_def_all_masked(struct ethtool_rx_flow_spec *fs)
 +{
 +	return be32_to_cpu(fs->m_ext.data[1] | fs->m_ext.data[0]) == 0;
 +}
 +
 +static int hclge_fd_parse_user_def_field(struct hclge_dev *hdev,
 +					 struct ethtool_rx_flow_spec *fs,
 +					 u32 *unused_tuple,
 +					 struct hclge_fd_user_def_info *info)
 +{
 +	u32 tuple_active = hdev->fd_cfg.key_cfg[HCLGE_FD_STAGE_1].tuple_active;
 +	u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
 +	u16 data, offset, data_mask, offset_mask;
 +	int ret;
 +
 +	info->layer = HCLGE_FD_USER_DEF_NONE;
 +	*unused_tuple |= HCLGE_FD_TUPLE_USER_DEF_TUPLES;
 +
 +	if (!(fs->flow_type & FLOW_EXT) || hclge_fd_is_user_def_all_masked(fs))
 +		return 0;
 +
 +	/* user-def data from ethtool is 64 bit value, the bit0~15 is used
 +	 * for data, and bit32~47 is used for offset.
 +	 */
 +	data = be32_to_cpu(fs->h_ext.data[1]) & HCLGE_FD_USER_DEF_DATA;
 +	data_mask = be32_to_cpu(fs->m_ext.data[1]) & HCLGE_FD_USER_DEF_DATA;
 +	offset = be32_to_cpu(fs->h_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET;
 +	offset_mask = be32_to_cpu(fs->m_ext.data[0]) & HCLGE_FD_USER_DEF_OFFSET;
 +
 +	if (!(tuple_active & HCLGE_FD_TUPLE_USER_DEF_TUPLES)) {
 +		dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n");
 +		return -EOPNOTSUPP;
 +	}
 +
 +	if (offset > HCLGE_FD_MAX_USER_DEF_OFFSET) {
 +		dev_err(&hdev->pdev->dev,
 +			"user-def offset[%u] should be no more than %u\n",
 +			offset, HCLGE_FD_MAX_USER_DEF_OFFSET);
 +		return -EINVAL;
 +	}
 +
 +	if (offset_mask != HCLGE_FD_USER_DEF_OFFSET_UNMASK) {
 +		dev_err(&hdev->pdev->dev, "user-def offset can't be masked\n");
 +		return -EINVAL;
 +	}
 +
 +	ret = hclge_fd_get_user_def_layer(flow_type, unused_tuple, info);
 +	if (ret) {
 +		dev_err(&hdev->pdev->dev,
 +			"unsupported flow type for user-def bytes, ret = %d\n",
 +			ret);
 +		return ret;
 +	}
 +
 +	info->data = data;
 +	info->data_mask = data_mask;
 +	info->offset = offset;
 +
 +	return 0;
 +}
 +
  static int hclge_fd_check_spec(struct hclge_dev *hdev,
    		       struct ethtool_rx_flow_spec *fs,
 -			       u32 *unused_tuple)
 +			       u32 *unused_tuple,
 +			       struct hclge_fd_user_def_info *info)
  {
    u32 flow_type;
    int ret;
@@@ -6146,9 -5664,11 +6145,9 @@@
    	return -EINVAL;
    }
-	if ((fs->flow_type & FLOW_EXT) &&
 -	    (fs->h_ext.data[0] != 0 || fs->h_ext.data[1] != 0)) {
 -		dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n");
 -		return -EOPNOTSUPP;
 -	}
 +	ret = hclge_fd_parse_user_def_field(hdev, fs, unused_tuple, info);
 +	if (ret)
 +		return ret;
flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
    switch (flow_type) {
@@@ -6200,194 -5720,217 +6199,194 @@@
    return hclge_fd_check_ext_tuple(hdev, fs, unused_tuple);
  }
-static bool hclge_fd_rule_exist(struct hclge_dev *hdev, u16 location)
 +static void hclge_fd_get_tcpip4_tuple(struct hclge_dev *hdev,
 +				      struct ethtool_rx_flow_spec *fs,
 +				      struct hclge_fd_rule *rule, u8 ip_proto)
  {
 -	struct hclge_fd_rule *rule = NULL;
 -	struct hlist_node *node2;
 -
 -	spin_lock_bh(&hdev->fd_rule_lock);
 -	hlist_for_each_entry_safe(rule, node2, &hdev->fd_rule_list, rule_node) {
 -		if (rule->location >= location)
 -			break;
 -	}
 -
 -	spin_unlock_bh(&hdev->fd_rule_lock);
 +	rule->tuples.src_ip[IPV4_INDEX] =
 +			be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src);
 +	rule->tuples_mask.src_ip[IPV4_INDEX] =
 +			be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src);
-	return  rule && rule->location == location;
 -}
 +	rule->tuples.dst_ip[IPV4_INDEX] =
 +			be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst);
 +	rule->tuples_mask.dst_ip[IPV4_INDEX] =
 +			be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst);
-/* make sure being called after lock up with fd_rule_lock */
 -static int hclge_fd_update_rule_list(struct hclge_dev *hdev,
 -				     struct hclge_fd_rule *new_rule,
 -				     u16 location,
 -				     bool is_add)
 -{
 -	struct hclge_fd_rule *rule = NULL, *parent = NULL;
 -	struct hlist_node *node2;
 +	rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc);
 +	rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc);
-	if (is_add && !new_rule)
 -		return -EINVAL;
 +	rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst);
 +	rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst);
-	hlist_for_each_entry_safe(rule, node2,
 -				  &hdev->fd_rule_list, rule_node) {
 -		if (rule->location >= location)
 -			break;
 -		parent = rule;
 -	}
 +	rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos;
 +	rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos;
-	if (rule && rule->location == location) {
 -		hlist_del(&rule->rule_node);
 -		kfree(rule);
 -		hdev->hclge_fd_rule_num--;
 +	rule->tuples.ether_proto = ETH_P_IP;
 +	rule->tuples_mask.ether_proto = 0xFFFF;
-		if (!is_add) {
 -			if (!hdev->hclge_fd_rule_num)
 -				hdev->fd_active_type = HCLGE_FD_RULE_NONE;
 -			clear_bit(location, hdev->fd_bmap);
 +	rule->tuples.ip_proto = ip_proto;
 +	rule->tuples_mask.ip_proto = 0xFF;
 +}
-			return 0;
 -		}
 -	} else if (!is_add) {
 -		dev_err(&hdev->pdev->dev,
 -			"delete fail, rule %u is inexistent\n",
 -			location);
 -		return -EINVAL;
 -	}
 +static void hclge_fd_get_ip4_tuple(struct hclge_dev *hdev,
 +				   struct ethtool_rx_flow_spec *fs,
 +				   struct hclge_fd_rule *rule)
 +{
 +	rule->tuples.src_ip[IPV4_INDEX] =
 +			be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src);
 +	rule->tuples_mask.src_ip[IPV4_INDEX] =
 +			be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src);
-	INIT_HLIST_NODE(&new_rule->rule_node);
 +	rule->tuples.dst_ip[IPV4_INDEX] =
 +			be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst);
 +	rule->tuples_mask.dst_ip[IPV4_INDEX] =
 +			be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst);
-	if (parent)
 -		hlist_add_behind(&new_rule->rule_node, &parent->rule_node);
 -	else
 -		hlist_add_head(&new_rule->rule_node, &hdev->fd_rule_list);
 +	rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos;
 +	rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos;
-	set_bit(location, hdev->fd_bmap);
 -	hdev->hclge_fd_rule_num++;
 -	hdev->fd_active_type = new_rule->rule_type;
 +	rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto;
 +	rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto;
-	return 0;
 +	rule->tuples.ether_proto = ETH_P_IP;
 +	rule->tuples_mask.ether_proto = 0xFFFF;
  }
-static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 -			      struct ethtool_rx_flow_spec *fs,
 -			      struct hclge_fd_rule *rule)
 +static void hclge_fd_get_tcpip6_tuple(struct hclge_dev *hdev,
 +				      struct ethtool_rx_flow_spec *fs,
 +				      struct hclge_fd_rule *rule, u8 ip_proto)
  {
 -	u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
 -
 -	switch (flow_type) {
 -	case SCTP_V4_FLOW:
 -	case TCP_V4_FLOW:
 -	case UDP_V4_FLOW:
 -		rule->tuples.src_ip[IPV4_INDEX] =
 -				be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4src);
 -		rule->tuples_mask.src_ip[IPV4_INDEX] =
 -				be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4src);
 +	be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.tcp_ip6_spec.ip6src,
 +			  IPV6_SIZE);
 +	be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.tcp_ip6_spec.ip6src,
 +			  IPV6_SIZE);
-		rule->tuples.dst_ip[IPV4_INDEX] =
 -				be32_to_cpu(fs->h_u.tcp_ip4_spec.ip4dst);
 -		rule->tuples_mask.dst_ip[IPV4_INDEX] =
 -				be32_to_cpu(fs->m_u.tcp_ip4_spec.ip4dst);
 +	be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.tcp_ip6_spec.ip6dst,
 +			  IPV6_SIZE);
 +	be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.tcp_ip6_spec.ip6dst,
 +			  IPV6_SIZE);
-		rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.psrc);
 -		rule->tuples_mask.src_port =
 -				be16_to_cpu(fs->m_u.tcp_ip4_spec.psrc);
 +	rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc);
 +	rule->tuples_mask.src_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc);
-		rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip4_spec.pdst);
 -		rule->tuples_mask.dst_port =
 -				be16_to_cpu(fs->m_u.tcp_ip4_spec.pdst);
 +	rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst);
 +	rule->tuples_mask.dst_port = be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst);
-		rule->tuples.ip_tos = fs->h_u.tcp_ip4_spec.tos;
 -		rule->tuples_mask.ip_tos = fs->m_u.tcp_ip4_spec.tos;
 +	rule->tuples.ether_proto = ETH_P_IPV6;
 +	rule->tuples_mask.ether_proto = 0xFFFF;
-		rule->tuples.ether_proto = ETH_P_IP;
 -		rule->tuples_mask.ether_proto = 0xFFFF;
 +	rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass;
 +	rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass;
-		break;
 -	case IP_USER_FLOW:
 -		rule->tuples.src_ip[IPV4_INDEX] =
 -				be32_to_cpu(fs->h_u.usr_ip4_spec.ip4src);
 -		rule->tuples_mask.src_ip[IPV4_INDEX] =
 -				be32_to_cpu(fs->m_u.usr_ip4_spec.ip4src);
 +	rule->tuples.ip_proto = ip_proto;
 +	rule->tuples_mask.ip_proto = 0xFF;
 +}
-		rule->tuples.dst_ip[IPV4_INDEX] =
 -				be32_to_cpu(fs->h_u.usr_ip4_spec.ip4dst);
 -		rule->tuples_mask.dst_ip[IPV4_INDEX] =
 -				be32_to_cpu(fs->m_u.usr_ip4_spec.ip4dst);
 +static void hclge_fd_get_ip6_tuple(struct hclge_dev *hdev,
 +				   struct ethtool_rx_flow_spec *fs,
 +				   struct hclge_fd_rule *rule)
 +{
 +	be32_to_cpu_array(rule->tuples.src_ip, fs->h_u.usr_ip6_spec.ip6src,
 +			  IPV6_SIZE);
 +	be32_to_cpu_array(rule->tuples_mask.src_ip, fs->m_u.usr_ip6_spec.ip6src,
 +			  IPV6_SIZE);
-		rule->tuples.ip_tos = fs->h_u.usr_ip4_spec.tos;
 -		rule->tuples_mask.ip_tos = fs->m_u.usr_ip4_spec.tos;
 +	be32_to_cpu_array(rule->tuples.dst_ip, fs->h_u.usr_ip6_spec.ip6dst,
 +			  IPV6_SIZE);
 +	be32_to_cpu_array(rule->tuples_mask.dst_ip, fs->m_u.usr_ip6_spec.ip6dst,
 +			  IPV6_SIZE);
-		rule->tuples.ip_proto = fs->h_u.usr_ip4_spec.proto;
 -		rule->tuples_mask.ip_proto = fs->m_u.usr_ip4_spec.proto;
 +	rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto;
 +	rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
-		rule->tuples.ether_proto = ETH_P_IP;
 -		rule->tuples_mask.ether_proto = 0xFFFF;
 +	rule->tuples.ip_tos = fs->h_u.tcp_ip6_spec.tclass;
 +	rule->tuples_mask.ip_tos = fs->m_u.tcp_ip6_spec.tclass;
-		break;
 -	case SCTP_V6_FLOW:
 -	case TCP_V6_FLOW:
 -	case UDP_V6_FLOW:
 -		be32_to_cpu_array(rule->tuples.src_ip,
 -				  fs->h_u.tcp_ip6_spec.ip6src, IPV6_SIZE);
 -		be32_to_cpu_array(rule->tuples_mask.src_ip,
 -				  fs->m_u.tcp_ip6_spec.ip6src, IPV6_SIZE);
 -
 -		be32_to_cpu_array(rule->tuples.dst_ip,
 -				  fs->h_u.tcp_ip6_spec.ip6dst, IPV6_SIZE);
 -		be32_to_cpu_array(rule->tuples_mask.dst_ip,
 -				  fs->m_u.tcp_ip6_spec.ip6dst, IPV6_SIZE);
 +	rule->tuples.ether_proto = ETH_P_IPV6;
 +	rule->tuples_mask.ether_proto = 0xFFFF;
 +}
-		rule->tuples.src_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.psrc);
 -		rule->tuples_mask.src_port =
 -				be16_to_cpu(fs->m_u.tcp_ip6_spec.psrc);
 +static void hclge_fd_get_ether_tuple(struct hclge_dev *hdev,
 +				     struct ethtool_rx_flow_spec *fs,
 +				     struct hclge_fd_rule *rule)
 +{
 +	ether_addr_copy(rule->tuples.src_mac, fs->h_u.ether_spec.h_source);
 +	ether_addr_copy(rule->tuples_mask.src_mac, fs->m_u.ether_spec.h_source);
-		rule->tuples.dst_port = be16_to_cpu(fs->h_u.tcp_ip6_spec.pdst);
 -		rule->tuples_mask.dst_port =
 -				be16_to_cpu(fs->m_u.tcp_ip6_spec.pdst);
 +	ether_addr_copy(rule->tuples.dst_mac, fs->h_u.ether_spec.h_dest);
 +	ether_addr_copy(rule->tuples_mask.dst_mac, fs->m_u.ether_spec.h_dest);
-		rule->tuples.ether_proto = ETH_P_IPV6;
 -		rule->tuples_mask.ether_proto = 0xFFFF;
 +	rule->tuples.ether_proto = be16_to_cpu(fs->h_u.ether_spec.h_proto);
 +	rule->tuples_mask.ether_proto = be16_to_cpu(fs->m_u.ether_spec.h_proto);
 +}
+static void hclge_fd_get_user_def_tuple(struct hclge_fd_user_def_info *info,
 +					struct hclge_fd_rule *rule)
 +{
 +	switch (info->layer) {
 +	case HCLGE_FD_USER_DEF_L2:
 +		rule->tuples.l2_user_def = info->data;
 +		rule->tuples_mask.l2_user_def = info->data_mask;
    	break;
 -	case IPV6_USER_FLOW:
 -		be32_to_cpu_array(rule->tuples.src_ip,
 -				  fs->h_u.usr_ip6_spec.ip6src, IPV6_SIZE);
 -		be32_to_cpu_array(rule->tuples_mask.src_ip,
 -				  fs->m_u.usr_ip6_spec.ip6src, IPV6_SIZE);
 -
 -		be32_to_cpu_array(rule->tuples.dst_ip,
 -				  fs->h_u.usr_ip6_spec.ip6dst, IPV6_SIZE);
 -		be32_to_cpu_array(rule->tuples_mask.dst_ip,
 -				  fs->m_u.usr_ip6_spec.ip6dst, IPV6_SIZE);
 -
 -		rule->tuples.ip_proto = fs->h_u.usr_ip6_spec.l4_proto;
 -		rule->tuples_mask.ip_proto = fs->m_u.usr_ip6_spec.l4_proto;
 -
 -		rule->tuples.ether_proto = ETH_P_IPV6;
 -		rule->tuples_mask.ether_proto = 0xFFFF;
 -
 +	case HCLGE_FD_USER_DEF_L3:
 +		rule->tuples.l3_user_def = info->data;
 +		rule->tuples_mask.l3_user_def = info->data_mask;
    	break;
 -	case ETHER_FLOW:
 -		ether_addr_copy(rule->tuples.src_mac,
 -				fs->h_u.ether_spec.h_source);
 -		ether_addr_copy(rule->tuples_mask.src_mac,
 -				fs->m_u.ether_spec.h_source);
 -
 -		ether_addr_copy(rule->tuples.dst_mac,
 -				fs->h_u.ether_spec.h_dest);
 -		ether_addr_copy(rule->tuples_mask.dst_mac,
 -				fs->m_u.ether_spec.h_dest);
 -
 -		rule->tuples.ether_proto =
 -				be16_to_cpu(fs->h_u.ether_spec.h_proto);
 -		rule->tuples_mask.ether_proto =
 -				be16_to_cpu(fs->m_u.ether_spec.h_proto);
 -
 +	case HCLGE_FD_USER_DEF_L4:
 +		rule->tuples.l4_user_def = (u32)info->data << 16;
 +		rule->tuples_mask.l4_user_def = (u32)info->data_mask << 16;
    	break;
    default:
 -		return -EOPNOTSUPP;
 +		break;
    }
+	rule->ep.user_def = *info;
 +}
 +
 +static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 +			      struct ethtool_rx_flow_spec *fs,
 +			      struct hclge_fd_rule *rule,
 +			      struct hclge_fd_user_def_info *info)
 +{
 +	u32 flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
 +
    switch (flow_type) {
    case SCTP_V4_FLOW:
 -	case SCTP_V6_FLOW:
 -		rule->tuples.ip_proto = IPPROTO_SCTP;
 -		rule->tuples_mask.ip_proto = 0xFF;
 +		hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_SCTP);
    	break;
    case TCP_V4_FLOW:
 -	case TCP_V6_FLOW:
 -		rule->tuples.ip_proto = IPPROTO_TCP;
 -		rule->tuples_mask.ip_proto = 0xFF;
 +		hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_TCP);
    	break;
    case UDP_V4_FLOW:
 +		hclge_fd_get_tcpip4_tuple(hdev, fs, rule, IPPROTO_UDP);
 +		break;
 +	case IP_USER_FLOW:
 +		hclge_fd_get_ip4_tuple(hdev, fs, rule);
 +		break;
 +	case SCTP_V6_FLOW:
 +		hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_SCTP);
 +		break;
 +	case TCP_V6_FLOW:
 +		hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_TCP);
 +		break;
    case UDP_V6_FLOW:
 -		rule->tuples.ip_proto = IPPROTO_UDP;
 -		rule->tuples_mask.ip_proto = 0xFF;
 +		hclge_fd_get_tcpip6_tuple(hdev, fs, rule, IPPROTO_UDP);
    	break;
 -	default:
 +	case IPV6_USER_FLOW:
 +		hclge_fd_get_ip6_tuple(hdev, fs, rule);
    	break;
 +	case ETHER_FLOW:
 +		hclge_fd_get_ether_tuple(hdev, fs, rule);
 +		break;
 +	default:
 +		return -EOPNOTSUPP;
    }
if (fs->flow_type & FLOW_EXT) {
    	rule->tuples.vlan_tag1 = be16_to_cpu(fs->h_ext.vlan_tci);
    	rule->tuples_mask.vlan_tag1 = be16_to_cpu(fs->m_ext.vlan_tci);
 +		hclge_fd_get_user_def_tuple(info, rule);
    }
if (fs->flow_type & FLOW_MAC_EXT) {
@@@ -6398,53 -5941,33 +6397,53 @@@
    return 0;
  }
-/* make sure being called after lock up with fd_rule_lock */
  static int hclge_fd_config_rule(struct hclge_dev *hdev,
    			struct hclge_fd_rule *rule)
  {
    int ret;
-	if (!rule) {
 +	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
 +	if (ret)
 +		return ret;
 +
 +	return hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
 +}
 +
 +static int hclge_add_fd_entry_common(struct hclge_dev *hdev,
 +				     struct hclge_fd_rule *rule)
 +{
 +	int ret;
 +
 +	spin_lock_bh(&hdev->fd_rule_lock);
 +
 +	if (hdev->fd_active_type != rule->rule_type &&
 +	    (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE ||
 +	     hdev->fd_active_type == HCLGE_FD_EP_ACTIVE)) {
    	dev_err(&hdev->pdev->dev,
 -			"The flow director rule is NULL\n");
 +			"mode conflict(new type %d, active type %d), please delete existent rules first\n",
 +			rule->rule_type, hdev->fd_active_type);
 +		spin_unlock_bh(&hdev->fd_rule_lock);
    	return -EINVAL;
    }
-	/* it will never fail here, so needn't to check return value */
 -	hclge_fd_update_rule_list(hdev, rule, rule->location, true);
 +	ret = hclge_fd_check_user_def_refcnt(hdev, rule);
 +	if (ret)
 +		goto out;
-	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
 +	ret = hclge_clear_arfs_rules(hdev);
    if (ret)
 -		goto clear_rule;
 +		goto out;
-	ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
 +	ret = hclge_fd_config_rule(hdev, rule);
    if (ret)
 -		goto clear_rule;
 +		goto out;
-	return 0;
 +	rule->state = HCLGE_FD_ACTIVE;
 +	hdev->fd_active_type = rule->rule_type;
 +	hclge_update_fd_list(hdev, rule->state, rule->location, rule);
-clear_rule:
 -	hclge_fd_update_rule_list(hdev, rule, rule->location, false);
 +out:
 +	spin_unlock_bh(&hdev->fd_rule_lock);
    return ret;
  }
@@@ -6456,48 -5979,11 +6455,48 @@@ static bool hclge_is_cls_flower_active(
    return hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE;
  }
+static int hclge_fd_parse_ring_cookie(struct hclge_dev *hdev, u64 ring_cookie,
 +				      u16 *vport_id, u8 *action, u16 *queue_id)
 +{
 +	struct hclge_vport *vport = hdev->vport;
 +
 +	if (ring_cookie == RX_CLS_FLOW_DISC) {
 +		*action = HCLGE_FD_ACTION_DROP_PACKET;
 +	} else {
 +		u32 ring = ethtool_get_flow_spec_ring(ring_cookie);
 +		u8 vf = ethtool_get_flow_spec_ring_vf(ring_cookie);
 +		u16 tqps;
 +
 +		if (vf > hdev->num_req_vfs) {
 +			dev_err(&hdev->pdev->dev,
 +				"Error: vf id (%u) > max vf num (%u)\n",
 +				vf, hdev->num_req_vfs);
 +			return -EINVAL;
 +		}
 +
 +		*vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id;
 +		tqps = hdev->vport[vf].nic.kinfo.num_tqps;
 +
 +		if (ring >= tqps) {
 +			dev_err(&hdev->pdev->dev,
 +				"Error: queue id (%u) > max tqp num (%u)\n",
 +				ring, tqps - 1);
 +			return -EINVAL;
 +		}
 +
 +		*action = HCLGE_FD_ACTION_SELECT_QUEUE;
 +		*queue_id = ring;
 +	}
 +
 +	return 0;
 +}
 +
  static int hclge_add_fd_entry(struct hnae3_handle *handle,
    		      struct ethtool_rxnfc *cmd)
  {
    struct hclge_vport *vport = hclge_get_vport(handle);
    struct hclge_dev *hdev = vport->back;
 +	struct hclge_fd_user_def_info info;
    u16 dst_vport_id = 0, q_index = 0;
    struct ethtool_rx_flow_spec *fs;
    struct hclge_fd_rule *rule;
@@@ -6517,22 -6003,51 +6516,22 @@@
    	return -EOPNOTSUPP;
    }
-	if (hclge_is_cls_flower_active(handle)) {
 -		dev_err(&hdev->pdev->dev,
 -			"please delete all exist cls flower rules first\n");
 -		return -EINVAL;
 -	}
 -
    fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
-	ret = hclge_fd_check_spec(hdev, fs, &unused);
 +	ret = hclge_fd_check_spec(hdev, fs, &unused, &info);
    if (ret)
    	return ret;
-	if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
 -		action = HCLGE_FD_ACTION_DROP_PACKET;
 -	} else {
 -		u32 ring = ethtool_get_flow_spec_ring(fs->ring_cookie);
 -		u8 vf = ethtool_get_flow_spec_ring_vf(fs->ring_cookie);
 -		u16 tqps;
 -
 -		if (vf > hdev->num_req_vfs) {
 -			dev_err(&hdev->pdev->dev,
 -				"Error: vf id (%u) > max vf num (%u)\n",
 -				vf, hdev->num_req_vfs);
 -			return -EINVAL;
 -		}
 -
 -		dst_vport_id = vf ? hdev->vport[vf].vport_id : vport->vport_id;
 -		tqps = vf ? hdev->vport[vf].alloc_tqps : vport->alloc_tqps;
 -
 -		if (ring >= tqps) {
 -			dev_err(&hdev->pdev->dev,
 -				"Error: queue id (%u) > max tqp num (%u)\n",
 -				ring, tqps - 1);
 -			return -EINVAL;
 -		}
 -
 -		action = HCLGE_FD_ACTION_SELECT_QUEUE;
 -		q_index = ring;
 -	}
 +	ret = hclge_fd_parse_ring_cookie(hdev, fs->ring_cookie, &dst_vport_id,
 +					 &action, &q_index);
 +	if (ret)
 +		return ret;
rule = kzalloc(sizeof(*rule), GFP_KERNEL);
    if (!rule)
    	return -ENOMEM;
-	ret = hclge_fd_get_tuple(hdev, fs, rule);
 +	ret = hclge_fd_get_tuple(hdev, fs, rule, &info);
    if (ret) {
    	kfree(rule);
    	return ret;
@@@ -6546,9 -6061,15 +6545,9 @@@
    rule->action = action;
    rule->rule_type = HCLGE_FD_EP_ACTIVE;
-	/* to avoid rule conflict, when user configure rule by ethtool,
 -	 * we need to clear all arfs rules
 -	 */
 -	spin_lock_bh(&hdev->fd_rule_lock);
 -	hclge_clear_arfs_rules(handle);
 -
 -	ret = hclge_fd_config_rule(hdev, rule);
 -
 -	spin_unlock_bh(&hdev->fd_rule_lock);
 +	ret = hclge_add_fd_entry_common(hdev, rule);
 +	if (ret)
 +		kfree(rule);
return ret;
  }
@@@ -6569,30 -6090,32 +6568,30 @@@ static int hclge_del_fd_entry(struct hn
    if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
    	return -EINVAL;
-	if (hclge_is_cls_flower_active(handle) || !hdev->hclge_fd_rule_num ||
 -	    !hclge_fd_rule_exist(hdev, fs->location)) {
 +	spin_lock_bh(&hdev->fd_rule_lock);
 +	if (hdev->fd_active_type == HCLGE_FD_TC_FLOWER_ACTIVE ||
 +	    !test_bit(fs->location, hdev->fd_bmap)) {
    	dev_err(&hdev->pdev->dev,
    		"Delete fail, rule %u is inexistent\n", fs->location);
 +		spin_unlock_bh(&hdev->fd_rule_lock);
    	return -ENOENT;
    }
ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, fs->location,
    			   NULL, false);
    if (ret)
 -		return ret;
 +		goto out;
-	spin_lock_bh(&hdev->fd_rule_lock);
 -	ret = hclge_fd_update_rule_list(hdev, NULL, fs->location, false);
 +	hclge_update_fd_list(hdev, HCLGE_FD_DELETED, fs->location, NULL);
+out:
    spin_unlock_bh(&hdev->fd_rule_lock);
 -
    return ret;
  }
-/* make sure being called after lock up with fd_rule_lock */
 -static void hclge_del_all_fd_entries(struct hnae3_handle *handle,
 -				     bool clear_list)
 +static void hclge_clear_fd_rules_in_list(struct hclge_dev *hdev,
 +					 bool clear_list)
  {
 -	struct hclge_vport *vport = hclge_get_vport(handle);
 -	struct hclge_dev *hdev = vport->back;
    struct hclge_fd_rule *rule;
    struct hlist_node *node;
    u16 location;
@@@ -6600,8 -6123,6 +6599,8 @@@
    if (!hnae3_dev_fd_supported(hdev))
    	return;
+	spin_lock_bh(&hdev->fd_rule_lock);
 +
    for_each_set_bit(location, hdev->fd_bmap,
    		 hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
    	hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, location,
@@@ -6618,14 -6139,6 +6617,14 @@@
    	bitmap_zero(hdev->fd_bmap,
    		    hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]);
    }
 +
 +	spin_unlock_bh(&hdev->fd_rule_lock);
 +}
 +
 +static void hclge_del_all_fd_entries(struct hclge_dev *hdev)
 +{
 +	hclge_clear_fd_rules_in_list(hdev, true);
 +	hclge_fd_disable_user_def(hdev);
  }
static int hclge_restore_fd_entries(struct hnae3_handle *handle)
@@@ -6634,6 -6147,7 +6633,6 @@@
    struct hclge_dev *hdev = vport->back;
    struct hclge_fd_rule *rule;
    struct hlist_node *node;
 -	int ret;
/* Return ok here, because reset error handling will check this
     * return value. If error is returned here, the reset process will
@@@ -6648,11 -6162,25 +6647,11 @@@
spin_lock_bh(&hdev->fd_rule_lock);
    hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
 -		ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
 -		if (!ret)
 -			ret = hclge_config_key(hdev, HCLGE_FD_STAGE_1, rule);
 -
 -		if (ret) {
 -			dev_warn(&hdev->pdev->dev,
 -				 "Restore rule %u failed, remove it\n",
 -				 rule->location);
 -			clear_bit(rule->location, hdev->fd_bmap);
 -			hlist_del(&rule->rule_node);
 -			kfree(rule);
 -			hdev->hclge_fd_rule_num--;
 -		}
 +		if (rule->state == HCLGE_FD_ACTIVE)
 +			rule->state = HCLGE_FD_TO_ADD;
    }
 -
 -	if (hdev->hclge_fd_rule_num)
 -		hdev->fd_active_type = HCLGE_FD_EP_ACTIVE;
 -
    spin_unlock_bh(&hdev->fd_rule_lock);
 +	set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
return 0;
  }
@@@ -6740,10 -6268,6 +6739,10 @@@ static void hclge_fd_get_tcpip6_info(st
    	cpu_to_be32_array(spec_mask->ip6dst, rule->tuples_mask.dst_ip,
    			  IPV6_SIZE);
+	spec->tclass = rule->tuples.ip_tos;
 +	spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ?
 +			0 : rule->tuples_mask.ip_tos;
 +
    spec->psrc = cpu_to_be16(rule->tuples.src_port);
    spec_mask->psrc = rule->unused_tuple & BIT(INNER_SRC_PORT) ?
    		0 : cpu_to_be16(rule->tuples_mask.src_port);
@@@ -6771,10 -6295,6 +6770,10 @@@ static void hclge_fd_get_ip6_info(struc
    	cpu_to_be32_array(spec_mask->ip6dst,
    			  rule->tuples_mask.dst_ip, IPV6_SIZE);
+	spec->tclass = rule->tuples.ip_tos;
 +	spec_mask->tclass = rule->unused_tuple & BIT(INNER_IP_TOS) ?
 +			0 : rule->tuples_mask.ip_tos;
 +
    spec->l4_proto = rule->tuples.ip_proto;
    spec_mask->l4_proto = rule->unused_tuple & BIT(INNER_IP_PROTO) ?
    		0 : rule->tuples_mask.ip_proto;
@@@ -6802,24 -6322,6 +6801,24 @@@ static void hclge_fd_get_ether_info(str
    		0 : cpu_to_be16(rule->tuples_mask.ether_proto);
  }
+static void hclge_fd_get_user_def_info(struct ethtool_rx_flow_spec *fs,
 +				       struct hclge_fd_rule *rule)
 +{
 +	if ((rule->unused_tuple & HCLGE_FD_TUPLE_USER_DEF_TUPLES) ==
 +	    HCLGE_FD_TUPLE_USER_DEF_TUPLES) {
 +		fs->h_ext.data[0] = 0;
 +		fs->h_ext.data[1] = 0;
 +		fs->m_ext.data[0] = 0;
 +		fs->m_ext.data[1] = 0;
 +	} else {
 +		fs->h_ext.data[0] = cpu_to_be32(rule->ep.user_def.offset);
 +		fs->h_ext.data[1] = cpu_to_be32(rule->ep.user_def.data);
 +		fs->m_ext.data[0] =
 +				cpu_to_be32(HCLGE_FD_USER_DEF_OFFSET_UNMASK);
 +		fs->m_ext.data[1] = cpu_to_be32(rule->ep.user_def.data_mask);
 +	}
 +}
 +
  static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs,
    			  struct hclge_fd_rule *rule)
  {
@@@ -6828,8 -6330,6 +6827,8 @@@
    	fs->m_ext.vlan_tci =
    			rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ?
    			0 : cpu_to_be16(rule->tuples_mask.vlan_tag1);
 +
 +		hclge_fd_get_user_def_info(fs, rule);
    }
if (fs->flow_type & FLOW_MAC_EXT) {
@@@ -6941,9 -6441,6 +6940,9 @@@ static int hclge_get_all_rules(struct h
    		return -EMSGSIZE;
    	}
+		if (rule->state == HCLGE_FD_TO_DEL)
 +			continue;
 +
    	rule_locs[cnt] = rule->location;
    	cnt++;
    }
@@@ -7003,7 -6500,6 +7002,7 @@@ static void hclge_fd_build_arfs_rule(co
    rule->action = 0;
    rule->vf_id = 0;
    rule->rule_type = HCLGE_FD_ARFS_ACTIVE;
 +	rule->state = HCLGE_FD_TO_ADD;
    if (tuples->ether_proto == ETH_P_IP) {
    	if (tuples->ip_proto == IPPROTO_TCP)
    		rule->flow_type = TCP_V4_FLOW;
@@@ -7026,7 -6522,9 +7025,7 @@@ static int hclge_add_fd_entry_by_arfs(s
    struct hclge_fd_rule_tuples new_tuples = {};
    struct hclge_dev *hdev = vport->back;
    struct hclge_fd_rule *rule;
 -	u16 tmp_queue_id;
    u16 bit_id;
 -	int ret;
if (!hnae3_dev_fd_supported(hdev))
    	return -EOPNOTSUPP;
@@@ -7062,19 -6560,34 +7061,19 @@@
    		return -ENOMEM;
    	}
-		set_bit(bit_id, hdev->fd_bmap);
    	rule->location = bit_id;
    	rule->arfs.flow_id = flow_id;
    	rule->queue_id = queue_id;
    	hclge_fd_build_arfs_rule(&new_tuples, rule);
 -		ret = hclge_fd_config_rule(hdev, rule);
 -
 -		spin_unlock_bh(&hdev->fd_rule_lock);
 -
 -		if (ret)
 -			return ret;
 -
 -		return rule->location;
 +		hclge_update_fd_list(hdev, rule->state, rule->location, rule);
 +		hdev->fd_active_type = HCLGE_FD_ARFS_ACTIVE;
 +	} else if (rule->queue_id != queue_id) {
 +		rule->queue_id = queue_id;
 +		rule->state = HCLGE_FD_TO_ADD;
 +		set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
 +		hclge_task_schedule(hdev, 0);
    }
 -
    spin_unlock_bh(&hdev->fd_rule_lock);
 -
 -	if (rule->queue_id == queue_id)
 -		return rule->location;
 -
 -	tmp_queue_id = rule->queue_id;
 -	rule->queue_id = queue_id;
 -	ret = hclge_config_action(hdev, HCLGE_FD_STAGE_1, rule);
 -	if (ret) {
 -		rule->queue_id = tmp_queue_id;
 -		return ret;
 -	}
 -
    return rule->location;
  }
@@@ -7084,6 -6597,7 +7083,6 @@@ static void hclge_rfs_filter_expire(str
    struct hnae3_handle *handle = &hdev->vport[0].nic;
    struct hclge_fd_rule *rule;
    struct hlist_node *node;
 -	HLIST_HEAD(del_list);
spin_lock_bh(&hdev->fd_rule_lock);
    if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE) {
@@@ -7091,51 -6605,34 +7090,51 @@@
    	return;
    }
    hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
 +		if (rule->state != HCLGE_FD_ACTIVE)
 +			continue;
    	if (rps_may_expire_flow(handle->netdev, rule->queue_id,
    				rule->arfs.flow_id, rule->location)) {
 -			hlist_del_init(&rule->rule_node);
 -			hlist_add_head(&rule->rule_node, &del_list);
 -			hdev->hclge_fd_rule_num--;
 -			clear_bit(rule->location, hdev->fd_bmap);
 +			rule->state = HCLGE_FD_TO_DEL;
 +			set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
 +		}
 +	}
 +	spin_unlock_bh(&hdev->fd_rule_lock);
 +#endif
 +}
 +
 +/* make sure being called after lock up with fd_rule_lock */
 +static int hclge_clear_arfs_rules(struct hclge_dev *hdev)
 +{
 +#ifdef CONFIG_RFS_ACCEL
 +	struct hclge_fd_rule *rule;
 +	struct hlist_node *node;
 +	int ret;
 +
 +	if (hdev->fd_active_type != HCLGE_FD_ARFS_ACTIVE)
 +		return 0;
 +
 +	hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
 +		switch (rule->state) {
 +		case HCLGE_FD_TO_DEL:
 +		case HCLGE_FD_ACTIVE:
 +			ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
 +						   rule->location, NULL, false);
 +			if (ret)
 +				return ret;
 +			fallthrough;
 +		case HCLGE_FD_TO_ADD:
 +			hclge_fd_dec_rule_cnt(hdev, rule->location);
 +			hlist_del(&rule->rule_node);
 +			kfree(rule);
 +			break;
 +		default:
 +			break;
    	}
    }
 -	spin_unlock_bh(&hdev->fd_rule_lock);
 -
 -	hlist_for_each_entry_safe(rule, node, &del_list, rule_node) {
 -		hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
 -				     rule->location, NULL, false);
 -		kfree(rule);
 -	}
 -#endif
 -}
 -
 -/* make sure being called after lock up with fd_rule_lock */
 -static void hclge_clear_arfs_rules(struct hnae3_handle *handle)
 -{
 -#ifdef CONFIG_RFS_ACCEL
 -	struct hclge_vport *vport = hclge_get_vport(handle);
 -	struct hclge_dev *hdev = vport->back;
 +	hclge_sync_fd_state(hdev);
-	if (hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE)
 -		hclge_del_all_fd_entries(handle, true);
  #endif
 +	return 0;
  }
static void hclge_get_cls_key_basic(const struct flow_rule *flow,
@@@ -7317,6 -6814,12 +7316,6 @@@ static int hclge_add_cls_flower(struct 
    struct hclge_fd_rule *rule;
    int ret;
-	if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) {
 -		dev_err(&hdev->pdev->dev,
 -			"please remove all exist fd rules via ethtool first\n");
 -		return -EINVAL;
 -	}
 -
    ret = hclge_check_cls_flower(hdev, cls_flower, tc);
    if (ret) {
    	dev_err(&hdev->pdev->dev,
@@@ -7329,10 -6832,8 +7328,10 @@@
    	return -ENOMEM;
ret = hclge_parse_cls_flower(hdev, cls_flower, rule);
 -	if (ret)
 -		goto err;
 +	if (ret) {
 +		kfree(rule);
 +		return ret;
 +	}
rule->action = HCLGE_FD_ACTION_SELECT_TC;
    rule->cls_flower.tc = tc;
@@@ -7341,10 -6842,22 +7340,10 @@@
    rule->cls_flower.cookie = cls_flower->cookie;
    rule->rule_type = HCLGE_FD_TC_FLOWER_ACTIVE;
-	spin_lock_bh(&hdev->fd_rule_lock);
 -	hclge_clear_arfs_rules(handle);
 -
 -	ret = hclge_fd_config_rule(hdev, rule);
 -
 -	spin_unlock_bh(&hdev->fd_rule_lock);
 -
 -	if (ret) {
 -		dev_err(&hdev->pdev->dev,
 -			"failed to add cls flower rule, ret = %d\n", ret);
 -		goto err;
 -	}
 +	ret = hclge_add_fd_entry_common(hdev, rule);
 +	if (ret)
 +		kfree(rule);
-	return 0;
 -err:
 -	kfree(rule);
    return ret;
  }
@@@ -7381,66 -6894,25 +7380,66 @@@ static int hclge_del_cls_flower(struct 
    ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true, rule->location,
    			   NULL, false);
    if (ret) {
 -		dev_err(&hdev->pdev->dev,
 -			"failed to delete cls flower rule %u, ret = %d\n",
 -			rule->location, ret);
    	spin_unlock_bh(&hdev->fd_rule_lock);
    	return ret;
    }
-	ret = hclge_fd_update_rule_list(hdev, NULL, rule->location, false);
 -	if (ret) {
 -		dev_err(&hdev->pdev->dev,
 -			"failed to delete cls flower rule %u in list, ret = %d\n",
 -			rule->location, ret);
 -		spin_unlock_bh(&hdev->fd_rule_lock);
 -		return ret;
 +	hclge_update_fd_list(hdev, HCLGE_FD_DELETED, rule->location, NULL);
 +	spin_unlock_bh(&hdev->fd_rule_lock);
 +
 +	return 0;
 +}
 +
 +static void hclge_sync_fd_list(struct hclge_dev *hdev, struct hlist_head *hlist)
 +{
 +	struct hclge_fd_rule *rule;
 +	struct hlist_node *node;
 +	int ret = 0;
 +
 +	if (!test_and_clear_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state))
 +		return;
 +
 +	spin_lock_bh(&hdev->fd_rule_lock);
 +
 +	hlist_for_each_entry_safe(rule, node, hlist, rule_node) {
 +		switch (rule->state) {
 +		case HCLGE_FD_TO_ADD:
 +			ret = hclge_fd_config_rule(hdev, rule);
 +			if (ret)
 +				goto out;
 +			rule->state = HCLGE_FD_ACTIVE;
 +			break;
 +		case HCLGE_FD_TO_DEL:
 +			ret = hclge_fd_tcam_config(hdev, HCLGE_FD_STAGE_1, true,
 +						   rule->location, NULL, false);
 +			if (ret)
 +				goto out;
 +			hclge_fd_dec_rule_cnt(hdev, rule->location);
 +			hclge_fd_free_node(hdev, rule);
 +			break;
 +		default:
 +			break;
 +		}
    }
+out:
 +	if (ret)
 +		set_bit(HCLGE_STATE_FD_TBL_CHANGED, &hdev->state);
 +
    spin_unlock_bh(&hdev->fd_rule_lock);
 +}
-	return 0;
 +static void hclge_sync_fd_table(struct hclge_dev *hdev)
 +{
 +	if (test_and_clear_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state)) {
 +		bool clear_list = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE;
 +
 +		hclge_clear_fd_rules_in_list(hdev, clear_list);
 +	}
 +
 +	hclge_sync_fd_user_def_cfg(hdev, false);
 +
 +	hclge_sync_fd_list(hdev, &hdev->fd_rule_list);
  }
static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle)
@@@ -7480,15 -6952,18 +7479,15 @@@ static void hclge_enable_fd(struct hnae
  {
    struct hclge_vport *vport = hclge_get_vport(handle);
    struct hclge_dev *hdev = vport->back;
 -	bool clear;
hdev->fd_en = enable;
 -	clear = hdev->fd_active_type == HCLGE_FD_ARFS_ACTIVE;
-	if (!enable) {
 -		spin_lock_bh(&hdev->fd_rule_lock);
 -		hclge_del_all_fd_entries(handle, clear);
 -		spin_unlock_bh(&hdev->fd_rule_lock);
 -	} else {
 +	if (!enable)
 +		set_bit(HCLGE_STATE_FD_CLEAR_ALL, &hdev->state);
 +	else
    	hclge_restore_fd_entries(handle);
 -	}
 +
 +	hclge_task_schedule(hdev, 0);
  }
static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
@@@ -7649,19 -7124,19 +7648,19 @@@ static int hclge_set_app_loopback(struc
    return ret;
  }
-static int hclge_cfg_serdes_loopback(struct hclge_dev *hdev, bool en,
 +static int hclge_cfg_common_loopback(struct hclge_dev *hdev, bool en,
    			     enum hnae3_loop loop_mode)
  {
 -#define HCLGE_SERDES_RETRY_MS	10
 -#define HCLGE_SERDES_RETRY_NUM	100
 +#define HCLGE_COMMON_LB_RETRY_MS	10
 +#define HCLGE_COMMON_LB_RETRY_NUM	100
-	struct hclge_serdes_lb_cmd *req;
 +	struct hclge_common_lb_cmd *req;
    struct hclge_desc desc;
    int ret, i = 0;
    u8 loop_mode_b;
-	req = (struct hclge_serdes_lb_cmd *)desc.data;
 -	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK, false);
 +	req = (struct hclge_common_lb_cmd *)desc.data;
 +	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK, false);
switch (loop_mode) {
    case HNAE3_LOOP_SERIAL_SERDES:
@@@ -7670,12 -7145,9 +7669,12 @@@
    case HNAE3_LOOP_PARALLEL_SERDES:
    	loop_mode_b = HCLGE_CMD_SERDES_PARALLEL_INNER_LOOP_B;
    	break;
 +	case HNAE3_LOOP_PHY:
 +		loop_mode_b = HCLGE_CMD_GE_PHY_INNER_LOOP_B;
 +		break;
    default:
    	dev_err(&hdev->pdev->dev,
 -			"unsupported serdes loopback mode %d\n", loop_mode);
 +			"unsupported common loopback mode %d\n", loop_mode);
    	return -ENOTSUPP;
    }
@@@ -7689,39 -7161,39 +7688,39 @@@
    ret = hclge_cmd_send(&hdev->hw, &desc, 1);
    if (ret) {
    	dev_err(&hdev->pdev->dev,
 -			"serdes loopback set fail, ret = %d\n", ret);
 +			"common loopback set fail, ret = %d\n", ret);
    	return ret;
    }
do {
 -		msleep(HCLGE_SERDES_RETRY_MS);
 -		hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_SERDES_LOOPBACK,
 +		msleep(HCLGE_COMMON_LB_RETRY_MS);
 +		hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_COMMON_LOOPBACK,
    				   true);
    	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
    	if (ret) {
    		dev_err(&hdev->pdev->dev,
 -				"serdes loopback get, ret = %d\n", ret);
 +				"common loopback get, ret = %d\n", ret);
    		return ret;
    	}
 -	} while (++i < HCLGE_SERDES_RETRY_NUM &&
 -		 !(req->result & HCLGE_CMD_SERDES_DONE_B));
 +	} while (++i < HCLGE_COMMON_LB_RETRY_NUM &&
 +		 !(req->result & HCLGE_CMD_COMMON_LB_DONE_B));
-	if (!(req->result & HCLGE_CMD_SERDES_DONE_B)) {
 -		dev_err(&hdev->pdev->dev, "serdes loopback set timeout\n");
 +	if (!(req->result & HCLGE_CMD_COMMON_LB_DONE_B)) {
 +		dev_err(&hdev->pdev->dev, "common loopback set timeout\n");
    	return -EBUSY;
 -	} else if (!(req->result & HCLGE_CMD_SERDES_SUCCESS_B)) {
 -		dev_err(&hdev->pdev->dev, "serdes loopback set failed in fw\n");
 +	} else if (!(req->result & HCLGE_CMD_COMMON_LB_SUCCESS_B)) {
 +		dev_err(&hdev->pdev->dev, "common loopback set failed in fw\n");
    	return -EIO;
    }
    return ret;
  }
-static int hclge_set_serdes_loopback(struct hclge_dev *hdev, bool en,
 +static int hclge_set_common_loopback(struct hclge_dev *hdev, bool en,
    			     enum hnae3_loop loop_mode)
  {
    int ret;
-	ret = hclge_cfg_serdes_loopback(hdev, en, loop_mode);
 +	ret = hclge_cfg_common_loopback(hdev, en, loop_mode);
    if (ret)
    	return ret;
@@@ -7770,12 -7242,8 +7769,12 @@@ static int hclge_set_phy_loopback(struc
    struct phy_device *phydev = hdev->hw.mac.phydev;
    int ret;
-	if (!phydev)
 +	if (!phydev) {
 +		if (hnae3_dev_phy_imp_supported(hdev))
 +			return hclge_set_common_loopback(hdev, en,
 +							 HNAE3_LOOP_PHY);
    	return -ENOTSUPP;
 +	}
if (en)
    	ret = hclge_enable_phy_loopback(hdev, phydev);
@@@ -7797,12 -7265,13 +7796,12 @@@
    return ret;
  }
-static int hclge_tqp_enable(struct hclge_dev *hdev, unsigned int tqp_id,
 -			    int stream_id, bool enable)
 +static int hclge_tqp_enable_cmd_send(struct hclge_dev *hdev, u16 tqp_id,
 +				     u16 stream_id, bool enable)
  {
    struct hclge_desc desc;
    struct hclge_cfg_com_tqp_queue_cmd *req =
    	(struct hclge_cfg_com_tqp_queue_cmd *)desc.data;
 -	int ret;
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_COM_TQP_QUEUE, false);
    req->tqp_id = cpu_to_le16(tqp_id);
@@@ -7810,30 -7279,20 +7809,30 @@@
    if (enable)
    	req->enable |= 1U << HCLGE_TQP_ENABLE_B;
-	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 -	if (ret)
 -		dev_err(&hdev->pdev->dev,
 -			"Tqp enable fail, status =%d.\n", ret);
 -	return ret;
 +	return hclge_cmd_send(&hdev->hw, &desc, 1);
 +}
 +
 +static int hclge_tqp_enable(struct hnae3_handle *handle, bool enable)
 +{
 +	struct hclge_vport *vport = hclge_get_vport(handle);
 +	struct hclge_dev *hdev = vport->back;
 +	int ret;
 +	u16 i;
 +
 +	for (i = 0; i < handle->kinfo.num_tqps; i++) {
 +		ret = hclge_tqp_enable_cmd_send(hdev, i, 0, enable);
 +		if (ret)
 +			return ret;
 +	}
 +	return 0;
  }
static int hclge_set_loopback(struct hnae3_handle *handle,
    		      enum hnae3_loop loop_mode, bool en)
  {
    struct hclge_vport *vport = hclge_get_vport(handle);
 -	struct hnae3_knic_private_info *kinfo;
    struct hclge_dev *hdev = vport->back;
 -	int i, ret;
 +	int ret;
/* Loopback can be enabled in three places: SSU, MAC, and serdes. By
     * default, SSU loopback is enabled, so if the SMAC and the DMAC are
@@@ -7855,7 -7314,7 +7854,7 @@@
    	break;
    case HNAE3_LOOP_SERIAL_SERDES:
    case HNAE3_LOOP_PARALLEL_SERDES:
 -		ret = hclge_set_serdes_loopback(hdev, en, loop_mode);
 +		ret = hclge_set_common_loopback(hdev, en, loop_mode);
    	break;
    case HNAE3_LOOP_PHY:
    	ret = hclge_set_phy_loopback(hdev, en);
@@@ -7870,12 -7329,14 +7869,12 @@@
    if (ret)
    	return ret;
-	kinfo = &vport->nic.kinfo;
 -	for (i = 0; i < kinfo->num_tqps; i++) {
 -		ret = hclge_tqp_enable(hdev, i, 0, en);
 -		if (ret)
 -			return ret;
 -	}
 +	ret = hclge_tqp_enable(handle, en);
 +	if (ret)
 +		dev_err(&hdev->pdev->dev, "failed to %s tqp in loopback, ret = %d\n",
 +			en ? "enable" : "disable", ret);
-	return 0;
 +	return ret;
  }
static int hclge_set_default_loopback(struct hclge_dev *hdev)
@@@ -7886,11 -7347,11 +7885,11 @@@
    if (ret)
    	return ret;
-	ret = hclge_cfg_serdes_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES);
 +	ret = hclge_cfg_common_loopback(hdev, false, HNAE3_LOOP_SERIAL_SERDES);
    if (ret)
    	return ret;
-	return hclge_cfg_serdes_loopback(hdev, false,
 +	return hclge_cfg_common_loopback(hdev, false,
    				 HNAE3_LOOP_PARALLEL_SERDES);
  }
@@@ -7962,10 -7423,11 +7961,10 @@@ static void hclge_ae_stop(struct hnae3_
  {
    struct hclge_vport *vport = hclge_get_vport(handle);
    struct hclge_dev *hdev = vport->back;
 -	int i;
set_bit(HCLGE_STATE_DOWN, &hdev->state);
    spin_lock_bh(&hdev->fd_rule_lock);
 -	hclge_clear_arfs_rules(handle);
 +	hclge_clear_arfs_rules(hdev);
    spin_unlock_bh(&hdev->fd_rule_lock);
/* If it is not PF reset, the firmware will disable the MAC,
@@@ -7978,7 -7440,8 +7977,7 @@@
    	return;
    }
-	for (i = 0; i < handle->kinfo.num_tqps; i++)
 -		hclge_reset_tqp(handle, i);
 +	hclge_reset_tqp(handle);
hclge_config_mac_tnl_int(hdev, false);
@@@ -8428,7 -7891,7 +8427,7 @@@ int hclge_update_mac_list(struct hclge_
/* if the mac addr is already in the mac list, no need to add a new
     * one into it, just check the mac addr state, convert it to a new
 -	 * new state, or just remove it, or do nothing.
 +	 * state, or just remove it, or do nothing.
     */
    mac_node = hclge_find_mac_node(list, addr);
    if (mac_node) {
@@@ -8617,6 -8080,7 +8616,6 @@@ int hclge_add_mc_addr_common(struct hcl
    if (status)
    	return status;
    status = hclge_add_mac_vlan_tbl(vport, &req, desc);
 -
    /* if already overflow, not to print each time */
    if (status == -ENOSPC &&
        !(vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE))
@@@ -8665,6 -8129,7 +8664,6 @@@ int hclge_rm_mc_addr_common(struct hclg
    	else
    		/* Not all the vfid is zero, update the vfid */
    		status = hclge_add_mac_vlan_tbl(vport, &req, desc);
 -
    } else if (status == -ENOENT) {
    	status = 0;
    }
@@@ -9099,7 -8564,7 +9098,7 @@@ static bool hclge_check_vf_mac_exist(st
    	return true;
vf_idx += HCLGE_VF_VPORT_START_NUM;
 -	for (i = hdev->num_vmdq_vport + 1; i < hdev->num_alloc_vport; i++)
 +	for (i = HCLGE_VF_VPORT_START_NUM; i < hdev->num_alloc_vport; i++)
    	if (i != vf_idx &&
    	    ether_addr_equal(mac_addr, hdev->vport[i].vf_info.mac))
    		return true;
@@@ -9293,29 -8758,6 +9292,29 @@@ static int hclge_set_mac_addr(struct hn
    return 0;
  }
+static int hclge_mii_ioctl(struct hclge_dev *hdev, struct ifreq *ifr, int cmd)
 +{
 +	struct mii_ioctl_data *data = if_mii(ifr);
 +
 +	if (!hnae3_dev_phy_imp_supported(hdev))
 +		return -EOPNOTSUPP;
 +
 +	switch (cmd) {
 +	case SIOCGMIIPHY:
 +		data->phy_id = hdev->hw.mac.phy_addr;
 +		/* this command reads phy id and register at the same time */
 +		fallthrough;
 +	case SIOCGMIIREG:
 +		data->val_out = hclge_read_phy_reg(hdev, data->reg_num);
 +		return 0;
 +
 +	case SIOCSMIIREG:
 +		return hclge_write_phy_reg(hdev, data->reg_num, data->val_in);
 +	default:
 +		return -EOPNOTSUPP;
 +	}
 +}
 +
  static int hclge_do_ioctl(struct hnae3_handle *handle, struct ifreq *ifr,
    		  int cmd)
  {
@@@ -9323,7 -8765,7 +9322,7 @@@
    struct hclge_dev *hdev = vport->back;
if (!hdev->hw.mac.phydev)
 -		return -EOPNOTSUPP;
 +		return hclge_mii_ioctl(hdev, ifr, cmd);
return phy_mii_ioctl(hdev->hw.mac.phydev, ifr, cmd);
  }
@@@ -9480,7 -8922,8 +9479,7 @@@ static int hclge_check_vf_vlan_cmd_stat
  }
static int hclge_set_vf_vlan_common(struct hclge_dev *hdev, u16 vfid,
 -				    bool is_kill, u16 vlan,
 -				    __be16 proto)
 +				    bool is_kill, u16 vlan)
  {
    struct hclge_vport *vport = &hdev->vport[vfid];
    struct hclge_desc desc[2];
@@@ -9546,7 -8989,8 +9545,7 @@@ static int hclge_set_vlan_filter_hw(str
    if (is_kill && !vlan_id)
    	return 0;
-	ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id,
 -				       proto);
 +	ret = hclge_set_vf_vlan_common(hdev, vport_id, is_kill, vlan_id);
    if (ret) {
    	dev_err(&hdev->pdev->dev,
    		"Set %u vport vlan filter config fail, ret =%d.\n",
@@@ -9996,7 -9440,7 +9995,7 @@@ static void hclge_restore_hw_table(stru
    hclge_restore_mac_table_common(vport);
    hclge_restore_vport_vlan_table(vport);
    set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
 -
 +	set_bit(HCLGE_STATE_FD_USER_DEF_CHANGED, &hdev->state);
    hclge_restore_fd_entries(handle);
  }
@@@ -10352,7 -9796,7 +10351,7 @@@ out
    return ret;
  }
-static int hclge_send_reset_tqp_cmd(struct hclge_dev *hdev, u16 queue_id,
 +static int hclge_reset_tqp_cmd_send(struct hclge_dev *hdev, u16 queue_id,
    			    bool enable)
  {
    struct hclge_reset_tqp_queue_cmd *req;
@@@ -10408,114 -9852,94 +10407,114 @@@ u16 hclge_covert_handle_qid_global(stru
    return tqp->index;
  }
-int hclge_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 +static int hclge_reset_tqp_cmd(struct hnae3_handle *handle)
  {
    struct hclge_vport *vport = hclge_get_vport(handle);
    struct hclge_dev *hdev = vport->back;
 -	int reset_try_times = 0;
 +	u16 reset_try_times = 0;
    int reset_status;
    u16 queue_gid;
    int ret;
 +	u16 i;
-	queue_gid = hclge_covert_handle_qid_global(handle, queue_id);
 -
 -	ret = hclge_tqp_enable(hdev, queue_id, 0, false);
 -	if (ret) {
 -		dev_err(&hdev->pdev->dev, "Disable tqp fail, ret = %d\n", ret);
 -		return ret;
 -	}
 +	for (i = 0; i < handle->kinfo.num_tqps; i++) {
 +		queue_gid = hclge_covert_handle_qid_global(handle, i);
 +		ret = hclge_reset_tqp_cmd_send(hdev, queue_gid, true);
 +		if (ret) {
 +			dev_err(&hdev->pdev->dev,
 +				"failed to send reset tqp cmd, ret = %d\n",
 +				ret);
 +			return ret;
 +		}
-	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
 -	if (ret) {
 -		dev_err(&hdev->pdev->dev,
 -			"Send reset tqp cmd fail, ret = %d\n", ret);
 -		return ret;
 -	}
 +		while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
 +			reset_status = hclge_get_reset_status(hdev, queue_gid);
 +			if (reset_status)
 +				break;
-	while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
 -		reset_status = hclge_get_reset_status(hdev, queue_gid);
 -		if (reset_status)
 -			break;
 +			/* Wait for tqp hw reset */
 +			usleep_range(1000, 1200);
 +		}
-		/* Wait for tqp hw reset */
 -		usleep_range(1000, 1200);
 -	}
 +		if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
 +			dev_err(&hdev->pdev->dev,
 +				"wait for tqp hw reset timeout\n");
 +			return -ETIME;
 +		}
-	if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
 -		dev_err(&hdev->pdev->dev, "Reset TQP fail\n");
 -		return ret;
 +		ret = hclge_reset_tqp_cmd_send(hdev, queue_gid, false);
 +		if (ret) {
 +			dev_err(&hdev->pdev->dev,
 +				"failed to deassert soft reset, ret = %d\n",
 +				ret);
 +			return ret;
 +		}
 +		reset_try_times = 0;
    }
 -
 -	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
 -	if (ret)
 -		dev_err(&hdev->pdev->dev,
 -			"Deassert the soft reset fail, ret = %d\n", ret);
 -
 -	return ret;
 +	return 0;
  }
-void hclge_reset_vf_queue(struct hclge_vport *vport, u16 queue_id)
 +static int hclge_reset_rcb(struct hnae3_handle *handle)
  {
 -	struct hnae3_handle *handle = &vport->nic;
 +#define HCLGE_RESET_RCB_NOT_SUPPORT	0U
 +#define HCLGE_RESET_RCB_SUCCESS		1U
 +
 +	struct hclge_vport *vport = hclge_get_vport(handle);
    struct hclge_dev *hdev = vport->back;
 -	int reset_try_times = 0;
 -	int reset_status;
 +	struct hclge_reset_cmd *req;
 +	struct hclge_desc desc;
 +	u8 return_status;
    u16 queue_gid;
    int ret;
-	if (queue_id >= handle->kinfo.num_tqps) {
 -		dev_warn(&hdev->pdev->dev, "Invalid vf queue id(%u)\n",
 -			 queue_id);
 -		return;
 -	}
 +	queue_gid = hclge_covert_handle_qid_global(handle, 0);
-	queue_gid = hclge_covert_handle_qid_global(&vport->nic, queue_id);
 +	req = (struct hclge_reset_cmd *)desc.data;
 +	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CFG_RST_TRIGGER, false);
 +	hnae3_set_bit(req->fun_reset_rcb, HCLGE_CFG_RESET_RCB_B, 1);
 +	req->fun_reset_rcb_vqid_start = cpu_to_le16(queue_gid);
 +	req->fun_reset_rcb_vqid_num = cpu_to_le16(handle->kinfo.num_tqps);
-	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, true);
 +	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
    if (ret) {
 -		dev_warn(&hdev->pdev->dev,
 -			 "Send reset tqp cmd fail, ret = %d\n", ret);
 -		return;
 +		dev_err(&hdev->pdev->dev,
 +			"failed to send rcb reset cmd, ret = %d\n", ret);
 +		return ret;
    }
-	while (reset_try_times++ < HCLGE_TQP_RESET_TRY_TIMES) {
 -		reset_status = hclge_get_reset_status(hdev, queue_gid);
 -		if (reset_status)
 -			break;
 +	return_status = req->fun_reset_rcb_return_status;
 +	if (return_status == HCLGE_RESET_RCB_SUCCESS)
 +		return 0;
-		/* Wait for tqp hw reset */
 -		usleep_range(1000, 1200);
 +	if (return_status != HCLGE_RESET_RCB_NOT_SUPPORT) {
 +		dev_err(&hdev->pdev->dev, "failed to reset rcb, ret = %u\n",
 +			return_status);
 +		return -EIO;
    }
-	if (reset_try_times >= HCLGE_TQP_RESET_TRY_TIMES) {
 -		dev_warn(&hdev->pdev->dev, "Reset TQP fail\n");
 -		return;
 +	/* if reset rcb cmd is unsupported, we need to send reset tqp cmd
 +	 * again to reset all tqps
 +	 */
 +	return hclge_reset_tqp_cmd(handle);
 +}
 +
 +int hclge_reset_tqp(struct hnae3_handle *handle)
 +{
 +	struct hclge_vport *vport = hclge_get_vport(handle);
 +	struct hclge_dev *hdev = vport->back;
 +	int ret;
 +
 +	/* only need to disable PF's tqp */
 +	if (!vport->vport_id) {
 +		ret = hclge_tqp_enable(handle, false);
 +		if (ret) {
 +			dev_err(&hdev->pdev->dev,
 +				"failed to disable tqp, ret = %d\n", ret);
 +			return ret;
 +		}
    }
-	ret = hclge_send_reset_tqp_cmd(hdev, queue_gid, false);
 -	if (ret)
 -		dev_warn(&hdev->pdev->dev,
 -			 "Deassert the soft reset fail, ret = %d\n", ret);
 +	return hclge_reset_rcb(handle);
  }
static u32 hclge_get_fw_version(struct hnae3_handle *handle)
@@@ -10588,10 -10012,9 +10587,10 @@@ static void hclge_get_pauseparam(struc
  {
    struct hclge_vport *vport = hclge_get_vport(handle);
    struct hclge_dev *hdev = vport->back;
 -	struct phy_device *phydev = hdev->hw.mac.phydev;
 +	u8 media_type = hdev->hw.mac.media_type;
-	*auto_neg = phydev ? hclge_get_autoneg(handle) : 0;
 +	*auto_neg = (media_type == HNAE3_MEDIA_TYPE_COPPER) ?
 +		    hclge_get_autoneg(handle) : 0;
if (hdev->tm_info.fc_mode == HCLGE_FC_PFC) {
    	*rx_en = 0;
@@@ -10637,7 -10060,7 +10636,7 @@@ static int hclge_set_pauseparam(struct 
    struct phy_device *phydev = hdev->hw.mac.phydev;
    u32 fc_autoneg;
-	if (phydev) {
 +	if (phydev || hnae3_dev_phy_imp_supported(hdev)) {
    	fc_autoneg = hclge_get_autoneg(handle);
    	if (auto_neg != fc_autoneg) {
    		dev_info(&hdev->pdev->dev,
@@@ -10656,7 -10079,7 +10655,7 @@@
hclge_record_user_pauseparam(hdev, rx_en, tx_en);
-	if (!auto_neg)
 +	if (!auto_neg || hnae3_dev_phy_imp_supported(hdev))
    	return hclge_cfg_pauseparam(hdev, rx_en, tx_en);
if (phydev)
@@@ -10758,6 -10181,7 +10757,6 @@@ static void hclge_info_show(struct hclg
    dev_info(dev, "Desc num per TX queue: %u\n", hdev->num_tx_desc);
    dev_info(dev, "Desc num per RX queue: %u\n", hdev->num_rx_desc);
    dev_info(dev, "Numbers of vports: %u\n", hdev->num_alloc_vport);
 -	dev_info(dev, "Numbers of vmdp vports: %u\n", hdev->num_vmdq_vport);
    dev_info(dev, "Numbers of VF for this PF: %u\n", hdev->num_req_vfs);
    dev_info(dev, "HW tc map: 0x%x\n", hdev->hw_tc_map);
    dev_info(dev, "Total buffer size for TX/RX: %u\n", hdev->pkt_buf_size);
@@@ -10872,35 -10296,39 +10871,35 @@@ static int hclge_init_client_instance(s
    			      struct hnae3_ae_dev *ae_dev)
  {
    struct hclge_dev *hdev = ae_dev->priv;
 -	struct hclge_vport *vport;
 -	int i, ret;
 -
 -	for (i = 0; i <  hdev->num_vmdq_vport + 1; i++) {
 -		vport = &hdev->vport[i];
 +	struct hclge_vport *vport = &hdev->vport[0];
 +	int ret;
-		switch (client->type) {
 -		case HNAE3_CLIENT_KNIC:
 -			hdev->nic_client = client;
 -			vport->nic.client = client;
 -			ret = hclge_init_nic_client_instance(ae_dev, vport);
 -			if (ret)
 -				goto clear_nic;
 +	switch (client->type) {
 +	case HNAE3_CLIENT_KNIC:
 +		hdev->nic_client = client;
 +		vport->nic.client = client;
 +		ret = hclge_init_nic_client_instance(ae_dev, vport);
 +		if (ret)
 +			goto clear_nic;
-			ret = hclge_init_roce_client_instance(ae_dev, vport);
 -			if (ret)
 -				goto clear_roce;
 +		ret = hclge_init_roce_client_instance(ae_dev, vport);
 +		if (ret)
 +			goto clear_roce;
-			break;
 -		case HNAE3_CLIENT_ROCE:
 -			if (hnae3_dev_roce_supported(hdev)) {
 -				hdev->roce_client = client;
 -				vport->roce.client = client;
 -			}
 +		break;
 +	case HNAE3_CLIENT_ROCE:
 +		if (hnae3_dev_roce_supported(hdev)) {
 +			hdev->roce_client = client;
 +			vport->roce.client = client;
 +		}
-			ret = hclge_init_roce_client_instance(ae_dev, vport);
 -			if (ret)
 -				goto clear_roce;
 +		ret = hclge_init_roce_client_instance(ae_dev, vport);
 +		if (ret)
 +			goto clear_roce;
-			break;
 -		default:
 -			return -EINVAL;
 -		}
 +		break;
 +	default:
 +		return -EINVAL;
    }
return 0;
@@@ -10919,27 -10347,32 +10918,27 @@@ static void hclge_uninit_client_instanc
    				 struct hnae3_ae_dev *ae_dev)
  {
    struct hclge_dev *hdev = ae_dev->priv;
 -	struct hclge_vport *vport;
 -	int i;
 +	struct hclge_vport *vport = &hdev->vport[0];
-	for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
 -		vport = &hdev->vport[i];
 -		if (hdev->roce_client) {
 -			clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state);
 -			while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
 -				msleep(HCLGE_WAIT_RESET_DONE);
 -
 -			hdev->roce_client->ops->uninit_instance(&vport->roce,
 -								0);
 -			hdev->roce_client = NULL;
 -			vport->roce.client = NULL;
 -		}
 -		if (client->type == HNAE3_CLIENT_ROCE)
 -			return;
 -		if (hdev->nic_client && client->ops->uninit_instance) {
 -			clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state);
 -			while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
 -				msleep(HCLGE_WAIT_RESET_DONE);
 -
 -			client->ops->uninit_instance(&vport->nic, 0);
 -			hdev->nic_client = NULL;
 -			vport->nic.client = NULL;
 -		}
 +	if (hdev->roce_client) {
 +		clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state);
 +		while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
 +			msleep(HCLGE_WAIT_RESET_DONE);
 +
 +		hdev->roce_client->ops->uninit_instance(&vport->roce, 0);
 +		hdev->roce_client = NULL;
 +		vport->roce.client = NULL;
 +	}
 +	if (client->type == HNAE3_CLIENT_ROCE)
 +		return;
 +	if (hdev->nic_client && client->ops->uninit_instance) {
 +		clear_bit(HCLGE_STATE_NIC_REGISTERED, &hdev->state);
 +		while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
 +			msleep(HCLGE_WAIT_RESET_DONE);
 +
 +		client->ops->uninit_instance(&vport->nic, 0);
 +		hdev->nic_client = NULL;
 +		vport->nic.client = NULL;
    }
  }
@@@ -11058,11 -10491,10 +11057,11 @@@ static void hclge_state_uninit(struct h
    	cancel_delayed_work_sync(&hdev->service_task);
  }
-static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev)
 +static void hclge_reset_prepare_general(struct hnae3_ae_dev *ae_dev,
 +					enum hnae3_reset_type rst_type)
  {
 -#define HCLGE_FLR_RETRY_WAIT_MS	500
 -#define HCLGE_FLR_RETRY_CNT	5
 +#define HCLGE_RESET_RETRY_WAIT_MS	500
 +#define HCLGE_RESET_RETRY_CNT	5
struct hclge_dev *hdev = ae_dev->priv;
    int retry_cnt = 0;
@@@ -11071,34 -10503,30 +11070,34 @@@
  retry:
    down(&hdev->reset_sem);
    set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
 -	hdev->reset_type = HNAE3_FLR_RESET;
 +	hdev->reset_type = rst_type;
    ret = hclge_reset_prepare(hdev);
    if (ret || hdev->reset_pending) {
 -		dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n",
 +		dev_err(&hdev->pdev->dev, "fail to prepare to reset, ret=%d\n",
    		ret);
    	if (hdev->reset_pending ||
 -		    retry_cnt++ < HCLGE_FLR_RETRY_CNT) {
 +		    retry_cnt++ < HCLGE_RESET_RETRY_CNT) {
    		dev_err(&hdev->pdev->dev,
    			"reset_pending:0x%lx, retry_cnt:%d\n",
    			hdev->reset_pending, retry_cnt);
    		clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
    		up(&hdev->reset_sem);
 -			msleep(HCLGE_FLR_RETRY_WAIT_MS);
 +			msleep(HCLGE_RESET_RETRY_WAIT_MS);
    		goto retry;
    	}
    }
-	/* disable misc vector before FLR done */
 +	/* disable misc vector before reset done */
    hclge_enable_vector(&hdev->misc_vector, false);
    set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
 -	hdev->rst_stats.flr_rst_cnt++;
 +
 +	if (hdev->reset_type == HNAE3_FLR_RESET)
 +		hdev->rst_stats.flr_rst_cnt++;
 +	else if (hdev->reset_type == HNAE3_FUNC_RESET)
 +		hdev->rst_stats.pf_rst_cnt++;
  }
-static void hclge_flr_done(struct hnae3_ae_dev *ae_dev)
 +static void hclge_reset_done(struct hnae3_ae_dev *ae_dev)
  {
    struct hclge_dev *hdev = ae_dev->priv;
    int ret;
@@@ -11209,8 -10637,7 +11208,8 @@@ static int hclge_init_ae_dev(struct hna
    if (ret)
    	goto err_msi_irq_uninit;
-	if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER) {
 +	if (hdev->hw.mac.media_type == HNAE3_MEDIA_TYPE_COPPER &&
 +	    !hnae3_dev_phy_imp_supported(hdev)) {
    	ret = hclge_mac_mdio_config(hdev);
    	if (ret)
    		goto err_msi_irq_uninit;
@@@ -11603,13 -11030,6 +11602,13 @@@ static int hclge_reset_ae_dev(struct hn
    	return ret;
    }
+	ret = hclge_tp_port_init(hdev);
 +	if (ret) {
 +		dev_err(&pdev->dev, "failed to init tp port, ret = %d\n",
 +			ret);
 +		return ret;
 +	}
 +
    ret = hclge_config_tso(hdev, HCLGE_TSO_MSS_MIN, HCLGE_TSO_MSS_MAX);
    if (ret) {
    	dev_err(&pdev->dev, "Enable tso fail, ret =%d\n", ret);
@@@ -11700,7 -11120,6 +11699,7 @@@ static void hclge_uninit_ae_dev(struct 
    hclge_misc_affinity_teardown(hdev);
    hclge_state_uninit(hdev);
    hclge_uninit_mac_table(hdev);
 +	hclge_del_all_fd_entries(hdev);
if (mac->phydev)
    	mdiobus_unregister(mac->mdio_bus);
@@@ -11791,7 -11210,7 +11790,7 @@@ static int hclge_set_channels(struct hn
    if (ret)
    	return ret;
- 	/* RSS indirection table has been configuared by user */
+ 	/* RSS indirection table has been configured by user */
    if (rxfh_configured)
    	goto out;
@@@ -11960,6 -11379,7 +11959,6 @@@ static int hclge_get_64_bit_regs(struc
  #define REG_LEN_PER_LINE	(REG_NUM_PER_LINE * sizeof(u32))
  #define REG_SEPARATOR_LINE	1
  #define REG_NUM_REMAIN_MASK	3
 -#define BD_LIST_MAX_NUM		30
int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc)
  {
@@@ -12053,19 -11473,15 +12052,19 @@@ static int hclge_get_dfx_reg_len(struc
  {
    u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list);
    int data_len_per_desc, bd_num, i;
 -	int bd_num_list[BD_LIST_MAX_NUM];
 +	int *bd_num_list;
    u32 data_len;
    int ret;
+	bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL);
 +	if (!bd_num_list)
 +		return -ENOMEM;
 +
    ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num);
    if (ret) {
    	dev_err(&hdev->pdev->dev,
    		"Get dfx reg bd num fail, status is %d.\n", ret);
 -		return ret;
 +		goto out;
    }
data_len_per_desc = sizeof_field(struct hclge_desc, data);
@@@ -12076,8 -11492,6 +12075,8 @@@
    	*len += (data_len / REG_LEN_PER_LINE + 1) * REG_LEN_PER_LINE;
    }
+out:
 +	kfree(bd_num_list);
    return ret;
  }
@@@ -12085,20 -11499,16 +12084,20 @@@ static int hclge_get_dfx_reg(struct hcl
  {
    u32 dfx_reg_type_num = ARRAY_SIZE(hclge_dfx_bd_offset_list);
    int bd_num, bd_num_max, buf_len, i;
 -	int bd_num_list[BD_LIST_MAX_NUM];
    struct hclge_desc *desc_src;
 +	int *bd_num_list;
    u32 *reg = data;
    int ret;
+	bd_num_list = kcalloc(dfx_reg_type_num, sizeof(int), GFP_KERNEL);
 +	if (!bd_num_list)
 +		return -ENOMEM;
 +
    ret = hclge_get_dfx_reg_bd_num(hdev, bd_num_list, dfx_reg_type_num);
    if (ret) {
    	dev_err(&hdev->pdev->dev,
    		"Get dfx reg bd num fail, status is %d.\n", ret);
 -		return ret;
 +		goto out;
    }
bd_num_max = bd_num_list[0];
@@@ -12107,10 -11517,8 +12106,10 @@@
buf_len = sizeof(*desc_src) * bd_num_max;
    desc_src = kzalloc(buf_len, GFP_KERNEL);
 -	if (!desc_src)
 -		return -ENOMEM;
 +	if (!desc_src) {
 +		ret = -ENOMEM;
 +		goto out;
 +	}
for (i = 0; i < dfx_reg_type_num; i++) {
    	bd_num = bd_num_list[i];
@@@ -12126,8 -11534,6 +12125,8 @@@
    }
kfree(desc_src);
 +out:
 +	kfree(bd_num_list);
    return ret;
  }
@@@ -12471,8 -11877,8 +12470,8 @@@ static int hclge_get_module_eeprom(stru
  static const struct hnae3_ae_ops hclge_ops = {
    .init_ae_dev = hclge_init_ae_dev,
    .uninit_ae_dev = hclge_uninit_ae_dev,
 -	.flr_prepare = hclge_flr_prepare,
 -	.flr_done = hclge_flr_done,
 +	.reset_prepare = hclge_reset_prepare_general,
 +	.reset_done = hclge_reset_done,
    .init_client_instance = hclge_init_client_instance,
    .uninit_client_instance = hclge_uninit_client_instance,
    .map_ring_to_vector = hclge_map_ring_to_vector,
@@@ -12537,6 -11943,7 +12536,6 @@@
    .get_link_mode = hclge_get_link_mode,
    .add_fd_entry = hclge_add_fd_entry,
    .del_fd_entry = hclge_del_fd_entry,
 -	.del_all_fd_entries = hclge_del_all_fd_entries,
    .get_fd_rule_cnt = hclge_get_fd_rule_cnt,
    .get_fd_rule_info = hclge_get_fd_rule_info,
    .get_fd_all_rules = hclge_get_all_rules,
@@@ -12564,8 -11971,6 +12563,8 @@@
    .add_cls_flower = hclge_add_cls_flower,
    .del_cls_flower = hclge_del_cls_flower,
    .cls_flower_active = hclge_is_cls_flower_active,
 +	.get_phy_link_ksettings = hclge_get_phy_link_ksettings,
 +	.set_phy_link_ksettings = hclge_set_phy_link_ksettings,
  };
static struct hnae3_ae_algo ae_algo = {
diff --combined drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index c7d5c1726499,e295d359e912..07aa26ba0966
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@@ -497,6 -497,7 +497,6 @@@ void hclgevf_update_link_status(struct
link_state =
    	test_bit(HCLGEVF_STATE_DOWN, &hdev->state) ? 0 : link_state;
 -
    if (link_state != hdev->hw.mac.link) {
    	client->ops->link_status_change(handle, !!link_state);
    	if (rclient && rclient->ops->link_status_change)
@@@ -706,9 -707,6 +706,9 @@@ static int hclgevf_set_rss_tc_mode(stru
    		      (tc_valid[i] & 0x1));
    	hnae3_set_field(mode, HCLGEVF_RSS_TC_SIZE_M,
    			HCLGEVF_RSS_TC_SIZE_S, tc_size[i]);
 +		hnae3_set_bit(mode, HCLGEVF_RSS_TC_SIZE_MSB_B,
 +			      tc_size[i] >> HCLGEVF_RSS_TC_SIZE_MSB_OFFSET &
 +			      0x1);
    	hnae3_set_field(mode, HCLGEVF_RSS_TC_OFFSET_M,
    			HCLGEVF_RSS_TC_OFFSET_S, tc_offset[i]);
@@@ -1243,11 -1241,12 +1243,11 @@@ static void hclgevf_sync_promisc_mode(s
    }
  }
-static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id,
 -			      int stream_id, bool enable)
 +static int hclgevf_tqp_enable_cmd_send(struct hclgevf_dev *hdev, u16 tqp_id,
 +				       u16 stream_id, bool enable)
  {
    struct hclgevf_cfg_com_tqp_queue_cmd *req;
    struct hclgevf_desc desc;
 -	int status;
req = (struct hclgevf_cfg_com_tqp_queue_cmd *)desc.data;
@@@ -1258,22 -1257,12 +1258,22 @@@
    if (enable)
    	req->enable |= 1U << HCLGEVF_TQP_ENABLE_B;
-	status = hclgevf_cmd_send(&hdev->hw, &desc, 1);
 -	if (status)
 -		dev_err(&hdev->pdev->dev,
 -			"TQP enable fail, status =%d.\n", status);
 +	return hclgevf_cmd_send(&hdev->hw, &desc, 1);
 +}
-	return status;
 +static int hclgevf_tqp_enable(struct hnae3_handle *handle, bool enable)
 +{
 +	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 +	int ret;
 +	u16 i;
 +
 +	for (i = 0; i < handle->kinfo.num_tqps; i++) {
 +		ret = hclgevf_tqp_enable_cmd_send(hdev, i, 0, enable);
 +		if (ret)
 +			return ret;
 +	}
 +
 +	return 0;
  }
static void hclgevf_reset_tqp_stats(struct hnae3_handle *handle)
@@@ -1722,39 -1711,20 +1722,39 @@@ static int hclgevf_en_hw_strip_rxvtag(s
    return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0);
  }
-static int hclgevf_reset_tqp(struct hnae3_handle *handle, u16 queue_id)
 +static int hclgevf_reset_tqp(struct hnae3_handle *handle)
  {
 +#define HCLGEVF_RESET_ALL_QUEUE_DONE	1U
    struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
    struct hclge_vf_to_pf_msg send_msg;
 +	u8 return_status = 0;
    int ret;
 +	u16 i;
/* disable vf queue before send queue reset msg to PF */
 -	ret = hclgevf_tqp_enable(hdev, queue_id, 0, false);
 -	if (ret)
 +	ret = hclgevf_tqp_enable(handle, false);
 +	if (ret) {
 +		dev_err(&hdev->pdev->dev, "failed to disable tqp, ret = %d\n",
 +			ret);
    	return ret;
 +	}
hclgevf_build_send_msg(&send_msg, HCLGE_MBX_QUEUE_RESET, 0);
 -	memcpy(send_msg.data, &queue_id, sizeof(queue_id));
 -	return hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0);
 +
 +	ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, &return_status,
 +				   sizeof(return_status));
 +	if (ret || return_status == HCLGEVF_RESET_ALL_QUEUE_DONE)
 +		return ret;
 +
 +	for (i = 1; i < handle->kinfo.num_tqps; i++) {
 +		hclgevf_build_send_msg(&send_msg, HCLGE_MBX_QUEUE_RESET, 0);
 +		memcpy(send_msg.data, &i, sizeof(i));
 +		ret = hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0);
 +		if (ret)
 +			return ret;
 +	}
 +
 +	return 0;
  }
static int hclgevf_set_mtu(struct hnae3_handle *handle, int new_mtu)
@@@ -2114,11 -2084,10 +2114,11 @@@ static void hclgevf_enable_vector(struc
    writel(en ? 1 : 0, vector->addr);
  }
-static void hclgevf_flr_prepare(struct hnae3_ae_dev *ae_dev)
 +static void hclgevf_reset_prepare_general(struct hnae3_ae_dev *ae_dev,
 +					  enum hnae3_reset_type rst_type)
  {
 -#define HCLGEVF_FLR_RETRY_WAIT_MS	500
 -#define HCLGEVF_FLR_RETRY_CNT		5
 +#define HCLGEVF_RESET_RETRY_WAIT_MS	500
 +#define HCLGEVF_RESET_RETRY_CNT		5
struct hclgevf_dev *hdev = ae_dev->priv;
    int retry_cnt = 0;
@@@ -2127,31 -2096,29 +2127,31 @@@
  retry:
    down(&hdev->reset_sem);
    set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
 -	hdev->reset_type = HNAE3_FLR_RESET;
 +	hdev->reset_type = rst_type;
    ret = hclgevf_reset_prepare(hdev);
    if (ret) {
 -		dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n",
 +		dev_err(&hdev->pdev->dev, "fail to prepare to reset, ret=%d\n",
    		ret);
    	if (hdev->reset_pending ||
 -		    retry_cnt++ < HCLGEVF_FLR_RETRY_CNT) {
 +		    retry_cnt++ < HCLGEVF_RESET_RETRY_CNT) {
    		dev_err(&hdev->pdev->dev,
    			"reset_pending:0x%lx, retry_cnt:%d\n",
    			hdev->reset_pending, retry_cnt);
    		clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
    		up(&hdev->reset_sem);
 -			msleep(HCLGEVF_FLR_RETRY_WAIT_MS);
 +			msleep(HCLGEVF_RESET_RETRY_WAIT_MS);
    		goto retry;
    	}
    }
-	/* disable misc vector before FLR done */
 +	/* disable misc vector before reset done */
    hclgevf_enable_vector(&hdev->misc_vector, false);
 -	hdev->rst_stats.flr_rst_cnt++;
 +
 +	if (hdev->reset_type == HNAE3_FLR_RESET)
 +		hdev->rst_stats.flr_rst_cnt++;
  }
-static void hclgevf_flr_done(struct hnae3_ae_dev *ae_dev)
 +static void hclgevf_reset_done(struct hnae3_ae_dev *ae_dev)
  {
    struct hclgevf_dev *hdev = ae_dev->priv;
    int ret;
@@@ -2226,7 -2193,7 +2226,7 @@@ static void hclgevf_reset_service_task(
if (test_and_clear_bit(HCLGEVF_RESET_PENDING,
    		       &hdev->reset_state)) {
- 		/* PF has initmated that it is about to reset the hardware.
+ 		/* PF has intimated that it is about to reset the hardware.
    	 * We now have to poll & check if hardware has actually
    	 * completed the reset sequence. On hardware reset completion,
    	 * VF needs to reset the client and ae device.
@@@ -2389,6 -2356,7 +2389,6 @@@ static enum hclgevf_evt_cause hclgevf_c
    /* fetch the events from their corresponding regs */
    cmdq_stat_reg = hclgevf_read_dev(&hdev->hw,
    				 HCLGEVF_VECTOR0_CMDQ_STATE_REG);
 -
    if (BIT(HCLGEVF_VECTOR0_RST_INT_B) & cmdq_stat_reg) {
    	rst_ing_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING);
    	dev_info(&hdev->pdev->dev,
@@@ -2656,25 -2624,28 +2656,25 @@@ static int hclgevf_ae_start(struct hnae
  {
    struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+ 	clear_bit(HCLGEVF_STATE_DOWN, &hdev->state);
+ 
    hclgevf_reset_tqp_stats(handle);
hclgevf_request_link_info(hdev);
hclgevf_update_link_mode(hdev);
- 	clear_bit(HCLGEVF_STATE_DOWN, &hdev->state);
- 
    return 0;
  }
static void hclgevf_ae_stop(struct hnae3_handle *handle)
  {
    struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
 -	int i;
set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
if (hdev->reset_type != HNAE3_VF_RESET)
 -		for (i = 0; i < handle->kinfo.num_tqps; i++)
 -			if (hclgevf_reset_tqp(handle, i))
 -				break;
 +		hclgevf_reset_tqp(handle);
hclgevf_reset_tqp_stats(handle);
    hclgevf_update_link_status(hdev, 0);
@@@ -3526,7 -3497,7 +3526,7 @@@ static int hclgevf_set_channels(struct 
    if (ret)
    	return ret;
- 	/* RSS indirection table has been configuared by user */
+ 	/* RSS indirection table has been configured by user */
    if (rxfh_configured)
    	goto out;
@@@ -3751,8 -3722,8 +3751,8 @@@ void hclgevf_update_port_base_vlan_info
  static const struct hnae3_ae_ops hclgevf_ops = {
    .init_ae_dev = hclgevf_init_ae_dev,
    .uninit_ae_dev = hclgevf_uninit_ae_dev,
 -	.flr_prepare = hclgevf_flr_prepare,
 -	.flr_done = hclgevf_flr_done,
 +	.reset_prepare = hclgevf_reset_prepare_general,
 +	.reset_done = hclgevf_reset_done,
    .init_client_instance = hclgevf_init_client_instance,
    .uninit_client_instance = hclgevf_uninit_client_instance,
    .start = hclgevf_ae_start,
diff --combined drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index e8230da29f05,d627b59ad446..291e61ac3e44
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@@ -578,6 -578,9 +578,9 @@@ static void i40e_dbg_dump_desc(int cnt
    case RING_TYPE_XDP:
    	ring = kmemdup(vsi->xdp_rings[ring_id], sizeof(*ring), GFP_KERNEL);
    	break;
+ 	default:
+ 		ring = NULL;
+ 		break;
    }
    if (!ring)
    	return;
@@@ -651,7 -654,7 +654,7 @@@ static void i40e_dbg_dump_vsi_no_seid(s
  }
/**
 - * i40e_dbg_dump_stats - handles dump stats write into command datum
 + * i40e_dbg_dump_eth_stats - handles dump stats write into command datum
   * @pf: the i40e_pf created in command write
   * @estats: the eth stats structure to be dumped
   **/
@@@ -1638,7 -1641,7 +1641,7 @@@ static const struct file_operations i40
  static char i40e_dbg_netdev_ops_buf[256] = "";
/**
 - * i40e_dbg_netdev_ops - read for netdev_ops datum
 + * i40e_dbg_netdev_ops_read - read for netdev_ops datum
   * @filp: the opened file
   * @buffer: where to write the data for the user to read
   * @count: the size of the user's buffer
diff --combined drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index c4c167650b6b,0e92668012e3..040a01400b85
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@@ -212,7 -212,7 +212,7 @@@ static void __i40e_add_stat_strings(u8 
  }
/**
 - * 40e_add_stat_strings - copy stat strings into ethtool buffer
 + * i40e_add_stat_strings - copy stat strings into ethtool buffer
   * @p: ethtool supplied buffer
   * @stats: stat definitions array
   *
@@@ -232,6 -232,8 +232,8 @@@
    I40E_STAT(struct i40e_vsi, _name, _stat)
  #define I40E_VEB_STAT(_name, _stat) \
    I40E_STAT(struct i40e_veb, _name, _stat)
+ #define I40E_VEB_TC_STAT(_name, _stat) \
+ 	I40E_STAT(struct i40e_cp_veb_tc_stats, _name, _stat)
  #define I40E_PFC_STAT(_name, _stat) \
    I40E_STAT(struct i40e_pfc_stats, _name, _stat)
  #define I40E_QUEUE_STAT(_name, _stat) \
@@@ -266,11 -268,18 +268,18 @@@ static const struct i40e_stats i40e_gst
    I40E_VEB_STAT("veb.rx_unknown_protocol", stats.rx_unknown_protocol),
  };
+ struct i40e_cp_veb_tc_stats {
+ 	u64 tc_rx_packets;
+ 	u64 tc_rx_bytes;
+ 	u64 tc_tx_packets;
+ 	u64 tc_tx_bytes;
+ };
+ 
  static const struct i40e_stats i40e_gstrings_veb_tc_stats[] = {
- 	I40E_VEB_STAT("veb.tc_%u_tx_packets", tc_stats.tc_tx_packets),
- 	I40E_VEB_STAT("veb.tc_%u_tx_bytes", tc_stats.tc_tx_bytes),
- 	I40E_VEB_STAT("veb.tc_%u_rx_packets", tc_stats.tc_rx_packets),
- 	I40E_VEB_STAT("veb.tc_%u_rx_bytes", tc_stats.tc_rx_bytes),
+ 	I40E_VEB_TC_STAT("veb.tc_%u_tx_packets", tc_tx_packets),
+ 	I40E_VEB_TC_STAT("veb.tc_%u_tx_bytes", tc_tx_bytes),
+ 	I40E_VEB_TC_STAT("veb.tc_%u_rx_packets", tc_rx_packets),
+ 	I40E_VEB_TC_STAT("veb.tc_%u_rx_bytes", tc_rx_bytes),
  };
static const struct i40e_stats i40e_gstrings_misc_stats[] = {
@@@ -1101,6 -1110,7 +1110,7 @@@ static int i40e_get_link_ksettings(stru
/* Set flow control settings */
    ethtool_link_ksettings_add_link_mode(ks, supported, Pause);
+ 	ethtool_link_ksettings_add_link_mode(ks, supported, Asym_Pause);
switch (hw->fc.requested_mode) {
    case I40E_FC_FULL:
@@@ -2216,6 -2226,29 +2226,29 @@@ static int i40e_get_sset_count(struct n
    }
  }
+ /**
+  * i40e_get_veb_tc_stats - copy VEB TC statistics to formatted structure
+  * @tc: the TC statistics in VEB structure (veb->tc_stats)
+  * @i: the index of traffic class in (veb->tc_stats) structure to copy
+  *
+  * Copy VEB TC statistics from structure of arrays (veb->tc_stats) to
+  * one dimensional structure i40e_cp_veb_tc_stats.
+  * Produce formatted i40e_cp_veb_tc_stats structure of the VEB TC
+  * statistics for the given TC.
+  **/
+ static struct i40e_cp_veb_tc_stats
+ i40e_get_veb_tc_stats(struct i40e_veb_tc_stats *tc, unsigned int i)
+ {
+ 	struct i40e_cp_veb_tc_stats veb_tc = {
+ 		.tc_rx_packets = tc->tc_rx_packets[i],
+ 		.tc_rx_bytes = tc->tc_rx_bytes[i],
+ 		.tc_tx_packets = tc->tc_tx_packets[i],
+ 		.tc_tx_bytes = tc->tc_tx_bytes[i],
+ 	};
+ 
+ 	return veb_tc;
+ }
+ 
  /**
   * i40e_get_pfc_stats - copy HW PFC statistics to formatted structure
   * @pf: the PF device structure
@@@ -2300,8 -2333,16 +2333,16 @@@ static void i40e_get_ethtool_stats(stru
    		       i40e_gstrings_veb_stats);
for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
- 		i40e_add_ethtool_stats(&data, veb_stats ? veb : NULL,
- 				       i40e_gstrings_veb_tc_stats);
+ 		if (veb_stats) {
+ 			struct i40e_cp_veb_tc_stats veb_tc =
+ 				i40e_get_veb_tc_stats(&veb->tc_stats, i);
+ 
+ 			i40e_add_ethtool_stats(&data, &veb_tc,
+ 					       i40e_gstrings_veb_tc_stats);
+ 		} else {
+ 			i40e_add_ethtool_stats(&data, NULL,
+ 					       i40e_gstrings_veb_tc_stats);
+ 		}
i40e_add_ethtool_stats(&data, pf, i40e_gstrings_stats);
@@@ -2368,15 -2409,21 +2409,15 @@@ static void i40e_get_priv_flag_strings(
    struct i40e_netdev_priv *np = netdev_priv(netdev);
    struct i40e_vsi *vsi = np->vsi;
    struct i40e_pf *pf = vsi->back;
 -	char *p = (char *)data;
    unsigned int i;
 +	u8 *p = data;
-	for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
 -		snprintf(p, ETH_GSTRING_LEN, "%s",
 -			 i40e_gstrings_priv_flags[i].flag_string);
 -		p += ETH_GSTRING_LEN;
 -	}
 +	for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++)
 +		ethtool_sprintf(&p, i40e_gstrings_priv_flags[i].flag_string);
    if (pf->hw.pf_id != 0)
    	return;
 -	for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++) {
 -		snprintf(p, ETH_GSTRING_LEN, "%s",
 -			 i40e_gl_gstrings_priv_flags[i].flag_string);
 -		p += ETH_GSTRING_LEN;
 -	}
 +	for (i = 0; i < I40E_GL_PRIV_FLAGS_STR_LEN; i++)
 +		ethtool_sprintf(&p, i40e_gl_gstrings_priv_flags[i].flag_string);
  }
static void i40e_get_strings(struct net_device *netdev, u32 stringset,
@@@ -5433,7 -5480,7 +5474,7 @@@ static int i40e_get_module_eeprom(struc
status = i40e_aq_get_phy_register(hw,
    			I40E_AQ_PHY_REG_ACCESS_EXTERNAL_MODULE,
- 				true, addr, offset, &value, NULL);
+ 				addr, true, offset, &value, NULL);
    	if (status)
    		return -EIO;
    	data[i] = value;
diff --combined drivers/net/ethernet/intel/i40e/i40e_main.c
index 1555d6009bf5,30ad7c08d0fb..9502e043a0b7
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@@ -2023,7 -2023,7 +2023,7 @@@ static void i40e_undo_add_filter_entrie
  }
/**
 - * i40e_next_entry - Get the next non-broadcast filter from a list
 + * i40e_next_filter - Get the next non-broadcast filter from a list
   * @next: pointer to filter in list
   *
   * Returns the next non-broadcast filter in the list. Required so that we
@@@ -2560,8 -2560,7 +2560,7 @@@ int i40e_sync_vsi_filters(struct i40e_v
    			 i40e_stat_str(hw, aq_ret),
    			 i40e_aq_str(hw, hw->aq.asq_last_status));
    	} else {
- 			dev_info(&pf->pdev->dev, "%s is %s allmulti mode.\n",
- 				 vsi->netdev->name,
+ 			dev_info(&pf->pdev->dev, "%s allmulti mode.\n",
    			 cur_multipromisc ? "entering" : "leaving");
    	}
    }
@@@ -5204,7 -5203,7 +5203,7 @@@ static u8 i40e_pf_get_num_tc(struct i40
  }
/**
 - * i40e_pf_get_pf_tc_map - Get bitmap for enabled traffic classes
 + * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes
   * @pf: PF being queried
   *
   * Return a bitmap for enabled traffic classes for this PF.
@@@ -6738,9 -6737,9 +6737,9 @@@ out
    		set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
    		set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
    	}
- 	/* registers are set, lets apply */
- 	if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB)
- 		ret = i40e_hw_set_dcb_config(pf, new_cfg);
+ 		/* registers are set, lets apply */
+ 		if (pf->hw_features & I40E_HW_USE_SET_LLDP_MIB)
+ 			ret = i40e_hw_set_dcb_config(pf, new_cfg);
    }
err:
@@@ -7339,7 -7338,7 +7338,7 @@@ static void i40e_vsi_set_default_tc_con
    qcount = min_t(int, vsi->alloc_queue_pairs,
    	       i40e_pf_get_max_q_per_tc(vsi->back));
    for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
 -		/* For the TC that is not enabled set the offset to to default
 +		/* For the TC that is not enabled set the offset to default
    	 * queue and allocate one queue for the given TC.
    	 */
    	vsi->tc_config.tc_info[i].qoffset = 0;
@@@ -9467,7 -9466,7 +9466,7 @@@ static void i40e_fdir_flush_and_replay(
  }
/**
 - * i40e_get_current_atr_count - Get the count of total FD ATR filters programmed
 + * i40e_get_current_atr_cnt - Get the count of total FD ATR filters programmed
   * @pf: board private structure
   **/
  u32 i40e_get_current_atr_cnt(struct i40e_pf *pf)
@@@ -10573,12 -10572,6 +10572,6 @@@ static void i40e_rebuild(struct i40e_p
    	goto end_core_reset;
    }
- 	if (!lock_acquired)
- 		rtnl_lock();
- 	ret = i40e_setup_pf_switch(pf, reinit);
- 	if (ret)
- 		goto end_unlock;
- 
  #ifdef CONFIG_I40E_DCB
    /* Enable FW to write a default DCB config on link-up
     * unless I40E_FLAG_TC_MQPRIO was enabled or DCB
@@@ -10593,7 -10586,7 +10586,7 @@@
    		i40e_aq_set_dcb_parameters(hw, false, NULL);
    		dev_warn(&pf->pdev->dev,
    			 "DCB is not supported for X710-T*L 2.5/5G speeds\n");
- 				 pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
+ 			pf->flags &= ~I40E_FLAG_DCB_CAPABLE;
    	} else {
    		i40e_aq_set_dcb_parameters(hw, true, NULL);
    		ret = i40e_init_pf_dcb(pf);
@@@ -10607,6 -10600,11 +10600,11 @@@
    }
#endif /* CONFIG_I40E_DCB */
+ 	if (!lock_acquired)
+ 		rtnl_lock();
+ 	ret = i40e_setup_pf_switch(pf, reinit);
+ 	if (ret)
+ 		goto end_unlock;
/* The driver only wants link up/down and module qualification
     * reports from firmware.  Note the negative logic.
@@@ -10625,7 -10623,7 +10623,7 @@@
     * need to rebuild the switch model in the HW.
     *
     * If there were VEBs but the reconstitution failed, we'll try
 -	 * try to recover minimal use by getting the basic PF VSI working.
 +	 * to recover minimal use by getting the basic PF VSI working.
     */
    if (vsi->uplink_seid != pf->mac_seid) {
    	dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n");
@@@ -15140,12 -15138,16 +15138,16 @@@ static int i40e_init_recovery_mode(stru
     * in order to register the netdev
     */
    v_idx = i40e_vsi_mem_alloc(pf, I40E_VSI_MAIN);
- 	if (v_idx < 0)
+ 	if (v_idx < 0) {
+ 		err = v_idx;
    	goto err_switch_setup;
+ 	}
    pf->lan_vsi = v_idx;
    vsi = pf->vsi[v_idx];
- 	if (!vsi)
+ 	if (!vsi) {
+ 		err = -EFAULT;
    	goto err_switch_setup;
+ 	}
    vsi->alloc_queue_pairs = 1;
    err = i40e_config_netdev(vsi);
    if (err)
diff --combined drivers/net/ethernet/intel/i40e/i40e_txrx.c
index fc20afc23bfa,06b4271219b1..121cd99fdeff
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@@ -2295,8 -2295,7 +2295,7 @@@ int i40e_xmit_xdp_tx_ring(struct xdp_bu
   * @rx_ring: Rx ring being processed
   * @xdp: XDP buffer containing the frame
   **/
- static struct sk_buff *i40e_run_xdp(struct i40e_ring *rx_ring,
- 				    struct xdp_buff *xdp)
+ static int i40e_run_xdp(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
  {
    int err, result = I40E_XDP_PASS;
    struct i40e_ring *xdp_ring;
@@@ -2335,7 -2334,7 +2334,7 @@@
    }
  xdp_out:
    rcu_read_unlock();
- 	return ERR_PTR(-result);
+ 	return result;
  }
/**
@@@ -2448,6 -2447,7 +2447,7 @@@ static int i40e_clean_rx_irq(struct i40
    unsigned int xdp_xmit = 0;
    bool failure = false;
    struct xdp_buff xdp;
+ 	int xdp_res = 0;
#if (PAGE_SIZE < 8192)
    frame_sz = i40e_rx_frame_truesize(rx_ring, 0);
@@@ -2513,12 -2513,10 +2513,10 @@@
    		/* At larger PAGE_SIZE, frame_sz depend on len size */
    		xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size);
  #endif
- 			skb = i40e_run_xdp(rx_ring, &xdp);
+ 			xdp_res = i40e_run_xdp(rx_ring, &xdp);
    	}
- 		if (IS_ERR(skb)) {
- 			unsigned int xdp_res = -PTR_ERR(skb);
- 
+ 		if (xdp_res) {
    		if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
    			xdp_xmit |= xdp_res;
    			i40e_rx_buffer_flip(rx_ring, rx_buffer, size);
@@@ -3333,7 -3331,7 +3331,7 @@@ static int i40e_tx_enable_csum(struct s
  }
/**
 - * i40e_create_tx_ctx Build the Tx context descriptor
 + * i40e_create_tx_ctx - Build the Tx context descriptor
   * @tx_ring:  ring to create the descriptor on
   * @cd_type_cmd_tso_mss: Quad Word 1
   * @cd_tunneling: Quad Word 0 - bits 0-31
@@@ -3835,8 -3833,8 +3833,8 @@@ netdev_tx_t i40e_lan_xmit_frame(struct 
   * @frames: array of XDP buffer pointers
   * @flags: XDP extra info
   *
 - * Returns number of frames successfully sent. Frames that fail are
 - * free'ed via XDP return API.
 + * Returns number of frames successfully sent. Failed frames
 + * will be free'ed by XDP core.
   *
   * For error cases, a negative errno code is returned and no-frames
   * are transmitted (caller must handle freeing frames).
@@@ -3849,7 -3847,7 +3847,7 @@@ int i40e_xdp_xmit(struct net_device *de
    struct i40e_vsi *vsi = np->vsi;
    struct i40e_pf *pf = vsi->back;
    struct i40e_ring *xdp_ring;
 -	int drops = 0;
 +	int nxmit = 0;
    int i;
if (test_bit(__I40E_VSI_DOWN, vsi->state))
@@@ -3869,13 -3867,14 +3867,13 @@@
    	int err;
err = i40e_xmit_xdp_ring(xdpf, xdp_ring);
 -		if (err != I40E_XDP_TX) {
 -			xdp_return_frame_rx_napi(xdpf);
 -			drops++;
 -		}
 +		if (err != I40E_XDP_TX)
 +			break;
 +		nxmit++;
    }
if (unlikely(flags & XDP_XMIT_FLUSH))
    	i40e_xdp_ring_update_tail(xdp_ring);
-	return n - drops;
 +	return nxmit;
  }
diff --combined drivers/net/ethernet/intel/i40e/i40e_xsk.c
index d89c22347d9d,12ca84113587..46d884417c63
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@@ -160,13 -160,6 +160,13 @@@ static int i40e_run_xdp_zc(struct i40e_
    xdp_prog = READ_ONCE(rx_ring->xdp_prog);
    act = bpf_prog_run_xdp(xdp_prog, xdp);
+	if (likely(act == XDP_REDIRECT)) {
 +		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
 +		result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
 +		rcu_read_unlock();
 +		return result;
 +	}
 +
    switch (act) {
    case XDP_PASS:
    	break;
@@@ -174,6 -167,10 +174,6 @@@
    	xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->queue_index];
    	result = i40e_xmit_xdp_tx_ring(xdp, xdp_ring);
    	break;
 -	case XDP_REDIRECT:
 -		err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog);
 -		result = !err ? I40E_XDP_REDIR : I40E_XDP_CONSUMED;
 -		break;
    default:
    	bpf_warn_invalid_xdp_action(act);
    	fallthrough;
@@@ -474,7 -471,7 +474,7 @@@ static bool i40e_xmit_zc(struct i40e_ri
nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, descs, budget);
    if (!nb_pkts)
- 		return false;
+ 		return true;
if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
    	nb_processed = xdp_ring->count - xdp_ring->next_to_use;
@@@ -491,7 -488,7 +491,7 @@@
i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);
- 	return true;
+ 	return nb_pkts < budget;
  }
/**
@@@ -628,7 -625,7 +628,7 @@@ void i40e_xsk_clean_rx_ring(struct i40e
  }
/**
 - * i40e_xsk_clean_xdp_ring - Clean the XDP Tx ring on shutdown
 + * i40e_xsk_clean_tx_ring - Clean the XDP Tx ring on shutdown
   * @tx_ring: XDP Tx ring
   **/
  void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring)
diff --combined drivers/net/ethernet/intel/ice/ice.h
index 721afa0f0a88,17101c45cbcd..07777ac4f098
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@@ -73,7 -73,7 +73,7 @@@
  #define ICE_MIN_LAN_TXRX_MSIX	1
  #define ICE_MIN_LAN_OICR_MSIX	1
  #define ICE_MIN_MSIX		(ICE_MIN_LAN_TXRX_MSIX + ICE_MIN_LAN_OICR_MSIX)
 -#define ICE_FDIR_MSIX		1
 +#define ICE_FDIR_MSIX		2
  #define ICE_NO_VSI		0xffff
  #define ICE_VSI_MAP_CONTIG	0
  #define ICE_VSI_MAP_SCATTER	1
@@@ -84,12 -84,9 +84,12 @@@
  #define ICE_MAX_LG_RSS_QS	256
  #define ICE_RES_VALID_BIT	0x8000
  #define ICE_RES_MISC_VEC_ID	(ICE_RES_VALID_BIT - 1)
 +/* All VF control VSIs share the same IRQ, so assign a unique ID for them */
 +#define ICE_RES_VF_CTRL_VEC_ID	(ICE_RES_MISC_VEC_ID - 1)
  #define ICE_INVAL_Q_INDEX	0xffff
  #define ICE_INVAL_VFID		256
+#define ICE_MAX_RXQS_PER_TC		256	/* Used when setting VSI context per TC Rx queues */
  #define ICE_MAX_RESET_WAIT		20
#define ICE_VSIQF_HKEY_ARRAY_SIZE	((VSIQF_HKEY_MAX_INDEX + 1) *	4)
@@@ -193,13 -190,12 +193,12 @@@ struct ice_sw 
    u8 dflt_vsi_ena:1;	/* true if above dflt_vsi is enabled */
  };
-enum ice_state {
 +enum ice_pf_state {
    __ICE_TESTING,
    __ICE_DOWN,
    __ICE_NEEDS_RESTART,
    __ICE_PREPARED_FOR_RESET,	/* set by driver when prepared */
    __ICE_RESET_OICR_RECV,		/* set by driver after rcv reset OICR */
- 	__ICE_DCBNL_DEVRESET,		/* set by dcbnl devreset */
    __ICE_PFR_REQ,			/* set by driver and peers */
    __ICE_CORER_REQ,		/* set by driver and peers */
    __ICE_GLOBR_REQ,		/* set by driver and peers */
@@@ -232,18 -228,15 +231,18 @@@
    __ICE_VF_RESETS_DISABLED,	/* disable resets during ice_remove */
    __ICE_LINK_DEFAULT_OVERRIDE_PENDING,
    __ICE_PHY_INIT_COMPLETE,
 +	__ICE_FD_VF_FLUSH_CTX,		/* set at FD Rx IRQ or timeout */
    __ICE_STATE_NBITS		/* must be last */
  };
-enum ice_vsi_flags {
 -	ICE_VSI_FLAG_UMAC_FLTR_CHANGED,
 -	ICE_VSI_FLAG_MMAC_FLTR_CHANGED,
 -	ICE_VSI_FLAG_VLAN_FLTR_CHANGED,
 -	ICE_VSI_FLAG_PROMISC_CHANGED,
 -	ICE_VSI_FLAG_NBITS		/* must be last */
 +enum ice_vsi_state {
 +	ICE_VSI_DOWN,
 +	ICE_VSI_NEEDS_RESTART,
 +	ICE_VSI_UMAC_FLTR_CHANGED,
 +	ICE_VSI_MMAC_FLTR_CHANGED,
 +	ICE_VSI_VLAN_FLTR_CHANGED,
 +	ICE_VSI_PROMISC_CHANGED,
 +	ICE_VSI_STATE_NBITS		/* must be last */
  };
/* struct that defines a VSI, associated with a dev */
@@@ -259,12 -252,14 +258,12 @@@ struct ice_vsi 
    irqreturn_t (*irq_handler)(int irq, void *data);
u64 tx_linearize;
 -	DECLARE_BITMAP(state, __ICE_STATE_NBITS);
 -	DECLARE_BITMAP(flags, ICE_VSI_FLAG_NBITS);
 +	DECLARE_BITMAP(state, ICE_VSI_STATE_NBITS);
    unsigned int current_netdev_flags;
    u32 tx_restart;
    u32 tx_busy;
    u32 rx_buf_failed;
    u32 rx_page_failed;
 -	u32 rx_gro_dropped;
    u16 num_q_vectors;
    u16 base_vector;		/* IRQ base for OS reserved vectors */
    enum ice_vsi_type type;
@@@ -504,7 -499,7 +503,7 @@@ ice_irq_dynamic_ena(struct ice_hw *hw, 
    val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M |
          (itr << GLINT_DYN_CTL_ITR_INDX_S);
    if (vsi)
 -		if (test_bit(__ICE_DOWN, vsi->state))
 +		if (test_bit(ICE_VSI_DOWN, vsi->state))
    		return;
    wr32(hw, GLINT_DYN_CTL(vector), val);
  }
@@@ -621,16 -616,14 +620,16 @@@ int ice_destroy_xdp_rings(struct ice_vs
  int
  ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
         u32 flags);
 -int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 -int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 +int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size);
 +int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size);
 +int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed);
 +int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed);
  void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
  int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
  void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
  const char *ice_stat_str(enum ice_status stat_err);
  const char *ice_aq_str(enum ice_aq_err aq_err);
- bool ice_is_wol_supported(struct ice_pf *pf);
+ bool ice_is_wol_supported(struct ice_hw *hw);
  int
  ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add,
    	    bool is_tun);
@@@ -648,6 -641,7 +647,7 @@@ int ice_fdir_create_dflt_rules(struct i
  int ice_aq_wait_for_event(struct ice_pf *pf, u16 opcode, unsigned long timeout,
    		  struct ice_rq_event_info *event);
  int ice_open(struct net_device *netdev);
+ int ice_open_internal(struct net_device *netdev);
  int ice_stop(struct net_device *netdev);
  void ice_service_task_schedule(struct ice_pf *pf);
diff --combined drivers/net/ethernet/intel/ice/ice_common.c
index b13a630ea1b7,a20edf1538a0..e93b1e40f627
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@@ -158,10 -158,6 +158,10 @@@ ice_aq_get_phy_caps(struct ice_port_inf
    	return ICE_ERR_PARAM;
    hw = pi->hw;
+	if (report_mode == ICE_AQC_REPORT_DFLT_CFG &&
 +	    !ice_fw_supports_report_dflt_cfg(hw))
 +		return ICE_ERR_PARAM;
 +
    ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_phy_caps);
if (qual_mods)
@@@ -195,7 -191,7 +195,7 @@@
    ice_debug(hw, ICE_DBG_LINK, "   module_type[2] = 0x%x\n",
    	  pcaps->module_type[2]);
-	if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP) {
 +	if (!status && report_mode == ICE_AQC_REPORT_TOPO_CAP_MEDIA) {
    	pi->phy.phy_type_low = le64_to_cpu(pcaps->phy_type_low);
    	pi->phy.phy_type_high = le64_to_cpu(pcaps->phy_type_high);
    	memcpy(pi->phy.link_info.module_type, &pcaps->module_type,
@@@ -721,8 -717,8 +721,8 @@@ static enum ice_status ice_cfg_fw_log(s
if (!data) {
    			data = devm_kcalloc(ice_hw_to_dev(hw),
- 						    sizeof(*data),
    					    ICE_AQC_FW_LOG_ID_MAX,
+ 						    sizeof(*data),
    					    GFP_KERNEL);
    			if (!data)
    				return ICE_ERR_NO_MEMORY;
@@@ -926,8 -922,7 +926,8 @@@ enum ice_status ice_init_hw(struct ice_
/* Initialize port_info struct with PHY capabilities */
    status = ice_aq_get_phy_caps(hw->port_info, false,
 -				     ICE_AQC_REPORT_TOPO_CAP, pcaps, NULL);
 +				     ICE_AQC_REPORT_TOPO_CAP_MEDIA, pcaps,
 +				     NULL);
    devm_kfree(ice_hw_to_dev(hw), pcaps);
    if (status)
    	dev_warn(ice_hw_to_dev(hw), "Get PHY capabilities failed status = %d, continuing anyway\n",
@@@ -1297,85 -1292,6 +1297,85 @@@ const struct ice_ctx_ele ice_tlan_ctx_i
   */
  DEFINE_MUTEX(ice_global_cfg_lock_sw);
+/**
 + * ice_should_retry_sq_send_cmd
 + * @opcode: AQ opcode
 + *
 + * Decide if we should retry the send command routine for the ATQ, depending
 + * on the opcode.
 + */
 +static bool ice_should_retry_sq_send_cmd(u16 opcode)
 +{
 +	switch (opcode) {
 +	case ice_aqc_opc_get_link_topo:
 +	case ice_aqc_opc_lldp_stop:
 +	case ice_aqc_opc_lldp_start:
 +	case ice_aqc_opc_lldp_filter_ctrl:
 +		return true;
 +	}
 +
 +	return false;
 +}
 +
 +/**
 + * ice_sq_send_cmd_retry - send command to Control Queue (ATQ)
 + * @hw: pointer to the HW struct
 + * @cq: pointer to the specific Control queue
 + * @desc: prefilled descriptor describing the command
 + * @buf: buffer to use for indirect commands (or NULL for direct commands)
 + * @buf_size: size of buffer for indirect commands (or 0 for direct commands)
 + * @cd: pointer to command details structure
 + *
 + * Retry sending the FW Admin Queue command, multiple times, to the FW Admin
 + * Queue if the EBUSY AQ error is returned.
 + */
 +static enum ice_status
 +ice_sq_send_cmd_retry(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 +		      struct ice_aq_desc *desc, void *buf, u16 buf_size,
 +		      struct ice_sq_cd *cd)
 +{
 +	struct ice_aq_desc desc_cpy;
 +	enum ice_status status;
 +	bool is_cmd_for_retry;
 +	u8 *buf_cpy = NULL;
 +	u8 idx = 0;
 +	u16 opcode;
 +
 +	opcode = le16_to_cpu(desc->opcode);
 +	is_cmd_for_retry = ice_should_retry_sq_send_cmd(opcode);
 +	memset(&desc_cpy, 0, sizeof(desc_cpy));
 +
 +	if (is_cmd_for_retry) {
 +		if (buf) {
 +			buf_cpy = kzalloc(buf_size, GFP_KERNEL);
 +			if (!buf_cpy)
 +				return ICE_ERR_NO_MEMORY;
 +		}
 +
 +		memcpy(&desc_cpy, desc, sizeof(desc_cpy));
 +	}
 +
 +	do {
 +		status = ice_sq_send_cmd(hw, cq, desc, buf, buf_size, cd);
 +
 +		if (!is_cmd_for_retry || !status ||
 +		    hw->adminq.sq_last_status != ICE_AQ_RC_EBUSY)
 +			break;
 +
 +		if (buf_cpy)
 +			memcpy(buf, buf_cpy, buf_size);
 +
 +		memcpy(desc, &desc_cpy, sizeof(desc_cpy));
 +
 +		mdelay(ICE_SQ_SEND_DELAY_TIME_MS);
 +
 +	} while (++idx < ICE_SQ_SEND_MAX_EXECUTE);
 +
 +	kfree(buf_cpy);
 +
 +	return status;
 +}
 +
  /**
   * ice_aq_send_cmd - send FW Admin Queue command to FW Admin Queue
   * @hw: pointer to the HW struct
@@@ -1417,7 -1333,7 +1417,7 @@@ ice_aq_send_cmd(struct ice_hw *hw, stru
    	break;
    }
-	status = ice_sq_send_cmd(hw, &hw->adminq, desc, buf, buf_size, cd);
 +	status = ice_sq_send_cmd_retry(hw, &hw->adminq, desc, buf, buf_size, cd);
    if (lock_acquired)
    	mutex_unlock(&ice_global_cfg_lock_sw);
@@@ -2739,7 -2655,7 +2739,7 @@@ enum ice_status ice_update_link_info(st
    	if (!pcaps)
    		return ICE_ERR_NO_MEMORY;
-		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
 +		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
    				     pcaps, NULL);
devm_kfree(ice_hw_to_dev(hw), pcaps);
@@@ -2899,8 -2815,8 +2899,8 @@@ ice_set_fc(struct ice_port_info *pi, u
    	return ICE_ERR_NO_MEMORY;
/* Get the current PHY config */
 -	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
 -				     NULL);
 +	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG,
 +				     pcaps, NULL);
    if (status) {
    	*aq_failures = ICE_SET_FC_AQ_FAIL_GET;
    	goto out;
@@@ -3013,6 -2929,17 +3013,6 @@@ ice_copy_phy_caps_to_cfg(struct ice_por
    cfg->link_fec_opt = caps->link_fec_options;
    cfg->module_compliance_enforcement =
    	caps->module_compliance_enforcement;
 -
 -	if (ice_fw_supports_link_override(pi->hw)) {
 -		struct ice_link_default_override_tlv tlv;
 -
 -		if (ice_get_link_default_override(&tlv, pi))
 -			return;
 -
 -		if (tlv.options & ICE_LINK_OVERRIDE_STRICT_MODE)
 -			cfg->module_compliance_enforcement |=
 -				ICE_LINK_OVERRIDE_STRICT_MODE;
 -	}
  }
/**
@@@ -3027,21 -2954,16 +3027,21 @@@ ice_cfg_phy_fec(struct ice_port_info *p
  {
    struct ice_aqc_get_phy_caps_data *pcaps;
    enum ice_status status;
 +	struct ice_hw *hw;
if (!pi || !cfg)
    	return ICE_ERR_BAD_PTR;
+	hw = pi->hw;
 +
    pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
    if (!pcaps)
    	return ICE_ERR_NO_MEMORY;
-	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps,
 -				     NULL);
 +	status = ice_aq_get_phy_caps(pi, false,
 +				     (ice_fw_supports_report_dflt_cfg(hw) ?
 +				      ICE_AQC_REPORT_DFLT_CFG :
 +				      ICE_AQC_REPORT_TOPO_CAP_MEDIA), pcaps, NULL);
    if (status)
    	goto out;
@@@ -3080,8 -3002,7 +3080,8 @@@
    	break;
    }
-	if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(pi->hw)) {
 +	if (fec == ICE_FEC_AUTO && ice_fw_supports_link_override(hw) &&
 +	    !ice_fw_supports_report_dflt_cfg(hw)) {
    	struct ice_link_default_override_tlv tlv;
if (ice_get_link_default_override(&tlv, pi))
@@@ -3265,7 -3186,7 +3265,7 @@@ ice_aq_sff_eeprom(struct ice_hw *hw, u1
ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_sff_eeprom);
    cmd = &desc.params.read_write_sff_param;
 -	desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD | ICE_AQ_FLAG_BUF);
 +	desc.flags = cpu_to_le16(ICE_AQ_FLAG_RD);
    cmd->lport_num = (u8)(lport & 0xff);
    cmd->lport_num_valid = (u8)((lport >> 8) & 0x01);
    cmd->i2c_bus_addr = cpu_to_le16(((bus_addr >> 1) &
@@@ -3285,33 -3206,23 +3285,33 @@@
  /**
   * __ice_aq_get_set_rss_lut
   * @hw: pointer to the hardware structure
 - * @vsi_id: VSI FW index
 - * @lut_type: LUT table type
 - * @lut: pointer to the LUT buffer provided by the caller
 - * @lut_size: size of the LUT buffer
 - * @glob_lut_idx: global LUT index
 + * @params: RSS LUT parameters
   * @set: set true to set the table, false to get the table
   *
   * Internal function to get (0x0B05) or set (0x0B03) RSS look up table
   */
  static enum ice_status
 -__ice_aq_get_set_rss_lut(struct ice_hw *hw, u16 vsi_id, u8 lut_type, u8 *lut,
 -			 u16 lut_size, u8 glob_lut_idx, bool set)
 +__ice_aq_get_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *params, bool set)
  {
 +	u16 flags = 0, vsi_id, lut_type, lut_size, glob_lut_idx, vsi_handle;
    struct ice_aqc_get_set_rss_lut *cmd_resp;
    struct ice_aq_desc desc;
    enum ice_status status;
 -	u16 flags = 0;
 +	u8 *lut;
 +
 +	if (!params)
 +		return ICE_ERR_PARAM;
 +
 +	vsi_handle = params->vsi_handle;
 +	lut = params->lut;
 +
 +	if (!ice_is_vsi_valid(hw, vsi_handle) || !lut)
 +		return ICE_ERR_PARAM;
 +
 +	lut_size = params->lut_size;
 +	lut_type = params->lut_type;
 +	glob_lut_idx = params->global_lut_id;
 +	vsi_id = ice_get_hw_vsi_num(hw, vsi_handle);
cmd_resp = &desc.params.get_set_rss_lut;
@@@ -3385,27 -3296,43 +3385,27 @@@ ice_aq_get_set_rss_lut_exit
  /**
   * ice_aq_get_rss_lut
   * @hw: pointer to the hardware structure
 - * @vsi_handle: software VSI handle
 - * @lut_type: LUT table type
 - * @lut: pointer to the LUT buffer provided by the caller
 - * @lut_size: size of the LUT buffer
 + * @get_params: RSS LUT parameters used to specify which RSS LUT to get
   *
   * get the RSS lookup table, PF or VSI type
   */
  enum ice_status
 -ice_aq_get_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type,
 -		   u8 *lut, u16 lut_size)
 +ice_aq_get_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *get_params)
  {
 -	if (!ice_is_vsi_valid(hw, vsi_handle) || !lut)
 -		return ICE_ERR_PARAM;
 -
 -	return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle),
 -					lut_type, lut, lut_size, 0, false);
 +	return __ice_aq_get_set_rss_lut(hw, get_params, false);
  }
/**
   * ice_aq_set_rss_lut
   * @hw: pointer to the hardware structure
 - * @vsi_handle: software VSI handle
 - * @lut_type: LUT table type
 - * @lut: pointer to the LUT buffer provided by the caller
 - * @lut_size: size of the LUT buffer
 + * @set_params: RSS LUT parameters used to specify how to set the RSS LUT
   *
   * set the RSS lookup table, PF or VSI type
   */
  enum ice_status
 -ice_aq_set_rss_lut(struct ice_hw *hw, u16 vsi_handle, u8 lut_type,
 -		   u8 *lut, u16 lut_size)
 +ice_aq_set_rss_lut(struct ice_hw *hw, struct ice_aq_get_set_rss_lut_params *set_params)
  {
 -	if (!ice_is_vsi_valid(hw, vsi_handle) || !lut)
 -		return ICE_ERR_PARAM;
 -
 -	return __ice_aq_get_set_rss_lut(hw, ice_get_hw_vsi_num(hw, vsi_handle),
 -					lut_type, lut, lut_size, 0, true);
 +	return __ice_aq_get_set_rss_lut(hw, set_params, true);
  }
/**
@@@ -4446,7 -4373,7 +4446,7 @@@ ice_aq_set_lldp_mib(struct ice_hw *hw, 
  }
/**
 - * ice_fw_supports_lldp_fltr - check NVM version supports lldp_fltr_ctrl
 + * ice_fw_supports_lldp_fltr_ctrl - check NVM version supports lldp_fltr_ctrl
   * @hw: pointer to HW struct
   */
  bool ice_fw_supports_lldp_fltr_ctrl(struct ice_hw *hw)
@@@ -4491,23 -4418,3 +4491,23 @@@ ice_lldp_fltr_add_remove(struct ice_hw
return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL);
  }
 +
 +/**
 + * ice_fw_supports_report_dflt_cfg
 + * @hw: pointer to the hardware structure
 + *
 + * Checks if the firmware supports report default configuration
 + */
 +bool ice_fw_supports_report_dflt_cfg(struct ice_hw *hw)
 +{
 +	if (hw->api_maj_ver == ICE_FW_API_REPORT_DFLT_CFG_MAJ) {
 +		if (hw->api_min_ver > ICE_FW_API_REPORT_DFLT_CFG_MIN)
 +			return true;
 +		if (hw->api_min_ver == ICE_FW_API_REPORT_DFLT_CFG_MIN &&
 +		    hw->api_patch >= ICE_FW_API_REPORT_DFLT_CFG_PATCH)
 +			return true;
 +	} else if (hw->api_maj_ver > ICE_FW_API_REPORT_DFLT_CFG_MAJ) {
 +		return true;
 +	}
 +	return false;
 +}
diff --combined drivers/net/ethernet/intel/ice/ice_controlq.h
index 7d0905f25ddc,68866f4f0eb0..77c2307d4fb8
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@@ -14,8 -14,8 +14,8 @@@
    (&(((struct ice_aq_desc *)((R).desc_buf.va))[i]))
#define ICE_CTL_Q_DESC_UNUSED(R) \
 -	(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
 -	      (R)->next_to_clean - (R)->next_to_use - 1)
 +	((u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
 +	       (R)->next_to_clean - (R)->next_to_use - 1))
/* Defines that help manage the driver vs FW API checks.
   * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage.
@@@ -31,8 -31,8 +31,8 @@@ enum ice_ctl_q 
    ICE_CTL_Q_MAILBOX,
  };
- /* Control Queue timeout settings - max delay 250ms */
- #define ICE_CTL_Q_SQ_CMD_TIMEOUT	2500  /* Count 2500 times */
+ /* Control Queue timeout settings - max delay 1s */
+ #define ICE_CTL_Q_SQ_CMD_TIMEOUT	10000 /* Count 10000 times */
  #define ICE_CTL_Q_SQ_CMD_USEC		100   /* Check every 100usec */
  #define ICE_CTL_Q_ADMIN_INIT_TIMEOUT	10    /* Count 10 times */
  #define ICE_CTL_Q_ADMIN_INIT_MSEC	100   /* Check every 100msec */
diff --combined drivers/net/ethernet/intel/ice/ice_dcb.c
index 85c9eccfdae8,211ac6f907ad..43c6af42de8a
--- a/drivers/net/ethernet/intel/ice/ice_dcb.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb.c
@@@ -738,22 -738,27 +738,27 @@@ ice_aq_get_cee_dcb_cfg(struct ice_hw *h
  /**
   * ice_cee_to_dcb_cfg
   * @cee_cfg: pointer to CEE configuration struct
-  * @dcbcfg: DCB configuration struct
+  * @pi: port information structure
   *
   * Convert CEE configuration from firmware to DCB configuration
   */
  static void
  ice_cee_to_dcb_cfg(struct ice_aqc_get_cee_dcb_cfg_resp *cee_cfg,
- 		   struct ice_dcbx_cfg *dcbcfg)
+ 		   struct ice_port_info *pi)
  {
    u32 status, tlv_status = le32_to_cpu(cee_cfg->tlv_status);
    u32 ice_aqc_cee_status_mask, ice_aqc_cee_status_shift;
+ 	u8 i, j, err, sync, oper, app_index, ice_app_sel_type;
    u16 app_prio = le16_to_cpu(cee_cfg->oper_app_prio);
- 	u8 i, err, sync, oper, app_index, ice_app_sel_type;
    u16 ice_aqc_cee_app_mask, ice_aqc_cee_app_shift;
+ 	struct ice_dcbx_cfg *cmp_dcbcfg, *dcbcfg;
    u16 ice_app_prot_id_type;
- 	/* CEE PG data to ETS config */
+ 	dcbcfg = &pi->qos_cfg.local_dcbx_cfg;
+ 	dcbcfg->dcbx_mode = ICE_DCBX_MODE_CEE;
+ 	dcbcfg->tlv_status = tlv_status;
+ 
+ 	/* CEE PG data */
    dcbcfg->etscfg.maxtcs = cee_cfg->oper_num_tc;
/* Note that the FW creates the oper_prio_tc nibbles reversed
@@@ -780,10 -785,16 +785,16 @@@
    	}
    }
- 	/* CEE PFC data to ETS config */
+ 	/* CEE PFC data */
    dcbcfg->pfc.pfcena = cee_cfg->oper_pfc_en;
    dcbcfg->pfc.pfccap = ICE_MAX_TRAFFIC_CLASS;
+ 	/* CEE APP TLV data */
+ 	if (dcbcfg->app_mode == ICE_DCBX_APPS_NON_WILLING)
+ 		cmp_dcbcfg = &pi->qos_cfg.desired_dcbx_cfg;
+ 	else
+ 		cmp_dcbcfg = &pi->qos_cfg.remote_dcbx_cfg;
+ 
    app_index = 0;
    for (i = 0; i < 3; i++) {
    	if (i == 0) {
@@@ -802,6 -813,18 +813,18 @@@
    		ice_aqc_cee_app_shift = ICE_AQC_CEE_APP_ISCSI_S;
    		ice_app_sel_type = ICE_APP_SEL_TCPIP;
    		ice_app_prot_id_type = ICE_APP_PROT_ID_ISCSI;
+ 
+ 			for (j = 0; j < cmp_dcbcfg->numapps; j++) {
+ 				u16 prot_id = cmp_dcbcfg->app[j].prot_id;
+ 				u8 sel = cmp_dcbcfg->app[j].selector;
+ 
+ 				if  (sel == ICE_APP_SEL_TCPIP &&
+ 				     (prot_id == ICE_APP_PROT_ID_ISCSI ||
+ 				      prot_id == ICE_APP_PROT_ID_ISCSI_860)) {
+ 					ice_app_prot_id_type = prot_id;
+ 					break;
+ 				}
+ 			}
    	} else {
    		/* FIP APP */
    		ice_aqc_cee_status_mask = ICE_AQC_CEE_FIP_STATUS_M;
@@@ -834,7 -857,7 +857,7 @@@
  }
/**
 - * ice_get_ieee_dcb_cfg
 + * ice_get_ieee_or_cee_dcb_cfg
   * @pi: port information structure
   * @dcbx_mode: mode of DCBX (IEEE or CEE)
   *
@@@ -892,11 -915,8 +915,8 @@@ enum ice_status ice_get_dcb_cfg(struct 
    ret = ice_aq_get_cee_dcb_cfg(pi->hw, &cee_cfg, NULL);
    if (!ret) {
    	/* CEE mode */
- 		dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
- 		dcbx_cfg->dcbx_mode = ICE_DCBX_MODE_CEE;
- 		dcbx_cfg->tlv_status = le32_to_cpu(cee_cfg.tlv_status);
- 		ice_cee_to_dcb_cfg(&cee_cfg, dcbx_cfg);
    	ret = ice_get_ieee_or_cee_dcb_cfg(pi, ICE_DCBX_MODE_CEE);
+ 		ice_cee_to_dcb_cfg(&cee_cfg, pi);
    } else if (pi->hw->adminq.sq_last_status == ICE_AQ_RC_ENOENT) {
    	/* CEE mode not enabled try querying IEEE data */
    	dcbx_cfg = &pi->qos_cfg.local_dcbx_cfg;
diff --combined drivers/net/ethernet/intel/ice/ice_ethtool.c
index 51d3a929ecfd,32ba71a16165..a39e890100d9
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@@ -60,6 -60,7 +60,6 @@@ static const struct ice_stats ice_gstri
    ICE_VSI_STAT("rx_unknown_protocol", eth_stats.rx_unknown_protocol),
    ICE_VSI_STAT("rx_alloc_fail", rx_buf_failed),
    ICE_VSI_STAT("rx_pg_alloc_fail", rx_page_failed),
 -	ICE_VSI_STAT("rx_gro_dropped", rx_gro_dropped),
    ICE_VSI_STAT("tx_errors", eth_stats.tx_errors),
    ICE_VSI_STAT("tx_linearize", tx_linearize),
    ICE_VSI_STAT("tx_busy", tx_busy),
@@@ -870,47 -871,68 +870,47 @@@ static void ice_get_strings(struct net_
  {
    struct ice_netdev_priv *np = netdev_priv(netdev);
    struct ice_vsi *vsi = np->vsi;
 -	char *p = (char *)data;
    unsigned int i;
 +	u8 *p = data;
switch (stringset) {
    case ETH_SS_STATS:
 -		for (i = 0; i < ICE_VSI_STATS_LEN; i++) {
 -			snprintf(p, ETH_GSTRING_LEN, "%s",
 -				 ice_gstrings_vsi_stats[i].stat_string);
 -			p += ETH_GSTRING_LEN;
 -		}
 +		for (i = 0; i < ICE_VSI_STATS_LEN; i++)
 +			ethtool_sprintf(&p,
 +					ice_gstrings_vsi_stats[i].stat_string);
ice_for_each_alloc_txq(vsi, i) {
 -			snprintf(p, ETH_GSTRING_LEN,
 -				 "tx_queue_%u_packets", i);
 -			p += ETH_GSTRING_LEN;
 -			snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_bytes", i);
 -			p += ETH_GSTRING_LEN;
 +			ethtool_sprintf(&p, "tx_queue_%u_packets", i);
 +			ethtool_sprintf(&p, "tx_queue_%u_bytes", i);
    	}
ice_for_each_alloc_rxq(vsi, i) {
 -			snprintf(p, ETH_GSTRING_LEN,
 -				 "rx_queue_%u_packets", i);
 -			p += ETH_GSTRING_LEN;
 -			snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_bytes", i);
 -			p += ETH_GSTRING_LEN;
 +			ethtool_sprintf(&p, "rx_queue_%u_packets", i);
 +			ethtool_sprintf(&p, "rx_queue_%u_bytes", i);
    	}
if (vsi->type != ICE_VSI_PF)
    		return;
 -		for (i = 0; i < ICE_PF_STATS_LEN; i++) {
 -			snprintf(p, ETH_GSTRING_LEN, "%s",
 -				 ice_gstrings_pf_stats[i].stat_string);
 -			p += ETH_GSTRING_LEN;
 -		}
 +		for (i = 0; i < ICE_PF_STATS_LEN; i++)
 +			ethtool_sprintf(&p,
 +					ice_gstrings_pf_stats[i].stat_string);
for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
 -			snprintf(p, ETH_GSTRING_LEN,
 -				 "tx_priority_%u_xon.nic", i);
 -			p += ETH_GSTRING_LEN;
 -			snprintf(p, ETH_GSTRING_LEN,
 -				 "tx_priority_%u_xoff.nic", i);
 -			p += ETH_GSTRING_LEN;
 +			ethtool_sprintf(&p, "tx_priority_%u_xon.nic", i);
 +			ethtool_sprintf(&p, "tx_priority_%u_xoff.nic", i);
    	}
    	for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) {
 -			snprintf(p, ETH_GSTRING_LEN,
 -				 "rx_priority_%u_xon.nic", i);
 -			p += ETH_GSTRING_LEN;
 -			snprintf(p, ETH_GSTRING_LEN,
 -				 "rx_priority_%u_xoff.nic", i);
 -			p += ETH_GSTRING_LEN;
 +			ethtool_sprintf(&p, "rx_priority_%u_xon.nic", i);
 +			ethtool_sprintf(&p, "rx_priority_%u_xoff.nic", i);
    	}
    	break;
    case ETH_SS_TEST:
    	memcpy(data, ice_gstrings_test, ICE_TEST_LEN * ETH_GSTRING_LEN);
    	break;
    case ETH_SS_PRIV_FLAGS:
 -		for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++) {
 -			snprintf(p, ETH_GSTRING_LEN, "%s",
 -				 ice_gstrings_priv_flags[i].name);
 -			p += ETH_GSTRING_LEN;
 -		}
 +		for (i = 0; i < ICE_PRIV_FLAG_ARRAY_SIZE; i++)
 +			ethtool_sprintf(&p, ice_gstrings_priv_flags[i].name);
    	break;
    default:
    	break;
@@@ -1059,7 -1081,7 +1059,7 @@@ ice_get_fecparam(struct net_device *net
    if (!caps)
    	return -ENOMEM;
 -	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP,
 +	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
    			     caps, NULL);
    if (status) {
    	err = -EAGAIN;
@@@ -1094,15 -1116,24 +1094,15 @@@ static int ice_nway_reset(struct net_de
  {
    struct ice_netdev_priv *np = netdev_priv(netdev);
    struct ice_vsi *vsi = np->vsi;
 -	struct ice_port_info *pi;
 -	enum ice_status status;
 +	int err;
 -	pi = vsi->port_info;
    /* If VSI state is up, then restart autoneg with link up */
    if (!test_bit(__ICE_DOWN, vsi->back->state))
 -		status = ice_aq_set_link_restart_an(pi, true, NULL);
 +		err = ice_set_link(vsi, true);
    else
 -		status = ice_aq_set_link_restart_an(pi, false, NULL);
 -
 -	if (status) {
 -		netdev_info(netdev, "link restart failed, err %s aq_err %s\n",
 -			    ice_stat_str(status),
 -			    ice_aq_str(pi->hw->adminq.sq_last_status));
 -		return -EIO;
 -	}
 +		err = ice_set_link(vsi, false);
 -	return 0;
 +	return err;
  }
/**
@@@ -1444,8 -1475,8 +1444,8 @@@ void ice_mask_min_supported_speeds(u64 
    do {								     \
    	if (req_speeds & (aq_link_speed) ||			     \
    	    (!req_speeds &&					     \
 -		     (adv_phy_type_lo & phy_type_mask_lo ||		     \
 -		      adv_phy_type_hi & phy_type_mask_hi)))		     \
 +		     (advert_phy_type_lo & phy_type_mask_lo ||		     \
 +		      advert_phy_type_hi & phy_type_mask_hi)))		     \
    		ethtool_link_ksettings_add_link_mode(ks, advertising,\
    						ethtool_link_mode);  \
    } while (0)
@@@ -1462,10 -1493,10 +1462,10 @@@ ice_phy_type_to_ethtool(struct net_devi
    struct ice_netdev_priv *np = netdev_priv(netdev);
    struct ice_vsi *vsi = np->vsi;
    struct ice_pf *pf = vsi->back;
 +	u64 advert_phy_type_lo = 0;
 +	u64 advert_phy_type_hi = 0;
    u64 phy_type_mask_lo = 0;
    u64 phy_type_mask_hi = 0;
 -	u64 adv_phy_type_lo = 0;
 -	u64 adv_phy_type_hi = 0;
    u64 phy_types_high = 0;
    u64 phy_types_low = 0;
    u16 req_speeds;
@@@ -1483,35 -1514,28 +1483,35 @@@
     * requested by user.
     */
    if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags)) {
 -		struct ice_link_default_override_tlv *ldo;
 -
 -		ldo = &pf->link_dflt_override;
    	phy_types_low = le64_to_cpu(pf->nvm_phy_type_lo);
    	phy_types_high = le64_to_cpu(pf->nvm_phy_type_hi);
ice_mask_min_supported_speeds(phy_types_high, &phy_types_low);
 -
 -		/* If override enabled and PHY mask set, then
 -		 * Advertising link mode is the intersection of the PHY
 -		 * types without media and the override PHY mask.
 +		/* determine advertised modes based on link override only
 +		 * if it's supported and if the FW doesn't abstract the
 +		 * driver from having to account for link overrides
    	 */
 -		if (ldo->options & ICE_LINK_OVERRIDE_EN &&
 -		    (ldo->phy_type_low || ldo->phy_type_high)) {
 -			adv_phy_type_lo =
 -				le64_to_cpu(pf->nvm_phy_type_lo) &
 -				ldo->phy_type_low;
 -			adv_phy_type_hi =
 -				le64_to_cpu(pf->nvm_phy_type_hi) &
 -				ldo->phy_type_high;
 +		if (ice_fw_supports_link_override(&pf->hw) &&
 +		    !ice_fw_supports_report_dflt_cfg(&pf->hw)) {
 +			struct ice_link_default_override_tlv *ldo;
 +
 +			ldo = &pf->link_dflt_override;
 +			/* If override enabled and PHY mask set, then
 +			 * Advertising link mode is the intersection of the PHY
 +			 * types without media and the override PHY mask.
 +			 */
 +			if (ldo->options & ICE_LINK_OVERRIDE_EN &&
 +			    (ldo->phy_type_low || ldo->phy_type_high)) {
 +				advert_phy_type_lo =
 +					le64_to_cpu(pf->nvm_phy_type_lo) &
 +					ldo->phy_type_low;
 +				advert_phy_type_hi =
 +					le64_to_cpu(pf->nvm_phy_type_hi) &
 +					ldo->phy_type_high;
 +			}
    	}
    } else {
 +		/* strict mode */
    	phy_types_low = vsi->port_info->phy.phy_type_low;
    	phy_types_high = vsi->port_info->phy.phy_type_high;
    }
@@@ -1519,9 -1543,9 +1519,9 @@@
    /* If Advertising link mode PHY type is not using override PHY type,
     * then use PHY type with media.
     */
 -	if (!adv_phy_type_lo && !adv_phy_type_hi) {
 -		adv_phy_type_lo = vsi->port_info->phy.phy_type_low;
 -		adv_phy_type_hi = vsi->port_info->phy.phy_type_high;
 +	if (!advert_phy_type_lo && !advert_phy_type_hi) {
 +		advert_phy_type_lo = vsi->port_info->phy.phy_type_low;
 +		advert_phy_type_hi = vsi->port_info->phy.phy_type_high;
    }
ethtool_link_ksettings_zero_link_mode(ks, supported);
@@@ -1997,7 -2021,7 +1997,7 @@@ ice_get_link_ksettings(struct net_devic
    	return -ENOMEM;
status = ice_aq_get_phy_caps(vsi->port_info, false,
 -				     ICE_AQC_REPORT_SW_CFG, caps, NULL);
 +				     ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
    if (status) {
    	err = -EIO;
    	goto done;
@@@ -2034,7 -2058,7 +2034,7 @@@
    	ethtool_link_ksettings_add_link_mode(ks, advertising, FEC_RS);
status = ice_aq_get_phy_caps(vsi->port_info, false,
 -				     ICE_AQC_REPORT_TOPO_CAP, caps, NULL);
 +				     ICE_AQC_REPORT_TOPO_CAP_MEDIA, caps, NULL);
    if (status) {
    	err = -EIO;
    	goto done;
@@@ -2201,14 -2225,13 +2201,14 @@@ ice_set_link_ksettings(struct net_devic
    	       const struct ethtool_link_ksettings *ks)
  {
    struct ice_netdev_priv *np = netdev_priv(netdev);
 -	struct ethtool_link_ksettings safe_ks, copy_ks;
 -	struct ice_aqc_get_phy_caps_data *abilities;
    u8 autoneg, timeout = TEST_SET_BITS_TIMEOUT;
 -	u16 adv_link_speed, curr_link_speed, idx;
 +	struct ethtool_link_ksettings copy_ks = *ks;
 +	struct ethtool_link_ksettings safe_ks = {};
 +	struct ice_aqc_get_phy_caps_data *phy_caps;
    struct ice_aqc_set_phy_cfg_data config;
 +	u16 adv_link_speed, curr_link_speed;
    struct ice_pf *pf = np->vsi->back;
 -	struct ice_port_info *p;
 +	struct ice_port_info *pi;
    u8 autoneg_changed = 0;
    enum ice_status status;
    u64 phy_type_high = 0;
@@@ -2216,37 -2239,46 +2216,37 @@@
    int err = 0;
    bool linkup;
 -	p = np->vsi->port_info;
 -
 -	if (!p)
 -		return -EOPNOTSUPP;
 +	pi = np->vsi->port_info;
 -	/* Check if this is LAN VSI */
 -	ice_for_each_vsi(pf, idx)
 -		if (pf->vsi[idx]->type == ICE_VSI_PF) {
 -			if (np->vsi != pf->vsi[idx])
 -				return -EOPNOTSUPP;
 -			break;
 -		}
 +	if (!pi)
 +		return -EIO;
 -	if (p->phy.media_type != ICE_MEDIA_BASET &&
 -	    p->phy.media_type != ICE_MEDIA_FIBER &&
 -	    p->phy.media_type != ICE_MEDIA_BACKPLANE &&
 -	    p->phy.media_type != ICE_MEDIA_DA &&
 -	    p->phy.link_info.link_info & ICE_AQ_LINK_UP)
 +	if (pi->phy.media_type != ICE_MEDIA_BASET &&
 +	    pi->phy.media_type != ICE_MEDIA_FIBER &&
 +	    pi->phy.media_type != ICE_MEDIA_BACKPLANE &&
 +	    pi->phy.media_type != ICE_MEDIA_DA &&
 +	    pi->phy.link_info.link_info & ICE_AQ_LINK_UP)
    	return -EOPNOTSUPP;
 -	abilities = kzalloc(sizeof(*abilities), GFP_KERNEL);
 -	if (!abilities)
 +	phy_caps = kzalloc(sizeof(*phy_caps), GFP_KERNEL);
 +	if (!phy_caps)
    	return -ENOMEM;
/* Get the PHY capabilities based on media */
 -	status = ice_aq_get_phy_caps(p, false, ICE_AQC_REPORT_TOPO_CAP,
 -				     abilities, NULL);
 +	if (ice_fw_supports_report_dflt_cfg(pi->hw))
 +		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
 +					     phy_caps, NULL);
 +	else
 +		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
 +					     phy_caps, NULL);
    if (status) {
 -		err = -EAGAIN;
 +		err = -EIO;
    	goto done;
    }
 -	/* copy the ksettings to copy_ks to avoid modifying the original */
 -	memcpy(&copy_ks, ks, sizeof(copy_ks));
 -
    /* save autoneg out of ksettings */
    autoneg = copy_ks.base.autoneg;
 -	memset(&safe_ks, 0, sizeof(safe_ks));
 -
    /* Get link modes supported by hardware.*/
    ice_phy_type_to_ethtool(netdev, &safe_ks);
@@@ -2258,7 -2290,7 +2258,7 @@@
    		   __ETHTOOL_LINK_MODE_MASK_NBITS)) {
    	if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags))
    		netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
 -		err = -EINVAL;
 +		err = -EOPNOTSUPP;
    	goto done;
    }
@@@ -2295,26 -2327,26 +2295,26 @@@
     * configuration is initialized during probe from PHY capabilities
     * software mode, and updated on set PHY configuration.
     */
 -	memcpy(&config, &p->phy.curr_user_phy_cfg, sizeof(config));
 +	config = pi->phy.curr_user_phy_cfg;
config.caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT;
/* Check autoneg */
 -	err = ice_setup_autoneg(p, &safe_ks, &config, autoneg, &autoneg_changed,
 +	err = ice_setup_autoneg(pi, &safe_ks, &config, autoneg, &autoneg_changed,
    			netdev);
if (err)
    	goto done;
/* Call to get the current link speed */
 -	p->phy.get_link_info = true;
 -	status = ice_get_link_status(p, &linkup);
 +	pi->phy.get_link_info = true;
 +	status = ice_get_link_status(pi, &linkup);
    if (status) {
 -		err = -EAGAIN;
 +		err = -EIO;
    	goto done;
    }
 -	curr_link_speed = p->phy.link_info.link_speed;
 +	curr_link_speed = pi->phy.link_info.link_speed;
    adv_link_speed = ice_ksettings_find_adv_link_speed(ks);
/* If speed didn't get set, set it to what it currently is.
@@@ -2333,7 -2365,7 +2333,7 @@@
    }
/* save the requested speeds */
 -	p->phy.link_info.req_speeds = adv_link_speed;
 +	pi->phy.link_info.req_speeds = adv_link_speed;
/* set link and auto negotiation so changes take effect */
    config.caps |= ICE_AQ_PHY_ENA_LINK;
@@@ -2341,7 -2373,7 +2341,7 @@@
    /* check if there is a PHY type for the requested advertised speed */
    if (!(phy_type_low || phy_type_high)) {
    	netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
 -		err = -EAGAIN;
 +		err = -EOPNOTSUPP;
    	goto done;
    }
@@@ -2349,9 -2381,9 +2349,9 @@@
     * for set PHY configuration
     */
    config.phy_type_high = cpu_to_le64(phy_type_high) &
 -			abilities->phy_type_high;
 +			phy_caps->phy_type_high;
    config.phy_type_low = cpu_to_le64(phy_type_low) &
 -			abilities->phy_type_low;
 +			phy_caps->phy_type_low;
if (!(config.phy_type_high || config.phy_type_low)) {
    	/* If there is no intersection and lenient mode is enabled, then
@@@ -2365,13 -2397,13 +2365,13 @@@
    				      pf->nvm_phy_type_lo;
    	} else {
    		netdev_info(netdev, "The selected speed is not supported by the current media. Please select a link speed that is supported by the current media.\n");
 -			err = -EAGAIN;
 +			err = -EOPNOTSUPP;
    		goto done;
    	}
    }
/* If link is up put link down */
 -	if (p->phy.link_info.link_info & ICE_AQ_LINK_UP) {
 +	if (pi->phy.link_info.link_info & ICE_AQ_LINK_UP) {
    	/* Tell the OS link is going down, the link will go
    	 * back up when fw says it is ready asynchronously
    	 */
@@@ -2381,17 -2413,17 +2381,17 @@@
    }
/* make the aq call */
 -	status = ice_aq_set_phy_cfg(&pf->hw, p, &config, NULL);
 +	status = ice_aq_set_phy_cfg(&pf->hw, pi, &config, NULL);
    if (status) {
    	netdev_info(netdev, "Set phy config failed,\n");
 -		err = -EAGAIN;
 +		err = -EIO;
    	goto done;
    }
/* Save speed request */
 -	p->phy.curr_user_speed_req = adv_link_speed;
 +	pi->phy.curr_user_speed_req = adv_link_speed;
  done:
 -	kfree(abilities);
 +	kfree(phy_caps);
    clear_bit(__ICE_CFG_BUSY, pf->state);
return err;
@@@ -2875,7 -2907,7 +2875,7 @@@ process_link
    /* Bring interface down, copy in the new ring info, then restore the
     * interface. if VSI is up, bring it down and then back up
     */
 -	if (!test_and_set_bit(__ICE_DOWN, vsi->state)) {
 +	if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
    	ice_down(vsi);
if (tx_rings) {
@@@ -2961,7 -2993,7 +2961,7 @@@ ice_get_pauseparam(struct net_device *n
    	return;
/* Get current PHY config */
 -	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
 +	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
    			     NULL);
    if (status)
    	goto out;
@@@ -3028,7 -3060,7 +3028,7 @@@ ice_set_pauseparam(struct net_device *n
    	return -ENOMEM;
/* Get current PHY config */
 -	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
 +	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
    			     NULL);
    if (status) {
    	kfree(pcaps);
@@@ -3129,7 -3161,7 +3129,7 @@@ ice_get_rxfh(struct net_device *netdev
    struct ice_netdev_priv *np = netdev_priv(netdev);
    struct ice_vsi *vsi = np->vsi;
    struct ice_pf *pf = vsi->back;
 -	int ret = 0, i;
 +	int err, i;
    u8 *lut;
if (hfunc)
@@@ -3148,20 -3180,17 +3148,20 @@@
    if (!lut)
    	return -ENOMEM;
 -	if (ice_get_rss(vsi, key, lut, vsi->rss_table_size)) {
 -		ret = -EIO;
 +	err = ice_get_rss_key(vsi, key);
 +	if (err)
 +		goto out;
 +
 +	err = ice_get_rss_lut(vsi, lut, vsi->rss_table_size);
 +	if (err)
    	goto out;
 -	}
for (i = 0; i < vsi->rss_table_size; i++)
    	indir[i] = (u32)(lut[i]);
out:
    kfree(lut);
 -	return ret;
 +	return err;
  }
/**
@@@ -3182,7 -3211,7 +3182,7 @@@ ice_set_rxfh(struct net_device *netdev
    struct ice_vsi *vsi = np->vsi;
    struct ice_pf *pf = vsi->back;
    struct device *dev;
 -	u8 *seed = NULL;
 +	int err;
dev = ice_pf_to_dev(pf);
    if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
@@@ -3203,10 -3232,7 +3203,10 @@@
    			return -ENOMEM;
    	}
    	memcpy(vsi->rss_hkey_user, key, ICE_VSIQF_HKEY_ARRAY_SIZE);
 -		seed = vsi->rss_hkey_user;
 +
 +		err = ice_set_rss_key(vsi, vsi->rss_hkey_user);
 +		if (err)
 +			return err;
    }
if (!vsi->rss_lut_user) {
@@@ -3227,9 -3253,8 +3227,9 @@@
    			 vsi->rss_size);
    }
 -	if (ice_set_rss(vsi, seed, vsi->rss_lut_user, vsi->rss_table_size))
 -		return -EIO;
 +	err = ice_set_rss_lut(vsi, vsi->rss_lut_user, vsi->rss_table_size);
 +	if (err)
 +		return err;
return 0;
  }
@@@ -3325,9 -3350,10 +3325,9 @@@ static int ice_get_valid_rss_size(struc
  static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
  {
    struct ice_pf *pf = vsi->back;
 -	enum ice_status status;
    struct device *dev;
    struct ice_hw *hw;
 -	int err = 0;
 +	int err;
    u8 *lut;
dev = ice_pf_to_dev(pf);
@@@ -3348,10 -3374,14 +3348,10 @@@
/* create/set RSS LUT */
    ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
 -	status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut,
 -				    vsi->rss_table_size);
 -	if (status) {
 -		dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n",
 -			ice_stat_str(status),
 +	err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
 +	if (err)
 +		dev_err(dev, "Cannot set RSS lut, err %d aq_err %s\n", err,
    		ice_aq_str(hw->adminq.sq_last_status));
 -		err = -EIO;
 -	}
kfree(lut);
    return err;
@@@ -3442,7 -3472,7 +3442,7 @@@ static void ice_get_wol(struct net_devi
    	netdev_warn(netdev, "Wake on LAN is not supported on this interface!\n");
/* Get WoL settings based on the HW capability */
- 	if (ice_is_wol_supported(pf)) {
+ 	if (ice_is_wol_supported(&pf->hw)) {
    	wol->supported = WAKE_MAGIC;
    	wol->wolopts = pf->wol_ena ? WAKE_MAGIC : 0;
    } else {
@@@ -3462,7 -3492,7 +3462,7 @@@ static int ice_set_wol(struct net_devic
    struct ice_vsi *vsi = np->vsi;
    struct ice_pf *pf = vsi->back;
- 	if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(pf))
+ 	if (vsi->type != ICE_VSI_PF || !ice_is_wol_supported(&pf->hw))
    	return -EOPNOTSUPP;
/* only magic packet is supported */
@@@ -3917,14 -3947,14 +3917,14 @@@ ice_get_module_eeprom(struct net_devic
    u8 value = 0;
    u8 page = 0;
 -	status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0,
 -				   &value, 1, 0, NULL);
 -	if (status)
 -		return -EIO;
 -
    if (!ee || !ee->len || !data)
    	return -EINVAL;
 +	status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0, &value, 1, 0,
 +				   NULL);
 +	if (status)
 +		return -EIO;
 +
    if (value == ICE_MODULE_TYPE_SFP)
    	is_sfp = true;
diff --combined drivers/net/ethernet/intel/ice/ice_lib.c
index 5edc0da8b8c3,d13c7fc8fb0a..16d0ee5b48a5
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@@ -343,9 -343,6 +343,9 @@@ static int ice_vsi_clear(struct ice_vs
    pf->vsi[vsi->idx] = NULL;
    if (vsi->idx < pf->next_vsi && vsi->type != ICE_VSI_CTRL)
    	pf->next_vsi = vsi->idx;
 +	if (vsi->idx < pf->next_vsi && vsi->type == ICE_VSI_CTRL &&
 +	    vsi->vf_id != ICE_INVAL_VFID)
 +		pf->next_vsi = vsi->idx;
ice_vsi_free_arrays(vsi);
    mutex_unlock(&pf->sw_mutex);
@@@ -422,7 -419,7 +422,7 @@@ ice_vsi_alloc(struct ice_pf *pf, enum i
vsi->type = vsi_type;
    vsi->back = pf;
 -	set_bit(__ICE_DOWN, vsi->state);
 +	set_bit(ICE_VSI_DOWN, vsi->state);
if (vsi_type == ICE_VSI_VF)
    	ice_vsi_set_num_qs(vsi, vf_id);
@@@ -457,8 -454,8 +457,8 @@@
    	goto unlock_pf;
    }
 -	if (vsi->type == ICE_VSI_CTRL) {
 -		/* Use the last VSI slot as the index for the control VSI */
 +	if (vsi->type == ICE_VSI_CTRL && vf_id == ICE_INVAL_VFID) {
 +		/* Use the last VSI slot as the index for PF control VSI */
    	vsi->idx = pf->num_alloc_vsi - 1;
    	pf->ctrl_vsi_idx = vsi->idx;
    	pf->vsi[vsi->idx] = vsi;
@@@ -471,9 -468,6 +471,9 @@@
    	pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
    					 pf->next_vsi);
    }
 +
 +	if (vsi->type == ICE_VSI_CTRL && vf_id != ICE_INVAL_VFID)
 +		pf->vf[vf_id].ctrl_vsi_idx = vsi->idx;
    goto unlock_pf;
err_rings:
@@@ -512,7 -506,7 +512,7 @@@ static int ice_alloc_fd_res(struct ice_
    if (!b_val)
    	return -EPERM;
 -	if (vsi->type != ICE_VSI_PF)
 +	if (!(vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF))
    	return -EPERM;
if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
@@@ -523,13 -517,6 +523,13 @@@
    /* each VSI gets same "best_effort" quota */
    vsi->num_bfltr = b_val;
 +	if (vsi->type == ICE_VSI_VF) {
 +		vsi->num_gfltr = 0;
 +
 +		/* each VSI gets same "best_effort" quota */
 +		vsi->num_bfltr = b_val;
 +	}
 +
    return 0;
  }
@@@ -742,10 -729,11 +742,10 @@@ static void ice_set_dflt_vsi_ctx(struc
   */
  static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
  {
 -	u16 offset = 0, qmap = 0, tx_count = 0;
 +	u16 offset = 0, qmap = 0, tx_count = 0, pow = 0;
 +	u16 num_txq_per_tc, num_rxq_per_tc;
    u16 qcount_tx = vsi->alloc_txq;
    u16 qcount_rx = vsi->alloc_rxq;
 -	u16 tx_numq_tc, rx_numq_tc;
 -	u16 pow = 0, max_rss = 0;
    bool ena_tc0 = false;
    u8 netdev_tc = 0;
    int i;
@@@ -763,15 -751,12 +763,15 @@@
    	vsi->tc_cfg.ena_tc |= 1;
    }
 -	rx_numq_tc = qcount_rx / vsi->tc_cfg.numtc;
 -	if (!rx_numq_tc)
 -		rx_numq_tc = 1;
 -	tx_numq_tc = qcount_tx / vsi->tc_cfg.numtc;
 -	if (!tx_numq_tc)
 -		tx_numq_tc = 1;
 +	num_rxq_per_tc = min_t(u16, qcount_rx / vsi->tc_cfg.numtc, ICE_MAX_RXQS_PER_TC);
 +	if (!num_rxq_per_tc)
 +		num_rxq_per_tc = 1;
 +	num_txq_per_tc = qcount_tx / vsi->tc_cfg.numtc;
 +	if (!num_txq_per_tc)
 +		num_txq_per_tc = 1;
 +
 +	/* find the (rounded up) power-of-2 of qcount */
 +	pow = (u16)order_base_2(num_rxq_per_tc);
/* TC mapping is a function of the number of Rx queues assigned to the
     * VSI for each traffic class and the offset of these queues.
@@@ -784,6 -769,26 +784,6 @@@
     *
     * Setup number and offset of Rx queues for all TCs for the VSI
     */
 -
 -	qcount_rx = rx_numq_tc;
 -
 -	/* qcount will change if RSS is enabled */
 -	if (test_bit(ICE_FLAG_RSS_ENA, vsi->back->flags)) {
 -		if (vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_VF) {
 -			if (vsi->type == ICE_VSI_PF)
 -				max_rss = ICE_MAX_LG_RSS_QS;
 -			else
 -				max_rss = ICE_MAX_RSS_QS_PER_VF;
 -			qcount_rx = min_t(u16, rx_numq_tc, max_rss);
 -			if (!vsi->req_rxq)
 -				qcount_rx = min_t(u16, qcount_rx,
 -						  vsi->rss_size);
 -		}
 -	}
 -
 -	/* find the (rounded up) power-of-2 of qcount */
 -	pow = (u16)order_base_2(qcount_rx);
 -
    ice_for_each_traffic_class(i) {
    	if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
    		/* TC is not enabled */
@@@ -797,16 -802,16 +797,16 @@@
/* TC is enabled */
    	vsi->tc_cfg.tc_info[i].qoffset = offset;
 -		vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx;
 -		vsi->tc_cfg.tc_info[i].qcount_tx = tx_numq_tc;
 +		vsi->tc_cfg.tc_info[i].qcount_rx = num_rxq_per_tc;
 +		vsi->tc_cfg.tc_info[i].qcount_tx = num_txq_per_tc;
    	vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++;
qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) &
    		ICE_AQ_VSI_TC_Q_OFFSET_M) |
    		((pow << ICE_AQ_VSI_TC_Q_NUM_S) &
    		 ICE_AQ_VSI_TC_Q_NUM_M);
 -		offset += qcount_rx;
 -		tx_count += tx_numq_tc;
 +		offset += num_rxq_per_tc;
 +		tx_count += num_txq_per_tc;
    	ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
    }
@@@ -819,7 -824,7 +819,7 @@@
    if (offset)
    	vsi->num_rxq = offset;
    else
 -		vsi->num_rxq = qcount_rx;
 +		vsi->num_rxq = num_rxq_per_tc;
vsi->num_txq = tx_count;
@@@ -851,8 -856,7 +851,8 @@@ static void ice_set_fd_vsi_ctx(struct i
    u8 dflt_q_group, dflt_q_prio;
    u16 dflt_q, report_q, val;
 -	if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL)
 +	if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL &&
 +	    vsi->type != ICE_VSI_VF)
    	return;
val = ICE_AQ_VSI_PROP_FLOW_DIR_VALID;
@@@ -1175,24 -1179,7 +1175,24 @@@ static int ice_vsi_setup_vector_base(st
num_q_vectors = vsi->num_q_vectors;
    /* reserve slots from OS requested IRQs */
 -	base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx);
 +	if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) {
 +		struct ice_vf *vf;
 +		int i;
 +
 +		ice_for_each_vf(pf, i) {
 +			vf = &pf->vf[i];
 +			if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI) {
 +				base = pf->vsi[vf->ctrl_vsi_idx]->base_vector;
 +				break;
 +			}
 +		}
 +		if (i == pf->num_alloc_vfs)
 +			base = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
 +					   ICE_RES_VF_CTRL_VEC_ID);
 +	} else {
 +		base = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
 +				   vsi->idx);
 +	}
if (base < 0) {
    	dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n",
@@@ -1326,7 -1313,7 +1326,7 @@@ int ice_vsi_manage_rss_lut(struct ice_v
    				 vsi->rss_size);
    }
 -	err = ice_set_rss(vsi, NULL, lut, vsi->rss_table_size);
 +	err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
    kfree(lut);
    return err;
  }
@@@ -1337,10 -1324,12 +1337,10 @@@
   */
  static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
  {
 -	struct ice_aqc_get_set_rss_keys *key;
    struct ice_pf *pf = vsi->back;
 -	enum ice_status status;
    struct device *dev;
 -	int err = 0;
 -	u8 *lut;
 +	u8 *lut, *key;
 +	int err;
dev = ice_pf_to_dev(pf);
    vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq);
@@@ -1354,26 -1343,37 +1354,26 @@@
    else
    	ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
 -	status = ice_aq_set_rss_lut(&pf->hw, vsi->idx, vsi->rss_lut_type, lut,
 -				    vsi->rss_table_size);
 -
 -	if (status) {
 -		dev_err(dev, "set_rss_lut failed, error %s\n",
 -			ice_stat_str(status));
 -		err = -EIO;
 +	err = ice_set_rss_lut(vsi, lut, vsi->rss_table_size);
 +	if (err) {
 +		dev_err(dev, "set_rss_lut failed, error %d\n", err);
    	goto ice_vsi_cfg_rss_exit;
    }
 -	key = kzalloc(sizeof(*key), GFP_KERNEL);
 +	key = kzalloc(ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE, GFP_KERNEL);
    if (!key) {
    	err = -ENOMEM;
    	goto ice_vsi_cfg_rss_exit;
    }
if (vsi->rss_hkey_user)
 -		memcpy(key,
 -		       (struct ice_aqc_get_set_rss_keys *)vsi->rss_hkey_user,
 -		       ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
 +		memcpy(key, vsi->rss_hkey_user, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
    else
 -		netdev_rss_key_fill((void *)key,
 -				    ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
 -
 -	status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key);
 +		netdev_rss_key_fill((void *)key, ICE_GET_SET_RSS_KEY_EXTEND_KEY_SIZE);
 -	if (status) {
 -		dev_err(dev, "set_rss_key failed, error %s\n",
 -			ice_stat_str(status));
 -		err = -EIO;
 -	}
 +	err = ice_set_rss_key(vsi, key);
 +	if (err)
 +		dev_err(dev, "set_rss_key failed, error %d\n", err);
kfree(key);
  ice_vsi_cfg_rss_exit:
@@@ -2308,7 -2308,7 +2308,7 @@@ ice_vsi_setup(struct ice_pf *pf, struc
    struct ice_vsi *vsi;
    int ret, i;
 -	if (vsi_type == ICE_VSI_VF)
 +	if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL)
    	vsi = ice_vsi_alloc(pf, vsi_type, vf_id);
    else
    	vsi = ice_vsi_alloc(pf, vsi_type, ICE_INVAL_VFID);
@@@ -2323,7 -2323,7 +2323,7 @@@
    if (vsi->type == ICE_VSI_PF)
    	vsi->ethtype = ETH_P_PAUSE;
 -	if (vsi->type == ICE_VSI_VF)
 +	if (vsi->type == ICE_VSI_VF || vsi->type == ICE_VSI_CTRL)
    	vsi->vf_id = vf_id;
ice_alloc_fd_res(vsi);
@@@ -2593,7 -2593,7 +2593,7 @@@ void ice_vsi_free_rx_rings(struct ice_v
   */
  void ice_vsi_close(struct ice_vsi *vsi)
  {
 -	if (!test_and_set_bit(__ICE_DOWN, vsi->state))
 +	if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state))
    	ice_down(vsi);
ice_vsi_free_irq(vsi);
@@@ -2610,17 -2610,17 +2610,17 @@@ int ice_ena_vsi(struct ice_vsi *vsi, bo
  {
    int err = 0;
 -	if (!test_bit(__ICE_NEEDS_RESTART, vsi->state))
 +	if (!test_bit(ICE_VSI_NEEDS_RESTART, vsi->state))
    	return 0;
 -	clear_bit(__ICE_NEEDS_RESTART, vsi->state);
 +	clear_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
if (vsi->netdev && vsi->type == ICE_VSI_PF) {
    	if (netif_running(vsi->netdev)) {
    		if (!locked)
    			rtnl_lock();
- 			err = ice_open(vsi->netdev);
+ 			err = ice_open_internal(vsi->netdev);
if (!locked)
    			rtnl_unlock();
@@@ -2639,17 -2639,17 +2639,17 @@@
   */
  void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
  {
 -	if (test_bit(__ICE_DOWN, vsi->state))
 +	if (test_bit(ICE_VSI_DOWN, vsi->state))
    	return;
 -	set_bit(__ICE_NEEDS_RESTART, vsi->state);
 +	set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
if (vsi->type == ICE_VSI_PF && vsi->netdev) {
    	if (netif_running(vsi->netdev)) {
    		if (!locked)
    			rtnl_lock();
- 			ice_stop(vsi->netdev);
+ 			ice_vsi_close(vsi);
if (!locked)
    			rtnl_unlock();
@@@ -2770,24 -2770,7 +2770,24 @@@ int ice_vsi_release(struct ice_vsi *vsi
     * many interrupts each VF needs. SR-IOV MSIX resources are also
     * cleared in the same manner.
     */
 -	if (vsi->type != ICE_VSI_VF) {
 +	if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID) {
 +		struct ice_vf *vf;
 +		int i;
 +
 +		ice_for_each_vf(pf, i) {
 +			vf = &pf->vf[i];
 +			if (i != vsi->vf_id && vf->ctrl_vsi_idx != ICE_NO_VSI)
 +				break;
 +		}
 +		if (i == pf->num_alloc_vfs) {
 +			/* No other VFs left that have control VSI, reclaim SW
 +			 * interrupts back to the common pool
 +			 */
 +			ice_free_res(pf->irq_tracker, vsi->base_vector,
 +				     ICE_RES_VF_CTRL_VEC_ID);
 +			pf->num_avail_sw_msix += vsi->num_q_vectors;
 +		}
 +	} else if (vsi->type != ICE_VSI_VF) {
    	/* reclaim SW interrupts back to the common pool */
    	ice_free_res(pf->irq_tracker, vsi->base_vector, vsi->idx);
    	pf->num_avail_sw_msix += vsi->num_q_vectors;
@@@ -2812,7 -2795,7 +2812,7 @@@
    ice_vsi_free_q_vectors(vsi);
/* make sure unregister_netdev() was called by checking __ICE_DOWN */
 -	if (vsi->netdev && test_bit(__ICE_DOWN, vsi->state)) {
 +	if (vsi->netdev && test_bit(ICE_VSI_DOWN, vsi->state)) {
    	free_netdev(vsi->netdev);
    	vsi->netdev = NULL;
    }
@@@ -2835,46 -2818,38 +2835,46 @@@
  }
/**
 - * ice_vsi_rebuild_update_coalesce - set coalesce for a q_vector
 + * ice_vsi_rebuild_update_coalesce_intrl - set interrupt rate limit for a q_vector
   * @q_vector: pointer to q_vector which is being updated
 - * @coalesce: pointer to array of struct with stored coalesce
 + * @stored_intrl_setting: original INTRL setting
   *
   * Set coalesce param in q_vector and update these parameters in HW.
   */
  static void
 -ice_vsi_rebuild_update_coalesce(struct ice_q_vector *q_vector,
 -				struct ice_coalesce_stored *coalesce)
 +ice_vsi_rebuild_update_coalesce_intrl(struct ice_q_vector *q_vector,
 +				      u16 stored_intrl_setting)
  {
 -	struct ice_ring_container *rx_rc = &q_vector->rx;
 -	struct ice_ring_container *tx_rc = &q_vector->tx;
    struct ice_hw *hw = &q_vector->vsi->back->hw;
 -	tx_rc->itr_setting = coalesce->itr_tx;
 -	rx_rc->itr_setting = coalesce->itr_rx;
 -
 -	/* dynamic ITR values will be updated during Tx/Rx */
 -	if (!ITR_IS_DYNAMIC(tx_rc->itr_setting))
 -		wr32(hw, GLINT_ITR(tx_rc->itr_idx, q_vector->reg_idx),
 -		     ITR_REG_ALIGN(tx_rc->itr_setting) >>
 -		     ICE_ITR_GRAN_S);
 -	if (!ITR_IS_DYNAMIC(rx_rc->itr_setting))
 -		wr32(hw, GLINT_ITR(rx_rc->itr_idx, q_vector->reg_idx),
 -		     ITR_REG_ALIGN(rx_rc->itr_setting) >>
 -		     ICE_ITR_GRAN_S);
 -
 -	q_vector->intrl = coalesce->intrl;
 +	q_vector->intrl = stored_intrl_setting;
    wr32(hw, GLINT_RATE(q_vector->reg_idx),
         ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
  }
 +/**
 + * ice_vsi_rebuild_update_coalesce_itr - set coalesce for a q_vector
 + * @q_vector: pointer to q_vector which is being updated
 + * @rc: pointer to ring container
 + * @stored_itr_setting: original ITR setting
 + *
 + * Set coalesce param in q_vector and update these parameters in HW.
 + */
 +static void
 +ice_vsi_rebuild_update_coalesce_itr(struct ice_q_vector *q_vector,
 +				    struct ice_ring_container *rc,
 +				    u16 stored_itr_setting)
 +{
 +	struct ice_hw *hw = &q_vector->vsi->back->hw;
 +
 +	rc->itr_setting = stored_itr_setting;
 +
 +	/* dynamic ITR values will be updated during Tx/Rx */
 +	if (!ITR_IS_DYNAMIC(rc->itr_setting))
 +		wr32(hw, GLINT_ITR(rc->itr_idx, q_vector->reg_idx),
 +		     ITR_REG_ALIGN(rc->itr_setting) >> ICE_ITR_GRAN_S);
 +}
 +
  /**
   * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors
   * @vsi: VSI connected with q_vectors
@@@ -2894,11 -2869,6 +2894,11 @@@ ice_vsi_rebuild_get_coalesce(struct ice
    	coalesce[i].itr_tx = q_vector->tx.itr_setting;
    	coalesce[i].itr_rx = q_vector->rx.itr_setting;
    	coalesce[i].intrl = q_vector->intrl;
 +
 +		if (i < vsi->num_txq)
 +			coalesce[i].tx_valid = true;
 +		if (i < vsi->num_rxq)
 +			coalesce[i].rx_valid = true;
    }
return vsi->num_q_vectors;
@@@ -2923,59 -2893,17 +2923,59 @@@ ice_vsi_rebuild_set_coalesce(struct ice
    if ((size && !coalesce) || !vsi)
    	return;
 -	for (i = 0; i < size && i < vsi->num_q_vectors; i++)
 -		ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i],
 -						&coalesce[i]);
 -
 -	/* number of q_vectors increased, so assume coalesce settings were
 -	 * changed globally (i.e. ethtool -C eth0 instead of per-queue) and use
 -	 * the previous settings from q_vector 0 for all of the new q_vectors
 +	/* There are a couple of cases that have to be handled here:
 +	 *   1. The case where the number of queue vectors stays the same, but
 +	 *      the number of Tx or Rx rings changes (the first for loop)
 +	 *   2. The case where the number of queue vectors increased (the
 +	 *      second for loop)
 +	 */
 +	for (i = 0; i < size && i < vsi->num_q_vectors; i++) {
 +		/* There are 2 cases to handle here and they are the same for
 +		 * both Tx and Rx:
 +		 *   if the entry was valid previously (coalesce[i].[tr]x_valid
 +		 *   and the loop variable is less than the number of rings
 +		 *   allocated, then write the previous values
 +		 *
 +		 *   if the entry was not valid previously, but the number of
 +		 *   rings is less than are allocated (this means the number of
 +		 *   rings increased from previously), then write out the
 +		 *   values in the first element
 +		 */
 +		if (i < vsi->alloc_rxq && coalesce[i].rx_valid)
 +			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
 +							    &vsi->q_vectors[i]->rx,
 +							    coalesce[i].itr_rx);
 +		else if (i < vsi->alloc_rxq)
 +			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
 +							    &vsi->q_vectors[i]->rx,
 +							    coalesce[0].itr_rx);
 +
 +		if (i < vsi->alloc_txq && coalesce[i].tx_valid)
 +			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
 +							    &vsi->q_vectors[i]->tx,
 +							    coalesce[i].itr_tx);
 +		else if (i < vsi->alloc_txq)
 +			ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
 +							    &vsi->q_vectors[i]->tx,
 +							    coalesce[0].itr_tx);
 +
 +		ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i],
 +						      coalesce[i].intrl);
 +	}
 +
 +	/* the number of queue vectors increased so write whatever is in
 +	 * the first element
     */
 -	for (; i < vsi->num_q_vectors; i++)
 -		ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i],
 -						&coalesce[0]);
 +	for (; i < vsi->num_q_vectors; i++) {
 +		ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
 +						    &vsi->q_vectors[i]->tx,
 +						    coalesce[0].itr_tx);
 +		ice_vsi_rebuild_update_coalesce_itr(vsi->q_vectors[i],
 +						    &vsi->q_vectors[i]->rx,
 +						    coalesce[0].itr_rx);
 +		ice_vsi_rebuild_update_coalesce_intrl(vsi->q_vectors[i],
 +						      coalesce[0].intrl);
 +	}
  }
/**
@@@ -3004,11 -2932,9 +3004,11 @@@ int ice_vsi_rebuild(struct ice_vsi *vsi
coalesce = kcalloc(vsi->num_q_vectors,
    		   sizeof(struct ice_coalesce_stored), GFP_KERNEL);
 -	if (coalesce)
 -		prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi,
 -								  coalesce);
 +	if (!coalesce)
 +		return -ENOMEM;
 +
 +	prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi, coalesce);
 +
    ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
    ice_vsi_free_q_vectors(vsi);
@@@ -3152,7 -3078,6 +3152,6 @@@ err_vsi
  bool ice_is_reset_in_progress(unsigned long *state)
  {
    return test_bit(__ICE_RESET_OICR_RECV, state) ||
- 	       test_bit(__ICE_DCBNL_DEVRESET, state) ||
           test_bit(__ICE_PFR_REQ, state) ||
           test_bit(__ICE_CORER_REQ, state) ||
           test_bit(__ICE_GLOBR_REQ, state);
@@@ -3423,40 -3348,3 +3422,40 @@@ int ice_clear_dflt_vsi(struct ice_sw *s
return 0;
  }
 +
 +/**
 + * ice_set_link - turn on/off physical link
 + * @vsi: VSI to modify physical link on
 + * @ena: turn on/off physical link
 + */
 +int ice_set_link(struct ice_vsi *vsi, bool ena)
 +{
 +	struct device *dev = ice_pf_to_dev(vsi->back);
 +	struct ice_port_info *pi = vsi->port_info;
 +	struct ice_hw *hw = pi->hw;
 +	enum ice_status status;
 +
 +	if (vsi->type != ICE_VSI_PF)
 +		return -EINVAL;
 +
 +	status = ice_aq_set_link_restart_an(pi, ena, NULL);
 +
 +	/* if link is owned by manageability, FW will return ICE_AQ_RC_EMODE.
 +	 * this is not a fatal error, so print a warning message and return
 +	 * a success code. Return an error if FW returns an error code other
 +	 * than ICE_AQ_RC_EMODE
 +	 */
 +	if (status == ICE_ERR_AQ_ERROR) {
 +		if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
 +			dev_warn(dev, "can't set link to %s, err %s aq_err %s. not fatal, continuing\n",
 +				 (ena ? "ON" : "OFF"), ice_stat_str(status),
 +				 ice_aq_str(hw->adminq.sq_last_status));
 +	} else if (status) {
 +		dev_err(dev, "can't set link to %s, err %s aq_err %s\n",
 +			(ena ? "ON" : "OFF"), ice_stat_str(status),
 +			ice_aq_str(hw->adminq.sq_last_status));
 +		return -EIO;
 +	}
 +
 +	return 0;
 +}
diff --combined drivers/net/ethernet/intel/ice/ice_main.c
index 30935aaa8935,d821c687f239..1b2f1e258e5c
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@@ -84,7 -84,7 +84,7 @@@ static void ice_check_for_hang_subtask(
    		break;
    	}
 -	if (!vsi || test_bit(__ICE_DOWN, vsi->state))
 +	if (!vsi || test_bit(ICE_VSI_DOWN, vsi->state))
    	return;
if (!(vsi->netdev && netif_carrier_ok(vsi->netdev)))
@@@ -140,10 -140,21 +140,10 @@@ static int ice_init_mac_fltr(struct ice
perm_addr = vsi->port_info->mac.perm_addr;
    status = ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI);
 -	if (!status)
 -		return 0;
 -
 -	/* We aren't useful with no MAC filters, so unregister if we
 -	 * had an error
 -	 */
 -	if (vsi->netdev->reg_state == NETREG_REGISTERED) {
 -		dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %s. Unregistering device\n",
 -			ice_stat_str(status));
 -		unregister_netdev(vsi->netdev);
 -		free_netdev(vsi->netdev);
 -		vsi->netdev = NULL;
 -	}
 +	if (status)
 +		return -EIO;
 -	return -EIO;
 +	return 0;
  }
/**
@@@ -198,9 -209,9 +198,9 @@@ static int ice_add_mac_to_unsync_list(s
   */
  static bool ice_vsi_fltr_changed(struct ice_vsi *vsi)
  {
 -	return test_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags) ||
 -	       test_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags) ||
 -	       test_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
 +	return test_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state) ||
 +	       test_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state) ||
 +	       test_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
  }
/**
@@@ -267,9 -278,9 +267,9 @@@ static int ice_vsi_sync_fltr(struct ice
    INIT_LIST_HEAD(&vsi->tmp_unsync_list);
if (ice_vsi_fltr_changed(vsi)) {
 -		clear_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
 -		clear_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
 -		clear_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
 +		clear_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
 +		clear_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
 +		clear_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
/* grab the netdev's addr_list_lock */
    	netif_addr_lock_bh(netdev);
@@@ -350,8 -361,8 +350,8 @@@
    }
if (((changed_flags & IFF_PROMISC) || promisc_forced_on) ||
 -	    test_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags)) {
 -		clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
 +	    test_bit(ICE_VSI_PROMISC_CHANGED, vsi->state)) {
 +		clear_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
    	if (vsi->current_netdev_flags & IFF_PROMISC) {
    		/* Apply Rx filter rule to get traffic from wire */
    		if (!ice_is_dflt_vsi_in_use(pf->first_sw)) {
@@@ -384,12 -395,12 +384,12 @@@
    goto exit;
out_promisc:
 -	set_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
 +	set_bit(ICE_VSI_PROMISC_CHANGED, vsi->state);
    goto exit;
  out:
    /* if something went wrong then set the changed flag so we try again */
 -	set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
 -	set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
 +	set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
 +	set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
  exit:
    clear_bit(__ICE_CFG_BUSY, vsi->state);
    return err;
@@@ -436,6 -447,7 +436,6 @@@ static void ice_pf_dis_all_vsi(struct i
for (node = 0; node < ICE_MAX_VF_AGG_NODES; node++)
    	pf->vf_agg_node[node].num_vsis = 0;
 -
  }
/**
@@@ -597,7 -609,7 +597,7 @@@ static void ice_print_topo_conflict(str
    case ICE_AQ_LINK_TOPO_UNREACH_PRT:
    case ICE_AQ_LINK_TOPO_UNDRUTIL_PRT:
    case ICE_AQ_LINK_TOPO_UNDRUTIL_MEDIA:
 -		netdev_info(vsi->netdev, "Possible mis-configuration of the Ethernet port detected, please use the Intel(R) Ethernet Port Configuration Tool application to address the issue.\n");
 +		netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n");
    	break;
    case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA:
    	netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
@@@ -719,7 -731,7 +719,7 @@@ void ice_print_link_msg(struct ice_vsi 
    }
status = ice_aq_get_phy_caps(vsi->port_info, false,
 -				     ICE_AQC_REPORT_SW_CFG, caps, NULL);
 +				     ICE_AQC_REPORT_ACTIVE_CFG, caps, NULL);
    if (status)
    	netdev_info(vsi->netdev, "Get phy capability failed.\n");
@@@ -752,7 -764,7 +752,7 @@@ static void ice_vsi_link_event(struct i
    if (!vsi)
    	return;
 -	if (test_bit(__ICE_DOWN, vsi->state) || !vsi->netdev)
 +	if (test_bit(ICE_VSI_DOWN, vsi->state) || !vsi->netdev)
    	return;
if (vsi->type == ICE_VSI_PF) {
@@@ -872,10 -884,10 +872,10 @@@ ice_link_event(struct ice_pf *pf, struc
  {
    struct device *dev = ice_pf_to_dev(pf);
    struct ice_phy_info *phy_info;
 +	enum ice_status status;
    struct ice_vsi *vsi;
    u16 old_link_speed;
    bool old_link;
 -	int result;
phy_info = &pi->phy;
    phy_info->link_info_old = phy_info->link_info;
@@@ -886,11 -898,10 +886,11 @@@
    /* update the link info structures and re-enable link events,
     * don't bail on failure due to other book keeping needed
     */
 -	result = ice_update_link_info(pi);
 -	if (result)
 -		dev_dbg(dev, "Failed to update link status and re-enable link events for port %d\n",
 -			pi->lport);
 +	status = ice_update_link_info(pi);
 +	if (status)
 +		dev_dbg(dev, "Failed to update link status on port %d, err %s aq_err %s\n",
 +			pi->lport, ice_stat_str(status),
 +			ice_aq_str(pi->hw->adminq.sq_last_status));
/* Check if the link state is up after updating link info, and treat
     * this event as an UP event since the link is actually UP now.
@@@ -906,12 -917,18 +906,12 @@@
    if (!test_bit(ICE_FLAG_NO_MEDIA, pf->flags) &&
        !(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE)) {
    	set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
 -
 -		result = ice_aq_set_link_restart_an(pi, false, NULL);
 -		if (result) {
 -			dev_dbg(dev, "Failed to set link down, VSI %d error %d\n",
 -				vsi->vsi_num, result);
 -			return result;
 -		}
 +		ice_set_link(vsi, false);
    }
/* if the old link up/down and speed is the same as the new */
    if (link_up == old_link && link_speed == old_link_speed)
 -		return result;
 +		return 0;
if (ice_is_dcb_active(pf)) {
    	if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
@@@ -925,7 -942,7 +925,7 @@@
ice_vc_notify_link_state(pf);
 -	return result;
 +	return 0;
  }
/**
@@@ -1027,7 -1044,7 +1027,7 @@@ struct ice_aq_task 
  };
/**
 - * ice_wait_for_aq_event - Wait for an AdminQ event from firmware
 + * ice_aq_wait_for_event - Wait for an AdminQ event from firmware
   * @pf: pointer to the PF private structure
   * @opcode: the opcode to wait for
   * @timeout: how long to wait, in jiffies
@@@ -1625,7 -1642,7 +1625,7 @@@ static int ice_force_phys_link_state(st
    if (!pcaps)
    	return -ENOMEM;
 -	retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
 +	retcode = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
    			      NULL);
    if (retcode) {
    	dev_err(dev, "Failed to get phy capabilities, VSI %d error %d\n",
@@@ -1685,7 -1702,7 +1685,7 @@@ static int ice_init_nvm_phy_type(struc
    if (!pcaps)
    	return -ENOMEM;
 -	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_NVM_CAP, pcaps,
 +	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_NO_MEDIA, pcaps,
    			     NULL);
if (status) {
@@@ -1731,7 -1748,7 +1731,7 @@@ static void ice_init_link_dflt_override
   * ice_init_phy_cfg_dflt_override - Initialize PHY cfg default override settings
   * @pi: port info structure
   *
 - * If default override is enabled, initialized the user PHY cfg speed and FEC
 + * If default override is enabled, initialize the user PHY cfg speed and FEC
   * settings using the default override mask from the NVM.
   *
   * The PHY should only be configured with the default override settings the
@@@ -1740,9 -1757,6 +1740,9 @@@
   * and the PHY has not been configured with the default override settings. The
   * state is set here, and cleared in ice_configure_phy the first time the PHY is
   * configured.
 + *
 + * This function should be called only if the FW doesn't support default
 + * configuration mode, as reported by ice_fw_supports_report_dflt_cfg.
   */
  static void ice_init_phy_cfg_dflt_override(struct ice_port_info *pi)
  {
@@@ -1790,21 -1804,22 +1790,21 @@@ static int ice_init_phy_user_cfg(struc
    struct ice_phy_info *phy = &pi->phy;
    struct ice_pf *pf = pi->hw->back;
    enum ice_status status;
 -	struct ice_vsi *vsi;
    int err = 0;
if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
    	return -EIO;
 -	vsi = ice_get_main_vsi(pf);
 -	if (!vsi)
 -		return -EINVAL;
 -
    pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
    if (!pcaps)
    	return -ENOMEM;
 -	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps,
 -				     NULL);
 +	if (ice_fw_supports_report_dflt_cfg(pi->hw))
 +		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
 +					     pcaps, NULL);
 +	else
 +		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
 +					     pcaps, NULL);
    if (status) {
    	dev_err(ice_pf_to_dev(pf), "Get PHY capability failed.\n");
    	err = -EIO;
@@@ -1814,24 -1829,22 +1814,24 @@@
    ice_copy_phy_caps_to_cfg(pi, pcaps, &pi->phy.curr_user_phy_cfg);
/* check if lenient mode is supported and enabled */
 -	if (ice_fw_supports_link_override(&vsi->back->hw) &&
 +	if (ice_fw_supports_link_override(pi->hw) &&
        !(pcaps->module_compliance_enforcement &
          ICE_AQC_MOD_ENFORCE_STRICT_MODE)) {
    	set_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags);
 -		/* if link default override is enabled, initialize user PHY
 -		 * configuration with link default override values
 +		/* if the FW supports default PHY configuration mode, then the driver
 +		 * does not have to apply link override settings. If not,
 +		 * initialize user PHY configuration with link override values
    	 */
 -		if (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN) {
 +		if (!ice_fw_supports_report_dflt_cfg(pi->hw) &&
 +		    (pf->link_dflt_override.options & ICE_LINK_OVERRIDE_EN)) {
    		ice_init_phy_cfg_dflt_override(pi);
    		goto out;
    	}
    }
 -	/* if link default override is not enabled, initialize PHY using
 -	 * topology with media
 +	/* if link default override is not enabled, set user flow control and
 +	 * FEC settings based on what get_phy_caps returned
     */
    phy->curr_user_fec_req = ice_caps_to_fec_mode(pcaps->caps,
    					      pcaps->link_fec_options);
@@@ -1856,24 -1869,27 +1856,24 @@@ err_out
  static int ice_configure_phy(struct ice_vsi *vsi)
  {
    struct device *dev = ice_pf_to_dev(vsi->back);
 +	struct ice_port_info *pi = vsi->port_info;
    struct ice_aqc_get_phy_caps_data *pcaps;
    struct ice_aqc_set_phy_cfg_data *cfg;
 -	struct ice_port_info *pi;
 +	struct ice_phy_info *phy = &pi->phy;
 +	struct ice_pf *pf = vsi->back;
    enum ice_status status;
    int err = 0;
 -	pi = vsi->port_info;
 -	if (!pi)
 -		return -EINVAL;
 -
    /* Ensure we have media as we cannot configure a medialess port */
 -	if (!(pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
 +	if (!(phy->link_info.link_info & ICE_AQ_MEDIA_AVAILABLE))
    	return -EPERM;
ice_print_topo_conflict(vsi);
 -	if (vsi->port_info->phy.link_info.topo_media_conflict ==
 -	    ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
 +	if (phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA)
    	return -EPERM;
 -	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
 +	if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))
    	return ice_force_phys_link_state(vsi, true);
pcaps = kzalloc(sizeof(*pcaps), GFP_KERNEL);
@@@ -1881,7 -1897,7 +1881,7 @@@
    	return -ENOMEM;
/* Get current PHY config */
 -	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_SW_CFG, pcaps,
 +	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_ACTIVE_CFG, pcaps,
    			     NULL);
    if (status) {
    	dev_err(dev, "Failed to get PHY configuration, VSI %d error %s\n",
@@@ -1894,19 -1910,15 +1894,19 @@@
     * there's nothing to do
     */
    if (pcaps->caps & ICE_AQC_PHY_EN_LINK &&
 -	    ice_phy_caps_equals_cfg(pcaps, &pi->phy.curr_user_phy_cfg))
 +	    ice_phy_caps_equals_cfg(pcaps, &phy->curr_user_phy_cfg))
    	goto done;
/* Use PHY topology as baseline for configuration */
    memset(pcaps, 0, sizeof(*pcaps));
 -	status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP, pcaps,
 -				     NULL);
 +	if (ice_fw_supports_report_dflt_cfg(pi->hw))
 +		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_DFLT_CFG,
 +					     pcaps, NULL);
 +	else
 +		status = ice_aq_get_phy_caps(pi, false, ICE_AQC_REPORT_TOPO_CAP_MEDIA,
 +					     pcaps, NULL);
    if (status) {
 -		dev_err(dev, "Failed to get PHY topology, VSI %d error %s\n",
 +		dev_err(dev, "Failed to get PHY caps, VSI %d error %s\n",
    		vsi->vsi_num, ice_stat_str(status));
    	err = -EIO;
    	goto done;
@@@ -1925,8 -1937,8 +1925,8 @@@
     */
    if (test_and_clear_bit(__ICE_LINK_DEFAULT_OVERRIDE_PENDING,
    		       vsi->back->state)) {
 -		cfg->phy_type_low = pi->phy.curr_user_phy_cfg.phy_type_low;
 -		cfg->phy_type_high = pi->phy.curr_user_phy_cfg.phy_type_high;
 +		cfg->phy_type_low = phy->curr_user_phy_cfg.phy_type_low;
 +		cfg->phy_type_high = phy->curr_user_phy_cfg.phy_type_high;
    } else {
    	u64 phy_low = 0, phy_high = 0;
@@@ -1944,7 -1956,7 +1944,7 @@@
    }
/* FEC */
 -	ice_cfg_phy_fec(pi, cfg, pi->phy.curr_user_fec_req);
 +	ice_cfg_phy_fec(pi, cfg, phy->curr_user_fec_req);
/* Can't provide what was requested; use PHY capabilities */
    if (cfg->link_fec_opt !=
@@@ -1956,12 -1968,12 +1956,12 @@@
    /* Flow Control - always supported; no need to check against
     * capabilities
     */
 -	ice_cfg_phy_fc(pi, cfg, pi->phy.curr_user_fc_req);
 +	ice_cfg_phy_fc(pi, cfg, phy->curr_user_fc_req);
/* Enable link and link update */
    cfg->caps |= ICE_AQ_PHY_ENA_AUTO_LINK_UPDT | ICE_AQ_PHY_ENA_LINK;
 -	status = ice_aq_set_phy_cfg(&vsi->back->hw, pi, cfg, NULL);
 +	status = ice_aq_set_phy_cfg(&pf->hw, pi, cfg, NULL);
    if (status) {
    	dev_err(dev, "Failed to set phy config, VSI %d error %s\n",
    		vsi->vsi_num, ice_stat_str(status));
@@@ -2008,7 -2020,7 +2008,7 @@@ static void ice_check_media_subtask(str
    	/* PHY settings are reset on media insertion, reconfigure
    	 * PHY to preserve settings.
    	 */
 -		if (test_bit(__ICE_DOWN, vsi->state) &&
 +		if (test_bit(ICE_VSI_DOWN, vsi->state) &&
    	    test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags))
    		return;
@@@ -2059,7 -2071,6 +2059,7 @@@ static void ice_service_task(struct wor
    ice_process_vflr_event(pf);
    ice_clean_mailboxq_subtask(pf);
    ice_sync_arfs_fltrs(pf);
 +	ice_flush_fdir_ctx(pf);
    /* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */
    ice_service_task_complete(pf);
@@@ -2071,7 -2082,6 +2071,7 @@@
        test_bit(__ICE_MDD_EVENT_PENDING, pf->state) ||
        test_bit(__ICE_VFLR_EVENT_PENDING, pf->state) ||
        test_bit(__ICE_MAILBOXQ_EVENT_PENDING, pf->state) ||
 +	    test_bit(__ICE_FD_VF_FLUSH_CTX, pf->state) ||
        test_bit(__ICE_ADMINQ_EVENT_PENDING, pf->state))
    	mod_timer(&pf->serv_tmr, jiffies);
  }
@@@ -2210,13 -2220,8 +2210,13 @@@ static int ice_vsi_req_irq_msix(struct 
    		/* skip this unused q_vector */
    		continue;
    	}
 -		err = devm_request_irq(dev, irq_num, vsi->irq_handler, 0,
 -				       q_vector->name, q_vector);
 +		if (vsi->type == ICE_VSI_CTRL && vsi->vf_id != ICE_INVAL_VFID)
 +			err = devm_request_irq(dev, irq_num, vsi->irq_handler,
 +					       IRQF_SHARED, q_vector->name,
 +					       q_vector);
 +		else
 +			err = devm_request_irq(dev, irq_num, vsi->irq_handler,
 +					       0, q_vector->name, q_vector);
    	if (err) {
    		netdev_err(vsi->netdev, "MSIX request_irq failed, error: %d\n",
    			   err);
@@@ -2519,7 -2524,7 +2519,7 @@@ ice_xdp_setup_prog(struct ice_vsi *vsi
    }
/* need to stop netdev while setting up the program for Rx rings */
 -	if (if_running && !test_and_set_bit(__ICE_DOWN, vsi->state)) {
 +	if (if_running && !test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
    	ret = ice_down(vsi);
    	if (ret) {
    		NL_SET_ERR_MSG_MOD(extack, "Preparing device for XDP attach failed");
@@@ -2970,11 -2975,18 +2970,11 @@@ static int ice_cfg_netdev(struct ice_vs
    struct ice_netdev_priv *np;
    struct net_device *netdev;
    u8 mac_addr[ETH_ALEN];
 -	int err;
 -
 -	err = ice_devlink_create_port(vsi);
 -	if (err)
 -		return err;
netdev = alloc_etherdev_mqs(sizeof(*np), vsi->alloc_txq,
    			    vsi->alloc_rxq);
 -	if (!netdev) {
 -		err = -ENOMEM;
 -		goto err_destroy_devlink_port;
 -	}
 +	if (!netdev)
 +		return -ENOMEM;
vsi->netdev = netdev;
    np = netdev_priv(netdev);
@@@ -3002,7 -3014,25 +3002,7 @@@
    netdev->min_mtu = ETH_MIN_MTU;
    netdev->max_mtu = ICE_MAX_MTU;
 -	err = register_netdev(vsi->netdev);
 -	if (err)
 -		goto err_free_netdev;
 -
 -	devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev);
 -
 -	netif_carrier_off(vsi->netdev);
 -
 -	/* make sure transmit queues start off as stopped */
 -	netif_tx_stop_all_queues(vsi->netdev);
 -
    return 0;
 -
 -err_free_netdev:
 -	free_netdev(vsi->netdev);
 -	vsi->netdev = NULL;
 -err_destroy_devlink_port:
 -	ice_devlink_destroy_port(vsi);
 -	return err;
  }
/**
@@@ -3077,6 -3107,15 +3077,6 @@@ ice_vlan_rx_add_vid(struct net_device *
    struct ice_vsi *vsi = np->vsi;
    int ret;
 -	if (vid >= VLAN_N_VID) {
 -		netdev_err(netdev, "VLAN id requested %d is out of range %d\n",
 -			   vid, VLAN_N_VID);
 -		return -EINVAL;
 -	}
 -
 -	if (vsi->info.pvid)
 -		return -EINVAL;
 -
    /* VLAN 0 is added by default during load/reset */
    if (!vid)
    	return 0;
@@@ -3093,7 -3132,7 +3093,7 @@@
     */
    ret = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI);
    if (!ret)
 -		set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
 +		set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
return ret;
  }
@@@ -3114,6 -3153,9 +3114,6 @@@ ice_vlan_rx_kill_vid(struct net_device 
    struct ice_vsi *vsi = np->vsi;
    int ret;
 -	if (vsi->info.pvid)
 -		return -EINVAL;
 -
    /* don't allow removal of VLAN 0 */
    if (!vid)
    	return 0;
@@@ -3129,7 -3171,7 +3129,7 @@@
    if (vsi->num_vlan == 1 && ice_vsi_is_vlan_pruning_ena(vsi))
    	ret = ice_cfg_vlan_pruning(vsi, false, false);
 -	set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
 +	set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state);
    return ret;
  }
@@@ -3188,6 -3230,8 +3188,6 @@@ unroll_napi_add
    if (vsi) {
    	ice_napi_del(vsi);
    	if (vsi->netdev) {
 -			if (vsi->netdev->reg_state == NETREG_REGISTERED)
 -				unregister_netdev(vsi->netdev);
    		free_netdev(vsi->netdev);
    		vsi->netdev = NULL;
    	}
@@@ -3493,15 -3537,14 +3493,14 @@@ static int ice_init_interrupt_scheme(st
  }
/**
-  * ice_is_wol_supported - get NVM state of WoL
-  * @pf: board private structure
+  * ice_is_wol_supported - check if WoL is supported
+  * @hw: pointer to hardware info
   *
   * Check if WoL is supported based on the HW configuration.
   * Returns true if NVM supports and enables WoL for this port, false otherwise
   */
- bool ice_is_wol_supported(struct ice_pf *pf)
+ bool ice_is_wol_supported(struct ice_hw *hw)
  {
- 	struct ice_hw *hw = &pf->hw;
    u16 wol_ctrl;
/* A bit set to 1 in the NVM Software Reserved Word 2 (WoL control
@@@ -3510,7 -3553,7 +3509,7 @@@
    if (ice_read_sr_word(hw, ICE_SR_NVM_WOL_CFG, &wol_ctrl))
    	return false;
- 	return !(BIT(hw->pf_id) & wol_ctrl);
+ 	return !(BIT(hw->port_info->lport) & wol_ctrl);
  }
/**
@@@ -3941,40 -3984,6 +3940,40 @@@ static void ice_print_wake_reason(struc
    dev_info(ice_pf_to_dev(pf), "Wake reason: %s", wake_str);
  }
 +/**
 + * ice_register_netdev - register netdev and devlink port
 + * @pf: pointer to the PF struct
 + */
 +static int ice_register_netdev(struct ice_pf *pf)
 +{
 +	struct ice_vsi *vsi;
 +	int err = 0;
 +
 +	vsi = ice_get_main_vsi(pf);
 +	if (!vsi || !vsi->netdev)
 +		return -EIO;
 +
 +	err = register_netdev(vsi->netdev);
 +	if (err)
 +		goto err_register_netdev;
 +
 +	netif_carrier_off(vsi->netdev);
 +	netif_tx_stop_all_queues(vsi->netdev);
 +	err = ice_devlink_create_port(vsi);
 +	if (err)
 +		goto err_devlink_create;
 +
 +	devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev);
 +
 +	return 0;
 +err_devlink_create:
 +	unregister_netdev(vsi->netdev);
 +err_register_netdev:
 +	free_netdev(vsi->netdev);
 +	vsi->netdev = NULL;
 +	return err;
 +}
 +
  /**
   * ice_probe - Device initialization routine
   * @pdev: PCI device information struct
@@@ -4182,28 -4191,25 +4181,25 @@@ ice_probe(struct pci_dev *pdev, const s
    	goto err_send_version_unroll;
    }
+ 	/* not a fatal error if this fails */
    err = ice_init_nvm_phy_type(pf->hw.port_info);
- 	if (err) {
+ 	if (err)
    	dev_err(dev, "ice_init_nvm_phy_type failed: %d\n", err);
- 		goto err_send_version_unroll;
- 	}
+ 	/* not a fatal error if this fails */
    err = ice_update_link_info(pf->hw.port_info);
- 	if (err) {
+ 	if (err)
    	dev_err(dev, "ice_update_link_info failed: %d\n", err);
- 		goto err_send_version_unroll;
- 	}
ice_init_link_dflt_override(pf->hw.port_info);
/* if media available, initialize PHY settings */
    if (pf->hw.port_info->phy.link_info.link_info &
        ICE_AQ_MEDIA_AVAILABLE) {
+ 		/* not a fatal error if this fails */
    	err = ice_init_phy_user_cfg(pf->hw.port_info);
- 		if (err) {
+ 		if (err)
    		dev_err(dev, "ice_init_phy_user_cfg failed: %d\n", err);
- 			goto err_send_version_unroll;
- 		}
if (!test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) {
    		struct ice_vsi *vsi = ice_get_main_vsi(pf);
@@@ -4255,16 -4261,10 +4251,16 @@@
    pcie_print_link_status(pf->pdev);
probe_done:
 +	err = ice_register_netdev(pf);
 +	if (err)
 +		goto err_netdev_reg;
 +
    /* ready to go, so clear down state bit */
    clear_bit(__ICE_DOWN, pf->state);
 +
    return 0;
 +err_netdev_reg:
  err_send_version_unroll:
    ice_vsi_release_all(pf);
  err_alloc_sw_unroll:
@@@ -4310,7 -4310,7 +4306,7 @@@ static void ice_set_wake(struct ice_pf 
  }
/**
 - * ice_setup_magic_mc_wake - setup device to wake on multicast magic packet
 + * ice_setup_mc_magic_wake - setup device to wake on multicast magic packet
   * @pf: pointer to the PF struct
   *
   * Issue firmware command to enable multicast magic wake, making
@@@ -4564,6 -4564,7 +4560,7 @@@ static int __maybe_unused ice_suspend(s
    		continue;
    	ice_vsi_free_q_vectors(pf->vsi[v]);
    }
+ 	ice_free_cpu_rx_rmap(ice_get_main_vsi(pf));
    ice_clear_interrupt_scheme(pf);
pci_save_state(pdev);
@@@ -4960,8 -4961,8 +4957,8 @@@ static void ice_set_rx_mode(struct net_
     * ndo_set_rx_mode may be triggered even without a change in netdev
     * flags
     */
 -	set_bit(ICE_VSI_FLAG_UMAC_FLTR_CHANGED, vsi->flags);
 -	set_bit(ICE_VSI_FLAG_MMAC_FLTR_CHANGED, vsi->flags);
 +	set_bit(ICE_VSI_UMAC_FLTR_CHANGED, vsi->state);
 +	set_bit(ICE_VSI_MMAC_FLTR_CHANGED, vsi->state);
    set_bit(ICE_FLAG_FLTR_SYNC, vsi->back->flags);
/* schedule our worker thread which will take care of
@@@ -5234,7 -5235,7 +5231,7 @@@ static int ice_up_complete(struct ice_v
    if (err)
    	return err;
 -	clear_bit(__ICE_DOWN, vsi->state);
 +	clear_bit(ICE_VSI_DOWN, vsi->state);
    ice_napi_enable_all(vsi);
    ice_vsi_ena_irq(vsi);
@@@ -5341,6 -5342,7 +5338,6 @@@ static void ice_update_vsi_ring_stats(s
    vsi->tx_linearize = 0;
    vsi->rx_buf_failed = 0;
    vsi->rx_page_failed = 0;
 -	vsi->rx_gro_dropped = 0;
rcu_read_lock();
@@@ -5355,6 -5357,7 +5352,6 @@@
    	vsi_stats->rx_bytes += bytes;
    	vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed;
    	vsi->rx_page_failed += ring->rx_stats.alloc_page_failed;
 -		vsi->rx_gro_dropped += ring->rx_stats.gro_dropped;
    }
/* update XDP Tx rings counters */
@@@ -5375,7 -5378,7 +5372,7 @@@ void ice_update_vsi_stats(struct ice_vs
    struct ice_eth_stats *cur_es = &vsi->eth_stats;
    struct ice_pf *pf = vsi->back;
 -	if (test_bit(__ICE_DOWN, vsi->state) ||
 +	if (test_bit(ICE_VSI_DOWN, vsi->state) ||
        test_bit(__ICE_CFG_BUSY, pf->state))
    	return;
@@@ -5386,7 -5389,7 +5383,7 @@@
    ice_update_eth_stats(vsi);
cur_ns->tx_errors = cur_es->tx_errors;
 -	cur_ns->rx_dropped = cur_es->rx_discards + vsi->rx_gro_dropped;
 +	cur_ns->rx_dropped = cur_es->rx_discards;
    cur_ns->tx_dropped = cur_es->tx_discards;
    cur_ns->multicast = cur_es->rx_multicast;
@@@ -5580,7 -5583,7 +5577,7 @@@ void ice_get_stats64(struct net_device 
     * But, only call the update routine and read the registers if VSI is
     * not down.
     */
 -	if (!test_bit(__ICE_DOWN, vsi->state))
 +	if (!test_bit(ICE_VSI_DOWN, vsi->state))
    	ice_update_vsi_ring_stats(vsi);
    stats->tx_packets = vsi_stats->tx_packets;
    stats->tx_bytes = vsi_stats->tx_bytes;
@@@ -5780,7 -5783,7 +5777,7 @@@ int ice_vsi_open_ctrl(struct ice_vsi *v
    if (err)
    	goto err_up_complete;
 -	clear_bit(__ICE_DOWN, vsi->state);
 +	clear_bit(ICE_VSI_DOWN, vsi->state);
    ice_vsi_ena_irq(vsi);
return 0;
@@@ -6167,7 -6170,7 +6164,7 @@@ static int ice_change_mtu(struct net_de
    netdev->mtu = (unsigned int)new_mtu;
/* if VSI is up, bring it down and then back up */
 -	if (!test_and_set_bit(__ICE_DOWN, vsi->state)) {
 +	if (!test_and_set_bit(ICE_VSI_DOWN, vsi->state)) {
    	int err;
err = ice_down(vsi);
@@@ -6302,118 -6305,89 +6299,118 @@@ const char *ice_stat_str(enum ice_statu
  }
/**
 - * ice_set_rss - Set RSS keys and lut
 + * ice_set_rss_lut - Set RSS LUT
   * @vsi: Pointer to VSI structure
 - * @seed: RSS hash seed
   * @lut: Lookup table
   * @lut_size: Lookup table size
   *
   * Returns 0 on success, negative on failure
   */
 -int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 +int ice_set_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size)
  {
 -	struct ice_pf *pf = vsi->back;
 -	struct ice_hw *hw = &pf->hw;
 +	struct ice_aq_get_set_rss_lut_params params = {};
 +	struct ice_hw *hw = &vsi->back->hw;
    enum ice_status status;
 -	struct device *dev;
 -	dev = ice_pf_to_dev(pf);
 -	if (seed) {
 -		struct ice_aqc_get_set_rss_keys *buf =
 -				  (struct ice_aqc_get_set_rss_keys *)seed;
 +	if (!lut)
 +		return -EINVAL;
 -		status = ice_aq_set_rss_key(hw, vsi->idx, buf);
 +	params.vsi_handle = vsi->idx;
 +	params.lut_size = lut_size;
 +	params.lut_type = vsi->rss_lut_type;
 +	params.lut = lut;
 -		if (status) {
 -			dev_err(dev, "Cannot set RSS key, err %s aq_err %s\n",
 -				ice_stat_str(status),
 -				ice_aq_str(hw->adminq.sq_last_status));
 -			return -EIO;
 -		}
 +	status = ice_aq_set_rss_lut(hw, &params);
 +	if (status) {
 +		dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS lut, err %s aq_err %s\n",
 +			ice_stat_str(status),
 +			ice_aq_str(hw->adminq.sq_last_status));
 +		return -EIO;
    }
 -	if (lut) {
 -		status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
 -					    lut, lut_size);
 -		if (status) {
 -			dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n",
 -				ice_stat_str(status),
 -				ice_aq_str(hw->adminq.sq_last_status));
 -			return -EIO;
 -		}
 +	return 0;
 +}
 +
 +/**
 + * ice_set_rss_key - Set RSS key
 + * @vsi: Pointer to the VSI structure
 + * @seed: RSS hash seed
 + *
 + * Returns 0 on success, negative on failure
 + */
 +int ice_set_rss_key(struct ice_vsi *vsi, u8 *seed)
 +{
 +	struct ice_hw *hw = &vsi->back->hw;
 +	enum ice_status status;
 +
 +	if (!seed)
 +		return -EINVAL;
 +
 +	status = ice_aq_set_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed);
 +	if (status) {
 +		dev_err(ice_pf_to_dev(vsi->back), "Cannot set RSS key, err %s aq_err %s\n",
 +			ice_stat_str(status),
 +			ice_aq_str(hw->adminq.sq_last_status));
 +		return -EIO;
    }
return 0;
  }
/**
 - * ice_get_rss - Get RSS keys and lut
 + * ice_get_rss_lut - Get RSS LUT
   * @vsi: Pointer to VSI structure
 - * @seed: Buffer to store the keys
   * @lut: Buffer to store the lookup table entries
   * @lut_size: Size of buffer to store the lookup table entries
   *
   * Returns 0 on success, negative on failure
   */
 -int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 +int ice_get_rss_lut(struct ice_vsi *vsi, u8 *lut, u16 lut_size)
  {
 -	struct ice_pf *pf = vsi->back;
 -	struct ice_hw *hw = &pf->hw;
 +	struct ice_aq_get_set_rss_lut_params params = {};
 +	struct ice_hw *hw = &vsi->back->hw;
    enum ice_status status;
 -	struct device *dev;
 -	dev = ice_pf_to_dev(pf);
 -	if (seed) {
 -		struct ice_aqc_get_set_rss_keys *buf =
 -				  (struct ice_aqc_get_set_rss_keys *)seed;
 +	if (!lut)
 +		return -EINVAL;
 -		status = ice_aq_get_rss_key(hw, vsi->idx, buf);
 -		if (status) {
 -			dev_err(dev, "Cannot get RSS key, err %s aq_err %s\n",
 -				ice_stat_str(status),
 -				ice_aq_str(hw->adminq.sq_last_status));
 -			return -EIO;
 -		}
 +	params.vsi_handle = vsi->idx;
 +	params.lut_size = lut_size;
 +	params.lut_type = vsi->rss_lut_type;
 +	params.lut = lut;
 +
 +	status = ice_aq_get_rss_lut(hw, &params);
 +	if (status) {
 +		dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS lut, err %s aq_err %s\n",
 +			ice_stat_str(status),
 +			ice_aq_str(hw->adminq.sq_last_status));
 +		return -EIO;
    }
 -	if (lut) {
 -		status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
 -					    lut, lut_size);
 -		if (status) {
 -			dev_err(dev, "Cannot get RSS lut, err %s aq_err %s\n",
 -				ice_stat_str(status),
 -				ice_aq_str(hw->adminq.sq_last_status));
 -			return -EIO;
 -		}
 +	return 0;
 +}
 +
 +/**
 + * ice_get_rss_key - Get RSS key
 + * @vsi: Pointer to VSI structure
 + * @seed: Buffer to store the key in
 + *
 + * Returns 0 on success, negative on failure
 + */
 +int ice_get_rss_key(struct ice_vsi *vsi, u8 *seed)
 +{
 +	struct ice_hw *hw = &vsi->back->hw;
 +	enum ice_status status;
 +
 +	if (!seed)
 +		return -EINVAL;
 +
 +	status = ice_aq_get_rss_key(hw, vsi->idx, (struct ice_aqc_get_set_rss_keys *)seed);
 +	if (status) {
 +		dev_err(ice_pf_to_dev(vsi->back), "Cannot get RSS key, err %s aq_err %s\n",
 +			ice_stat_str(status),
 +			ice_aq_str(hw->adminq.sq_last_status));
 +		return -EIO;
    }
return 0;
@@@ -6636,7 -6610,7 +6633,7 @@@ static void ice_tx_timeout(struct net_d
    default:
    	netdev_err(netdev, "tx_timeout recovery unsuccessful, device is in unrecoverable state.\n");
    	set_bit(__ICE_DOWN, pf->state);
 -		set_bit(__ICE_NEEDS_RESTART, vsi->state);
 +		set_bit(ICE_VSI_NEEDS_RESTART, vsi->state);
    	set_bit(__ICE_SERVICE_DIS, pf->state);
    	break;
    }
@@@ -6658,12 -6632,33 +6655,34 @@@
   * Returns 0 on success, negative value on failure
   */
  int ice_open(struct net_device *netdev)
+ {
+ 	struct ice_netdev_priv *np = netdev_priv(netdev);
+ 	struct ice_pf *pf = np->vsi->back;
+ 
+ 	if (ice_is_reset_in_progress(pf->state)) {
+ 		netdev_err(netdev, "can't open net device while reset is in progress");
+ 		return -EBUSY;
+ 	}
+ 
+ 	return ice_open_internal(netdev);
+ }
+ 
+ /**
+  * ice_open_internal - Called when a network interface becomes active
+  * @netdev: network interface device structure
+  *
+  * Internal ice_open implementation. Should not be used directly except for ice_open and reset
+  * handling routine
+  *
+  * Returns 0 on success, negative value on failure
+  */
+ int ice_open_internal(struct net_device *netdev)
  {
    struct ice_netdev_priv *np = netdev_priv(netdev);
    struct ice_vsi *vsi = np->vsi;
    struct ice_pf *pf = vsi->back;
    struct ice_port_info *pi;
 +	enum ice_status status;
    int err;
if (test_bit(__ICE_NEEDS_RESTART, pf->state)) {
@@@ -6671,14 -6666,19 +6690,14 @@@
    	return -EIO;
    }
 -	if (test_bit(__ICE_DOWN, pf->state)) {
 -		netdev_err(netdev, "device is not ready yet\n");
 -		return -EBUSY;
 -	}
 -
    netif_carrier_off(netdev);
pi = vsi->port_info;
 -	err = ice_update_link_info(pi);
 -	if (err) {
 -		netdev_err(netdev, "Failed to get link info, error %d\n",
 -			   err);
 -		return err;
 +	status = ice_update_link_info(pi);
 +	if (status) {
 +		netdev_err(netdev, "Failed to get link info, error %s\n",
 +			   ice_stat_str(status));
 +		return -EIO;
    }
/* Set PHY if there is media, otherwise, turn off PHY */
@@@ -6701,7 -6701,12 +6720,7 @@@
    	}
    } else {
    	set_bit(ICE_FLAG_NO_MEDIA, pf->flags);
 -		err = ice_aq_set_link_restart_an(pi, false, NULL);
 -		if (err) {
 -			netdev_err(netdev, "Failed to set PHY state, VSI %d error %d\n",
 -				   vsi->vsi_num, err);
 -			return err;
 -		}
 +		ice_set_link(vsi, false);
    }
err = ice_vsi_open(vsi);
@@@ -6729,6 -6734,12 +6748,12 @@@ int ice_stop(struct net_device *netdev
  {
    struct ice_netdev_priv *np = netdev_priv(netdev);
    struct ice_vsi *vsi = np->vsi;
+ 	struct ice_pf *pf = vsi->back;
+ 
+ 	if (ice_is_reset_in_progress(pf->state)) {
+ 		netdev_err(netdev, "can't stop net device while reset is in progress");
+ 		return -EBUSY;
+ 	}
ice_vsi_close(vsi);
diff --combined drivers/net/ethernet/intel/ice/ice_switch.c
index 5e5683a3eb23,834cbd3f7b31..357d3073d814
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@@ -920,7 -920,7 +920,7 @@@ ice_create_vsi_list_map(struct ice_hw *
    struct ice_vsi_list_map_info *v_map;
    int i;
 -	v_map = devm_kcalloc(ice_hw_to_dev(hw), 1, sizeof(*v_map), GFP_KERNEL);
 +	v_map = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*v_map), GFP_KERNEL);
    if (!v_map)
    	return NULL;
@@@ -1238,6 -1238,9 +1238,9 @@@ ice_add_update_vsi_list(struct ice_hw *
    		ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2,
    					vsi_list_id);
+ 		if (!m_entry->vsi_list_info)
+ 			return ICE_ERR_NO_MEMORY;
+ 
    	/* If this entry was large action then the large action needs
    	 * to be updated to point to FWD to VSI list
    	 */
@@@ -2220,6 -2223,7 +2223,7 @@@ ice_vsi_uses_fltr(struct ice_fltr_mgmt_
    return ((fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI &&
    	 fm_entry->fltr_info.vsi_handle == vsi_handle) ||
    	(fm_entry->fltr_info.fltr_act == ICE_FWD_TO_VSI_LIST &&
+ 		 fm_entry->vsi_list_info &&
    	 (test_bit(vsi_handle, fm_entry->vsi_list_info->vsi_map))));
  }
@@@ -2292,14 -2296,12 +2296,12 @@@ ice_add_to_vsi_fltr_list(struct ice_hw 
    	return ICE_ERR_PARAM;
list_for_each_entry(fm_entry, lkup_list_head, list_entry) {
- 		struct ice_fltr_info *fi;
- 
- 		fi = &fm_entry->fltr_info;
- 		if (!fi || !ice_vsi_uses_fltr(fm_entry, vsi_handle))
+ 		if (!ice_vsi_uses_fltr(fm_entry, vsi_handle))
    		continue;
status = ice_add_entry_to_vsi_fltr_list(hw, vsi_handle,
- 							vsi_list_head, fi);
+ 							vsi_list_head,
+ 							&fm_entry->fltr_info);
    	if (status)
    		return status;
    }
@@@ -2622,7 -2624,7 +2624,7 @@@ ice_remove_vsi_lkup_fltr(struct ice_hw 
    				  &remove_list_head);
    mutex_unlock(rule_lock);
    if (status)
- 		return;
+ 		goto free_fltr_list;
switch (lkup) {
    case ICE_SW_LKUP_MAC:
@@@ -2645,6 -2647,7 +2647,7 @@@
    	break;
    }
+ free_fltr_list:
    list_for_each_entry_safe(fm_entry, tmp, &remove_list_head, list_entry) {
    	list_del(&fm_entry->list_entry);
    	devm_kfree(ice_hw_to_dev(hw), fm_entry);
diff --combined drivers/net/ethernet/intel/ice/ice_type.h
index 2efc91b58c9e,266036b7a49a..7ead1c13f16f
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@@ -192,24 -192,6 +192,24 @@@ enum ice_fltr_ptype 
    ICE_FLTR_PTYPE_NONF_IPV4_TCP,
    ICE_FLTR_PTYPE_NONF_IPV4_SCTP,
    ICE_FLTR_PTYPE_NONF_IPV4_OTHER,
 +	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_UDP,
 +	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_TCP,
 +	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_ICMP,
 +	ICE_FLTR_PTYPE_NONF_IPV4_GTPU_IPV4_OTHER,
 +	ICE_FLTR_PTYPE_NONF_IPV6_GTPU_IPV6_OTHER,
 +	ICE_FLTR_PTYPE_NONF_IPV4_L2TPV3,
 +	ICE_FLTR_PTYPE_NONF_IPV6_L2TPV3,
 +	ICE_FLTR_PTYPE_NONF_IPV4_ESP,
 +	ICE_FLTR_PTYPE_NONF_IPV6_ESP,
 +	ICE_FLTR_PTYPE_NONF_IPV4_AH,
 +	ICE_FLTR_PTYPE_NONF_IPV6_AH,
 +	ICE_FLTR_PTYPE_NONF_IPV4_NAT_T_ESP,
 +	ICE_FLTR_PTYPE_NONF_IPV6_NAT_T_ESP,
 +	ICE_FLTR_PTYPE_NONF_IPV4_PFCP_NODE,
 +	ICE_FLTR_PTYPE_NONF_IPV4_PFCP_SESSION,
 +	ICE_FLTR_PTYPE_NONF_IPV6_PFCP_NODE,
 +	ICE_FLTR_PTYPE_NONF_IPV6_PFCP_SESSION,
 +	ICE_FLTR_PTYPE_NON_IP_L2,
    ICE_FLTR_PTYPE_FRAG_IPV4,
    ICE_FLTR_PTYPE_NONF_IPV6_UDP,
    ICE_FLTR_PTYPE_NONF_IPV6_TCP,
@@@ -553,6 -535,7 +553,7 @@@ struct ice_dcb_app_priority_table 
  #define ICE_TLV_STATUS_ERR	0x4
  #define ICE_APP_PROT_ID_FCOE	0x8906
  #define ICE_APP_PROT_ID_ISCSI	0x0cbc
+ #define ICE_APP_PROT_ID_ISCSI_860 0x035c
  #define ICE_APP_PROT_ID_FIP	0x8914
  #define ICE_APP_SEL_ETHTYPE	0x1
  #define ICE_APP_SEL_TCPIP	0x2
@@@ -720,13 -703,13 +721,13 @@@ struct ice_hw
enum ice_aq_err pkg_dwnld_status;
 -	/* Driver's package ver - (from the Metadata seg) */
 +	/* Driver's package ver - (from the Ice Metadata section) */
    struct ice_pkg_ver pkg_ver;
    u8 pkg_name[ICE_PKG_NAME_SIZE];
 -	/* Driver's Ice package version (from the Ice seg) */
 -	struct ice_pkg_ver ice_pkg_ver;
 -	u8 ice_pkg_name[ICE_PKG_NAME_SIZE];
 +	/* Driver's Ice segment format version and ID (from the Ice seg) */
 +	struct ice_pkg_ver ice_seg_fmt_ver;
 +	u8 ice_seg_id[ICE_SEG_ID_SIZE];
/* Pointer to the ice segment */
    struct ice_seg *seg;
@@@ -827,14 -810,6 +828,14 @@@ struct ice_hw_port_stats 
    u64 fd_sb_match;
  };
 +struct ice_aq_get_set_rss_lut_params {
 +	u16 vsi_handle;		/* software VSI handle */
 +	u16 lut_size;		/* size of the LUT buffer */
 +	u8 lut_type;		/* type of the LUT (i.e. VSI, PF, Global) */
 +	u8 *lut;		/* input RSS LUT for set and output RSS LUT for get */
 +	u8 global_lut_id;	/* only valid when lut_type is global */
 +};
 +
  /* Checksum and Shadow RAM pointers */
  #define ICE_SR_NVM_CTRL_WORD		0x00
  #define ICE_SR_BOOT_CFG_PTR		0x132
@@@ -941,9 -916,4 +942,9 @@@
  #define ICE_FW_API_LLDP_FLTR_MIN	7
  #define ICE_FW_API_LLDP_FLTR_PATCH	1
 +/* AQ API version for report default configuration */
 +#define ICE_FW_API_REPORT_DFLT_CFG_MAJ		1
 +#define ICE_FW_API_REPORT_DFLT_CFG_MIN		7
 +#define ICE_FW_API_REPORT_DFLT_CFG_PATCH	3
 +
  #endif /* _ICE_TYPE_H_ */
diff --combined drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 4def64d0e669,9153c9bda96f..a9166cd85013
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@@ -58,6 -58,9 +58,6 @@@ static bool is_eth_supported(struct mlx
    if (!IS_ENABLED(CONFIG_MLX5_CORE_EN))
    	return false;
 -	if (is_eth_rep_supported(dev))
 -		return false;
 -
    if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
    	return false;
@@@ -188,12 -191,12 +188,12 @@@ static bool is_ib_supported(struct mlx5
  }
enum {
- 	MLX5_INTERFACE_PROTOCOL_ETH_REP,
    MLX5_INTERFACE_PROTOCOL_ETH,
+ 	MLX5_INTERFACE_PROTOCOL_ETH_REP,
+ 	MLX5_INTERFACE_PROTOCOL_IB,
    MLX5_INTERFACE_PROTOCOL_IB_REP,
    MLX5_INTERFACE_PROTOCOL_MPIB,
- 	MLX5_INTERFACE_PROTOCOL_IB,
MLX5_INTERFACE_PROTOCOL_VNET,
  };
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en.h
index b425b4a539bf,bc6f77ea0a31..e1c51eabe8fe
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@@ -269,7 -269,6 +269,7 @@@ struct mlx5e_params 
    struct mlx5e_xsk *xsk;
    unsigned int sw_mtu;
    int hard_mtu;
 +	bool ptp_rx;
  };
enum {
@@@ -517,6 -516,7 +517,7 @@@ struct mlx5e_icosq 
    struct mlx5_wq_cyc         wq;
    void __iomem              *uar_map;
    u32                        sqn;
+ 	u16                        reserved_room;
    unsigned long              state;
/* control path */
@@@ -708,11 -708,11 +709,11 @@@ struct mlx5e_channel 
    int                        cpu;
  };
 -struct mlx5e_port_ptp;
 +struct mlx5e_ptp;
struct mlx5e_channels {
    struct mlx5e_channel **c;
 -	struct mlx5e_port_ptp  *port_ptp;
 +	struct mlx5e_ptp      *ptp;
    unsigned int           num;
    struct mlx5e_params    params;
  };
@@@ -727,11 -727,10 +728,11 @@@ struct mlx5e_channel_stats 
    struct mlx5e_xdpsq_stats xsksq;
  } ____cacheline_aligned_in_smp;
 -struct mlx5e_port_ptp_stats {
 +struct mlx5e_ptp_stats {
    struct mlx5e_ch_stats ch;
    struct mlx5e_sq_stats sq[MLX5E_MAX_NUM_TC];
    struct mlx5e_ptp_cq_stats cq[MLX5E_MAX_NUM_TC];
 +	struct mlx5e_rq_stats rq;
  } ____cacheline_aligned_in_smp;
enum {
@@@ -838,7 -837,6 +839,7 @@@ struct mlx5e_priv 
    struct mlx5e_tir           inner_indir_tir[MLX5E_NUM_INDIR_TIRS];
    struct mlx5e_tir           direct_tir[MLX5E_MAX_NUM_CHANNELS];
    struct mlx5e_tir           xsk_tir[MLX5E_MAX_NUM_CHANNELS];
 +	struct mlx5e_tir           ptp_tir;
    struct mlx5e_rss_params    rss_params;
    u32                        tx_rates[MLX5E_MAX_NUM_SQS];
@@@ -858,11 -856,10 +859,11 @@@
    struct mlx5e_stats         stats;
    struct mlx5e_channel_stats channel_stats[MLX5E_MAX_NUM_CHANNELS];
    struct mlx5e_channel_stats trap_stats;
 -	struct mlx5e_port_ptp_stats port_ptp_stats;
 +	struct mlx5e_ptp_stats     ptp_stats;
    u16                        max_nch;
    u8                         max_opened_tc;
 -	bool                       port_ptp_opened;
 +	bool                       tx_ptp_opened;
 +	bool                       rx_ptp_opened;
    struct hwtstamp_config     tstamp;
    u16                        q_counter;
    u16                        drop_rq_q_counter;
@@@ -885,6 -882,7 +886,6 @@@
  #endif
    struct devlink_health_reporter *tx_reporter;
    struct devlink_health_reporter *rx_reporter;
 -	struct devlink_port            dl_port;
    struct mlx5e_xsk           xsk;
  #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE)
    struct mlx5e_hv_vhca_stats_agent stats_agent;
@@@ -918,12 -916,13 +919,12 @@@ struct mlx5e_profile 
    const struct mlx5e_rx_handlers *rx_handlers;
    int	max_tc;
    u8	rq_groups;
 +	bool	rx_ptp_support;
  };
void mlx5e_build_ptys2ethtool_map(void);
bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev);
 -bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
 -				struct mlx5e_params *params);
void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
  void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s);
@@@ -966,9 -965,9 +967,9 @@@ struct mlx5e_tirc_config mlx5e_tirc_get
  struct mlx5e_xsk_param;
struct mlx5e_rq_param;
 -int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
 -		  struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
 -		  struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq);
 +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
 +		  struct mlx5e_xsk_param *xsk, int node,
 +		  struct mlx5e_rq *rq);
  int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time);
  void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
  void mlx5e_close_rq(struct mlx5e_rq *rq);
@@@ -1023,11 -1022,18 +1024,11 @@@ int mlx5e_num_channels_changed(struct m
  int mlx5e_num_channels_changed_ctx(struct mlx5e_priv *priv, void *context);
  void mlx5e_activate_priv_channels(struct mlx5e_priv *priv);
  void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv);
 +int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx);
void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len,
    			   int num_channels);
 -void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode);
 -void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode);
 -void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
 -void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode);
 -
 -void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params);
 -void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 -			       struct mlx5e_params *params);
  int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state);
  void mlx5e_activate_rq(struct mlx5e_rq *rq);
  void mlx5e_deactivate_rq(struct mlx5e_rq *rq);
@@@ -1086,10 -1092,10 +1087,10 @@@ int mlx5e_create_indirect_rqt(struct ml
  int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc);
  void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv);
 -int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
 -void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
 -int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
 -void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs);
 +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
 +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
 +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
 +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n);
  void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn);
@@@ -1170,9 -1176,10 +1171,9 @@@ void mlx5e_detach_netdev(struct mlx5e_p
  void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
  int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
    			const struct mlx5e_profile *new_profile, void *new_ppriv);
 +void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv);
  void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
  void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu);
 -void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
 -			   struct mlx5e_params *params);
  void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
    		    u16 num_channels);
  void mlx5e_rx_dim_work(struct work_struct *work);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 1c44000ad675,68e54cc1cd16..5da5e5323a44
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@@ -29,8 -29,6 +29,8 @@@
  #define MLX5_CT_STATE_TRK_BIT BIT(2)
  #define MLX5_CT_STATE_NAT_BIT BIT(3)
  #define MLX5_CT_STATE_REPLY_BIT BIT(4)
 +#define MLX5_CT_STATE_RELATED_BIT BIT(5)
 +#define MLX5_CT_STATE_INVALID_BIT BIT(6)
#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
  #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
@@@ -187,6 -185,28 +187,28 @@@ mlx5_tc_ct_entry_has_nat(struct mlx5_ct
    return !!(entry->tuple_nat_node.next);
  }
+ static int
+ mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
+ 		       u32 *labels, u32 *id)
+ {
+ 	if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
+ 		*id = 0;
+ 		return 0;
+ 	}
+ 
+ 	if (mapping_add(ct_priv->labels_mapping, labels, id))
+ 		return -EOPNOTSUPP;
+ 
+ 	return 0;
+ }
+ 
+ static void
+ mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
+ {
+ 	if (id)
+ 		mapping_remove(ct_priv->labels_mapping, id);
+ }
+ 
  static int
  mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
  {
@@@ -438,7 -458,7 +460,7 @@@ mlx5_tc_ct_entry_del_rule(struct mlx5_t
    mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
    mlx5e_mod_hdr_detach(ct_priv->dev,
    		     ct_priv->mod_hdr_tbl, zone_rule->mh);
- 	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
+ 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
    kfree(attr);
  }
@@@ -641,8 -661,8 +663,8 @@@ mlx5_tc_ct_entry_create_mod_hdr(struct 
    if (!meta)
    	return -EOPNOTSUPP;
- 	err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels,
- 			  &attr->ct_attr.ct_labels_id);
+ 	err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
+ 				     &attr->ct_attr.ct_labels_id);
    if (err)
    	return -EOPNOTSUPP;
    if (nat) {
@@@ -679,7 -699,7 +701,7 @@@
err_mapping:
    dealloc_mod_hdr_actions(&mod_acts);
- 	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
+ 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
    return err;
  }
@@@ -697,7 -717,7 +719,7 @@@ mlx5_tc_ct_entry_add_rule(struct mlx5_t
zone_rule->nat = nat;
 -	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
 +	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
    if (!spec)
    	return -ENOMEM;
@@@ -739,7 -759,7 +761,7 @@@
zone_rule->attr = attr;
 -	kfree(spec);
 +	kvfree(spec);
    ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
return 0;
@@@ -747,11 -767,11 +769,11 @@@
  err_rule:
    mlx5e_mod_hdr_detach(ct_priv->dev,
    		     ct_priv->mod_hdr_tbl, zone_rule->mh);
- 	mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
+ 	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
  err_mod_hdr:
    kfree(attr);
  err_attr:
 -	kfree(spec);
 +	kvfree(spec);
    return err;
  }
@@@ -1199,7 -1219,7 +1221,7 @@@ void mlx5_tc_ct_match_del(struct mlx5_t
    if (!priv || !ct_attr->ct_labels_id)
    	return;
- 	mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id);
+ 	mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
  }
int
@@@ -1209,8 -1229,8 +1231,8 @@@ mlx5_tc_ct_match_add(struct mlx5_tc_ct_
    	     struct mlx5_ct_attr *ct_attr,
    	     struct netlink_ext_ack *extack)
  {
 +	bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
    struct flow_rule *rule = flow_cls_offload_flow_rule(f);
 -	bool trk, est, untrk, unest, new, rpl, unrpl;
    struct flow_dissector_key_ct *mask, *key;
    u32 ctstate = 0, ctstate_mask = 0;
    u16 ct_state_on, ct_state_off;
@@@ -1238,9 -1258,7 +1260,9 @@@
    if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
    		      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
    		      TCA_FLOWER_KEY_CT_FLAGS_NEW |
 -			      TCA_FLOWER_KEY_CT_FLAGS_REPLY)) {
 +			      TCA_FLOWER_KEY_CT_FLAGS_REPLY |
 +			      TCA_FLOWER_KEY_CT_FLAGS_RELATED |
 +			      TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
    	NL_SET_ERR_MSG_MOD(extack,
    			   "only ct_state trk, est, new and rpl are supported for offload");
    	return -EOPNOTSUPP;
@@@ -1252,13 -1270,9 +1274,13 @@@
    new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
    est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
    rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
 +	rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
 +	inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
    untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
    unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
    unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
 +	unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
 +	uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
    ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
@@@ -1266,20 -1280,6 +1288,20 @@@
    ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
    ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
    ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
 +	ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
 +	ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;
 +
 +	if (rel) {
 +		NL_SET_ERR_MSG_MOD(extack,
 +				   "matching on ct_state +rel isn't supported");
 +		return -EOPNOTSUPP;
 +	}
 +
 +	if (inv) {
 +		NL_SET_ERR_MSG_MOD(extack,
 +				   "matching on ct_state +inv isn't supported");
 +		return -EOPNOTSUPP;
 +	}
if (new) {
    	NL_SET_ERR_MSG_MOD(extack,
@@@ -1302,7 -1302,7 +1324,7 @@@
    	ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
    	ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
    	ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
- 		if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
+ 		if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
    		return -EOPNOTSUPP;
    	mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
    				    MLX5_CT_LABELS_MASK);
@@@ -1562,14 -1562,6 +1584,14 @@@ mlx5_tc_ct_free_pre_ct_tables(struct ml
    mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
  }
+/* To avoid false lock dependency warning set the ct_entries_ht lock
 + * class different than the lock class of the ht being used when deleting
 + * last flow from a group and then deleting a group, we get into del_sw_flow_group()
 + * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but
 + * it's different than the ht->mutex here.
 + */
 +static struct lock_class_key ct_entries_ht_lock_key;
 +
  static struct mlx5_ct_ft *
  mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
    	     struct nf_flowtable *nf_ft)
@@@ -1604,8 -1596,6 +1626,8 @@@
    if (err)
    	goto err_init;
+	lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);
 +
    err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
    			     zone_params);
    if (err)
@@@ -1707,10 -1697,10 +1729,10 @@@ __mlx5_tc_ct_flow_offload(struct mlx5_t
    struct mlx5_ct_ft *ft;
    u32 fte_id = 1;
-	post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
 +	post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
    ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
    if (!post_ct_spec || !ct_flow) {
 -		kfree(post_ct_spec);
 +		kvfree(post_ct_spec);
    	kfree(ct_flow);
    	return ERR_PTR(-ENOMEM);
    }
@@@ -1820,10 -1810,6 +1842,10 @@@
    ct_flow->post_ct_attr->prio = 0;
    ct_flow->post_ct_attr->ft = ct_priv->post_ct;
+	/* Splits were handled before CT */
 +	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
 +		ct_flow->post_ct_attr->esw_attr->split_count = 0;
 +
    ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
    ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
    ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
@@@ -1849,7 -1835,7 +1871,7 @@@
attr->ct_attr.ct_flow = ct_flow;
    dealloc_mod_hdr_actions(&pre_mod_acts);
 -	kfree(post_ct_spec);
 +	kvfree(post_ct_spec);
return rule;
@@@ -1870,7 -1856,7 +1892,7 @@@ err_alloc_pre
  err_idr:
    mlx5_tc_ct_del_ft_cb(ct_priv, ft);
  err_ft:
 -	kfree(post_ct_spec);
 +	kvfree(post_ct_spec);
    kfree(ct_flow);
    netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
    return ERR_PTR(err);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index 89d5ca91566e,e1271998b937..9350ca05ce65
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@@ -21,6 -21,11 +21,11 @@@ enum 
    MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
  };
+ struct mlx5e_encap_key {
+ 	const struct ip_tunnel_key *ip_tun_key;
+ 	struct mlx5e_tc_tunnel     *tc_tunnel;
+ };
+ 
  struct mlx5e_tc_tunnel {
    int tunnel_type;
    enum mlx5_flow_match_level match_level;
@@@ -44,6 -49,8 +49,8 @@@
    		    struct flow_cls_offload *f,
    		    void *headers_c,
    		    void *headers_v);
+ 	bool (*encap_info_equal)(struct mlx5e_encap_key *a,
+ 				 struct mlx5e_encap_key *b);
  };
extern struct mlx5e_tc_tunnel vxlan_tunnel;
@@@ -76,12 -83,10 +83,12 @@@ int mlx5e_tc_tun_update_header_ipv6(str
  static inline int
  mlx5e_tc_tun_create_header_ipv6(struct mlx5e_priv *priv,
    			struct net_device *mirred_dev,
 -				struct mlx5e_encap_entry *e) { return -EOPNOTSUPP; }
 -int mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
 -				    struct net_device *mirred_dev,
 -				    struct mlx5e_encap_entry *e)
 +				struct mlx5e_encap_entry *e)
 +{ return -EOPNOTSUPP; }
 +static inline int
 +mlx5e_tc_tun_update_header_ipv6(struct mlx5e_priv *priv,
 +				struct net_device *mirred_dev,
 +				struct mlx5e_encap_entry *e)
  { return -EOPNOTSUPP; }
  #endif
  int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
@@@ -103,6 -108,9 +110,9 @@@ int mlx5e_tc_tun_parse_udp_ports(struc
    			 void *headers_c,
    			 void *headers_v);
+ bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
+ 					   struct mlx5e_encap_key *b);
+ 
  #endif /* CONFIG_MLX5_ESWITCH */
#endif //__MLX5_EN_TC_TUNNEL_H__
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index 01d435e15ad3,9f16ad2c0710..593503bc4d07
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@@ -2,7 -2,6 +2,7 @@@
  /* Copyright (c) 2021 Mellanox Technologies. */
#include <net/fib_notifier.h>
 +#include <net/nexthop.h>
  #include "tc_tun_encap.h"
  #include "en_tc.h"
  #include "tc_tun.h"
@@@ -477,16 -476,11 +477,11 @@@ void mlx5e_detach_decap(struct mlx5e_pr
    mlx5e_decap_dealloc(priv, d);
  }
- struct encap_key {
- 	const struct ip_tunnel_key *ip_tun_key;
- 	struct mlx5e_tc_tunnel *tc_tunnel;
- };
- 
- static int cmp_encap_info(struct encap_key *a,
- 			  struct encap_key *b)
+ bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
+ 					   struct mlx5e_encap_key *b)
  {
- 	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) ||
- 		a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
+ 	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
+ 		a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
  }
static int cmp_decap_info(struct mlx5e_decap_key *a,
@@@ -495,7 -489,7 +490,7 @@@
    return memcmp(&a->key, &b->key, sizeof(b->key));
  }
- static int hash_encap_info(struct encap_key *key)
+ static int hash_encap_info(struct mlx5e_encap_key *key)
  {
    return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
    	     key->tc_tunnel->tunnel_type);
@@@ -517,18 -511,18 +512,18 @@@ static bool mlx5e_decap_take(struct mlx
  }
static struct mlx5e_encap_entry *
- mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
+ mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
    	uintptr_t hash_key)
  {
    struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+ 	struct mlx5e_encap_key e_key;
    struct mlx5e_encap_entry *e;
- 	struct encap_key e_key;
hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
    			   encap_hlist, hash_key) {
    	e_key.ip_tun_key = &e->tun_info->key;
    	e_key.tc_tunnel = e->tunnel;
- 		if (!cmp_encap_info(&e_key, key) &&
+ 		if (e->tunnel->encap_info_equal(&e_key, key) &&
    	    mlx5e_encap_take(e))
    		return e;
    }
@@@ -695,8 -689,8 +690,8 @@@ int mlx5e_attach_encap(struct mlx5e_pri
    struct mlx5_flow_attr *attr = flow->attr;
    const struct ip_tunnel_info *tun_info;
    unsigned long tbl_time_before = 0;
- 	struct encap_key key;
    struct mlx5e_encap_entry *e;
+ 	struct mlx5e_encap_key key;
    bool entry_created = false;
    unsigned short family;
    uintptr_t hash_key;
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index f7c880edae37,19d22a63313f..8c0f78c09215
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@@ -46,7 -46,8 +46,8 @@@ struct mlx5e_ktls_offload_context_rx 
    struct tls12_crypto_info_aes_gcm_128 crypto_info;
    struct accel_rule rule;
    struct sock *sk;
- 	struct mlx5e_rq_stats *stats;
+ 	struct mlx5e_rq_stats *rq_stats;
+ 	struct mlx5e_tls_sw_stats *sw_stats;
    struct completion add_ctx;
    u32 tirn;
    u32 key_id;
@@@ -84,7 -85,7 +85,7 @@@ static int mlx5e_ktls_create_tir(struc
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
-	MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.td.tdn);
 +	MLX5_SET(tirc, tirc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
    MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
    MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8);
    MLX5_SET(tirc, tirc, indirect_table, rqtn);
@@@ -137,11 -138,10 +138,10 @@@ post_static_params(struct mlx5e_icosq *
  {
    struct mlx5e_set_tls_static_params_wqe *wqe;
    struct mlx5e_icosq_wqe_info wi;
- 	u16 pi, num_wqebbs, room;
+ 	u16 pi, num_wqebbs;
num_wqebbs = MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS;
- 	room = mlx5e_stop_room_for_wqe(num_wqebbs);
- 	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room)))
+ 	if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs)))
    	return ERR_PTR(-ENOSPC);
pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs);
@@@ -168,11 -168,10 +168,10 @@@ post_progress_params(struct mlx5e_icos
  {
    struct mlx5e_set_tls_progress_params_wqe *wqe;
    struct mlx5e_icosq_wqe_info wi;
- 	u16 pi, num_wqebbs, room;
+ 	u16 pi, num_wqebbs;
num_wqebbs = MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS;
- 	room = mlx5e_stop_room_for_wqe(num_wqebbs);
- 	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, room)))
+ 	if (unlikely(!mlx5e_icosq_can_post_wqe(sq, num_wqebbs)))
    	return ERR_PTR(-ENOSPC);
pi = mlx5e_icosq_get_next_pi(sq, num_wqebbs);
@@@ -218,7 -217,7 +217,7 @@@ unlock
    return err;
err_out:
- 	priv_rx->stats->tls_resync_req_skip++;
+ 	priv_rx->rq_stats->tls_resync_req_skip++;
    err = PTR_ERR(cseg);
    complete(&priv_rx->add_ctx);
    goto unlock;
@@@ -277,17 -276,15 +276,15 @@@ resync_post_get_progress_params(struct
buf->priv_rx = priv_rx;
- 	BUILD_BUG_ON(MLX5E_KTLS_GET_PROGRESS_WQEBBS != 1);
- 
    spin_lock_bh(&sq->channel->async_icosq_lock);
- 	if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, 1))) {
+ 	if (unlikely(!mlx5e_icosq_can_post_wqe(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS))) {
    	spin_unlock_bh(&sq->channel->async_icosq_lock);
    	err = -ENOSPC;
    	goto err_dma_unmap;
    }
- 	pi = mlx5e_icosq_get_next_pi(sq, 1);
+ 	pi = mlx5e_icosq_get_next_pi(sq, MLX5E_KTLS_GET_PROGRESS_WQEBBS);
    wqe = MLX5E_TLS_FETCH_GET_PROGRESS_PARAMS_WQE(sq, pi);
#define GET_PSV_DS_CNT (DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS))
@@@ -307,7 -304,7 +304,7 @@@
wi = (struct mlx5e_icosq_wqe_info) {
    	.wqe_type = MLX5E_ICOSQ_WQE_GET_PSV_TLS,
- 		.num_wqebbs = 1,
+ 		.num_wqebbs = MLX5E_KTLS_GET_PROGRESS_WQEBBS,
    	.tls_get_params.buf = buf,
    };
    icosq_fill_wi(sq, pi, &wi);
@@@ -322,7 -319,7 +319,7 @@@ err_dma_unmap
  err_free:
    kfree(buf);
  err_out:
- 	priv_rx->stats->tls_resync_req_skip++;
+ 	priv_rx->rq_stats->tls_resync_req_skip++;
    return err;
  }
@@@ -378,13 -375,13 +375,13 @@@ static int resync_handle_seq_match(stru
cseg = post_static_params(sq, priv_rx);
    if (IS_ERR(cseg)) {
- 		priv_rx->stats->tls_resync_res_skip++;
+ 		priv_rx->rq_stats->tls_resync_res_skip++;
    	err = PTR_ERR(cseg);
    	goto unlock;
    }
    /* Do not increment priv_rx refcnt, CQE handling is empty */
    mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, cseg);
- 	priv_rx->stats->tls_resync_res_ok++;
+ 	priv_rx->rq_stats->tls_resync_res_ok++;
  unlock:
    spin_unlock_bh(&c->async_icosq_lock);
@@@ -420,13 -417,13 +417,13 @@@ void mlx5e_ktls_handle_get_psv_completi
    auth_state = MLX5_GET(tls_progress_params, ctx, auth_state);
    if (tracker_state != MLX5E_TLS_PROGRESS_PARAMS_RECORD_TRACKER_STATE_TRACKING ||
        auth_state != MLX5E_TLS_PROGRESS_PARAMS_AUTH_STATE_NO_OFFLOAD) {
- 		priv_rx->stats->tls_resync_req_skip++;
+ 		priv_rx->rq_stats->tls_resync_req_skip++;
    	goto out;
    }
hw_seq = MLX5_GET(tls_progress_params, ctx, hw_resync_tcp_sn);
    tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq));
- 	priv_rx->stats->tls_resync_req_end++;
+ 	priv_rx->rq_stats->tls_resync_req_end++;
  out:
    mlx5e_ktls_priv_rx_put(priv_rx);
    dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE);
@@@ -609,7 -606,8 +606,8 @@@ int mlx5e_ktls_add_rx(struct net_devic
    priv_rx->rxq = rxq;
    priv_rx->sk = sk;
- 	priv_rx->stats = &priv->channel_stats[rxq].rq;
+ 	priv_rx->rq_stats = &priv->channel_stats[rxq].rq;
+ 	priv_rx->sw_stats = &priv->tls->sw_stats;
    mlx5e_set_ktls_rx_priv_ctx(tls_ctx, priv_rx);
rqtn = priv->direct_tir[rxq].rqt.rqtn;
@@@ -630,7 -628,7 +628,7 @@@
    if (err)
    	goto err_post_wqes;
- 	priv_rx->stats->tls_ctx++;
+ 	atomic64_inc(&priv_rx->sw_stats->rx_tls_ctx);
return 0;
@@@ -666,7 -664,7 +664,7 @@@ void mlx5e_ktls_del_rx(struct net_devic
    if (cancel_work_sync(&resync->work))
    	mlx5e_ktls_priv_rx_put(priv_rx);
- 	priv_rx->stats->tls_del++;
+ 	atomic64_inc(&priv_rx->sw_stats->rx_tls_del);
    if (priv_rx->rule.rule)
    	mlx5e_accel_fs_del_sk(priv_rx->rule.rule);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 964558086ad6,53802e18af90..b185a0452629
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@@ -34,7 -34,6 +34,7 @@@
  #include "en/port.h"
  #include "en/params.h"
  #include "en/xsk/pool.h"
 +#include "en/ptp.h"
  #include "lib/clock.h"
void mlx5e_ethtool_get_drvinfo(struct mlx5e_priv *priv,
@@@ -369,7 -368,7 +369,7 @@@ int mlx5e_ethtool_set_ringparam(struct 
    new_channels.params.log_rq_mtu_frames = log_rq_size;
    new_channels.params.log_sq_size = log_sq_size;
-	err = mlx5e_validate_params(priv, &new_channels.params);
 +	err = mlx5e_validate_params(priv->mdev, &new_channels.params);
    if (err)
    	goto unlock;
@@@ -759,11 -758,11 +759,11 @@@ static int get_fec_supported_advertised
    return 0;
  }
- static void ptys2ethtool_supported_advertised_port(struct ethtool_link_ksettings *link_ksettings,
- 						   u32 eth_proto_cap,
- 						   u8 connector_type, bool ext)
+ static void ptys2ethtool_supported_advertised_port(struct mlx5_core_dev *mdev,
+ 						   struct ethtool_link_ksettings *link_ksettings,
+ 						   u32 eth_proto_cap, u8 connector_type)
  {
- 	if ((!connector_type && !ext) || connector_type >= MLX5E_CONNECTOR_TYPE_NUMBER) {
+ 	if (!MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type)) {
    	if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR)
    			   | MLX5E_PROT_MASK(MLX5E_10GBASE_SR)
    			   | MLX5E_PROT_MASK(MLX5E_40GBASE_CR4)
@@@ -899,9 -898,9 +899,9 @@@ static int ptys2connector_type[MLX5E_CO
    	[MLX5E_PORT_OTHER]              = PORT_OTHER,
    };
- static u8 get_connector_port(u32 eth_proto, u8 connector_type, bool ext)
+ static u8 get_connector_port(struct mlx5_core_dev *mdev, u32 eth_proto, u8 connector_type)
  {
- 	if ((connector_type || ext) && connector_type < MLX5E_CONNECTOR_TYPE_NUMBER)
+ 	if (MLX5_CAP_PCAM_FEATURE(mdev, ptys_connector_type))
    	return ptys2connector_type[connector_type];
if (eth_proto &
@@@ -1002,11 -1001,11 +1002,11 @@@ int mlx5e_ethtool_get_link_ksettings(st
    		 data_rate_oper, link_ksettings);
eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap;
- 
- 	link_ksettings->base.port = get_connector_port(eth_proto_oper,
- 						       connector_type, ext);
- 	ptys2ethtool_supported_advertised_port(link_ksettings, eth_proto_admin,
- 					       connector_type, ext);
+ 	connector_type = connector_type < MLX5E_CONNECTOR_TYPE_NUMBER ?
+ 			 connector_type : MLX5E_PORT_UNKNOWN;
+ 	link_ksettings->base.port = get_connector_port(mdev, eth_proto_oper, connector_type);
+ 	ptys2ethtool_supported_advertised_port(mdev, link_ksettings, eth_proto_admin,
+ 					       connector_type);
    get_lp_advertising(mdev, eth_proto_lp, link_ksettings);
if (an_status == MLX5_AN_COMPLETE)
@@@ -1866,19 -1865,13 +1866,19 @@@ int mlx5e_modify_rx_cqe_compression_loc
new_channels.params = priv->channels.params;
    MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);
 +	if (priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE)
 +		new_channels.params.ptp_rx = new_val;
if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
    	priv->channels.params = new_channels.params;
    	return 0;
    }
-	err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
 +	if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx)
 +		err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL);
 +	else
 +		err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx,
 +						 &new_channels.params.ptp_rx);
    if (err)
    	return err;
@@@ -1899,6 -1892,11 +1899,6 @@@ static int set_pflag_rx_cqe_compress(st
    if (!MLX5_CAP_GEN(mdev, cqe_compression))
    	return -EOPNOTSUPP;
-	if (enable && priv->tstamp.rx_filter != HWTSTAMP_FILTER_NONE) {
 -		netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n");
 -		return -EINVAL;
 -	}
 -
    err = mlx5e_modify_rx_cqe_compression_locked(priv, enable);
    if (err)
    	return err;
@@@ -2034,7 -2032,7 +2034,7 @@@ static int set_pflag_tx_port_ts(struct 
    				 mlx5e_num_channels_changed_ctx, NULL);
  out:
    if (!err)
 -		priv->port_ptp_opened = true;
 +		priv->tx_ptp_opened = true;
return err;
  }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 773449c1424b,5db63b9f3b70..2f47608bb9b9
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@@ -87,6 -87,51 +87,6 @@@ bool mlx5e_check_fragmented_striding_rq
    return true;
  }
-void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 -			       struct mlx5e_params *params)
 -{
 -	params->log_rq_mtu_frames = is_kdump_kernel() ?
 -		MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE :
 -		MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
 -
 -	mlx5_core_info(mdev, "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
 -		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
 -		       params->rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
 -		       BIT(mlx5e_mpwqe_get_log_rq_size(params, NULL)) :
 -		       BIT(params->log_rq_mtu_frames),
 -		       BIT(mlx5e_mpwqe_get_log_stride_size(mdev, params, NULL)),
 -		       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
 -}
 -
 -bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev,
 -				struct mlx5e_params *params)
 -{
 -	if (!mlx5e_check_fragmented_striding_rq_cap(mdev))
 -		return false;
 -
 -	if (mlx5_fpga_is_ipsec_device(mdev))
 -		return false;
 -
 -	if (params->xdp_prog) {
 -		/* XSK params are not considered here. If striding RQ is in use,
 -		 * and an XSK is being opened, mlx5e_rx_mpwqe_is_linear_skb will
 -		 * be called with the known XSK params.
 -		 */
 -		if (!mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL))
 -			return false;
 -	}
 -
 -	return true;
 -}
 -
 -void mlx5e_set_rq_type(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
 -{
 -	params->rq_wq_type = mlx5e_striding_rq_possible(mdev, params) &&
 -		MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) ?
 -		MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
 -		MLX5_WQ_TYPE_CYCLIC;
 -}
 -
  void mlx5e_update_carrier(struct mlx5e_priv *priv)
  {
    struct mlx5_core_dev *mdev = priv->mdev;
@@@ -214,17 -259,18 +214,17 @@@ static inline void mlx5e_build_umr_wqe(
    ucseg->mkey_mask     = cpu_to_be64(MLX5_MKEY_MASK_FREE);
  }
-static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
 -				     struct mlx5e_channel *c)
 +static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, int node)
  {
    int wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
rq->mpwqe.info = kvzalloc_node(array_size(wq_sz,
    					  sizeof(*rq->mpwqe.info)),
 -				       GFP_KERNEL, cpu_to_node(c->cpu));
 +				       GFP_KERNEL, node);
    if (!rq->mpwqe.info)
    	return -ENOMEM;
-	mlx5e_build_umr_wqe(rq, &c->icosq, &rq->mpwqe.umr_wqe);
 +	mlx5e_build_umr_wqe(rq, rq->icosq, &rq->mpwqe.umr_wqe);
return 0;
  }
@@@ -256,7 -302,7 +256,7 @@@ static int mlx5e_create_umr_mkey(struc
    MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
    mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
    MLX5_SET(mkc, mkc, qpn, 0xffffff);
 -	MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
 +	MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
    MLX5_SET64(mkc, mkc, len, npages << page_shift);
    MLX5_SET(mkc, mkc, translations_octword_size,
    	 MLX5_MTT_OCTW(npages));
@@@ -373,53 -419,58 +373,53 @@@ static void mlx5e_free_mpwqe_rq_drop_pa
     __free_page(rq->wqe_overflow.page);
  }
-static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 -			  struct mlx5e_params *params,
 +static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
 +			     struct mlx5e_rq *rq)
 +{
 +	struct mlx5_core_dev *mdev = c->mdev;
 +	int err;
 +
 +	rq->wq_type      = params->rq_wq_type;
 +	rq->pdev         = c->pdev;
 +	rq->netdev       = c->netdev;
 +	rq->priv         = c->priv;
 +	rq->tstamp       = c->tstamp;
 +	rq->clock        = &mdev->clock;
 +	rq->icosq        = &c->icosq;
 +	rq->ix           = c->ix;
 +	rq->mdev         = mdev;
 +	rq->hw_mtu       = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 +	rq->xdpsq        = &c->rq_xdpsq;
 +	rq->stats        = &c->priv->channel_stats[c->ix].rq;
 +	rq->ptp_cyc2time = mlx5_rq_ts_translator(mdev);
 +	err = mlx5e_rq_set_handlers(rq, params, NULL);
 +	if (err)
 +		return err;
 +
 +	return xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq->ix, 0);
 +}
 +
 +static int mlx5e_alloc_rq(struct mlx5e_params *params,
    		  struct mlx5e_xsk_param *xsk,
 -			  struct xsk_buff_pool *xsk_pool,
    		  struct mlx5e_rq_param *rqp,
 -			  struct mlx5e_rq *rq)
 +			  int node, struct mlx5e_rq *rq)
  {
    struct page_pool_params pp_params = { 0 };
 -	struct mlx5_core_dev *mdev = c->mdev;
 +	struct mlx5_core_dev *mdev = rq->mdev;
    void *rqc = rqp->rqc;
    void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
 -	u32 rq_xdp_ix;
    u32 pool_size;
    int wq_sz;
    int err;
    int i;
-	rqp->wq.db_numa_node = cpu_to_node(c->cpu);
 -
 -	rq->wq_type = params->rq_wq_type;
 -	rq->pdev    = c->pdev;
 -	rq->netdev  = c->netdev;
 -	rq->priv    = c->priv;
 -	rq->tstamp  = c->tstamp;
 -	rq->clock   = &mdev->clock;
 -	rq->icosq   = &c->icosq;
 -	rq->ix      = c->ix;
 -	rq->mdev    = mdev;
 -	rq->hw_mtu  = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 -	rq->xdpsq   = &c->rq_xdpsq;
 -	rq->xsk_pool = xsk_pool;
 -	rq->ptp_cyc2time = mlx5_is_real_time_rq(mdev) ?
 -			   mlx5_real_time_cyc2time :
 -			   mlx5_timecounter_cyc2time;
 -
 -	if (rq->xsk_pool)
 -		rq->stats = &c->priv->channel_stats[c->ix].xskrq;
 -	else
 -		rq->stats = &c->priv->channel_stats[c->ix].rq;
 +	rqp->wq.db_numa_node = node;
    INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
if (params->xdp_prog)
    	bpf_prog_inc(params->xdp_prog);
    RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog);
-	rq_xdp_ix = rq->ix;
 -	if (xsk)
 -		rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK;
 -	err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0);
 -	if (err < 0)
 -		goto err_rq_xdp_prog;
 -
    rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
    rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
    pool_size = 1 << params->log_rq_mtu_frames;
@@@ -429,7 -480,7 +429,7 @@@
    	err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->mpwqe.wq,
    				&rq->wq_ctrl);
    	if (err)
 -			goto err_rq_xdp;
 +			goto err_rq_xdp_prog;
err = mlx5e_alloc_mpwqe_rq_drop_page(rq);
    	if (err)
@@@ -453,7 -504,7 +453,7 @@@
    		goto err_rq_drop_page;
    	rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
-		err = mlx5e_rq_alloc_mpwqe_info(rq, c);
 +		err = mlx5e_rq_alloc_mpwqe_info(rq, node);
    	if (err)
    		goto err_rq_mkey;
    	break;
@@@ -461,7 -512,7 +461,7 @@@
    	err = mlx5_wq_cyc_create(mdev, &rqp->wq, rqc_wq, &rq->wqe.wq,
    				 &rq->wq_ctrl);
    	if (err)
 -			goto err_rq_xdp;
 +			goto err_rq_xdp_prog;
rq->wqe.wq.db = &rq->wqe.wq.db[MLX5_RCV_DBR];
@@@ -473,19 -524,23 +473,19 @@@
    	rq->wqe.frags =
    		kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
    				(wq_sz << rq->wqe.info.log_num_frags)),
 -				      GFP_KERNEL, cpu_to_node(c->cpu));
 +				      GFP_KERNEL, node);
    	if (!rq->wqe.frags) {
    		err = -ENOMEM;
    		goto err_rq_wq_destroy;
    	}
-		err = mlx5e_init_di_list(rq, wq_sz, cpu_to_node(c->cpu));
 +		err = mlx5e_init_di_list(rq, wq_sz, node);
    	if (err)
    		goto err_rq_frags;
-		rq->mkey_be = c->mkey_be;
 +		rq->mkey_be = cpu_to_be32(mdev->mlx5e_res.hw_objs.mkey.key);
    }
-	err = mlx5e_rq_set_handlers(rq, params, xsk);
 -	if (err)
 -		goto err_free_by_rq_type;
 -
    if (xsk) {
    	err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
    					 MEM_TYPE_XSK_BUFF_POOL, NULL);
@@@ -495,8 -550,8 +495,8 @@@
    	pp_params.order     = 0;
    	pp_params.flags     = 0; /* No-internal DMA mapping in page_pool */
    	pp_params.pool_size = pool_size;
 -		pp_params.nid       = cpu_to_node(c->cpu);
 -		pp_params.dev       = c->pdev;
 +		pp_params.nid       = node;
 +		pp_params.dev       = rq->pdev;
    	pp_params.dma_dir   = rq->buff.map_dir;
/* page_pool can be used even when there is no rq->xdp_prog,
@@@ -580,6 -635,8 +580,6 @@@ err_rq_frags
    }
  err_rq_wq_destroy:
    mlx5_wq_destroy(&rq->wq_ctrl);
 -err_rq_xdp:
 -	xdp_rxq_info_unreg(&rq->xdp_rxq);
  err_rq_xdp_prog:
    if (params->xdp_prog)
    	bpf_prog_put(params->xdp_prog);
@@@ -592,12 -649,10 +592,12 @@@ static void mlx5e_free_rq(struct mlx5e_
    struct bpf_prog *old_prog;
    int i;
-	old_prog = rcu_dereference_protected(rq->xdp_prog,
 -					     lockdep_is_held(&rq->priv->state_lock));
 -	if (old_prog)
 -		bpf_prog_put(old_prog);
 +	if (xdp_rxq_info_is_reg(&rq->xdp_rxq)) {
 +		old_prog = rcu_dereference_protected(rq->xdp_prog,
 +						     lockdep_is_held(&rq->priv->state_lock));
 +		if (old_prog)
 +			bpf_prog_put(old_prog);
 +	}
switch (rq->wq_type) {
    case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
@@@ -833,14 -888,13 +833,14 @@@ void mlx5e_free_rx_descs(struct mlx5e_r
}
-int mlx5e_open_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
 -		  struct mlx5e_rq_param *param, struct mlx5e_xsk_param *xsk,
 -		  struct xsk_buff_pool *xsk_pool, struct mlx5e_rq *rq)
 +int mlx5e_open_rq(struct mlx5e_params *params, struct mlx5e_rq_param *param,
 +		  struct mlx5e_xsk_param *xsk, int node,
 +		  struct mlx5e_rq *rq)
  {
 +	struct mlx5_core_dev *mdev = rq->mdev;
    int err;
-	err = mlx5e_alloc_rq(c, params, xsk, xsk_pool, param, rq);
 +	err = mlx5e_alloc_rq(params, xsk, param, node, rq);
    if (err)
    	return err;
@@@ -852,28 -906,28 +852,28 @@@
    if (err)
    	goto err_destroy_rq;
-	if (mlx5e_is_tls_on(c->priv) && !mlx5_accel_is_ktls_device(c->mdev))
 -		__set_bit(MLX5E_RQ_STATE_FPGA_TLS, &c->rq.state); /* must be FPGA */
 +	if (mlx5e_is_tls_on(rq->priv) && !mlx5_accel_is_ktls_device(mdev))
 +		__set_bit(MLX5E_RQ_STATE_FPGA_TLS, &rq->state); /* must be FPGA */
-	if (MLX5_CAP_ETH(c->mdev, cqe_checksum_full))
 -		__set_bit(MLX5E_RQ_STATE_CSUM_FULL, &c->rq.state);
 +	if (MLX5_CAP_ETH(mdev, cqe_checksum_full))
 +		__set_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state);
if (params->rx_dim_enabled)
 -		__set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);
 +		__set_bit(MLX5E_RQ_STATE_AM, &rq->state);
/* We disable csum_complete when XDP is enabled since
     * XDP programs might manipulate packets which will render
     * skb->checksum incorrect.
     */
 -	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || c->xdp)
 -		__set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);
 +	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) || params->xdp_prog)
 +		__set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &rq->state);
/* For CQE compression on striding RQ, use stride index provided by
     * HW if capability is supported.
     */
    if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ) &&
 -	    MLX5_CAP_GEN(c->mdev, mini_cqe_resp_stride_index))
 -		__set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &c->rq.state);
 +	    MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index))
 +		__set_bit(MLX5E_RQ_STATE_MINI_CQE_HW_STRIDX, &rq->state);
return 0;
@@@ -888,10 -942,7 +888,10 @@@ err_free_rq
  void mlx5e_activate_rq(struct mlx5e_rq *rq)
  {
    set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
 -	mlx5e_trigger_irq(rq->icosq);
 +	if (rq->icosq)
 +		mlx5e_trigger_irq(rq->icosq);
 +	else
 +		napi_schedule(rq->cq.napi);
  }
void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
@@@ -903,8 -954,7 +903,8 @@@
  void mlx5e_close_rq(struct mlx5e_rq *rq)
  {
    cancel_work_sync(&rq->dim.work);
 -	cancel_work_sync(&rq->icosq->recover_work);
 +	if (rq->icosq)
 +		cancel_work_sync(&rq->icosq->recover_work);
    cancel_work_sync(&rq->recover_work);
    mlx5e_destroy_rq(rq);
    mlx5e_free_rx_descs(rq);
@@@ -969,7 -1019,7 +969,7 @@@ static int mlx5e_alloc_xdpsq(struct mlx
    sq->pdev      = c->pdev;
    sq->mkey_be   = c->mkey_be;
    sq->channel   = c;
 -	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 +	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
    sq->min_inline_mode = params->tx_min_inline_mode;
    sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
    sq->xsk_pool  = xsk_pool;
@@@ -1040,7 -1090,8 +1040,8 @@@ static int mlx5e_alloc_icosq(struct mlx
    int err;
sq->channel   = c;
 -	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 +	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
+ 	sq->reserved_room = param->stop_room;
param->wq.db_numa_node = cpu_to_node(c->cpu);
    err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@@ -1124,7 -1175,7 +1125,7 @@@ static int mlx5e_alloc_txqsq(struct mlx
    sq->priv      = c->priv;
    sq->ch_ix     = c->ix;
    sq->txq_ix    = txq_ix;
 -	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
 +	sq->uar_map   = mdev->mlx5e_res.hw_objs.bfreg.map;
    sq->min_inline_mode = params->tx_min_inline_mode;
    sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
    INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
@@@ -1137,7 -1188,9 +1138,7 @@@
    if (param->is_mpw)
    	set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
    sq->stop_room = param->stop_room;
 -	sq->ptp_cyc2time = mlx5_is_real_time_sq(mdev) ?
 -			   mlx5_real_time_cyc2time :
 -			   mlx5_timecounter_cyc2time;
 +	sq->ptp_cyc2time = mlx5_sq_ts_translator(mdev);
param->wq.db_numa_node = cpu_to_node(c->cpu);
    err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
@@@ -1205,7 -1258,7 +1206,7 @@@ static int mlx5e_create_sq(struct mlx5_
    MLX5_SET(sqc,  sqc, flush_in_error_en, 1);
MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
 -	MLX5_SET(wq,   wq, uar_page,      mdev->mlx5e_res.bfreg.index);
 +	MLX5_SET(wq,   wq, uar_page,      mdev->mlx5e_res.hw_objs.bfreg.index);
    MLX5_SET(wq,   wq, log_wq_pg_sz,  csp->wq_ctrl->buf.page_shift -
    				  MLX5_ADAPTER_PAGE_SHIFT);
    MLX5_SET64(wq, wq, dbr_addr,      csp->wq_ctrl->db.dma);
@@@ -1808,16 -1861,14 +1809,16 @@@ static int mlx5e_set_tx_maxrate(struct 
    return err;
  }
-void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e_channel *c)
 +static int mlx5e_open_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *params,
 +			     struct mlx5e_rq_param *rq_params)
  {
 -	*ccp = (struct mlx5e_create_cq_param) {
 -		.napi = &c->napi,
 -		.ch_stats = c->stats,
 -		.node = cpu_to_node(c->cpu),
 -		.ix = c->ix,
 -	};
 +	int err;
 +
 +	err = mlx5e_init_rxq_rq(c, params, &c->rq);
 +	if (err)
 +		return err;
 +
 +	return mlx5e_open_rq(params, rq_params, NULL, cpu_to_node(c->cpu), &c->rq);
  }
static int mlx5e_open_queues(struct mlx5e_channel *c,
@@@ -1880,7 -1931,7 +1881,7 @@@
    		goto err_close_sqs;
    }
-	err = mlx5e_open_rq(c, params, &cparam->rq, NULL, NULL, &c->rq);
 +	err = mlx5e_open_rxq_rq(c, params, &cparam->rq);
    if (err)
    	goto err_close_xdp_sq;
@@@ -1982,7 -2033,7 +1983,7 @@@ static int mlx5e_open_channel(struct ml
    c->cpu      = cpu;
    c->pdev     = mlx5_core_dma_dev(priv->mdev);
    c->netdev   = priv->netdev;
 -	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
 +	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key);
    c->num_tc   = params->num_tc;
    c->xdp      = !!params->xdp_prog;
    c->stats    = &priv->channel_stats[ix].ch;
@@@ -2061,6 -2112,314 +2062,6 @@@ static void mlx5e_close_channel(struct 
    kvfree(c);
  }
-#define DEFAULT_FRAG_SIZE (2048)
 -
 -static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev,
 -				      struct mlx5e_params *params,
 -				      struct mlx5e_xsk_param *xsk,
 -				      struct mlx5e_rq_frags_info *info)
 -{
 -	u32 byte_count = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 -	int frag_size_max = DEFAULT_FRAG_SIZE;
 -	u32 buf_size = 0;
 -	int i;
 -
 -	if (mlx5_fpga_is_ipsec_device(mdev))
 -		byte_count += MLX5E_METADATA_ETHER_LEN;
 -
 -	if (mlx5e_rx_is_linear_skb(params, xsk)) {
 -		int frag_stride;
 -
 -		frag_stride = mlx5e_rx_get_linear_frag_sz(params, xsk);
 -		frag_stride = roundup_pow_of_two(frag_stride);
 -
 -		info->arr[0].frag_size = byte_count;
 -		info->arr[0].frag_stride = frag_stride;
 -		info->num_frags = 1;
 -		info->wqe_bulk = PAGE_SIZE / frag_stride;
 -		goto out;
 -	}
 -
 -	if (byte_count > PAGE_SIZE +
 -	    (MLX5E_MAX_RX_FRAGS - 1) * frag_size_max)
 -		frag_size_max = PAGE_SIZE;
 -
 -	i = 0;
 -	while (buf_size < byte_count) {
 -		int frag_size = byte_count - buf_size;
 -
 -		if (i < MLX5E_MAX_RX_FRAGS - 1)
 -			frag_size = min(frag_size, frag_size_max);
 -
 -		info->arr[i].frag_size = frag_size;
 -		info->arr[i].frag_stride = roundup_pow_of_two(frag_size);
 -
 -		buf_size += frag_size;
 -		i++;
 -	}
 -	info->num_frags = i;
 -	/* number of different wqes sharing a page */
 -	info->wqe_bulk = 1 + (info->num_frags % 2);
 -
 -out:
 -	info->wqe_bulk = max_t(u8, info->wqe_bulk, 8);
 -	info->log_num_frags = order_base_2(info->num_frags);
 -}
 -
 -static inline u8 mlx5e_get_rqwq_log_stride(u8 wq_type, int ndsegs)
 -{
 -	int sz = sizeof(struct mlx5_wqe_data_seg) * ndsegs;
 -
 -	switch (wq_type) {
 -	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 -		sz += sizeof(struct mlx5e_rx_wqe_ll);
 -		break;
 -	default: /* MLX5_WQ_TYPE_CYCLIC */
 -		sz += sizeof(struct mlx5e_rx_wqe_cyc);
 -	}
 -
 -	return order_base_2(sz);
 -}
 -
 -static u8 mlx5e_get_rq_log_wq_sz(void *rqc)
 -{
 -	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 -
 -	return MLX5_GET(wq, wq, log_wq_sz);
 -}
 -
 -void mlx5e_build_rq_param(struct mlx5e_priv *priv,
 -			  struct mlx5e_params *params,
 -			  struct mlx5e_xsk_param *xsk,
 -			  struct mlx5e_rq_param *param)
 -{
 -	struct mlx5_core_dev *mdev = priv->mdev;
 -	void *rqc = param->rqc;
 -	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 -	int ndsegs = 1;
 -
 -	switch (params->rq_wq_type) {
 -	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 -		MLX5_SET(wq, wq, log_wqe_num_of_strides,
 -			 mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk) -
 -			 MLX5_MPWQE_LOG_NUM_STRIDES_BASE);
 -		MLX5_SET(wq, wq, log_wqe_stride_size,
 -			 mlx5e_mpwqe_get_log_stride_size(mdev, params, xsk) -
 -			 MLX5_MPWQE_LOG_STRIDE_SZ_BASE);
 -		MLX5_SET(wq, wq, log_wq_sz, mlx5e_mpwqe_get_log_rq_size(params, xsk));
 -		break;
 -	default: /* MLX5_WQ_TYPE_CYCLIC */
 -		MLX5_SET(wq, wq, log_wq_sz, params->log_rq_mtu_frames);
 -		mlx5e_build_rq_frags_info(mdev, params, xsk, &param->frags_info);
 -		ndsegs = param->frags_info.num_frags;
 -	}
 -
 -	MLX5_SET(wq, wq, wq_type,          params->rq_wq_type);
 -	MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN);
 -	MLX5_SET(wq, wq, log_wq_stride,
 -		 mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs));
 -	MLX5_SET(wq, wq, pd,               mdev->mlx5e_res.pdn);
 -	MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter);
 -	MLX5_SET(rqc, rqc, vsd,            params->vlan_strip_disable);
 -	MLX5_SET(rqc, rqc, scatter_fcs,    params->scatter_fcs_en);
 -
 -	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
 -	mlx5e_build_rx_cq_param(priv, params, xsk, &param->cqp);
 -}
 -
 -static void mlx5e_build_drop_rq_param(struct mlx5e_priv *priv,
 -				      struct mlx5e_rq_param *param)
 -{
 -	struct mlx5_core_dev *mdev = priv->mdev;
 -	void *rqc = param->rqc;
 -	void *wq = MLX5_ADDR_OF(rqc, rqc, wq);
 -
 -	MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
 -	MLX5_SET(wq, wq, log_wq_stride,
 -		 mlx5e_get_rqwq_log_stride(MLX5_WQ_TYPE_CYCLIC, 1));
 -	MLX5_SET(rqc, rqc, counter_set_id, priv->drop_rq_q_counter);
 -
 -	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev));
 -}
 -
 -void mlx5e_build_sq_param_common(struct mlx5e_priv *priv,
 -				 struct mlx5e_sq_param *param)
 -{
 -	void *sqc = param->sqc;
 -	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 -
 -	MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB));
 -	MLX5_SET(wq, wq, pd,            priv->mdev->mlx5e_res.pdn);
 -
 -	param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev));
 -}
 -
 -void mlx5e_build_sq_param(struct mlx5e_priv *priv, struct mlx5e_params *params,
 -			  struct mlx5e_sq_param *param)
 -{
 -	void *sqc = param->sqc;
 -	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 -	bool allow_swp;
 -
 -	allow_swp = mlx5_geneve_tx_allowed(priv->mdev) ||
 -		    !!MLX5_IPSEC_DEV(priv->mdev);
 -	mlx5e_build_sq_param_common(priv, param);
 -	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
 -	MLX5_SET(sqc, sqc, allow_swp, allow_swp);
 -	param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
 -	param->stop_room = mlx5e_calc_sq_stop_room(priv->mdev, params);
 -	mlx5e_build_tx_cq_param(priv, params, &param->cqp);
 -}
 -
 -static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
 -					struct mlx5e_cq_param *param)
 -{
 -	void *cqc = param->cqc;
 -
 -	MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
 -	if (MLX5_CAP_GEN(priv->mdev, cqe_128_always) && cache_line_size() >= 128)
 -		MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD);
 -}
 -
 -void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
 -			     struct mlx5e_params *params,
 -			     struct mlx5e_xsk_param *xsk,
 -			     struct mlx5e_cq_param *param)
 -{
 -	struct mlx5_core_dev *mdev = priv->mdev;
 -	bool hw_stridx = false;
 -	void *cqc = param->cqc;
 -	u8 log_cq_size;
 -
 -	switch (params->rq_wq_type) {
 -	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 -		log_cq_size = mlx5e_mpwqe_get_log_rq_size(params, xsk) +
 -			mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
 -		hw_stridx = MLX5_CAP_GEN(mdev, mini_cqe_resp_stride_index);
 -		break;
 -	default: /* MLX5_WQ_TYPE_CYCLIC */
 -		log_cq_size = params->log_rq_mtu_frames;
 -	}
 -
 -	MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
 -	if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
 -		MLX5_SET(cqc, cqc, mini_cqe_res_format, hw_stridx ?
 -			 MLX5_CQE_FORMAT_CSUM_STRIDX : MLX5_CQE_FORMAT_CSUM);
 -		MLX5_SET(cqc, cqc, cqe_comp_en, 1);
 -	}
 -
 -	mlx5e_build_common_cq_param(priv, param);
 -	param->cq_period_mode = params->rx_cq_moderation.cq_period_mode;
 -}
 -
 -void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv,
 -			     struct mlx5e_params *params,
 -			     struct mlx5e_cq_param *param)
 -{
 -	void *cqc = param->cqc;
 -
 -	MLX5_SET(cqc, cqc, log_cq_size, params->log_sq_size);
 -
 -	mlx5e_build_common_cq_param(priv, param);
 -	param->cq_period_mode = params->tx_cq_moderation.cq_period_mode;
 -}
 -
 -void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv,
 -			      u8 log_wq_size,
 -			      struct mlx5e_cq_param *param)
 -{
 -	void *cqc = param->cqc;
 -
 -	MLX5_SET(cqc, cqc, log_cq_size, log_wq_size);
 -
 -	mlx5e_build_common_cq_param(priv, param);
 -
 -	param->cq_period_mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 -}
 -
 -void mlx5e_build_icosq_param(struct mlx5e_priv *priv,
 -			     u8 log_wq_size,
 -			     struct mlx5e_sq_param *param)
 -{
 -	void *sqc = param->sqc;
 -	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 -
 -	mlx5e_build_sq_param_common(priv, param);
 -
 -	MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
 -	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
 -	mlx5e_build_ico_cq_param(priv, log_wq_size, &param->cqp);
 -}
 -
 -static void mlx5e_build_async_icosq_param(struct mlx5e_priv *priv,
 -					  struct mlx5e_params *params,
 -					  u8 log_wq_size,
 -					  struct mlx5e_sq_param *param)
 -{
 -	void *sqc = param->sqc;
 -	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 -
 -	mlx5e_build_sq_param_common(priv, param);
 -
 -	/* async_icosq is used by XSK only if xdp_prog is active */
 -	if (params->xdp_prog)
 -		param->stop_room = mlx5e_stop_room_for_wqe(1); /* for XSK NOP */
 -	MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq));
 -	MLX5_SET(wq, wq, log_wq_sz, log_wq_size);
 -	mlx5e_build_ico_cq_param(priv, log_wq_size, &param->cqp);
 -}
 -
 -void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv,
 -			     struct mlx5e_params *params,
 -			     struct mlx5e_sq_param *param)
 -{
 -	void *sqc = param->sqc;
 -	void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
 -
 -	mlx5e_build_sq_param_common(priv, param);
 -	MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
 -	param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE);
 -	mlx5e_build_tx_cq_param(priv, params, &param->cqp);
 -}
 -
 -static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params,
 -				      struct mlx5e_rq_param *rqp)
 -{
 -	switch (params->rq_wq_type) {
 -	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 -		return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE,
 -			     order_base_2(MLX5E_UMR_WQEBBS) +
 -			     mlx5e_get_rq_log_wq_sz(rqp->rqc));
 -	default: /* MLX5_WQ_TYPE_CYCLIC */
 -		return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
 -	}
 -}
 -
 -static u8 mlx5e_build_async_icosq_log_wq_sz(struct net_device *netdev)
 -{
 -	if (netdev->hw_features & NETIF_F_HW_TLS_RX)
 -		return MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
 -
 -	return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
 -}
 -
 -static void mlx5e_build_channel_param(struct mlx5e_priv *priv,
 -				      struct mlx5e_params *params,
 -				      struct mlx5e_channel_param *cparam)
 -{
 -	u8 icosq_log_wq_sz, async_icosq_log_wq_sz;
 -
 -	mlx5e_build_rq_param(priv, params, NULL, &cparam->rq);
 -
 -	icosq_log_wq_sz = mlx5e_build_icosq_log_wq_sz(params, &cparam->rq);
 -	async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(priv->netdev);
 -
 -	mlx5e_build_sq_param(priv, params, &cparam->txq_sq);
 -	mlx5e_build_xdpsq_param(priv, params, &cparam->xdp_sq);
 -	mlx5e_build_icosq_param(priv, icosq_log_wq_sz, &cparam->icosq);
 -	mlx5e_build_async_icosq_param(priv, params, async_icosq_log_wq_sz, &cparam->async_icosq);
 -}
 -
  int mlx5e_open_channels(struct mlx5e_priv *priv,
    		struct mlx5e_channels *chs)
  {
@@@ -2075,7 -2434,7 +2076,7 @@@
    if (!chs->c || !cparam)
    	goto err_free;
-	mlx5e_build_channel_param(priv, &chs->params, cparam);
 +	mlx5e_build_channel_param(priv->mdev, &chs->params, priv->q_counter, cparam);
    for (i = 0; i < chs->num; i++) {
    	struct xsk_buff_pool *xsk_pool = NULL;
@@@ -2087,8 -2446,9 +2088,8 @@@
    		goto err_close_channels;
    }
-	if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS)) {
 -		err = mlx5e_port_ptp_open(priv, &chs->params, chs->c[0]->lag_port,
 -					  &chs->port_ptp);
 +	if (MLX5E_GET_PFLAG(&chs->params, MLX5E_PFLAG_TX_PORT_TS) || chs->params.ptp_rx) {
 +		err = mlx5e_ptp_open(priv, &chs->params, chs->c[0]->lag_port, &chs->ptp);
    	if (err)
    		goto err_close_channels;
    }
@@@ -2102,8 -2462,8 +2103,8 @@@
    return 0;
err_close_ptp:
 -	if (chs->port_ptp)
 -		mlx5e_port_ptp_close(chs->port_ptp);
 +	if (chs->ptp)
 +		mlx5e_ptp_close(chs->ptp);
err_close_channels:
    for (i--; i >= 0; i--)
@@@ -2123,8 -2483,8 +2124,8 @@@ static void mlx5e_activate_channels(str
    for (i = 0; i < chs->num; i++)
    	mlx5e_activate_channel(chs->c[i]);
-	if (chs->port_ptp)
 -		mlx5e_ptp_activate_channel(chs->port_ptp);
 +	if (chs->ptp)
 +		mlx5e_ptp_activate_channel(chs->ptp);
  }
#define MLX5E_RQ_WQES_TIMEOUT 20000 /* msecs */
@@@ -2151,8 -2511,8 +2152,8 @@@ static void mlx5e_deactivate_channels(s
  {
    int i;
-	if (chs->port_ptp)
 -		mlx5e_ptp_deactivate_channel(chs->port_ptp);
 +	if (chs->ptp)
 +		mlx5e_ptp_deactivate_channel(chs->ptp);
for (i = 0; i < chs->num; i++)
    	mlx5e_deactivate_channel(chs->c[i]);
@@@ -2162,10 -2522,11 +2163,10 @@@ void mlx5e_close_channels(struct mlx5e_
  {
    int i;
-	if (chs->port_ptp) {
 -		mlx5e_port_ptp_close(chs->port_ptp);
 -		chs->port_ptp = NULL;
 +	if (chs->ptp) {
 +		mlx5e_ptp_close(chs->ptp);
 +		chs->ptp = NULL;
    }
 -
    for (i = 0; i < chs->num; i++)
    	mlx5e_close_channel(chs->c[i]);
@@@ -2221,12 -2582,12 +2222,12 @@@ int mlx5e_create_indirect_rqt(struct ml
    return err;
  }
-int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
  {
    int err;
    int ix;
-	for (ix = 0; ix < priv->max_nch; ix++) {
 +	for (ix = 0; ix < n; ix++) {
    	err = mlx5e_create_rqt(priv, 1 /*size */, &tirs[ix].rqt);
    	if (unlikely(err))
    		goto err_destroy_rqts;
@@@ -2242,11 -2603,11 +2243,11 @@@ err_destroy_rqts
    return err;
  }
-void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 +void mlx5e_destroy_direct_rqts(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
  {
    int i;
-	for (i = 0; i < priv->max_nch; i++)
 +	for (i = 0; i < n; i++)
    	mlx5e_destroy_rqt(priv, &tirs[i].rqt);
  }
@@@ -2329,8 -2690,7 +2330,8 @@@ static u32 mlx5e_get_direct_rqn(struct 
  }
static void mlx5e_redirect_rqts(struct mlx5e_priv *priv,
 -				struct mlx5e_redirect_rqt_param rrp)
 +				struct mlx5e_redirect_rqt_param rrp,
 +				struct mlx5e_redirect_rqt_param *ptp_rrp)
  {
    u32 rqtn;
    int ix;
@@@ -2356,17 -2716,11 +2357,17 @@@
    	rqtn = priv->direct_tir[ix].rqt.rqtn;
    	mlx5e_redirect_rqt(priv, rqtn, 1, direct_rrp);
    }
 +	if (ptp_rrp) {
 +		rqtn = priv->ptp_tir.rqt.rqtn;
 +		mlx5e_redirect_rqt(priv, rqtn, 1, *ptp_rrp);
 +	}
  }
static void mlx5e_redirect_rqts_to_channels(struct mlx5e_priv *priv,
    				    struct mlx5e_channels *chs)
  {
 +	bool rx_ptp_support = priv->profile->rx_ptp_support;
 +	struct mlx5e_redirect_rqt_param *ptp_rrp_p = NULL;
    struct mlx5e_redirect_rqt_param rrp = {
    	.is_rss        = true,
    	{
@@@ -2376,22 -2730,12 +2377,22 @@@
    		}
    	},
    };
 +	struct mlx5e_redirect_rqt_param ptp_rrp;
 +
 +	if (rx_ptp_support) {
 +		u32 ptp_rqn;
-	mlx5e_redirect_rqts(priv, rrp);
 +		ptp_rrp.is_rss = false;
 +		ptp_rrp.rqn = mlx5e_ptp_get_rqn(priv->channels.ptp, &ptp_rqn) ?
 +			      priv->drop_rq.rqn : ptp_rqn;
 +		ptp_rrp_p = &ptp_rrp;
 +	}
 +	mlx5e_redirect_rqts(priv, rrp, ptp_rrp_p);
  }
static void mlx5e_redirect_rqts_to_drop(struct mlx5e_priv *priv)
  {
 +	bool rx_ptp_support = priv->profile->rx_ptp_support;
    struct mlx5e_redirect_rqt_param drop_rrp = {
    	.is_rss = false,
    	{
@@@ -2399,7 -2743,7 +2400,7 @@@
    	},
    };
-	mlx5e_redirect_rqts(priv, drop_rrp);
 +	mlx5e_redirect_rqts(priv, drop_rrp, rx_ptp_support ? &drop_rrp : NULL);
  }
static const struct mlx5e_tirc_config tirc_default_config[MLX5E_NUM_INDIR_TIRS] = {
@@@ -2688,8 -3032,6 +2689,8 @@@ static int mlx5e_update_netdev_queues(s
    nch = priv->channels.params.num_channels;
    ntc = priv->channels.params.num_tc;
    num_rxqs = nch * priv->profile->rq_groups;
 +	if (priv->channels.params.ptp_rx)
 +		num_rxqs++;
mlx5e_netdev_set_tcs(netdev, nch, ntc);
@@@ -2775,14 -3117,11 +2776,14 @@@ static void mlx5e_build_txq_maps(struc
    	}
    }
-	if (!priv->channels.port_ptp)
 +	if (!priv->channels.ptp)
 +		return;
 +
 +	if (!test_bit(MLX5E_PTP_STATE_TX, priv->channels.ptp->state))
    	return;
for (tc = 0; tc < num_tc; tc++) {
 -		struct mlx5e_port_ptp *c = priv->channels.port_ptp;
 +		struct mlx5e_ptp *c = priv->channels.ptp;
    	struct mlx5e_txqsq *sq = &c->ptpsq[tc].txqsq;
priv->txq2sq[sq->txq_ix] = sq;
@@@ -3056,7 -3395,7 +3057,7 @@@ int mlx5e_open_drop_rq(struct mlx5e_pri
    struct mlx5e_cq *cq = &drop_rq->cq;
    int err;
-	mlx5e_build_drop_rq_param(priv, &rq_param);
 +	mlx5e_build_drop_rq_param(mdev, priv->drop_rq_q_counter, &rq_param);
err = mlx5e_alloc_drop_cq(priv, cq, &cq_param);
    if (err)
@@@ -3104,10 -3443,10 +3105,10 @@@ int mlx5e_create_tis(struct mlx5_core_d
  {
    void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
-	MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn);
 +	MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
if (MLX5_GET(tisc, tisc, tls_en))
 -		MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn);
 +		MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
if (mlx5_lag_is_lacp_owner(mdev))
    	MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
@@@ -3177,7 -3516,7 +3178,7 @@@ static void mlx5e_cleanup_nic_tx(struc
  static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv,
    				     u32 rqtn, u32 *tirc)
  {
 -	MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn);
 +	MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.hw_objs.td.tdn);
    MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
    MLX5_SET(tirc, tirc, indirect_table, rqtn);
    MLX5_SET(tirc, tirc, tunneled_offload_en,
@@@ -3269,7 -3608,7 +3270,7 @@@ err_destroy_inner_tirs
    return err;
  }
-int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
  {
    struct mlx5e_tir *tir;
    void *tirc;
@@@ -3283,7 -3622,7 +3284,7 @@@
    if (!in)
    	return -ENOMEM;
-	for (ix = 0; ix < priv->max_nch; ix++) {
 +	for (ix = 0; ix < n; ix++) {
    	memset(in, 0, inlen);
    	tir = &tirs[ix];
    	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
@@@ -3321,11 -3660,11 +3322,11 @@@ void mlx5e_destroy_indirect_tirs(struc
    	mlx5e_destroy_tir(priv->mdev, &priv->inner_indir_tir[i]);
  }
-void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs, int n)
  {
    int i;
-	for (i = 0; i < priv->max_nch; i++)
 +	for (i = 0; i < n; i++)
    	mlx5e_destroy_tir(priv->mdev, &tirs[i]);
  }
@@@ -3452,16 -3791,8 +3453,16 @@@ static int mlx5e_setup_tc(struct net_de
    		  void *type_data)
  {
    struct mlx5e_priv *priv = netdev_priv(dev);
 +	bool tc_unbind = false;
    int err;
+	if (type == TC_SETUP_BLOCK &&
 +	    ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND)
 +		tc_unbind = true;
 +
 +	if (!netif_device_present(dev) && !tc_unbind)
 +		return -ENODEV;
 +
    switch (type) {
    case TC_SETUP_BLOCK: {
    	struct flow_block_offload *f = type_data;
@@@ -3506,22 -3837,15 +3507,22 @@@ void mlx5e_fold_sw_stats64(struct mlx5e
    		s->tx_dropped    += sq_stats->dropped;
    	}
    }
 -	if (priv->port_ptp_opened) {
 +	if (priv->tx_ptp_opened) {
    	for (i = 0; i < priv->max_opened_tc; i++) {
 -			struct mlx5e_sq_stats *sq_stats = &priv->port_ptp_stats.sq[i];
 +			struct mlx5e_sq_stats *sq_stats = &priv->ptp_stats.sq[i];
s->tx_packets    += sq_stats->packets;
    		s->tx_bytes      += sq_stats->bytes;
    		s->tx_dropped    += sq_stats->dropped;
    	}
    }
 +	if (priv->rx_ptp_opened) {
 +		struct mlx5e_rq_stats *rq_stats = &priv->ptp_stats.rq;
 +
 +		s->rx_packets   += rq_stats->packets;
 +		s->rx_bytes     += rq_stats->bytes;
 +		s->multicast    += rq_stats->mcast_packets;
 +	}
  }
void
@@@ -3530,9 -3854,6 +3531,9 @@@ mlx5e_get_stats(struct net_device *dev
    struct mlx5e_priv *priv = netdev_priv(dev);
    struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+	if (!netif_device_present(dev))
 +		return;
 +
    /* In switchdev mode, monitor counters doesn't monitor
     * rx/tx stats of 802_3. The update stats mechanism
     * should keep the 802_3 layout counters updated
@@@ -3574,19 -3895,11 +3575,19 @@@
    stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
  }
+static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv)
 +{
 +	if (mlx5e_is_uplink_rep(priv))
 +		return; /* no rx mode for uplink rep */
 +
 +	queue_work(priv->wq, &priv->set_rx_mode_work);
 +}
 +
  static void mlx5e_set_rx_mode(struct net_device *dev)
  {
    struct mlx5e_priv *priv = netdev_priv(dev);
-	queue_work(priv->wq, &priv->set_rx_mode_work);
 +	mlx5e_nic_set_rx_mode(priv);
  }
static int mlx5e_set_mac(struct net_device *netdev, void *addr)
@@@ -3601,7 -3914,7 +3602,7 @@@
    ether_addr_copy(netdev->dev_addr, saddr->sa_data);
    netif_addr_unlock_bh(netdev);
-	queue_work(priv->wq, &priv->set_rx_mode_work);
 +	mlx5e_nic_set_rx_mode(priv);
return 0;
  }
@@@ -3823,8 -4136,7 +3824,8 @@@ static netdev_features_t mlx5e_fix_feat
mutex_lock(&priv->state_lock);
    params = &priv->channels.params;
 -	if (!bitmap_empty(priv->fs.vlan.active_svlans, VLAN_N_VID)) {
 +	if (!priv->fs.vlan ||
 +	    !bitmap_empty(mlx5e_vlan_get_active_svlans(priv->fs.vlan), VLAN_N_VID)) {
    	/* HW strips the outer C-tag header, this is a problem
    	 * for S-tag traffic.
    	 */
@@@ -3907,7 -4219,7 +3908,7 @@@ int mlx5e_change_mtu(struct net_device
new_channels.params = *params;
    new_channels.params.sw_mtu = new_mtu;
 -	err = mlx5e_validate_params(priv, &new_channels.params);
 +	err = mlx5e_validate_params(priv->mdev, &new_channels.params);
    if (err)
    	goto out;
@@@ -3971,18 -4283,9 +3972,18 @@@ static int mlx5e_change_nic_mtu(struct 
    return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx);
  }
+int mlx5e_ptp_rx_manage_fs_ctx(struct mlx5e_priv *priv, void *ctx)
 +{
 +	bool set  = *(bool *)ctx;
 +
 +	return mlx5e_ptp_rx_manage_fs(priv, set);
 +}
 +
  int mlx5e_hwstamp_set(struct mlx5e_priv *priv, struct ifreq *ifr)
  {
 +	struct mlx5e_channels new_channels = {};
    struct hwtstamp_config config;
 +	bool rx_cqe_compress_def;
    int err;
if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz) ||
@@@ -4002,13 -4305,11 +4003,13 @@@
    }
mutex_lock(&priv->state_lock);
 +	new_channels.params = priv->channels.params;
 +	rx_cqe_compress_def = priv->channels.params.rx_cqe_compress_def;
 +
    /* RX HW timestamp */
    switch (config.rx_filter) {
    case HWTSTAMP_FILTER_NONE:
 -		/* Reset CQE compression to Admin default */
 -		mlx5e_modify_rx_cqe_compression_locked(priv, priv->channels.params.rx_cqe_compress_def);
 +		new_channels.params.ptp_rx = false;
    	break;
    case HWTSTAMP_FILTER_ALL:
    case HWTSTAMP_FILTER_SOME:
@@@ -4025,7 -4326,15 +4026,7 @@@
    case HWTSTAMP_FILTER_PTP_V2_SYNC:
    case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
    case HWTSTAMP_FILTER_NTP_ALL:
 -		/* Disable CQE compression */
 -		if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS))
 -			netdev_warn(priv->netdev, "Disabling RX cqe compression\n");
 -		err = mlx5e_modify_rx_cqe_compression_locked(priv, false);
 -		if (err) {
 -			netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err);
 -			mutex_unlock(&priv->state_lock);
 -			return err;
 -		}
 +		new_channels.params.ptp_rx = rx_cqe_compress_def;
    	config.rx_filter = HWTSTAMP_FILTER_ALL;
    	break;
    default:
@@@ -4033,20 -4342,6 +4034,20 @@@
    	return -ERANGE;
    }
+	if (new_channels.params.ptp_rx == priv->channels.params.ptp_rx)
 +		goto out;
 +
 +	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
 +		priv->channels.params = new_channels.params;
 +		goto out;
 +	}
 +	err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_ptp_rx_manage_fs_ctx,
 +					 &new_channels.params.ptp_rx);
 +	if (err) {
 +		mutex_unlock(&priv->state_lock);
 +		return err;
 +	}
 +out:
    memcpy(&priv->tstamp, &config, sizeof(config));
    mutex_unlock(&priv->state_lock);
@@@ -4157,9 -4452,6 +4158,9 @@@ static int mlx5e_set_vf_link_state(stru
    struct mlx5e_priv *priv = netdev_priv(dev);
    struct mlx5_core_dev *mdev = priv->mdev;
+	if (mlx5e_is_uplink_rep(priv))
 +		return -EOPNOTSUPP;
 +
    return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1,
    				    mlx5_ifla_link2vport(link_state));
  }
@@@ -4171,9 -4463,6 +4172,9 @@@ int mlx5e_get_vf_config(struct net_devi
    struct mlx5_core_dev *mdev = priv->mdev;
    int err;
+	if (!netif_device_present(dev))
 +		return -EOPNOTSUPP;
 +
    err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi);
    if (err)
    	return err;
@@@ -4190,32 -4479,6 +4191,32 @@@ int mlx5e_get_vf_stats(struct net_devic
    return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1,
    				    vf_stats);
  }
 +
 +static bool
 +mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
 +{
 +	struct mlx5e_priv *priv = netdev_priv(dev);
 +
 +	if (!netif_device_present(dev))
 +		return false;
 +
 +	if (!mlx5e_is_uplink_rep(priv))
 +		return false;
 +
 +	return mlx5e_rep_has_offload_stats(dev, attr_id);
 +}
 +
 +static int
 +mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
 +			void *sp)
 +{
 +	struct mlx5e_priv *priv = netdev_priv(dev);
 +
 +	if (!mlx5e_is_uplink_rep(priv))
 +		return -EOPNOTSUPP;
 +
 +	return mlx5e_rep_get_offload_stats(attr_id, dev, sp);
 +}
  #endif
static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type)
@@@ -4574,8 -4837,6 +4575,8 @@@ const struct net_device_ops mlx5e_netde
    .ndo_get_vf_config       = mlx5e_get_vf_config,
    .ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
    .ndo_get_vf_stats        = mlx5e_get_vf_stats,
 +	.ndo_has_offload_stats   = mlx5e_has_offload_stats,
 +	.ndo_get_offload_stats   = mlx5e_get_offload_stats,
  #endif
    .ndo_get_devlink_port    = mlx5e_get_devlink_port,
  };
@@@ -4589,6 -4850,93 +4590,6 @@@ void mlx5e_build_default_indir_rqt(u32 
    	indirection_rqt[i] = i % num_channels;
  }
-static bool slow_pci_heuristic(struct mlx5_core_dev *mdev)
 -{
 -	u32 link_speed = 0;
 -	u32 pci_bw = 0;
 -
 -	mlx5e_port_max_linkspeed(mdev, &link_speed);
 -	pci_bw = pcie_bandwidth_available(mdev->pdev, NULL, NULL, NULL);
 -	mlx5_core_dbg_once(mdev, "Max link speed = %d, PCI BW = %d\n",
 -			   link_speed, pci_bw);
 -
 -#define MLX5E_SLOW_PCI_RATIO (2)
 -
 -	return link_speed && pci_bw &&
 -		link_speed > MLX5E_SLOW_PCI_RATIO * pci_bw;
 -}
 -
 -static struct dim_cq_moder mlx5e_get_def_tx_moderation(u8 cq_period_mode)
 -{
 -	struct dim_cq_moder moder;
 -
 -	moder.cq_period_mode = cq_period_mode;
 -	moder.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS;
 -	moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC;
 -	if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
 -		moder.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC_FROM_CQE;
 -
 -	return moder;
 -}
 -
 -static struct dim_cq_moder mlx5e_get_def_rx_moderation(u8 cq_period_mode)
 -{
 -	struct dim_cq_moder moder;
 -
 -	moder.cq_period_mode = cq_period_mode;
 -	moder.pkts = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS;
 -	moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC;
 -	if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE)
 -		moder.usec = MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE;
 -
 -	return moder;
 -}
 -
 -static u8 mlx5_to_net_dim_cq_period_mode(u8 cq_period_mode)
 -{
 -	return cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE ?
 -		DIM_CQ_PERIOD_MODE_START_FROM_CQE :
 -		DIM_CQ_PERIOD_MODE_START_FROM_EQE;
 -}
 -
 -void mlx5e_reset_tx_moderation(struct mlx5e_params *params, u8 cq_period_mode)
 -{
 -	if (params->tx_dim_enabled) {
 -		u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
 -
 -		params->tx_cq_moderation = net_dim_get_def_tx_moderation(dim_period_mode);
 -	} else {
 -		params->tx_cq_moderation = mlx5e_get_def_tx_moderation(cq_period_mode);
 -	}
 -}
 -
 -void mlx5e_reset_rx_moderation(struct mlx5e_params *params, u8 cq_period_mode)
 -{
 -	if (params->rx_dim_enabled) {
 -		u8 dim_period_mode = mlx5_to_net_dim_cq_period_mode(cq_period_mode);
 -
 -		params->rx_cq_moderation = net_dim_get_def_rx_moderation(dim_period_mode);
 -	} else {
 -		params->rx_cq_moderation = mlx5e_get_def_rx_moderation(cq_period_mode);
 -	}
 -}
 -
 -void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
 -{
 -	mlx5e_reset_tx_moderation(params, cq_period_mode);
 -	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_TX_CQE_BASED_MODER,
 -			params->tx_cq_moderation.cq_period_mode ==
 -				MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
 -}
 -
 -void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode)
 -{
 -	mlx5e_reset_rx_moderation(params, cq_period_mode);
 -	MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_BASED_MODER,
 -			params->rx_cq_moderation.cq_period_mode ==
 -				MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
 -}
 -
  static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout)
  {
    int i;
@@@ -4601,6 -4949,25 +4602,6 @@@
    return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]);
  }
-void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
 -			   struct mlx5e_params *params)
 -{
 -	/* Prefer Striding RQ, unless any of the following holds:
 -	 * - Striding RQ configuration is not possible/supported.
 -	 * - Slow PCI heuristic.
 -	 * - Legacy RQ would use linear SKB while Striding RQ would use non-linear.
 -	 *
 -	 * No XSK params: checking the availability of striding RQ in general.
 -	 */
 -	if (!slow_pci_heuristic(mdev) &&
 -	    mlx5e_striding_rq_possible(mdev, params) &&
 -	    (mlx5e_rx_mpwqe_is_linear_skb(mdev, params, NULL) ||
 -	     !mlx5e_rx_is_linear_skb(params, NULL)))
 -		MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ, true);
 -	mlx5e_set_rq_type(mdev, params);
 -	mlx5e_init_rq_type_params(mdev, params);
 -}
 -
  void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
    		    u16 num_channels)
  {
@@@ -4931,6 -5298,10 +4932,6 @@@ static int mlx5e_nic_init(struct mlx5_c
    if (err)
    	mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
-	err = mlx5e_devlink_port_register(priv);
 -	if (err)
 -		mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
 -
    mlx5e_health_create_reporters(priv);
return 0;
@@@ -4939,6 -5310,7 +4940,6 @@@
  static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
  {
    mlx5e_health_destroy_reporters(priv);
 -	mlx5e_devlink_port_unregister(priv);
    mlx5e_tls_cleanup(priv);
    mlx5e_ipsec_cleanup(priv);
  }
@@@ -4946,7 -5318,6 +4947,7 @@@
  static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
  {
    struct mlx5_core_dev *mdev = priv->mdev;
 +	u16 max_nch = priv->max_nch;
    int err;
mlx5e_create_q_counters(priv);
@@@ -4961,7 -5332,7 +4962,7 @@@
    if (err)
    	goto err_close_drop_rq;
-	err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
 +	err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch);
    if (err)
    	goto err_destroy_indirect_rqts;
@@@ -4969,30 -5340,22 +4970,30 @@@
    if (err)
    	goto err_destroy_direct_rqts;
-	err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
 +	err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch);
    if (err)
    	goto err_destroy_indirect_tirs;
-	err = mlx5e_create_direct_rqts(priv, priv->xsk_tir);
 +	err = mlx5e_create_direct_rqts(priv, priv->xsk_tir, max_nch);
    if (unlikely(err))
    	goto err_destroy_direct_tirs;
-	err = mlx5e_create_direct_tirs(priv, priv->xsk_tir);
 +	err = mlx5e_create_direct_tirs(priv, priv->xsk_tir, max_nch);
    if (unlikely(err))
    	goto err_destroy_xsk_rqts;
+	err = mlx5e_create_direct_rqts(priv, &priv->ptp_tir, 1);
 +	if (err)
 +		goto err_destroy_xsk_tirs;
 +
 +	err = mlx5e_create_direct_tirs(priv, &priv->ptp_tir, 1);
 +	if (err)
 +		goto err_destroy_ptp_rqt;
 +
    err = mlx5e_create_flow_steering(priv);
    if (err) {
    	mlx5_core_warn(mdev, "create flow steering failed, %d\n", err);
 -		goto err_destroy_xsk_tirs;
 +		goto err_destroy_ptp_direct_tir;
    }
err = mlx5e_tc_nic_init(priv);
@@@ -5013,20 -5376,16 +5014,20 @@@ err_tc_nic_cleanup
    mlx5e_tc_nic_cleanup(priv);
  err_destroy_flow_steering:
    mlx5e_destroy_flow_steering(priv);
 +err_destroy_ptp_direct_tir:
 +	mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1);
 +err_destroy_ptp_rqt:
 +	mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1);
  err_destroy_xsk_tirs:
 -	mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
 +	mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch);
  err_destroy_xsk_rqts:
 -	mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
 +	mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch);
  err_destroy_direct_tirs:
 -	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
 +	mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
  err_destroy_indirect_tirs:
    mlx5e_destroy_indirect_tirs(priv);
  err_destroy_direct_rqts:
 -	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
 +	mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
  err_destroy_indirect_rqts:
    mlx5e_destroy_rqt(priv, &priv->indir_rqt);
  err_close_drop_rq:
@@@ -5038,18 -5397,14 +5039,18 @@@ err_destroy_q_counters
static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv)
  {
 +	u16 max_nch = priv->max_nch;
 +
    mlx5e_accel_cleanup_rx(priv);
    mlx5e_tc_nic_cleanup(priv);
    mlx5e_destroy_flow_steering(priv);
 -	mlx5e_destroy_direct_tirs(priv, priv->xsk_tir);
 -	mlx5e_destroy_direct_rqts(priv, priv->xsk_tir);
 -	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
 +	mlx5e_destroy_direct_tirs(priv, &priv->ptp_tir, 1);
 +	mlx5e_destroy_direct_rqts(priv, &priv->ptp_tir, 1);
 +	mlx5e_destroy_direct_tirs(priv, priv->xsk_tir, max_nch);
 +	mlx5e_destroy_direct_rqts(priv, priv->xsk_tir, max_nch);
 +	mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
    mlx5e_destroy_indirect_tirs(priv);
 -	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
 +	mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
    mlx5e_destroy_rqt(priv, &priv->indir_rqt);
    mlx5e_close_drop_rq(&priv->drop_rq);
    mlx5e_destroy_q_counters(priv);
@@@ -5095,7 -5450,7 +5096,7 @@@ static void mlx5e_nic_enable(struct mlx
    	return;
    mlx5e_dcbnl_init_app(priv);
-	queue_work(priv->wq, &priv->set_rx_mode_work);
 +	mlx5e_nic_set_rx_mode(priv);
rtnl_lock();
    if (netif_running(netdev))
@@@ -5118,7 -5473,7 +5119,7 @@@ static void mlx5e_nic_disable(struct ml
    netif_device_detach(priv->netdev);
    rtnl_unlock();
-	queue_work(priv->wq, &priv->set_rx_mode_work);
 +	mlx5e_nic_set_rx_mode(priv);
mlx5e_hv_vhca_stats_destroy(priv);
    if (mlx5e_monitor_counter_supported(priv))
@@@ -5157,7 -5512,6 +5158,7 @@@ static const struct mlx5e_profile mlx5e
    .rq_groups	   = MLX5E_NUM_RQ_GROUPS(XSK),
    .stats_grps	   = mlx5e_nic_stats_grps,
    .stats_grps_num	   = mlx5e_nic_stats_grps_num,
 +	.rx_ptp_support    = true,
  };
/* mlx5e generic netdev management API (move to en_common.c) */
@@@ -5392,11 -5746,6 +5393,11 @@@ rollback
    return err;
  }
+void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv)
 +{
 +	mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL);
 +}
 +
  void mlx5e_destroy_netdev(struct mlx5e_priv *priv)
  {
    struct net_device *netdev = priv->netdev;
@@@ -5479,17 -5828,10 +5480,17 @@@ static int mlx5e_probe(struct auxiliary
priv->profile = profile;
    priv->ppriv = NULL;
 +
 +	err = mlx5e_devlink_port_register(priv);
 +	if (err) {
 +		mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err);
 +		goto err_destroy_netdev;
 +	}
 +
    err = profile->init(mdev, netdev);
    if (err) {
    	mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err);
 -		goto err_destroy_netdev;
 +		goto err_devlink_cleanup;
    }
err = mlx5e_resume(adev);
@@@ -5507,15 -5849,12 +5508,15 @@@
    mlx5e_devlink_port_type_eth_set(priv);
mlx5e_dcbnl_init_app(priv);
 +	mlx5_uplink_netdev_set(mdev, netdev);
    return 0;
err_resume:
    mlx5e_suspend(adev, state);
  err_profile_cleanup:
    profile->cleanup(priv);
 +err_devlink_cleanup:
 +	mlx5e_devlink_port_unregister(priv);
  err_destroy_netdev:
    mlx5e_destroy_netdev(priv);
    return err;
@@@ -5530,7 -5869,6 +5531,7 @@@ static void mlx5e_remove(struct auxilia
    unregister_netdev(priv->netdev);
    mlx5e_suspend(adev, state);
    priv->profile->cleanup(priv);
 +	mlx5e_devlink_port_unregister(priv);
    mlx5e_destroy_netdev(priv);
  }
@@@ -5556,18 -5894,18 +5557,18 @@@ int mlx5e_init(void
mlx5e_ipsec_build_inverse_table();
    mlx5e_build_ptys2ethtool_map();
 -	ret = mlx5e_rep_init();
 +	ret = auxiliary_driver_register(&mlx5e_driver);
    if (ret)
    	return ret;
-	ret = auxiliary_driver_register(&mlx5e_driver);
 +	ret = mlx5e_rep_init();
    if (ret)
 -		mlx5e_rep_cleanup();
 +		auxiliary_driver_unregister(&mlx5e_driver);
    return ret;
  }
void mlx5e_cleanup(void)
  {
 -	auxiliary_driver_unregister(&mlx5e_driver);
    mlx5e_rep_cleanup();
 +	auxiliary_driver_unregister(&mlx5e_driver);
  }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 58a2c1abb195,8d39bfee84a9..e58ef8c713e4
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@@ -40,12 -40,10 +40,12 @@@
  #include "eswitch.h"
  #include "en.h"
  #include "en_rep.h"
 +#include "en/params.h"
  #include "en/txrx.h"
  #include "en_tc.h"
  #include "en/rep/tc.h"
  #include "en/rep/neigh.h"
 +#include "en/devlink.h"
  #include "fs_core.h"
  #include "lib/mlx5.h"
  #define CREATE_TRACE_POINTS
@@@ -71,6 -69,16 +71,6 @@@ static void mlx5e_rep_get_drvinfo(struc
    	 fw_rev_sub(mdev), mdev->board_id);
  }
-static void mlx5e_uplink_rep_get_drvinfo(struct net_device *dev,
 -					 struct ethtool_drvinfo *drvinfo)
 -{
 -	struct mlx5e_priv *priv = netdev_priv(dev);
 -
 -	mlx5e_rep_get_drvinfo(dev, drvinfo);
 -	strlcpy(drvinfo->bus_info, pci_name(priv->mdev->pdev),
 -		sizeof(drvinfo->bus_info));
 -}
 -
  static const struct counter_desc sw_rep_stats_desc[] = {
    { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) },
    { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) },
@@@ -277,6 -285,46 +277,6 @@@ static u32 mlx5e_rep_get_rxfh_indir_siz
    return mlx5e_ethtool_get_rxfh_indir_size(priv);
  }
-static void mlx5e_uplink_rep_get_pause_stats(struct net_device *netdev,
 -					     struct ethtool_pause_stats *stats)
 -{
 -	struct mlx5e_priv *priv = netdev_priv(netdev);
 -
 -	mlx5e_stats_pause_get(priv, stats);
 -}
 -
 -static void mlx5e_uplink_rep_get_pauseparam(struct net_device *netdev,
 -					    struct ethtool_pauseparam *pauseparam)
 -{
 -	struct mlx5e_priv *priv = netdev_priv(netdev);
 -
 -	mlx5e_ethtool_get_pauseparam(priv, pauseparam);
 -}
 -
 -static int mlx5e_uplink_rep_set_pauseparam(struct net_device *netdev,
 -					   struct ethtool_pauseparam *pauseparam)
 -{
 -	struct mlx5e_priv *priv = netdev_priv(netdev);
 -
 -	return mlx5e_ethtool_set_pauseparam(priv, pauseparam);
 -}
 -
 -static int mlx5e_uplink_rep_get_link_ksettings(struct net_device *netdev,
 -					       struct ethtool_link_ksettings *link_ksettings)
 -{
 -	struct mlx5e_priv *priv = netdev_priv(netdev);
 -
 -	return mlx5e_ethtool_get_link_ksettings(priv, link_ksettings);
 -}
 -
 -static int mlx5e_uplink_rep_set_link_ksettings(struct net_device *netdev,
 -					       const struct ethtool_link_ksettings *link_ksettings)
 -{
 -	struct mlx5e_priv *priv = netdev_priv(netdev);
 -
 -	return mlx5e_ethtool_set_link_ksettings(priv, link_ksettings);
 -}
 -
  static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
    .supported_coalesce_params = ETHTOOL_COALESCE_USECS |
    			     ETHTOOL_COALESCE_MAX_FRAMES |
@@@ -296,6 -344,34 +296,6 @@@
    .get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
  };
-static const struct ethtool_ops mlx5e_uplink_rep_ethtool_ops = {
 -	.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
 -				     ETHTOOL_COALESCE_MAX_FRAMES |
 -				     ETHTOOL_COALESCE_USE_ADAPTIVE,
 -	.get_drvinfo	   = mlx5e_uplink_rep_get_drvinfo,
 -	.get_link	   = ethtool_op_get_link,
 -	.get_strings       = mlx5e_rep_get_strings,
 -	.get_sset_count    = mlx5e_rep_get_sset_count,
 -	.get_ethtool_stats = mlx5e_rep_get_ethtool_stats,
 -	.get_ringparam     = mlx5e_rep_get_ringparam,
 -	.set_ringparam     = mlx5e_rep_set_ringparam,
 -	.get_channels      = mlx5e_rep_get_channels,
 -	.set_channels      = mlx5e_rep_set_channels,
 -	.get_coalesce      = mlx5e_rep_get_coalesce,
 -	.set_coalesce      = mlx5e_rep_set_coalesce,
 -	.get_link_ksettings = mlx5e_uplink_rep_get_link_ksettings,
 -	.set_link_ksettings = mlx5e_uplink_rep_set_link_ksettings,
 -	.get_rxfh_key_size   = mlx5e_rep_get_rxfh_key_size,
 -	.get_rxfh_indir_size = mlx5e_rep_get_rxfh_indir_size,
 -	.get_rxfh          = mlx5e_get_rxfh,
 -	.set_rxfh          = mlx5e_set_rxfh,
 -	.get_rxnfc         = mlx5e_get_rxnfc,
 -	.set_rxnfc         = mlx5e_set_rxnfc,
 -	.get_pause_stats   = mlx5e_uplink_rep_get_pause_stats,
 -	.get_pauseparam    = mlx5e_uplink_rep_get_pauseparam,
 -	.set_pauseparam    = mlx5e_uplink_rep_set_pauseparam,
 -};
 -
  static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
    			 struct mlx5_eswitch_rep *rep)
  {
@@@ -335,7 -411,8 +335,7 @@@ static int mlx5e_sqs2vport_start(struc
    	}
/* Add re-inject rule to the PF/representor sqs */
 -		flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw,
 -								rep->vport,
 +		flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, rep,
    							sqns_array[i]);
    	if (IS_ERR(flow_rule)) {
    		err = PTR_ERR(flow_rule);
@@@ -445,7 -522,7 +445,7 @@@ bool mlx5e_is_uplink_rep(struct mlx5e_p
    return (rep->vport == MLX5_VPORT_UPLINK);
  }
-static bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
 +bool mlx5e_rep_has_offload_stats(const struct net_device *dev, int attr_id)
  {
    switch (attr_id) {
    case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@@ -465,8 -542,8 +465,8 @@@ mlx5e_get_sw_stats64(const struct net_d
    return 0;
  }
-static int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
 -				       void *sp)
 +int mlx5e_rep_get_offload_stats(int attr_id, const struct net_device *dev,
 +				void *sp)
  {
    switch (attr_id) {
    case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@@ -491,6 -568,34 +491,6 @@@ static int mlx5e_rep_change_mtu(struct 
    return mlx5e_change_mtu(netdev, new_mtu, NULL);
  }
-static int mlx5e_uplink_rep_change_mtu(struct net_device *netdev, int new_mtu)
 -{
 -	return mlx5e_change_mtu(netdev, new_mtu, mlx5e_set_dev_port_mtu_ctx);
 -}
 -
 -static int mlx5e_uplink_rep_set_mac(struct net_device *netdev, void *addr)
 -{
 -	struct sockaddr *saddr = addr;
 -
 -	if (!is_valid_ether_addr(saddr->sa_data))
 -		return -EADDRNOTAVAIL;
 -
 -	ether_addr_copy(netdev->dev_addr, saddr->sa_data);
 -	return 0;
 -}
 -
 -static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,
 -					__be16 vlan_proto)
 -{
 -	netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n");
 -
 -	if (vlan != 0)
 -		return -EOPNOTSUPP;
 -
 -	/* allow setting 0-vid for compatibility with libvirt */
 -	return 0;
 -}
 -
  static struct devlink_port *mlx5e_rep_get_devlink_port(struct net_device *netdev)
  {
    struct mlx5e_priv *priv = netdev_priv(netdev);
@@@ -536,10 -641,29 +536,10 @@@ static const struct net_device_ops mlx5
    .ndo_change_carrier      = mlx5e_rep_change_carrier,
  };
-static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = {
 -	.ndo_open                = mlx5e_open,
 -	.ndo_stop                = mlx5e_close,
 -	.ndo_start_xmit          = mlx5e_xmit,
 -	.ndo_set_mac_address     = mlx5e_uplink_rep_set_mac,
 -	.ndo_setup_tc            = mlx5e_rep_setup_tc,
 -	.ndo_get_devlink_port    = mlx5e_rep_get_devlink_port,
 -	.ndo_get_stats64         = mlx5e_get_stats,
 -	.ndo_has_offload_stats	 = mlx5e_rep_has_offload_stats,
 -	.ndo_get_offload_stats	 = mlx5e_rep_get_offload_stats,
 -	.ndo_change_mtu          = mlx5e_uplink_rep_change_mtu,
 -	.ndo_features_check      = mlx5e_features_check,
 -	.ndo_set_vf_mac          = mlx5e_set_vf_mac,
 -	.ndo_set_vf_rate         = mlx5e_set_vf_rate,
 -	.ndo_get_vf_config       = mlx5e_get_vf_config,
 -	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
 -	.ndo_set_vf_vlan         = mlx5e_uplink_rep_set_vf_vlan,
 -	.ndo_set_features        = mlx5e_set_features,
 -};
 -
  bool mlx5e_eswitch_uplink_rep(struct net_device *netdev)
  {
 -	return netdev->netdev_ops == &mlx5e_netdev_ops_uplink_rep;
 +	return netdev->netdev_ops == &mlx5e_netdev_ops &&
 +	       mlx5e_is_uplink_rep(netdev_priv(netdev));
  }
bool mlx5e_eswitch_vf_rep(struct net_device *netdev)
@@@ -589,15 -713,26 +589,15 @@@ static void mlx5e_build_rep_params(stru
  }
static void mlx5e_build_rep_netdev(struct net_device *netdev,
 -				   struct mlx5_core_dev *mdev,
 -				   struct mlx5_eswitch_rep *rep)
 +				   struct mlx5_core_dev *mdev)
  {
    SET_NETDEV_DEV(netdev, mdev->device);
 -	if (rep->vport == MLX5_VPORT_UPLINK) {
 -		netdev->netdev_ops = &mlx5e_netdev_ops_uplink_rep;
 -		/* we want a persistent mac for the uplink rep */
 -		mlx5_query_mac_address(mdev, netdev->dev_addr);
 -		netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
 -		mlx5e_dcbnl_build_rep_netdev(netdev);
 -	} else {
 -		netdev->netdev_ops = &mlx5e_netdev_ops_rep;
 -		eth_hw_addr_random(netdev);
 -		netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
 -	}
 +	netdev->netdev_ops = &mlx5e_netdev_ops_rep;
 +	eth_hw_addr_random(netdev);
 +	netdev->ethtool_ops = &mlx5e_rep_ethtool_ops;
netdev->watchdog_timeo    = 15 * HZ;
-	netdev->features       |= NETIF_F_NETNS_LOCAL;
 -
  #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
    netdev->hw_features    |= NETIF_F_HW_TC;
  #endif
@@@ -609,9 -744,12 +609,9 @@@
    netdev->hw_features    |= NETIF_F_TSO6;
    netdev->hw_features    |= NETIF_F_RXCSUM;
-	if (rep->vport == MLX5_VPORT_UPLINK)
 -		netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
 -	else
 -		netdev->features |= NETIF_F_VLAN_CHALLENGED;
 -
    netdev->features |= netdev->hw_features;
 +	netdev->features |= NETIF_F_VLAN_CHALLENGED;
 +	netdev->features |= NETIF_F_NETNS_LOCAL;
  }
static int mlx5e_init_rep(struct mlx5_core_dev *mdev,
@@@ -752,7 -890,6 +752,7 @@@ int mlx5e_rep_bond_update(struct mlx5e_
  static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
  {
    struct mlx5_core_dev *mdev = priv->mdev;
 +	u16 max_nch = priv->max_nch;
    int err;
mlx5e_init_l2_addr(priv);
@@@ -767,7 -904,7 +767,7 @@@
    if (err)
    	goto err_close_drop_rq;
-	err = mlx5e_create_direct_rqts(priv, priv->direct_tir);
 +	err = mlx5e_create_direct_rqts(priv, priv->direct_tir, max_nch);
    if (err)
    	goto err_destroy_indirect_rqts;
@@@ -775,7 -912,7 +775,7 @@@
    if (err)
    	goto err_destroy_direct_rqts;
-	err = mlx5e_create_direct_tirs(priv, priv->direct_tir);
 +	err = mlx5e_create_direct_tirs(priv, priv->direct_tir, max_nch);
    if (err)
    	goto err_destroy_indirect_tirs;
@@@ -800,11 -937,11 +800,11 @@@ err_destroy_root_ft
  err_destroy_ttc_table:
    mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
  err_destroy_direct_tirs:
 -	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
 +	mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
  err_destroy_indirect_tirs:
    mlx5e_destroy_indirect_tirs(priv);
  err_destroy_direct_rqts:
 -	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
 +	mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
  err_destroy_indirect_rqts:
    mlx5e_destroy_rqt(priv, &priv->indir_rqt);
  err_close_drop_rq:
@@@ -814,15 -951,13 +814,15 @@@
static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
  {
 +	u16 max_nch = priv->max_nch;
 +
    mlx5e_ethtool_cleanup_steering(priv);
    rep_vport_rx_rule_destroy(priv);
    mlx5e_destroy_rep_root_ft(priv);
    mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
 -	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
 +	mlx5e_destroy_direct_tirs(priv, priv->direct_tir, max_nch);
    mlx5e_destroy_indirect_tirs(priv);
 -	mlx5e_destroy_direct_rqts(priv, priv->direct_tir);
 +	mlx5e_destroy_direct_rqts(priv, priv->direct_tir, max_nch);
    mlx5e_destroy_rqt(priv, &priv->indir_rqt);
    mlx5e_close_drop_rq(&priv->drop_rq);
  }
@@@ -972,22 -1107,15 +972,23 @@@ static void mlx5e_uplink_rep_enable(str
mlx5e_rep_tc_enable(priv);
- 	mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK,
- 				      0, 0, MLX5_VPORT_ADMIN_STATE_AUTO);
+ 	if (MLX5_CAP_GEN(mdev, uplink_follow))
+ 		mlx5_modify_vport_admin_state(mdev, MLX5_VPORT_STATE_OP_MOD_UPLINK,
+ 					      0, 0, MLX5_VPORT_ADMIN_STATE_AUTO);
    mlx5_lag_add(mdev, netdev);
    priv->events_nb.notifier_call = uplink_rep_async_event;
    mlx5_notifier_register(mdev, &priv->events_nb);
    mlx5e_dcbnl_initialize(priv);
    mlx5e_dcbnl_init_app(priv);
    mlx5e_rep_neigh_init(rpriv);
 +
 +	netdev->wanted_features |= NETIF_F_HW_TC;
 +
 +	rtnl_lock();
 +	if (netif_running(netdev))
 +		mlx5e_open(netdev);
 +	netif_device_attach(netdev);
 +	rtnl_unlock();
  }
static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
@@@ -995,12 -1123,6 +996,12 @@@
    struct mlx5e_rep_priv *rpriv = priv->ppriv;
    struct mlx5_core_dev *mdev = priv->mdev;
+	rtnl_lock();
 +	if (netif_running(priv->netdev))
 +		mlx5e_close(priv->netdev);
 +	netif_device_detach(priv->netdev);
 +	rtnl_unlock();
 +
    mlx5e_rep_neigh_cleanup(rpriv);
    mlx5e_dcbnl_delete_app(priv);
    mlx5_notifier_unregister(mdev, &priv->events_nb);
@@@ -1061,7 -1183,6 +1062,7 @@@ static const struct mlx5e_profile mlx5e
    .rq_groups		= MLX5E_NUM_RQ_GROUPS(REGULAR),
    .stats_grps		= mlx5e_rep_stats_grps,
    .stats_grps_num		= mlx5e_rep_stats_grps_num,
 +	.rx_ptp_support		= false,
  };
static const struct mlx5e_profile mlx5e_uplink_rep_profile = {
@@@ -1078,65 -1199,33 +1079,65 @@@
    .update_carrier	        = mlx5e_update_carrier,
    .rx_handlers            = &mlx5e_rx_handlers_rep,
    .max_tc			= MLX5E_MAX_NUM_TC,
 -	.rq_groups		= MLX5E_NUM_RQ_GROUPS(REGULAR),
 +	/* XSK is needed so we can replace profile with NIC netdev */
 +	.rq_groups		= MLX5E_NUM_RQ_GROUPS(XSK),
    .stats_grps		= mlx5e_ul_rep_stats_grps,
    .stats_grps_num		= mlx5e_ul_rep_stats_grps_num,
 +	.rx_ptp_support		= false,
  };
/* e-Switch vport representors */
  static int
 -mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 +mlx5e_vport_uplink_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 +{
 +	struct mlx5e_priv *priv = netdev_priv(mlx5_uplink_netdev_get(dev));
 +	struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
 +	struct devlink_port *dl_port;
 +	int err;
 +
 +	rpriv->netdev = priv->netdev;
 +
 +	err = mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile,
 +					  rpriv);
 +	if (err)
 +		return err;
 +
 +	dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
 +	if (dl_port)
 +		devlink_port_type_eth_set(dl_port, rpriv->netdev);
 +
 +	return 0;
 +}
 +
 +static void
 +mlx5e_vport_uplink_rep_unload(struct mlx5e_rep_priv *rpriv)
 +{
 +	struct net_device *netdev = rpriv->netdev;
 +	struct devlink_port *dl_port;
 +	struct mlx5_core_dev *dev;
 +	struct mlx5e_priv *priv;
 +
 +	priv = netdev_priv(netdev);
 +	dev = priv->mdev;
 +
 +	dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
 +	if (dl_port)
 +		devlink_port_type_clear(dl_port);
 +	mlx5e_netdev_attach_nic_profile(priv);
 +}
 +
 +static int
 +mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
  {
 +	struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep);
    const struct mlx5e_profile *profile;
 -	struct mlx5e_rep_priv *rpriv;
    struct devlink_port *dl_port;
    struct net_device *netdev;
    struct mlx5e_priv *priv;
    unsigned int txqs, rxqs;
    int nch, err;
-	rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
 -	if (!rpriv)
 -		return -ENOMEM;
 -
 -	/* rpriv->rep to be looked up when profile->init() is called */
 -	rpriv->rep = rep;
 -
 -	profile = (rep->vport == MLX5_VPORT_UPLINK) ?
 -		  &mlx5e_uplink_rep_profile : &mlx5e_rep_profile;
 -
 +	profile = &mlx5e_rep_profile;
    nch = mlx5e_get_max_num_channels(dev);
    txqs = nch * profile->max_tc;
    rxqs = nch * profile->rq_groups;
@@@ -1145,11 -1234,21 +1146,11 @@@
    	mlx5_core_warn(dev,
    		       "Failed to create representor netdev for vport %d\n",
    		       rep->vport);
 -		kfree(rpriv);
    	return -EINVAL;
    }
-	mlx5e_build_rep_netdev(netdev, dev, rep);
 -
 +	mlx5e_build_rep_netdev(netdev, dev);
    rpriv->netdev = netdev;
 -	rep->rep_data[REP_ETH].priv = rpriv;
 -	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
 -
 -	if (rep->vport == MLX5_VPORT_UPLINK) {
 -		err = mlx5e_create_mdev_resources(dev);
 -		if (err)
 -			goto err_destroy_netdev;
 -	}
priv = netdev_priv(netdev);
    priv->profile = profile;
@@@ -1157,7 -1256,7 +1158,7 @@@
    err = profile->init(dev, netdev);
    if (err) {
    	netdev_warn(netdev, "rep profile init failed, %d\n", err);
 -		goto err_destroy_mdev_resources;
 +		goto err_destroy_netdev;
    }
err = mlx5e_attach_netdev(netdev_priv(netdev));
@@@ -1187,34 -1286,13 +1188,34 @@@ err_detach_netdev
  err_cleanup_profile:
    priv->profile->cleanup(priv);
-err_destroy_mdev_resources:
 -	if (rep->vport == MLX5_VPORT_UPLINK)
 -		mlx5e_destroy_mdev_resources(dev);
 -
  err_destroy_netdev:
    mlx5e_destroy_netdev(netdev_priv(netdev));
 -	kfree(rpriv);
 +	return err;
 +}
 +
 +static int
 +mlx5e_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 +{
 +	struct mlx5e_rep_priv *rpriv;
 +	int err;
 +
 +	rpriv = kzalloc(sizeof(*rpriv), GFP_KERNEL);
 +	if (!rpriv)
 +		return -ENOMEM;
 +
 +	/* rpriv->rep to be looked up when profile->init() is called */
 +	rpriv->rep = rep;
 +	rep->rep_data[REP_ETH].priv = rpriv;
 +	INIT_LIST_HEAD(&rpriv->vport_sqs_list);
 +
 +	if (rep->vport == MLX5_VPORT_UPLINK)
 +		err = mlx5e_vport_uplink_rep_load(dev, rep);
 +	else
 +		err = mlx5e_vport_vf_rep_load(dev, rep);
 +
 +	if (err)
 +		kfree(rpriv);
 +
    return err;
  }
@@@ -1228,19 -1306,15 +1229,19 @@@ mlx5e_vport_rep_unload(struct mlx5_eswi
    struct devlink_port *dl_port;
    void *ppriv = priv->ppriv;
+	if (rep->vport == MLX5_VPORT_UPLINK) {
 +		mlx5e_vport_uplink_rep_unload(rpriv);
 +		goto free_ppriv;
 +	}
 +
    dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch, rpriv->rep->vport);
    if (dl_port)
    	devlink_port_type_clear(dl_port);
    unregister_netdev(netdev);
    mlx5e_detach_netdev(priv);
    priv->profile->cleanup(priv);
 -	if (rep->vport == MLX5_VPORT_UPLINK)
 -		mlx5e_destroy_mdev_resources(priv->mdev);
    mlx5e_destroy_netdev(priv);
 +free_ppriv:
    kfree(ppriv); /* mlx5e_rep_priv */
  }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index f67e51d8291a,88a01c59ce61..ae0570ea08bf
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@@ -116,7 -116,6 +116,6 @@@ static const struct counter_desc sw_sta
  #ifdef CONFIG_MLX5_EN_TLS
    { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) },
    { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_bytes) },
- 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ctx) },
    { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_ooo) },
    { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_packets) },
    { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_dump_bytes) },
@@@ -180,8 -179,6 +179,6 @@@
  #ifdef CONFIG_MLX5_EN_TLS
    { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_packets) },
    { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_decrypted_bytes) },
- 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_ctx) },
- 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_del) },
    { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_pkt) },
    { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_start) },
    { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_tls_resync_req_end) },
@@@ -342,8 -339,6 +339,6 @@@ static void mlx5e_stats_grp_sw_update_s
  #ifdef CONFIG_MLX5_EN_TLS
    s->rx_tls_decrypted_packets   += rq_stats->tls_decrypted_packets;
    s->rx_tls_decrypted_bytes     += rq_stats->tls_decrypted_bytes;
- 	s->rx_tls_ctx                 += rq_stats->tls_ctx;
- 	s->rx_tls_del                 += rq_stats->tls_del;
    s->rx_tls_resync_req_pkt      += rq_stats->tls_resync_req_pkt;
    s->rx_tls_resync_req_start    += rq_stats->tls_resync_req_start;
    s->rx_tls_resync_req_end      += rq_stats->tls_resync_req_end;
@@@ -390,7 -385,6 +385,6 @@@ static void mlx5e_stats_grp_sw_update_s
  #ifdef CONFIG_MLX5_EN_TLS
    s->tx_tls_encrypted_packets += sq_stats->tls_encrypted_packets;
    s->tx_tls_encrypted_bytes   += sq_stats->tls_encrypted_bytes;
- 	s->tx_tls_ctx               += sq_stats->tls_ctx;
    s->tx_tls_ooo               += sq_stats->tls_ooo;
    s->tx_tls_dump_bytes        += sq_stats->tls_dump_bytes;
    s->tx_tls_dump_packets      += sq_stats->tls_dump_packets;
@@@ -407,21 -401,13 +401,21 @@@ static void mlx5e_stats_grp_sw_update_s
  {
    int i;
-	if (!priv->port_ptp_opened)
 +	if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
    	return;
-	mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->port_ptp_stats.ch);
 +	mlx5e_stats_grp_sw_update_stats_ch_stats(s, &priv->ptp_stats.ch);
-	for (i = 0; i < priv->max_opened_tc; i++) {
 -		mlx5e_stats_grp_sw_update_stats_sq(s, &priv->port_ptp_stats.sq[i]);
 +	if (priv->tx_ptp_opened) {
 +		for (i = 0; i < priv->max_opened_tc; i++) {
 +			mlx5e_stats_grp_sw_update_stats_sq(s, &priv->ptp_stats.sq[i]);
 +
 +			/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
 +			barrier();
 +		}
 +	}
 +	if (priv->rx_ptp_opened) {
 +		mlx5e_stats_grp_sw_update_stats_rq_stats(s, &priv->ptp_stats.rq);
/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
    	barrier();
@@@ -1630,8 -1616,6 +1624,6 @@@ static const struct counter_desc rq_sta
  #ifdef CONFIG_MLX5_EN_TLS
    { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_packets) },
    { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_decrypted_bytes) },
- 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_ctx) },
- 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_del) },
    { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_pkt) },
    { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_start) },
    { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, tls_resync_req_end) },
@@@ -1658,7 -1642,6 +1650,6 @@@ static const struct counter_desc sq_sta
  #ifdef CONFIG_MLX5_EN_TLS
    { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
    { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
- 	{ MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ctx) },
    { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
    { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
    { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
@@@ -1768,38 -1751,6 +1759,38 @@@ static const struct counter_desc ptp_cq
    { MLX5E_DECLARE_PTP_CQ_STAT(struct mlx5e_ptp_cq_stats, abort_abs_diff_ns) },
  };
+static const struct counter_desc ptp_rq_stats_desc[] = {
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, packets) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, bytes) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_complete_tail_slow) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, csum_none) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_drop) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, xdp_redirect) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_packets) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, lro_bytes) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, ecn_mark) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, removed_vlan_packets) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, wqe_err) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_cqes) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, mpwqe_filler_strides) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, oversize_pkts_sw_drop) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) },
 +	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) },
 +};
 +
  static const struct counter_desc qos_sq_stats_desc[] = {
    { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, packets) },
    { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, bytes) },
@@@ -1816,7 -1767,6 +1807,6 @@@
  #ifdef CONFIG_MLX5_EN_TLS
    { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
    { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },
- 	{ MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ctx) },
    { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_ooo) },
    { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_packets) },
    { MLX5E_DECLARE_QOS_TX_STAT(struct mlx5e_sq_stats, tls_dump_bytes) },
@@@ -1845,7 -1795,6 +1835,7 @@@
  #define NUM_PTP_SQ_STATS		ARRAY_SIZE(ptp_sq_stats_desc)
  #define NUM_PTP_CH_STATS		ARRAY_SIZE(ptp_ch_stats_desc)
  #define NUM_PTP_CQ_STATS		ARRAY_SIZE(ptp_cq_stats_desc)
 +#define NUM_PTP_RQ_STATS                ARRAY_SIZE(ptp_rq_stats_desc)
  #define NUM_QOS_SQ_STATS		ARRAY_SIZE(qos_sq_stats_desc)
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(qos)
@@@ -1892,46 -1841,32 +1882,46 @@@ static MLX5E_DECLARE_STATS_GRP_OP_UPDAT
static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ptp)
  {
 -	return priv->port_ptp_opened ?
 -	       NUM_PTP_CH_STATS +
 -	       ((NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc) :
 -	       0;
 +	int num = NUM_PTP_CH_STATS;
 +
 +	if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
 +		return 0;
 +
 +	if (priv->tx_ptp_opened)
 +		num += (NUM_PTP_SQ_STATS + NUM_PTP_CQ_STATS) * priv->max_opened_tc;
 +	if (priv->rx_ptp_opened)
 +		num += NUM_PTP_RQ_STATS;
 +
 +	return num;
  }
static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ptp)
  {
    int i, tc;
-	if (!priv->port_ptp_opened)
 +	if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
    	return idx;
for (i = 0; i < NUM_PTP_CH_STATS; i++)
    	sprintf(data + (idx++) * ETH_GSTRING_LEN,
    		ptp_ch_stats_desc[i].format);
-	for (tc = 0; tc < priv->max_opened_tc; tc++)
 -		for (i = 0; i < NUM_PTP_SQ_STATS; i++)
 -			sprintf(data + (idx++) * ETH_GSTRING_LEN,
 -				ptp_sq_stats_desc[i].format, tc);
 +	if (priv->tx_ptp_opened) {
 +		for (tc = 0; tc < priv->max_opened_tc; tc++)
 +			for (i = 0; i < NUM_PTP_SQ_STATS; i++)
 +				sprintf(data + (idx++) * ETH_GSTRING_LEN,
 +					ptp_sq_stats_desc[i].format, tc);
-	for (tc = 0; tc < priv->max_opened_tc; tc++)
 -		for (i = 0; i < NUM_PTP_CQ_STATS; i++)
 +		for (tc = 0; tc < priv->max_opened_tc; tc++)
 +			for (i = 0; i < NUM_PTP_CQ_STATS; i++)
 +				sprintf(data + (idx++) * ETH_GSTRING_LEN,
 +					ptp_cq_stats_desc[i].format, tc);
 +	}
 +	if (priv->rx_ptp_opened) {
 +		for (i = 0; i < NUM_PTP_RQ_STATS; i++)
    		sprintf(data + (idx++) * ETH_GSTRING_LEN,
 -				ptp_cq_stats_desc[i].format, tc);
 +				ptp_rq_stats_desc[i].format, 0);
 +	}
    return idx;
  }
@@@ -1939,33 -1874,26 +1929,33 @@@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_
  {
    int i, tc;
-	if (!priv->port_ptp_opened)
 +	if (!priv->tx_ptp_opened && !priv->rx_ptp_opened)
    	return idx;
for (i = 0; i < NUM_PTP_CH_STATS; i++)
    	data[idx++] =
 -			MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.ch,
 +			MLX5E_READ_CTR64_CPU(&priv->ptp_stats.ch,
    				     ptp_ch_stats_desc, i);
-	for (tc = 0; tc < priv->max_opened_tc; tc++)
 -		for (i = 0; i < NUM_PTP_SQ_STATS; i++)
 -			data[idx++] =
 -				MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.sq[tc],
 -						     ptp_sq_stats_desc, i);
 -
 -	for (tc = 0; tc < priv->max_opened_tc; tc++)
 -		for (i = 0; i < NUM_PTP_CQ_STATS; i++)
 +	if (priv->tx_ptp_opened) {
 +		for (tc = 0; tc < priv->max_opened_tc; tc++)
 +			for (i = 0; i < NUM_PTP_SQ_STATS; i++)
 +				data[idx++] =
 +					MLX5E_READ_CTR64_CPU(&priv->ptp_stats.sq[tc],
 +							     ptp_sq_stats_desc, i);
 +
 +		for (tc = 0; tc < priv->max_opened_tc; tc++)
 +			for (i = 0; i < NUM_PTP_CQ_STATS; i++)
 +				data[idx++] =
 +					MLX5E_READ_CTR64_CPU(&priv->ptp_stats.cq[tc],
 +							     ptp_cq_stats_desc, i);
 +	}
 +	if (priv->rx_ptp_opened) {
 +		for (i = 0; i < NUM_PTP_RQ_STATS; i++)
    		data[idx++] =
 -				MLX5E_READ_CTR64_CPU(&priv->port_ptp_stats.cq[tc],
 -						     ptp_cq_stats_desc, i);
 -
 +				MLX5E_READ_CTR64_CPU(&priv->ptp_stats.rq,
 +						     ptp_rq_stats_desc, i);
 +	}
    return idx;
  }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index ca398eac09c1,adf9b7b8b712..21d3b8747f93
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@@ -54,7 -54,6 +54,7 @@@
  #define MLX5E_DECLARE_PTP_TX_STAT(type, fld) "ptp_tx%d_"#fld, offsetof(type, fld)
  #define MLX5E_DECLARE_PTP_CH_STAT(type, fld) "ptp_ch_"#fld, offsetof(type, fld)
  #define MLX5E_DECLARE_PTP_CQ_STAT(type, fld) "ptp_cq%d_"#fld, offsetof(type, fld)
 +#define MLX5E_DECLARE_PTP_RQ_STAT(type, fld) "ptp_rq%d_"#fld, offsetof(type, fld)
#define MLX5E_DECLARE_QOS_TX_STAT(type, fld) "qos_tx%d_"#fld, offsetof(type, fld)
@@@ -192,7 -191,6 +192,6 @@@ struct mlx5e_sw_stats 
  #ifdef CONFIG_MLX5_EN_TLS
    u64 tx_tls_encrypted_packets;
    u64 tx_tls_encrypted_bytes;
- 	u64 tx_tls_ctx;
    u64 tx_tls_ooo;
    u64 tx_tls_dump_packets;
    u64 tx_tls_dump_bytes;
@@@ -203,8 -201,6 +202,6 @@@
u64 rx_tls_decrypted_packets;
    u64 rx_tls_decrypted_bytes;
- 	u64 rx_tls_ctx;
- 	u64 rx_tls_del;
    u64 rx_tls_resync_req_pkt;
    u64 rx_tls_resync_req_start;
    u64 rx_tls_resync_req_end;
@@@ -335,8 -331,6 +332,6 @@@ struct mlx5e_rq_stats 
  #ifdef CONFIG_MLX5_EN_TLS
    u64 tls_decrypted_packets;
    u64 tls_decrypted_bytes;
- 	u64 tls_ctx;
- 	u64 tls_del;
    u64 tls_resync_req_pkt;
    u64 tls_resync_req_start;
    u64 tls_resync_req_end;
@@@ -365,7 -359,6 +360,6 @@@ struct mlx5e_sq_stats 
  #ifdef CONFIG_MLX5_EN_TLS
    u64 tls_encrypted_packets;
    u64 tls_encrypted_bytes;
- 	u64 tls_ctx;
    u64 tls_ooo;
    u64 tls_dump_packets;
    u64 tls_dump_bytes;
diff --combined drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 4e8381030d77,1fa9c18563da..77c0ca655975
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@@ -271,7 -271,7 +271,7 @@@ static void init_eq_buf(struct mlx5_eq 
    struct mlx5_eqe *eqe;
    int i;
-	for (i = 0; i < eq->nent; i++) {
 +	for (i = 0; i < eq_get_size(eq); i++) {
    	eqe = get_eqe(eq, i);
    	eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
    }
@@@ -281,10 -281,8 +281,10 @@@ static in
  create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
          struct mlx5_eq_param *param)
  {
 +	u8 log_eq_size = order_base_2(param->nent + MLX5_NUM_SPARE_EQE);
    struct mlx5_cq_table *cq_table = &eq->cq_table;
    u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
 +	u8 log_eq_stride = ilog2(MLX5_EQE_SIZE);
    struct mlx5_priv *priv = &dev->priv;
    u8 vecidx = param->irq_index;
    __be64 *pas;
@@@ -299,18 -297,16 +299,18 @@@
    spin_lock_init(&cq_table->lock);
    INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);
-	eq->nent = roundup_pow_of_two(param->nent + MLX5_NUM_SPARE_EQE);
    eq->cons_index = 0;
 -	err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
 +
 +	err = mlx5_frag_buf_alloc_node(dev, wq_get_byte_sz(log_eq_size, log_eq_stride),
 +				       &eq->frag_buf, dev->priv.numa_node);
    if (err)
    	return err;
+	mlx5_init_fbc(eq->frag_buf.frags, log_eq_stride, log_eq_size, &eq->fbc);
    init_eq_buf(eq);
inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
 -		MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;
 +		MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->frag_buf.npages;
in = kvzalloc(inlen, GFP_KERNEL);
    if (!in) {
@@@ -319,7 -315,7 +319,7 @@@
    }
pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
 -	mlx5_fill_page_array(&eq->buf, pas);
 +	mlx5_fill_page_frag_array(&eq->frag_buf, pas);
MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
    if (!param->mask[0] && MLX5_CAP_GEN(dev, log_max_uctx))
@@@ -330,11 -326,11 +330,11 @@@
    			 param->mask[i]);
eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
 -	MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
 +	MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz);
    MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
    MLX5_SET(eqc, eqc, intr, vecidx);
    MLX5_SET(eqc, eqc, log_page_size,
 -		 eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 +		 eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
    if (err)
@@@ -360,7 -356,7 +360,7 @@@ err_in
    kvfree(in);
err_buf:
 -	mlx5_buf_free(dev, &eq->buf);
 +	mlx5_frag_buf_free(dev, &eq->frag_buf);
    return err;
  }
@@@ -417,7 -413,7 +417,7 @@@ static int destroy_unmap_eq(struct mlx5
    		       eq->eqn);
    synchronize_irq(eq->irqn);
-	mlx5_buf_free(dev, &eq->buf);
 +	mlx5_frag_buf_free(dev, &eq->frag_buf);
return err;
  }
@@@ -768,11 -764,10 +768,11 @@@ EXPORT_SYMBOL(mlx5_eq_destroy_generic)
  struct mlx5_eqe *mlx5_eq_get_eqe(struct mlx5_eq *eq, u32 cc)
  {
    u32 ci = eq->cons_index + cc;
 +	u32 nent = eq_get_size(eq);
    struct mlx5_eqe *eqe;
-	eqe = get_eqe(eq, ci & (eq->nent - 1));
 -	eqe = ((eqe->owner & 1) ^ !!(ci & eq->nent)) ? NULL : eqe;
 +	eqe = get_eqe(eq, ci & (nent - 1));
 +	eqe = ((eqe->owner & 1) ^ !!(ci & nent)) ? NULL : eqe;
    /* Make sure we read EQ entry contents after we've
     * checked the ownership bit.
     */
@@@ -936,13 -931,24 +936,24 @@@ void mlx5_core_eq_free_irqs(struct mlx5
    mutex_unlock(&table->lock);
  }
+ #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+ #define MLX5_MAX_ASYNC_EQS 4
+ #else
+ #define MLX5_MAX_ASYNC_EQS 3
+ #endif
+ 
  int mlx5_eq_table_create(struct mlx5_core_dev *dev)
  {
    struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ 	int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ 		      MLX5_CAP_GEN(dev, max_num_eqs) :
+ 		      1 << MLX5_CAP_GEN(dev, log_max_eq);
    int err;
eq_table->num_comp_eqs =
- 		mlx5_irq_get_num_comp(eq_table->irq_table);
+ 		min_t(int,
+ 		      mlx5_irq_get_num_comp(eq_table->irq_table),
+ 		      num_eqs - MLX5_MAX_ASYNC_EQS);
err = create_async_eqs(dev);
    if (err) {
diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 8b82f44bd6a7,d4a2f8d1ee9f..ab32f685cbb7
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@@ -40,6 -40,7 +40,6 @@@
  #include "eswitch.h"
  #include "esw/indir_table.h"
  #include "esw/acl/ofld.h"
 -#include "esw/indir_table.h"
  #include "rdma.h"
  #include "en.h"
  #include "fs_core.h"
@@@ -47,7 -48,6 +47,7 @@@
  #include "lib/eq.h"
  #include "lib/fs_chains.h"
  #include "en_tc.h"
 +#include "en/mapping.h"
/* There are two match-all miss flows, one for unicast dst mac and
   * one for multicast.
@@@ -55,14 -55,184 +55,14 @@@
  #define MLX5_ESW_MISS_FLOWS (2)
  #define UPLINK_REP_INDEX 0
-/* Per vport tables */
 -
 -#define MLX5_ESW_VPORT_TABLE_SIZE 128
 -
 -/* This struct is used as a key to the hash table and we need it to be packed
 - * so hash result is consistent
 - */
 -struct mlx5_vport_key {
 -	u32 chain;
 -	u16 prio;
 -	u16 vport;
 -	u16 vhca_id;
 -} __packed;
 -
 -struct mlx5_vport_tbl_attr {
 -	u16 chain;
 -	u16 prio;
 -	u16 vport;
 -};
 -
 -struct mlx5_vport_table {
 -	struct hlist_node hlist;
 -	struct mlx5_flow_table *fdb;
 -	u32 num_rules;
 -	struct mlx5_vport_key key;
 -};
 -
 +#define MLX5_ESW_VPORT_TBL_SIZE 128
  #define MLX5_ESW_VPORT_TBL_NUM_GROUPS  4
-static struct mlx5_flow_table *
 -esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns)
 -{
 -	struct mlx5_flow_table_attr ft_attr = {};
 -	struct mlx5_flow_table *fdb;
 -
 -	ft_attr.autogroup.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS;
 -	ft_attr.max_fte = MLX5_ESW_VPORT_TABLE_SIZE;
 -	ft_attr.prio = FDB_PER_VPORT;
 -	fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
 -	if (IS_ERR(fdb)) {
 -		esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n",
 -			 PTR_ERR(fdb));
 -	}
 -
 -	return fdb;
 -}
 -
 -static u32 flow_attr_to_vport_key(struct mlx5_eswitch *esw,
 -				  struct mlx5_vport_tbl_attr *attr,
 -				  struct mlx5_vport_key *key)
 -{
 -	key->vport = attr->vport;
 -	key->chain = attr->chain;
 -	key->prio = attr->prio;
 -	key->vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
 -	return jhash(key, sizeof(*key), 0);
 -}
 -
 -/* caller must hold vports.lock */
 -static struct mlx5_vport_table *
 -esw_vport_tbl_lookup(struct mlx5_eswitch *esw, struct mlx5_vport_key *skey, u32 key)
 -{
 -	struct mlx5_vport_table *e;
 -
 -	hash_for_each_possible(esw->fdb_table.offloads.vports.table, e, hlist, key)
 -		if (!memcmp(&e->key, skey, sizeof(*skey)))
 -			return e;
 -
 -	return NULL;
 -}
 -
 -static void
 -esw_vport_tbl_put(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
 -{
 -	struct mlx5_vport_table *e;
 -	struct mlx5_vport_key key;
 -	u32 hkey;
 -
 -	mutex_lock(&esw->fdb_table.offloads.vports.lock);
 -	hkey = flow_attr_to_vport_key(esw, attr, &key);
 -	e = esw_vport_tbl_lookup(esw, &key, hkey);
 -	if (!e || --e->num_rules)
 -		goto out;
 -
 -	hash_del(&e->hlist);
 -	mlx5_destroy_flow_table(e->fdb);
 -	kfree(e);
 -out:
 -	mutex_unlock(&esw->fdb_table.offloads.vports.lock);
 -}
 -
 -static struct mlx5_flow_table *
 -esw_vport_tbl_get(struct mlx5_eswitch *esw, struct mlx5_vport_tbl_attr *attr)
 -{
 -	struct mlx5_core_dev *dev = esw->dev;
 -	struct mlx5_flow_namespace *ns;
 -	struct mlx5_flow_table *fdb;
 -	struct mlx5_vport_table *e;
 -	struct mlx5_vport_key skey;
 -	u32 hkey;
 -
 -	mutex_lock(&esw->fdb_table.offloads.vports.lock);
 -	hkey = flow_attr_to_vport_key(esw, attr, &skey);
 -	e = esw_vport_tbl_lookup(esw, &skey, hkey);
 -	if (e) {
 -		e->num_rules++;
 -		goto out;
 -	}
 -
 -	e = kzalloc(sizeof(*e), GFP_KERNEL);
 -	if (!e) {
 -		fdb = ERR_PTR(-ENOMEM);
 -		goto err_alloc;
 -	}
 -
 -	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
 -	if (!ns) {
 -		esw_warn(dev, "Failed to get FDB namespace\n");
 -		fdb = ERR_PTR(-ENOENT);
 -		goto err_ns;
 -	}
 -
 -	fdb = esw_vport_tbl_create(esw, ns);
 -	if (IS_ERR(fdb))
 -		goto err_ns;
 -
 -	e->fdb = fdb;
 -	e->num_rules = 1;
 -	e->key = skey;
 -	hash_add(esw->fdb_table.offloads.vports.table, &e->hlist, hkey);
 -out:
 -	mutex_unlock(&esw->fdb_table.offloads.vports.lock);
 -	return e->fdb;
 -
 -err_ns:
 -	kfree(e);
 -err_alloc:
 -	mutex_unlock(&esw->fdb_table.offloads.vports.lock);
 -	return fdb;
 -}
 -
 -int mlx5_esw_vport_tbl_get(struct mlx5_eswitch *esw)
 -{
 -	struct mlx5_vport_tbl_attr attr;
 -	struct mlx5_flow_table *fdb;
 -	struct mlx5_vport *vport;
 -	int i;
 -
 -	attr.chain = 0;
 -	attr.prio = 1;
 -	mlx5_esw_for_all_vports(esw, i, vport) {
 -		attr.vport = vport->vport;
 -		fdb = esw_vport_tbl_get(esw, &attr);
 -		if (IS_ERR(fdb))
 -			goto out;
 -	}
 -	return 0;
 -
 -out:
 -	mlx5_esw_vport_tbl_put(esw);
 -	return PTR_ERR(fdb);
 -}
 -
 -void mlx5_esw_vport_tbl_put(struct mlx5_eswitch *esw)
 -{
 -	struct mlx5_vport_tbl_attr attr;
 -	struct mlx5_vport *vport;
 -	int i;
 -
 -	attr.chain = 0;
 -	attr.prio = 1;
 -	mlx5_esw_for_all_vports(esw, i, vport) {
 -		attr.vport = vport->vport;
 -		esw_vport_tbl_put(esw, &attr);
 -	}
 -}
 -
 -/* End: Per vport tables */
 +static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_mirror_ns = {
 +	.max_fte = MLX5_ESW_VPORT_TBL_SIZE,
 +	.max_num_groups = MLX5_ESW_VPORT_TBL_NUM_GROUPS,
 +	.flags = 0,
 +};
static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
    					     u16 vport_num)
@@@ -86,26 -256,6 +86,26 @@@ mlx5_eswitch_set_rule_flow_source(struc
    			MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
  }
+/* Strictly, only the upper 16 bits of reg c0 need to be cleared, but the lower 16 bits
 + * are not needed by the subsequent processing either, so clear them all for simplicity.
 + */
 +void
 +mlx5_eswitch_clear_rule_source_port(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec)
 +{
 +	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
 +		void *misc2;
 +
 +		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2);
 +		MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0);
 +
 +		misc2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2);
 +		MLX5_SET(fte_match_set_misc2, misc2, metadata_reg_c_0, 0);
 +
 +		if (!memchr_inv(misc2, 0, MLX5_ST_SZ_BYTES(fte_match_set_misc2)))
 +			spec->match_criteria_enable &= ~MLX5_MATCH_MISC_PARAMETERS_2;
 +	}
 +}
 +
  static void
  mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
    			  struct mlx5_flow_spec *spec,
@@@ -176,19 -326,6 +176,19 @@@ esw_cleanup_decap_indir(struct mlx5_esw
    				 true);
  }
+static int
 +esw_setup_sampler_dest(struct mlx5_flow_destination *dest,
 +		       struct mlx5_flow_act *flow_act,
 +		       struct mlx5_esw_flow_attr *esw_attr,
 +		       int i)
 +{
 +	flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
 +	dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_SAMPLER;
 +	dest[i].sampler_id = esw_attr->sample->sampler_id;
 +
 +	return 0;
 +}
 +
  static int
  esw_setup_ft_dest(struct mlx5_flow_destination *dest,
    	  struct mlx5_flow_act *flow_act,
@@@ -400,6 -537,14 +400,14 @@@ esw_setup_vport_dests(struct mlx5_flow_
    return i;
  }
+ static bool
+ esw_src_port_rewrite_supported(struct mlx5_eswitch *esw)
+ {
+ 	return MLX5_CAP_GEN(esw->dev, reg_c_preserve) &&
+ 	       mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+ 	       MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level);
+ }
+ 
  static int
  esw_setup_dests(struct mlx5_flow_destination *dest,
    	struct mlx5_flow_act *flow_act,
@@@ -413,15 -558,10 +421,13 @@@
    int err = 0;
if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) &&
- 	    MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve) &&
- 	    mlx5_eswitch_vport_match_metadata_enabled(esw) &&
- 	    MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level))
+ 	    esw_src_port_rewrite_supported(esw))
    	attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
-	if (attr->dest_ft) {
 +	if (attr->flags & MLX5_ESW_ATTR_FLAG_SAMPLE) {
 +		esw_setup_sampler_dest(dest, flow_act, esw_attr, *i);
 +		(*i)++;
 +	} else if (attr->dest_ft) {
    	esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i);
    	(*i)++;
    } else if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
@@@ -524,16 -664,12 +530,16 @@@ mlx5_eswitch_add_offloaded_rule(struct 
    if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
    	flow_act.modify_hdr = attr->modify_hdr;
-	if (split) {
 +	/* esw_attr->sample is allocated only when there is a sample action */
 +	if (esw_attr->sample && esw_attr->sample->sample_default_tbl) {
 +		fdb = esw_attr->sample->sample_default_tbl;
 +	} else if (split) {
    	fwd_attr.chain = attr->chain;
    	fwd_attr.prio = attr->prio;
    	fwd_attr.vport = esw_attr->in_rep->vport;
 +		fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
-		fdb = esw_vport_tbl_get(esw, &fwd_attr);
 +		fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
    } else {
    	if (attr->chain || attr->prio)
    		fdb = mlx5_chains_get_table(chains, attr->chain,
@@@ -565,7 -701,7 +571,7 @@@
err_add_rule:
    if (split)
 -		esw_vport_tbl_put(esw, &fwd_attr);
 +		mlx5_esw_vporttbl_put(esw, &fwd_attr);
    else if (attr->chain || attr->prio)
    	mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
  err_esw_get:
@@@ -598,8 -734,7 +604,8 @@@ mlx5_eswitch_add_fwd_rule(struct mlx5_e
    fwd_attr.chain = attr->chain;
    fwd_attr.prio = attr->prio;
    fwd_attr.vport = esw_attr->in_rep->vport;
 -	fwd_fdb = esw_vport_tbl_get(esw, &fwd_attr);
 +	fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
 +	fwd_fdb = mlx5_esw_vporttbl_get(esw, &fwd_attr);
    if (IS_ERR(fwd_fdb)) {
    	rule = ERR_CAST(fwd_fdb);
    	goto err_get_fwd;
@@@ -644,7 -779,7 +650,7 @@@
    return rule;
  err_chain_src_rewrite:
    esw_put_dest_tables_loop(esw, attr, 0, i);
 -	esw_vport_tbl_put(esw, &fwd_attr);
 +	mlx5_esw_vporttbl_put(esw, &fwd_attr);
  err_get_fwd:
    mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
  err_get_fast:
@@@ -679,16 -814,15 +685,16 @@@ __mlx5_eswitch_del_rule(struct mlx5_esw
    	fwd_attr.chain = attr->chain;
    	fwd_attr.prio = attr->prio;
    	fwd_attr.vport = esw_attr->in_rep->vport;
 +		fwd_attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
    }
if (fwd_rule)  {
 -		esw_vport_tbl_put(esw, &fwd_attr);
 +		mlx5_esw_vporttbl_put(esw, &fwd_attr);
    	mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
    	esw_put_dest_tables_loop(esw, attr, 0, esw_attr->split_count);
    } else {
    	if (split)
 -			esw_vport_tbl_put(esw, &fwd_attr);
 +			mlx5_esw_vporttbl_put(esw, &fwd_attr);
    	else if (attr->chain || attr->prio)
    		mlx5_chains_put_table(chains, attr->chain, attr->prio, 0);
    	esw_cleanup_dests(esw, attr);
@@@ -909,8 -1043,7 +915,8 @@@ out
  }
struct mlx5_flow_handle *
 -mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, u16 vport,
 +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw,
 +				    struct mlx5_eswitch_rep *rep,
    			    u32 sqn)
  {
    struct mlx5_flow_act flow_act = {0};
@@@ -928,30 -1061,21 +934,30 @@@
    misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
    MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn);
    /* source vport is the esw manager */
 -	MLX5_SET(fte_match_set_misc, misc, source_port, esw->manager_vport);
 +	MLX5_SET(fte_match_set_misc, misc, source_port, rep->esw->manager_vport);
 +	if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
 +		MLX5_SET(fte_match_set_misc, misc, source_eswitch_owner_vhca_id,
 +			 MLX5_CAP_GEN(rep->esw->dev, vhca_id));
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters);
    MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn);
    MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
 +	if (MLX5_CAP_ESW(on_esw->dev, merged_eswitch))
 +		MLX5_SET_TO_ONES(fte_match_set_misc, misc,
 +				 source_eswitch_owner_vhca_id);
spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
    dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 -	dest.vport.num = vport;
 +	dest.vport.num = rep->vport;
 +	dest.vport.vhca_id = MLX5_CAP_GEN(rep->esw->dev, vhca_id);
 +	dest.vport.flags |= MLX5_FLOW_DEST_VPORT_VHCA_ID;
    flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-	flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.slow_fdb,
 +	flow_rule = mlx5_add_flow_rules(on_esw->fdb_table.offloads.slow_fdb,
    				spec, &flow_act, &dest, 1);
    if (IS_ERR(flow_rule))
 -		esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
 +		esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %ld\n",
 +			 PTR_ERR(flow_rule));
  out:
    kvfree(spec);
    return flow_rule;
@@@ -1329,14 -1453,14 +1335,14 @@@ esw_add_restore_rule(struct mlx5_eswitc
    if (!mlx5_eswitch_reg_c1_loopback_supported(esw))
    	return ERR_PTR(-EOPNOTSUPP);
-	spec = kzalloc(sizeof(*spec), GFP_KERNEL);
 +	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
    if (!spec)
    	return ERR_PTR(-ENOMEM);
misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
    		    misc_parameters_2);
    MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
 -		 ESW_CHAIN_TAG_METADATA_MASK);
 +		 ESW_REG_C0_USER_DATA_METADATA_MASK);
    misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
    		    misc_parameters_2);
    MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0, tag);
@@@ -1352,7 -1476,7 +1358,7 @@@
    dest.ft = esw->offloads.ft_offloads;
flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
 -	kfree(spec);
 +	kvfree(spec);
if (IS_ERR(flow_rule))
    	esw_warn(esw->dev,
@@@ -1362,6 -1486,12 +1368,6 @@@
    return flow_rule;
  }
-u32
 -esw_get_max_restore_tag(struct mlx5_eswitch *esw)
 -{
 -	return ESW_CHAIN_TAG_METADATA_MASK;
 -}
 -
  #define MAX_PF_SQ 256
  #define MAX_SQ_NVPORTS 32
@@@ -1391,44 -1521,6 +1397,44 @@@ static void esw_set_flow_group_source_p
  }
#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
 +static void esw_vport_tbl_put(struct mlx5_eswitch *esw)
 +{
 +	struct mlx5_vport_tbl_attr attr;
 +	struct mlx5_vport *vport;
 +	int i;
 +
 +	attr.chain = 0;
 +	attr.prio = 1;
 +	mlx5_esw_for_all_vports(esw, i, vport) {
 +		attr.vport = vport->vport;
 +		attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
 +		mlx5_esw_vporttbl_put(esw, &attr);
 +	}
 +}
 +
 +static int esw_vport_tbl_get(struct mlx5_eswitch *esw)
 +{
 +	struct mlx5_vport_tbl_attr attr;
 +	struct mlx5_flow_table *fdb;
 +	struct mlx5_vport *vport;
 +	int i;
 +
 +	attr.chain = 0;
 +	attr.prio = 1;
 +	mlx5_esw_for_all_vports(esw, i, vport) {
 +		attr.vport = vport->vport;
 +		attr.vport_ns = &mlx5_esw_vport_tbl_mirror_ns;
 +		fdb = mlx5_esw_vporttbl_get(esw, &attr);
 +		if (IS_ERR(fdb))
 +			goto out;
 +	}
 +	return 0;
 +
 +out:
 +	esw_vport_tbl_put(esw);
 +	return PTR_ERR(fdb);
 +}
 +
  #define fdb_modify_header_fwd_to_table_supported(esw) \
    (MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table))
  static void esw_init_chains_offload_flags(struct mlx5_eswitch *esw, u32 *flags)
@@@ -1478,7 -1570,7 +1484,7 @@@ esw_chains_create(struct mlx5_eswitch *
    attr.max_ft_sz = fdb_max;
    attr.max_grp_num = esw->params.large_group_num;
    attr.default_ft = miss_fdb;
 -	attr.max_restore_tag = esw_get_max_restore_tag(esw);
 +	attr.mapping = esw->offloads.reg_c0_obj_pool;
chains = mlx5_chains_create(dev, &attr);
    if (IS_ERR(chains)) {
@@@ -1506,7 -1598,7 +1512,7 @@@
/* Open level 1 for split fdb rules now if prios isn't supported  */
    if (!mlx5_chains_prios_supported(chains)) {
 -		err = mlx5_esw_vport_tbl_get(esw);
 +		err = esw_vport_tbl_get(esw);
    	if (err)
    		goto level_1_err;
    }
@@@ -1530,7 -1622,7 +1536,7 @@@ static voi
  esw_chains_destroy(struct mlx5_eswitch *esw, struct mlx5_fs_chains *chains)
  {
    if (!mlx5_chains_prios_supported(chains))
 -		mlx5_esw_vport_tbl_put(esw);
 +		esw_vport_tbl_put(esw);
    mlx5_chains_put_table(chains, 0, 1, 0);
    mlx5_chains_put_table(chains, mlx5_chains_get_nf_ft_chain(chains), 1, 0);
    mlx5_chains_destroy(chains);
@@@ -1617,12 -1709,6 +1623,12 @@@ static int esw_create_offloads_fdb_tabl
MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn);
    MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port);
 +	if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
 +		MLX5_SET_TO_ONES(fte_match_param, match_criteria,
 +				 misc_parameters.source_eswitch_owner_vhca_id);
 +		MLX5_SET(create_flow_group_in, flow_group_in,
 +			 source_eswitch_owner_vhca_id_valid, 1);
 +	}
ix = esw->total_vports * MAX_SQ_NVPORTS + MAX_PF_SQ;
    MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
@@@ -1636,36 -1722,40 +1642,40 @@@
    }
    esw->fdb_table.offloads.send_to_vport_grp = g;
- 	/* meta send to vport */
- 	memset(flow_group_in, 0, inlen);
- 	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
- 		 MLX5_MATCH_MISC_PARAMETERS_2);
+ 	if (esw_src_port_rewrite_supported(esw)) {
+ 		/* meta send to vport */
+ 		memset(flow_group_in, 0, inlen);
+ 		MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+ 			 MLX5_MATCH_MISC_PARAMETERS_2);
- 	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+ 		match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
- 	MLX5_SET(fte_match_param, match_criteria,
- 		 misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
- 	MLX5_SET(fte_match_param, match_criteria,
- 		 misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
- 
- 	num_vfs = esw->esw_funcs.num_vfs;
- 	if (num_vfs) {
- 		MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
- 		MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + num_vfs - 1);
- 		ix += num_vfs;
- 
- 		g = mlx5_create_flow_group(fdb, flow_group_in);
- 		if (IS_ERR(g)) {
- 			err = PTR_ERR(g);
- 			esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n",
- 				 err);
- 			goto send_vport_meta_err;
+ 		MLX5_SET(fte_match_param, match_criteria,
+ 			 misc_parameters_2.metadata_reg_c_0,
+ 			 mlx5_eswitch_get_vport_metadata_mask());
+ 		MLX5_SET(fte_match_param, match_criteria,
+ 			 misc_parameters_2.metadata_reg_c_1, ESW_TUN_MASK);
+ 
+ 		num_vfs = esw->esw_funcs.num_vfs;
+ 		if (num_vfs) {
+ 			MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix);
+ 			MLX5_SET(create_flow_group_in, flow_group_in,
+ 				 end_flow_index, ix + num_vfs - 1);
+ 			ix += num_vfs;
+ 
+ 			g = mlx5_create_flow_group(fdb, flow_group_in);
+ 			if (IS_ERR(g)) {
+ 				err = PTR_ERR(g);
+ 				esw_warn(dev, "Failed to create send-to-vport meta flow group err(%d)\n",
+ 					 err);
+ 				goto send_vport_meta_err;
+ 			}
+ 			esw->fdb_table.offloads.send_to_vport_meta_grp = g;
+ 
+ 			err = mlx5_eswitch_add_send_to_vport_meta_rules(esw);
+ 			if (err)
+ 				goto meta_rule_err;
    	}
- 		esw->fdb_table.offloads.send_to_vport_meta_grp = g;
- 
- 		err = mlx5_eswitch_add_send_to_vport_meta_rules(esw);
- 		if (err)
- 			goto meta_rule_err;
    }
if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) {
@@@ -1775,7 -1865,6 +1785,7 @@@ static void esw_destroy_offloads_fdb_ta
    /* Holds true only as long as DMFS is the default */
    mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns,
    			     MLX5_FLOW_STEERING_MODE_DMFS);
 +	atomic64_set(&esw->user_count, 0);
  }
static int esw_create_offloads_table(struct mlx5_eswitch *esw)
@@@ -1978,7 -2067,7 +1988,7 @@@ static int esw_create_restore_table(str
    	goto out_free;
    }
-	ft_attr.max_fte = 1 << ESW_CHAIN_TAG_METADATA_BITS;
 +	ft_attr.max_fte = 1 << ESW_REG_C0_USER_DATA_METADATA_BITS;
    ft = mlx5_create_flow_table(ns, &ft_attr);
    if (IS_ERR(ft)) {
    	err = PTR_ERR(ft);
@@@ -1993,7 -2082,7 +2003,7 @@@
    		    misc_parameters_2);
MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
 -		 ESW_CHAIN_TAG_METADATA_MASK);
 +		 ESW_REG_C0_USER_DATA_METADATA_MASK);
    MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
    MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
    	 ft_attr.max_fte - 1);
@@@ -2181,11 -2270,9 +2191,11 @@@ int esw_offloads_load_rep(struct mlx5_e
    if (esw->mode != MLX5_ESWITCH_OFFLOADS)
    	return 0;
-	err = mlx5_esw_offloads_devlink_port_register(esw, vport_num);
 -	if (err)
 -		return err;
 +	if (vport_num != MLX5_VPORT_UPLINK) {
 +		err = mlx5_esw_offloads_devlink_port_register(esw, vport_num);
 +		if (err)
 +			return err;
 +	}
err = mlx5_esw_offloads_rep_load(esw, vport_num);
    if (err)
@@@ -2193,8 -2280,7 +2203,8 @@@
    return err;
load_err:
 -	mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
 +	if (vport_num != MLX5_VPORT_UPLINK)
 +		mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
    return err;
  }
@@@ -2204,9 -2290,7 +2214,9 @@@ void esw_offloads_unload_rep(struct mlx
    	return;
mlx5_esw_offloads_rep_unload(esw, vport_num);
 -	mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
 +
 +	if (vport_num != MLX5_VPORT_UPLINK)
 +		mlx5_esw_offloads_devlink_port_unregister(esw, vport_num);
  }
#define ESW_OFFLOADS_DEVCOM_PAIR	(0)
@@@ -2215,8 -2299,13 +2225,8 @@@
  static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
    			  struct mlx5_eswitch *peer_esw)
  {
 -	int err;
 -
 -	err = esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
 -	if (err)
 -		return err;
-	return 0;
 +	return esw_add_fdb_peer_miss_rules(esw, peer_esw->dev);
  }
static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
@@@ -2476,9 -2565,6 +2486,9 @@@ static int esw_create_uplink_offloads_a
    struct mlx5_vport *vport;
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
 +	if (IS_ERR(vport))
 +		return PTR_ERR(vport);
 +
    return esw_vport_create_offloads_acl_tables(esw, vport);
  }
@@@ -2487,9 -2573,6 +2497,9 @@@ static void esw_destroy_uplink_offloads
    struct mlx5_vport *vport;
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK);
 +	if (IS_ERR(vport))
 +		return;
 +
    esw_vport_destroy_offloads_acl_tables(esw, vport);
  }
@@@ -2501,7 -2584,6 +2511,7 @@@ static int esw_offloads_steering_init(s
    memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
    mutex_init(&esw->fdb_table.offloads.vports.lock);
    hash_init(esw->fdb_table.offloads.vports.table);
 +	atomic64_set(&esw->user_count, 0);
indir = mlx5_esw_indir_table_init();
    if (IS_ERR(indir)) {
@@@ -2646,7 -2728,6 +2656,7 @@@ static int mlx5_esw_host_number_init(st
int esw_offloads_enable(struct mlx5_eswitch *esw)
  {
 +	struct mapping_ctx *reg_c0_obj_pool;
    struct mlx5_vport *vport;
    int err, i;
@@@ -2674,15 -2755,6 +2684,15 @@@
    if (err)
    	goto err_vport_metadata;
+	reg_c0_obj_pool = mapping_create(sizeof(struct mlx5_mapped_obj),
 +					 ESW_REG_C0_USER_DATA_METADATA_MASK,
 +					 true);
 +	if (IS_ERR(reg_c0_obj_pool)) {
 +		err = PTR_ERR(reg_c0_obj_pool);
 +		goto err_pool;
 +	}
 +	esw->offloads.reg_c0_obj_pool = reg_c0_obj_pool;
 +
    err = esw_offloads_steering_init(esw);
    if (err)
    	goto err_steering_init;
@@@ -2709,8 -2781,6 +2719,8 @@@ err_vports
  err_uplink:
    esw_offloads_steering_cleanup(esw);
  err_steering_init:
 +	mapping_destroy(reg_c0_obj_pool);
 +err_pool:
    esw_set_passing_vport_metadata(esw, false);
  err_vport_metadata:
    esw_offloads_metadata_uninit(esw);
@@@ -2749,7 -2819,6 +2759,7 @@@ void esw_offloads_disable(struct mlx5_e
    esw_offloads_unload_rep(esw, MLX5_VPORT_UPLINK);
    esw_set_passing_vport_metadata(esw, false);
    esw_offloads_steering_cleanup(esw);
 +	mapping_destroy(esw->offloads.reg_c0_obj_pool);
    esw_offloads_metadata_uninit(esw);
    esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
    mlx5_rdma_disable_roce(esw->dev);
@@@ -2856,14 -2925,8 +2866,14 @@@ int mlx5_devlink_eswitch_mode_set(struc
    if (esw_mode_from_devlink(mode, &mlx5_mode))
    	return -EINVAL;
-	mutex_lock(&esw->mode_lock);
 -	cur_mlx5_mode = esw->mode;
 +	err = mlx5_esw_try_lock(esw);
 +	if (err < 0) {
 +		NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy");
 +		return err;
 +	}
 +	cur_mlx5_mode = err;
 +	err = 0;
 +
    if (cur_mlx5_mode == mlx5_mode)
    	goto unlock;
@@@ -2875,7 -2938,7 +2885,7 @@@
    	err = -EINVAL;
unlock:
 -	mutex_unlock(&esw->mode_lock);
 +	mlx5_esw_unlock(esw);
    return err;
  }
@@@ -2888,14 -2951,14 +2898,14 @@@ int mlx5_devlink_eswitch_mode_get(struc
    if (IS_ERR(esw))
    	return PTR_ERR(esw);
-	mutex_lock(&esw->mode_lock);
 +	down_write(&esw->mode_lock);
    err = eswitch_devlink_esw_mode_check(esw);
    if (err)
    	goto unlock;
err = esw_mode_to_devlink(esw->mode, mode);
  unlock:
 -	mutex_unlock(&esw->mode_lock);
 +	up_write(&esw->mode_lock);
    return err;
  }
@@@ -2911,7 -2974,7 +2921,7 @@@ int mlx5_devlink_eswitch_inline_mode_se
    if (IS_ERR(esw))
    	return PTR_ERR(esw);
-	mutex_lock(&esw->mode_lock);
 +	down_write(&esw->mode_lock);
    err = eswitch_devlink_esw_mode_check(esw);
    if (err)
    	goto out;
@@@ -2950,7 -3013,7 +2960,7 @@@
    }
esw->offloads.inline_mode = mlx5_mode;
 -	mutex_unlock(&esw->mode_lock);
 +	up_write(&esw->mode_lock);
    return 0;
revert_inline_mode:
@@@ -2960,7 -3023,7 +2970,7 @@@
    					 vport,
    					 esw->offloads.inline_mode);
  out:
 -	mutex_unlock(&esw->mode_lock);
 +	up_write(&esw->mode_lock);
    return err;
  }
@@@ -2973,14 -3036,14 +2983,14 @@@ int mlx5_devlink_eswitch_inline_mode_ge
    if (IS_ERR(esw))
    	return PTR_ERR(esw);
-	mutex_lock(&esw->mode_lock);
 +	down_write(&esw->mode_lock);
    err = eswitch_devlink_esw_mode_check(esw);
    if (err)
    	goto unlock;
err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
  unlock:
 -	mutex_unlock(&esw->mode_lock);
 +	up_write(&esw->mode_lock);
    return err;
  }
@@@ -2996,7 -3059,7 +3006,7 @@@ int mlx5_devlink_eswitch_encap_mode_set
    if (IS_ERR(esw))
    	return PTR_ERR(esw);
-	mutex_lock(&esw->mode_lock);
 +	down_write(&esw->mode_lock);
    err = eswitch_devlink_esw_mode_check(esw);
    if (err)
    	goto unlock;
@@@ -3042,7 -3105,7 +3052,7 @@@
    }
unlock:
 -	mutex_unlock(&esw->mode_lock);
 +	up_write(&esw->mode_lock);
    return err;
  }
@@@ -3057,14 -3120,14 +3067,14 @@@ int mlx5_devlink_eswitch_encap_mode_get
    	return PTR_ERR(esw);
-	mutex_lock(&esw->mode_lock);
 +	down_write(&esw->mode_lock);
    err = eswitch_devlink_esw_mode_check(esw);
    if (err)
    	goto unlock;
*encap = esw->offloads.encap;
  unlock:
 -	mutex_unlock(&esw->mode_lock);
 +	up_write(&esw->mode_lock);
    return 0;
  }
@@@ -3094,7 -3157,6 +3104,7 @@@ void mlx5_eswitch_register_vport_reps(s
    esw->offloads.rep_ops[rep_type] = ops;
    mlx5_esw_for_all_reps(esw, i, rep) {
    	if (likely(mlx5_eswitch_vport_has_rep(esw, i))) {
 +			rep->esw = esw;
    		rep_data = &rep->rep_data[rep_type];
    		atomic_set(&rep_data->state, REP_REGISTERED);
    	}
diff --combined drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 97d074d7b78d,ba28ac7e79bc..f99db88ee884
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@@ -16,12 -16,12 +16,13 @@@
  #include <linux/in6.h>
  #include <linux/notifier.h>
  #include <linux/net_namespace.h>
 +#include <linux/spinlock.h>
  #include <net/psample.h>
  #include <net/pkt_cls.h>
  #include <net/red.h>
  #include <net/vxlan.h>
  #include <net/flow_offload.h>
+ #include <net/inet_ecn.h>
#include "port.h"
  #include "core.h"
@@@ -87,15 -87,10 +88,15 @@@ enum mlxsw_sp_rif_type 
    MLXSW_SP_RIF_TYPE_MAX,
  };
-struct mlxsw_sp_rif_ops;
 +struct mlxsw_sp_router_ops;
-extern const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[];
 -extern const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[];
 +extern const struct mlxsw_sp_router_ops mlxsw_sp1_router_ops;
 +extern const struct mlxsw_sp_router_ops mlxsw_sp2_router_ops;
 +
 +struct mlxsw_sp_switchdev_ops;
 +
 +extern const struct mlxsw_sp_switchdev_ops mlxsw_sp1_switchdev_ops;
 +extern const struct mlxsw_sp_switchdev_ops mlxsw_sp2_switchdev_ops;
enum mlxsw_sp_fid_type {
    MLXSW_SP_FID_TYPE_8021Q,
@@@ -139,7 -134,6 +140,7 @@@ struct mlxsw_sp_ptp_state
  struct mlxsw_sp_ptp_ops;
  struct mlxsw_sp_span_ops;
  struct mlxsw_sp_qdisc_state;
 +struct mlxsw_sp_mall_entry;
struct mlxsw_sp_port_mapping {
    u8 module;
@@@ -155,7 -149,6 +156,7 @@@ struct mlxsw_sp 
    const unsigned char *mac_mask;
    struct mlxsw_sp_upper *lags;
    struct mlxsw_sp_port_mapping **port_mapping;
 +	struct rhashtable sample_trigger_ht;
    struct mlxsw_sp_sb *sb;
    struct mlxsw_sp_bridge *bridge;
    struct mlxsw_sp_router *router;
@@@ -172,7 -165,6 +173,7 @@@
    struct mlxsw_sp_counter_pool *counter_pool;
    struct mlxsw_sp_span *span;
    struct mlxsw_sp_trap *trap;
 +	const struct mlxsw_sp_switchdev_ops *switchdev_ops;
    const struct mlxsw_sp_kvdl_ops *kvdl_ops;
    const struct mlxsw_afa_ops *afa_ops;
    const struct mlxsw_afk_ops *afk_ops;
@@@ -180,6 -172,7 +181,6 @@@
    const struct mlxsw_sp_acl_rulei_ops *acl_rulei_ops;
    const struct mlxsw_sp_acl_tcam_ops *acl_tcam_ops;
    const struct mlxsw_sp_nve_ops **nve_ops_arr;
 -	const struct mlxsw_sp_rif_ops **rif_ops_arr;
    const struct mlxsw_sp_sb_vals *sb_vals;
    const struct mlxsw_sp_sb_ops *sb_ops;
    const struct mlxsw_sp_port_type_speed_ops *port_type_speed_ops;
@@@ -187,8 -180,6 +188,8 @@@
    const struct mlxsw_sp_span_ops *span_ops;
    const struct mlxsw_sp_policer_core_ops *policer_core_ops;
    const struct mlxsw_sp_trap_ops *trap_ops;
 +	const struct mlxsw_sp_mall_ops *mall_ops;
 +	const struct mlxsw_sp_router_ops *router_ops;
    const struct mlxsw_listener *listeners;
    size_t listeners_count;
    u32 lowest_shaper_bs;
@@@ -242,18 -233,7 +243,18 @@@ struct mlxsw_sp_port_pcpu_stats 
    u32			tx_dropped;
  };
-struct mlxsw_sp_port_sample {
 +enum mlxsw_sp_sample_trigger_type {
 +	MLXSW_SP_SAMPLE_TRIGGER_TYPE_INGRESS,
 +	MLXSW_SP_SAMPLE_TRIGGER_TYPE_EGRESS,
 +	MLXSW_SP_SAMPLE_TRIGGER_TYPE_POLICY_ENGINE,
 +};
 +
 +struct mlxsw_sp_sample_trigger {
 +	enum mlxsw_sp_sample_trigger_type type;
 +	u8 local_port; /* Reserved when trigger type is not ingress / egress. */
 +};
 +
 +struct mlxsw_sp_sample_params {
    struct psample_group *psample_group;
    u32 trunc_size;
    u32 rate;
@@@ -323,6 -303,7 +324,6 @@@ struct mlxsw_sp_port 
    	struct mlxsw_sp_port_xstats xstats;
    	struct delayed_work update_dw;
    } periodic_hw_stats;
 -	struct mlxsw_sp_port_sample __rcu *sample;
    struct list_head vlans_list;
    struct mlxsw_sp_port_vlan *default_vlan;
    struct mlxsw_sp_qdisc_state *qdisc;
@@@ -367,6 -348,20 +368,20 @@@ struct mlxsw_sp_port_type_speed_ops 
    u32 (*ptys_proto_cap_masked_get)(u32 eth_proto_cap);
  };
+ static inline u8 mlxsw_sp_tunnel_ecn_decap(u8 outer_ecn, u8 inner_ecn,
+ 					   bool *trap_en)
+ {
+ 	bool set_ce = false;
+ 
+ 	*trap_en = !!__INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce);
+ 	if (set_ce)
+ 		return INET_ECN_CE;
+ 	else if (outer_ecn == INET_ECN_ECT_1 && inner_ecn == INET_ECN_ECT_0)
+ 		return INET_ECN_ECT_1;
+ 	else
+ 		return inner_ecn;
+ }
+ 
  static inline struct net_device *
  mlxsw_sp_bridge_vxlan_dev_find(struct net_device *br_dev)
  {
@@@ -551,17 -546,6 +566,17 @@@ void mlxsw_sp_hdroom_bufs_reset_sizes(s
    			      struct mlxsw_sp_hdroom *hdroom);
  int mlxsw_sp_hdroom_configure(struct mlxsw_sp_port *mlxsw_sp_port,
    		      const struct mlxsw_sp_hdroom *hdroom);
 +struct mlxsw_sp_sample_params *
 +mlxsw_sp_sample_trigger_params_lookup(struct mlxsw_sp *mlxsw_sp,
 +				      const struct mlxsw_sp_sample_trigger *trigger);
 +int
 +mlxsw_sp_sample_trigger_params_set(struct mlxsw_sp *mlxsw_sp,
 +				   const struct mlxsw_sp_sample_trigger *trigger,
 +				   const struct mlxsw_sp_sample_params *params,
 +				   struct netlink_ext_ack *extack);
 +void
 +mlxsw_sp_sample_trigger_params_unset(struct mlxsw_sp *mlxsw_sp,
 +				     const struct mlxsw_sp_sample_trigger *trigger);
extern const struct mlxsw_sp_sb_vals mlxsw_sp1_sb_vals;
  extern const struct mlxsw_sp_sb_vals mlxsw_sp2_sb_vals;
@@@ -599,6 -583,8 +614,6 @@@ void mlxsw_sp_rx_listener_no_mark_func(
    			       u8 local_port, void *priv);
  void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
    		  u8 local_port);
 -void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
 -			     u8 local_port);
  int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed);
  int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
    		  enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
@@@ -615,8 -601,6 +630,8 @@@ int mlxsw_sp_port_vp_mode_set(struct ml
  int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
    			   bool learn_enable);
  int mlxsw_sp_ethtype_to_sver_type(u16 ethtype, u8 *p_sver_type);
 +int mlxsw_sp_port_egress_ethtype_set(struct mlxsw_sp_port *mlxsw_sp_port,
 +				     u16 ethtype);
  int mlxsw_sp_port_pvid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid,
    		   u16 ethtype);
  struct mlxsw_sp_port_vlan *
@@@ -955,12 -939,6 +970,12 @@@ int mlxsw_sp_acl_rulei_act_count(struc
  int mlxsw_sp_acl_rulei_act_fid_set(struct mlxsw_sp *mlxsw_sp,
    			   struct mlxsw_sp_acl_rule_info *rulei,
    			   u16 fid, struct netlink_ext_ack *extack);
 +int mlxsw_sp_acl_rulei_act_sample(struct mlxsw_sp *mlxsw_sp,
 +				  struct mlxsw_sp_acl_rule_info *rulei,
 +				  struct mlxsw_sp_flow_block *block,
 +				  struct psample_group *psample_group, u32 rate,
 +				  u32 trunc_size, bool truncate,
 +				  struct netlink_ext_ack *extack);
struct mlxsw_sp_acl_rule;
@@@ -1070,19 -1048,6 +1085,19 @@@ extern const struct mlxsw_afk_ops mlxsw
  extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
/* spectrum_matchall.c */
 +struct mlxsw_sp_mall_ops {
 +	int (*sample_add)(struct mlxsw_sp *mlxsw_sp,
 +			  struct mlxsw_sp_port *mlxsw_sp_port,
 +			  struct mlxsw_sp_mall_entry *mall_entry,
 +			  struct netlink_ext_ack *extack);
 +	void (*sample_del)(struct mlxsw_sp *mlxsw_sp,
 +			   struct mlxsw_sp_port *mlxsw_sp_port,
 +			   struct mlxsw_sp_mall_entry *mall_entry);
 +};
 +
 +extern const struct mlxsw_sp_mall_ops mlxsw_sp1_mall_ops;
 +extern const struct mlxsw_sp_mall_ops mlxsw_sp2_mall_ops;
 +
  enum mlxsw_sp_mall_action_type {
    MLXSW_SP_MALL_ACTION_TYPE_MIRROR,
    MLXSW_SP_MALL_ACTION_TYPE_SAMPLE,
@@@ -1098,11 -1063,6 +1113,11 @@@ struct mlxsw_sp_mall_trap_entry 
    int span_id;
  };
+struct mlxsw_sp_mall_sample_entry {
 +	struct mlxsw_sp_sample_params params;
 +	int span_id;	/* Relevant for Spectrum-2 onwards. */
 +};
 +
  struct mlxsw_sp_mall_entry {
    struct list_head list;
    unsigned long cookie;
@@@ -1112,7 -1072,7 +1127,7 @@@
    union {
    	struct mlxsw_sp_mall_mirror_entry mirror;
    	struct mlxsw_sp_mall_trap_entry trap;
 -		struct mlxsw_sp_port_sample sample;
 +		struct mlxsw_sp_mall_sample_entry sample;
    };
    struct rcu_head rcu;
  };
@@@ -1123,8 -1083,7 +1138,8 @@@ int mlxsw_sp_mall_replace(struct mlxsw_
  void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block,
    		   struct tc_cls_matchall_offload *f);
  int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block,
 -			    struct mlxsw_sp_port *mlxsw_sp_port);
 +			    struct mlxsw_sp_port *mlxsw_sp_port,
 +			    struct netlink_ext_ack *extack);
  void mlxsw_sp_mall_port_unbind(struct mlxsw_sp_flow_block *block,
    		       struct mlxsw_sp_port *mlxsw_sp_port);
  int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index,
diff --combined drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
index b8b08a6a1d10,64a8f838eb53..5facabd86882
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c
@@@ -127,16 -127,14 +127,16 @@@ bool mlxsw_sp_l3addr_is_zero(union mlxs
static int
  mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
 -				  struct mlxsw_sp_ipip_entry *ipip_entry)
 +				  struct mlxsw_sp_ipip_entry *ipip_entry,
 +				  bool force, char *ratr_pl)
  {
    u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb);
    __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev);
 -	char ratr_pl[MLXSW_REG_RATR_LEN];
 +	enum mlxsw_reg_ratr_op op;
-	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
 -			    true, MLXSW_REG_RATR_TYPE_IPIP,
 +	op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY :
 +		     MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY;
 +	mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_IPIP,
    		    adj_index, rif_index);
    mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4));
@@@ -337,12 -335,11 +337,11 @@@ static int mlxsw_sp_ipip_ecn_decap_init
    				    u8 inner_ecn, u8 outer_ecn)
  {
    char tidem_pl[MLXSW_REG_TIDEM_LEN];
- 	bool trap_en, set_ce = false;
    u8 new_inner_ecn;
+ 	bool trap_en;
- 	trap_en = __INET_ECN_decapsulate(outer_ecn, inner_ecn, &set_ce);
- 	new_inner_ecn = set_ce ? INET_ECN_CE : inner_ecn;
- 
+ 	new_inner_ecn = mlxsw_sp_tunnel_ecn_decap(outer_ecn, inner_ecn,
+ 						  &trap_en);
    mlxsw_reg_tidem_pack(tidem_pl, outer_ecn, inner_ecn, new_inner_ecn,
    		     trap_en, trap_en ? MLXSW_TRAP_ID_DECAP_ECN0 : 0);
    return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tidem), tidem_pl);
diff --combined drivers/net/ethernet/microchip/lan743x_main.c
index e7ab5f3f73fd,7b6794aa8ea9..11a1dc4c436d
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@@ -885,8 -885,8 +885,8 @@@ static int lan743x_mac_set_mtu(struct l
    }
mac_rx &= ~(MAC_RX_MAX_SIZE_MASK_);
- 	mac_rx |= (((new_mtu + ETH_HLEN + 4) << MAC_RX_MAX_SIZE_SHIFT_) &
- 		  MAC_RX_MAX_SIZE_MASK_);
+ 	mac_rx |= (((new_mtu + ETH_HLEN + ETH_FCS_LEN)
+ 		  << MAC_RX_MAX_SIZE_SHIFT_) & MAC_RX_MAX_SIZE_MASK_);
    lan743x_csr_write(adapter, MAC_RX, mac_rx);
if (enabled) {
@@@ -1944,7 -1944,7 +1944,7 @@@ static int lan743x_rx_init_ring_element
    struct sk_buff *skb;
    dma_addr_t dma_ptr;
- 	buffer_length = netdev->mtu + ETH_HLEN + 4 + RX_HEAD_PADDING;
+ 	buffer_length = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + RX_HEAD_PADDING;
descriptor = &rx->ring_cpu_ptr[index];
    buffer_info = &rx->buffer_info[index];
@@@ -2040,7 -2040,7 +2040,7 @@@ lan743x_rx_trim_skb(struct sk_buff *skb
    	dev_kfree_skb_irq(skb);
    	return NULL;
    }
- 	frame_length = max_t(int, 0, frame_length - RX_HEAD_PADDING - 4);
+ 	frame_length = max_t(int, 0, frame_length - ETH_FCS_LEN);
    if (skb->len > frame_length) {
    	skb->tail -= skb->len - frame_length;
    	skb->len = frame_length;
@@@ -3004,7 -3004,7 +3004,7 @@@ static int lan743x_pm_suspend(struct de
    	lan743x_pm_set_wol(adapter);
/* Host sets PME_En, put D3hot */
 -	return pci_prepare_to_sleep(pdev);;
 +	return pci_prepare_to_sleep(pdev);
  }
static int lan743x_pm_resume(struct device *dev)
diff --combined drivers/net/ethernet/netronome/nfp/flower/main.h
index e13e26e72ca0,56833a41f3d2..31377923ea3d
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@@ -47,7 -47,6 +47,7 @@@ struct nfp_app
  #define NFP_FL_FEATS_PRE_TUN_RULES	BIT(6)
  #define NFP_FL_FEATS_IPV6_TUN		BIT(7)
  #define NFP_FL_FEATS_VLAN_QINQ		BIT(8)
 +#define NFP_FL_FEATS_QOS_PPS		BIT(9)
  #define NFP_FL_FEATS_HOST_ACK		BIT(31)
#define NFP_FL_ENABLE_FLOW_MERGE	BIT(0)
@@@ -62,8 -61,7 +62,8 @@@
    NFP_FL_FEATS_FLOW_MOD | \
    NFP_FL_FEATS_PRE_TUN_RULES | \
    NFP_FL_FEATS_IPV6_TUN | \
 -	NFP_FL_FEATS_VLAN_QINQ)
 +	NFP_FL_FEATS_VLAN_QINQ | \
 +	NFP_FL_FEATS_QOS_PPS)
struct nfp_fl_mask_id {
    struct circ_buf mask_id_free_list;
@@@ -192,6 -190,7 +192,7 @@@ struct nfp_fl_internal_ports 
   * @qos_rate_limiters:	Current active qos rate limiters
   * @qos_stats_lock:	Lock on qos stats updates
   * @pre_tun_rule_cnt:	Number of pre-tunnel rules offloaded
+  * @merge_table:	Hash table to store merged flows
   */
  struct nfp_flower_priv {
    struct nfp_app *app;
@@@ -225,6 -224,7 +226,7 @@@
    unsigned int qos_rate_limiters;
    spinlock_t qos_stats_lock; /* Protect the qos stats */
    int pre_tun_rule_cnt;
+ 	struct rhashtable merge_table;
  };
/**
@@@ -352,6 -352,12 +354,12 @@@ struct nfp_fl_payload_link 
  };
extern const struct rhashtable_params nfp_flower_table_params;
+ extern const struct rhashtable_params merge_table_params;
+ 
+ struct nfp_merge_info {
+ 	u64 parent_ctx;
+ 	struct rhash_head ht_node;
+ };
struct nfp_fl_stats_frame {
    __be32 stats_con_id;
diff --combined drivers/net/ethernet/xilinx/xilinx_axienet.h
index 708769349f76,aca7f82f6791..5b4d153b1492
--- a/drivers/net/ethernet/xilinx/xilinx_axienet.h
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h
@@@ -376,8 -376,6 +376,8 @@@ struct axidma_bd 
    struct sk_buff *skb;
  } __aligned(XAXIDMA_BD_MINIMUM_ALIGNMENT);
+#define XAE_NUM_MISC_CLOCKS 3
 +
  /**
   * struct axienet_local - axienet private per device data
   * @ndev:	Pointer for net_device to which it will be attached.
@@@ -387,8 -385,7 +387,8 @@@
   * @phylink_config: phylink configuration settings
   * @pcs_phy:	Reference to PCS/PMA PHY if used
   * @switch_x_sgmii: Whether switchable 1000BaseX/SGMII mode is enabled in the core
 - * @clk:	Clock for AXI bus
 + * @axi_clk:	AXI4-Lite bus clock
 + * @misc_clks:	Misc ethernet clocks (AXI4-Stream, Ref, MGT clocks)
   * @mii_bus:	Pointer to MII bus structure
   * @mii_clk_div: MII bus clock divider value
   * @regs_start: Resource start for axienet device addresses
@@@ -437,8 -434,7 +437,8 @@@ struct axienet_local
bool switch_x_sgmii;
-	struct clk *clk;
 +	struct clk *axi_clk;
 +	struct clk_bulk_data misc_clks[XAE_NUM_MISC_CLOCKS];
struct mii_bus *mii_bus;
    u8 mii_clk_div;
@@@ -508,6 -504,18 +508,18 @@@ static inline u32 axinet_ior_read_mcr(s
    return axienet_ior(lp, XAE_MDIO_MCR_OFFSET);
  }
+ static inline void axienet_lock_mii(struct axienet_local *lp)
+ {
+ 	if (lp->mii_bus)
+ 		mutex_lock(&lp->mii_bus->mdio_lock);
+ }
+ 
+ static inline void axienet_unlock_mii(struct axienet_local *lp)
+ {
+ 	if (lp->mii_bus)
+ 		mutex_unlock(&lp->mii_bus->mdio_lock);
+ }
+ 
  /**
   * axienet_iow - Memory mapped Axi Ethernet register write
   * @lp:         Pointer to axienet local structure
diff --combined drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index 92cf9051d557,f8f8654ea728..feb1aa4ec927
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@@ -1053,9 -1053,9 +1053,9 @@@ static int axienet_open(struct net_devi
     * including the MDIO. MDIO must be disabled before resetting.
     * Hold MDIO bus lock to avoid MDIO accesses during the reset.
     */
- 	mutex_lock(&lp->mii_bus->mdio_lock);
+ 	axienet_lock_mii(lp);
    ret = axienet_device_reset(ndev);
- 	mutex_unlock(&lp->mii_bus->mdio_lock);
+ 	axienet_unlock_mii(lp);
ret = phylink_of_phy_connect(lp->phylink, lp->dev->of_node, 0);
    if (ret) {
@@@ -1148,9 -1148,9 +1148,9 @@@ static int axienet_stop(struct net_devi
    }
/* Do a reset to ensure DMA is really stopped */
- 	mutex_lock(&lp->mii_bus->mdio_lock);
+ 	axienet_lock_mii(lp);
    __axienet_device_reset(lp);
- 	mutex_unlock(&lp->mii_bus->mdio_lock);
+ 	axienet_unlock_mii(lp);
cancel_work_sync(&lp->dma_err_task);
@@@ -1709,9 -1709,9 +1709,9 @@@ static void axienet_dma_err_handler(str
     * including the MDIO. MDIO must be disabled before resetting.
     * Hold MDIO bus lock to avoid MDIO accesses during the reset.
     */
- 	mutex_lock(&lp->mii_bus->mdio_lock);
+ 	axienet_lock_mii(lp);
    __axienet_device_reset(lp);
- 	mutex_unlock(&lp->mii_bus->mdio_lock);
+ 	axienet_unlock_mii(lp);
for (i = 0; i < lp->tx_bd_num; i++) {
    	cur_p = &lp->tx_bd_v[i];
@@@ -1863,39 -1863,22 +1863,39 @@@ static int axienet_probe(struct platfor
    lp->rx_bd_num = RX_BD_NUM_DEFAULT;
    lp->tx_bd_num = TX_BD_NUM_DEFAULT;
-	lp->clk = devm_clk_get_optional(&pdev->dev, NULL);
 -	if (IS_ERR(lp->clk)) {
 -		ret = PTR_ERR(lp->clk);
 +	lp->axi_clk = devm_clk_get_optional(&pdev->dev, "s_axi_lite_clk");
 +	if (!lp->axi_clk) {
 +		/* For backward compatibility, if named AXI clock is not present,
 +		 * treat the first clock specified as the AXI clock.
 +		 */
 +		lp->axi_clk = devm_clk_get_optional(&pdev->dev, NULL);
 +	}
 +	if (IS_ERR(lp->axi_clk)) {
 +		ret = PTR_ERR(lp->axi_clk);
    	goto free_netdev;
    }
 -	ret = clk_prepare_enable(lp->clk);
 +	ret = clk_prepare_enable(lp->axi_clk);
    if (ret) {
 -		dev_err(&pdev->dev, "Unable to enable clock: %d\n", ret);
 +		dev_err(&pdev->dev, "Unable to enable AXI clock: %d\n", ret);
    	goto free_netdev;
    }
+	lp->misc_clks[0].id = "axis_clk";
 +	lp->misc_clks[1].id = "ref_clk";
 +	lp->misc_clks[2].id = "mgt_clk";
 +
 +	ret = devm_clk_bulk_get_optional(&pdev->dev, XAE_NUM_MISC_CLOCKS, lp->misc_clks);
 +	if (ret)
 +		goto cleanup_clk;
 +
 +	ret = clk_bulk_prepare_enable(XAE_NUM_MISC_CLOCKS, lp->misc_clks);
 +	if (ret)
 +		goto cleanup_clk;
 +
    /* Map device registers */
    ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0);
    lp->regs = devm_ioremap_resource(&pdev->dev, ethres);
    if (IS_ERR(lp->regs)) {
 -		dev_err(&pdev->dev, "could not map Axi Ethernet regs.\n");
    	ret = PTR_ERR(lp->regs);
    	goto cleanup_clk;
    }
@@@ -2126,8 -2109,7 +2126,8 @@@ cleanup_mdio
    of_node_put(lp->phy_node);
cleanup_clk:
 -	clk_disable_unprepare(lp->clk);
 +	clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks);
 +	clk_disable_unprepare(lp->axi_clk);
free_netdev:
    free_netdev(ndev);
@@@ -2150,8 -2132,7 +2150,8 @@@ static int axienet_remove(struct platfo
axienet_mdio_teardown(lp);
-	clk_disable_unprepare(lp->clk);
 +	clk_bulk_disable_unprepare(XAE_NUM_MISC_CLOCKS, lp->misc_clks);
 +	clk_disable_unprepare(lp->axi_clk);
of_node_put(lp->phy_node);
    lp->phy_node = NULL;
diff --combined drivers/net/geneve.c
index 5d7a2b1469f4,d5b1e48e0c09..e3b2375ac5eb
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@@ -461,7 -461,6 +461,7 @@@ static struct socket *geneve_create_soc
    if (err < 0)
    	return ERR_PTR(err);
+	udp_allow_gso(sock->sk);
    return sock;
  }
@@@ -909,8 -908,16 +909,16 @@@ static int geneve_xmit_skb(struct sk_bu
info = skb_tunnel_info(skb);
    	if (info) {
- 			info->key.u.ipv4.dst = fl4.saddr;
- 			info->key.u.ipv4.src = fl4.daddr;
+ 			struct ip_tunnel_info *unclone;
+ 
+ 			unclone = skb_tunnel_info_unclone(skb);
+ 			if (unlikely(!unclone)) {
+ 				dst_release(&rt->dst);
+ 				return -ENOMEM;
+ 			}
+ 
+ 			unclone->key.u.ipv4.dst = fl4.saddr;
+ 			unclone->key.u.ipv4.src = fl4.daddr;
    	}
if (!pskb_may_pull(skb, ETH_HLEN)) {
@@@ -994,8 -1001,16 +1002,16 @@@ static int geneve6_xmit_skb(struct sk_b
    	struct ip_tunnel_info *info = skb_tunnel_info(skb);
if (info) {
- 			info->key.u.ipv6.dst = fl6.saddr;
- 			info->key.u.ipv6.src = fl6.daddr;
+ 			struct ip_tunnel_info *unclone;
+ 
+ 			unclone = skb_tunnel_info_unclone(skb);
+ 			if (unlikely(!unclone)) {
+ 				dst_release(dst);
+ 				return -ENOMEM;
+ 			}
+ 
+ 			unclone->key.u.ipv6.dst = fl6.saddr;
+ 			unclone->key.u.ipv6.src = fl6.daddr;
    	}
if (!pskb_may_pull(skb, ETH_HLEN)) {
diff --combined drivers/net/tun.c
index 6e55697315de,4cf38be26dc9..36443d506b67
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@@ -69,6 -69,14 +69,14 @@@
  #include <linux/bpf.h>
  #include <linux/bpf_trace.h>
  #include <linux/mutex.h>
+ #include <linux/ieee802154.h>
+ #include <linux/if_ltalk.h>
+ #include <uapi/linux/if_fddi.h>
+ #include <uapi/linux/if_hippi.h>
+ #include <uapi/linux/if_fc.h>
+ #include <net/ax25.h>
+ #include <net/rose.h>
+ #include <net/6lowpan.h>
#include <linux/uaccess.h>
  #include <linux/proc_fs.h>
@@@ -1181,7 -1189,8 +1189,7 @@@ static int tun_xdp_xmit(struct net_devi
    struct tun_struct *tun = netdev_priv(dev);
    struct tun_file *tfile;
    u32 numqueues;
 -	int drops = 0;
 -	int cnt = n;
 +	int nxmit = 0;
    int i;
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
@@@ -1211,9 -1220,9 +1219,9 @@@ resample
if (__ptr_ring_produce(&tfile->tx_ring, frame)) {
    		atomic_long_inc(&dev->tx_dropped);
 -			xdp_return_frame_rx_napi(xdp);
 -			drops++;
 +			break;
    	}
 +		nxmit++;
    }
    spin_unlock(&tfile->tx_ring.producer_lock);
@@@ -1221,21 -1230,17 +1229,21 @@@
    	__tun_xdp_flush_tfile(tfile);
rcu_read_unlock();
 -	return cnt - drops;
 +	return nxmit;
  }
static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
  {
    struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp);
 +	int nxmit;
if (unlikely(!frame))
    	return -EOVERFLOW;
-	return tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH);
 +	nxmit = tun_xdp_xmit(dev, 1, &frame, XDP_XMIT_FLUSH);
 +	if (!nxmit)
 +		xdp_return_frame_rx_napi(frame);
 +	return nxmit;
  }
static const struct net_device_ops tap_netdev_ops = {
@@@ -2922,6 -2927,45 +2930,45 @@@ static int tun_set_ebpf(struct tun_stru
    return __tun_set_ebpf(tun, prog_p, prog);
  }
+ /* Return correct value for tun->dev->addr_len based on tun->dev->type. */
+ static unsigned char tun_get_addr_len(unsigned short type)
+ {
+ 	switch (type) {
+ 	case ARPHRD_IP6GRE:
+ 	case ARPHRD_TUNNEL6:
+ 		return sizeof(struct in6_addr);
+ 	case ARPHRD_IPGRE:
+ 	case ARPHRD_TUNNEL:
+ 	case ARPHRD_SIT:
+ 		return 4;
+ 	case ARPHRD_ETHER:
+ 		return ETH_ALEN;
+ 	case ARPHRD_IEEE802154:
+ 	case ARPHRD_IEEE802154_MONITOR:
+ 		return IEEE802154_EXTENDED_ADDR_LEN;
+ 	case ARPHRD_PHONET_PIPE:
+ 	case ARPHRD_PPP:
+ 	case ARPHRD_NONE:
+ 		return 0;
+ 	case ARPHRD_6LOWPAN:
+ 		return EUI64_ADDR_LEN;
+ 	case ARPHRD_FDDI:
+ 		return FDDI_K_ALEN;
+ 	case ARPHRD_HIPPI:
+ 		return HIPPI_ALEN;
+ 	case ARPHRD_IEEE802:
+ 		return FC_ALEN;
+ 	case ARPHRD_ROSE:
+ 		return ROSE_ADDR_LEN;
+ 	case ARPHRD_NETROM:
+ 		return AX25_ADDR_LEN;
+ 	case ARPHRD_LOCALTLK:
+ 		return LTALK_ALEN;
+ 	default:
+ 		return 0;
+ 	}
+ }
+ 
  static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
    		    unsigned long arg, int ifreq_len)
  {
@@@ -3085,6 -3129,7 +3132,7 @@@
    			break;
    		}
    		tun->dev->type = (int) arg;
+ 			tun->dev->addr_len = tun_get_addr_len(tun->dev->type);
    		netif_info(tun, drv, tun->dev, "linktype set to %d\n",
    			   tun->dev->type);
    		call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
diff --combined drivers/net/virtio_net.c
index bb4ea9dbc16b,0824e6999e49..101659cd4b87
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@@ -195,9 -195,6 +195,9 @@@ struct virtnet_info 
    /* # of XDP queue pairs currently used by the driver */
    u16 xdp_queue_pairs;
+	/* xdp_queue_pairs may be 0, when xdp is already loaded. So add this. */
 +	bool xdp_enabled;
 +
    /* I like... big packets and I cannot lie! */
    bool big_packets;
@@@ -409,9 -406,13 +409,13 @@@ static struct sk_buff *page_to_skb(stru
    offset += hdr_padded_len;
    p += hdr_padded_len;
- 	copy = len;
- 	if (copy > skb_tailroom(skb))
- 		copy = skb_tailroom(skb);
+ 	/* Copy all frame if it fits skb->head, otherwise
+ 	 * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
+ 	 */
+ 	if (len <= skb_tailroom(skb))
+ 		copy = len;
+ 	else
+ 		copy = ETH_HLEN + metasize;
    skb_put_data(skb, p, copy);
if (metasize) {
@@@ -484,41 -485,12 +488,41 @@@ static int __virtnet_xdp_xmit_one(struc
    return 0;
  }
-static struct send_queue *virtnet_xdp_sq(struct virtnet_info *vi)
 -{
 -	unsigned int qp;
 -
 -	qp = vi->curr_queue_pairs - vi->xdp_queue_pairs + smp_processor_id();
 -	return &vi->sq[qp];
 +/* when vi->curr_queue_pairs > nr_cpu_ids, the txq/sq is only used for xdp tx on
 + * the current cpu, so it does not need to be locked.
 + *
 + * Here we use marco instead of inline functions because we have to deal with
 + * three issues at the same time: 1. the choice of sq. 2. judge and execute the
 + * lock/unlock of txq 3. make sparse happy. It is difficult for two inline
 + * functions to perfectly solve these three problems at the same time.
 + */
 +#define virtnet_xdp_get_sq(vi) ({                                       \
 +	struct netdev_queue *txq;                                       \
 +	typeof(vi) v = (vi);                                            \
 +	unsigned int qp;                                                \
 +									\
 +	if (v->curr_queue_pairs > nr_cpu_ids) {                         \
 +		qp = v->curr_queue_pairs - v->xdp_queue_pairs;          \
 +		qp += smp_processor_id();                               \
 +		txq = netdev_get_tx_queue(v->dev, qp);                  \
 +		__netif_tx_acquire(txq);                                \
 +	} else {                                                        \
 +		qp = smp_processor_id() % v->curr_queue_pairs;          \
 +		txq = netdev_get_tx_queue(v->dev, qp);                  \
 +		__netif_tx_lock(txq, raw_smp_processor_id());           \
 +	}                                                               \
 +	v->sq + qp;                                                     \
 +})
 +
 +#define virtnet_xdp_put_sq(vi, q) {                                     \
 +	struct netdev_queue *txq;                                       \
 +	typeof(vi) v = (vi);                                            \
 +									\
 +	txq = netdev_get_tx_queue(v->dev, (q) - v->sq);                 \
 +	if (v->curr_queue_pairs > nr_cpu_ids)                           \
 +		__netif_tx_release(txq);                                \
 +	else                                                            \
 +		__netif_tx_unlock(txq);                                 \
  }
static int virtnet_xdp_xmit(struct net_device *dev,
@@@ -531,10 -503,10 +535,10 @@@
    unsigned int len;
    int packets = 0;
    int bytes = 0;
 -	int drops = 0;
 +	int nxmit = 0;
    int kicks = 0;
 -	int ret, err;
    void *ptr;
 +	int ret;
    int i;
/* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this
@@@ -544,10 -516,11 +548,10 @@@
    if (!xdp_prog)
    	return -ENXIO;
-	sq = virtnet_xdp_sq(vi);
 +	sq = virtnet_xdp_get_sq(vi);
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
    	ret = -EINVAL;
 -		drops = n;
    	goto out;
    }
@@@ -570,11 -543,13 +574,11 @@@
    for (i = 0; i < n; i++) {
    	struct xdp_frame *xdpf = frames[i];
-		err = __virtnet_xdp_xmit_one(vi, sq, xdpf);
 -		if (err) {
 -			xdp_return_frame_rx_napi(xdpf);
 -			drops++;
 -		}
 +		if (__virtnet_xdp_xmit_one(vi, sq, xdpf))
 +			break;
 +		nxmit++;
    }
 -	ret = n - drops;
 +	ret = nxmit;
if (flags & XDP_XMIT_FLUSH) {
    	if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
@@@ -585,17 -560,16 +589,17 @@@ out
    sq->stats.bytes += bytes;
    sq->stats.packets += packets;
    sq->stats.xdp_tx += n;
 -	sq->stats.xdp_tx_drops += drops;
 +	sq->stats.xdp_tx_drops += n - nxmit;
    sq->stats.kicks += kicks;
    u64_stats_update_end(&sq->stats.syncp);
+	virtnet_xdp_put_sq(vi, sq);
    return ret;
  }
static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
  {
 -	return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
 +	return vi->xdp_enabled ? VIRTIO_XDP_HEADROOM : 0;
  }
/* We copy the packet for XDP in the following cases:
@@@ -739,9 -713,7 +743,9 @@@ static struct sk_buff *receive_small(st
    		if (unlikely(!xdpf))
    			goto err_xdp;
    		err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
 -			if (unlikely(err < 0)) {
 +			if (unlikely(!err)) {
 +				xdp_return_frame_rx_napi(xdpf);
 +			} else if (unlikely(err < 0)) {
    			trace_xdp_exception(vi->dev, xdp_prog, act);
    			goto err_xdp;
    		}
@@@ -928,9 -900,7 +932,9 @@@ static struct sk_buff *receive_mergeabl
    		if (unlikely(!xdpf))
    			goto err_xdp;
    		err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
 -			if (unlikely(err < 0)) {
 +			if (unlikely(!err)) {
 +				xdp_return_frame_rx_napi(xdpf);
 +			} else if (unlikely(err < 0)) {
    			trace_xdp_exception(vi->dev, xdp_prog, act);
    			if (unlikely(xdp_page != page))
    				put_page(xdp_page);
@@@ -1492,13 -1462,12 +1496,13 @@@ static int virtnet_poll(struct napi_str
    	xdp_do_flush();
if (xdp_xmit & VIRTIO_XDP_TX) {
 -		sq = virtnet_xdp_sq(vi);
 +		sq = virtnet_xdp_get_sq(vi);
    	if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
    		u64_stats_update_begin(&sq->stats.syncp);
    		sq->stats.kicks++;
    		u64_stats_update_end(&sq->stats.syncp);
    	}
 +		virtnet_xdp_put_sq(vi, sq);
    }
return received;
@@@ -2016,7 -1985,7 +2020,7 @@@ static void virtnet_set_affinity(struc
    	}
    	virtqueue_set_affinity(vi->rq[i].vq, mask);
    	virtqueue_set_affinity(vi->sq[i].vq, mask);
 -		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, false);
 +		__netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
    	cpumask_clear(mask);
    }
@@@ -2139,21 -2108,25 +2143,21 @@@ static int virtnet_set_channels(struct 
  static void virtnet_get_strings(struct net_device *dev, u32 stringset, u8 *data)
  {
    struct virtnet_info *vi = netdev_priv(dev);
 -	char *p = (char *)data;
    unsigned int i, j;
 +	u8 *p = data;
switch (stringset) {
    case ETH_SS_STATS:
    	for (i = 0; i < vi->curr_queue_pairs; i++) {
 -			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++) {
 -				snprintf(p, ETH_GSTRING_LEN, "rx_queue_%u_%s",
 -					 i, virtnet_rq_stats_desc[j].desc);
 -				p += ETH_GSTRING_LEN;
 -			}
 +			for (j = 0; j < VIRTNET_RQ_STATS_LEN; j++)
 +				ethtool_sprintf(&p, "rx_queue_%u_%s", i,
 +						virtnet_rq_stats_desc[j].desc);
    	}
for (i = 0; i < vi->curr_queue_pairs; i++) {
 -			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++) {
 -				snprintf(p, ETH_GSTRING_LEN, "tx_queue_%u_%s",
 -					 i, virtnet_sq_stats_desc[j].desc);
 -				p += ETH_GSTRING_LEN;
 -			}
 +			for (j = 0; j < VIRTNET_SQ_STATS_LEN; j++)
 +				ethtool_sprintf(&p, "tx_queue_%u_%s", i,
 +						virtnet_sq_stats_desc[j].desc);
    	}
    	break;
    }
@@@ -2449,9 -2422,10 +2453,9 @@@ static int virtnet_xdp_set(struct net_d
/* XDP requires extra queues for XDP_TX */
    if (curr_qp + xdp_qp > vi->max_queue_pairs) {
 -		NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available");
 -		netdev_warn(dev, "request %i queues but max is %i\n",
 +		netdev_warn(dev, "XDP request %i queues but max is %i. XDP_TX and XDP_REDIRECT will operate in a slower locked tx mode.\n",
    		    curr_qp + xdp_qp, vi->max_queue_pairs);
 -		return -ENOMEM;
 +		xdp_qp = 0;
    }
old_prog = rtnl_dereference(vi->rq[0].xdp_prog);
@@@ -2485,14 -2459,11 +2489,14 @@@
    vi->xdp_queue_pairs = xdp_qp;
if (prog) {
 +		vi->xdp_enabled = true;
    	for (i = 0; i < vi->max_queue_pairs; i++) {
    		rcu_assign_pointer(vi->rq[i].xdp_prog, prog);
    		if (i == 0 && !old_prog)
    			virtnet_clear_guest_offloads(vi);
    	}
 +	} else {
 +		vi->xdp_enabled = false;
    }
for (i = 0; i < vi->max_queue_pairs; i++) {
@@@ -2560,7 -2531,7 +2564,7 @@@ static int virtnet_set_features(struct 
    int err;
if ((dev->features ^ features) & NETIF_F_LRO) {
 -		if (vi->xdp_queue_pairs)
 +		if (vi->xdp_enabled)
    		return -EBUSY;
if (features & NETIF_F_LRO)
@@@ -3006,8 -2977,7 +3010,8 @@@ static int virtnet_probe(struct virtio_
    	return -ENOMEM;
/* Set up network device as normal. */
 -	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE;
 +	dev->priv_flags |= IFF_UNICAST_FLT | IFF_LIVE_ADDR_CHANGE |
 +			   IFF_TX_SKB_NO_LINEAR;
    dev->netdev_ops = &virtnet_netdev;
    dev->features = NETIF_F_HIGHDMA;
diff --combined drivers/net/vxlan.c
index 39ee1300cdd9,53dbc67e8a34..02a14f1b938a
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@@ -2725,12 -2725,17 +2725,17 @@@ static void vxlan_xmit_one(struct sk_bu
    		goto tx_error;
    	} else if (err) {
    		if (info) {
+ 				struct ip_tunnel_info *unclone;
    			struct in_addr src, dst;
+ 				unclone = skb_tunnel_info_unclone(skb);
+ 				if (unlikely(!unclone))
+ 					goto tx_error;
+ 
    			src = remote_ip.sin.sin_addr;
    			dst = local_ip.sin.sin_addr;
- 				info->key.u.ipv4.src = src.s_addr;
- 				info->key.u.ipv4.dst = dst.s_addr;
+ 				unclone->key.u.ipv4.src = src.s_addr;
+ 				unclone->key.u.ipv4.dst = dst.s_addr;
    		}
    		vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
    		dst_release(ndst);
@@@ -2781,12 -2786,17 +2786,17 @@@
    		goto tx_error;
    	} else if (err) {
    		if (info) {
+ 				struct ip_tunnel_info *unclone;
    			struct in6_addr src, dst;
+ 				unclone = skb_tunnel_info_unclone(skb);
+ 				if (unlikely(!unclone))
+ 					goto tx_error;
+ 
    			src = remote_ip.sin6.sin6_addr;
    			dst = local_ip.sin6.sin6_addr;
- 				info->key.u.ipv6.src = src;
- 				info->key.u.ipv6.dst = dst;
+ 				unclone->key.u.ipv6.src = src;
+ 				unclone->key.u.ipv6.dst = dst;
    		}
vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
@@@ -3484,7 -3494,6 +3494,7 @@@ static struct socket *vxlan_create_sock
    if (err < 0)
    	return ERR_PTR(err);
+	udp_allow_gso(sock->sk);
    return sock;
  }
@@@ -3704,7 -3713,6 +3714,7 @@@ static int vxlan_config_validate(struc
  #if IS_ENABLED(CONFIG_IPV6)
    	if (use_ipv6) {
    		struct inet6_dev *idev = __in6_dev_get(lowerdev);
 +
    		if (idev && idev->cnf.disable_ipv6) {
    			NL_SET_ERR_MSG(extack,
    				       "IPv6 support disabled by administrator");
diff --combined include/linux/avf/virtchnl.h
index 47482049f640,532bcbfc4716..40dd6afbfd81
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@@ -136,9 -136,6 +136,9 @@@ enum virtchnl_ops 
    VIRTCHNL_OP_DISABLE_CHANNELS = 31,
    VIRTCHNL_OP_ADD_CLOUD_FILTER = 32,
    VIRTCHNL_OP_DEL_CLOUD_FILTER = 33,
 +	/* opcode 34 - 46 are reserved */
 +	VIRTCHNL_OP_ADD_FDIR_FILTER = 47,
 +	VIRTCHNL_OP_DEL_FDIR_FILTER = 48,
  };
/* These macros are used to generate compilation errors if a structure/union
@@@ -250,7 -247,6 +250,7 @@@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_
  #define VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM		0X00200000
  #define VIRTCHNL_VF_OFFLOAD_RX_ENCAP_CSUM	0X00400000
  #define VIRTCHNL_VF_OFFLOAD_ADQ			0X00800000
 +#define VIRTCHNL_VF_OFFLOAD_FDIR_PF		0X10000000
/* Define below the capability flags that are not offloads */
  #define VIRTCHNL_VF_CAP_ADV_LINK_SPEED		0x00000080
@@@ -480,7 -476,6 +480,6 @@@ struct virtchnl_rss_key 
    u16 vsi_id;
    u16 key_len;
    u8 key[1];         /* RSS hash key, packed bytes */
- 	u8 pad[1];
  };
VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key);
@@@ -489,7 -484,6 +488,6 @@@ struct virtchnl_rss_lut 
    u16 vsi_id;
    u16 lut_entries;
    u8 lut[1];        /* RSS lookup table */
- 	u8 pad[1];
  };
VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut);
@@@ -563,11 -557,6 +561,11 @@@ enum virtchnl_action 
    /* action types */
    VIRTCHNL_ACTION_DROP = 0,
    VIRTCHNL_ACTION_TC_REDIRECT,
 +	VIRTCHNL_ACTION_PASSTHRU,
 +	VIRTCHNL_ACTION_QUEUE,
 +	VIRTCHNL_ACTION_Q_REGION,
 +	VIRTCHNL_ACTION_MARK,
 +	VIRTCHNL_ACTION_COUNT,
  };
enum virtchnl_flow_type {
@@@ -677,269 -666,6 +675,269 @@@ enum virtchnl_vfr_states 
    VIRTCHNL_VFR_VFACTIVE,
  };
+#define VIRTCHNL_MAX_NUM_PROTO_HDRS	32
 +#define PROTO_HDR_SHIFT			5
 +#define PROTO_HDR_FIELD_START(proto_hdr_type) ((proto_hdr_type) << PROTO_HDR_SHIFT)
 +#define PROTO_HDR_FIELD_MASK ((1UL << PROTO_HDR_SHIFT) - 1)
 +
 +/* VF use these macros to configure each protocol header.
 + * Specify which protocol headers and protocol header fields base on
 + * virtchnl_proto_hdr_type and virtchnl_proto_hdr_field.
 + * @param hdr: a struct of virtchnl_proto_hdr
 + * @param hdr_type: ETH/IPV4/TCP, etc
 + * @param field: SRC/DST/TEID/SPI, etc
 + */
 +#define VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, field) \
 +	((hdr)->field_selector |= BIT((field) & PROTO_HDR_FIELD_MASK))
 +#define VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, field) \
 +	((hdr)->field_selector &= ~BIT((field) & PROTO_HDR_FIELD_MASK))
 +#define VIRTCHNL_TEST_PROTO_HDR_FIELD(hdr, val) \
 +	((hdr)->field_selector & BIT((val) & PROTO_HDR_FIELD_MASK))
 +#define VIRTCHNL_GET_PROTO_HDR_FIELD(hdr)	((hdr)->field_selector)
 +
 +#define VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \
 +	(VIRTCHNL_ADD_PROTO_HDR_FIELD(hdr, \
 +		VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field))
 +#define VIRTCHNL_DEL_PROTO_HDR_FIELD_BIT(hdr, hdr_type, field) \
 +	(VIRTCHNL_DEL_PROTO_HDR_FIELD(hdr, \
 +		VIRTCHNL_PROTO_HDR_ ## hdr_type ## _ ## field))
 +
 +#define VIRTCHNL_SET_PROTO_HDR_TYPE(hdr, hdr_type) \
 +	((hdr)->type = VIRTCHNL_PROTO_HDR_ ## hdr_type)
 +#define VIRTCHNL_GET_PROTO_HDR_TYPE(hdr) \
 +	(((hdr)->type) >> PROTO_HDR_SHIFT)
 +#define VIRTCHNL_TEST_PROTO_HDR_TYPE(hdr, val) \
 +	((hdr)->type == ((val) >> PROTO_HDR_SHIFT))
 +#define VIRTCHNL_TEST_PROTO_HDR(hdr, val) \
 +	(VIRTCHNL_TEST_PROTO_HDR_TYPE((hdr), (val)) && \
 +	 VIRTCHNL_TEST_PROTO_HDR_FIELD((hdr), (val)))
 +
 +/* Protocol header type within a packet segment. A segment consists of one or
 + * more protocol headers that make up a logical group of protocol headers. Each
 + * logical group of protocol headers encapsulates or is encapsulated using/by
 + * tunneling or encapsulation protocols for network virtualization.
 + */
 +enum virtchnl_proto_hdr_type {
 +	VIRTCHNL_PROTO_HDR_NONE,
 +	VIRTCHNL_PROTO_HDR_ETH,
 +	VIRTCHNL_PROTO_HDR_S_VLAN,
 +	VIRTCHNL_PROTO_HDR_C_VLAN,
 +	VIRTCHNL_PROTO_HDR_IPV4,
 +	VIRTCHNL_PROTO_HDR_IPV6,
 +	VIRTCHNL_PROTO_HDR_TCP,
 +	VIRTCHNL_PROTO_HDR_UDP,
 +	VIRTCHNL_PROTO_HDR_SCTP,
 +	VIRTCHNL_PROTO_HDR_GTPU_IP,
 +	VIRTCHNL_PROTO_HDR_GTPU_EH,
 +	VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN,
 +	VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP,
 +	VIRTCHNL_PROTO_HDR_PPPOE,
 +	VIRTCHNL_PROTO_HDR_L2TPV3,
 +	VIRTCHNL_PROTO_HDR_ESP,
 +	VIRTCHNL_PROTO_HDR_AH,
 +	VIRTCHNL_PROTO_HDR_PFCP,
 +};
 +
 +/* Protocol header field within a protocol header. */
 +enum virtchnl_proto_hdr_field {
 +	/* ETHER */
 +	VIRTCHNL_PROTO_HDR_ETH_SRC =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ETH),
 +	VIRTCHNL_PROTO_HDR_ETH_DST,
 +	VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE,
 +	/* S-VLAN */
 +	VIRTCHNL_PROTO_HDR_S_VLAN_ID =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_S_VLAN),
 +	/* C-VLAN */
 +	VIRTCHNL_PROTO_HDR_C_VLAN_ID =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_C_VLAN),
 +	/* IPV4 */
 +	VIRTCHNL_PROTO_HDR_IPV4_SRC =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV4),
 +	VIRTCHNL_PROTO_HDR_IPV4_DST,
 +	VIRTCHNL_PROTO_HDR_IPV4_DSCP,
 +	VIRTCHNL_PROTO_HDR_IPV4_TTL,
 +	VIRTCHNL_PROTO_HDR_IPV4_PROT,
 +	/* IPV6 */
 +	VIRTCHNL_PROTO_HDR_IPV6_SRC =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_IPV6),
 +	VIRTCHNL_PROTO_HDR_IPV6_DST,
 +	VIRTCHNL_PROTO_HDR_IPV6_TC,
 +	VIRTCHNL_PROTO_HDR_IPV6_HOP_LIMIT,
 +	VIRTCHNL_PROTO_HDR_IPV6_PROT,
 +	/* TCP */
 +	VIRTCHNL_PROTO_HDR_TCP_SRC_PORT =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_TCP),
 +	VIRTCHNL_PROTO_HDR_TCP_DST_PORT,
 +	/* UDP */
 +	VIRTCHNL_PROTO_HDR_UDP_SRC_PORT =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_UDP),
 +	VIRTCHNL_PROTO_HDR_UDP_DST_PORT,
 +	/* SCTP */
 +	VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_SCTP),
 +	VIRTCHNL_PROTO_HDR_SCTP_DST_PORT,
 +	/* GTPU_IP */
 +	VIRTCHNL_PROTO_HDR_GTPU_IP_TEID =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_IP),
 +	/* GTPU_EH */
 +	VIRTCHNL_PROTO_HDR_GTPU_EH_PDU =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_GTPU_EH),
 +	VIRTCHNL_PROTO_HDR_GTPU_EH_QFI,
 +	/* PPPOE */
 +	VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PPPOE),
 +	/* L2TPV3 */
 +	VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_L2TPV3),
 +	/* ESP */
 +	VIRTCHNL_PROTO_HDR_ESP_SPI =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_ESP),
 +	/* AH */
 +	VIRTCHNL_PROTO_HDR_AH_SPI =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_AH),
 +	/* PFCP */
 +	VIRTCHNL_PROTO_HDR_PFCP_S_FIELD =
 +		PROTO_HDR_FIELD_START(VIRTCHNL_PROTO_HDR_PFCP),
 +	VIRTCHNL_PROTO_HDR_PFCP_SEID,
 +};
 +
 +struct virtchnl_proto_hdr {
 +	enum virtchnl_proto_hdr_type type;
 +	u32 field_selector; /* a bit mask to select field for header type */
 +	u8 buffer[64];
 +	/**
 +	 * binary buffer in network order for specific header type.
 +	 * For example, if type = VIRTCHNL_PROTO_HDR_IPV4, a IPv4
 +	 * header is expected to be copied into the buffer.
 +	 */
 +};
 +
 +VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_proto_hdr);
 +
 +struct virtchnl_proto_hdrs {
 +	u8 tunnel_level;
 +	/**
 +	 * specify where protocol header start from.
 +	 * 0 - from the outer layer
 +	 * 1 - from the first inner layer
 +	 * 2 - from the second inner layer
 +	 * ....
 +	 **/
 +	int count; /* the proto layers must < VIRTCHNL_MAX_NUM_PROTO_HDRS */
 +	struct virtchnl_proto_hdr proto_hdr[VIRTCHNL_MAX_NUM_PROTO_HDRS];
 +};
 +
 +VIRTCHNL_CHECK_STRUCT_LEN(2312, virtchnl_proto_hdrs);
 +
 +/* action configuration for FDIR */
 +struct virtchnl_filter_action {
 +	enum virtchnl_action type;
 +	union {
 +		/* used for queue and qgroup action */
 +		struct {
 +			u16 index;
 +			u8 region;
 +		} queue;
 +		/* used for count action */
 +		struct {
 +			/* share counter ID with other flow rules */
 +			u8 shared;
 +			u32 id; /* counter ID */
 +		} count;
 +		/* used for mark action */
 +		u32 mark_id;
 +		u8 reserve[32];
 +	} act_conf;
 +};
 +
 +VIRTCHNL_CHECK_STRUCT_LEN(36, virtchnl_filter_action);
 +
 +#define VIRTCHNL_MAX_NUM_ACTIONS  8
 +
 +struct virtchnl_filter_action_set {
 +	/* action number must be less then VIRTCHNL_MAX_NUM_ACTIONS */
 +	int count;
 +	struct virtchnl_filter_action actions[VIRTCHNL_MAX_NUM_ACTIONS];
 +};
 +
 +VIRTCHNL_CHECK_STRUCT_LEN(292, virtchnl_filter_action_set);
 +
 +/* pattern and action for FDIR rule */
 +struct virtchnl_fdir_rule {
 +	struct virtchnl_proto_hdrs proto_hdrs;
 +	struct virtchnl_filter_action_set action_set;
 +};
 +
 +VIRTCHNL_CHECK_STRUCT_LEN(2604, virtchnl_fdir_rule);
 +
 +/* Status returned to VF after VF requests FDIR commands
 + * VIRTCHNL_FDIR_SUCCESS
 + * VF FDIR related request is successfully done by PF
 + * The request can be OP_ADD/DEL.
 + *
 + * VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE
 + * OP_ADD_FDIR_FILTER request is failed due to no Hardware resource.
 + *
 + * VIRTCHNL_FDIR_FAILURE_RULE_EXIST
 + * OP_ADD_FDIR_FILTER request is failed due to the rule is already existed.
 + *
 + * VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT
 + * OP_ADD_FDIR_FILTER request is failed due to conflict with existing rule.
 + *
 + * VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST
 + * OP_DEL_FDIR_FILTER request is failed due to this rule doesn't exist.
 + *
 + * VIRTCHNL_FDIR_FAILURE_RULE_INVALID
 + * OP_ADD_FDIR_FILTER request is failed due to parameters validation
 + * or HW doesn't support.
 + *
 + * VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT
 + * OP_ADD/DEL_FDIR_FILTER request is failed due to timing out
 + * for programming.
 + */
 +enum virtchnl_fdir_prgm_status {
 +	VIRTCHNL_FDIR_SUCCESS = 0,
 +	VIRTCHNL_FDIR_FAILURE_RULE_NORESOURCE,
 +	VIRTCHNL_FDIR_FAILURE_RULE_EXIST,
 +	VIRTCHNL_FDIR_FAILURE_RULE_CONFLICT,
 +	VIRTCHNL_FDIR_FAILURE_RULE_NONEXIST,
 +	VIRTCHNL_FDIR_FAILURE_RULE_INVALID,
 +	VIRTCHNL_FDIR_FAILURE_RULE_TIMEOUT,
 +};
 +
 +/* VIRTCHNL_OP_ADD_FDIR_FILTER
 + * VF sends this request to PF by filling out vsi_id,
 + * validate_only and rule_cfg. PF will return flow_id
 + * if the request is successfully done and return add_status to VF.
 + */
 +struct virtchnl_fdir_add {
 +	u16 vsi_id;  /* INPUT */
 +	/*
 +	 * 1 for validating a fdir rule, 0 for creating a fdir rule.
 +	 * Validate and create share one ops: VIRTCHNL_OP_ADD_FDIR_FILTER.
 +	 */
 +	u16 validate_only; /* INPUT */
 +	u32 flow_id;       /* OUTPUT */
 +	struct virtchnl_fdir_rule rule_cfg; /* INPUT */
 +	enum virtchnl_fdir_prgm_status status; /* OUTPUT */
 +};
 +
 +VIRTCHNL_CHECK_STRUCT_LEN(2616, virtchnl_fdir_add);
 +
 +/* VIRTCHNL_OP_DEL_FDIR_FILTER
 + * VF sends this request to PF by filling out vsi_id
 + * and flow_id. PF will return del_status to VF.
 + */
 +struct virtchnl_fdir_del {
 +	u16 vsi_id;  /* INPUT */
 +	u16 pad;
 +	u32 flow_id; /* INPUT */
 +	enum virtchnl_fdir_prgm_status status; /* OUTPUT */
 +};
 +
 +VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_fdir_del);
 +
  /**
   * virtchnl_vc_validate_vf_msg
   * @ver: Virtchnl version info
@@@ -1100,12 -826,6 +1098,12 @@@ virtchnl_vc_validate_vf_msg(struct virt
    case VIRTCHNL_OP_DEL_CLOUD_FILTER:
    	valid_len = sizeof(struct virtchnl_filter);
    	break;
 +	case VIRTCHNL_OP_ADD_FDIR_FILTER:
 +		valid_len = sizeof(struct virtchnl_fdir_add);
 +		break;
 +	case VIRTCHNL_OP_DEL_FDIR_FILTER:
 +		valid_len = sizeof(struct virtchnl_fdir_del);
 +		break;
    /* These are always errors coming from the VF. */
    case VIRTCHNL_OP_EVENT:
    case VIRTCHNL_OP_UNKNOWN:
diff --combined include/linux/bpf.h
index 9fdd839b418c,fdac0534ce79..c9b7a876b0c8
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@@ -40,7 -40,7 +40,8 @@@ struct bpf_local_storage
  struct bpf_local_storage_map;
  struct kobject;
  struct mem_cgroup;
+ struct module;
 +struct bpf_func_state;
extern struct idr btf_idr;
  extern spinlock_t btf_idr_lock;
@@@ -56,7 -56,7 +57,7 @@@ struct bpf_iter_seq_info 
    u32 seq_priv_size;
  };
-/* map is generic key/value storage optionally accesible by eBPF programs */
 +/* map is generic key/value storage optionally accessible by eBPF programs */
  struct bpf_map_ops {
    /* funcs callable from userspace (via syscall) */
    int (*map_alloc_check)(union bpf_attr *attr);
@@@ -119,9 -119,6 +120,9 @@@
    				   void *owner, u32 size);
    struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
+	/* Misc helpers.*/
 +	int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags);
 +
    /* map_meta_equal must be implemented for maps that can be
     * used as an inner map.  It is a runtime check to ensure
     * an inner map can be inserted to an outer map.
@@@ -134,13 -131,6 +135,13 @@@
    bool (*map_meta_equal)(const struct bpf_map *meta0,
    		       const struct bpf_map *meta1);
+
 +	int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env,
 +					      struct bpf_func_state *caller,
 +					      struct bpf_func_state *callee);
 +	int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn,
 +				     void *callback_ctx, u64 flags);
 +
    /* BTF name and id of struct allocated by map_alloc */
    const char * const map_btf_name;
    int *map_btf_id;
@@@ -307,8 -297,6 +308,8 @@@ enum bpf_arg_type 
    ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
    ARG_PTR_TO_BTF_ID_SOCK_COMMON,	/* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */
    ARG_PTR_TO_PERCPU_BTF_ID,	/* pointer to in-kernel percpu type */
 +	ARG_PTR_TO_FUNC,	/* pointer to a bpf program function */
 +	ARG_PTR_TO_STACK_OR_NULL,	/* pointer to stack or NULL */
    __BPF_ARG_TYPE_MAX,
  };
@@@ -425,9 -413,6 +426,9 @@@ enum bpf_reg_type 
    PTR_TO_RDWR_BUF,	 /* reg points to a read/write buffer */
    PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
    PTR_TO_PERCPU_BTF_ID,	 /* reg points to a percpu kernel variable */
 +	PTR_TO_FUNC,		 /* reg points to a bpf program function */
 +	PTR_TO_MAP_KEY,		 /* reg points to a map element key */
 +	__BPF_REG_TYPE_MAX,
  };
/* The information passed from prog-specific *_is_valid_access
@@@ -481,7 -466,6 +482,7 @@@ struct bpf_verifier_ops 
    			 const struct btf_type *t, int off, int size,
    			 enum bpf_access_type atype,
    			 u32 *next_btf_id);
 +	bool (*check_kfunc_call)(u32 kfunc_btf_id);
  };
struct bpf_prog_offload_ops {
@@@ -524,11 -508,6 +525,11 @@@ enum bpf_cgroup_storage_type 
   */
  #define MAX_BPF_FUNC_ARGS 12
+/* The maximum number of arguments passed through registers
 + * a single function may have.
 + */
 +#define MAX_BPF_FUNC_REG_ARGS 5
 +
  struct btf_func_model {
    u8 ret_size;
    u8 nr_args;
@@@ -645,6 -624,7 +646,7 @@@ struct bpf_trampoline 
    /* Executable image of trampoline */
    struct bpf_tramp_image *cur_image;
    u64 selector;
+ 	struct module *mod;
  };
struct bpf_attach_target_info {
@@@ -798,8 -778,6 +800,8 @@@ struct btf_mod_pair 
    struct module *module;
  };
+struct bpf_kfunc_desc_tab;
 +
  struct bpf_prog_aux {
    atomic64_t refcnt;
    u32 used_map_cnt;
@@@ -836,7 -814,6 +838,7 @@@
    struct bpf_prog **func;
    void *jit_data; /* JIT specific data. arch dependent */
    struct bpf_jit_poke_descriptor *poke_tab;
 +	struct bpf_kfunc_desc_tab *kfunc_tab;
    u32 size_poke_tab;
    struct bpf_ksym ksym;
    const struct bpf_prog_ops *ops;
@@@ -1111,13 -1088,6 +1113,13 @@@ int bpf_prog_array_copy(struct bpf_prog
  /* BPF program asks to set CN on the packet. */
  #define BPF_RET_SET_CN						(1 << 0)
+/* For BPF_PROG_RUN_ARRAY_FLAGS and __BPF_PROG_RUN_ARRAY,
 + * if bpf_cgroup_storage_set() failed, the rest of programs
 + * will not execute. This should be a really rare scenario
 + * as it requires BPF_CGROUP_STORAGE_NEST_MAX number of
 + * preemptions all between bpf_cgroup_storage_set() and
 + * bpf_cgroup_storage_unset() on the same cpu.
 + */
  #define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags)		\
    ({								\
    	struct bpf_prog_array_item *_item;			\
@@@ -1130,12 -1100,10 +1132,12 @@@
    	_array = rcu_dereference(array);			\
    	_item = &_array->items[0];				\
    	while ((_prog = READ_ONCE(_item->prog))) {		\
 -			bpf_cgroup_storage_set(_item->cgroup_storage);	\
 +			if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))	\
 +				break;					\
    		func_ret = func(_prog, ctx);			\
    		_ret &= (func_ret & 1);				\
    		*(ret_flags) |= (func_ret >> 1);			\
 +			bpf_cgroup_storage_unset();			\
    		_item++;					\
    	}							\
    	rcu_read_unlock();					\
@@@ -1156,14 -1124,9 +1158,14 @@@
    		goto _out;			\
    	_item = &_array->items[0];		\
    	while ((_prog = READ_ONCE(_item->prog))) {		\
 -			if (set_cg_storage)		\
 -				bpf_cgroup_storage_set(_item->cgroup_storage);	\
 -			_ret &= func(_prog, ctx);	\
 +			if (!set_cg_storage) {			\
 +				_ret &= func(_prog, ctx);	\
 +			} else {				\
 +				if (unlikely(bpf_cgroup_storage_set(_item->cgroup_storage)))	\
 +					break;			\
 +				_ret &= func(_prog, ctx);	\
 +				bpf_cgroup_storage_unset();	\
 +			}				\
    		_item++;			\
    	}					\
  _out:							\
@@@ -1436,10 -1399,6 +1438,10 @@@ void bpf_iter_map_show_fdinfo(const str
  int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux,
    			struct bpf_link_info *info);
+int map_set_for_each_callback_args(struct bpf_verifier_env *env,
 +				   struct bpf_func_state *caller,
 +				   struct bpf_func_state *callee);
 +
  int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
  int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
  int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
@@@ -1489,9 -1448,9 +1491,9 @@@ struct btf *bpf_get_btf_vmlinux(void)
  /* Map specifics */
  struct xdp_buff;
  struct sk_buff;
 +struct bpf_dtab_netdev;
 +struct bpf_cpu_map_entry;
-struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
 -struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
  void __dev_flush(void);
  int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
    	    struct net_device *dev_rx);
@@@ -1501,6 -1460,7 +1503,6 @@@ int dev_map_generic_redirect(struct bpf
    		     struct bpf_prog *xdp_prog);
  bool dev_map_can_have_prog(struct bpf_map *map);
-struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
  void __cpu_map_flush(void);
  int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
    	    struct net_device *dev_rx);
@@@ -1529,10 -1489,6 +1531,10 @@@ int bpf_prog_test_run_flow_dissector(st
  int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
    		     const union bpf_attr *kattr,
    		     union bpf_attr __user *uattr);
 +int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
 +				const union bpf_attr *kattr,
 +				union bpf_attr __user *uattr);
 +bool bpf_prog_test_check_kfunc_call(u32 kfunc_id);
  bool btf_ctx_access(int off, int size, enum bpf_access_type type,
    	    const struct bpf_prog *prog,
    	    struct bpf_insn_access_aux *info);
@@@ -1551,11 -1507,8 +1553,11 @@@ int btf_distill_func_proto(struct bpf_v
    		   struct btf_func_model *m);
struct bpf_reg_state;
 -int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
 -			     struct bpf_reg_state *regs);
 +int btf_check_subprog_arg_match(struct bpf_verifier_env *env, int subprog,
 +				struct bpf_reg_state *regs);
 +int btf_check_kfunc_arg_match(struct bpf_verifier_env *env,
 +			      const struct btf *btf, u32 func_id,
 +			      struct bpf_reg_state *regs);
  int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog,
    		  struct bpf_reg_state *reg);
  int btf_check_type_match(struct bpf_verifier_log *log, const struct bpf_prog *prog,
@@@ -1565,11 -1518,6 +1567,11 @@@ struct bpf_prog *bpf_prog_by_id(u32 id)
  struct bpf_link *bpf_link_by_id(u32 id);
const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
 +void bpf_task_storage_free(struct task_struct *task);
 +bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
 +const struct btf_func_model *
 +bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
 +			 const struct bpf_insn *insn);
  #else /* !CONFIG_BPF_SYSCALL */
  static inline struct bpf_prog *bpf_prog_get(u32 ufd)
  {
@@@ -1639,6 -1587,17 +1641,6 @@@ static inline int bpf_obj_get_user(cons
    return -EOPNOTSUPP;
  }
-static inline struct net_device  *__dev_map_lookup_elem(struct bpf_map *map,
 -						       u32 key)
 -{
 -	return NULL;
 -}
 -
 -static inline struct net_device  *__dev_map_hash_lookup_elem(struct bpf_map *map,
 -							     u32 key)
 -{
 -	return NULL;
 -}
  static inline bool dev_map_can_have_prog(struct bpf_map *map)
  {
    return false;
@@@ -1650,7 -1609,6 +1652,7 @@@ static inline void __dev_flush(void
struct xdp_buff;
  struct bpf_dtab_netdev;
 +struct bpf_cpu_map_entry;
static inline
  int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
@@@ -1675,6 -1633,12 +1677,6 @@@ static inline int dev_map_generic_redir
    return 0;
  }
-static inline
 -struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
 -{
 -	return NULL;
 -}
 -
  static inline void __cpu_map_flush(void)
  {
  }
@@@ -1725,18 -1689,6 +1727,18 @@@ static inline int bpf_prog_test_run_flo
    return -ENOTSUPP;
  }
+static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog,
 +					      const union bpf_attr *kattr,
 +					      union bpf_attr __user *uattr)
 +{
 +	return -ENOTSUPP;
 +}
 +
 +static inline bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
 +{
 +	return false;
 +}
 +
  static inline void bpf_map_put(struct bpf_map *map)
  {
  }
@@@ -1751,22 -1703,6 +1753,22 @@@ bpf_base_func_proto(enum bpf_func_id fu
  {
    return NULL;
  }
 +
 +static inline void bpf_task_storage_free(struct task_struct *task)
 +{
 +}
 +
 +static inline bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
 +{
 +	return false;
 +}
 +
 +static inline const struct btf_func_model *
 +bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
 +			 const struct bpf_insn *insn)
 +{
 +	return NULL;
 +}
  #endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux,
@@@ -1851,24 -1787,22 +1853,24 @@@ static inline void bpf_map_offload_map_
  }
  #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
-#if defined(CONFIG_BPF_STREAM_PARSER)
 -int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog,
 -			 struct bpf_prog *old, u32 which);
 +#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
  int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog);
  int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype);
  int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags);
  void sock_map_unhash(struct sock *sk);
  void sock_map_close(struct sock *sk, long timeout);
 +
 +void bpf_sk_reuseport_detach(struct sock *sk);
 +int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
 +				       void *value);
 +int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
 +				       void *value, u64 map_flags);
  #else
 -static inline int sock_map_prog_update(struct bpf_map *map,
 -				       struct bpf_prog *prog,
 -				       struct bpf_prog *old, u32 which)
 +static inline void bpf_sk_reuseport_detach(struct sock *sk)
  {
 -	return -EOPNOTSUPP;
  }
+#ifdef CONFIG_BPF_SYSCALL
  static inline int sock_map_get_from_fd(const union bpf_attr *attr,
    			       struct bpf_prog *prog)
  {
@@@ -1886,7 -1820,20 +1888,7 @@@ static inline int sock_map_update_elem_
  {
    return -EOPNOTSUPP;
  }
 -#endif /* CONFIG_BPF_STREAM_PARSER */
-#if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL)
 -void bpf_sk_reuseport_detach(struct sock *sk);
 -int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key,
 -				       void *value);
 -int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key,
 -				       void *value, u64 map_flags);
 -#else
 -static inline void bpf_sk_reuseport_detach(struct sock *sk)
 -{
 -}
 -
 -#ifdef CONFIG_BPF_SYSCALL
  static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map,
    					     void *key, void *value)
  {
@@@ -1958,9 -1905,6 +1960,9 @@@ extern const struct bpf_func_proto bpf_
  extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
  extern const struct bpf_func_proto bpf_sock_from_file_proto;
  extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
 +extern const struct bpf_func_proto bpf_task_storage_get_proto;
 +extern const struct bpf_func_proto bpf_task_storage_delete_proto;
 +extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
    enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --combined include/linux/ethtool.h
index 5c631a298994,cdca84e6dd6b..4290e2fa3117
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@@ -87,9 -87,7 +87,7 @@@ u32 ethtool_op_get_link(struct net_devi
  int ethtool_op_get_ts_info(struct net_device *dev, struct ethtool_ts_info *eti);
- /**
-  * struct ethtool_link_ext_state_info - link extended state and substate.
-  */
+ /* Link extended state and substate. */
  struct ethtool_link_ext_state_info {
    enum ethtool_link_ext_state link_ext_state;
    union {
@@@ -129,7 -127,6 +127,6 @@@ struct ethtool_link_ksettings 
    	__ETHTOOL_DECLARE_LINK_MODE_MASK(lp_advertising);
    } link_modes;
    u32	lanes;
- 	enum ethtool_link_mode_bit_indices link_mode;
  };
/**
@@@ -292,6 -289,9 +289,9 @@@ struct ethtool_pause_stats 
   *	do not attach ext_substate attribute to netlink message). If link_ext_state
   *	and link_ext_substate are unknown, return -ENODATA. If not implemented,
   *	link_ext_state and link_ext_substate will not be sent to userspace.
+  * @get_eeprom_len: Read range of EEPROM addresses for validation of
+  *	@get_eeprom and @set_eeprom requests.
+  *	Returns 0 if device does not support EEPROM access.
   * @get_eeprom: Read data from the device EEPROM.
   *	Should fill in the magic field.  Don't need to check len for zero
   *	or wraparound.  Fill in the data argument with the eeprom values
@@@ -384,6 -384,8 +384,8 @@@
   * @get_module_eeprom: Get the eeprom information from the plug-in module
   * @get_eee: Get Energy-Efficient (EEE) supported and status.
   * @set_eee: Set EEE status (enable/disable) as well as LPI timers.
+  * @get_tunable: Read the value of a driver / device tunable.
+  * @set_tunable: Set the value of a driver / device tunable.
   * @get_per_queue_coalesce: Get interrupt coalescing parameters per queue.
   *	It must check that the given queue number is valid. If neither a RX nor
   *	a TX queue has this number, return -EINVAL. If only a RX queue or a TX
@@@ -410,8 -412,6 +412,8 @@@
   * @get_ethtool_phy_stats: Return extended statistics about the PHY device.
   *	This is only useful if the device maintains PHY statistics and
   *	cannot use the standard PHY library helpers.
 + * @get_phy_tunable: Read the value of a PHY tunable.
 + * @set_phy_tunable: Set the value of a PHY tunable.
   *
   * All operations are optional (i.e. the function pointer may be set
   * to %NULL) and callers must take this into account.  Callers must
@@@ -549,8 -549,8 +551,8 @@@ struct phy_tdr_config
   * @get_sset_count: Get number of strings that @get_strings will write.
   * @get_strings: Return a set of strings that describe the requested objects
   * @get_stats: Return extended statistics about the PHY device.
-  * @start_cable_test - Start a cable test
-  * @start_cable_test_tdr - Start a Time Domain Reflectometry cable test
+  * @start_cable_test: Start a cable test
+  * @start_cable_test_tdr: Start a Time Domain Reflectometry cable test
   *
   * All operations are optional (i.e. the function pointer may be set to %NULL)
   * and callers must take this into account. Callers must hold the RTNL lock.
@@@ -573,13 -573,12 +575,22 @@@ struct ethtool_phy_ops 
   */
  void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops);
-/*
++/**
+  * ethtool_params_from_link_mode - Derive link parameters from a given link mode
+  * @link_ksettings: Link parameters to be derived from the link mode
+  * @link_mode: Link mode
+  */
+ void
+ ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings,
+ 			      enum ethtool_link_mode_bit_indices link_mode);
++
 +/**
 + * ethtool_sprintf - Write formatted string to ethtool string data
 + * @data: Pointer to start of string to update
 + * @fmt: Format of string to write
 + *
 + * Write formatted string to data. Update data to point at start of
 + * next string.
 + */
 +extern __printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...);
  #endif /* _LINUX_ETHTOOL_H */
diff --combined include/linux/mlx5/mlx5_ifc.h
index 432290b58a0b,9c68b2da14c6..1599deee0456
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@@ -437,11 -437,11 +437,11 @@@ struct mlx5_ifc_flow_table_prop_layout_
    u8         reserved_at_60[0x18];
    u8         log_max_ft_num[0x8];
- 	u8         reserved_at_80[0x18];
+ 	u8         reserved_at_80[0x10];
+ 	u8         log_max_flow_counter[0x8];
    u8         log_max_destination[0x8];
- 	u8         log_max_flow_counter[0x8];
- 	u8         reserved_at_a8[0x10];
+ 	u8         reserved_at_a0[0x18];
    u8         log_max_flow[0x8];
u8         reserved_at_c0[0x40];
@@@ -806,11 -806,9 +806,11 @@@ struct mlx5_ifc_e_switch_cap_bits 
    u8         vport_svlan_insert[0x1];
    u8         vport_cvlan_insert_if_not_exist[0x1];
    u8         vport_cvlan_insert_overwrite[0x1];
 -	u8         reserved_at_5[0x3];
 +	u8         reserved_at_5[0x2];
 +	u8         esw_shared_ingress_acl[0x1];
    u8         esw_uplink_ingress_acl[0x1];
 -	u8         reserved_at_9[0x10];
 +	u8         root_ft_on_other_esw[0x1];
 +	u8         reserved_at_a[0xf];
    u8         esw_functions_changed[0x1];
    u8         reserved_at_1a[0x1];
    u8         ecpf_vport_exists[0x1];
@@@ -1504,8 -1502,7 +1504,8 @@@ struct mlx5_ifc_cmd_hca_cap_bits 
    u8         reserved_at_270[0x6];
    u8         lag_dct[0x2];
    u8         lag_tx_port_affinity[0x1];
 -	u8         reserved_at_279[0x2];
 +	u8         lag_native_fdb_selection[0x1];
 +	u8         reserved_at_27a[0x1];
    u8         lag_master[0x1];
    u8         num_lag_ports[0x4];
@@@ -1683,16 -1680,7 +1683,16 @@@
    u8	   reserved_at_6e0[0x10];
    u8	   sf_base_id[0x10];
-	u8	   reserved_at_700[0x80];
 +	u8	   reserved_at_700[0x8];
 +	u8	   num_total_dynamic_vf_msix[0x18];
 +	u8	   reserved_at_720[0x14];
 +	u8	   dynamic_msix_table_size[0xc];
 +	u8	   reserved_at_740[0xc];
 +	u8	   min_dynamic_vf_msix_table_size[0x4];
 +	u8	   reserved_at_750[0x4];
 +	u8	   max_dynamic_vf_msix_table_size[0xc];
 +
 +	u8	   reserved_at_760[0x20];
    u8	   vhca_tunnel_commands[0x40];
    u8	   reserved_at_7c0[0x40];
  };
@@@ -8847,6 -8835,8 +8847,8 @@@ struct mlx5_ifc_pplm_reg_bits
u8         fec_override_admin_100g_2x[0x10];
    u8         fec_override_admin_50g_1x[0x10];
+ 
+ 	u8         reserved_at_140[0x140];
  };
struct mlx5_ifc_ppcnt_reg_bits {
@@@ -10048,19 -10038,14 +10050,19 @@@ struct mlx5_ifc_set_flow_table_root_in_
    u8         reserved_at_60[0x20];
u8         table_type[0x8];
 -	u8         reserved_at_88[0x18];
 +	u8         reserved_at_88[0x7];
 +	u8         table_of_other_vport[0x1];
 +	u8         table_vport_number[0x10];
u8         reserved_at_a0[0x8];
    u8         table_id[0x18];
u8         reserved_at_c0[0x8];
    u8         underlay_qpn[0x18];
 -	u8         reserved_at_e0[0x120];
 +	u8         table_eswitch_owner_vhca_id_valid[0x1];
 +	u8         reserved_at_e1[0xf];
 +	u8         table_eswitch_owner_vhca_id[0x10];
 +	u8         reserved_at_100[0x100];
  };
enum {
@@@ -10215,7 -10200,7 +10217,7 @@@ struct mlx5_ifc_pbmc_reg_bits
struct mlx5_ifc_bufferx_reg_bits buffer[10];
- 	u8         reserved_at_2e0[0x40];
+ 	u8         reserved_at_2e0[0x80];
  };
struct mlx5_ifc_qtct_reg_bits {
@@@ -10290,8 -10275,7 +10292,8 @@@ struct mlx5_ifc_dcbx_param_bits 
  };
struct mlx5_ifc_lagc_bits {
 -	u8         reserved_at_0[0x1d];
 +	u8         fdb_selection_mode[0x1];
 +	u8         reserved_at_1[0x1c];
    u8         lag_state[0x3];
u8         reserved_at_20[0x14];
diff --combined include/linux/skmsg.h
index f78e90a04a69,822c048934e3..e242bf3d2b4a
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@@ -56,8 -56,7 +56,8 @@@ struct sk_msg
struct sk_psock_progs {
    struct bpf_prog			*msg_parser;
 -	struct bpf_prog			*skb_parser;
 +	struct bpf_prog			*stream_parser;
 +	struct bpf_prog			*stream_verdict;
    struct bpf_prog			*skb_verdict;
  };
@@@ -71,6 -70,12 +71,6 @@@ struct sk_psock_link 
    void				*link_raw;
  };
-struct sk_psock_parser {
 -	struct strparser		strp;
 -	bool				enabled;
 -	void (*saved_data_ready)(struct sock *sk);
 -};
 -
  struct sk_psock_work_state {
    struct sk_buff			*skb;
    u32				len;
@@@ -85,12 -90,9 +85,12 @@@ struct sk_psock 
    u32				eval;
    struct sk_msg			*cork;
    struct sk_psock_progs		progs;
 -	struct sk_psock_parser		parser;
 +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
 +	struct strparser		strp;
 +#endif
    struct sk_buff_head		ingress_skb;
    struct list_head		ingress_msg;
 +	spinlock_t			ingress_lock;
    unsigned long			state;
    struct list_head		link;
    spinlock_t			link_lock;
@@@ -98,13 -100,13 +98,13 @@@
    void (*saved_unhash)(struct sock *sk);
    void (*saved_close)(struct sock *sk, long timeout);
    void (*saved_write_space)(struct sock *sk);
 +	void (*saved_data_ready)(struct sock *sk);
 +	int  (*psock_update_sk_prot)(struct sock *sk, bool restore);
    struct proto			*sk_proto;
 +	struct mutex			work_mutex;
    struct sk_psock_work_state	work_state;
    struct work_struct		work;
 -	union {
 -		struct rcu_head		rcu;
 -		struct work_struct	gc;
 -	};
 +	struct rcu_work			rwork;
  };
int sk_msg_alloc(struct sock *sk, struct sk_msg *msg, int len,
@@@ -125,10 -127,6 +125,10 @@@ int sk_msg_zerocopy_from_iter(struct so
    		      struct sk_msg *msg, u32 bytes);
  int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from,
    		     struct sk_msg *msg, u32 bytes);
 +int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
 +		     long timeo, int *err);
 +int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 +		   int len, int flags);
static inline void sk_msg_check_to_free(struct sk_msg *msg, u32 i, u32 bytes)
  {
@@@ -289,45 -287,7 +289,45 @@@ static inline struct sk_psock *sk_psock
  static inline void sk_psock_queue_msg(struct sk_psock *psock,
    			      struct sk_msg *msg)
  {
 +	spin_lock_bh(&psock->ingress_lock);
    list_add_tail(&msg->list, &psock->ingress_msg);
 +	spin_unlock_bh(&psock->ingress_lock);
 +}
 +
 +static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock)
 +{
 +	struct sk_msg *msg;
 +
 +	spin_lock_bh(&psock->ingress_lock);
 +	msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
 +	if (msg)
 +		list_del(&msg->list);
 +	spin_unlock_bh(&psock->ingress_lock);
 +	return msg;
 +}
 +
 +static inline struct sk_msg *sk_psock_peek_msg(struct sk_psock *psock)
 +{
 +	struct sk_msg *msg;
 +
 +	spin_lock_bh(&psock->ingress_lock);
 +	msg = list_first_entry_or_null(&psock->ingress_msg, struct sk_msg, list);
 +	spin_unlock_bh(&psock->ingress_lock);
 +	return msg;
 +}
 +
 +static inline struct sk_msg *sk_psock_next_msg(struct sk_psock *psock,
 +					       struct sk_msg *msg)
 +{
 +	struct sk_msg *ret;
 +
 +	spin_lock_bh(&psock->ingress_lock);
 +	if (list_is_last(&msg->list, &psock->ingress_msg))
 +		ret = NULL;
 +	else
 +		ret = list_next_entry(msg, list);
 +	spin_unlock_bh(&psock->ingress_lock);
 +	return ret;
  }
static inline bool sk_psock_queue_empty(const struct sk_psock *psock)
@@@ -335,13 -295,6 +335,13 @@@
    return psock ? list_empty(&psock->ingress_msg) : true;
  }
+static inline void kfree_sk_msg(struct sk_msg *msg)
 +{
 +	if (msg->skb)
 +		consume_skb(msg->skb);
 +	kfree(msg);
 +}
 +
  static inline void sk_psock_report_error(struct sk_psock *psock, int err)
  {
    struct sock *sk = psock->sk;
@@@ -351,27 -304,10 +351,27 @@@
  }
struct sk_psock *sk_psock_init(struct sock *sk, int node);
 +void sk_psock_stop(struct sk_psock *psock, bool wait);
+#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
  int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
  void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
  void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock);
 +#else
 +static inline int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
 +{
 +	return -EOPNOTSUPP;
 +}
 +
 +static inline void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
 +{
 +}
 +
 +static inline void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
 +{
 +}
 +#endif
 +
  void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock);
  void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock);
@@@ -391,6 -327,8 +391,6 @@@ static inline void sk_psock_free_link(s
struct sk_psock_link *sk_psock_link_pop(struct sk_psock *psock);
-void __sk_psock_purge_ingress_msg(struct sk_psock *psock);
 -
  static inline void sk_psock_cork_free(struct sk_psock *psock)
  {
    if (psock->cork) {
@@@ -400,12 -338,30 +400,11 @@@
    }
  }
-static inline void sk_psock_update_proto(struct sock *sk,
 -					 struct sk_psock *psock,
 -					 struct proto *ops)
 -{
 -	/* Pairs with lockless read in sk_clone_lock() */
 -	WRITE_ONCE(sk->sk_prot, ops);
 -}
 -
  static inline void sk_psock_restore_proto(struct sock *sk,
    				  struct sk_psock *psock)
  {
- 	sk->sk_prot->unhash = psock->saved_unhash;
 -	if (inet_csk_has_ulp(sk)) {
 -		/* TLS does not have an unhash proto in SW cases, but we need
 -		 * to ensure we stop using the sock_map unhash routine because
 -		 * the associated psock is being removed. So use the original
 -		 * unhash handler.
 -		 */
 -		WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash);
 -		tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
 -	} else {
 -		sk->sk_write_space = psock->saved_write_space;
 -		/* Pairs with lockless read in sk_clone_lock() */
 -		WRITE_ONCE(sk->sk_prot, psock->sk_proto);
 -	}
 +	if (psock->psock_update_sk_prot)
 +		psock->psock_update_sk_prot(sk, true);
  }
static inline void sk_psock_set_state(struct sk_psock *psock,
@@@ -438,6 -394,7 +437,6 @@@ static inline struct sk_psock *sk_psock
    return psock;
  }
-void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
  void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)
@@@ -448,8 -405,8 +447,8 @@@
static inline void sk_psock_data_ready(struct sock *sk, struct sk_psock *psock)
  {
 -	if (psock->parser.enabled)
 -		psock->parser.saved_data_ready(sk);
 +	if (psock->saved_data_ready)
 +		psock->saved_data_ready(sk);
    else
    	sk->sk_data_ready(sk);
  }
@@@ -478,8 -435,7 +477,8 @@@ static inline int psock_replace_prog(st
  static inline void psock_progs_drop(struct sk_psock_progs *progs)
  {
    psock_set_prog(&progs->msg_parser, NULL);
 -	psock_set_prog(&progs->skb_parser, NULL);
 +	psock_set_prog(&progs->stream_parser, NULL);
 +	psock_set_prog(&progs->stream_verdict, NULL);
    psock_set_prog(&progs->skb_verdict, NULL);
  }
@@@ -489,44 -445,6 +488,44 @@@ static inline bool sk_psock_strp_enable
  {
    if (!psock)
    	return false;
 -	return psock->parser.enabled;
 +	return !!psock->saved_data_ready;
 +}
 +
 +#if IS_ENABLED(CONFIG_NET_SOCK_MSG)
 +
 +/* We only have one bit so far. */
 +#define BPF_F_PTR_MASK ~(BPF_F_INGRESS)
 +
 +static inline bool skb_bpf_ingress(const struct sk_buff *skb)
 +{
 +	unsigned long sk_redir = skb->_sk_redir;
 +
 +	return sk_redir & BPF_F_INGRESS;
 +}
 +
 +static inline void skb_bpf_set_ingress(struct sk_buff *skb)
 +{
 +	skb->_sk_redir |= BPF_F_INGRESS;
 +}
 +
 +static inline void skb_bpf_set_redir(struct sk_buff *skb, struct sock *sk_redir,
 +				     bool ingress)
 +{
 +	skb->_sk_redir = (unsigned long)sk_redir;
 +	if (ingress)
 +		skb->_sk_redir |= BPF_F_INGRESS;
 +}
 +
 +static inline struct sock *skb_bpf_redirect_fetch(const struct sk_buff *skb)
 +{
 +	unsigned long sk_redir = skb->_sk_redir;
 +
 +	return (struct sock *)(sk_redir & BPF_F_PTR_MASK);
 +}
 +
 +static inline void skb_bpf_redirect_clear(struct sk_buff *skb)
 +{
 +	skb->_sk_redir = 0;
  }
 +#endif /* CONFIG_NET_SOCK_MSG */
  #endif /* _LINUX_SKMSG_H */
diff --combined include/net/sock.h
index 8b4155e756c2,8487f58da36d..cadcc12cc316
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@@ -934,9 -934,13 +934,13 @@@ static inline void sk_acceptq_added(str
    WRITE_ONCE(sk->sk_ack_backlog, sk->sk_ack_backlog + 1);
  }
+ /* Note: If you think the test should be:
+  *	return READ_ONCE(sk->sk_ack_backlog) >= READ_ONCE(sk->sk_max_ack_backlog);
+  * Then please take a look at commit 64a146513f8f ("[NET]: Revert incorrect accept queue backlog changes.")
+  */
  static inline bool sk_acceptq_is_full(const struct sock *sk)
  {
- 	return READ_ONCE(sk->sk_ack_backlog) >= READ_ONCE(sk->sk_max_ack_backlog);
+ 	return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog);
  }
/*
@@@ -1184,9 -1188,6 +1188,9 @@@ struct proto 
    void			(*unhash)(struct sock *sk);
    void			(*rehash)(struct sock *sk);
    int			(*get_port)(struct sock *sk, unsigned short snum);
 +#ifdef CONFIG_BPF_SYSCALL
 +	int			(*psock_update_sk_prot)(struct sock *sk, bool restore);
 +#endif
/* Keeping track of sockets in use */
  #ifdef CONFIG_PROC_FS
@@@ -2224,6 -2225,15 +2228,15 @@@ static inline void skb_set_owner_r(stru
    sk_mem_charge(sk, skb->truesize);
  }
+ static inline void skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk)
+ {
+ 	if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) {
+ 		skb_orphan(skb);
+ 		skb->destructor = sock_efree;
+ 		skb->sk = sk;
+ 	}
+ }
+ 
  void sk_reset_timer(struct sock *sk, struct timer_list *timer,
    	    unsigned long expires);
diff --combined include/uapi/linux/ethtool.h
index 868b513d4f54,5afea692a3f7..f91e079e3108
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@@ -26,6 -26,14 +26,14 @@@
   * have the same layout for 32-bit and 64-bit userland.
   */
+ /* Note on reserved space.
+  * Reserved fields must not be accessed directly by user space because
+  * they may be replaced by a different field in the future. They must
+  * be initialized to zero before making the request, e.g. via memset
+  * of the entire structure or implicitly by not being set in a structure
+  * initializer.
+  */
+ 
  /**
   * struct ethtool_cmd - DEPRECATED, link control and status
   * This structure is DEPRECATED, please use struct ethtool_link_settings.
@@@ -67,6 -75,7 +75,7 @@@
   *	and other link features that the link partner advertised
   *	through autonegotiation; 0 if unknown or not applicable.
   *	Read-only.
+  * @reserved: Reserved for future use; see the note on reserved space.
   *
   * The link speed in Mbps is split between @speed and @speed_hi.  Use
   * the ethtool_cmd_speed() and ethtool_cmd_speed_set() functions to
@@@ -155,6 -164,7 +164,7 @@@ static inline __u32 ethtool_cmd_speed(c
   * @bus_info: Device bus address.  This should match the dev_name()
   *	string for the underlying bus device, if there is one.  May be
   *	an empty string.
+  * @reserved2: Reserved for future use; see the note on reserved space.
   * @n_priv_flags: Number of flags valid for %ETHTOOL_GPFLAGS and
   *	%ETHTOOL_SPFLAGS commands; also the number of strings in the
   *	%ETH_SS_PRIV_FLAGS set
@@@ -356,6 -366,7 +366,7 @@@ struct ethtool_eeprom 
   * @tx_lpi_timer: Time in microseconds the interface delays prior to asserting
   *	its tx lpi (after reaching 'idle' state). Effective only when eee
   *	was negotiated and tx_lpi_enabled was set.
+  * @reserved: Reserved for future use; see the note on reserved space.
   */
  struct ethtool_eee {
    __u32	cmd;
@@@ -374,6 -385,7 +385,7 @@@
   * @cmd: %ETHTOOL_GMODULEINFO
   * @type: Standard the module information conforms to %ETH_MODULE_SFF_xxxx
   * @eeprom_len: Length of the eeprom
+  * @reserved: Reserved for future use; see the note on reserved space.
   *
   * This structure is used to return the information to
   * properly size memory for a subsequent call to %ETHTOOL_GMODULEEEPROM.
@@@ -579,9 -591,7 +591,7 @@@ struct ethtool_pauseparam 
    __u32	tx_pause;
  };
- /**
-  * enum ethtool_link_ext_state - link extended state
-  */
+ /* Link extended state */
  enum ethtool_link_ext_state {
    ETHTOOL_LINK_EXT_STATE_AUTONEG,
    ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE,
@@@ -595,10 -605,7 +605,7 @@@
    ETHTOOL_LINK_EXT_STATE_OVERHEAT,
  };
- /**
-  * enum ethtool_link_ext_substate_autoneg - more information in addition to
-  * ETHTOOL_LINK_EXT_STATE_AUTONEG.
-  */
+ /* More information in addition to ETHTOOL_LINK_EXT_STATE_AUTONEG. */
  enum ethtool_link_ext_substate_autoneg {
    ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_PARTNER_DETECTED = 1,
    ETHTOOL_LINK_EXT_SUBSTATE_AN_ACK_NOT_RECEIVED,
@@@ -608,9 -615,7 +615,7 @@@
    ETHTOOL_LINK_EXT_SUBSTATE_AN_NO_HCD,
  };
- /**
-  * enum ethtool_link_ext_substate_link_training - more information in addition to
-  * ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE.
+ /* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_TRAINING_FAILURE.
   */
  enum ethtool_link_ext_substate_link_training {
    ETHTOOL_LINK_EXT_SUBSTATE_LT_KR_FRAME_LOCK_NOT_ACQUIRED = 1,
@@@ -619,9 -624,7 +624,7 @@@
    ETHTOOL_LINK_EXT_SUBSTATE_LT_REMOTE_FAULT,
  };
- /**
-  * enum ethtool_link_ext_substate_logical_mismatch - more information in addition
-  * to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH.
+ /* More information in addition to ETHTOOL_LINK_EXT_STATE_LINK_LOGICAL_MISMATCH.
   */
  enum ethtool_link_ext_substate_link_logical_mismatch {
    ETHTOOL_LINK_EXT_SUBSTATE_LLM_PCS_DID_NOT_ACQUIRE_BLOCK_LOCK = 1,
@@@ -631,19 -634,14 +634,14 @@@
    ETHTOOL_LINK_EXT_SUBSTATE_LLM_RS_FEC_IS_NOT_LOCKED,
  };
- /**
-  * enum ethtool_link_ext_substate_bad_signal_integrity - more information in
-  * addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY.
+ /* More information in addition to ETHTOOL_LINK_EXT_STATE_BAD_SIGNAL_INTEGRITY.
   */
  enum ethtool_link_ext_substate_bad_signal_integrity {
    ETHTOOL_LINK_EXT_SUBSTATE_BSI_LARGE_NUMBER_OF_PHYSICAL_ERRORS = 1,
    ETHTOOL_LINK_EXT_SUBSTATE_BSI_UNSUPPORTED_RATE,
  };
- /**
-  * enum ethtool_link_ext_substate_cable_issue - more information in
-  * addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE.
-  */
+ /* More information in addition to ETHTOOL_LINK_EXT_STATE_CABLE_ISSUE. */
  enum ethtool_link_ext_substate_cable_issue {
    ETHTOOL_LINK_EXT_SUBSTATE_CI_UNSUPPORTED_CABLE = 1,
    ETHTOOL_LINK_EXT_SUBSTATE_CI_CABLE_TEST_FAILURE,
@@@ -661,6 -659,7 +659,7 @@@
   *	now deprecated
   * @ETH_SS_FEATURES: Device feature names
   * @ETH_SS_RSS_HASH_FUNCS: RSS hash function names
+  * @ETH_SS_TUNABLES: tunable names
   * @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS
   * @ETH_SS_PHY_TUNABLES: PHY tunable names
   * @ETH_SS_LINK_MODES: link mode names
@@@ -670,6 -669,8 +669,8 @@@
   * @ETH_SS_TS_TX_TYPES: timestamping Tx types
   * @ETH_SS_TS_RX_FILTERS: timestamping Rx filters
   * @ETH_SS_UDP_TUNNEL_TYPES: UDP tunnel types
+  *
+  * @ETH_SS_COUNT: number of defined string sets
   */
  enum ethtool_stringset {
    ETH_SS_TEST		= 0,
@@@ -715,6 -716,7 +716,7 @@@ struct ethtool_gstrings 
  /**
   * struct ethtool_sset_info - string set information
   * @cmd: Command number = %ETHTOOL_GSSET_INFO
+  * @reserved: Reserved for future use; see the note on reserved space.
   * @sset_mask: On entry, a bitmask of string sets to query, with bits
   *	numbered according to &enum ethtool_stringset.  On return, a
   *	bitmask of those string sets queried that are supported.
@@@ -759,6 -761,7 +761,7 @@@ enum ethtool_test_flags 
   * @flags: A bitmask of flags from &enum ethtool_test_flags.  Some
   *	flags may be set by the user on entry; others may be set by
   *	the driver on return.
+  * @reserved: Reserved for future use; see the note on reserved space.
   * @len: On return, the number of test results
   * @data: Array of test results
   *
@@@ -959,6 -962,7 +962,7 @@@ union ethtool_flow_union 
   * @vlan_etype: VLAN EtherType
   * @vlan_tci: VLAN tag control information
   * @data: user defined data
+  * @padding: Reserved for future use; see the note on reserved space.
   *
   * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT
   * is set in &struct ethtool_rx_flow_spec @flow_type.
@@@ -1134,7 -1138,8 +1138,8 @@@ struct ethtool_rxfh_indir 
   *	hardware hash key.
   * @hfunc: Defines the current RSS hash function used by HW (or to be set to).
   *	Valid values are one of the %ETH_RSS_HASH_*.
-  * @rsvd:	Reserved for future extensions.
+  * @rsvd8: Reserved for future use; see the note on reserved space.
+  * @rsvd32: Reserved for future use; see the note on reserved space.
   * @rss_config: RX ring/queue index for each hash value i.e., indirection table
   *	of @indir_size __u32 elements, followed by hash key of @key_size
   *	bytes.
@@@ -1302,7 -1307,9 +1307,9 @@@ struct ethtool_sfeatures 
   * @so_timestamping: bit mask of the sum of the supported SO_TIMESTAMPING flags
   * @phc_index: device index of the associated PHC, or -1 if there is none
   * @tx_types: bit mask of the supported hwtstamp_tx_types enumeration values
+  * @tx_reserved: Reserved for future use; see the note on reserved space.
   * @rx_filters: bit mask of the supported hwtstamp_rx_filters enumeration values
+  * @rx_reserved: Reserved for future use; see the note on reserved space.
   *
   * The bits in the 'tx_types' and 'rx_filters' fields correspond to
   * the 'hwtstamp_tx_types' and 'hwtstamp_rx_filters' enumeration values,
@@@ -1376,33 -1383,15 +1383,33 @@@ struct ethtool_per_queue_op 
  };
/**
 - * struct ethtool_fecparam - Ethernet forward error correction(fec) parameters
 + * struct ethtool_fecparam - Ethernet Forward Error Correction parameters
   * @cmd: Command number = %ETHTOOL_GFECPARAM or %ETHTOOL_SFECPARAM
 - * @active_fec: FEC mode which is active on porte
 - * @fec: Bitmask of supported/configured FEC modes
 - * @rsvd: Reserved for future extensions. i.e FEC bypass feature.
 + * @active_fec: FEC mode which is active on the port, single bit set, GET only.
 + * @fec: Bitmask of configured FEC modes.
 + * @reserved: Reserved for future extensions, ignore on GET, write 0 for SET.
   *
 - * Drivers should reject a non-zero setting of @autoneg when
 - * autoneogotiation is disabled (or not supported) for the link.
 + * Note that @reserved was never validated on input and ethtool user space
 + * left it uninitialized when calling SET. Hence going forward it can only be
 + * used to return a value to userspace with GET.
 + *
 + * FEC modes supported by the device can be read via %ETHTOOL_GLINKSETTINGS.
 + * FEC settings are configured by link autonegotiation whenever it's enabled.
 + * With autoneg on %ETHTOOL_GFECPARAM can be used to read the current mode.
 + *
 + * When autoneg is disabled %ETHTOOL_SFECPARAM controls the FEC settings.
 + * It is recommended that drivers only accept a single bit set in @fec.
 + * When multiple bits are set in @fec drivers may pick mode in an implementation
 + * dependent way. Drivers should reject mixing %ETHTOOL_FEC_AUTO_BIT with other
 + * FEC modes, because it's unclear whether in this case other modes constrain
 + * AUTO or are independent choices.
 + * Drivers must reject SET requests if they support none of the requested modes.
 + *
 + * If device does not support FEC, drivers may use %ETHTOOL_FEC_NONE instead
 + * of returning %EOPNOTSUPP from %ETHTOOL_GFECPARAM.
   *
 + * See enum ethtool_fec_config_bits for definition of valid bits for both
 + * @fec and @active_fec.
   */
  struct ethtool_fecparam {
    __u32   cmd;
@@@ -1414,16 -1403,11 +1421,16 @@@
/**
   * enum ethtool_fec_config_bits - flags definition of ethtool_fec_configuration
 - * @ETHTOOL_FEC_NONE: FEC mode configuration is not supported
 - * @ETHTOOL_FEC_AUTO: Default/Best FEC mode provided by driver
 - * @ETHTOOL_FEC_OFF: No FEC Mode
 - * @ETHTOOL_FEC_RS: Reed-Solomon Forward Error Detection mode
 - * @ETHTOOL_FEC_BASER: Base-R/Reed-Solomon Forward Error Detection mode
 + * @ETHTOOL_FEC_NONE_BIT: FEC mode configuration is not supported. Should not
 + *			be used together with other bits. GET only.
 + * @ETHTOOL_FEC_AUTO_BIT: Select default/best FEC mode automatically, usually
 + *			based on link mode and SFP parameters read from module's
 + *			EEPROM. This bit does _not_ mean autonegotiation.
 + * @ETHTOOL_FEC_OFF_BIT: No FEC Mode
 + * @ETHTOOL_FEC_RS_BIT: Reed-Solomon FEC Mode
 + * @ETHTOOL_FEC_BASER_BIT: Base-R/Reed-Solomon FEC Mode
 + * @ETHTOOL_FEC_LLRS_BIT: Low Latency Reed Solomon FEC Mode (25G/50G Ethernet
 + *			Consortium)
   */
  enum ethtool_fec_config_bits {
    ETHTOOL_FEC_NONE_BIT,
@@@ -1981,6 -1965,11 +1988,11 @@@ enum ethtool_reset_flags 
   *	autonegotiation; 0 if unknown or not applicable.  Read-only.
   * @transceiver: Used to distinguish different possible PHY types,
   *	reported consistently by PHYLIB.  Read-only.
+  * @master_slave_cfg: Master/slave port mode.
+  * @master_slave_state: Master/slave port state.
+  * @reserved: Reserved for future use; see the note on reserved space.
+  * @reserved1: Reserved for future use; see the note on reserved space.
+  * @link_mode_masks: Variable length bitmaps.
   *
   * If autonegotiation is disabled, the speed and @duplex represent the
   * fixed link mode and are writable if the driver supports multiple
diff --combined kernel/bpf/disasm.c
index dad821c8ecd0,faa54d58972c..bbfc6bb79240
--- a/kernel/bpf/disasm.c
+++ b/kernel/bpf/disasm.c
@@@ -19,23 -19,16 +19,23 @@@ static const char *__func_get_name(cons
  {
    BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID);
-	if (insn->src_reg != BPF_PSEUDO_CALL &&
 +	if (!insn->src_reg &&
        insn->imm >= 0 && insn->imm < __BPF_FUNC_MAX_ID &&
        func_id_str[insn->imm])
    	return func_id_str[insn->imm];
-	if (cbs && cbs->cb_call)
 -		return cbs->cb_call(cbs->private_data, insn);
 +	if (cbs && cbs->cb_call) {
 +		const char *res;
 +
 +		res = cbs->cb_call(cbs->private_data, insn);
 +		if (res)
 +			return res;
 +	}
if (insn->src_reg == BPF_PSEUDO_CALL)
    	snprintf(buff, len, "%+d", insn->imm);
 +	else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
 +		snprintf(buff, len, "kernel-function");
return buff;
  }
@@@ -91,7 -84,7 +91,7 @@@ static const char *const bpf_atomic_alu
    [BPF_ADD >> 4]  = "add",
    [BPF_AND >> 4]  = "and",
    [BPF_OR >> 4]  = "or",
- 	[BPF_XOR >> 4]  = "or",
+ 	[BPF_XOR >> 4]  = "xor",
  };
static const char *const bpf_ldst_string[] = {
diff --combined kernel/bpf/verifier.c
index 852541a435ef,3a738724a380..f63b27574b3a
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@@ -234,18 -234,6 +234,18 @@@ static bool bpf_pseudo_call(const struc
           insn->src_reg == BPF_PSEUDO_CALL;
  }
+static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
 +{
 +	return insn->code == (BPF_JMP | BPF_CALL) &&
 +	       insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
 +}
 +
 +static bool bpf_pseudo_func(const struct bpf_insn *insn)
 +{
 +	return insn->code == (BPF_LD | BPF_IMM | BPF_DW) &&
 +	       insn->src_reg == BPF_PSEUDO_FUNC;
 +}
 +
  struct bpf_call_arg_meta {
    struct bpf_map *map_ptr;
    bool raw_mode;
@@@ -260,7 -248,6 +260,7 @@@
    u32 btf_id;
    struct btf *ret_btf;
    u32 ret_btf_id;
 +	u32 subprogno;
  };
struct btf *btf_vmlinux;
@@@ -403,24 -390,6 +403,24 @@@ __printf(3, 4) static void verbose_linf
    env->prev_linfo = linfo;
  }
+static void verbose_invalid_scalar(struct bpf_verifier_env *env,
 +				   struct bpf_reg_state *reg,
 +				   struct tnum *range, const char *ctx,
 +				   const char *reg_name)
 +{
 +	char tn_buf[48];
 +
 +	verbose(env, "At %s the register %s ", ctx, reg_name);
 +	if (!tnum_is_unknown(reg->var_off)) {
 +		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 +		verbose(env, "has value %s", tn_buf);
 +	} else {
 +		verbose(env, "has unknown scalar value");
 +	}
 +	tnum_strn(tn_buf, sizeof(tn_buf), *range);
 +	verbose(env, " should have been in %s\n", tn_buf);
 +}
 +
  static bool type_is_pkt_pointer(enum bpf_reg_type type)
  {
    return type == PTR_TO_PACKET ||
@@@ -440,7 -409,6 +440,7 @@@ static bool reg_type_not_null(enum bpf_
    return type == PTR_TO_SOCKET ||
    	type == PTR_TO_TCP_SOCK ||
    	type == PTR_TO_MAP_VALUE ||
 +		type == PTR_TO_MAP_KEY ||
    	type == PTR_TO_SOCK_COMMON;
  }
@@@ -483,8 -451,7 +483,8 @@@ static bool arg_type_may_be_null(enum b
           type == ARG_PTR_TO_MEM_OR_NULL ||
           type == ARG_PTR_TO_CTX_OR_NULL ||
           type == ARG_PTR_TO_SOCKET_OR_NULL ||
 -	       type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
 +	       type == ARG_PTR_TO_ALLOC_MEM_OR_NULL ||
 +	       type == ARG_PTR_TO_STACK_OR_NULL;
  }
/* Determine whether the function releases some resources allocated by another
@@@ -574,8 -541,6 +574,8 @@@ static const char * const reg_type_str[
    [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
    [PTR_TO_RDWR_BUF]	= "rdwr_buf",
    [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
 +	[PTR_TO_FUNC]		= "func",
 +	[PTR_TO_MAP_KEY]	= "map_key",
  };
static char slot_type_char[] = {
@@@ -647,7 -612,6 +647,7 @@@ static void print_verifier_state(struc
    		if (type_is_pkt_pointer(t))
    			verbose(env, ",r=%d", reg->range);
    		else if (t == CONST_PTR_TO_MAP ||
 +				 t == PTR_TO_MAP_KEY ||
    			 t == PTR_TO_MAP_VALUE ||
    			 t == PTR_TO_MAP_VALUE_OR_NULL)
    			verbose(env, ",ks=%d,vs=%d",
@@@ -1555,210 -1519,39 +1555,210 @@@ static int add_subprog(struct bpf_verif
    }
    ret = find_subprog(env, off);
    if (ret >= 0)
 -		return 0;
 +		return ret;
    if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
    	verbose(env, "too many subprograms\n");
    	return -E2BIG;
    }
 +	/* determine subprog starts. The end is one before the next starts */
    env->subprog_info[env->subprog_cnt++].start = off;
    sort(env->subprog_info, env->subprog_cnt,
         sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
 +	return env->subprog_cnt - 1;
 +}
 +
 +/* One entry per distinct kernel function called by a program.
 + * Entries are created and filled in by add_kfunc_call().
 + */
 +struct bpf_kfunc_desc {
 +	/* filled in by btf_distill_func_proto() from the function's prototype */
 +	struct btf_func_model func_model;
 +	/* type id of the function, looked up in btf_vmlinux */
 +	u32 func_id;
 +	/* BPF_CAST_CALL(address) - __bpf_call_base */
 +	s32 imm;
 +};
 +
 +/* Upper bound on distinct kernel functions one program may call;
 + * add_kfunc_call() returns -E2BIG once the table is full.
 + */
 +#define MAX_KFUNC_DESCS 256
 +/* Table of kfunc descriptors, reached through prog->aux->kfunc_tab.
 + * add_kfunc_call() keeps descs[0..nr_descs) sorted by func_id;
 + * sort_kfunc_descs_by_imm() re-sorts the same array by imm.
 + */
 +struct bpf_kfunc_desc_tab {
 +	struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
 +	/* number of valid entries at the front of descs[] */
 +	u32 nr_descs;
 +};
 +
 +/* sort()/bsearch() comparator ordering struct bpf_kfunc_desc by func_id. */
 +static int kfunc_desc_cmp_by_id(const void *a, const void *b)
 +{
 +	const struct bpf_kfunc_desc *d0 = a;
 +	const struct bpf_kfunc_desc *d1 = b;
 +
 +	/* func_id is not greater than BTF_MAX_TYPE */
 +	/* NOTE(review): presumably that bound is what makes returning the
 +	 * u32 difference as an int safe - confirm against BTF_MAX_TYPE.
 +	 */
 +	return d0->func_id - d1->func_id;
 +}
 +
 +/* Find the descriptor for @func_id in @prog's kfunc table.
 + *
 + * Uses bsearch() with kfunc_desc_cmp_by_id(), so the table must currently
 + * be sorted by func_id.  prog->aux->kfunc_tab is dereferenced without a
 + * NULL check, so the caller must make sure the table exists.
 + *
 + * Returns the matching descriptor, or NULL if there is none.
 + */
 +static const struct bpf_kfunc_desc *
 +find_kfunc_desc(const struct bpf_prog *prog, u32 func_id)
 +{
 +	/* search key: only func_id is looked at by the comparator */
 +	struct bpf_kfunc_desc desc = {
 +		.func_id = func_id,
 +	};
 +	struct bpf_kfunc_desc_tab *tab;
 +
 +	tab = prog->aux->kfunc_tab;
 +	return bsearch(&desc, tab->descs, tab->nr_descs,
 +		       sizeof(tab->descs[0]), kfunc_desc_cmp_by_id);
 +}
 +
 +/* Record a call to the kernel function with vmlinux BTF id @func_id.
 + *
 + * The descriptor table is allocated on the first call, after checking
 + * the prerequisites once: vmlinux BTF present, JIT requested, JIT
 + * supporting kfunc calls, GPL-compatible program.  A new descriptor
 + * stores the function's address relative to __bpf_call_base and its
 + * distilled function model.
 + *
 + * Returns 0 on success or when @func_id is already in the table, and a
 + * negative errno otherwise (each failure is also reported via verbose()).
 + */
 +static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id)
 +{
 +	const struct btf_type *func, *func_proto;
 +	struct bpf_kfunc_desc_tab *tab;
 +	struct bpf_prog_aux *prog_aux;
 +	struct bpf_kfunc_desc *desc;
 +	const char *func_name;
 +	unsigned long addr;
 +	int err;
 +
 +	prog_aux = env->prog->aux;
 +	tab = prog_aux->kfunc_tab;
 +	/* no table yet: validate the prerequisites, then allocate it */
 +	if (!tab) {
 +		if (!btf_vmlinux) {
 +			verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
 +			return -ENOTSUPP;
 +		}
 +
 +		if (!env->prog->jit_requested) {
 +			verbose(env, "JIT is required for calling kernel function\n");
 +			return -ENOTSUPP;
 +		}
 +
 +		if (!bpf_jit_supports_kfunc_call()) {
 +			verbose(env, "JIT does not support calling kernel function\n");
 +			return -ENOTSUPP;
 +		}
 +
 +		if (!env->prog->gpl_compatible) {
 +			verbose(env, "cannot call kernel function from non-GPL compatible program\n");
 +			return -EINVAL;
 +		}
 +
 +		tab = kzalloc(sizeof(*tab), GFP_KERNEL);
 +		if (!tab)
 +			return -ENOMEM;
 +		prog_aux->kfunc_tab = tab;
 +	}
 +
 +	/* this function was already recorded by an earlier call */
 +	if (find_kfunc_desc(env->prog, func_id))
 +		return 0;
 +
 +	if (tab->nr_descs == MAX_KFUNC_DESCS) {
 +		verbose(env, "too many different kernel function calls\n");
 +		return -E2BIG;
 +	}
 +
 +	/* @func_id must name a BTF function whose type is a func_proto */
 +	func = btf_type_by_id(btf_vmlinux, func_id);
 +	if (!func || !btf_type_is_func(func)) {
 +		verbose(env, "kernel btf_id %u is not a function\n",
 +			func_id);
 +		return -EINVAL;
 +	}
 +	func_proto = btf_type_by_id(btf_vmlinux, func->type);
 +	if (!func_proto || !btf_type_is_func_proto(func_proto)) {
 +		verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
 +			func_id);
 +		return -EINVAL;
 +	}
 +
 +	/* resolve the function's address by its BTF name */
 +	func_name = btf_name_by_offset(btf_vmlinux, func->name_off);
 +	addr = kallsyms_lookup_name(func_name);
 +	if (!addr) {
 +		verbose(env, "cannot find address for kernel function %s\n",
 +			func_name);
 +		return -EINVAL;
 +	}
 +
 +	/* take the next free slot and fill it in */
 +	desc = &tab->descs[tab->nr_descs++];
 +	desc->func_id = func_id;
 +	desc->imm = BPF_CAST_CALL(addr) - __bpf_call_base;
 +	err = btf_distill_func_proto(&env->log, btf_vmlinux,
 +				     func_proto, func_name,
 +				     &desc->func_model);
 +	/* keep the table ordered by func_id for find_kfunc_desc() */
 +	/* NOTE(review): when distilling fails the slot stays counted in
 +	 * nr_descs and the table is left unsorted - presumably fine because
 +	 * the error aborts verification; confirm.
 +	 */
 +	if (!err)
 +		sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
 +		     kfunc_desc_cmp_by_id, NULL);
 +	return err;
 +}
 +
 +/* sort()/bsearch() comparator ordering struct bpf_kfunc_desc by imm.
 + * Returns 1, -1 or 0 from explicit comparisons of the two s32 values.
 + */
 +static int kfunc_desc_cmp_by_imm(const void *a, const void *b)
 +{
 +	const struct bpf_kfunc_desc *d0 = a;
 +	const struct bpf_kfunc_desc *d1 = b;
 +
 +	if (d0->imm > d1->imm)
 +		return 1;
 +	else if (d0->imm < d1->imm)
 +		return -1;
    return 0;
  }
-static int check_subprogs(struct bpf_verifier_env *env)
 +/* Re-sort @prog's kfunc descriptor table by imm, the order that
 + * bpf_jit_find_kfunc_model() searches in.  Does nothing when the program
 + * has no kfunc table.
 + */
 +static void sort_kfunc_descs_by_imm(struct bpf_prog *prog)
 +{
 +	struct bpf_kfunc_desc_tab *tab;
 +
 +	tab = prog->aux->kfunc_tab;
 +	if (!tab)
 +		return;
 +
 +	sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
 +	     kfunc_desc_cmp_by_imm, NULL);
 +}
 +
 +/* Return true when @prog has a kfunc descriptor table attached, i.e.
 + * prog->aux->kfunc_tab is non-NULL.
 + */
 +bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
 +{
 +	return !!prog->aux->kfunc_tab;
 +}
 +
 +/* Return the function model of the kernel function called by @insn.
 + *
 + * @insn->imm is matched against the imm of each descriptor using
 + * bsearch() with kfunc_desc_cmp_by_imm(), so the table must currently be
 + * sorted by imm.  prog->aux->kfunc_tab is dereferenced without a NULL
 + * check.
 + *
 + * Returns NULL when no descriptor has that imm.
 + */
 +const struct btf_func_model *
 +bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
 +			 const struct bpf_insn *insn)
 +{
 +	/* search key: only imm is looked at by the comparator */
 +	const struct bpf_kfunc_desc desc = {
 +		.imm = insn->imm,
 +	};
 +	const struct bpf_kfunc_desc *res;
 +	struct bpf_kfunc_desc_tab *tab;
 +
 +	tab = prog->aux->kfunc_tab;
 +	res = bsearch(&desc, tab->descs, tab->nr_descs,
 +		      sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm);
 +
 +	return res ? &res->func_model : NULL;
 +}
 +
 +static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
  {
 -	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
    struct bpf_subprog_info *subprog = env->subprog_info;
    struct bpf_insn *insn = env->prog->insnsi;
 -	int insn_cnt = env->prog->len;
 +	int i, ret, insn_cnt = env->prog->len;
/* Add entry function. */
    ret = add_subprog(env, 0);
 -	if (ret < 0)
 +	if (ret)
    	return ret;
-	/* determine subprog starts. The end is one before the next starts */
 -	for (i = 0; i < insn_cnt; i++) {
 -		if (!bpf_pseudo_call(insn + i))
 +	for (i = 0; i < insn_cnt; i++, insn++) {
 +		if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
 +		    !bpf_pseudo_kfunc_call(insn))
    		continue;
 +
    	if (!env->bpf_capable) {
 -			verbose(env,
 -				"function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
 +			verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
    		return -EPERM;
    	}
 -		ret = add_subprog(env, i + insn[i].imm + 1);
 +
 +		if (bpf_pseudo_func(insn)) {
 +			ret = add_subprog(env, i + insn->imm + 1);
 +			if (ret >= 0)
 +				/* remember subprog */
 +				insn[1].imm = ret;
 +		} else if (bpf_pseudo_call(insn)) {
 +			ret = add_subprog(env, i + insn->imm + 1);
 +		} else {
 +			ret = add_kfunc_call(env, insn->imm);
 +		}
 +
    	if (ret < 0)
    		return ret;
    }
@@@ -1772,16 -1565,6 +1772,16 @@@
    	for (i = 0; i < env->subprog_cnt; i++)
    		verbose(env, "func#%d @%d\n", i, subprog[i].start);
+	return 0;
 +}
 +
 +static int check_subprogs(struct bpf_verifier_env *env)
 +{
 +	int i, subprog_start, subprog_end, off, cur_subprog = 0;
 +	struct bpf_subprog_info *subprog = env->subprog_info;
 +	struct bpf_insn *insn = env->prog->insnsi;
 +	int insn_cnt = env->prog->len;
 +
    /* now check that all jumps are within the same subprog */
    subprog_start = subprog[cur_subprog].start;
    subprog_end = subprog[cur_subprog + 1].start;
@@@ -2090,17 -1873,6 +2090,17 @@@ static int get_prev_insn_idx(struct bpf
    return i;
  }
+/* Name lookup used as the .cb_call member of struct bpf_insn_cbs.
 + *
 + * Returns NULL unless @insn is a BPF_PSEUDO_KFUNC_CALL; for those, returns
 + * the vmlinux BTF name of the function whose type id is @insn->imm.
 + * @data is not used.
 + */
+static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
 +{
 +	const struct btf_type *func;
 +
 +	if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
 +		return NULL;
 +
 +	/* NOTE(review): func is not NULL-checked; presumably insn->imm was
 +	 * already validated by add_kfunc_call() - confirm.
 +	 */
 +	func = btf_type_by_id(btf_vmlinux, insn->imm);
 +	return btf_name_by_offset(btf_vmlinux, func->name_off);
 +}
 +
  /* For given verifier state backtrack_insn() is called from the last insn to
   * the first insn. Its purpose is to compute a bitmask of registers and
   * stack slots that needs precision in the parent verifier state.
@@@ -2109,7 -1881,6 +2109,7 @@@ static int backtrack_insn(struct bpf_ve
    		  u32 *reg_mask, u64 *stack_mask)
  {
    const struct bpf_insn_cbs cbs = {
 +		.cb_call	= disasm_kfunc_name,
    	.cb_print	= verbose,
    	.private_data	= env,
    };
@@@ -2524,8 -2295,6 +2524,8 @@@ static bool is_spillable_regtype(enum b
    case PTR_TO_PERCPU_BTF_ID:
    case PTR_TO_MEM:
    case PTR_TO_MEM_OR_NULL:
 +	case PTR_TO_FUNC:
 +	case PTR_TO_MAP_KEY:
    	return true;
    default:
    	return false;
@@@ -3130,10 -2899,6 +3130,10 @@@ static int __check_mem_access(struct bp
reg = &cur_regs(env)[regno];
    switch (reg->type) {
 +	case PTR_TO_MAP_KEY:
 +		verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
 +			mem_size, off, size);
 +		break;
    case PTR_TO_MAP_VALUE:
    	verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
    		mem_size, off, size);
@@@ -3539,9 -3304,6 +3539,9 @@@ static int check_ptr_alignment(struct b
    case PTR_TO_FLOW_KEYS:
    	pointer_desc = "flow keys ";
    	break;
 +	case PTR_TO_MAP_KEY:
 +		pointer_desc = "key ";
 +		break;
    case PTR_TO_MAP_VALUE:
    	pointer_desc = "value ";
    	break;
@@@ -3643,7 -3405,7 +3643,7 @@@ process_func
  continue_func:
    subprog_end = subprog[idx + 1].start;
    for (; i < subprog_end; i++) {
 -		if (!bpf_pseudo_call(insn + i))
 +		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
    		continue;
    	/* remember insn and function to return to */
    	ret_insn[frame] = i + 1;
@@@ -4080,19 -3842,7 +4080,19 @@@ static int check_mem_access(struct bpf_
    /* for access checks, reg->off is just part of off */
    off += reg->off;
-	if (reg->type == PTR_TO_MAP_VALUE) {
 +	if (reg->type == PTR_TO_MAP_KEY) {
 +		if (t == BPF_WRITE) {
 +			verbose(env, "write to change key R%d not allowed\n", regno);
 +			return -EACCES;
 +		}
 +
 +		err = check_mem_region_access(env, regno, off, size,
 +					      reg->map_ptr->key_size, false);
 +		if (err)
 +			return err;
 +		if (value_regno >= 0)
 +			mark_reg_unknown(env, regs, value_regno);
 +	} else if (reg->type == PTR_TO_MAP_VALUE) {
    	if (t == BPF_WRITE && value_regno >= 0 &&
    	    is_pointer_value(env, value_regno)) {
    		verbose(env, "R%d leaks addr into map\n", value_regno);
@@@ -4508,9 -4258,6 +4508,9 @@@ static int check_helper_mem_access(stru
    case PTR_TO_PACKET_META:
    	return check_packet_access(env, regno, reg->off, access_size,
    				   zero_size_allowed);
 +	case PTR_TO_MAP_KEY:
 +		return check_mem_region_access(env, regno, reg->off, access_size,
 +					       reg->map_ptr->key_size, false);
    case PTR_TO_MAP_VALUE:
    	if (check_map_access_type(env, regno, reg->off, access_size,
    				  meta && meta->raw_mode ? BPF_WRITE :
@@@ -4727,7 -4474,6 +4727,7 @@@ static const struct bpf_reg_types map_k
    	PTR_TO_STACK,
    	PTR_TO_PACKET,
    	PTR_TO_PACKET_META,
 +		PTR_TO_MAP_KEY,
    	PTR_TO_MAP_VALUE,
    },
  };
@@@ -4759,7 -4505,6 +4759,7 @@@ static const struct bpf_reg_types mem_t
    	PTR_TO_STACK,
    	PTR_TO_PACKET,
    	PTR_TO_PACKET_META,
 +		PTR_TO_MAP_KEY,
    	PTR_TO_MAP_VALUE,
    	PTR_TO_MEM,
    	PTR_TO_RDONLY_BUF,
@@@ -4772,7 -4517,6 +4772,7 @@@ static const struct bpf_reg_types int_p
    	PTR_TO_STACK,
    	PTR_TO_PACKET,
    	PTR_TO_PACKET_META,
 +		PTR_TO_MAP_KEY,
    	PTR_TO_MAP_VALUE,
    },
  };
@@@ -4785,8 -4529,6 +4785,8 @@@ static const struct bpf_reg_types const
  static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
  static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
  static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
 +static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
 +static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
    [ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
@@@ -4815,8 -4557,6 +4815,8 @@@
    [ARG_PTR_TO_INT]		= &int_ptr_types,
    [ARG_PTR_TO_LONG]		= &int_ptr_types,
    [ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
 +	[ARG_PTR_TO_FUNC]		= &func_ptr_types,
 +	[ARG_PTR_TO_STACK_OR_NULL]	= &stack_ptr_types,
  };
static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@@ -4998,8 -4738,6 +4998,8 @@@ skip_type_check
    		verbose(env, "verifier internal error\n");
    		return -EFAULT;
    	}
 +	} else if (arg_type == ARG_PTR_TO_FUNC) {
 +		meta->subprogno = reg->subprogno;
    } else if (arg_type_is_mem_ptr(arg_type)) {
    	/* The access to this pointer is only checked when we hit the
    	 * next is_mem_size argument below.
@@@ -5520,19 -5258,13 +5520,19 @@@ static void clear_caller_saved_regs(str
    }
  }
-static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 -			   int *insn_idx)
 +typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
 +				   struct bpf_func_state *caller,
 +				   struct bpf_func_state *callee,
 +				   int insn_idx);
 +
 +static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 +			     int *insn_idx, int subprog,
 +			     set_callee_state_fn set_callee_state_cb)
  {
    struct bpf_verifier_state *state = env->cur_state;
    struct bpf_func_info_aux *func_info_aux;
    struct bpf_func_state *caller, *callee;
 -	int i, err, subprog, target_insn;
 +	int err;
    bool is_global = false;
if (state->curframe + 1 >= MAX_CALL_FRAMES) {
@@@ -5541,6 -5273,14 +5541,6 @@@
    	return -E2BIG;
    }
-	target_insn = *insn_idx + insn->imm;
 -	subprog = find_subprog(env, target_insn + 1);
 -	if (subprog < 0) {
 -		verbose(env, "verifier bug. No program starts at insn %d\n",
 -			target_insn + 1);
 -		return -EFAULT;
 -	}
 -
    caller = state->frame[state->curframe];
    if (state->frame[state->curframe + 1]) {
    	verbose(env, "verifier bug. Frame %d already allocated\n",
@@@ -5551,7 -5291,7 +5551,7 @@@
    func_info_aux = env->prog->aux->func_info_aux;
    if (func_info_aux)
    	is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
 -	err = btf_check_func_arg_match(env, subprog, caller->regs);
 +	err = btf_check_subprog_arg_match(env, subprog, caller->regs);
    if (err == -EFAULT)
    	return err;
    if (is_global) {
@@@ -5595,9 -5335,11 +5595,9 @@@
    if (err)
    	return err;
-	/* copy r1 - r5 args that callee can access.  The copy includes parent
 -	 * pointers, which connects us up to the liveness chain
 -	 */
 -	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
 -		callee->regs[i] = caller->regs[i];
 +	err = set_callee_state_cb(env, caller, callee, *insn_idx);
 +	if (err)
 +		return err;
clear_caller_saved_regs(env, caller->regs);
@@@ -5605,7 -5347,7 +5605,7 @@@
    state->curframe++;
/* and go analyze first insn of the callee */
 -	*insn_idx = target_insn;
 +	*insn_idx = env->subprog_info[subprog].start - 1;
if (env->log.level & BPF_LOG_LEVEL) {
    	verbose(env, "caller:\n");
@@@ -5616,92 -5358,6 +5616,92 @@@
    return 0;
  }
+/* Set up the callee's R1-R5 for a bpf_for_each_map_elem() callback.
 + *
 + * R1 is copied from the caller's R1, R2 and R3 become pointers to the
 + * key and the value of the map referenced by the caller's R1, R4 is
 + * copied from the caller's R3, and R5 is marked uninitialized.
 + *
 + * Always returns 0.
 + */
+int map_set_for_each_callback_args(struct bpf_verifier_env *env,
 +				   struct bpf_func_state *caller,
 +				   struct bpf_func_state *callee)
 +{
 +	/* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
 +	 *      void *callback_ctx, u64 flags);
 +	 * callback_fn(struct bpf_map *map, void *key, void *value,
 +	 *      void *callback_ctx);
 +	 */
 +	callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
 +
 +	/* key pointer into the map the caller passed in R1 */
 +	callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
 +	__mark_reg_known_zero(&callee->regs[BPF_REG_2]);
 +	callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
 +
 +	/* value pointer into the same map */
 +	callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
 +	__mark_reg_known_zero(&callee->regs[BPF_REG_3]);
 +	callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
 +
 +	/* pointer to stack or null */
 +	callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
 +
 +	/* unused */
 +	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 +	return 0;
 +}
 +
 +/* Callee-state setup that check_func_call() passes to __check_func_call():
 + * the callee starts with copies of the caller's R1-R5.
 + * @env and @insn_idx are not used.  Always returns 0.
 + */
 +static int set_callee_state(struct bpf_verifier_env *env,
 +			    struct bpf_func_state *caller,
 +			    struct bpf_func_state *callee, int insn_idx)
 +{
 +	int i;
 +
 +	/* copy r1 - r5 args that callee can access.  The copy includes parent
 +	 * pointers, which connects us up to the liveness chain
 +	 */
 +	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
 +		callee->regs[i] = caller->regs[i];
 +	return 0;
 +}
 +
 +/* Verify a call whose target is given relative to the call instruction.
 + *
 + * The target instruction is *@insn_idx + @insn->imm + 1.  If no subprog
 + * starts there, that is reported as a verifier bug and -EFAULT is
 + * returned.  Otherwise the work is delegated to __check_func_call() with
 + * set_callee_state() as the callee-state setup callback, and its result
 + * is returned.
 + */
 +static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 +			   int *insn_idx)
 +{
 +	int subprog, target_insn;
 +
 +	target_insn = *insn_idx + insn->imm + 1;
 +	subprog = find_subprog(env, target_insn);
 +	if (subprog < 0) {
 +		verbose(env, "verifier bug. No program starts at insn %d\n",
 +			target_insn);
 +		return -EFAULT;
 +	}
 +
 +	return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
 +}
 +
 +/* Callee-state setup passed to __check_func_call() for
 + * BPF_FUNC_for_each_map_elem.
 + *
 + * Takes the map from the aux data of the instruction at @insn_idx and
 + * lets the map's map_set_for_each_callback_args() op fill in the callee
 + * registers, then marks the callee frame as running inside a callback.
 + *
 + * Returns 0 on success, -EINVAL if the recorded map pointer is poisoned,
 + * -ENOTSUPP if the map lacks either callback op, or the error returned by
 + * the op itself.
 + */
 +static int set_map_elem_callback_state(struct bpf_verifier_env *env,
 +				       struct bpf_func_state *caller,
 +				       struct bpf_func_state *callee,
 +				       int insn_idx)
 +{
 +	struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
 +	struct bpf_map *map;
 +	int err;
 +
 +	if (bpf_map_ptr_poisoned(insn_aux)) {
 +		verbose(env, "tail_call abusing map_ptr\n");
 +		return -EINVAL;
 +	}
 +
 +	map = BPF_MAP_PTR(insn_aux->map_ptr_state);
 +	/* both ops must be provided by the map */
 +	if (!map->ops->map_set_for_each_callback_args ||
 +	    !map->ops->map_for_each_callback) {
 +		verbose(env, "callback function not allowed for map\n");
 +		return -ENOTSUPP;
 +	}
 +
 +	err = map->ops->map_set_for_each_callback_args(env, caller, callee);
 +	if (err)
 +		return err;
 +
 +	/* prepare_func_exit() checks this flag to enforce R0 in [0, 1] */
 +	callee->in_callback_fn = true;
 +	return 0;
 +}
 +
  static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
  {
    struct bpf_verifier_state *state = env->cur_state;
@@@ -5724,22 -5380,8 +5724,22 @@@
state->curframe--;
    caller = state->frame[state->curframe];
 -	/* return to the caller whatever r0 had in the callee */
 -	caller->regs[BPF_REG_0] = *r0;
 +	if (callee->in_callback_fn) {
 +		/* enforce R0 return value range [0, 1]. */
 +		struct tnum range = tnum_range(0, 1);
 +
 +		if (r0->type != SCALAR_VALUE) {
 +			verbose(env, "R0 not a scalar value\n");
 +			return -EACCES;
 +		}
 +		if (!tnum_in(range, r0->var_off)) {
 +			verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
 +			return -EINVAL;
 +		}
 +	} else {
 +		/* return to the caller whatever r0 had in the callee */
 +		caller->regs[BPF_REG_0] = *r0;
 +	}
/* Transfer references to the caller */
    err = transfer_reference_state(caller, callee);
@@@ -5794,9 -5436,7 +5794,9 @@@ record_func_map(struct bpf_verifier_en
        func_id != BPF_FUNC_map_delete_elem &&
        func_id != BPF_FUNC_map_push_elem &&
        func_id != BPF_FUNC_map_pop_elem &&
 -	    func_id != BPF_FUNC_map_peek_elem)
 +	    func_id != BPF_FUNC_map_peek_elem &&
 +	    func_id != BPF_FUNC_for_each_map_elem &&
 +	    func_id != BPF_FUNC_redirect_map)
    	return 0;
if (map == NULL) {
@@@ -5877,18 -5517,15 +5877,18 @@@ static int check_reference_leak(struct 
    return state->acquired_refs ? -EINVAL : 0;
  }
-static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
 +static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 +			     int *insn_idx_p)
  {
    const struct bpf_func_proto *fn = NULL;
    struct bpf_reg_state *regs;
    struct bpf_call_arg_meta meta;
 +	int insn_idx = *insn_idx_p;
    bool changes_data;
 -	int i, err;
 +	int i, err, func_id;
/* find function prototype */
 +	func_id = insn->imm;
    if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
    	verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
    		func_id);
@@@ -5934,7 -5571,7 +5934,7 @@@
meta.func_id = func_id;
    /* check args */
 -	for (i = 0; i < 5; i++) {
 +	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
    	err = check_func_arg(env, i, &meta, fn);
    	if (err)
    		return err;
@@@ -5984,13 -5621,6 +5984,13 @@@
    	return -EINVAL;
    }
+	if (func_id == BPF_FUNC_for_each_map_elem) {
 +		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
 +					set_map_elem_callback_state);
 +		if (err < 0)
 +			return -EINVAL;
 +	}
 +
    /* reset caller saved regs */
    for (i = 0; i < CALLER_SAVED_REGS; i++) {
    	mark_reg_not_init(env, regs, caller_saved[i]);
@@@ -6146,98 -5776,6 +6146,98 @@@
    return 0;
  }
+/* mark_btf_func_reg_size() is used when the reg size is determined by
 + * the BTF func_proto's return value size and argument.
 + *
 + * For BPF_REG_0 (the return value) the register is marked as written and
 + * subreg_def records whether the full 64 bits are defined.  For any other
 + * @regno (an argument) the register is marked as read, as a 64-bit or a
 + * 32-bit read depending on @reg_size.
 + */
 +static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
 +				   size_t reg_size)
 +{
 +	struct bpf_reg_state *reg = &cur_regs(env)[regno];
 +
 +	if (regno == BPF_REG_0) {
 +		/* Function return value */
 +		reg->live |= REG_LIVE_WRITTEN;
 +		/* anything narrower than u64 is tagged with insn_idx + 1 */
 +		reg->subreg_def = reg_size == sizeof(u64) ?
 +			DEF_NOT_SUBREG : env->insn_idx + 1;
 +	} else {
 +		/* Function argument */
 +		if (reg_size == sizeof(u64)) {
 +			mark_insn_zext(env, reg);
 +			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
 +		} else {
 +			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
 +		}
 +	}
 +}
 +
 +/* Verify a call to the kernel function whose vmlinux BTF id is @insn->imm.
 + *
 + * Steps, in order:
 + *  - ask the program type (env->ops->check_kfunc_call) whether the
 + *    function may be called; -EACCES if there is no hook or it says no
 + *  - match the current registers against the function's arguments with
 + *    btf_check_kfunc_arg_match()
 + *  - mark all caller-saved registers as uninitialized
 + *  - set up R0 from the return type: unknown scalar, or PTR_TO_BTF_ID for
 + *    a pointer to a struct; a pointer to anything else gives -EINVAL
 + *  - record the size of every argument register
 + *
 + * Returns 0 on success or a negative errno.
 + */
 +static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn)
 +{
 +	const struct btf_type *t, *func, *func_proto, *ptr_type;
 +	struct bpf_reg_state *regs = cur_regs(env);
 +	const char *func_name, *ptr_type_name;
 +	u32 i, nargs, func_id, ptr_type_id;
 +	const struct btf_param *args;
 +	int err;
 +
 +	/* NOTE(review): func and func_proto are used without NULL checks;
 +	 * presumably add_kfunc_call() has already validated this id - confirm.
 +	 */
 +	func_id = insn->imm;
 +	func = btf_type_by_id(btf_vmlinux, func_id);
 +	func_name = btf_name_by_offset(btf_vmlinux, func->name_off);
 +	func_proto = btf_type_by_id(btf_vmlinux, func->type);
 +
 +	if (!env->ops->check_kfunc_call ||
 +	    !env->ops->check_kfunc_call(func_id)) {
 +		verbose(env, "calling kernel function %s is not allowed\n",
 +			func_name);
 +		return -EACCES;
 +	}
 +
 +	/* Check the arguments */
 +	err = btf_check_kfunc_arg_match(env, btf_vmlinux, func_id, regs);
 +	if (err)
 +		return err;
 +
 +	for (i = 0; i < CALLER_SAVED_REGS; i++)
 +		mark_reg_not_init(env, regs, caller_saved[i]);
 +
 +	/* Check return type */
 +	t = btf_type_skip_modifiers(btf_vmlinux, func_proto->type, NULL);
 +	if (btf_type_is_scalar(t)) {
 +		mark_reg_unknown(env, regs, BPF_REG_0);
 +		mark_btf_func_reg_size(env, BPF_REG_0, t->size);
 +	} else if (btf_type_is_ptr(t)) {
 +		ptr_type = btf_type_skip_modifiers(btf_vmlinux, t->type,
 +						   &ptr_type_id);
 +		/* only pointers to structs are accepted as return values */
 +		if (!btf_type_is_struct(ptr_type)) {
 +			ptr_type_name = btf_name_by_offset(btf_vmlinux,
 +							   ptr_type->name_off);
 +			verbose(env, "kernel function %s returns pointer type %s %s is not supported\n",
 +				func_name, btf_type_str(ptr_type),
 +				ptr_type_name);
 +			return -EINVAL;
 +		}
 +		mark_reg_known_zero(env, regs, BPF_REG_0);
 +		regs[BPF_REG_0].btf = btf_vmlinux;
 +		regs[BPF_REG_0].type = PTR_TO_BTF_ID;
 +		regs[BPF_REG_0].btf_id = ptr_type_id;
 +		mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
 +	} /* else { add_kfunc_call() ensures it is btf_type_is_void(t) } */
 +
 +	/* the parameter array follows the func_proto type in memory */
 +	nargs = btf_type_vlen(func_proto);
 +	args = (const struct btf_param *)(func_proto + 1);
 +	for (i = 0; i < nargs; i++) {
 +		/* argument i is passed in register i + 1 */
 +		u32 regno = i + 1;
 +
 +		t = btf_type_skip_modifiers(btf_vmlinux, args[i].type, NULL);
 +		if (btf_type_is_ptr(t))
 +			mark_btf_func_reg_size(env, regno, sizeof(void *));
 +		else
 +			/* scalar. ensured by btf_check_kfunc_arg_match() */
 +			mark_btf_func_reg_size(env, regno, t->size);
 +	}
 +
 +	return 0;
 +}
 +
  static bool signed_add_overflows(s64 a, s64 b)
  {
    /* Do the add in u64, where overflow is well-defined */
@@@ -6371,7 -5909,7 +6371,7 @@@ static int update_alu_sanitation_state(
         aux->alu_limit != alu_limit))
    	return -EACCES;
-	/* Corresponding fixup done in fixup_bpf_calls(). */
 +	/* Corresponding fixup done in do_misc_fixups(). */
    aux->alu_state = alu_state;
    aux->alu_limit = alu_limit;
    return 0;
@@@ -8725,24 -8263,6 +8725,24 @@@ static int check_ld_imm(struct bpf_veri
    	return 0;
    }
+	if (insn->src_reg == BPF_PSEUDO_FUNC) {
 +		struct bpf_prog_aux *aux = env->prog->aux;
 +		u32 subprogno = insn[1].imm;
 +
 +		if (!aux->func_info) {
 +			verbose(env, "missing btf func_info\n");
 +			return -EINVAL;
 +		}
 +		if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
 +			verbose(env, "callback function not static\n");
 +			return -EINVAL;
 +		}
 +
 +		dst_reg->type = PTR_TO_FUNC;
 +		dst_reg->subprogno = subprogno;
 +		return 0;
 +	}
 +
    map = env->used_maps[aux->map_index];
    mark_reg_known_zero(env, regs, insn->dst_reg);
    dst_reg->map_ptr = map;
@@@ -8971,7 -8491,17 +8971,7 @@@ static int check_return_code(struct bpf
    }
if (!tnum_in(range, reg->var_off)) {
 -		char tn_buf[48];
 -
 -		verbose(env, "At program exit the register R0 ");
 -		if (!tnum_is_unknown(reg->var_off)) {
 -			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 -			verbose(env, "has value %s", tn_buf);
 -		} else {
 -			verbose(env, "has unknown scalar value");
 -		}
 -		tnum_strn(tn_buf, sizeof(tn_buf), range);
 -		verbose(env, " should have been in %s\n", tn_buf);
 +		verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
    	return -EINVAL;
    }
@@@ -9098,27 -8628,6 +9098,27 @@@ static int push_insn(int t, int w, int 
    return DONE_EXPLORING;
  }
+/* Push the control-flow edges of the call-like instruction at index @t.
 + *
 + * The fall-through edge t -> t + 1 is always pushed; if that fails or
 + * reports work to do, its result is returned right away.  When
 + * @visit_callee is set, the edge to the callee at t + insns[t].imm + 1 is
 + * pushed as well.  Instruction t + 1 (if it exists) and, with
 + * @visit_callee, instruction t are registered via init_explored_state().
 + *
 + * Returns the result of the last push_insn() call.
 + */
+static int visit_func_call_insn(int t, int insn_cnt,
 +				struct bpf_insn *insns,
 +				struct bpf_verifier_env *env,
 +				bool visit_callee)
 +{
 +	int ret;
 +
 +	ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
 +	if (ret)
 +		return ret;
 +
 +	if (t + 1 < insn_cnt)
 +		init_explored_state(env, t + 1);
 +	if (visit_callee) {
 +		init_explored_state(env, t);
 +		ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
 +				env, false);
 +	}
 +	return ret;
 +}
 +
  /* Visits the instruction at index t and returns one of the following:
   *  < 0 - an error occurred
   *  DONE_EXPLORING - the instruction was fully explored
@@@ -9129,9 -8638,6 +9129,9 @@@ static int visit_insn(int t, int insn_c
    struct bpf_insn *insns = env->prog->insnsi;
    int ret;
+	if (bpf_pseudo_func(insns + t))
 +		return visit_func_call_insn(t, insn_cnt, insns, env, true);
 +
    /* All non-branch instructions have a single fall-through edge. */
    if (BPF_CLASS(insns[t].code) != BPF_JMP &&
        BPF_CLASS(insns[t].code) != BPF_JMP32)
@@@ -9142,8 -8648,18 +9142,8 @@@
    	return DONE_EXPLORING;
case BPF_CALL:
 -		ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
 -		if (ret)
 -			return ret;
 -
 -		if (t + 1 < insn_cnt)
 -			init_explored_state(env, t + 1);
 -		if (insns[t].src_reg == BPF_PSEUDO_CALL) {
 -			init_explored_state(env, t);
 -			ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
 -					env, false);
 -		}
 -		return ret;
 +		return visit_func_call_insn(t, insn_cnt, insns, env,
 +					    insns[t].src_reg == BPF_PSEUDO_CALL);
case BPF_JA:
    	if (BPF_SRC(insns[t].code) != BPF_K)
@@@ -9756,7 -9272,6 +9756,7 @@@ static bool regsafe(struct bpf_reg_stat
    		 */
    		return false;
    	}
 +	case PTR_TO_MAP_KEY:
    case PTR_TO_MAP_VALUE:
    	/* If the new min/max/var_off satisfy the old ones and
    	 * everything else matches, we are OK.
@@@ -10440,7 -9955,6 +10440,7 @@@ static int do_check(struct bpf_verifier
if (env->log.level & BPF_LOG_LEVEL) {
    		const struct bpf_insn_cbs cbs = {
 +				.cb_call	= disasm_kfunc_name,
    			.cb_print	= verbose,
    			.private_data	= env,
    		};
@@@ -10588,8 -10102,7 +10588,8 @@@
    			if (BPF_SRC(insn->code) != BPF_K ||
    			    insn->off != 0 ||
    			    (insn->src_reg != BPF_REG_0 &&
 -				     insn->src_reg != BPF_PSEUDO_CALL) ||
 +				     insn->src_reg != BPF_PSEUDO_CALL &&
 +				     insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
    			    insn->dst_reg != BPF_REG_0 ||
    			    class == BPF_JMP32) {
    				verbose(env, "BPF_CALL uses reserved fields\n");
@@@ -10604,12 -10117,11 +10604,12 @@@
    			}
    			if (insn->src_reg == BPF_PSEUDO_CALL)
    				err = check_func_call(env, insn, &env->insn_idx);
 +				else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
 +					err = check_kfunc_call(env, insn);
    			else
 -					err = check_helper_call(env, insn->imm, env->insn_idx);
 +					err = check_helper_call(env, insn, &env->insn_idx);
    			if (err)
    				return err;
 -
    		} else if (opcode == BPF_JA) {
    			if (BPF_SRC(insn->code) != BPF_K ||
    			    insn->imm != 0 ||
@@@ -11038,12 -10550,6 +11038,12 @@@ static int resolve_pseudo_ldimm64(struc
    			goto next_insn;
    		}
+			if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
 +				aux = &env->insn_aux_data[i];
 +				aux->ptr_type = PTR_TO_FUNC;
 +				goto next_insn;
 +			}
 +
    		/* In final convert_pseudo_ld_imm64() step, this is
    		 * converted into regular 64-bit imm load insn.
    		 */
@@@ -11176,13 -10682,9 +11176,13 @@@ static void convert_pseudo_ld_imm64(str
    int insn_cnt = env->prog->len;
    int i;
-	for (i = 0; i < insn_cnt; i++, insn++)
 -		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
 -			insn->src_reg = 0;
 +	for (i = 0; i < insn_cnt; i++, insn++) {
 +		if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
 +			continue;
 +		if (insn->src_reg == BPF_PSEUDO_FUNC)
 +			continue;
 +		insn->src_reg = 0;
 +	}
  }
/* single env->prog->insni[off] instruction was replaced with the range
@@@ -11821,12 -11323,6 +11821,12 @@@ static int jit_subprogs(struct bpf_veri
    	return 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
 +		if (bpf_pseudo_func(insn)) {
 +			env->insn_aux_data[i].call_imm = insn->imm;
 +			/* subprog is encoded in insn[1].imm */
 +			continue;
 +		}
 +
    	if (!bpf_pseudo_call(insn))
    		continue;
    	/* Upon error here we cannot fall back to interpreter but
@@@ -11916,7 -11412,6 +11916,7 @@@
    	func[i]->aux->name[0] = 'F';
    	func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
    	func[i]->jit_requested = 1;
 +		func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
    	func[i]->aux->linfo = prog->aux->linfo;
    	func[i]->aux->nr_linfo = prog->aux->nr_linfo;
    	func[i]->aux->jited_linfo = prog->aux->jited_linfo;
@@@ -11957,12 -11452,6 +11957,12 @@@
    for (i = 0; i < env->subprog_cnt; i++) {
    	insn = func[i]->insnsi;
    	for (j = 0; j < func[i]->len; j++, insn++) {
 +			if (bpf_pseudo_func(insn)) {
 +				subprog = insn[1].imm;
 +				insn[0].imm = (u32)(long)func[subprog]->bpf_func;
 +				insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
 +				continue;
 +			}
    		if (!bpf_pseudo_call(insn))
    			continue;
    		subprog = insn->off;
@@@ -12008,11 -11497,6 +12008,11 @@@
     * later look the same as if they were interpreted only.
     */
    for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
 +		if (bpf_pseudo_func(insn)) {
 +			insn[0].imm = env->insn_aux_data[i].call_imm;
 +			insn[1].imm = find_subprog(env, i + insn[0].imm + 1);
 +			continue;
 +		}
    	if (!bpf_pseudo_call(insn))
    		continue;
    	insn->off = env->insn_aux_data[i].call_imm;
@@@ -12024,7 -11508,7 +12024,7 @@@
    prog->bpf_func = func[0]->bpf_func;
    prog->aux->func = func;
    prog->aux->func_cnt = env->subprog_cnt;
 -	bpf_prog_free_unused_jited_linfo(prog);
 +	bpf_prog_jit_attempt_done(prog);
    return 0;
  out_free:
    for (i = 0; i < env->subprog_cnt; i++) {
@@@ -12047,7 -11531,7 +12047,7 @@@ out_undo_insn
    	insn->off = 0;
    	insn->imm = env->insn_aux_data[i].call_imm;
    }
 -	bpf_prog_free_jited_linfo(prog);
 +	bpf_prog_jit_attempt_done(prog);
    return err;
  }
@@@ -12056,7 -11540,6 +12056,7 @@@ static int fixup_call_args(struct bpf_v
  #ifndef CONFIG_BPF_JIT_ALWAYS_ON
    struct bpf_prog *prog = env->prog;
    struct bpf_insn *insn = prog->insnsi;
 +	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
    int i, depth;
  #endif
    int err = 0;
@@@ -12070,10 -11553,6 +12070,10 @@@
    		return err;
    }
  #ifndef CONFIG_BPF_JIT_ALWAYS_ON
 +	if (has_kfunc_call) {
 +		verbose(env, "calling kernel functions are not allowed in non-JITed programs\n");
 +		return -EINVAL;
 +	}
    if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
    	/* When JIT fails the progs with bpf2bpf calls and tail_calls
    	 * have to be rejected, since interpreter doesn't support them yet.
@@@ -12082,14 -11561,6 +12082,14 @@@
    	return -EINVAL;
    }
    for (i = 0; i < prog->len; i++, insn++) {
 +		if (bpf_pseudo_func(insn)) {
 +			/* When JIT fails the progs with callback calls
 +			 * have to be rejected, since interpreter doesn't support them yet.
 +			 */
 +			verbose(env, "callbacks are not allowed in non-JITed programs\n");
 +			return -EINVAL;
 +		}
 +
    	if (!bpf_pseudo_call(insn))
    		continue;
    	depth = get_callee_stack_depth(env, insn, i);
@@@ -12102,30 -11573,12 +12102,30 @@@
    return err;
  }
 -/* fixup insn->imm field of bpf_call instructions
 - * and inline eligible helpers as explicit sequence of BPF instructions
 - *
 - * this function is called after eBPF program passed verification
 +static int fixup_kfunc_call(struct bpf_verifier_env *env,
 +			    struct bpf_insn *insn)
 +{
 +	const struct bpf_kfunc_desc *desc;
 +
 +	/* insn->imm has the btf func_id. Replace it with
 +	 * an address (relative to __bpf_base_call).
 +	 */
 +	desc = find_kfunc_desc(env->prog, insn->imm);
 +	if (!desc) {
 +		verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
 +			insn->imm);
 +		return -EFAULT;
 +	}
 +
 +	insn->imm = desc->imm;
 +
 +	return 0;
 +}
 +
 +/* Do various post-verification rewrites in a single program pass.
 + * These rewrites simplify JIT and interpreter implementations.
   */
 -static int fixup_bpf_calls(struct bpf_verifier_env *env)
 +static int do_misc_fixups(struct bpf_verifier_env *env)
  {
    struct bpf_prog *prog = env->prog;
    bool expect_blinding = bpf_jit_blinding_enabled(prog);
@@@ -12140,7 -11593,6 +12140,7 @@@
    int i, ret, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) {
 +		/* Make divide-by-zero exceptions impossible. */
    	if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
    	    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
    	    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
@@@ -12181,7 -11633,6 +12181,7 @@@
    		continue;
    	}
+		/* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
    	if (BPF_CLASS(insn->code) == BPF_LD &&
    	    (BPF_MODE(insn->code) == BPF_ABS ||
    	     BPF_MODE(insn->code) == BPF_IND)) {
@@@ -12201,11 -11652,11 +12201,11 @@@
    		continue;
    	}
+		/* Rewrite pointer arithmetic to mitigate speculation attacks. */
    	if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
    	    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
    		const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
    		const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
 -			struct bpf_insn insn_buf[16];
    		struct bpf_insn *patch = &insn_buf[0];
    		bool issrc, isneg;
    		u32 off_reg;
@@@ -12257,12 -11708,6 +12257,12 @@@
    		continue;
    	if (insn->src_reg == BPF_PSEUDO_CALL)
    		continue;
 +		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
 +			ret = fixup_kfunc_call(env, insn);
 +			if (ret)
 +				return ret;
 +			continue;
 +		}
if (insn->imm == BPF_FUNC_get_route_realm)
    		prog->dst_needed = 1;
@@@ -12355,8 -11800,7 +12355,8 @@@
    	     insn->imm == BPF_FUNC_map_delete_elem ||
    	     insn->imm == BPF_FUNC_map_push_elem   ||
    	     insn->imm == BPF_FUNC_map_pop_elem    ||
 -		     insn->imm == BPF_FUNC_map_peek_elem)) {
 +		     insn->imm == BPF_FUNC_map_peek_elem   ||
 +		     insn->imm == BPF_FUNC_redirect_map)) {
    		aux = &env->insn_aux_data[i + delta];
    		if (bpf_map_ptr_poisoned(aux))
    			goto patch_call_imm;
@@@ -12398,9 -11842,6 +12398,9 @@@
    			     (int (*)(struct bpf_map *map, void *value))NULL));
    		BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
    			     (int (*)(struct bpf_map *map, void *value))NULL));
 +			BUILD_BUG_ON(!__same_type(ops->map_redirect,
 +				     (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL));
 +
  patch_map_ops_generic:
    		switch (insn->imm) {
    		case BPF_FUNC_map_lookup_elem:
@@@ -12427,16 -11868,11 +12427,16 @@@
    			insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
    				    __bpf_call_base;
    			continue;
 +			case BPF_FUNC_redirect_map:
 +				insn->imm = BPF_CAST_CALL(ops->map_redirect) -
 +					    __bpf_call_base;
 +				continue;
    		}
goto patch_call_imm;
    	}
+		/* Implement bpf_jiffies64 inline. */
    	if (prog->jit_requested && BITS_PER_LONG == 64 &&
    	    insn->imm == BPF_FUNC_jiffies64) {
    		struct bpf_insn ld_jiffies_addr[2] = {
@@@ -12492,8 -11928,6 +12492,8 @@@ patch_call_imm
    	}
    }
+	sort_kfunc_descs_by_imm(env->prog);
 +
    return 0;
  }
@@@ -12604,7 -12038,7 +12604,7 @@@ static int do_check_common(struct bpf_v
    	/* 1st arg to a function */
    	regs[BPF_REG_1].type = PTR_TO_CTX;
    	mark_reg_known_zero(env, regs, BPF_REG_1);
 -		ret = btf_check_func_arg_match(env, subprog, regs);
 +		ret = btf_check_subprog_arg_match(env, subprog, regs);
    	if (ret == -EFAULT)
    		/* unlikely verifier bug. abort.
    		 * ret == 0 and ret < 0 are sadly acceptable for
@@@ -12724,6 -12158,11 +12724,11 @@@ static int check_struct_ops_btf_id(stru
    u32 btf_id, member_idx;
    const char *mname;
+ 	if (!prog->gpl_compatible) {
+ 		verbose(env, "struct ops programs must have a GPL compatible license\n");
+ 		return -EINVAL;
+ 	}
+ 
    btf_id = prog->aux->attach_btf_id;
    st_ops = bpf_struct_ops_find(btf_id);
    if (!st_ops) {
@@@ -13199,10 -12638,6 +13204,10 @@@ int bpf_check(struct bpf_prog **prog, u
    if (!env->explored_states)
    	goto skip_full_check;
+	ret = add_subprog_and_kfunc(env);
 +	if (ret < 0)
 +		goto skip_full_check;
 +
    ret = check_subprogs(env);
    if (ret < 0)
    	goto skip_full_check;
@@@ -13253,7 -12688,7 +13258,7 @@@ skip_full_check
    	ret = convert_ctx_accesses(env);
if (ret == 0)
 -		ret = fixup_bpf_calls(env);
 +		ret = do_misc_fixups(env);
/* do 32-bit optimization after insn patching has done so those patched
     * insns could be handled correctly.
diff --combined kernel/fork.c
index 50209691f21a,426cd0c51f9e..85ca68f4b01e
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@@ -96,7 -96,6 +96,7 @@@
  #include <linux/kasan.h>
  #include <linux/scs.h>
  #include <linux/io_uring.h>
 +#include <linux/bpf.h>
#include <asm/pgalloc.h>
  #include <linux/uaccess.h>
@@@ -735,7 -734,6 +735,7 @@@ void __put_task_struct(struct task_stru
    cgroup_free(tsk);
    task_numa_free(tsk, true);
    security_task_free(tsk);
 +	bpf_task_storage_free(tsk);
    exit_creds(tsk);
    delayacct_tsk_free(tsk);
    put_signal_struct(tsk->signal);
@@@ -1950,8 -1948,14 +1950,14 @@@ static __latent_entropy struct task_str
    p = dup_task_struct(current, node);
    if (!p)
    	goto fork_out;
- 	if (args->io_thread)
+ 	if (args->io_thread) {
+ 		/*
+ 		 * Mark us an IO worker, and block any signal that isn't
+ 		 * fatal or STOP
+ 		 */
    	p->flags |= PF_IO_WORKER;
+ 		siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
+ 	}
/*
     * This _must_ happen before we call free_task(), i.e. before we jump
@@@ -2074,9 -2078,6 +2080,9 @@@
    p->sequential_io	= 0;
    p->sequential_io_avg	= 0;
  #endif
 +#ifdef CONFIG_BPF_SYSCALL
 +	RCU_INIT_POINTER(p->bpf_storage, NULL);
 +#endif
/* Perform scheduler related setup. Assign this task to a CPU. */
    retval = sched_fork(clone_flags, p);
@@@ -2443,14 -2444,8 +2449,8 @@@ struct task_struct *create_io_thread(in
    	.stack_size	= (unsigned long)arg,
    	.io_thread	= 1,
    };
- 	struct task_struct *tsk;
- 	tsk = copy_process(NULL, 0, node, &args);
- 	if (!IS_ERR(tsk)) {
- 		sigfillset(&tsk->blocked);
- 		sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
- 	}
- 	return tsk;
+ 	return copy_process(NULL, 0, node, &args);
  }
/*
diff --combined net/core/dev.c
index 33ff4a944109,af8c1ea040b9..cc5df273f766
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@@ -848,52 -848,6 +848,52 @@@ int dev_fill_metadata_dst(struct net_de
  }
  EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
 +static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
 +{
 +	int k = stack->num_paths++;
 +
 +	if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
 +		return NULL;
 +
 +	return &stack->path[k];
 +}
 +
 +int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
 +			  struct net_device_path_stack *stack)
 +{
 +	const struct net_device *last_dev;
 +	struct net_device_path_ctx ctx = {
 +		.dev	= dev,
 +		.daddr	= daddr,
 +	};
 +	struct net_device_path *path;
 +	int ret = 0;
 +
 +	stack->num_paths = 0;
 +	while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
 +		last_dev = ctx.dev;
 +		path = dev_fwd_path(stack);
 +		if (!path)
 +			return -1;
 +
 +		memset(path, 0, sizeof(struct net_device_path));
 +		ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
 +		if (ret < 0)
 +			return -1;
 +
 +		if (WARN_ON_ONCE(last_dev == ctx.dev))
 +			return -1;
 +	}
 +	path = dev_fwd_path(stack);
 +	if (!path)
 +		return -1;
 +	path->type = DEV_PATH_ETHERNET;
 +	path->dev = ctx.dev;
 +
 +	return ret;
 +}
 +EXPORT_SYMBOL_GPL(dev_fill_forward_path);
 +
  /**
   *	__dev_get_by_name	- find a device by its name
   *	@net: the applicable net namespace
@@@ -2509,14 -2463,16 +2509,14 @@@ int netdev_txq_to_tc(struct net_device 
  EXPORT_SYMBOL(netdev_txq_to_tc);
#ifdef CONFIG_XPS
 -struct static_key xps_needed __read_mostly;
 -EXPORT_SYMBOL(xps_needed);
 -struct static_key xps_rxqs_needed __read_mostly;
 -EXPORT_SYMBOL(xps_rxqs_needed);
 +static struct static_key xps_needed __read_mostly;
 +static struct static_key xps_rxqs_needed __read_mostly;
  static DEFINE_MUTEX(xps_map_mutex);
  #define xmap_dereference(P)		\
    rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
 -			     int tci, u16 index)
 +			     struct xps_dev_maps *old_maps, int tci, u16 index)
  {
    struct xps_map *map = NULL;
    int pos;
@@@ -2535,8 -2491,6 +2535,8 @@@
    		break;
    	}
+		if (old_maps)
 +			RCU_INIT_POINTER(old_maps->attr_map[tci], NULL);
    	RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
    	kfree_rcu(map, rcu);
    	return false;
@@@ -2549,7 -2503,7 +2549,7 @@@ static bool remove_xps_queue_cpu(struc
    			 struct xps_dev_maps *dev_maps,
    			 int cpu, u16 offset, u16 count)
  {
 -	int num_tc = dev->num_tc ? : 1;
 +	int num_tc = dev_maps->num_tc;
    bool active = false;
    int tci;
@@@ -2557,7 -2511,7 +2557,7 @@@
    	int i, j;
for (i = count, j = offset; i--; j++) {
 -			if (!remove_xps_queue(dev_maps, tci, j))
 +			if (!remove_xps_queue(dev_maps, NULL, tci, j))
    			break;
    	}
@@@ -2569,54 -2523,74 +2569,54 @@@
static void reset_xps_maps(struct net_device *dev,
    		   struct xps_dev_maps *dev_maps,
 -			   bool is_rxqs_map)
 +			   enum xps_map_type type)
  {
 -	if (is_rxqs_map) {
 -		static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
 -		RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
 -	} else {
 -		RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
 -	}
    static_key_slow_dec_cpuslocked(&xps_needed);
 +	if (type == XPS_RXQS)
 +		static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
 +
 +	RCU_INIT_POINTER(dev->xps_maps[type], NULL);
 +
    kfree_rcu(dev_maps, rcu);
  }
 -static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
 -			   struct xps_dev_maps *dev_maps, unsigned int nr_ids,
 -			   u16 offset, u16 count, bool is_rxqs_map)
 +static void clean_xps_maps(struct net_device *dev, enum xps_map_type type,
 +			   u16 offset, u16 count)
  {
 +	struct xps_dev_maps *dev_maps;
    bool active = false;
    int i, j;
-	for (j = -1; j = netif_attrmask_next(j, mask, nr_ids),
 -	     j < nr_ids;)
 -		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
 -					       count);
 +	dev_maps = xmap_dereference(dev->xps_maps[type]);
 +	if (!dev_maps)
 +		return;
 +
 +	for (j = 0; j < dev_maps->nr_ids; j++)
 +		active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count);
    if (!active)
 -		reset_xps_maps(dev, dev_maps, is_rxqs_map);
 +		reset_xps_maps(dev, dev_maps, type);
-	if (!is_rxqs_map) {
 -		for (i = offset + (count - 1); count--; i--) {
 +	if (type == XPS_CPUS) {
 +		for (i = offset + (count - 1); count--; i--)
    		netdev_queue_numa_node_write(
 -				netdev_get_tx_queue(dev, i),
 -				NUMA_NO_NODE);
 -		}
 +				netdev_get_tx_queue(dev, i), NUMA_NO_NODE);
    }
  }
static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
    			   u16 count)
  {
 -	const unsigned long *possible_mask = NULL;
 -	struct xps_dev_maps *dev_maps;
 -	unsigned int nr_ids;
 -
    if (!static_key_false(&xps_needed))
    	return;
cpus_read_lock();
    mutex_lock(&xps_map_mutex);
-	if (static_key_false(&xps_rxqs_needed)) {
 -		dev_maps = xmap_dereference(dev->xps_rxqs_map);
 -		if (dev_maps) {
 -			nr_ids = dev->num_rx_queues;
 -			clean_xps_maps(dev, possible_mask, dev_maps, nr_ids,
 -				       offset, count, true);
 -		}
 -	}
 -
 -	dev_maps = xmap_dereference(dev->xps_cpus_map);
 -	if (!dev_maps)
 -		goto out_no_maps;
 +	if (static_key_false(&xps_rxqs_needed))
 +		clean_xps_maps(dev, XPS_RXQS, offset, count);
-	if (num_possible_cpus() > 1)
 -		possible_mask = cpumask_bits(cpu_possible_mask);
 -	nr_ids = nr_cpu_ids;
 -	clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count,
 -		       false);
 +	clean_xps_maps(dev, XPS_CPUS, offset, count);
-out_no_maps:
    mutex_unlock(&xps_map_mutex);
    cpus_read_unlock();
  }
@@@ -2666,35 -2640,16 +2666,35 @@@ static struct xps_map *expand_xps_map(s
    return new_map;
  }
 +/* Copy xps maps at a given index */
 +static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps,
 +			      struct xps_dev_maps *new_dev_maps, int index,
 +			      int tc, bool skip_tc)
 +{
 +	int i, tci = index * dev_maps->num_tc;
 +	struct xps_map *map;
 +
 +	/* copy maps belonging to foreign traffic classes */
 +	for (i = 0; i < dev_maps->num_tc; i++, tci++) {
 +		if (i == tc && skip_tc)
 +			continue;
 +
 +		/* fill in the new device map from the old device map */
 +		map = xmap_dereference(dev_maps->attr_map[tci]);
 +		RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 +	}
 +}
 +
  /* Must be called under cpus_read_lock */
  int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 -			  u16 index, bool is_rxqs_map)
 +			  u16 index, enum xps_map_type type)
  {
 -	const unsigned long *online_mask = NULL, *possible_mask = NULL;
 -	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
 +	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL, *old_dev_maps = NULL;
 +	const unsigned long *online_mask = NULL;
 +	bool active = false, copy = false;
    int i, j, tci, numa_node_id = -2;
    int maps_sz, num_tc = 1, tc = 0;
    struct xps_map *map, *new_map;
 -	bool active = false;
    unsigned int nr_ids;
if (dev->num_tc) {
@@@ -2712,48 -2667,38 +2712,48 @@@
    }
mutex_lock(&xps_map_mutex);
 -	if (is_rxqs_map) {
 +
 +	dev_maps = xmap_dereference(dev->xps_maps[type]);
 +	if (type == XPS_RXQS) {
    	maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues);
 -		dev_maps = xmap_dereference(dev->xps_rxqs_map);
    	nr_ids = dev->num_rx_queues;
    } else {
    	maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc);
 -		if (num_possible_cpus() > 1) {
 +		if (num_possible_cpus() > 1)
    		online_mask = cpumask_bits(cpu_online_mask);
 -			possible_mask = cpumask_bits(cpu_possible_mask);
 -		}
 -		dev_maps = xmap_dereference(dev->xps_cpus_map);
    	nr_ids = nr_cpu_ids;
    }
if (maps_sz < L1_CACHE_BYTES)
    	maps_sz = L1_CACHE_BYTES;
+	/* The old dev_maps could be larger or smaller than the one we're
 +	 * setting up now, as dev->num_tc or nr_ids could have been updated in
 +	 * between. We could try to be smart, but let's be safe instead and only
 +	 * copy foreign traffic classes if the two map sizes match.
 +	 */
 +	if (dev_maps &&
 +	    dev_maps->num_tc == num_tc && dev_maps->nr_ids == nr_ids)
 +		copy = true;
 +
    /* allocate memory for queue storage */
    for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids),
         j < nr_ids;) {
 -		if (!new_dev_maps)
 -			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
    	if (!new_dev_maps) {
 -			mutex_unlock(&xps_map_mutex);
 -			return -ENOMEM;
 +			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
 +			if (!new_dev_maps) {
 +				mutex_unlock(&xps_map_mutex);
 +				return -ENOMEM;
 +			}
 +
 +			new_dev_maps->nr_ids = nr_ids;
 +			new_dev_maps->num_tc = num_tc;
    	}
tci = j * num_tc + tc;
 -		map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) :
 -				 NULL;
 +		map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL;
-		map = expand_xps_map(map, j, index, is_rxqs_map);
 +		map = expand_xps_map(map, j, index, type == XPS_RXQS);
    	if (!map)
    		goto error;
@@@ -2766,21 -2711,29 +2766,21 @@@
    if (!dev_maps) {
    	/* Increment static keys at most once per type */
    	static_key_slow_inc_cpuslocked(&xps_needed);
 -		if (is_rxqs_map)
 +		if (type == XPS_RXQS)
    		static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
    }
-	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
 -	     j < nr_ids;) {
 -		/* copy maps belonging to foreign traffic classes */
 -		for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) {
 -			/* fill in the new device map from the old device map */
 -			map = xmap_dereference(dev_maps->attr_map[tci]);
 -			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 -		}
 +	for (j = 0; j < nr_ids; j++) {
 +		bool skip_tc = false;
-		/* We need to explicitly update tci as prevous loop
 -		 * could break out early if dev_maps is NULL.
 -		 */
    	tci = j * num_tc + tc;
 -
    	if (netif_attr_test_mask(j, mask, nr_ids) &&
    	    netif_attr_test_online(j, online_mask, nr_ids)) {
    		/* add tx-queue to CPU/rx-queue maps */
    		int pos = 0;
+			skip_tc = true;
 +
    		map = xmap_dereference(new_dev_maps->attr_map[tci]);
    		while ((pos < map->len) && (map->queues[pos] != index))
    			pos++;
@@@ -2788,81 -2741,78 +2788,81 @@@
    		if (pos == map->len)
    			map->queues[map->len++] = index;
  #ifdef CONFIG_NUMA
 -			if (!is_rxqs_map) {
 +			if (type == XPS_CPUS) {
    			if (numa_node_id == -2)
    				numa_node_id = cpu_to_node(j);
    			else if (numa_node_id != cpu_to_node(j))
    				numa_node_id = -1;
    		}
  #endif
 -		} else if (dev_maps) {
 -			/* fill in the new device map from the old device map */
 -			map = xmap_dereference(dev_maps->attr_map[tci]);
 -			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
    	}
-		/* copy maps belonging to foreign traffic classes */
 -		for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
 -			/* fill in the new device map from the old device map */
 -			map = xmap_dereference(dev_maps->attr_map[tci]);
 -			RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map);
 -		}
 +		if (copy)
 +			xps_copy_dev_maps(dev_maps, new_dev_maps, j, tc,
 +					  skip_tc);
    }
-	if (is_rxqs_map)
 -		rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps);
 -	else
 -		rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps);
 +	rcu_assign_pointer(dev->xps_maps[type], new_dev_maps);
/* Cleanup old maps */
    if (!dev_maps)
    	goto out_no_old_maps;
-	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
 -	     j < nr_ids;) {
 -		for (i = num_tc, tci = j * num_tc; i--; tci++) {
 -			new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
 +	for (j = 0; j < dev_maps->nr_ids; j++) {
 +		for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) {
    		map = xmap_dereference(dev_maps->attr_map[tci]);
 -			if (map && map != new_map)
 -				kfree_rcu(map, rcu);
 +			if (!map)
 +				continue;
 +
 +			if (copy) {
 +				new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
 +				if (map == new_map)
 +					continue;
 +			}
 +
 +			RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL);
 +			kfree_rcu(map, rcu);
    	}
    }
-	kfree_rcu(dev_maps, rcu);
 +	old_dev_maps = dev_maps;
out_no_old_maps:
    dev_maps = new_dev_maps;
    active = true;
out_no_new_maps:
 -	if (!is_rxqs_map) {
 +	if (type == XPS_CPUS)
    	/* update Tx queue numa node */
    	netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index),
    				     (numa_node_id >= 0) ?
    				     numa_node_id : NUMA_NO_NODE);
 -	}
if (!dev_maps)
    	goto out_no_maps;
/* removes tx-queue from unused CPUs/rx-queues */
 -	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
 -	     j < nr_ids;) {
 -		for (i = tc, tci = j * num_tc; i--; tci++)
 -			active |= remove_xps_queue(dev_maps, tci, index);
 -		if (!netif_attr_test_mask(j, mask, nr_ids) ||
 -		    !netif_attr_test_online(j, online_mask, nr_ids))
 -			active |= remove_xps_queue(dev_maps, tci, index);
 -		for (i = num_tc - tc, tci++; --i; tci++)
 -			active |= remove_xps_queue(dev_maps, tci, index);
 +	for (j = 0; j < dev_maps->nr_ids; j++) {
 +		tci = j * dev_maps->num_tc;
 +
 +		for (i = 0; i < dev_maps->num_tc; i++, tci++) {
 +			if (i == tc &&
 +			    netif_attr_test_mask(j, mask, dev_maps->nr_ids) &&
 +			    netif_attr_test_online(j, online_mask, dev_maps->nr_ids))
 +				continue;
 +
 +			active |= remove_xps_queue(dev_maps,
 +						   copy ? old_dev_maps : NULL,
 +						   tci, index);
 +		}
    }
+	if (old_dev_maps)
 +		kfree_rcu(old_dev_maps, rcu);
 +
    /* free map if not active */
    if (!active)
 -		reset_xps_maps(dev, dev_maps, is_rxqs_map);
 +		reset_xps_maps(dev, dev_maps, type);
out_no_maps:
    mutex_unlock(&xps_map_mutex);
@@@ -2870,10 -2820,11 +2870,10 @@@
    return 0;
  error:
    /* remove any maps that we added */
 -	for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
 -	     j < nr_ids;) {
 +	for (j = 0; j < nr_ids; j++) {
    	for (i = num_tc, tci = j * num_tc; i--; tci++) {
    		new_map = xmap_dereference(new_dev_maps->attr_map[tci]);
 -			map = dev_maps ?
 +			map = copy ?
    		      xmap_dereference(dev_maps->attr_map[tci]) :
    		      NULL;
    		if (new_map && new_map != map)
@@@ -2894,7 -2845,7 +2894,7 @@@ int netif_set_xps_queue(struct net_devi
    int ret;
cpus_read_lock();
 -	ret =  __netif_set_xps_queue(dev, cpumask_bits(mask), index, false);
 +	ret =  __netif_set_xps_queue(dev, cpumask_bits(mask), index, XPS_CPUS);
    cpus_read_unlock();
return ret;
@@@ -4005,15 -3956,13 +4005,15 @@@ sch_handle_egress(struct sk_buff *skb, 
  static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
    		       struct xps_dev_maps *dev_maps, unsigned int tci)
  {
 +	int tc = netdev_get_prio_tc_map(dev, skb->priority);
    struct xps_map *map;
    int queue_index = -1;
-	if (dev->num_tc) {
 -		tci *= dev->num_tc;
 -		tci += netdev_get_prio_tc_map(dev, skb->priority);
 -	}
 +	if (tc >= dev_maps->num_tc || tci >= dev_maps->nr_ids)
 +		return queue_index;
 +
 +	tci *= dev_maps->num_tc;
 +	tci += tc;
map = rcu_dereference(dev_maps->attr_map[tci]);
    if (map) {
@@@ -4044,18 -3993,18 +4044,18 @@@ static int get_xps_queue(struct net_dev
    if (!static_key_false(&xps_rxqs_needed))
    	goto get_cpus_map;
-	dev_maps = rcu_dereference(sb_dev->xps_rxqs_map);
 +	dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_RXQS]);
    if (dev_maps) {
    	int tci = sk_rx_queue_get(sk);
-		if (tci >= 0 && tci < dev->num_rx_queues)
 +		if (tci >= 0)
    		queue_index = __get_xps_queue_idx(dev, skb, dev_maps,
    						  tci);
    }
get_cpus_map:
    if (queue_index < 0) {
 -		dev_maps = rcu_dereference(sb_dev->xps_cpus_map);
 +		dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_CPUS]);
    	if (dev_maps) {
    		unsigned int tci = skb->sender_cpu - 1;
@@@ -5335,7 -5284,6 +5335,7 @@@ skip_classify
    		goto another_round;
    	case RX_HANDLER_EXACT:
    		deliver_exact = true;
 +			break;
    	case RX_HANDLER_PASS:
    		break;
    	default:
@@@ -5928,13 -5876,15 +5928,13 @@@ void napi_gro_flush(struct napi_struct 
  }
  EXPORT_SYMBOL(napi_gro_flush);
 -static struct list_head *gro_list_prepare(struct napi_struct *napi,
 -					  struct sk_buff *skb)
 +static void gro_list_prepare(const struct list_head *head,
 +			     const struct sk_buff *skb)
  {
    unsigned int maclen = skb->dev->hard_header_len;
    u32 hash = skb_get_hash_raw(skb);
 -	struct list_head *head;
    struct sk_buff *p;
-	head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list;
    list_for_each_entry(p, head, list) {
    	unsigned long diffs;
@@@ -5960,6 -5910,8 +5960,6 @@@
    			       maclen);
    	NAPI_GRO_CB(p)->same_flow = !diffs;
    }
 -
 -	return head;
  }
static void skb_gro_reset_offset(struct sk_buff *skb)
@@@ -6022,11 -5974,11 +6022,11 @@@ static void gro_flush_oldest(struct nap
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
  {
 -	u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
 +	u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
 +	struct gro_list *gro_list = &napi->gro_hash[bucket];
    struct list_head *head = &offload_base;
    struct packet_offload *ptype;
    __be16 type = skb->protocol;
 -	struct list_head *gro_head;
    struct sk_buff *pp = NULL;
    enum gro_result ret;
    int same_flow;
@@@ -6035,7 -5987,7 +6035,7 @@@
    if (netif_elide_gro(skb->dev))
    	goto normal;
-	gro_head = gro_list_prepare(napi, skb);
 +	gro_list_prepare(&gro_list->list, skb);
rcu_read_lock();
    list_for_each_entry_rcu(ptype, head, list) {
@@@ -6071,7 -6023,7 +6071,7 @@@
pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
    				ipv6_gro_receive, inet_gro_receive,
 -					gro_head, skb);
 +					&gro_list->list, skb);
    	break;
    }
    rcu_read_unlock();
@@@ -6090,7 -6042,7 +6090,7 @@@
    if (pp) {
    	skb_list_del_init(pp);
    	napi_gro_complete(napi, pp);
 -		napi->gro_hash[hash].count--;
 +		gro_list->count--;
    }
if (same_flow)
@@@ -6099,16 -6051,16 +6099,16 @@@
    if (NAPI_GRO_CB(skb)->flush)
    	goto normal;
-	if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) {
 -		gro_flush_oldest(napi, gro_head);
 -	} else {
 -		napi->gro_hash[hash].count++;
 -	}
 +	if (unlikely(gro_list->count >= MAX_GRO_SKBS))
 +		gro_flush_oldest(napi, &gro_list->list);
 +	else
 +		gro_list->count++;
 +
    NAPI_GRO_CB(skb)->count = 1;
    NAPI_GRO_CB(skb)->age = jiffies;
    NAPI_GRO_CB(skb)->last = skb;
    skb_shinfo(skb)->gso_size = skb_gro_len(skb);
 -	list_add(&skb->list, gro_head);
 +	list_add(&skb->list, &gro_list->list);
    ret = GRO_HELD;
pull:
@@@ -6116,11 -6068,11 +6116,11 @@@
    if (grow > 0)
    	gro_pull_from_frag0(skb, grow);
  ok:
 -	if (napi->gro_hash[hash].count) {
 -		if (!test_bit(hash, &napi->gro_bitmask))
 -			__set_bit(hash, &napi->gro_bitmask);
 -	} else if (test_bit(hash, &napi->gro_bitmask)) {
 -		__clear_bit(hash, &napi->gro_bitmask);
 +	if (gro_list->count) {
 +		if (!test_bit(bucket, &napi->gro_bitmask))
 +			__set_bit(bucket, &napi->gro_bitmask);
 +	} else if (test_bit(bucket, &napi->gro_bitmask)) {
 +		__clear_bit(bucket, &napi->gro_bitmask);
    }
return ret;
@@@ -6837,7 -6789,6 +6837,7 @@@ int dev_set_threaded(struct net_device
return err;
  }
 +EXPORT_SYMBOL(dev_set_threaded);
void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
    	    int (*poll)(struct napi_struct *, int), int weight)
@@@ -7041,7 -6992,7 +7041,7 @@@ static int napi_thread_wait(struct napi
set_current_state(TASK_INTERRUPTIBLE);
- 	while (!kthread_should_stop() && !napi_disable_pending(napi)) {
+ 	while (!kthread_should_stop()) {
    	/* Testing SCHED_THREADED bit here to make sure the current
    	 * kthread owns this napi and could poll on this napi.
    	 * Testing SCHED bit is not enough because SCHED bit might be
@@@ -7059,6 -7010,7 +7059,7 @@@
    	set_current_state(TASK_INTERRUPTIBLE);
    }
    __set_current_state(TASK_RUNNING);
+ 
    return -1;
  }
@@@ -10385,20 -10337,14 +10386,20 @@@ EXPORT_SYMBOL(register_netdev)
int netdev_refcnt_read(const struct net_device *dev)
  {
 +#ifdef CONFIG_PCPU_DEV_REFCNT
    int i, refcnt = 0;
for_each_possible_cpu(i)
    	refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
    return refcnt;
 +#else
 +	return refcount_read(&dev->dev_refcnt);
 +#endif
  }
  EXPORT_SYMBOL(netdev_refcnt_read);
 +int netdev_unregister_timeout_secs __read_mostly = 10;
 +
  #define WAIT_REFS_MIN_MSECS 1
  #define WAIT_REFS_MAX_MSECS 250
  /**
@@@ -10423,7 -10369,7 +10424,7 @@@ static void netdev_wait_allrefs(struct 
    rebroadcast_time = warning_time = jiffies;
    refcnt = netdev_refcnt_read(dev);
-	while (refcnt != 0) {
 +	while (refcnt != 1) {
    	if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
    		rtnl_lock();
@@@ -10460,9 -10406,7 +10461,9 @@@
refcnt = netdev_refcnt_read(dev);
-		if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) {
 +		if (refcnt != 1 &&
 +		    time_after(jiffies, warning_time +
 +			       netdev_unregister_timeout_secs * HZ)) {
    		pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
    			 dev->name, refcnt);
    		warning_time = jiffies;
@@@ -10538,7 -10482,7 +10539,7 @@@ void netdev_run_todo(void
    	netdev_wait_allrefs(dev);
/* paranoia */
 -		BUG_ON(netdev_refcnt_read(dev));
 +		BUG_ON(netdev_refcnt_read(dev) != 1);
    	BUG_ON(!list_empty(&dev->ptype_all));
    	BUG_ON(!list_empty(&dev->ptype_specific));
    	WARN_ON(rcu_access_pointer(dev->ip_ptr));
@@@ -10755,14 -10699,9 +10756,14 @@@ struct net_device *alloc_netdev_mqs(in
    dev = PTR_ALIGN(p, NETDEV_ALIGN);
    dev->padded = (char *)dev - (char *)p;
+#ifdef CONFIG_PCPU_DEV_REFCNT
    dev->pcpu_refcnt = alloc_percpu(int);
    if (!dev->pcpu_refcnt)
    	goto free_dev;
 +	dev_hold(dev);
 +#else
 +	refcount_set(&dev->dev_refcnt, 1);
 +#endif
if (dev_addr_init(dev))
    	goto free_pcpu;
@@@ -10826,10 -10765,8 +10827,10 @@@ free_all
    return NULL;
free_pcpu:
 +#ifdef CONFIG_PCPU_DEV_REFCNT
    free_percpu(dev->pcpu_refcnt);
  free_dev:
 +#endif
    netdev_freemem(dev);
    return NULL;
  }
@@@ -10871,10 -10808,8 +10872,10 @@@ void free_netdev(struct net_device *dev
    list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
    	netif_napi_del(p);
+#ifdef CONFIG_PCPU_DEV_REFCNT
    free_percpu(dev->pcpu_refcnt);
    dev->pcpu_refcnt = NULL;
 +#endif
    free_percpu(dev->xdp_bulkq);
    dev->xdp_bulkq = NULL;
@@@ -11062,13 -10997,11 +11063,13 @@@ void unregister_netdev(struct net_devic
  EXPORT_SYMBOL(unregister_netdev);
/**
 - *	dev_change_net_namespace - move device to different nethost namespace
 + *	__dev_change_net_namespace - move device to different nethost namespace
   *	@dev: device
   *	@net: network namespace
   *	@pat: If not NULL name pattern to try if the current device name
   *	      is already taken in the destination network namespace.
 + *	@new_ifindex: If not zero, specifies device index in the target
 + *	              namespace.
   *
   *	This function shuts down a device interface and moves it
   *	to a new network namespace. On success 0 is returned, on
@@@ -11077,11 -11010,10 +11078,11 @@@
   *	Callers must hold the rtnl semaphore.
   */
 -int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
 +int __dev_change_net_namespace(struct net_device *dev, struct net *net,
 +			       const char *pat, int new_ifindex)
  {
    struct net *net_old = dev_net(dev);
 -	int err, new_nsid, new_ifindex;
 +	int err, new_nsid;
ASSERT_RTNL();
@@@ -11112,11 -11044,6 +11113,11 @@@
    		goto out;
    }
+	/* Check that new_ifindex isn't used yet. */
 +	err = -EBUSY;
 +	if (new_ifindex && __dev_get_by_index(net, new_ifindex))
 +		goto out;
 +
    /*
     * And now a mini version of register_netdevice unregister_netdevice.
     */
@@@ -11144,12 -11071,10 +11145,12 @@@
new_nsid = peernet2id_alloc(dev_net(dev), net, GFP_KERNEL);
    /* If there is an ifindex conflict assign a new one */
 -	if (__dev_get_by_index(net, dev->ifindex))
 -		new_ifindex = dev_new_index(net);
 -	else
 -		new_ifindex = dev->ifindex;
 +	if (!new_ifindex) {
 +		if (__dev_get_by_index(net, dev->ifindex))
 +			new_ifindex = dev_new_index(net);
 +		else
 +			new_ifindex = dev->ifindex;
 +	}
rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid,
    		    new_ifindex);
@@@ -11202,7 -11127,7 +11203,7 @@@
  out:
    return err;
  }
 -EXPORT_SYMBOL_GPL(dev_change_net_namespace);
 +EXPORT_SYMBOL_GPL(__dev_change_net_namespace);
static int dev_cpu_dead(unsigned int oldcpu)
  {
diff --combined net/core/rtnetlink.c
index 9f1f55785a6f,3485b16a7ff3..714d5fa38546
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@@ -1877,7 -1877,6 +1877,7 @@@ static const struct nla_policy ifla_pol
    			    .len = ALTIFNAMSIZ - 1 },
    [IFLA_PERM_ADDRESS]	= { .type = NLA_REJECT },
    [IFLA_PROTO_DOWN_REASON] = { .type = NLA_NESTED },
 +	[IFLA_NEW_IFINDEX]	= NLA_POLICY_MIN(NLA_S32, 1),
  };
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@@ -2604,22 -2603,14 +2604,22 @@@ static int do_setlink(const struct sk_b
    	return err;
if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD] || tb[IFLA_TARGET_NETNSID]) {
 -		struct net *net = rtnl_link_get_net_capable(skb, dev_net(dev),
 -							    tb, CAP_NET_ADMIN);
 +		struct net *net;
 +		int new_ifindex;
 +
 +		net = rtnl_link_get_net_capable(skb, dev_net(dev),
 +						tb, CAP_NET_ADMIN);
    	if (IS_ERR(net)) {
    		err = PTR_ERR(net);
    		goto errout;
    	}
-		err = dev_change_net_namespace(dev, net, ifname);
 +		if (tb[IFLA_NEW_IFINDEX])
 +			new_ifindex = nla_get_s32(tb[IFLA_NEW_IFINDEX]);
 +		else
 +			new_ifindex = 0;
 +
 +		err = __dev_change_net_namespace(dev, net, ifname, new_ifindex);
    	put_net(net);
    	if (err)
    		goto errout;
@@@ -2872,7 -2863,7 +2872,7 @@@
BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af))));
- 			err = af_ops->set_link_af(dev, af);
+ 			err = af_ops->set_link_af(dev, af, extack);
    		if (err < 0) {
    			rcu_read_unlock();
    			goto errout;
diff --combined net/core/skmsg.c
index 92a83c02562a,5def3a2e85be..43ce17a6a585
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@@ -399,104 -399,6 +399,104 @@@ out
  }
  EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter);
+int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags,
 +		     long timeo, int *err)
 +{
 +	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 +	int ret = 0;
 +
 +	if (sk->sk_shutdown & RCV_SHUTDOWN)
 +		return 1;
 +
 +	if (!timeo)
 +		return ret;
 +
 +	add_wait_queue(sk_sleep(sk), &wait);
 +	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 +	ret = sk_wait_event(sk, &timeo,
 +			    !list_empty(&psock->ingress_msg) ||
 +			    !skb_queue_empty(&sk->sk_receive_queue), &wait);
 +	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 +	remove_wait_queue(sk_sleep(sk), &wait);
 +	return ret;
 +}
 +EXPORT_SYMBOL_GPL(sk_msg_wait_data);
 +
 +/* Receive sk_msg from psock->ingress_msg to @msg. */
 +int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
 +		   int len, int flags)
 +{
 +	struct iov_iter *iter = &msg->msg_iter;
 +	int peek = flags & MSG_PEEK;
 +	struct sk_msg *msg_rx;
 +	int i, copied = 0;
 +
 +	msg_rx = sk_psock_peek_msg(psock);
 +	while (copied != len) {
 +		struct scatterlist *sge;
 +
 +		if (unlikely(!msg_rx))
 +			break;
 +
 +		i = msg_rx->sg.start;
 +		do {
 +			struct page *page;
 +			int copy;
 +
 +			sge = sk_msg_elem(msg_rx, i);
 +			copy = sge->length;
 +			page = sg_page(sge);
 +			if (copied + copy > len)
 +				copy = len - copied;
 +			copy = copy_page_to_iter(page, sge->offset, copy, iter);
 +			if (!copy)
 +				return copied ? copied : -EFAULT;
 +
 +			copied += copy;
 +			if (likely(!peek)) {
 +				sge->offset += copy;
 +				sge->length -= copy;
 +				if (!msg_rx->skb)
 +					sk_mem_uncharge(sk, copy);
 +				msg_rx->sg.size -= copy;
 +
 +				if (!sge->length) {
 +					sk_msg_iter_var_next(i);
 +					if (!msg_rx->skb)
 +						put_page(page);
 +				}
 +			} else {
 +				/* Lets not optimize peek case if copy_page_to_iter
 +				 * didn't copy the entire length lets just break.
 +				 */
 +				if (copy != sge->length)
 +					return copied;
 +				sk_msg_iter_var_next(i);
 +			}
 +
 +			if (copied == len)
 +				break;
 +		} while (i != msg_rx->sg.end);
 +
 +		if (unlikely(peek)) {
 +			msg_rx = sk_psock_next_msg(psock, msg_rx);
 +			if (!msg_rx)
 +				break;
 +			continue;
 +		}
 +
 +		msg_rx->sg.start = i;
 +		if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
 +			msg_rx = sk_psock_dequeue_msg(psock);
 +			kfree_sk_msg(msg_rx);
 +		}
 +		msg_rx = sk_psock_peek_msg(psock);
 +	}
 +
 +	return copied;
 +}
 +EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
 +
  static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk,
    					  struct sk_buff *skb)
  {
@@@ -508,7 -410,7 +508,7 @@@
    if (!sk_rmem_schedule(sk, skb, skb->truesize))
    	return NULL;
-	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_ATOMIC);
 +	msg = kzalloc(sizeof(*msg), __GFP_NOWARN | GFP_KERNEL);
    if (unlikely(!msg))
    	return NULL;
@@@ -586,6 -488,7 +586,7 @@@ static int sk_psock_skb_ingress_self(st
    if (unlikely(!msg))
    	return -EAGAIN;
    sk_msg_init(msg);
+ 	skb_set_owner_r(skb, sk);
    return sk_psock_skb_ingress_enqueue(skb, psock, sk, msg);
  }
@@@ -595,7 -498,7 +596,7 @@@ static int sk_psock_handle_skb(struct s
    if (!ingress) {
    	if (!sock_writeable(psock->sk))
    		return -EAGAIN;
 -		return skb_send_sock_locked(psock->sk, skb, off, len);
 +		return skb_send_sock(psock->sk, skb, off, len);
    }
    return sk_psock_skb_ingress(psock, skb);
  }
@@@ -609,7 -512,8 +610,7 @@@ static void sk_psock_backlog(struct wor
    u32 len, off;
    int ret;
-	/* Lock sock to avoid losing sk_socket during loop. */
 -	lock_sock(psock->sk);
 +	mutex_lock(&psock->work_mutex);
    if (state->skb) {
    	skb = state->skb;
    	len = state->len;
@@@ -622,11 -526,10 +623,11 @@@
    	len = skb->len;
    	off = 0;
  start:
 -		ingress = tcp_skb_bpf_ingress(skb);
 +		ingress = skb_bpf_ingress(skb);
 +		skb_bpf_redirect_clear(skb);
    	do {
    		ret = -EIO;
 -			if (likely(psock->sk->sk_socket))
 +			if (!sock_flag(psock->sk, SOCK_DEAD))
    			ret = sk_psock_handle_skb(psock, skb, off,
    						  len, ingress);
    		if (ret <= 0) {
@@@ -650,7 -553,7 +651,7 @@@
    		kfree_skb(skb);
    }
  end:
 -	release_sock(psock->sk);
 +	mutex_unlock(&psock->work_mutex);
  }
struct sk_psock *sk_psock_init(struct sock *sk, int node)
@@@ -660,6 -563,11 +661,6 @@@
write_lock_bh(&sk->sk_callback_lock);
-	if (inet_csk_has_ulp(sk)) {
 -		psock = ERR_PTR(-EINVAL);
 -		goto out;
 -	}
 -
    if (sk->sk_user_data) {
    	psock = ERR_PTR(-EBUSY);
    	goto out;
@@@ -683,9 -591,7 +684,9 @@@
    spin_lock_init(&psock->link_lock);
INIT_WORK(&psock->work, sk_psock_backlog);
 +	mutex_init(&psock->work_mutex);
    INIT_LIST_HEAD(&psock->ingress_msg);
 +	spin_lock_init(&psock->ingress_lock);
    skb_queue_head_init(&psock->ingress_skb);
sk_psock_set_state(psock, SK_PSOCK_TX_ENABLED);
@@@ -713,7 -619,7 +714,7 @@@ struct sk_psock_link *sk_psock_link_pop
    return link;
  }
-void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
 +static void __sk_psock_purge_ingress_msg(struct sk_psock *psock)
  {
    struct sk_msg *msg, *tmp;
@@@ -724,14 -630,9 +725,14 @@@
    }
  }
-static void sk_psock_zap_ingress(struct sk_psock *psock)
 +static void __sk_psock_zap_ingress(struct sk_psock *psock)
  {
 -	__skb_queue_purge(&psock->ingress_skb);
 +	struct sk_buff *skb;
 +
 +	while ((skb = skb_dequeue(&psock->ingress_skb)) != NULL) {
 +		skb_bpf_redirect_clear(skb);
 +		kfree_skb(skb);
 +	}
    __sk_psock_purge_ingress_msg(psock);
  }
@@@ -745,35 -646,23 +746,35 @@@ static void sk_psock_link_destroy(struc
    }
  }
-static void sk_psock_destroy_deferred(struct work_struct *gc)
 +void sk_psock_stop(struct sk_psock *psock, bool wait)
  {
 -	struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
 +	spin_lock_bh(&psock->ingress_lock);
 +	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
 +	sk_psock_cork_free(psock);
 +	__sk_psock_zap_ingress(psock);
 +	spin_unlock_bh(&psock->ingress_lock);
+	if (wait)
 +		cancel_work_sync(&psock->work);
 +}
 +
 +static void sk_psock_done_strp(struct sk_psock *psock);
 +
 +static void sk_psock_destroy(struct work_struct *work)
 +{
 +	struct sk_psock *psock = container_of(to_rcu_work(work),
 +					      struct sk_psock, rwork);
    /* No sk_callback_lock since already detached. */
-	/* Parser has been stopped */
 -	if (psock->progs.skb_parser)
 -		strp_done(&psock->parser.strp);
 +	sk_psock_done_strp(psock);
cancel_work_sync(&psock->work);
 +	mutex_destroy(&psock->work_mutex);
psock_progs_drop(&psock->progs);
sk_psock_link_destroy(psock);
    sk_psock_cork_free(psock);
 -	sk_psock_zap_ingress(psock);
if (psock->sk_redir)
    	sock_put(psock->sk_redir);
@@@ -781,21 -670,30 +782,21 @@@
    kfree(psock);
  }
-static void sk_psock_destroy(struct rcu_head *rcu)
 -{
 -	struct sk_psock *psock = container_of(rcu, struct sk_psock, rcu);
 -
 -	INIT_WORK(&psock->gc, sk_psock_destroy_deferred);
 -	schedule_work(&psock->gc);
 -}
 -
  void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
  {
 -	sk_psock_cork_free(psock);
 -	sk_psock_zap_ingress(psock);
 +	sk_psock_stop(psock, false);
write_lock_bh(&sk->sk_callback_lock);
    sk_psock_restore_proto(sk, psock);
    rcu_assign_sk_user_data(sk, NULL);
 -	if (psock->progs.skb_parser)
 +	if (psock->progs.stream_parser)
    	sk_psock_stop_strp(sk, psock);
 -	else if (psock->progs.skb_verdict)
 +	else if (psock->progs.stream_verdict || psock->progs.skb_verdict)
    	sk_psock_stop_verdict(sk, psock);
    write_unlock_bh(&sk->sk_callback_lock);
 -	sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
-	call_rcu(&psock->rcu, sk_psock_destroy);
 +	INIT_RCU_WORK(&psock->rwork, sk_psock_destroy);
 +	queue_rcu_work(system_wq, &psock->rwork);
  }
  EXPORT_SYMBOL_GPL(sk_psock_drop);
@@@ -846,12 -744,27 +847,12 @@@ out
  }
  EXPORT_SYMBOL_GPL(sk_psock_msg_verdict);
-static int sk_psock_bpf_run(struct sk_psock *psock, struct bpf_prog *prog,
 -			    struct sk_buff *skb)
 -{
 -	bpf_compute_data_end_sk_skb(skb);
 -	return bpf_prog_run_pin_on_cpu(prog, skb);
 -}
 -
 -static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
 -{
 -	struct sk_psock_parser *parser;
 -
 -	parser = container_of(strp, struct sk_psock_parser, strp);
 -	return container_of(parser, struct sk_psock, parser);
 -}
 -
  static void sk_psock_skb_redirect(struct sk_buff *skb)
  {
    struct sk_psock *psock_other;
    struct sock *sk_other;
-	sk_other = tcp_skb_bpf_redirect_fetch(skb);
 +	sk_other = skb_bpf_redirect_fetch(skb);
    /* This error is a buggy BPF program, it returned a redirect
     * return code, but then didn't set a redirect interface.
     */
@@@ -864,27 -777,20 +865,26 @@@
     * error that caused the pipe to break. We can't send a packet on
     * a socket that is in this state so we drop the skb.
     */
 -	if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
 -	    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
 +	if (!psock_other || sock_flag(sk_other, SOCK_DEAD)) {
 +		kfree_skb(skb);
 +		return;
 +	}
 +	spin_lock_bh(&psock_other->ingress_lock);
 +	if (!sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
 +		spin_unlock_bh(&psock_other->ingress_lock);
    	kfree_skb(skb);
    	return;
    }
skb_queue_tail(&psock_other->ingress_skb, skb);
    schedule_work(&psock_other->work);
 +	spin_unlock_bh(&psock_other->ingress_lock);
  }
static void sk_psock_tls_verdict_apply(struct sk_buff *skb, struct sock *sk, int verdict)
  {
    switch (verdict) {
    case __SK_REDIRECT:
- 		skb_set_owner_r(skb, sk);
    	sk_psock_skb_redirect(skb);
    	break;
    case __SK_PASS:
@@@ -900,17 -806,12 +900,13 @@@ int sk_psock_tls_strp_read(struct sk_ps
    int ret = __SK_PASS;
rcu_read_lock();
 -	prog = READ_ONCE(psock->progs.skb_verdict);
 +	prog = READ_ONCE(psock->progs.stream_verdict);
    if (likely(prog)) {
- 		/* We skip full set_owner_r here because if we do a SK_PASS
- 		 * or SK_DROP we can skip skb memory accounting and use the
- 		 * TLS context.
- 		 */
    	skb->sk = psock->sk;
 -		tcp_skb_bpf_redirect_clear(skb);
 -		ret = sk_psock_bpf_run(psock, prog, skb);
 -		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
 +		skb_dst_drop(skb);
 +		skb_bpf_redirect_clear(skb);
 +		ret = bpf_prog_run_pin_on_cpu(prog, skb);
 +		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
    	skb->sk = NULL;
    }
    sk_psock_tls_verdict_apply(skb, psock->sk, ret);
@@@ -922,6 -823,7 +918,6 @@@ EXPORT_SYMBOL_GPL(sk_psock_tls_strp_rea
  static void sk_psock_verdict_apply(struct sk_psock *psock,
    			   struct sk_buff *skb, int verdict)
  {
 -	struct tcp_skb_cb *tcp;
    struct sock *sk_other;
    int err = -EIO;
@@@ -933,7 -835,8 +929,7 @@@
    		goto out_free;
    	}
-		tcp = TCP_SKB_CB(skb);
 -		tcp->bpf.flags |= BPF_F_INGRESS;
 +		skb_bpf_set_ingress(skb);
/* If the queue is empty then we can submit directly
    	 * into the msg queue. If its not empty we have to
@@@ -945,12 -848,8 +941,12 @@@
    		err = sk_psock_skb_ingress_self(psock, skb);
    	}
    	if (err < 0) {
 -			skb_queue_tail(&psock->ingress_skb, skb);
 -			schedule_work(&psock->work);
 +			spin_lock_bh(&psock->ingress_lock);
 +			if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
 +				skb_queue_tail(&psock->ingress_skb, skb);
 +				schedule_work(&psock->work);
 +			}
 +			spin_unlock_bh(&psock->ingress_lock);
    	}
    	break;
    case __SK_REDIRECT:
@@@ -963,24 -862,6 +959,24 @@@ out_free
    }
  }
+static void sk_psock_write_space(struct sock *sk)
 +{
 +	struct sk_psock *psock;
 +	void (*write_space)(struct sock *sk) = NULL;
 +
 +	rcu_read_lock();
 +	psock = sk_psock(sk);
 +	if (likely(psock)) {
 +		if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
 +			schedule_work(&psock->work);
 +		write_space = psock->saved_write_space;
 +	}
 +	rcu_read_unlock();
 +	if (write_space)
 +		write_space(sk);
 +}
 +
 +#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
  static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
  {
    struct sk_psock *psock;
@@@ -995,13 -876,13 +991,14 @@@
    	kfree_skb(skb);
    	goto out;
    }
- 	skb_set_owner_r(skb, sk);
 -	prog = READ_ONCE(psock->progs.skb_verdict);
 +	prog = READ_ONCE(psock->progs.stream_verdict);
    if (likely(prog)) {
+ 		skb->sk = sk;
 -		tcp_skb_bpf_redirect_clear(skb);
 -		ret = sk_psock_bpf_run(psock, prog, skb);
 -		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
 +		skb_dst_drop(skb);
 +		skb_bpf_redirect_clear(skb);
 +		ret = bpf_prog_run_pin_on_cpu(prog, skb);
 +		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
+ 		skb->sk = NULL;
    }
    sk_psock_verdict_apply(psock, skb, ret);
  out:
@@@ -1015,15 -896,15 +1012,15 @@@ static int sk_psock_strp_read_done(stru
static int sk_psock_strp_parse(struct strparser *strp, struct sk_buff *skb)
  {
 -	struct sk_psock *psock = sk_psock_from_strp(strp);
 +	struct sk_psock *psock = container_of(strp, struct sk_psock, strp);
    struct bpf_prog *prog;
    int ret = skb->len;
rcu_read_lock();
 -	prog = READ_ONCE(psock->progs.skb_parser);
 +	prog = READ_ONCE(psock->progs.stream_parser);
    if (likely(prog)) {
    	skb->sk = psock->sk;
 -		ret = sk_psock_bpf_run(psock, prog, skb);
 +		ret = bpf_prog_run_pin_on_cpu(prog, skb);
    	skb->sk = NULL;
    }
    rcu_read_unlock();
@@@ -1039,59 -920,16 +1036,59 @@@ static void sk_psock_strp_data_ready(st
    psock = sk_psock(sk);
    if (likely(psock)) {
    	if (tls_sw_has_ctx_rx(sk)) {
 -			psock->parser.saved_data_ready(sk);
 +			psock->saved_data_ready(sk);
    	} else {
    		write_lock_bh(&sk->sk_callback_lock);
 -			strp_data_ready(&psock->parser.strp);
 +			strp_data_ready(&psock->strp);
    		write_unlock_bh(&sk->sk_callback_lock);
    	}
    }
    rcu_read_unlock();
  }
+int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
 +{
 +	static const struct strp_callbacks cb = {
 +		.rcv_msg	= sk_psock_strp_read,
 +		.read_sock_done	= sk_psock_strp_read_done,
 +		.parse_msg	= sk_psock_strp_parse,
 +	};
 +
 +	return strp_init(&psock->strp, sk, &cb);
 +}
 +
 +void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
 +{
 +	if (psock->saved_data_ready)
 +		return;
 +
 +	psock->saved_data_ready = sk->sk_data_ready;
 +	sk->sk_data_ready = sk_psock_strp_data_ready;
 +	sk->sk_write_space = sk_psock_write_space;
 +}
 +
 +void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
 +{
 +	if (!psock->saved_data_ready)
 +		return;
 +
 +	sk->sk_data_ready = psock->saved_data_ready;
 +	psock->saved_data_ready = NULL;
 +	strp_stop(&psock->strp);
 +}
 +
 +static void sk_psock_done_strp(struct sk_psock *psock)
 +{
 +	/* Parser has been stopped */
 +	if (psock->progs.stream_parser)
 +		strp_done(&psock->strp);
 +}
 +#else
 +static void sk_psock_done_strp(struct sk_psock *psock)
 +{
 +}
 +#endif /* CONFIG_BPF_STREAM_PARSER */
 +
  static int sk_psock_verdict_recv(read_descriptor_t *desc, struct sk_buff *skb,
    			 unsigned int offset, size_t orig_len)
  {
@@@ -1115,15 -953,13 +1112,16 @@@
    	kfree_skb(skb);
    	goto out;
    }
- 	skb_set_owner_r(skb, sk);
 -	prog = READ_ONCE(psock->progs.skb_verdict);
 +	prog = READ_ONCE(psock->progs.stream_verdict);
 +	if (!prog)
 +		prog = READ_ONCE(psock->progs.skb_verdict);
    if (likely(prog)) {
+ 		skb->sk = sk;
 -		tcp_skb_bpf_redirect_clear(skb);
 -		ret = sk_psock_bpf_run(psock, prog, skb);
 -		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
 +		skb_dst_drop(skb);
 +		skb_bpf_redirect_clear(skb);
 +		ret = bpf_prog_run_pin_on_cpu(prog, skb);
 +		ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
+ 		skb->sk = NULL;
    }
    sk_psock_verdict_apply(psock, skb, ret);
  out:
@@@ -1146,21 -982,82 +1144,21 @@@ static void sk_psock_verdict_data_ready
    sock->ops->read_sock(sk, &desc, sk_psock_verdict_recv);
  }
-static void sk_psock_write_space(struct sock *sk)
 -{
 -	struct sk_psock *psock;
 -	void (*write_space)(struct sock *sk) = NULL;
 -
 -	rcu_read_lock();
 -	psock = sk_psock(sk);
 -	if (likely(psock)) {
 -		if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
 -			schedule_work(&psock->work);
 -		write_space = psock->saved_write_space;
 -	}
 -	rcu_read_unlock();
 -	if (write_space)
 -		write_space(sk);
 -}
 -
 -int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
 -{
 -	static const struct strp_callbacks cb = {
 -		.rcv_msg	= sk_psock_strp_read,
 -		.read_sock_done	= sk_psock_strp_read_done,
 -		.parse_msg	= sk_psock_strp_parse,
 -	};
 -
 -	psock->parser.enabled = false;
 -	return strp_init(&psock->parser.strp, sk, &cb);
 -}
 -
  void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
  {
 -	struct sk_psock_parser *parser = &psock->parser;
 -
 -	if (parser->enabled)
 +	if (psock->saved_data_ready)
    	return;
-	parser->saved_data_ready = sk->sk_data_ready;
 +	psock->saved_data_ready = sk->sk_data_ready;
    sk->sk_data_ready = sk_psock_verdict_data_ready;
    sk->sk_write_space = sk_psock_write_space;
 -	parser->enabled = true;
 -}
 -
 -void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
 -{
 -	struct sk_psock_parser *parser = &psock->parser;
 -
 -	if (parser->enabled)
 -		return;
 -
 -	parser->saved_data_ready = sk->sk_data_ready;
 -	sk->sk_data_ready = sk_psock_strp_data_ready;
 -	sk->sk_write_space = sk_psock_write_space;
 -	parser->enabled = true;
 -}
 -
 -void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
 -{
 -	struct sk_psock_parser *parser = &psock->parser;
 -
 -	if (!parser->enabled)
 -		return;
 -
 -	sk->sk_data_ready = parser->saved_data_ready;
 -	parser->saved_data_ready = NULL;
 -	strp_stop(&parser->strp);
 -	parser->enabled = false;
  }
void sk_psock_stop_verdict(struct sock *sk, struct sk_psock *psock)
  {
 -	struct sk_psock_parser *parser = &psock->parser;
 -
 -	if (!parser->enabled)
 +	if (!psock->saved_data_ready)
    	return;
-	sk->sk_data_ready = parser->saved_data_ready;
 -	parser->saved_data_ready = NULL;
 -	parser->enabled = false;
 +	sk->sk_data_ready = psock->saved_data_ready;
 +	psock->saved_data_ready = NULL;
  }
diff --combined net/ethtool/ioctl.c
index 26b3e7086075,771688e1b0da..a9f67574148f
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@@ -426,29 -426,13 +426,13 @@@ struct ethtool_link_usettings 
  int __ethtool_get_link_ksettings(struct net_device *dev,
    			 struct ethtool_link_ksettings *link_ksettings)
  {
- 	const struct link_mode_info *link_info;
- 	int err;
- 
    ASSERT_RTNL();
if (!dev->ethtool_ops->get_link_ksettings)
    	return -EOPNOTSUPP;
memset(link_ksettings, 0, sizeof(*link_ksettings));
- 
- 	link_ksettings->link_mode = -1;
- 	err = dev->ethtool_ops->get_link_ksettings(dev, link_ksettings);
- 	if (err)
- 		return err;
- 
- 	if (link_ksettings->link_mode != -1) {
- 		link_info = &link_mode_params[link_ksettings->link_mode];
- 		link_ksettings->base.speed = link_info->speed;
- 		link_ksettings->lanes = link_info->lanes;
- 		link_ksettings->base.duplex = link_info->duplex;
- 	}
- 
- 	return 0;
+ 	return dev->ethtool_ops->get_link_ksettings(dev, link_ksettings);
  }
  EXPORT_SYMBOL(__ethtool_get_link_ksettings);
@@@ -1844,18 -1828,6 +1828,18 @@@ out
    return ret;
  }
+__printf(2, 3) void ethtool_sprintf(u8 **data, const char *fmt, ...)
 +{
 +	va_list args;
 +
 +	va_start(args, fmt);
 +	vsnprintf(*data, ETH_GSTRING_LEN, fmt, args);
 +	va_end(args);
 +
 +	*data += ETH_GSTRING_LEN;
 +}
 +EXPORT_SYMBOL(ethtool_sprintf);
 +
  static int ethtool_phys_id(struct net_device *dev, void __user *useraddr)
  {
    struct ethtool_value id;
@@@ -2568,9 -2540,6 +2552,9 @@@ static int ethtool_get_fecparam(struct 
    if (rc)
    	return rc;
+	if (WARN_ON_ONCE(fecparam.reserved))
 +		fecparam.reserved = 0;
 +
    if (copy_to_user(useraddr, &fecparam, sizeof(fecparam)))
    	return -EFAULT;
    return 0;
@@@ -2586,12 -2555,6 +2570,12 @@@ static int ethtool_set_fecparam(struct 
    if (copy_from_user(&fecparam, useraddr, sizeof(fecparam)))
    	return -EFAULT;
+	if (!fecparam.fec || fecparam.fec & ETHTOOL_FEC_NONE)
 +		return -EINVAL;
 +
 +	fecparam.active_fec = 0;
 +	fecparam.reserved = 0;
 +
    return dev->ethtool_ops->set_fecparam(dev, &fecparam);
  }
diff --combined net/ipv4/esp4.c
index 1ae920b93f39,4b834bbf95e0..dd1c752ea122
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@@ -279,7 -279,7 +279,7 @@@ static void esp_output_done(struct cryp
    	    x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
    		esp_output_tail_tcp(x, skb);
    	else
- 			xfrm_output_resume(skb, err);
+ 			xfrm_output_resume(skb->sk, skb, err);
    }
  }
@@@ -309,7 -309,7 +309,7 @@@ static struct ip_esp_hdr *esp_output_se
    				       struct esp_output_extra *extra)
  {
    /* For ESN we move the header forward by 4 bytes to
 -	 * accomodate the high bits.  We will move it back after
 +	 * accommodate the high bits.  We will move it back after
     * encryption.
     */
    if ((x->props.flags & XFRM_STATE_ESN)) {
@@@ -854,7 -854,7 +854,7 @@@ static void esp_input_set_header(struc
    struct ip_esp_hdr *esph;
/* For ESN we move the header forward by 4 bytes to
 -	 * accomodate the high bits.  We will move it back after
 +	 * accommodate the high bits.  We will move it back after
     * decryption.
     */
    if ((x->props.flags & XFRM_STATE_ESN)) {
diff --combined net/ipv4/tcp_bpf.c
index 3d622a0d0753,bc7d2a586e18..4f49c12dae53
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@@ -10,6 -10,86 +10,6 @@@
  #include <net/inet_common.h>
  #include <net/tls.h>
-int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
 -		      struct msghdr *msg, int len, int flags)
 -{
 -	struct iov_iter *iter = &msg->msg_iter;
 -	int peek = flags & MSG_PEEK;
 -	struct sk_msg *msg_rx;
 -	int i, copied = 0;
 -
 -	msg_rx = list_first_entry_or_null(&psock->ingress_msg,
 -					  struct sk_msg, list);
 -
 -	while (copied != len) {
 -		struct scatterlist *sge;
 -
 -		if (unlikely(!msg_rx))
 -			break;
 -
 -		i = msg_rx->sg.start;
 -		do {
 -			struct page *page;
 -			int copy;
 -
 -			sge = sk_msg_elem(msg_rx, i);
 -			copy = sge->length;
 -			page = sg_page(sge);
 -			if (copied + copy > len)
 -				copy = len - copied;
 -			copy = copy_page_to_iter(page, sge->offset, copy, iter);
 -			if (!copy)
 -				return copied ? copied : -EFAULT;
 -
 -			copied += copy;
 -			if (likely(!peek)) {
 -				sge->offset += copy;
 -				sge->length -= copy;
 -				if (!msg_rx->skb)
 -					sk_mem_uncharge(sk, copy);
 -				msg_rx->sg.size -= copy;
 -
 -				if (!sge->length) {
 -					sk_msg_iter_var_next(i);
 -					if (!msg_rx->skb)
 -						put_page(page);
 -				}
 -			} else {
 -				/* Lets not optimize peek case if copy_page_to_iter
 -				 * didn't copy the entire length lets just break.
 -				 */
 -				if (copy != sge->length)
 -					return copied;
 -				sk_msg_iter_var_next(i);
 -			}
 -
 -			if (copied == len)
 -				break;
 -		} while (i != msg_rx->sg.end);
 -
 -		if (unlikely(peek)) {
 -			if (msg_rx == list_last_entry(&psock->ingress_msg,
 -						      struct sk_msg, list))
 -				break;
 -			msg_rx = list_next_entry(msg_rx, list);
 -			continue;
 -		}
 -
 -		msg_rx->sg.start = i;
 -		if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) {
 -			list_del(&msg_rx->list);
 -			if (msg_rx->skb)
 -				consume_skb(msg_rx->skb);
 -			kfree(msg_rx);
 -		}
 -		msg_rx = list_first_entry_or_null(&psock->ingress_msg,
 -						  struct sk_msg, list);
 -	}
 -
 -	return copied;
 -}
 -EXPORT_SYMBOL_GPL(__tcp_bpf_recvmsg);
 -
  static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
    		   struct sk_msg *msg, u32 apply_bytes, int flags)
  {
@@@ -149,7 -229,7 +149,7 @@@ int tcp_bpf_sendmsg_redir(struct sock *
  }
  EXPORT_SYMBOL_GPL(tcp_bpf_sendmsg_redir);
-#ifdef CONFIG_BPF_STREAM_PARSER
 +#ifdef CONFIG_BPF_SYSCALL
  static bool tcp_bpf_stream_read(const struct sock *sk)
  {
    struct sk_psock *psock;
@@@ -163,6 -243,28 +163,6 @@@
    return !empty;
  }
-static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
 -			     int flags, long timeo, int *err)
 -{
 -	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 -	int ret = 0;
 -
 -	if (sk->sk_shutdown & RCV_SHUTDOWN)
 -		return 1;
 -
 -	if (!timeo)
 -		return ret;
 -
 -	add_wait_queue(sk_sleep(sk), &wait);
 -	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 -	ret = sk_wait_event(sk, &timeo,
 -			    !list_empty(&psock->ingress_msg) ||
 -			    !skb_queue_empty(&sk->sk_receive_queue), &wait);
 -	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
 -	remove_wait_queue(sk_sleep(sk), &wait);
 -	return ret;
 -}
 -
  static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
    	    int nonblock, int flags, int *addr_len)
  {
@@@ -182,13 -284,13 +182,13 @@@
    }
    lock_sock(sk);
  msg_bytes_ready:
 -	copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags);
 +	copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
    if (!copied) {
    	int data, err = 0;
    	long timeo;
timeo = sock_rcvtimeo(sk, nonblock);
 -		data = tcp_bpf_wait_data(sk, psock, flags, timeo, &err);
 +		data = sk_msg_wait_data(sk, psock, flags, timeo, &err);
    	if (data) {
    		if (!sk_psock_queue_empty(psock))
    			goto msg_bytes_ready;
@@@ -499,38 -601,20 +499,44 @@@ static int tcp_bpf_assert_proto_ops(str
           ops->sendpage == tcp_sendpage ? 0 : -ENOTSUPP;
  }
-struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
 +int tcp_bpf_update_proto(struct sock *sk, bool restore)
  {
 +	struct sk_psock *psock = sk_psock(sk);
    int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
    int config = psock->progs.msg_parser   ? TCP_BPF_TX   : TCP_BPF_BASE;
+	if (restore) {
 +		if (inet_csk_has_ulp(sk)) {
++			/* TLS does not have an unhash proto in SW cases,
++			 * but we need to ensure we stop using the sock_map
++			 * unhash routine because the associated psock is being
++			 * removed. So use the original unhash handler.
++			 */
++			WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash);
 +			tcp_update_ulp(sk, psock->sk_proto, psock->saved_write_space);
 +		} else {
 +			sk->sk_write_space = psock->saved_write_space;
 +			/* Pairs with lockless read in sk_clone_lock() */
 +			WRITE_ONCE(sk->sk_prot, psock->sk_proto);
 +		}
 +		return 0;
 +	}
 +
 +	if (inet_csk_has_ulp(sk))
 +		return -EINVAL;
 +
    if (sk->sk_family == AF_INET6) {
    	if (tcp_bpf_assert_proto_ops(psock->sk_proto))
 -			return ERR_PTR(-EINVAL);
 +			return -EINVAL;
tcp_bpf_check_v6_needs_rebuild(psock->sk_proto);
    }
-	return &tcp_bpf_prots[family][config];
 +	/* Pairs with lockless read in sk_clone_lock() */
 +	WRITE_ONCE(sk->sk_prot, &tcp_bpf_prots[family][config]);
 +	return 0;
  }
 +EXPORT_SYMBOL_GPL(tcp_bpf_update_proto);
/* If a child got cloned from a listening socket that had tcp_bpf
   * protocol callbacks installed, we need to restore the callbacks to
@@@ -545,4 -629,4 +551,4 @@@ void tcp_bpf_clone(const struct sock *s
    if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
    	newsk->sk_prot = sk->sk_prot_creator;
  }
 -#endif /* CONFIG_BPF_STREAM_PARSER */
 +#endif /* CONFIG_BPF_SYSCALL */
diff --combined net/ipv4/udp.c
index bfcc7f1a8a7f,99d743eb9dc4..15f5504adf5b
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@@ -1782,35 -1782,6 +1782,35 @@@ busy_check
  }
  EXPORT_SYMBOL(__skb_recv_udp);
+int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
 +		  sk_read_actor_t recv_actor)
 +{
 +	int copied = 0;
 +
 +	while (1) {
 +		struct sk_buff *skb;
 +		int err, used;
 +
 +		skb = skb_recv_udp(sk, 0, 1, &err);
 +		if (!skb)
 +			return err;
 +		used = recv_actor(desc, skb, 0, skb->len);
 +		if (used <= 0) {
 +			if (!copied)
 +				copied = used;
 +			break;
 +		} else if (used <= skb->len) {
 +			copied += used;
 +		}
 +
 +		if (!desc->count)
 +			break;
 +	}
 +
 +	return copied;
 +}
 +EXPORT_SYMBOL(udp_read_sock);
 +
  /*
   * 	This should be easy, if there is something there we
   * 	return it, otherwise we block.
@@@ -2207,8 -2178,6 +2207,8 @@@ static int udp_queue_rcv_skb(struct soc
    segs = udp_rcv_segment(sk, skb, true);
    skb_list_walk_safe(segs, skb, next) {
    	__skb_pull(skb, skb_transport_offset(skb));
 +
 +		udp_post_segment_fix_csum(skb);
    	ret = udp_queue_rcv_one_skb(sk, skb);
    	if (ret > 0)
    		ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret);
@@@ -2695,12 -2664,9 +2695,12 @@@ int udp_lib_setsockopt(struct sock *sk
case UDP_GRO:
    	lock_sock(sk);
 +
 +		/* when enabling GRO, accept the related GSO packet type */
    	if (valbool)
    		udp_tunnel_encap_enable(sk->sk_socket);
    	up->gro_enabled = valbool;
 +		up->accept_udp_l4 = valbool;
    	release_sock(sk);
    	break;
@@@ -2788,6 -2754,10 +2788,10 @@@ int udp_lib_getsockopt(struct sock *sk
    	val = up->gso_size;
    	break;
+ 	case UDP_GRO:
+ 		val = up->gro_enabled;
+ 		break;
+ 
    /* The following two cannot be changed on UDP sockets, the return is
     * always 0 (which corresponds to the full checksum coverage of UDP). */
    case UDPLITE_SEND_CSCOV:
@@@ -2883,9 -2853,6 +2887,9 @@@ struct proto udp_prot = 
    .unhash			= udp_lib_unhash,
    .rehash			= udp_v4_rehash,
    .get_port		= udp_v4_get_port,
 +#ifdef CONFIG_BPF_SYSCALL
 +	.psock_update_sk_prot	= udp_bpf_update_proto,
 +#endif
    .memory_allocated	= &udp_memory_allocated,
    .sysctl_mem		= sysctl_udp_mem,
    .sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_udp_wmem_min),
diff --combined net/ipv6/addrconf.c
index 120073ffb666,a9e53f5942fa..dbb5bb9269bb
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@@ -2358,7 -2358,7 +2358,7 @@@ regen
    /* <draft-ietf-6man-rfc4941bis-08.txt>, Section 3.3.1:
     * check if generated address is not inappropriate:
     *
 -	 * - Reserved IPv6 Interface Identifers
 +	 * - Reserved IPv6 Interface Identifiers
     * - XXX: already assigned to an address on the device
     */
@@@ -5107,20 -5107,17 +5107,20 @@@ next
    	break;
    }
    case MULTICAST_ADDR:
 +		read_unlock_bh(&idev->lock);
    	fillargs->event = RTM_GETMULTICAST;
/* multicast address */
 -		for (ifmca = idev->mc_list; ifmca;
 -		     ifmca = ifmca->next, ip_idx++) {
 +		for (ifmca = rcu_dereference(idev->mc_list);
 +		     ifmca;
 +		     ifmca = rcu_dereference(ifmca->next), ip_idx++) {
    		if (ip_idx < s_ip_idx)
    			continue;
    		err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
    		if (err < 0)
    			break;
    	}
 +		read_lock_bh(&idev->lock);
    	break;
    case ANYCAST_ADDR:
    	fillargs->event = RTM_GETANYCAST;
@@@ -5672,7 -5669,8 +5672,8 @@@ static int inet6_fill_link_af(struct sk
    return 0;
  }
- static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
+ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token,
+ 			     struct netlink_ext_ack *extack)
  {
    struct inet6_ifaddr *ifp;
    struct net_device *dev = idev->dev;
@@@ -5683,12 -5681,29 +5684,29 @@@
if (!token)
    	return -EINVAL;
- 	if (dev->flags & (IFF_LOOPBACK | IFF_NOARP))
+ 
+ 	if (dev->flags & IFF_LOOPBACK) {
+ 		NL_SET_ERR_MSG_MOD(extack, "Device is loopback");
    	return -EINVAL;
- 	if (!ipv6_accept_ra(idev))
+ 	}
+ 
+ 	if (dev->flags & IFF_NOARP) {
+ 		NL_SET_ERR_MSG_MOD(extack,
+ 				   "Device does not do neighbour discovery");
+ 		return -EINVAL;
+ 	}
+ 
+ 	if (!ipv6_accept_ra(idev)) {
+ 		NL_SET_ERR_MSG_MOD(extack,
+ 				   "Router advertisement is disabled on device");
    	return -EINVAL;
- 	if (idev->cnf.rtr_solicits == 0)
+ 	}
+ 
+ 	if (idev->cnf.rtr_solicits == 0) {
+ 		NL_SET_ERR_MSG(extack,
+ 			       "Router solicitation is disabled on device");
    	return -EINVAL;
+ 	}
write_lock_bh(&idev->lock);
@@@ -5796,7 -5811,8 +5814,8 @@@ static int inet6_validate_link_af(cons
    return 0;
  }
- static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
+ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla,
+ 			     struct netlink_ext_ack *extack)
  {
    struct inet6_dev *idev = __in6_dev_get(dev);
    struct nlattr *tb[IFLA_INET6_MAX + 1];
@@@ -5809,7 -5825,8 +5828,8 @@@
    	BUG();
if (tb[IFLA_INET6_TOKEN]) {
- 		err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]));
+ 		err = inet6_set_iftoken(idev, nla_data(tb[IFLA_INET6_TOKEN]),
+ 					extack);
    	if (err)
    		return err;
    }
@@@ -6096,8 -6113,10 +6116,8 @@@ static void __ipv6_ifa_notify(int event
static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
  {
 -	rcu_read_lock_bh();
    if (likely(ifp->idev->dead == 0))
    	__ipv6_ifa_notify(event, ifp);
 -	rcu_read_unlock_bh();
  }
#ifdef CONFIG_SYSCTL
diff --combined net/ipv6/ip6_vti.c
index 856e46ad0895,e0cc32e45880..2d048e21abbb
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@@ -193,6 -193,7 +193,6 @@@ static int vti6_tnl_create2(struct net_
strcpy(t->parms.name, dev->name);
-	dev_hold(dev);
    vti6_tnl_link(ip6n, t);
return 0;
@@@ -493,7 -494,7 +493,7 @@@ vti6_xmit(struct sk_buff *skb, struct n
    }
if (dst->flags & DST_XFRM_QUEUE)
- 		goto queued;
+ 		goto xmit;
x = dst->xfrm;
    if (!vti6_state_check(x, &t->parms.raddr, &t->parms.laddr))
@@@ -522,6 -523,8 +522,8 @@@
icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
    	} else {
+ 			if (!(ip_hdr(skb)->frag_off & htons(IP_DF)))
+ 				goto xmit;
    		icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
    			      htonl(mtu));
    	}
@@@ -530,7 -533,7 +532,7 @@@
    	goto tx_err_dst_release;
    }
- queued:
+ xmit:
    skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev)));
    skb_dst_set(skb, dst);
    skb->dev = skb_dst(skb)->dev;
@@@ -931,7 -934,6 +933,7 @@@ static inline int vti6_dev_init_gen(str
    dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
    if (!dev->tstats)
    	return -ENOMEM;
 +	dev_hold(dev);
    return 0;
  }
@@@ -963,6 -965,7 +965,6 @@@ static int __net_init vti6_fb_tnl_dev_i
    struct vti6_net *ip6n = net_generic(net, vti6_net_id);
t->parms.proto = IPPROTO_IPV6;
 -	dev_hold(dev);
rcu_assign_pointer(ip6n->tnls_wc[0], t);
    return 0;
diff --combined net/ipv6/route.c
index 28801ae80548,373d48073106..a22822bdbf39
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@@ -2085,10 -2085,13 +2085,10 @@@ static void rt6_age_examine_exception(s
if (rt->rt6i_flags & RTF_GATEWAY) {
    	struct neighbour *neigh;
 -		__u8 neigh_flags = 0;
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
 -		if (neigh)
 -			neigh_flags = neigh->flags;
-		if (!(neigh_flags & NTF_ROUTER)) {
 +		if (!(neigh && (neigh->flags & NTF_ROUTER))) {
    		RT6_TRACE("purging route %p via non-router but gateway\n",
    			  rt);
    		rt6_remove_exception(bucket, rt6_ex);
@@@ -2357,7 -2360,7 +2357,7 @@@ u32 rt6_multipath_hash(const struct ne
memset(&hash_keys, 0, sizeof(hash_keys));
-                        if (!flkeys) {
 +			if (!flkeys) {
    			skb_flow_dissect_flow_keys(skb, &keys, flag);
    			flkeys = &keys;
    		}
@@@ -2497,20 -2500,20 +2497,20 @@@ struct dst_entry *ip6_route_output_flag
    				 struct flowi6 *fl6,
    				 int flags)
  {
 -        struct dst_entry *dst;
 -        struct rt6_info *rt6;
 +	struct dst_entry *dst;
 +	struct rt6_info *rt6;
-        rcu_read_lock();
 -        dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
 -        rt6 = (struct rt6_info *)dst;
 -        /* For dst cached in uncached_list, refcnt is already taken. */
 -        if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
 -                dst = &net->ipv6.ip6_null_entry->dst;
 -                dst_hold(dst);
 -        }
 -        rcu_read_unlock();
 +	rcu_read_lock();
 +	dst = ip6_route_output_flags_noref(net, sk, fl6, flags);
 +	rt6 = (struct rt6_info *)dst;
 +	/* For dst cached in uncached_list, refcnt is already taken. */
 +	if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) {
 +		dst = &net->ipv6.ip6_null_entry->dst;
 +		dst_hold(dst);
 +	}
 +	rcu_read_unlock();
-        return dst;
 +	return dst;
  }
  EXPORT_SYMBOL_GPL(ip6_route_output_flags);
@@@ -5206,9 -5209,11 +5206,11 @@@ static int ip6_route_multipath_add(stru
    	 * nexthops have been replaced by first new, the rest should
    	 * be added to it.
    	 */
- 		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
- 						     NLM_F_REPLACE);
- 		cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
+ 		if (cfg->fc_nlinfo.nlh) {
+ 			cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
+ 							     NLM_F_REPLACE);
+ 			cfg->fc_nlinfo.nlh->nlmsg_flags |= NLM_F_CREATE;
+ 		}
    	nhn++;
    }
@@@ -6072,7 -6077,7 +6074,7 @@@ void fib6_info_hw_flags_set(struct net
if (!rcu_access_pointer(f6i->fib6_node))
    	/* The route was removed from the tree, do not send
 -		 * notfication.
 +		 * notification.
    	 */
    	return;
diff --combined net/mac80211/cfg.c
index a0a11624a5be,860bc35383d5..7a99892e5aba
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@@ -1486,7 -1486,7 +1486,7 @@@ static int sta_apply_parameters(struct 
    	sta->sta.wme = set & BIT(NL80211_STA_FLAG_WME);
/* auth flags will be set later for TDLS,
 -	 * and for unassociated stations that move to assocaited */
 +	 * and for unassociated stations that move to associated */
    if (!test_sta_flag(sta, WLAN_STA_TDLS_PEER) &&
        !((mask & BIT(NL80211_STA_FLAG_ASSOCIATED)) &&
          (set & BIT(NL80211_STA_FLAG_ASSOCIATED)))) {
@@@ -1788,8 -1788,10 +1788,10 @@@ static int ieee80211_change_station(str
    	}
if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN &&
- 		    sta->sdata->u.vlan.sta)
+ 		    sta->sdata->u.vlan.sta) {
+ 			ieee80211_clear_fast_rx(sta);
    		RCU_INIT_POINTER(sta->sdata->u.vlan.sta, NULL);
+ 		}
if (test_sta_flag(sta, WLAN_STA_AUTHORIZED))
    		ieee80211_vif_dec_num_mcast(sta->sdata);
diff --combined net/mptcp/protocol.c
index e894345d10c1,4bde960e19dc..8009b3f8e4c1
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@@ -11,7 -11,6 +11,6 @@@
  #include <linux/netdevice.h>
  #include <linux/sched/signal.h>
  #include <linux/atomic.h>
- #include <linux/igmp.h>
  #include <net/sock.h>
  #include <net/inet_common.h>
  #include <net/inet_hashtables.h>
@@@ -20,7 -19,6 +19,6 @@@
  #include <net/tcp_states.h>
  #if IS_ENABLED(CONFIG_MPTCP_IPV6)
  #include <net/transp_v6.h>
- #include <net/addrconf.h>
  #endif
  #include <net/mptcp.h>
  #include <net/xfrm.h>
@@@ -493,7 -491,7 +491,7 @@@ static bool mptcp_check_data_fin(struc
    u64 rcv_data_fin_seq;
    bool ret = false;
-	if (__mptcp_check_fallback(msk) || !msk->first)
 +	if (__mptcp_check_fallback(msk))
    	return ret;
/* Need to ack a DATA_FIN received from a peer while this side
@@@ -2047,21 -2045,28 +2045,21 @@@ out_err
    return copied;
  }
-static void mptcp_retransmit_handler(struct sock *sk)
 -{
 -	struct mptcp_sock *msk = mptcp_sk(sk);
 -
 -	set_bit(MPTCP_WORK_RTX, &msk->flags);
 -	mptcp_schedule_work(sk);
 -}
 -
  static void mptcp_retransmit_timer(struct timer_list *t)
  {
    struct inet_connection_sock *icsk = from_timer(icsk, t,
    					       icsk_retransmit_timer);
    struct sock *sk = &icsk->icsk_inet.sk;
 +	struct mptcp_sock *msk = mptcp_sk(sk);
bh_lock_sock(sk);
    if (!sock_owned_by_user(sk)) {
 -		mptcp_retransmit_handler(sk);
 +		/* we need a process context to retransmit */
 +		if (!test_and_set_bit(MPTCP_WORK_RTX, &msk->flags))
 +			mptcp_schedule_work(sk);
    } else {
    	/* delegate our work to tcp_release_cb() */
 -		if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED,
 -				      &sk->sk_tsq_flags))
 -			sock_hold(sk);
 +		set_bit(MPTCP_RETRANSMIT, &msk->flags);
    }
    bh_unlock_sock(sk);
    sock_put(sk);
@@@ -2871,6 -2876,48 +2869,48 @@@ static int mptcp_setsockopt_v6(struct m
    return ret;
  }
+ static bool mptcp_unsupported(int level, int optname)
+ {
+ 	if (level == SOL_IP) {
+ 		switch (optname) {
+ 		case IP_ADD_MEMBERSHIP:
+ 		case IP_ADD_SOURCE_MEMBERSHIP:
+ 		case IP_DROP_MEMBERSHIP:
+ 		case IP_DROP_SOURCE_MEMBERSHIP:
+ 		case IP_BLOCK_SOURCE:
+ 		case IP_UNBLOCK_SOURCE:
+ 		case MCAST_JOIN_GROUP:
+ 		case MCAST_LEAVE_GROUP:
+ 		case MCAST_JOIN_SOURCE_GROUP:
+ 		case MCAST_LEAVE_SOURCE_GROUP:
+ 		case MCAST_BLOCK_SOURCE:
+ 		case MCAST_UNBLOCK_SOURCE:
+ 		case MCAST_MSFILTER:
+ 			return true;
+ 		}
+ 		return false;
+ 	}
+ 	if (level == SOL_IPV6) {
+ 		switch (optname) {
+ 		case IPV6_ADDRFORM:
+ 		case IPV6_ADD_MEMBERSHIP:
+ 		case IPV6_DROP_MEMBERSHIP:
+ 		case IPV6_JOIN_ANYCAST:
+ 		case IPV6_LEAVE_ANYCAST:
+ 		case MCAST_JOIN_GROUP:
+ 		case MCAST_LEAVE_GROUP:
+ 		case MCAST_JOIN_SOURCE_GROUP:
+ 		case MCAST_LEAVE_SOURCE_GROUP:
+ 		case MCAST_BLOCK_SOURCE:
+ 		case MCAST_UNBLOCK_SOURCE:
+ 		case MCAST_MSFILTER:
+ 			return true;
+ 		}
+ 		return false;
+ 	}
+ 	return false;
+ }
+ 
  static int mptcp_setsockopt(struct sock *sk, int level, int optname,
    		    sockptr_t optval, unsigned int optlen)
  {
@@@ -2879,6 -2926,9 +2919,9 @@@
pr_debug("msk=%p", msk);
+ 	if (mptcp_unsupported(level, optname))
+ 		return -ENOPROTOOPT;
+ 
    if (level == SOL_SOCKET)
    	return mptcp_setsockopt_sol_socket(msk, optname, optval, optlen);
@@@ -2951,16 -3001,17 +2994,16 @@@ void __mptcp_check_push(struct sock *sk
    }
  }
-#define MPTCP_DEFERRED_ALL (TCPF_WRITE_TIMER_DEFERRED)
 -
  /* processes deferred events and flush wmem */
  static void mptcp_release_cb(struct sock *sk)
  {
 -	unsigned long flags, nflags;
 -
    for (;;) {
 -		flags = 0;
 +		unsigned long flags = 0;
 +
    	if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
    		flags |= BIT(MPTCP_PUSH_PENDING);
 +		if (test_and_clear_bit(MPTCP_RETRANSMIT, &mptcp_sk(sk)->flags))
 +			flags |= BIT(MPTCP_RETRANSMIT);
    	if (!flags)
    		break;
@@@ -2975,8 -3026,6 +3018,8 @@@
    	spin_unlock_bh(&sk->sk_lock.slock);
    	if (flags & BIT(MPTCP_PUSH_PENDING))
    		__mptcp_push_pending(sk, 0);
 +		if (flags & BIT(MPTCP_RETRANSMIT))
 +			__mptcp_retrans(sk);
cond_resched();
    	spin_lock_bh(&sk->sk_lock.slock);
@@@ -2992,6 -3041,20 +3035,6 @@@
     */
    __mptcp_update_wmem(sk);
    __mptcp_update_rmem(sk);
 -
 -	do {
 -		flags = sk->sk_tsq_flags;
 -		if (!(flags & MPTCP_DEFERRED_ALL))
 -			return;
 -		nflags = flags & ~MPTCP_DEFERRED_ALL;
 -	} while (cmpxchg(&sk->sk_tsq_flags, flags, nflags) != flags);
 -
 -	sock_release_ownership(sk);
 -
 -	if (flags & TCPF_WRITE_TIMER_DEFERRED) {
 -		mptcp_retransmit_handler(sk);
 -		__sock_put(sk);
 -	}
  }
void mptcp_subflow_process_delegated(struct sock *ssk)
@@@ -3090,18 -3153,14 +3133,18 @@@ bool mptcp_finish_join(struct sock *ssk
    pr_debug("msk=%p, subflow=%p", msk, subflow);
/* mptcp socket already closing? */
 -	if (!mptcp_is_fully_established(parent))
 +	if (!mptcp_is_fully_established(parent)) {
 +		subflow->reset_reason = MPTCP_RST_EMPTCP;
    	return false;
 +	}
if (!msk->pm.server_side)
    	goto out;
-	if (!mptcp_pm_allow_new_subflow(msk))
 +	if (!mptcp_pm_allow_new_subflow(msk)) {
 +		subflow->reset_reason = MPTCP_RST_EPROHIBIT;
    	return false;
 +	}
/* active connections are already on conn_list, and we can't acquire
     * msk lock here.
@@@ -3115,10 -3174,8 +3158,10 @@@
    	sock_hold(ssk);
    }
    spin_unlock_bh(&msk->join_list_lock);
 -	if (!ret)
 +	if (!ret) {
 +		subflow->reset_reason = MPTCP_RST_EPROHIBIT;
    	return false;
 +	}
/* attach to msk socket only after we are sure he will deal with us
     * at close time
@@@ -3230,12 -3287,8 +3273,12 @@@ static int mptcp_stream_connect(struct 
    if (rcu_access_pointer(tcp_sk(ssock->sk)->md5sig_info))
    	mptcp_subflow_early_fallback(msk, subflow);
  #endif
 -	if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk))
 +	if (subflow->request_mptcp && mptcp_token_new_connect(ssock->sk)) {
 +		MPTCP_INC_STATS(sock_net(ssock->sk), MPTCP_MIB_TOKENFALLBACKINIT);
    	mptcp_subflow_early_fallback(msk, subflow);
 +	}
 +	if (likely(!__mptcp_check_fallback(msk)))
 +		MPTCP_INC_STATS(sock_net(sock->sk), MPTCP_MIB_MPCAPABLEACTIVE);
do_connect:
    err = ssock->ops->connect(ssock, uaddr, addr_len, flags);
@@@ -3409,34 -3462,10 +3452,10 @@@ static __poll_t mptcp_poll(struct file 
    return mask;
  }
- static int mptcp_release(struct socket *sock)
- {
- 	struct mptcp_subflow_context *subflow;
- 	struct sock *sk = sock->sk;
- 	struct mptcp_sock *msk;
- 
- 	if (!sk)
- 		return 0;
- 
- 	lock_sock(sk);
- 
- 	msk = mptcp_sk(sk);
- 
- 	mptcp_for_each_subflow(msk, subflow) {
- 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- 
- 		ip_mc_drop_socket(ssk);
- 	}
- 
- 	release_sock(sk);
- 
- 	return inet_release(sock);
- }
- 
  static const struct proto_ops mptcp_stream_ops = {
    .family		   = PF_INET,
    .owner		   = THIS_MODULE,
- 	.release	   = mptcp_release,
+ 	.release	   = inet_release,
    .bind		   = mptcp_bind,
    .connect	   = mptcp_stream_connect,
    .socketpair	   = sock_no_socketpair,
@@@ -3528,35 -3557,10 +3547,10 @@@ void __init mptcp_proto_init(void
  }
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- static int mptcp6_release(struct socket *sock)
- {
- 	struct mptcp_subflow_context *subflow;
- 	struct mptcp_sock *msk;
- 	struct sock *sk = sock->sk;
- 
- 	if (!sk)
- 		return 0;
- 
- 	lock_sock(sk);
- 
- 	msk = mptcp_sk(sk);
- 
- 	mptcp_for_each_subflow(msk, subflow) {
- 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- 
- 		ip_mc_drop_socket(ssk);
- 		ipv6_sock_mc_close(ssk);
- 		ipv6_sock_ac_close(ssk);
- 	}
- 
- 	release_sock(sk);
- 	return inet6_release(sock);
- }
- 
  static const struct proto_ops mptcp_v6_stream_ops = {
    .family		   = PF_INET6,
    .owner		   = THIS_MODULE,
- 	.release	   = mptcp6_release,
+ 	.release	   = inet6_release,
    .bind		   = mptcp_bind,
    .connect	   = mptcp_stream_connect,
    .socketpair	   = sock_no_socketpair,
diff --combined net/openvswitch/conntrack.c
index c29b0ef1fc27,d217bd91176b..cadb6a29b285
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@@ -809,7 -809,8 +809,7 @@@ static int ovs_ct_nat_execute(struct sk
err = nf_nat_packet(ct, ctinfo, hooknum, skb);
  push:
 -	skb_push(skb, nh_off);
 -	skb_postpush_rcsum(skb, skb->data, nh_off);
 +	skb_push_rcsum(skb, nh_off);
return err;
  }
@@@ -1321,7 -1322,8 +1321,7 @@@ int ovs_ct_execute(struct net *net, str
    else
    	err = ovs_ct_lookup(net, key, info, skb);
-	skb_push(skb, nh_ofs);
 -	skb_postpush_rcsum(skb, skb->data, nh_ofs);
 +	skb_push_rcsum(skb, nh_ofs);
    if (err)
    	kfree_skb(skb);
    return err;
@@@ -2032,10 -2034,10 +2032,10 @@@ static int ovs_ct_limit_del_zone_limit(
  static int ovs_ct_limit_get_default_limit(struct ovs_ct_limit_info *info,
    				  struct sk_buff *reply)
  {
- 	struct ovs_zone_limit zone_limit;
- 
- 	zone_limit.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE;
- 	zone_limit.limit = info->default_limit;
+ 	struct ovs_zone_limit zone_limit = {
+ 		.zone_id = OVS_ZONE_LIMIT_DEFAULT_ZONE,
+ 		.limit   = info->default_limit,
+ 	};
return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit);
  }
diff --combined net/qrtr/qrtr.c
index 4b46c69e14ab,1e4fb568fa84..c0477bec09bd
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@@ -20,8 -20,6 +20,8 @@@
  /* auto-bind range */
  #define QRTR_MIN_EPH_SOCKET 0x4000
  #define QRTR_MAX_EPH_SOCKET 0x7fff
 +#define QRTR_EPH_PORT_RANGE \
 +		XA_LIMIT(QRTR_MIN_EPH_SOCKET, QRTR_MAX_EPH_SOCKET)
/**
   * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1
@@@ -108,7 -106,8 +108,7 @@@ static LIST_HEAD(qrtr_all_nodes)
  static DEFINE_MUTEX(qrtr_node_lock);
/* local port allocation management */
 -static DEFINE_IDR(qrtr_ports);
 -static DEFINE_MUTEX(qrtr_port_lock);
 +static DEFINE_XARRAY_ALLOC(qrtr_ports);
/**
   * struct qrtr_node - endpoint node
@@@ -272,7 -271,10 +272,10 @@@ static int qrtr_tx_wait(struct qrtr_nod
    	flow = kzalloc(sizeof(*flow), GFP_KERNEL);
    	if (flow) {
    		init_waitqueue_head(&flow->resume_tx);
- 			radix_tree_insert(&node->qrtr_tx_flow, key, flow);
+ 			if (radix_tree_insert(&node->qrtr_tx_flow, key, flow)) {
+ 				kfree(flow);
+ 				flow = NULL;
+ 			}
    	}
    }
    mutex_unlock(&node->qrtr_tx_lock);
@@@ -654,7 -656,7 +657,7 @@@ static struct qrtr_sock *qrtr_port_look
    	port = 0;
rcu_read_lock();
 -	ipc = idr_find(&qrtr_ports, port);
 +	ipc = xa_load(&qrtr_ports, port);
    if (ipc)
    	sock_hold(&ipc->sk);
    rcu_read_unlock();
@@@ -696,7 -698,9 +699,7 @@@ static void qrtr_port_remove(struct qrt
__sock_put(&ipc->sk);
-	mutex_lock(&qrtr_port_lock);
 -	idr_remove(&qrtr_ports, port);
 -	mutex_unlock(&qrtr_port_lock);
 +	xa_erase(&qrtr_ports, port);
/* Ensure that if qrtr_port_lookup() did enter the RCU read section we
     * wait for it to up increment the refcount */
@@@ -715,20 -719,29 +718,20 @@@
   */
  static int qrtr_port_assign(struct qrtr_sock *ipc, int *port)
  {
 -	u32 min_port;
    int rc;
-	mutex_lock(&qrtr_port_lock);
    if (!*port) {
 -		min_port = QRTR_MIN_EPH_SOCKET;
 -		rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, QRTR_MAX_EPH_SOCKET, GFP_ATOMIC);
 -		if (!rc)
 -			*port = min_port;
 +		rc = xa_alloc(&qrtr_ports, port, ipc, QRTR_EPH_PORT_RANGE,
 +				GFP_KERNEL);
    } else if (*port < QRTR_MIN_EPH_SOCKET && !capable(CAP_NET_ADMIN)) {
    	rc = -EACCES;
    } else if (*port == QRTR_PORT_CTRL) {
 -		min_port = 0;
 -		rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, 0, GFP_ATOMIC);
 +		rc = xa_insert(&qrtr_ports, 0, ipc, GFP_KERNEL);
    } else {
 -		min_port = *port;
 -		rc = idr_alloc_u32(&qrtr_ports, ipc, &min_port, *port, GFP_ATOMIC);
 -		if (!rc)
 -			*port = min_port;
 +		rc = xa_insert(&qrtr_ports, *port, ipc, GFP_KERNEL);
    }
 -	mutex_unlock(&qrtr_port_lock);
-	if (rc == -ENOSPC)
 +	if (rc == -EBUSY)
    	return -EADDRINUSE;
    else if (rc < 0)
    	return rc;
@@@ -742,16 -755,20 +745,16 @@@
  static void qrtr_reset_ports(void)
  {
    struct qrtr_sock *ipc;
 -	int id;
 -
 -	mutex_lock(&qrtr_port_lock);
 -	idr_for_each_entry(&qrtr_ports, ipc, id) {
 -		/* Don't reset control port */
 -		if (id == 0)
 -			continue;
 +	unsigned long index;
+	rcu_read_lock();
 +	xa_for_each_start(&qrtr_ports, index, ipc, 1) {
    	sock_hold(&ipc->sk);
    	ipc->sk.sk_err = ENETRESET;
    	ipc->sk.sk_error_report(&ipc->sk);
    	sock_put(&ipc->sk);
    }
 -	mutex_unlock(&qrtr_port_lock);
 +	rcu_read_unlock();
  }
/* Bind socket to address.
diff --combined net/rds/send.c
index 53444397de66,fe5264b9d4b3..ee7214ea0fdb
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@@ -665,7 -665,7 +665,7 @@@ static void rds_send_remove_from_sock(s
  unlock_and_drop:
    	spin_unlock_irqrestore(&rm->m_rs_lock, flags);
    	rds_message_put(rm);
- 		if (was_on_sock)
+ 		if (was_on_sock && rm)
    		rds_message_put(rm);
    }
@@@ -1225,7 -1225,7 +1225,7 @@@ int rds_sendmsg(struct socket *sock, st
    	}
    	/* If the socket is already bound to a link local address,
    	 * it can only send to peers on the same link.  But allow
 -		 * communicating beween link local and non-link local address.
 +		 * communicating between link local and non-link local address.
    	 */
    	if (scope_id != rs->rs_bound_scope_id) {
    		if (!scope_id) {
diff --combined net/sched/cls_api.c
index d3db70865d66,340d5af86e87..40fbea626dfd
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@@ -646,7 -646,7 +646,7 @@@ static void tc_block_indr_cleanup(struc
    struct net_device *dev = block_cb->indr.dev;
    struct Qdisc *sch = block_cb->indr.sch;
    struct netlink_ext_ack extack = {};
- 	struct flow_block_offload bo;
+ 	struct flow_block_offload bo = {};
tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND,
    		       block_cb->indr.binder_type,
@@@ -3040,6 -3040,7 +3040,7 @@@ int tcf_exts_validate(struct net *net, 
  {
  #ifdef CONFIG_NET_CLS_ACT
    {
+ 		int init_res[TCA_ACT_MAX_PRIO] = {};
    	struct tc_action *act;
    	size_t attr_size = 0;
@@@ -3051,12 -3052,11 +3052,11 @@@
    			return PTR_ERR(a_o);
    		act = tcf_action_init_1(net, tp, tb[exts->police],
    					rate_tlv, "police", ovr,
- 						TCA_ACT_BIND, a_o, rtnl_held,
- 						extack);
- 			if (IS_ERR(act)) {
- 				module_put(a_o->owner);
+ 						TCA_ACT_BIND, a_o, init_res,
+ 						rtnl_held, extack);
+ 			module_put(a_o->owner);
+ 			if (IS_ERR(act))
    			return PTR_ERR(act);
- 			}
act->type = exts->type = TCA_OLD_COMPAT;
    		exts->actions[0] = act;
@@@ -3067,8 -3067,8 +3067,8 @@@
err = tcf_action_init(net, tp, tb[exts->action],
    				      rate_tlv, NULL, ovr, TCA_ACT_BIND,
- 					      exts->actions, &attr_size,
- 					      rtnl_held, extack);
+ 					      exts->actions, init_res,
+ 					      &attr_size, rtnl_held, extack);
    		if (err < 0)
    			return err;
    		exts->nr_actions = err;
@@@ -3662,9 -3662,6 +3662,9 @@@ int tc_setup_flow_action(struct flow_ac
    		entry->police.burst = tcf_police_burst(act);
    		entry->police.rate_bytes_ps =
    			tcf_police_rate_bytes_ps(act);
 +			entry->police.burst_pkt = tcf_police_burst_pkt(act);
 +			entry->police.rate_pkt_ps =
 +				tcf_police_rate_pkt_ps(act);
    		entry->police.mtu = tcf_police_tcfp_mtu(act);
    		entry->police.index = act->tcfa_index;
    	} else if (is_tcf_ct(act)) {
diff --combined net/tipc/crypto.c
index 76b8428c94a7,97710ce36047..e5c43d4d5a75
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@@ -317,7 -317,7 +317,7 @@@ static int tipc_aead_key_generate(struc
#define tipc_aead_rcu_replace(rcu_ptr, ptr, lock)			\
  do {									\
 -	typeof(rcu_ptr) __tmp = rcu_dereference_protected((rcu_ptr),	\
 +	struct tipc_aead *__tmp = rcu_dereference_protected((rcu_ptr),	\
    					lockdep_is_held(lock));	\
    rcu_assign_pointer((rcu_ptr), (ptr));				\
    tipc_aead_put(__tmp);						\
@@@ -798,7 -798,7 +798,7 @@@ static int tipc_aead_encrypt(struct tip
    ehdr = (struct tipc_ehdr *)skb->data;
    salt = aead->salt;
    if (aead->mode == CLUSTER_KEY)
 -		salt ^= ehdr->addr; /* __be32 */
 +		salt ^= __be32_to_cpu(ehdr->addr);
    else if (__dnode)
    	salt ^= tipc_node_get_addr(__dnode);
    memcpy(iv, &salt, 4);
@@@ -929,7 -929,7 +929,7 @@@ static int tipc_aead_decrypt(struct ne
    ehdr = (struct tipc_ehdr *)skb->data;
    salt = aead->salt;
    if (aead->mode == CLUSTER_KEY)
 -		salt ^= ehdr->addr; /* __be32 */
 +		salt ^= __be32_to_cpu(ehdr->addr);
    else if (ehdr->destined)
    	salt ^= tipc_own_addr(net);
    memcpy(iv, &salt, 4);
@@@ -1492,8 -1492,6 +1492,8 @@@ int tipc_crypto_start(struct tipc_crypt
    /* Allocate statistic structure */
    c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC);
    if (!c->stats) {
 +		if (c->wq)
 +			destroy_workqueue(c->wq);
    	kfree_sensitive(c);
    	return -ENOMEM;
    }
@@@ -1943,21 -1941,22 +1943,22 @@@ static void tipc_crypto_rcv_complete(st
    		goto rcv;
    	if (tipc_aead_clone(&tmp, aead) < 0)
    		goto rcv;
+ 		WARN_ON(!refcount_inc_not_zero(&tmp->refcnt));
    	if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key, false) < 0) {
    		tipc_aead_free(&tmp->rcu);
    		goto rcv;
    	}
    	tipc_aead_put(aead);
- 		aead = tipc_aead_get((struct tipc_aead __force __rcu *)tmp);
+ 		aead = tmp;
    }
if (unlikely(err)) {
 -		tipc_aead_users_dec(aead, INT_MIN);
 +		tipc_aead_users_dec((struct tipc_aead __force __rcu *)aead, INT_MIN);
    	goto free_skb;
    }
/* Set the RX key's user */
 -	tipc_aead_users_set(aead, 1);
 +	tipc_aead_users_set((struct tipc_aead __force __rcu *)aead, 1);
/* Mark this point, RX works */
    rx->timer1 = jiffies;
diff --combined net/tipc/net.c
index 3f927949bb23,faf6bf554514..a130195af188
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@@ -89,7 -89,7 +89,7 @@@
   *     - A spin lock to protect the registry of kernel/driver users (reg.c)
   *     - A global spin_lock (tipc_port_lock), which only task is to ensure
   *       consistency where more than one port is involved in an operation,
-  *       i.e., whe a port is part of a linked list of ports.
+  *       i.e., when a port is part of a linked list of ports.
   *       There are two such lists; 'port_list', which is used for management,
   *       and 'wait_list', which is used to queue ports during congestion.
   *
@@@ -125,11 -125,6 +125,11 @@@ int tipc_net_init(struct net *net, u8 *
  static void tipc_net_finalize(struct net *net, u32 addr)
  {
    struct tipc_net *tn = tipc_net(net);
 +	struct tipc_socket_addr sk = {0, addr};
 +	struct tipc_uaddr ua;
 +
 +	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_CLUSTER_SCOPE,
 +		   TIPC_NODE_STATE, addr, addr);
if (cmpxchg(&tn->node_addr, 0, addr))
    	return;
@@@ -137,7 -132,8 +137,7 @@@
    tipc_named_reinit(net);
    tipc_sk_reinit(net);
    tipc_mon_reinit_self(net);
 -	tipc_nametbl_publish(net, TIPC_NODE_STATE, addr, addr,
 -			     TIPC_CLUSTER_SCOPE, 0, addr);
 +	tipc_nametbl_publish(net, &ua, &sk, addr);
  }
void tipc_net_finalize_work(struct work_struct *work)
diff --combined net/tipc/node.c
index 707d0dc71fad,e0ee83263a39..8217905348f4
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@@ -372,49 -372,42 +372,49 @@@ static struct tipc_node *tipc_node_find
  }
static void tipc_node_read_lock(struct tipc_node *n)
 +	__acquires(n->lock)
  {
    read_lock_bh(&n->lock);
  }
static void tipc_node_read_unlock(struct tipc_node *n)
 +	__releases(n->lock)
  {
    read_unlock_bh(&n->lock);
  }
static void tipc_node_write_lock(struct tipc_node *n)
 +	__acquires(n->lock)
  {
    write_lock_bh(&n->lock);
  }
static void tipc_node_write_unlock_fast(struct tipc_node *n)
 +	__releases(n->lock)
  {
    write_unlock_bh(&n->lock);
  }
static void tipc_node_write_unlock(struct tipc_node *n)
 +	__releases(n->lock)
  {
 +	struct tipc_socket_addr sk;
    struct net *net = n->net;
 -	u32 addr = 0;
    u32 flags = n->action_flags;
 -	u32 link_id = 0;
 -	u32 bearer_id;
    struct list_head *publ_list;
 +	struct tipc_uaddr ua;
 +	u32 bearer_id;
if (likely(!flags)) {
    	write_unlock_bh(&n->lock);
    	return;
    }
-	addr = n->addr;
 -	link_id = n->link_id;
 -	bearer_id = link_id & 0xffff;
 +	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE,
 +		   TIPC_LINK_STATE, n->addr, n->addr);
 +	sk.ref = n->link_id;
 +	sk.node = n->addr;
 +	bearer_id = n->link_id & 0xffff;
    publ_list = &n->publ_list;
n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP |
@@@ -423,18 -416,20 +423,18 @@@
    write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN)
 -		tipc_publ_notify(net, publ_list, addr, n->capabilities);
 +		tipc_publ_notify(net, publ_list, n->addr, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP)
 -		tipc_named_node_up(net, addr, n->capabilities);
 +		tipc_named_node_up(net, n->addr, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) {
 -		tipc_mon_peer_up(net, addr, bearer_id);
 -		tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr,
 -				     TIPC_NODE_SCOPE, link_id, link_id);
 +		tipc_mon_peer_up(net, n->addr, bearer_id);
 +		tipc_nametbl_publish(net, &ua, &sk, n->link_id);
    }
    if (flags & TIPC_NOTIFY_LINK_DOWN) {
 -		tipc_mon_peer_down(net, addr, bearer_id);
 -		tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr,
 -				      addr, link_id);
 +		tipc_mon_peer_down(net, n->addr, bearer_id);
 +		tipc_nametbl_withdraw(net, &ua, &sk, n->link_id);
    }
  }
@@@ -1739,7 -1734,7 +1739,7 @@@ int tipc_node_xmit(struct net *net, str
  }
/* tipc_node_xmit_skb(): send single buffer to destination
-  * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE
+  * Buffers sent via this function are generally TIPC_SYSTEM_IMPORTANCE
   * messages, which will not be rejected
   * The only exception is datagram messages rerouted after secondary
   * lookup, which are rare and safe to dispose of anyway.
@@@ -2014,7 -2009,7 +2014,7 @@@ static bool tipc_node_check_state(struc
    	return true;
    }
-	/* No synching needed if only one link */
 +	/* No syncing needed if only one link */
    if (!pl || !tipc_link_is_up(pl))
    	return true;
diff --combined net/tipc/socket.c
index f21162aa0cf7,022999e0202d..58935cd0d068
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@@ -3,7 -3,7 +3,7 @@@
   *
   * Copyright (c) 2001-2007, 2012-2019, Ericsson AB
   * Copyright (c) 2004-2008, 2010-2013, Wind River Systems
 - * Copyright (c) 2020, Red Hat Inc
 + * Copyright (c) 2020-2021, Red Hat Inc
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@@ -111,6 -111,7 +111,6 @@@ struct tipc_sock 
    struct sock sk;
    u32 conn_type;
    u32 conn_instance;
 -	int published;
    u32 max_pkt;
    u32 maxnagle;
    u32 portid;
@@@ -140,7 -141,6 +140,7 @@@
    bool expect_ack;
    bool nodelay;
    bool group_is_open;
 +	bool published;
  };
static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb);
@@@ -151,8 -151,10 +151,8 @@@ static int tipc_release(struct socket *
  static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
    	       bool kern);
  static void tipc_sk_timeout(struct timer_list *t);
 -static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
 -			   struct tipc_service_range const *seq);
 -static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
 -			    struct tipc_service_range const *seq);
 +static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua);
 +static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua);
  static int tipc_sk_leave(struct tipc_sock *tsk);
  static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid);
  static int tipc_sk_insert(struct tipc_sock *tsk);
@@@ -642,7 -644,7 +642,7 @@@ static int tipc_release(struct socket *
    __tipc_shutdown(sock, TIPC_ERR_NO_PORT);
    sk->sk_shutdown = SHUTDOWN_MASK;
    tipc_sk_leave(tsk);
 -	tipc_sk_withdraw(tsk, 0, NULL);
 +	tipc_sk_withdraw(tsk, NULL);
    __skb_queue_purge(&tsk->mc_method.deferredq);
    sk_stop_timer(sk, &sk->sk_timer);
    tipc_sk_remove(tsk);
@@@ -675,31 -677,22 +675,31 @@@
   */
  static int __tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
  {
 -	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr;
 +	struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr;
    struct tipc_sock *tsk = tipc_sk(sock->sk);
 +	bool unbind = false;
if (unlikely(!alen))
 -		return tipc_sk_withdraw(tsk, 0, NULL);
 +		return tipc_sk_withdraw(tsk, NULL);
-	if (addr->addrtype == TIPC_SERVICE_ADDR)
 -		addr->addr.nameseq.upper = addr->addr.nameseq.lower;
 +	if (ua->addrtype == TIPC_SERVICE_ADDR) {
 +		ua->addrtype = TIPC_SERVICE_RANGE;
 +		ua->sr.upper = ua->sr.lower;
 +	}
 +	if (ua->scope < 0) {
 +		unbind = true;
 +		ua->scope = -ua->scope;
 +	}
 +	/* Users may still use deprecated TIPC_ZONE_SCOPE */
 +	if (ua->scope != TIPC_NODE_SCOPE)
 +		ua->scope = TIPC_CLUSTER_SCOPE;
if (tsk->group)
    	return -EACCES;
-	if (addr->scope >= 0)
 -		return tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq);
 -	else
 -		return tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
 +	if (unbind)
 +		return tipc_sk_withdraw(tsk, ua);
 +	return tipc_sk_publish(tsk, ua);
  }
int tipc_sk_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
@@@ -714,17 -707,18 +714,17 @@@
static int tipc_bind(struct socket *sock, struct sockaddr *skaddr, int alen)
  {
 -	struct sockaddr_tipc *addr = (struct sockaddr_tipc *)skaddr;
 +	struct tipc_uaddr *ua = (struct tipc_uaddr *)skaddr;
 +	u32 atype = ua->addrtype;
if (alen) {
 -		if (alen < sizeof(struct sockaddr_tipc))
 +		if (!tipc_uaddr_valid(ua, alen))
    		return -EINVAL;
 -		if (addr->family != AF_TIPC)
 +		if (atype == TIPC_SOCKET_ADDR)
    		return -EAFNOSUPPORT;
 -		if (addr->addrtype > TIPC_SERVICE_ADDR)
 -			return -EAFNOSUPPORT;
 -		if (addr->addr.nameseq.type < TIPC_RESERVED_TYPES) {
 +		if (ua->sr.type < TIPC_RESERVED_TYPES) {
    		pr_warn_once("Can't bind to reserved service type %u\n",
 -				     addr->addr.nameseq.type);
 +				     ua->sr.type);
    		return -EACCES;
    	}
    }
@@@ -832,7 -826,7 +832,7 @@@ static __poll_t tipc_poll(struct file *
  /**
   * tipc_sendmcast - send multicast message
   * @sock: socket structure
 - * @seq: destination address
 + * @ua: destination address struct
   * @msg: message to send
   * @dlen: length of data to send
   * @timeout: timeout to wait for wakeup
@@@ -840,7 -834,7 +840,7 @@@
   * Called from function tipc_sendmsg(), which has done all sanity checks
   * Return: the number of bytes sent on success, or errno
   */
 -static int tipc_sendmcast(struct  socket *sock, struct tipc_service_range *seq,
 +static int tipc_sendmcast(struct  socket *sock, struct tipc_uaddr *ua,
    		  struct msghdr *msg, size_t dlen, long timeout)
  {
    struct sock *sk = sock->sk;
@@@ -848,6 -842,7 +848,6 @@@
    struct tipc_msg *hdr = &tsk->phdr;
    struct net *net = sock_net(sk);
    int mtu = tipc_bcast_get_mtu(net);
 -	struct tipc_mc_method *method = &tsk->mc_method;
    struct sk_buff_head pkts;
    struct tipc_nlist dsts;
    int rc;
@@@ -862,7 -857,8 +862,7 @@@
/* Lookup destination nodes */
    tipc_nlist_init(&dsts, tipc_own_addr(net));
 -	tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower,
 -				      seq->upper, &dsts);
 +	tipc_nametbl_lookup_mcast_nodes(net, ua, &dsts);
    if (!dsts.local && !dsts.remote)
    	return -EHOSTUNREACH;
@@@ -872,9 -868,9 +872,9 @@@
    msg_set_lookup_scope(hdr, TIPC_CLUSTER_SCOPE);
    msg_set_destport(hdr, 0);
    msg_set_destnode(hdr, 0);
 -	msg_set_nametype(hdr, seq->type);
 -	msg_set_namelower(hdr, seq->lower);
 -	msg_set_nameupper(hdr, seq->upper);
 +	msg_set_nametype(hdr, ua->sr.type);
 +	msg_set_namelower(hdr, ua->sr.lower);
 +	msg_set_nameupper(hdr, ua->sr.upper);
/* Build message as chain of buffers */
    __skb_queue_head_init(&pkts);
@@@ -884,7 -880,7 +884,7 @@@
    if (unlikely(rc == dlen)) {
    	trace_tipc_sk_sendmcast(sk, skb_peek(&pkts),
    				TIPC_DUMP_SK_SNDQ, " ");
 -		rc = tipc_mcast_xmit(net, &pkts, method, &dsts,
 +		rc = tipc_mcast_xmit(net, &pkts, &tsk->mc_method, &dsts,
    			     &tsk->cong_link_cnt);
    }
@@@ -958,7 -954,7 +958,7 @@@ static int tipc_send_group_unicast(stru
    			   int dlen, long timeout)
  {
    struct sock *sk = sock->sk;
 -	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 +	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
    int blks = tsk_blocks(GROUP_H_SIZE + dlen);
    struct tipc_sock *tsk = tipc_sk(sk);
    struct net *net = sock_net(sk);
@@@ -966,8 -962,8 +966,8 @@@
    u32 node, port;
    int rc;
-	node = dest->addr.id.node;
 -	port = dest->addr.id.ref;
 +	node = ua->sk.node;
 +	port = ua->sk.ref;
    if (!port && !node)
    	return -EHOSTUNREACH;
@@@ -1001,7 -997,7 +1001,7 @@@
  static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m,
    			   int dlen, long timeout)
  {
 -	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 +	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
    struct sock *sk = sock->sk;
    struct tipc_sock *tsk = tipc_sk(sk);
    struct list_head *cong_links = &tsk->cong_links;
@@@ -1012,13 -1008,16 +1012,13 @@@
    struct net *net = sock_net(sk);
    u32 node, port, exclude;
    struct list_head dsts;
 -	u32 type, inst, scope;
    int lookups = 0;
    int dstcnt, rc;
    bool cong;
INIT_LIST_HEAD(&dsts);
 -
 -	type = msg_nametype(hdr);
 -	inst = dest->addr.name.name.instance;
 -	scope = msg_lookup_scope(hdr);
 +	ua->sa.type = msg_nametype(hdr);
 +	ua->scope = msg_lookup_scope(hdr);
while (++lookups < 4) {
    	exclude = tipc_group_exclude(tsk->group);
@@@ -1027,8 -1026,8 +1027,8 @@@
/* Look for a non-congested destination member, if any */
    	while (1) {
 -			if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
 -						 &dstcnt, exclude, false))
 +			if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt,
 +						       exclude, false))
    			return -EHOSTUNREACH;
    		tipc_dest_pop(&dsts, &node, &port);
    		cong = tipc_group_cong(tsk->group, node, port, blks,
@@@ -1083,7 -1082,7 +1083,7 @@@
  static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m,
    			 int dlen, long timeout)
  {
 -	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 +	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
    struct sock *sk = sock->sk;
    struct net *net = sock_net(sk);
    struct tipc_sock *tsk = tipc_sk(sk);
@@@ -1108,9 -1107,9 +1108,9 @@@
    	return -EHOSTUNREACH;
/* Complete message header */
 -	if (dest) {
 +	if (ua) {
    	msg_set_type(hdr, TIPC_GRP_MCAST_MSG);
 -		msg_set_nameinst(hdr, dest->addr.name.name.instance);
 +		msg_set_nameinst(hdr, ua->sa.instance);
    } else {
    	msg_set_type(hdr, TIPC_GRP_BCAST_MSG);
    	msg_set_nameinst(hdr, 0);
@@@ -1157,25 -1156,29 +1157,25 @@@
  static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m,
    			 int dlen, long timeout)
  {
 +	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
    struct sock *sk = sock->sk;
 -	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
    struct tipc_sock *tsk = tipc_sk(sk);
    struct tipc_group *grp = tsk->group;
    struct tipc_msg *hdr = &tsk->phdr;
    struct net *net = sock_net(sk);
 -	u32 type, inst, scope, exclude;
    struct list_head dsts;
 -	u32 dstcnt;
 +	u32 dstcnt, exclude;
INIT_LIST_HEAD(&dsts);
 -
 -	type = msg_nametype(hdr);
 -	inst = dest->addr.name.name.instance;
 -	scope = msg_lookup_scope(hdr);
 +	ua->sa.type = msg_nametype(hdr);
 +	ua->scope = msg_lookup_scope(hdr);
    exclude = tipc_group_exclude(grp);
-	if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
 -				 &dstcnt, exclude, true))
 +	if (!tipc_nametbl_lookup_group(net, ua, &dsts, &dstcnt, exclude, true))
    	return -EHOSTUNREACH;
if (dstcnt == 1) {
 -		tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref);
 +		tipc_dest_pop(&dsts, &ua->sk.node, &ua->sk.ref);
    	return tipc_send_group_unicast(sock, m, dlen, timeout);
    }
@@@ -1195,18 -1198,17 +1195,18 @@@ void tipc_sk_mcast_rcv(struct net *net
    	       struct sk_buff_head *inputq)
  {
    u32 self = tipc_own_addr(net);
 -	u32 type, lower, upper, scope;
    struct sk_buff *skb, *_skb;
    u32 portid, onode;
    struct sk_buff_head tmpq;
    struct list_head dports;
    struct tipc_msg *hdr;
 +	struct tipc_uaddr ua;
    int user, mtyp, hlen;
    bool exact;
__skb_queue_head_init(&tmpq);
    INIT_LIST_HEAD(&dports);
 +	ua.addrtype = TIPC_SERVICE_RANGE;
skb = tipc_skb_peek(arrvq, &inputq->lock);
    for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
@@@ -1215,7 -1217,7 +1215,7 @@@
    	mtyp = msg_type(hdr);
    	hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
    	onode = msg_orignode(hdr);
 -		type = msg_nametype(hdr);
 +		ua.sr.type = msg_nametype(hdr);
if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
    		spin_lock_bh(&inputq->lock);
@@@ -1230,23 -1232,24 +1230,23 @@@
/* Group messages require exact scope match */
    	if (msg_in_group(hdr)) {
 -			lower = 0;
 -			upper = ~0;
 -			scope = msg_lookup_scope(hdr);
 +			ua.sr.lower = 0;
 +			ua.sr.upper = ~0;
 +			ua.scope = msg_lookup_scope(hdr);
    		exact = true;
    	} else {
    		/* TIPC_NODE_SCOPE means "any scope" in this context */
    		if (onode == self)
 -				scope = TIPC_NODE_SCOPE;
 +				ua.scope = TIPC_NODE_SCOPE;
    		else
 -				scope = TIPC_CLUSTER_SCOPE;
 +				ua.scope = TIPC_CLUSTER_SCOPE;
    		exact = false;
 -			lower = msg_namelower(hdr);
 -			upper = msg_nameupper(hdr);
 +			ua.sr.lower = msg_namelower(hdr);
 +			ua.sr.upper = msg_nameupper(hdr);
    	}
/* Create destination port list: */
 -		tipc_nametbl_mc_lookup(net, type, lower, upper,
 -				       scope, exact, &dports);
 +		tipc_nametbl_lookup_mcast_sockets(net, &ua, exact, &dports);
/* Clone message per destination */
    	while (tipc_dest_pop(&dports, NULL, &portid)) {
@@@ -1262,7 -1265,7 +1262,7 @@@
    	spin_lock_bh(&inputq->lock);
    	if (skb_peek(arrvq) == skb) {
    		skb_queue_splice_tail_init(&tmpq, inputq);
- 			kfree_skb(__skb_dequeue(arrvq));
+ 			__skb_dequeue(arrvq);
    	}
    	spin_unlock_bh(&inputq->lock);
    	__skb_queue_purge(&tmpq);
@@@ -1414,43 -1417,44 +1414,43 @@@ static int __tipc_sendmsg(struct socke
    struct sock *sk = sock->sk;
    struct net *net = sock_net(sk);
    struct tipc_sock *tsk = tipc_sk(sk);
 -	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
 +	struct tipc_uaddr *ua = (struct tipc_uaddr *)m->msg_name;
    long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
    struct list_head *clinks = &tsk->cong_links;
    bool syn = !tipc_sk_type_connectionless(sk);
    struct tipc_group *grp = tsk->group;
    struct tipc_msg *hdr = &tsk->phdr;
 -	struct tipc_service_range *seq;
 +	struct tipc_socket_addr skaddr;
    struct sk_buff_head pkts;
 -	u32 dport = 0, dnode = 0;
 -	u32 type = 0, inst = 0;
 -	int mtu, rc;
 +	int atype, mtu, rc;
if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
    	return -EMSGSIZE;
-	if (likely(dest)) {
 -		if (unlikely(m->msg_namelen < sizeof(*dest)))
 -			return -EINVAL;
 -		if (unlikely(dest->family != AF_TIPC))
 +	if (ua) {
 +		if (!tipc_uaddr_valid(ua, m->msg_namelen))
    		return -EINVAL;
 +		 atype = ua->addrtype;
    }
+	/* If socket belongs to a communication group follow other paths */
    if (grp) {
 -		if (!dest)
 +		if (!ua)
    		return tipc_send_group_bcast(sock, m, dlen, timeout);
 -		if (dest->addrtype == TIPC_SERVICE_ADDR)
 +		if (atype == TIPC_SERVICE_ADDR)
    		return tipc_send_group_anycast(sock, m, dlen, timeout);
 -		if (dest->addrtype == TIPC_SOCKET_ADDR)
 +		if (atype == TIPC_SOCKET_ADDR)
    		return tipc_send_group_unicast(sock, m, dlen, timeout);
 -		if (dest->addrtype == TIPC_ADDR_MCAST)
 +		if (atype == TIPC_SERVICE_RANGE)
    		return tipc_send_group_mcast(sock, m, dlen, timeout);
    	return -EINVAL;
    }
-	if (unlikely(!dest)) {
 -		dest = &tsk->peer;
 -		if (!syn && dest->family != AF_TIPC)
 +	if (!ua) {
 +		ua = (struct tipc_uaddr *)&tsk->peer;
 +		if (!syn && ua->family != AF_TIPC)
    		return -EDESTADDRREQ;
 +		atype = ua->addrtype;
    }
if (unlikely(syn)) {
@@@ -1460,51 -1464,54 +1460,51 @@@
    		return -EISCONN;
    	if (tsk->published)
    		return -EOPNOTSUPP;
 -		if (dest->addrtype == TIPC_SERVICE_ADDR) {
 -			tsk->conn_type = dest->addr.name.name.type;
 -			tsk->conn_instance = dest->addr.name.name.instance;
 +		if (atype == TIPC_SERVICE_ADDR) {
 +			tsk->conn_type = ua->sa.type;
 +			tsk->conn_instance = ua->sa.instance;
    	}
    	msg_set_syn(hdr, 1);
    }
-	seq = &dest->addr.nameseq;
 -	if (dest->addrtype == TIPC_ADDR_MCAST)
 -		return tipc_sendmcast(sock, seq, m, dlen, timeout);
 -
 -	if (dest->addrtype == TIPC_SERVICE_ADDR) {
 -		type = dest->addr.name.name.type;
 -		inst = dest->addr.name.name.instance;
 -		dnode = dest->addr.name.domain;
 -		dport = tipc_nametbl_translate(net, type, inst, &dnode);
 -		if (unlikely(!dport && !dnode))
 +	/* Determine destination */
 +	if (atype == TIPC_SERVICE_RANGE) {
 +		return tipc_sendmcast(sock, ua, m, dlen, timeout);
 +	} else if (atype == TIPC_SERVICE_ADDR) {
 +		skaddr.node = ua->lookup_node;
 +		ua->scope = tipc_node2scope(skaddr.node);
 +		if (!tipc_nametbl_lookup_anycast(net, ua, &skaddr))
    		return -EHOSTUNREACH;
 -	} else if (dest->addrtype == TIPC_SOCKET_ADDR) {
 -		dnode = dest->addr.id.node;
 +	} else if (atype == TIPC_SOCKET_ADDR) {
 +		skaddr = ua->sk;
    } else {
    	return -EINVAL;
    }
/* Block or return if destination link is congested */
    rc = tipc_wait_for_cond(sock, &timeout,
 -				!tipc_dest_find(clinks, dnode, 0));
 +				!tipc_dest_find(clinks, skaddr.node, 0));
    if (unlikely(rc))
    	return rc;
-	if (dest->addrtype == TIPC_SERVICE_ADDR) {
 +	/* Finally build message header */
 +	msg_set_destnode(hdr, skaddr.node);
 +	msg_set_destport(hdr, skaddr.ref);
 +	if (atype == TIPC_SERVICE_ADDR) {
    	msg_set_type(hdr, TIPC_NAMED_MSG);
    	msg_set_hdr_sz(hdr, NAMED_H_SIZE);
 -		msg_set_nametype(hdr, type);
 -		msg_set_nameinst(hdr, inst);
 -		msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
 -		msg_set_destnode(hdr, dnode);
 -		msg_set_destport(hdr, dport);
 +		msg_set_nametype(hdr, ua->sa.type);
 +		msg_set_nameinst(hdr, ua->sa.instance);
 +		msg_set_lookup_scope(hdr, ua->scope);
    } else { /* TIPC_SOCKET_ADDR */
    	msg_set_type(hdr, TIPC_DIRECT_MSG);
    	msg_set_lookup_scope(hdr, 0);
 -		msg_set_destnode(hdr, dnode);
 -		msg_set_destport(hdr, dest->addr.id.ref);
    	msg_set_hdr_sz(hdr, BASIC_H_SIZE);
    }
+	/* Add message body */
    __skb_queue_head_init(&pkts);
 -	mtu = tipc_node_get_mtu(net, dnode, tsk->portid, true);
 +	mtu = tipc_node_get_mtu(net, skaddr.node, tsk->portid, true);
    rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
    if (unlikely(rc != dlen))
    	return rc;
@@@ -1513,11 -1520,10 +1513,11 @@@
    	return -ENOMEM;
    }
+	/* Send message */
    trace_tipc_sk_sendmsg(sk, skb_peek(&pkts), TIPC_DUMP_SK_SNDQ, " ");
 -	rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid);
 +	rc = tipc_node_xmit(net, &pkts, skaddr.node, tsk->portid);
    if (unlikely(rc == -ELINKCONG)) {
 -		tipc_dest_push(clinks, dnode, 0);
 +		tipc_dest_push(clinks, skaddr.node, 0);
    	tsk->cong_link_cnt++;
    	rc = 0;
    }
@@@ -2885,62 -2891,66 +2885,62 @@@ static void tipc_sk_timeout(struct time
    sock_put(sk);
  }
-static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
 -			   struct tipc_service_range const *seq)
 +static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua)
  {
    struct sock *sk = &tsk->sk;
    struct net *net = sock_net(sk);
 -	struct publication *publ;
 +	struct tipc_socket_addr skaddr;
 +	struct publication *p;
    u32 key;
-	if (scope != TIPC_NODE_SCOPE)
 -		scope = TIPC_CLUSTER_SCOPE;
 -
    if (tipc_sk_connected(sk))
    	return -EINVAL;
    key = tsk->portid + tsk->pub_count + 1;
    if (key == tsk->portid)
    	return -EADDRINUSE;
 -
 -	publ = tipc_nametbl_publish(net, seq->type, seq->lower, seq->upper,
 -				    scope, tsk->portid, key);
 -	if (unlikely(!publ))
 +	skaddr.ref = tsk->portid;
 +	skaddr.node = tipc_own_addr(net);
 +	p = tipc_nametbl_publish(net, ua, &skaddr, key);
 +	if (unlikely(!p))
    	return -EINVAL;
-	list_add(&publ->binding_sock, &tsk->publications);
 +	list_add(&p->binding_sock, &tsk->publications);
    tsk->pub_count++;
 -	tsk->published = 1;
 +	tsk->published = true;
    return 0;
  }
-static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
 -			    struct tipc_service_range const *seq)
 +static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua)
  {
    struct net *net = sock_net(&tsk->sk);
 -	struct publication *publ;
 -	struct publication *safe;
 +	struct publication *safe, *p;
 +	struct tipc_uaddr _ua;
    int rc = -EINVAL;
-	if (scope != TIPC_NODE_SCOPE)
 -		scope = TIPC_CLUSTER_SCOPE;
 -
 -	list_for_each_entry_safe(publ, safe, &tsk->publications, binding_sock) {
 -		if (seq) {
 -			if (publ->scope != scope)
 -				continue;
 -			if (publ->type != seq->type)
 -				continue;
 -			if (publ->lower != seq->lower)
 -				continue;
 -			if (publ->upper != seq->upper)
 -				break;
 -			tipc_nametbl_withdraw(net, publ->type, publ->lower,
 -					      publ->upper, publ->key);
 -			rc = 0;
 -			break;
 +	list_for_each_entry_safe(p, safe, &tsk->publications, binding_sock) {
 +		if (!ua) {
 +			tipc_uaddr(&_ua, TIPC_SERVICE_RANGE, p->scope,
 +				   p->sr.type, p->sr.lower, p->sr.upper);
 +			tipc_nametbl_withdraw(net, &_ua, &p->sk, p->key);
 +			continue;
    	}
 -		tipc_nametbl_withdraw(net, publ->type, publ->lower,
 -				      publ->upper, publ->key);
 +		/* Unbind specific publication */
 +		if (p->scope != ua->scope)
 +			continue;
 +		if (p->sr.type != ua->sr.type)
 +			continue;
 +		if (p->sr.lower != ua->sr.lower)
 +			continue;
 +		if (p->sr.upper != ua->sr.upper)
 +			break;
 +		tipc_nametbl_withdraw(net, ua, &p->sk, p->key);
    	rc = 0;
 +		break;
    }
 -	if (list_empty(&tsk->publications))
 +	if (list_empty(&tsk->publications)) {
    	tsk->published = 0;
 +		rc = 0;
 +	}
    return rc;
  }
@@@ -3057,15 -3067,13 +3057,15 @@@ static int tipc_sk_join(struct tipc_soc
    struct net *net = sock_net(&tsk->sk);
    struct tipc_group *grp = tsk->group;
    struct tipc_msg *hdr = &tsk->phdr;
 -	struct tipc_service_range seq;
 +	struct tipc_uaddr ua;
    int rc;
if (mreq->type < TIPC_RESERVED_TYPES)
    	return -EACCES;
    if (mreq->scope > TIPC_NODE_SCOPE)
    	return -EINVAL;
 +	if (mreq->scope != TIPC_NODE_SCOPE)
 +		mreq->scope = TIPC_CLUSTER_SCOPE;
    if (grp)
    	return -EACCES;
    grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open);
@@@ -3075,10 -3083,11 +3075,10 @@@
    msg_set_lookup_scope(hdr, mreq->scope);
    msg_set_nametype(hdr, mreq->type);
    msg_set_dest_droppable(hdr, true);
 -	seq.type = mreq->type;
 -	seq.lower = mreq->instance;
 -	seq.upper = seq.lower;
 -	tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope);
 -	rc = tipc_sk_publish(tsk, mreq->scope, &seq);
 +	tipc_uaddr(&ua, TIPC_SERVICE_RANGE, mreq->scope,
 +		   mreq->type, mreq->instance, mreq->instance);
 +	tipc_nametbl_build_group(net, grp, &ua);
 +	rc = tipc_sk_publish(tsk, &ua);
    if (rc) {
    	tipc_group_delete(net, grp);
    	tsk->group = NULL;
@@@ -3095,17 -3104,15 +3095,17 @@@ static int tipc_sk_leave(struct tipc_so
  {
    struct net *net = sock_net(&tsk->sk);
    struct tipc_group *grp = tsk->group;
 -	struct tipc_service_range seq;
 +	struct tipc_uaddr ua;
    int scope;
if (!grp)
    	return -EINVAL;
 -	tipc_group_self(grp, &seq, &scope);
 +	ua.addrtype = TIPC_SERVICE_RANGE;
 +	tipc_group_self(grp, &ua.sr, &scope);
 +	ua.scope = scope;
    tipc_group_delete(net, grp);
    tsk->group = NULL;
 -	tipc_sk_withdraw(tsk, scope, &seq);
 +	tipc_sk_withdraw(tsk, &ua);
    return 0;
  }
@@@ -3704,11 -3711,11 +3704,11 @@@ static int __tipc_nl_add_sk_publ(struc
if (nla_put_u32(skb, TIPC_NLA_PUBL_KEY, publ->key))
    	goto attr_msg_cancel;
 -	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->type))
 +	if (nla_put_u32(skb, TIPC_NLA_PUBL_TYPE, publ->sr.type))
    	goto attr_msg_cancel;
 -	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->lower))
 +	if (nla_put_u32(skb, TIPC_NLA_PUBL_LOWER, publ->sr.lower))
    	goto attr_msg_cancel;
 -	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->upper))
 +	if (nla_put_u32(skb, TIPC_NLA_PUBL_UPPER, publ->sr.upper))
    	goto attr_msg_cancel;
nla_nest_end(skb, attrs);
@@@ -3856,9 -3863,9 +3856,9 @@@ bool tipc_sk_filtering(struct sock *sk
    	p = list_first_entry_or_null(&tsk->publications,
    				     struct publication, binding_sock);
    	if (p) {
 -			type = p->type;
 -			lower = p->lower;
 -			upper = p->upper;
 +			type = p->sr.type;
 +			lower = p->sr.lower;
 +			upper = p->sr.upper;
    	}
    }
@@@ -3957,9 -3964,9 +3957,9 @@@ int tipc_sk_dump(struct sock *sk, u16 d
    if (tsk->published) {
    	p = list_first_entry_or_null(&tsk->publications,
    				     struct publication, binding_sock);
 -		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->type : 0);
 -		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->lower : 0);
 -		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->upper : 0);
 +		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.type : 0);
 +		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.lower : 0);
 +		i += scnprintf(buf + i, sz - i, " %u", (p) ? p->sr.upper : 0);
    }
    i += scnprintf(buf + i, sz - i, " | %u", tsk->snd_win);
    i += scnprintf(buf + i, sz - i, " %u", tsk->rcv_win);
diff --combined tools/lib/bpf/xsk.c
index 95da0e19f4a5,d24b5cc720ec..cea62cc3e456
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@@ -28,7 -28,6 +28,7 @@@
  #include <sys/mman.h>
  #include <sys/socket.h>
  #include <sys/types.h>
 +#include <linux/if_link.h>
#include "bpf.h"
  #include "libbpf.h"
@@@ -60,6 -59,8 +60,8 @@@ struct xsk_umem 
    int fd;
    int refcount;
    struct list_head ctx_list;
+ 	bool rx_ring_setup_done;
+ 	bool tx_ring_setup_done;
  };
struct xsk_ctx {
@@@ -71,10 -72,8 +73,10 @@@
    int ifindex;
    struct list_head list;
    int prog_fd;
 +	int link_fd;
    int xsks_map_fd;
    char ifname[IFNAMSIZ];
 +	bool has_bpf_link;
  };
struct xsk_socket {
@@@ -412,7 -411,7 +414,7 @@@ static int xsk_load_xdp_prog(struct xsk
    static const int log_buf_size = 16 * 1024;
    struct xsk_ctx *ctx = xsk->ctx;
    char log_buf[log_buf_size];
 -	int err, prog_fd;
 +	int prog_fd;
/* This is the fallback C-program:
     * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
@@@ -502,41 -501,14 +504,41 @@@
    	return prog_fd;
    }
-	err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
 -				  xsk->config.xdp_flags);
 +	ctx->prog_fd = prog_fd;
 +	return 0;
 +}
 +
 +static int xsk_create_bpf_link(struct xsk_socket *xsk)
 +{
 +	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
 +	struct xsk_ctx *ctx = xsk->ctx;
 +	__u32 prog_id = 0;
 +	int link_fd;
 +	int err;
 +
 +	err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
    if (err) {
 -		close(prog_fd);
 +		pr_warn("getting XDP prog id failed\n");
    	return err;
    }
-	ctx->prog_fd = prog_fd;
 +	/* if there's a netlink-based XDP prog loaded on interface, bail out
 +	 * and ask user to do the removal by himself
 +	 */
 +	if (prog_id) {
 +		pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
 +		return -EINVAL;
 +	}
 +
 +	opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);
 +
 +	link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts);
 +	if (link_fd < 0) {
 +		pr_warn("bpf_link_create failed: %s\n", strerror(errno));
 +		return link_fd;
 +	}
 +
 +	ctx->link_fd = link_fd;
    return 0;
  }
@@@ -655,6 -627,7 +657,6 @@@ static int xsk_lookup_bpf_maps(struct x
    	close(fd);
    }
-	err = 0;
    if (ctx->xsks_map_fd == -1)
    	err = -ENOENT;
@@@ -671,98 -644,6 +673,98 @@@ static int xsk_set_bpf_maps(struct xsk_
    			   &xsk->fd, 0);
  }
+static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)
 +{
 +	struct bpf_link_info link_info;
 +	__u32 link_len;
 +	__u32 id = 0;
 +	int err;
 +	int fd;
 +
 +	while (true) {
 +		err = bpf_link_get_next_id(id, &id);
 +		if (err) {
 +			if (errno == ENOENT) {
 +				err = 0;
 +				break;
 +			}
 +			pr_warn("can't get next link: %s\n", strerror(errno));
 +			break;
 +		}
 +
 +		fd = bpf_link_get_fd_by_id(id);
 +		if (fd < 0) {
 +			if (errno == ENOENT)
 +				continue;
 +			pr_warn("can't get link by id (%u): %s\n", id, strerror(errno));
 +			err = -errno;
 +			break;
 +		}
 +
 +		link_len = sizeof(struct bpf_link_info);
 +		memset(&link_info, 0, link_len);
 +		err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len);
 +		if (err) {
 +			pr_warn("can't get link info: %s\n", strerror(errno));
 +			close(fd);
 +			break;
 +		}
 +		if (link_info.type == BPF_LINK_TYPE_XDP) {
 +			if (link_info.xdp.ifindex == ifindex) {
 +				*link_fd = fd;
 +				if (prog_id)
 +					*prog_id = link_info.prog_id;
 +				break;
 +			}
 +		}
 +		close(fd);
 +	}
 +
 +	return err;
 +}
 +
 +static bool xsk_probe_bpf_link(void)
 +{
 +	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
 +			    .flags = XDP_FLAGS_SKB_MODE);
 +	struct bpf_load_program_attr prog_attr;
 +	struct bpf_insn insns[2] = {
 +		BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
 +		BPF_EXIT_INSN()
 +	};
 +	int prog_fd, link_fd = -1;
 +	int ifindex_lo = 1;
 +	bool ret = false;
 +	int err;
 +
 +	err = xsk_link_lookup(ifindex_lo, NULL, &link_fd);
 +	if (err)
 +		return ret;
 +
 +	if (link_fd >= 0)
 +		return true;
 +
 +	memset(&prog_attr, 0, sizeof(prog_attr));
 +	prog_attr.prog_type = BPF_PROG_TYPE_XDP;
 +	prog_attr.insns = insns;
 +	prog_attr.insns_cnt = ARRAY_SIZE(insns);
 +	prog_attr.license = "GPL";
 +
 +	prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0);
 +	if (prog_fd < 0)
 +		return ret;
 +
 +	link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts);
 +	close(prog_fd);
 +
 +	if (link_fd >= 0) {
 +		ret = true;
 +		close(link_fd);
 +	}
 +
 +	return ret;
 +}
 +
  static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
  {
    char ifname[IFNAMSIZ];
@@@ -784,108 -665,64 +786,108 @@@
    ctx->ifname[IFNAMSIZ - 1] = 0;
xsk->ctx = ctx;
 +	xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
return 0;
  }
-static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp,
 -				int *xsks_map_fd)
 +static int xsk_init_xdp_res(struct xsk_socket *xsk,
 +			    int *xsks_map_fd)
  {
 -	struct xsk_socket *xsk = _xdp;
    struct xsk_ctx *ctx = xsk->ctx;
 -	__u32 prog_id = 0;
    int err;
-	err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,
 -				  xsk->config.xdp_flags);
 +	err = xsk_create_bpf_maps(xsk);
    if (err)
    	return err;
-	if (!prog_id) {
 -		err = xsk_create_bpf_maps(xsk);
 -		if (err)
 -			return err;
 +	err = xsk_load_xdp_prog(xsk);
 +	if (err)
 +		goto err_load_xdp_prog;
-		err = xsk_load_xdp_prog(xsk);
 -		if (err) {
 -			goto err_load_xdp_prog;
 -		}
 -	} else {
 -		ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
 -		if (ctx->prog_fd < 0)
 -			return -errno;
 -		err = xsk_lookup_bpf_maps(xsk);
 -		if (err) {
 -			close(ctx->prog_fd);
 -			return err;
 -		}
 -	}
 +	if (ctx->has_bpf_link)
 +		err = xsk_create_bpf_link(xsk);
 +	else
 +		err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd,
 +					  xsk->config.xdp_flags);
-	if (xsk->rx) {
 -		err = xsk_set_bpf_maps(xsk);
 -		if (err) {
 -			if (!prog_id) {
 -				goto err_set_bpf_maps;
 -			} else {
 -				close(ctx->prog_fd);
 -				return err;
 -			}
 -		}
 -	}
 -	if (xsks_map_fd)
 -		*xsks_map_fd = ctx->xsks_map_fd;
 +	if (err)
 +		goto err_attach_xdp_prog;
-	return 0;
 +	if (!xsk->rx)
 +		return err;
 +
 +	err = xsk_set_bpf_maps(xsk);
 +	if (err)
 +		goto err_set_bpf_maps;
 +
 +	return err;
err_set_bpf_maps:
 +	if (ctx->has_bpf_link)
 +		close(ctx->link_fd);
 +	else
 +		bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
 +err_attach_xdp_prog:
    close(ctx->prog_fd);
 -	bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
  err_load_xdp_prog:
    xsk_delete_bpf_maps(xsk);
 +	return err;
 +}
 +
 +static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id)
 +{
 +	struct xsk_ctx *ctx = xsk->ctx;
 +	int err;
 +
 +	ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
 +	if (ctx->prog_fd < 0) {
 +		err = -errno;
 +		goto err_prog_fd;
 +	}
 +	err = xsk_lookup_bpf_maps(xsk);
 +	if (err)
 +		goto err_lookup_maps;
 +
 +	if (!xsk->rx)
 +		return err;
 +
 +	err = xsk_set_bpf_maps(xsk);
 +	if (err)
 +		goto err_set_maps;
 +
 +	return err;
 +
 +err_set_maps:
 +	close(ctx->xsks_map_fd);
 +err_lookup_maps:
 +	close(ctx->prog_fd);
 +err_prog_fd:
 +	if (ctx->has_bpf_link)
 +		close(ctx->link_fd);
 +	return err;
 +}
 +
 +static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
 +{
 +	struct xsk_socket *xsk = _xdp;
 +	struct xsk_ctx *ctx = xsk->ctx;
 +	__u32 prog_id = 0;
 +	int err;
 +
 +	if (ctx->has_bpf_link)
 +		err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
 +	else
 +		err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
 +
 +	if (err)
 +		return err;
 +
 +	err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) :
 +			 xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id);
 +
 +	if (!err && xsks_map_fd)
 +		*xsks_map_fd = ctx->xsks_map_fd;
return err;
  }
@@@ -908,26 -745,30 +910,30 @@@ static struct xsk_ctx *xsk_get_ctx(stru
    return NULL;
  }
- static void xsk_put_ctx(struct xsk_ctx *ctx)
+ static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
  {
    struct xsk_umem *umem = ctx->umem;
    struct xdp_mmap_offsets off;
    int err;
- 	if (--ctx->refcount == 0) {
- 		err = xsk_get_mmap_offsets(umem->fd, &off);
- 		if (!err) {
- 			munmap(ctx->fill->ring - off.fr.desc,
- 			       off.fr.desc + umem->config.fill_size *
- 			       sizeof(__u64));
- 			munmap(ctx->comp->ring - off.cr.desc,
- 			       off.cr.desc + umem->config.comp_size *
- 			       sizeof(__u64));
- 		}
+ 	if (--ctx->refcount)
+ 		return;
- 		list_del(&ctx->list);
- 		free(ctx);
- 	}
+ 	if (!unmap)
+ 		goto out_free;
+ 
+ 	err = xsk_get_mmap_offsets(umem->fd, &off);
+ 	if (err)
+ 		goto out_free;
+ 
+ 	munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
+ 	       sizeof(__u64));
+ 	munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
+ 	       sizeof(__u64));
+ 
+ out_free:
+ 	list_del(&ctx->list);
+ 	free(ctx);
  }
static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
@@@ -962,8 -803,6 +968,6 @@@
    memcpy(ctx->ifname, ifname, IFNAMSIZ - 1);
    ctx->ifname[IFNAMSIZ - 1] = '\0';
- 	umem->fill_save = NULL;
- 	umem->comp_save = NULL;
    ctx->fill = fill;
    ctx->comp = comp;
    list_add(&ctx->list, &umem->ctx_list);
@@@ -1019,6 -858,8 +1023,8 @@@ int xsk_socket__create_shared(struct xs
    struct xsk_socket *xsk;
    struct xsk_ctx *ctx;
    int err, ifindex;
+ 	bool unmap = umem->fill_save != fill;
+ 	bool rx_setup_done = false, tx_setup_done = false;
if (!umem || !xsk_ptr || !(rx || tx))
    	return -EFAULT;
@@@ -1046,6 -887,8 +1052,8 @@@
    	}
    } else {
    	xsk->fd = umem->fd;
+ 		rx_setup_done = umem->rx_ring_setup_done;
+ 		tx_setup_done = umem->tx_ring_setup_done;
    }
ctx = xsk_get_ctx(umem, ifindex, queue_id);
@@@ -1063,9 -906,8 +1071,9 @@@
    	}
    }
    xsk->ctx = ctx;
 +	xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
- 	if (rx) {
+ 	if (rx && !rx_setup_done) {
    	err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
    			 &xsk->config.rx_size,
    			 sizeof(xsk->config.rx_size));
@@@ -1073,8 -915,10 +1081,10 @@@
    		err = -errno;
    		goto out_put_ctx;
    	}
+ 		if (xsk->fd == umem->fd)
+ 			umem->rx_ring_setup_done = true;
    }
- 	if (tx) {
+ 	if (tx && !tx_setup_done) {
    	err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
    			 &xsk->config.tx_size,
    			 sizeof(xsk->config.tx_size));
@@@ -1082,6 -926,8 +1092,8 @@@
    		err = -errno;
    		goto out_put_ctx;
    	}
+ 		if (xsk->fd == umem->fd)
+ 			umem->rx_ring_setup_done = true;
    }
err = xsk_get_mmap_offsets(xsk->fd, &off);
@@@ -1160,6 -1006,8 +1172,8 @@@
    }
*xsk_ptr = xsk;
+ 	umem->fill_save = NULL;
+ 	umem->comp_save = NULL;
    return 0;
out_mmap_tx:
@@@ -1171,7 -1019,7 +1185,7 @@@ out_mmap_rx
    	munmap(rx_map, off.rx.desc +
    	       xsk->config.rx_size * sizeof(struct xdp_desc));
  out_put_ctx:
- 	xsk_put_ctx(ctx);
+ 	xsk_put_ctx(ctx, unmap);
  out_socket:
    if (--umem->refcount)
    	close(xsk->fd);
@@@ -1185,6 -1033,9 +1199,9 @@@ int xsk_socket__create(struct xsk_socke
    	       struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
    	       const struct xsk_socket_config *usr_config)
  {
+ 	if (!umem)
+ 		return -EFAULT;
+ 
    return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
    				 rx, tx, umem->fill_save,
    				 umem->comp_save, usr_config);
@@@ -1220,8 -1071,6 +1237,8 @@@ void xsk_socket__delete(struct xsk_sock
    if (ctx->prog_fd != -1) {
    	xsk_delete_bpf_maps(xsk);
    	close(ctx->prog_fd);
 +		if (ctx->has_bpf_link)
 +			close(ctx->link_fd);
    }
err = xsk_get_mmap_offsets(xsk->fd, &off);
@@@ -1236,7 -1085,7 +1253,7 @@@
    	}
    }
- 	xsk_put_ctx(ctx);
+ 	xsk_put_ctx(ctx, true);
umem->refcount--;
    /* Do not close an fd that also has an associated umem connected
-- 
LinuxNextTracking

    

2025

2024

2023

2022

2021

2020

2019

2018

2017

2016

2015

2014

2013

2012

2011

[linux-next] LinuxNextTracking branch, master, updated. next-20210412