The following commit has been merged in the master branch:

commit 0efb8a790ef4f4ef2fc3d1c265ed125a386fcfd1
Merge: 6132ececa50042edad15794eb5cb7a2e6831e98b 4c94fe88cde4bb5c8e1baa01106c4e6db1c75738
Author: Stephen Rothwell <sfr@canb.auug.org.au>
Date:   Wed Mar 24 12:21:24 2021 +1100
Merge remote-tracking branch 'net-next/master'
# Conflicts:
#	drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
#	drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
#	drivers/net/ethernet/pensando/ionic/ionic_txrx.c
#	kernel/bpf/verifier.c
diff --combined MAINTAINERS
index 4941a9ba6fc3,ad214621655f..97085cb1828f
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -261,8 -261,8 +261,8 @@@ ABI/AP
L: linux-api@vger.kernel.org
F: include/linux/syscalls.h
F: kernel/sys_ni.c
-F: include/uapi/
-F: arch/*/include/uapi/
+X: include/uapi/
+X: arch/*/include/uapi/
ABIT UGURU 1,2 HARDWARE MONITOR DRIVER M: Hans de Goede hdegoede@redhat.com @@@ -1181,7 -1181,7 +1181,7 @@@ M: Joel Fernandes <joel@joelfernandes.o M: Christian Brauner christian@brauner.io M: Hridya Valsaraju hridya@google.com M: Suren Baghdasaryan surenb@google.com -L: devel@driverdev.osuosl.org +L: linux-kernel@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git F: drivers/android/ @@@ -1530,6 -1530,7 +1530,7 @@@ F: Documentation/devicetree/bindings/dm F: Documentation/devicetree/bindings/i2c/i2c-owl.yaml F: Documentation/devicetree/bindings/interrupt-controller/actions,owl-sirq.yaml F: Documentation/devicetree/bindings/mmc/owl-mmc.yaml + F: Documentation/devicetree/bindings/net/actions,owl-emac.yaml F: Documentation/devicetree/bindings/pinctrl/actions,* F: Documentation/devicetree/bindings/power/actions,owl-sps.txt F: Documentation/devicetree/bindings/timer/actions,owl-timer.txt @@@ -1542,6 -1543,7 +1543,7 @@@ F: drivers/dma/owl-dma. F: drivers/i2c/busses/i2c-owl.c F: drivers/irqchip/irq-owl-sirq.c F: drivers/mmc/host/owl-mmc.c + F: drivers/net/ethernet/actions/ F: drivers/pinctrl/actions/* F: drivers/soc/actions/ F: include/dt-bindings/power/owl-* @@@ -2375,7 -2377,7 +2377,7 @@@ F: sound/soc/rockchip N: rockchip
ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org S: Maintained @@@ -2489,7 -2491,7 +2491,7 @@@ N: sc27x N: sc2731
ARM/STI ARCHITECTURE -M: Patrice Chotard patrice.chotard@st.com +M: Patrice Chotard patrice.chotard@foss.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained W: http://www.stlinux.com @@@ -2522,7 -2524,7 +2524,7 @@@ F: include/linux/remoteproc/st_slim_rpr
ARM/STM32 ARCHITECTURE M: Maxime Coquelin mcoquelin.stm32@gmail.com -M: Alexandre Torgue alexandre.torgue@st.com +M: Alexandre Torgue alexandre.torgue@foss.st.com L: linux-stm32@st-md-mailman.stormreply.com (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@@ -3115,7 -3117,7 +3117,7 @@@ C: irc://irc.oftc.net/bcach F: drivers/md/bcache/
BDISP ST MEDIA DRIVER -M: Fabien Dessenne fabien.dessenne@st.com +M: Fabien Dessenne fabien.dessenne@foss.st.com L: linux-media@vger.kernel.org S: Supported W: https://linuxtv.org @@@ -3233,6 -3235,7 +3235,7 @@@ T: git git://git.kernel.org/pub/scm/lin T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git F: Documentation/bpf/ F: Documentation/networking/filter.rst + F: Documentation/userspace-api/ebpf/ F: arch/*/net/* F: include/linux/bpf* F: include/linux/filter.h @@@ -3247,6 -3250,7 +3250,7 @@@ F: net/core/filter. F: net/sched/act_bpf.c F: net/sched/cls_bpf.c F: samples/bpf/ + F: scripts/bpf_doc.py F: tools/bpf/ F: tools/lib/bpf/ F: tools/testing/selftests/bpf/ @@@ -3675,7 -3679,7 +3679,7 @@@ M: bcm-kernel-feedback-list@broadcom.co L: linux-pm@vger.kernel.org S: Maintained T: git git://github.com/broadcom/stblinux.git -F: drivers/soc/bcm/bcm-pmb.c +F: drivers/soc/bcm/bcm63xx/bcm-pmb.c F: include/dt-bindings/soc/bcm-pmb.h
BROADCOM SPECIFIC AMBA DRIVER (BCMA) @@@ -4181,18 -4185,9 +4185,18 @@@ X: drivers/char/tpm CHECKPATCH M: Andy Whitcroft apw@canonical.com M: Joe Perches joe@perches.com +R: Dwaipayan Ray dwaipayanray1@gmail.com +R: Lukas Bulwahn lukas.bulwahn@gmail.com S: Maintained F: scripts/checkpatch.pl
+CHECKPATCH DOCUMENTATION
+M: Dwaipayan Ray dwaipayanray1@gmail.com
+M: Lukas Bulwahn lukas.bulwahn@gmail.com
+R: Joe Perches joe@perches.com
+S: Maintained
+F: Documentation/dev-tools/checkpatch.rst
+
CHINESE DOCUMENTATION
M: Harry Wei harryxiyou@gmail.com
M: Alex Shi alex.shi@linux.alibaba.com
@@@ -4443,7 -4438,7 +4447,7 @@@ F: include/linux/clk
F: include/linux/of_clk.h
X: drivers/clk/clkdev.c
-COMMON INTERNET FILE SYSTEM (CIFS) +COMMON INTERNET FILE SYSTEM CLIENT (CIFS) M: Steve French sfrench@samba.org L: linux-cifs@vger.kernel.org L: samba-technical@lists.samba.org (moderated for non-subscribers) @@@ -4453,16 -4448,6 +4457,16 @@@ T: git git://git.samba.org/sfrench/cifs F: Documentation/admin-guide/cifs/ F: fs/cifs/
+COMMON INTERNET FILE SYSTEM SERVER (CIFSD)
+M: Namjae Jeon namjae.jeon@samsung.com
+M: Sergey Senozhatsky sergey.senozhatsky@gmail.com
+M: Steve French sfrench@samba.org
+M: Hyunchul Lee hyc.lee@gmail.com
+L: linux-cifs@vger.kernel.org
+L: linux-cifsd-devel@lists.sourceforge.net
+S: Maintained
+F: fs/cifsd/
+
COMPACTPCI HOTPLUG CORE
M: Scott Murray scott@spiteful.org
L: linux-pci@vger.kernel.org
@@@ -5099,7 -5084,7 +5103,7 @@@ S: Maintaine
F: drivers/platform/x86/dell/dell-wmi.c
DELTA ST MEDIA DRIVER -M: Hugues Fruchet hugues.fruchet@st.com +M: Hugues Fruchet hugues.fruchet@foss.st.com L: linux-media@vger.kernel.org S: Supported W: https://linuxtv.org @@@ -5397,7 -5382,7 +5401,7 @@@ F: drivers/hwmon/dme1737. DMI/SMBIOS SUPPORT M: Jean Delvare jdelvare@suse.com S: Maintained -T: quilt http://jdelvare.nerim.net/devel/linux/jdelvare-dmi/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging.git dmi-for-next F: Documentation/ABI/testing/sysfs-firmware-dmi-tables F: drivers/firmware/dmi-id.c F: drivers/firmware/dmi_scan.c @@@ -5490,11 -5475,11 +5494,11 @@@ F: drivers/net/ethernet/freescale/dpaa2 F: drivers/net/ethernet/freescale/dpaa2/dpni*
DPAA2 ETHERNET SWITCH DRIVER - M: Ioana Radulescu ruxandra.radulescu@nxp.com M: Ioana Ciornei ioana.ciornei@nxp.com - L: linux-kernel@vger.kernel.org + L: netdev@vger.kernel.org S: Maintained - F: drivers/staging/fsl-dpaa2/ethsw + F: drivers/net/ethernet/freescale/dpaa2/dpaa2-switch* + F: drivers/net/ethernet/freescale/dpaa2/dpsw*
DPT_I2O SCSI RAID DRIVER M: Adaptec OEM Raid Solutions aacraid@microsemi.com @@@ -5854,7 -5839,7 +5858,7 @@@ M: David Airlie <airlied@linux.ie M: Daniel Vetter daniel@ffwll.ch L: dri-devel@lists.freedesktop.org S: Maintained -B: https://bugs.freedesktop.org/ +B: https://gitlab.freedesktop.org/drm C: irc://chat.freenode.net/dri-devel T: git git://anongit.freedesktop.org/drm/drm F: Documentation/devicetree/bindings/display/ @@@ -6025,6 -6010,7 +6029,6 @@@ F: drivers/gpu/drm/rockchip
DRM DRIVERS FOR STI M: Benjamin Gaignard benjamin.gaignard@linaro.org -M: Vincent Abriou vincent.abriou@st.com L: dri-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc @@@ -6032,9 -6018,10 +6036,9 @@@ F: Documentation/devicetree/bindings/di F: drivers/gpu/drm/sti
DRM DRIVERS FOR STM -M: Yannick Fertre yannick.fertre@st.com -M: Philippe Cornu philippe.cornu@st.com +M: Yannick Fertre yannick.fertre@foss.st.com +M: Philippe Cornu philippe.cornu@foss.st.com M: Benjamin Gaignard benjamin.gaignard@linaro.org -M: Vincent Abriou vincent.abriou@st.com L: dri-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc @@@ -7345,13 -7332,6 +7349,13 @@@ F: fs/verity F: include/linux/fsverity.h F: include/uapi/linux/fsverity.h
+FT260 FTDI USB-HID TO I2C BRIDGE DRIVER
+M: Michael Zaidman michael.zaidman@gmail.com
+L: linux-i2c@vger.kernel.org
+L: linux-input@vger.kernel.org
+S: Maintained
+F: drivers/hid/hid-ft260.c
+
FUJITSU LAPTOP EXTRAS
M: Jonathan Woithe jwoithe@just42.net
L: platform-driver-x86@vger.kernel.org
@@@ -7500,9 -7480,8 +7504,9 @@@ F: include/uapi/asm-generic
GENERIC PHY FRAMEWORK
M: Kishon Vijay Abraham I kishon@ti.com
M: Vinod Koul vkoul@kernel.org
-L: linux-kernel@vger.kernel.org
+L: linux-phy@lists.infradead.org
S: Supported
+Q: https://patchwork.kernel.org/project/linux-phy/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git
F: Documentation/devicetree/bindings/phy/
F: drivers/phy/
@@@ -8141,6 -8120,7 +8145,6 @@@ F: drivers/crypto/hisilicon/sec2/sec_ma
HISILICON STAGING DRIVERS FOR HIKEY 960/970 M: Mauro Carvalho Chehab mchehab+huawei@kernel.org -L: devel@driverdev.osuosl.org S: Maintained F: drivers/staging/hikey9xx/
@@@ -8255,7 -8235,7 +8259,7 @@@ F: include/linux/hugetlb. F: mm/hugetlb.c
HVA ST MEDIA DRIVER -M: Jean-Christophe Trotin jean-christophe.trotin@st.com +M: Jean-Christophe Trotin jean-christophe.trotin@foss.st.com L: linux-media@vger.kernel.org S: Supported W: https://linuxtv.org @@@ -8545,7 -8525,6 +8549,7 @@@ IBM Power SRIOV Virtual NIC Device Driv M: Dany Madden drt@linux.ibm.com M: Lijun Pan ljp@linux.ibm.com M: Sukadev Bhattiprolu sukadev@linux.ibm.com +R: Thomas Falcon tlfalcon@linux.ibm.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmvnic.* @@@ -8717,12 -8696,6 +8721,12 @@@ S: Maintaine F: Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.txt F: drivers/iio/multiplexer/iio-mux.c
+IIO SCMI BASED DRIVER
+M: Jyoti Bhayana jbhayana@google.com
+L: linux-iio@vger.kernel.org
+S: Maintained
+F: drivers/iio/common/scmi_sensors/scmi_iio.c
+
IIO SUBSYSTEM AND DRIVERS
M: Jonathan Cameron jic23@kernel.org
R: Lars-Peter Clausen lars@metafoo.de
@@@ -10900,7 -10873,7 +10904,7 @@@ F: drivers/regulator/max77802-regulator
F: include/dt-bindings/*/*max77802.h
MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com L: linux-pm@vger.kernel.org S: Supported @@@ -10909,7 -10882,7 +10913,7 @@@ F: drivers/power/supply/max77693_charge
MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS M: Chanwoo Choi cw00.choi@samsung.com -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com L: linux-kernel@vger.kernel.org S: Supported @@@ -11198,7 -11171,7 +11202,7 @@@ T: git git://linuxtv.org/media_tree.gi F: drivers/media/dvb-frontends/stv6111*
MEDIA DRIVERS FOR STM32 - DCMI -M: Hugues Fruchet hugues.fruchet@st.com +M: Hugues Fruchet hugues.fruchet@foss.st.com L: linux-media@vger.kernel.org S: Supported T: git git://linuxtv.org/media_tree.git @@@ -11561,7 -11534,7 +11565,7 @@@ F: include/linux/memblock. F: mm/memblock.c
MEMORY CONTROLLER DRIVERS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git @@@ -12569,7 -12542,7 +12573,7 @@@ NETWORKING [MPTCP M: Mat Martineau mathew.j.martineau@linux.intel.com M: Matthieu Baerts matthieu.baerts@tessares.net L: netdev@vger.kernel.org -L: mptcp@lists.01.org +L: mptcp@lists.linux.dev S: Maintained W: https://github.com/multipath-tcp/mptcp_net-next/wiki B: https://github.com/multipath-tcp/mptcp_net-next/issues @@@ -12899,7 -12872,7 +12903,7 @@@ F: Documentation/devicetree/bindings/re F: drivers/regulator/pf8x00-regulator.c
NXP PTN5150A CC LOGIC AND EXTCON DRIVER -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml @@@ -12942,21 -12915,6 +12946,21 @@@ L: linux-nfc@lists.01.org (moderated fo S: Supported F: drivers/nfc/nxp-nci
+NXP i.MX 8QXP/8QM JPEG V4L2 DRIVER
+M: Mirela Rabulea mirela.rabulea@nxp.com
+R: NXP Linux Team linux-imx@nxp.com
+L: linux-media@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/media/imx8-jpeg.yaml
+F: drivers/media/platform/imx-jpeg
+
+NZXT-KRAKEN2 HARDWARE MONITORING DRIVER
+M: Jonas Malaco jonas@protocubo.io
+L: linux-hwmon@vger.kernel.org
+S: Maintained
+F: Documentation/hwmon/nzxt-kraken2.rst
+F: drivers/hwmon/nzxt-kraken2.c
+
OBJAGG
M: Jiri Pirko jiri@nvidia.com
L: netdev@vger.kernel.org
@@@ -13890,7 -13848,7 +13894,7 @@@ M: Lorenzo Pieralisi <lorenzo.pieralisi
R: Rob Herring robh@kernel.org
L: linux-pci@vger.kernel.org
S: Supported
-Q: http://patchwork.ozlabs.org/project/linux-pci/list/
+Q: http://patchwork.kernel.org/project/linux-pci/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git/
F: drivers/pci/controller/
@@@ -13898,7 -13856,7 +13902,7 @@@ PCI SUBSYSTE M: Bjorn Helgaas bhelgaas@google.com L: linux-pci@vger.kernel.org S: Supported -Q: http://patchwork.ozlabs.org/project/linux-pci/list/ +Q: http://patchwork.kernel.org/project/linux-pci/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git F: Documentation/PCI/ F: Documentation/devicetree/bindings/pci/ @@@ -14205,7 -14163,7 +14209,7 @@@ F: drivers/pinctrl/renesas
PIN CONTROLLER - SAMSUNG M: Tomasz Figa tomasz.figa@gmail.com -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Sylwester Nawrocki s.nawrocki@samsung.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org @@@ -14361,15 -14319,6 +14365,15 @@@ F: include/linux/pm_ F: include/linux/powercap.h F: kernel/configs/nopm.config
+DYNAMIC THERMAL POWER MANAGEMENT (DTPM)
+M: Daniel Lezcano daniel.lezcano@kernel.org
+L: linux-pm@vger.kernel.org
+S: Supported
+B: https://bugzilla.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+F: drivers/powercap/dtpm*
+F: include/linux/dtpm.h
+
POWER STATE COORDINATION INTERFACE (PSCI)
M: Mark Rutland mark.rutland@arm.com
M: Lorenzo Pieralisi lorenzo.pieralisi@arm.com
@@@ -14447,7 -14396,7 +14451,7 @@@ F: kernel/sched/psi.
PRINTK M: Petr Mladek pmladek@suse.com -M: Sergey Senozhatsky sergey.senozhatsky@gmail.com +M: Sergey Senozhatsky senozhatsky@chromium.org R: Steven Rostedt rostedt@goodmis.org R: John Ogness john.ogness@linutronix.de S: Maintained @@@ -14764,11 -14713,15 +14768,11 @@@ F: drivers/net/ethernet/qlogic/qlcnic QLOGIC QLGE 10Gb ETHERNET DRIVER M: Manish Chopra manishc@marvell.com M: GR-Linux-NIC-Dev@marvell.com -L: netdev@vger.kernel.org -S: Supported -F: drivers/staging/qlge/ - -QLOGIC QLGE 10Gb ETHERNET DRIVER M: Coiby Xu coiby.xu@gmail.com L: netdev@vger.kernel.org -S: Maintained +S: Supported F: Documentation/networking/device_drivers/qlogic/qlge.rst +F: drivers/staging/qlge/
QM1D1B0004 MEDIA DRIVER M: Akihiro Tsukada tskd08@gmail.com @@@ -14839,7 -14792,7 +14843,7 @@@ M: Todor Tomov <todor.too@gmail.com L: linux-media@vger.kernel.org S: Maintained F: Documentation/admin-guide/media/qcom_camss.rst -F: Documentation/devicetree/bindings/media/qcom,camss.txt +F: Documentation/devicetree/bindings/media/*camss* F: drivers/media/platform/qcom/camss/
QUALCOMM CORE POWER REDUCTION (CPR) AVS DRIVER @@@ -15769,7 -15722,7 +15773,7 @@@ F: Documentation/admin-guide/LSM/SafeSe F: security/safesetid/
SAMSUNG AUDIO (ASoC) DRIVERS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Sylwester Nawrocki s.nawrocki@samsung.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported @@@ -15777,7 -15730,7 +15781,7 @@@ F: Documentation/devicetree/bindings/so F: sound/soc/samsung/
SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com L: linux-crypto@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained @@@ -15812,7 -15765,7 +15816,7 @@@ S: Maintaine F: drivers/platform/x86/samsung-laptop.c
SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com L: linux-kernel@vger.kernel.org L: linux-samsung-soc@vger.kernel.org @@@ -15837,7 -15790,7 +15841,7 @@@ F: drivers/media/platform/s3c-camif F: include/media/drv-intf/s3c_camif.h
SAMSUNG S3FWRN5 NFC DRIVER -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Krzysztof Opasiak k.opasiak@samsung.com L: linux-nfc@lists.01.org (moderated for non-subscribers) S: Maintained @@@ -15857,7 -15810,7 +15861,7 @@@ S: Supporte F: drivers/media/i2c/s5k5baf.c
SAMSUNG S5P Security SubSystem (SSS) DRIVER -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Vladimir Zapolskiy vz@mleia.com L: linux-crypto@vger.kernel.org L: linux-samsung-soc@vger.kernel.org @@@ -15889,7 -15842,7 +15893,7 @@@ F: include/linux/clk/samsung. F: include/linux/platform_data/clk-s3c2410.h
SAMSUNG SPI DRIVERS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Andi Shyti andi@etezian.org L: linux-spi@vger.kernel.org L: linux-samsung-soc@vger.kernel.org @@@ -16652,13 -16605,6 +16656,13 @@@ F: drivers/firmware/arm_sdei. F: include/linux/arm_sdei.h F: include/uapi/linux/arm_sdei.h
+SOFTWARE NODES
+R: Andy Shevchenko andriy.shevchenko@linux.intel.com
+R: Heikki Krogerus heikki.krogerus@linux.intel.com
+L: linux-acpi@vger.kernel.org
+S: Maintained
+F: drivers/base/swnode.c
+
SOFTWARE RAID (Multiple Disks) SUPPORT
M: Song Liu song@kernel.org
L: linux-raid@vger.kernel.org
@@@ -16945,10 -16891,8 +16949,10 @@@ F: tools/spi
SPIDERNET NETWORK DRIVER for CELL M: Ishizaki Kou kou.ishizaki@toshiba.co.jp +M: Geoff Levand geoff@infradead.org L: netdev@vger.kernel.org -S: Supported +L: linuxppc-dev@lists.ozlabs.org +S: Maintained F: Documentation/networking/device_drivers/ethernet/toshiba/spider_net.rst F: drivers/net/ethernet/toshiba/spider_net*
@@@ -17002,19 -16946,11 +17006,19 @@@ F: Documentation/devicetree/bindings/me F: drivers/media/i2c/st-mipid02.c
ST STM32 I2C/SMBUS DRIVER -M: Pierre-Yves MORDRET pierre-yves.mordret@st.com +M: Pierre-Yves MORDRET pierre-yves.mordret@foss.st.com +M: Alain Volmat alain.volmat@foss.st.com L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-stm32*
+ST STPDDC60 DRIVER
+M: Daniel Nilsson daniel.nilsson@flex.com
+L: linux-hwmon@vger.kernel.org
+S: Maintained
+F: Documentation/hwmon/stpddc60.rst
+F: drivers/hwmon/pmbus/stpddc60.c
+
ST VL53L0X ToF RANGER(I2C) IIO DRIVER
M: Song Qiang songqiang1304521@gmail.com
L: linux-iio@vger.kernel.org
@@@ -17108,7 -17044,7 +17112,7 @@@ F: drivers/staging/vt665?
STAGING SUBSYSTEM M: Greg Kroah-Hartman gregkh@linuxfoundation.org -L: devel@driverdev.osuosl.org +L: linux-staging@lists.linux.dev S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git F: drivers/staging/ @@@ -17135,7 -17071,7 +17139,7 @@@ F: kernel/jump_label. F: kernel/static_call.c
STI AUDIO (ASoC) DRIVERS -M: Arnaud Pouliquen arnaud.pouliquen@st.com +M: Arnaud Pouliquen arnaud.pouliquen@foss.st.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt @@@ -17155,15 -17091,15 +17159,15 @@@ T: git git://linuxtv.org/media_tree.gi F: drivers/media/usb/stk1160/
STM32 AUDIO (ASoC) DRIVERS -M: Olivier Moysan olivier.moysan@st.com -M: Arnaud Pouliquen arnaud.pouliquen@st.com +M: Olivier Moysan olivier.moysan@foss.st.com +M: Arnaud Pouliquen arnaud.pouliquen@foss.st.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/iio/adc/st,stm32-*.yaml F: sound/soc/stm/
STM32 TIMER/LPTIMER DRIVERS -M: Fabrice Gasnier fabrice.gasnier@st.com +M: Fabrice Gasnier fabrice.gasnier@foss.st.com S: Maintained F: Documentation/ABI/testing/*timer-stm32 F: Documentation/devicetree/bindings/*/*stm32-*timer* @@@ -17173,7 -17109,7 +17177,7 @@@ F: include/linux/*/stm32-*tim
STMMAC ETHERNET DRIVER M: Giuseppe Cavallaro peppe.cavallaro@st.com -M: Alexandre Torgue alexandre.torgue@st.com +M: Alexandre Torgue alexandre.torgue@foss.st.com M: Jose Abreu joabreu@synopsys.com L: netdev@vger.kernel.org S: Supported @@@ -19203,7 -19139,7 +19207,7 @@@ VME SUBSYSTE M: Martyn Welch martyn@welchs.me.uk M: Manohar Vanga manohar.vanga@gmail.com M: Greg Kroah-Hartman gregkh@linuxfoundation.org -L: devel@driverdev.osuosl.org +L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git F: Documentation/driver-api/vme.rst @@@ -19234,7 -19170,7 +19238,7 @@@ S: Maintaine F: drivers/infiniband/hw/vmw_pvrdma/
VMware PVSCSI driver -M: Jim Gill jgill@vmware.com +M: Vishal Bhakta vbhakta@vmware.com M: VMware PV-Drivers pv-drivers@vmware.com L: linux-scsi@vger.kernel.org S: Maintained @@@ -19293,7 -19229,7 +19297,7 @@@ F: drivers/net/vrf. VSPRINTF M: Petr Mladek pmladek@suse.com M: Steven Rostedt rostedt@goodmis.org -M: Sergey Senozhatsky sergey.senozhatsky@gmail.com +M: Sergey Senozhatsky senozhatsky@chromium.org R: Andy Shevchenko andriy.shevchenko@linux.intel.com R: Rasmus Villemoes linux@rasmusvillemoes.dk S: Maintained @@@ -19944,7 -19880,7 +19948,7 @@@ F: drivers/staging/media/zoran ZRAM COMPRESSED RAM BLOCK DEVICE DRVIER M: Minchan Kim minchan@kernel.org M: Nitin Gupta ngupta@vflare.org -R: Sergey Senozhatsky sergey.senozhatsky.work@gmail.com +R: Sergey Senozhatsky senozhatsky@chromium.org L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/admin-guide/blockdev/zram.rst @@@ -19958,7 -19894,7 +19962,7 @@@ F: drivers/tty/serial/zs. ZSMALLOC COMPRESSED SLAB MEMORY ALLOCATOR M: Minchan Kim minchan@kernel.org M: Nitin Gupta ngupta@vflare.org -R: Sergey Senozhatsky sergey.senozhatsky.work@gmail.com +R: Sergey Senozhatsky senozhatsky@chromium.org L: linux-mm@kvack.org S: Maintained F: Documentation/vm/zsmalloc.rst diff --combined drivers/atm/fore200e.c index 495fd0a1f040,0b9c99c3d218..b508df2ecada --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@@ -21,7 -21,6 +21,6 @@@ #include <linux/module.h> #include <linux/atmdev.h> #include <linux/sonet.h> - #include <linux/atm_suni.h> #include <linux/dma-mapping.h> #include <linux/delay.h> #include <linux/firmware.h> @@@ -100,6 -99,8 +99,6 @@@ static LIST_HEAD(fore200e_boards)
MODULE_AUTHOR("Christophe Lizzi - credits to Uwe Dannowski and Heikki Vatiainen"); MODULE_DESCRIPTION("FORE Systems 200E-series ATM driver - version " FORE200E_VERSION); -MODULE_SUPPORTED_DEVICE("PCA-200E, SBA-200E"); -
static const int fore200e_rx_buf_nbr[ BUFFER_SCHEME_NBR ][ BUFFER_MAGN_NBR ] = { { BUFFER_S1_NBR, BUFFER_L1_NBR }, diff --combined drivers/net/dsa/b53/b53_common.c index eb443721c58e,8d5a82dedce8..3ca6b394dd5f --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@@ -349,7 -349,7 +349,7 @@@ static void b53_set_forwarding(struct b b53_write8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, mgmt); }
- static void b53_enable_vlan(struct b53_device *dev, bool enable, + static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, bool enable_filtering) { u8 mgmt, vc0, vc1, vc4 = 0, vc5; @@@ -431,6 -431,9 +431,9 @@@ b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt);
dev->vlan_enabled = enable; + + dev_dbg(dev->dev, "Port %d VLAN enabled: %d, filtering: %d\n", + port, enable, enable_filtering); }
static int b53_set_jumbo(struct b53_device *dev, bool enable, bool allow_10_100) @@@ -743,7 -746,7 +746,7 @@@ int b53_configure_vlan(struct dsa_switc b53_do_vlan_op(dev, VTA_CMD_CLEAR); }
- b53_enable_vlan(dev, dev->vlan_enabled, ds->vlan_filtering); + b53_enable_vlan(dev, -1, dev->vlan_enabled, ds->vlan_filtering);
b53_for_each_port(dev, i) b53_write16(dev, B53_VLAN_PAGE, @@@ -1105,6 -1108,13 +1108,6 @@@ static int b53_setup(struct dsa_switch b53_disable_port(ds, port); }
- /* Let DSA handle the case were multiple bridges span the same switch - * device and different VLAN awareness settings are requested, which - * would be breaking filtering semantics for any of the other bridge - * devices. (not hardware supported) - */ - ds->vlan_filtering_is_global = true; - return b53_setup_devlink_resources(ds); }
@@@ -1422,7 -1432,7 +1425,7 @@@ int b53_vlan_filtering(struct dsa_switc { struct b53_device *dev = ds->priv;
- b53_enable_vlan(dev, dev->vlan_enabled, vlan_filtering); + b53_enable_vlan(dev, port, dev->vlan_enabled, vlan_filtering);
return 0; } @@@ -1447,7 -1457,7 +1450,7 @@@ static int b53_vlan_prepare(struct dsa_ if (vlan->vid >= dev->num_vlans) return -ERANGE;
- b53_enable_vlan(dev, true, ds->vlan_filtering); + b53_enable_vlan(dev, port, true, ds->vlan_filtering);
return 0; } @@@ -2045,15 -2055,17 +2048,17 @@@ enum dsa_tag_protocol b53_get_tag_proto { struct b53_device *dev = ds->priv;
- /* Older models (5325, 5365) support a different tag format that we do - * not support in net/dsa/tag_brcm.c yet. - */ - if (is5325(dev) || is5365(dev) || - !b53_can_enable_brcm_tags(ds, port, mprot)) { + if (!b53_can_enable_brcm_tags(ds, port, mprot)) { dev->tag_protocol = DSA_TAG_PROTO_NONE; goto out; }
+ /* Older models require a different 6 byte tag */ + if (is5325(dev) || is5365(dev) || is63xx(dev)) { + dev->tag_protocol = DSA_TAG_PROTO_BRCM_LEGACY; + goto out; + } + /* Broadcom BCM58xx chips have a flow accelerator on Port 8 * which requires us to use the prepended Broadcom tag type */ @@@ -2657,13 -2669,6 +2662,13 @@@ struct b53_device *b53_switch_alloc(str ds->ops = &b53_switch_ops; ds->untag_bridge_pvid = true; dev->vlan_enabled = true; + /* Let DSA handle the case were multiple bridges span the same switch + * device and different VLAN awareness settings are requested, which + * would be breaking filtering semantics for any of the other bridge + * devices. (not hardware supported) + */ + ds->vlan_filtering_is_global = true; + mutex_init(&dev->reg_mutex); mutex_init(&dev->stats_mutex);
diff --combined drivers/net/dsa/bcm_sf2.c index ba5d546d06aa,7e0ca8012983..9150038b60cb --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@@ -32,6 -32,36 +32,36 @@@ #include "b53/b53_priv.h" #include "b53/b53_regs.h"
+ static u16 bcm_sf2_reg_rgmii_cntrl(struct bcm_sf2_priv *priv, int port) + { + switch (priv->type) { + case BCM4908_DEVICE_ID: + switch (port) { + case 7: + return REG_RGMII_11_CNTRL; + default: + break; + } + break; + default: + switch (port) { + case 0: + return REG_RGMII_0_CNTRL; + case 1: + return REG_RGMII_1_CNTRL; + case 2: + return REG_RGMII_2_CNTRL; + default: + break; + } + } + + WARN_ONCE(1, "Unsupported port %d\n", port); + + /* RO fallback reg */ + return REG_SWITCH_STATUS; + } + /* Return the number of active ports, not counting the IMP (CPU) port */ static unsigned int bcm_sf2_num_active_ports(struct dsa_switch *ds) { @@@ -114,10 -144,7 +144,10 @@@ static void bcm_sf2_imp_setup(struct ds /* Force link status for IMP port */ reg = core_readl(priv, offset); reg |= (MII_SW_OR | LINK_STS); - reg &= ~GMII_SPEED_UP_2G; + if (priv->type == BCM4908_DEVICE_ID) + reg |= GMII_SPEED_UP_2G; + else + reg &= ~GMII_SPEED_UP_2G; core_writel(priv, reg, offset);
/* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ @@@ -435,6 -462,44 +465,44 @@@ static int bcm_sf2_sw_rst(struct bcm_sf return 0; }
+ static void bcm_sf2_crossbar_setup(struct bcm_sf2_priv *priv) + { + struct device *dev = priv->dev->ds->dev; + int shift; + u32 mask; + u32 reg; + int i; + + mask = BIT(priv->num_crossbar_int_ports) - 1; + + reg = reg_readl(priv, REG_CROSSBAR); + switch (priv->type) { + case BCM4908_DEVICE_ID: + shift = CROSSBAR_BCM4908_INT_P7 * priv->num_crossbar_int_ports; + reg &= ~(mask << shift); + if (0) /* FIXME */ + reg |= CROSSBAR_BCM4908_EXT_SERDES << shift; + else if (priv->int_phy_mask & BIT(7)) + reg |= CROSSBAR_BCM4908_EXT_GPHY4 << shift; + else if (phy_interface_mode_is_rgmii(priv->port_sts[7].mode)) + reg |= CROSSBAR_BCM4908_EXT_RGMII << shift; + else if (WARN(1, "Invalid port mode\n")) + return; + break; + default: + return; + } + reg_writel(priv, reg, REG_CROSSBAR); + + reg = reg_readl(priv, REG_CROSSBAR); + for (i = 0; i < priv->num_crossbar_int_ports; i++) { + shift = i * priv->num_crossbar_int_ports; + + dev_dbg(dev, "crossbar int port #%d - ext port #%d\n", i, + (reg >> shift) & mask); + } + } + static void bcm_sf2_intr_disable(struct bcm_sf2_priv *priv) { intrl2_0_mask_set(priv, 0xffffffff); @@@ -446,10 -511,11 +514,11 @@@ static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv, struct device_node *dn) { + struct device *dev = priv->dev->ds->dev; + struct bcm_sf2_port_status *port_st; struct device_node *port; unsigned int port_num; struct property *prop; - phy_interface_t mode; int err;
priv->moca_port = -1; @@@ -458,19 -524,26 +527,26 @@@ if (of_property_read_u32(port, "reg", &port_num)) continue;
+ if (port_num >= DSA_MAX_PORTS) { + dev_err(dev, "Invalid port number %d\n", port_num); + continue; + } + + port_st = &priv->port_sts[port_num]; + /* Internal PHYs get assigned a specific 'phy-mode' property * value: "internal" to help flag them before MDIO probing * has completed, since they might be turned off at that * time */ - err = of_get_phy_mode(port, &mode); + err = of_get_phy_mode(port, &port_st->mode); if (err) continue;
- if (mode == PHY_INTERFACE_MODE_INTERNAL) + if (port_st->mode == PHY_INTERFACE_MODE_INTERNAL) priv->int_phy_mask |= 1 << port_num;
- if (mode == PHY_INTERFACE_MODE_MOCA) + if (port_st->mode == PHY_INTERFACE_MODE_MOCA) priv->moca_port = port_num;
if (of_property_read_bool(port, "brcm,use-bcm-hdr")) @@@ -588,10 -661,8 +664,10 @@@ static u32 bcm_sf2_sw_get_phy_flags(str * in bits 15:8 and the patch level in bits 7:0 which is exactly what * the REG_PHY_REVISION register layout is. */ - - return priv->hw_params.gphy_rev; + if (priv->int_phy_mask & BIT(port)) + return priv->hw_params.gphy_rev; + else + return 0; }
static void bcm_sf2_sw_validate(struct dsa_switch *ds, int port, @@@ -647,6 -718,7 +723,7 @@@ static void bcm_sf2_sw_mac_config(struc { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 id_mode_dis = 0, port_mode; + u32 reg_rgmii_ctrl; u32 reg;
if (port == core_readl(priv, CORE_IMP0_PRT_ID)) @@@ -670,10 -742,12 +747,12 @@@ return; }
+ reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port); + /* Clear id_mode_dis bit, and the existing port mode, let * RGMII_MODE_EN bet set by mac_link_{up,down} */ - reg = reg_readl(priv, REG_RGMII_CNTRL_P(port)); + reg = reg_readl(priv, reg_rgmii_ctrl); reg &= ~ID_MODE_DIS; reg &= ~(PORT_MODE_MASK << PORT_MODE_SHIFT);
@@@ -681,13 -755,14 +760,14 @@@ if (id_mode_dis) reg |= ID_MODE_DIS;
- reg_writel(priv, reg, REG_RGMII_CNTRL_P(port)); + reg_writel(priv, reg, reg_rgmii_ctrl); }
static void bcm_sf2_sw_mac_link_set(struct dsa_switch *ds, int port, phy_interface_t interface, bool link) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); + u32 reg_rgmii_ctrl; u32 reg;
if (!phy_interface_mode_is_rgmii(interface) && @@@ -695,13 -770,15 +775,15 @@@ interface != PHY_INTERFACE_MODE_REVMII) return;
+ reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port); + /* If the link is down, just disable the interface to conserve power */ - reg = reg_readl(priv, REG_RGMII_CNTRL_P(port)); + reg = reg_readl(priv, reg_rgmii_ctrl); if (link) reg |= RGMII_MODE_EN; else reg &= ~RGMII_MODE_EN; - reg_writel(priv, reg, REG_RGMII_CNTRL_P(port)); + reg_writel(priv, reg, reg_rgmii_ctrl); }
static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port, @@@ -735,11 -812,15 +817,15 @@@ static void bcm_sf2_sw_mac_link_up(stru { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_eee *p = &priv->dev->ports[port].eee; - u32 reg, offset;
bcm_sf2_sw_mac_link_set(ds, port, interface, true);
if (port != core_readl(priv, CORE_IMP0_PRT_ID)) { + u32 reg_rgmii_ctrl; + u32 reg, offset; + + reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port); + if (priv->type == BCM4908_DEVICE_ID || priv->type == BCM7445_DEVICE_ID) offset = CORE_STS_OVERRIDE_GMIIP_PORT(port); @@@ -750,7 -831,7 +836,7 @@@ interface == PHY_INTERFACE_MODE_RGMII_TXID || interface == PHY_INTERFACE_MODE_MII || interface == PHY_INTERFACE_MODE_REVMII) { - reg = reg_readl(priv, REG_RGMII_CNTRL_P(port)); + reg = reg_readl(priv, reg_rgmii_ctrl); reg &= ~(RX_PAUSE_EN | TX_PAUSE_EN);
if (tx_pause) @@@ -758,7 -839,7 +844,7 @@@ if (rx_pause) reg |= RX_PAUSE_EN;
- reg_writel(priv, reg, REG_RGMII_CNTRL_P(port)); + reg_writel(priv, reg, reg_rgmii_ctrl); }
reg = SW_OVERRIDE | LINK_STS; @@@ -861,6 -942,8 +947,8 @@@ static int bcm_sf2_sw_resume(struct dsa return ret; }
+ bcm_sf2_crossbar_setup(priv); + ret = bcm_sf2_cfp_resume(ds); if (ret) return ret; @@@ -1133,6 -1216,7 +1221,7 @@@ struct bcm_sf2_of_data const u16 *reg_offsets; unsigned int core_reg_align; unsigned int num_cfp_rules; + unsigned int num_crossbar_int_ports; };
static const u16 bcm_sf2_4908_reg_offsets[] = { @@@ -1144,9 -1228,7 +1233,7 @@@ [REG_PHY_REVISION] = 0x14, [REG_SPHY_CNTRL] = 0x24, [REG_CROSSBAR] = 0xc8, - [REG_RGMII_0_CNTRL] = 0xe0, - [REG_RGMII_1_CNTRL] = 0xec, - [REG_RGMII_2_CNTRL] = 0xf8, + [REG_RGMII_11_CNTRL] = 0x014c, [REG_LED_0_CNTRL] = 0x40, [REG_LED_1_CNTRL] = 0x4c, [REG_LED_2_CNTRL] = 0x58, @@@ -1156,7 -1238,8 +1243,8 @@@ static const struct bcm_sf2_of_data bcm .type = BCM4908_DEVICE_ID, .core_reg_align = 0, .reg_offsets = bcm_sf2_4908_reg_offsets, - .num_cfp_rules = 0, /* FIXME */ + .num_cfp_rules = 256, + .num_crossbar_int_ports = 2, };
/* Register offsets for the SWITCH_REG_* block */ @@@ -1267,6 -1350,7 +1355,7 @@@ static int bcm_sf2_sw_probe(struct plat priv->reg_offsets = data->reg_offsets; priv->core_reg_align = data->core_reg_align; priv->num_cfp_rules = data->num_cfp_rules; + priv->num_crossbar_int_ports = data->num_crossbar_int_ports;
priv->rcdev = devm_reset_control_get_optional_exclusive(&pdev->dev, "switch"); @@@ -1340,6 -1424,8 +1429,8 @@@ goto out_clk_mdiv; }
+ bcm_sf2_crossbar_setup(priv); + bcm_sf2_gphy_enable_set(priv->dev->ds, true);
ret = bcm_sf2_mdio_register(ds); diff --combined drivers/net/dsa/mt7530.c index 9871d7cff93a,127856823a3b..c442a5885fca --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@@ -436,32 -436,34 +436,32 @@@ mt7530_pad_clk_setup(struct dsa_switch TD_DM_DRVP(8) | TD_DM_DRVN(8));
/* Setup core clock for MT7530 */ - if (!trgint) { - /* Disable MT7530 core clock */ - core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); - - /* Disable PLL, since phy_device has not yet been created - * provided for phy_[read,write]_mmd_indirect is called, we - * provide our own core_write_mmd_indirect to complete this - * function. - */ - core_write_mmd_indirect(priv, - CORE_GSWPLL_GRP1, - MDIO_MMD_VEND2, - 0); - - /* Set core clock into 500Mhz */ - core_write(priv, CORE_GSWPLL_GRP2, - RG_GSWPLL_POSDIV_500M(1) | - RG_GSWPLL_FBKDIV_500M(25)); - - /* Enable PLL */ - core_write(priv, CORE_GSWPLL_GRP1, - RG_GSWPLL_EN_PRE | - RG_GSWPLL_POSDIV_200M(2) | - RG_GSWPLL_FBKDIV_200M(32)); - - /* Enable MT7530 core clock */ - core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); - } + /* Disable MT7530 core clock */ + core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); + + /* Disable PLL, since phy_device has not yet been created + * provided for phy_[read,write]_mmd_indirect is called, we + * provide our own core_write_mmd_indirect to complete this + * function. + */ + core_write_mmd_indirect(priv, + CORE_GSWPLL_GRP1, + MDIO_MMD_VEND2, + 0); + + /* Set core clock into 500Mhz */ + core_write(priv, CORE_GSWPLL_GRP2, + RG_GSWPLL_POSDIV_500M(1) | + RG_GSWPLL_FBKDIV_500M(25)); + + /* Enable PLL */ + core_write(priv, CORE_GSWPLL_GRP1, + RG_GSWPLL_EN_PRE | + RG_GSWPLL_POSDIV_200M(2) | + RG_GSWPLL_FBKDIV_200M(32)); + + /* Enable MT7530 core clock */ + core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
/* Setup the MT7530 TRGMII Tx Clock */ core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); @@@ -997,8 -999,9 +997,9 @@@ mt753x_cpu_port_enable(struct dsa_switc mt7530_write(priv, MT7530_PVC_P(port), PORT_SPEC_TAG);
- /* Unknown multicast frame forwarding to the cpu port */ - mt7530_rmw(priv, MT7530_MFC, UNM_FFP_MASK, UNM_FFP(BIT(port))); + /* Disable flooding by default */ + mt7530_rmw(priv, MT7530_MFC, BC_FFP_MASK | UNM_FFP_MASK | UNU_FFP_MASK, + BC_FFP(BIT(port)) | UNM_FFP(BIT(port)) | UNU_FFP(BIT(port)));
/* Set CPU port number */ if (priv->id == ID_MT7621) @@@ -1135,6 -1138,56 +1136,56 @@@ mt7530_stp_state_set(struct dsa_switch mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK, stp_state); }
+ static int + mt7530_port_pre_bridge_flags(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) + { + if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | + BR_BCAST_FLOOD)) + return -EINVAL; + + return 0; + } + + static int + mt7530_port_bridge_flags(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) + { + struct mt7530_priv *priv = ds->priv; + + if (flags.mask & BR_LEARNING) + mt7530_rmw(priv, MT7530_PSC_P(port), SA_DIS, + flags.val & BR_LEARNING ? 0 : SA_DIS); + + if (flags.mask & BR_FLOOD) + mt7530_rmw(priv, MT7530_MFC, UNU_FFP(BIT(port)), + flags.val & BR_FLOOD ? UNU_FFP(BIT(port)) : 0); + + if (flags.mask & BR_MCAST_FLOOD) + mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)), + flags.val & BR_MCAST_FLOOD ? UNM_FFP(BIT(port)) : 0); + + if (flags.mask & BR_BCAST_FLOOD) + mt7530_rmw(priv, MT7530_MFC, BC_FFP(BIT(port)), + flags.val & BR_BCAST_FLOOD ? BC_FFP(BIT(port)) : 0); + + return 0; + } + + static int + mt7530_port_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, + struct netlink_ext_ack *extack) + { + struct mt7530_priv *priv = ds->priv; + + mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)), + mrouter ? UNM_FFP(BIT(port)) : 0); + + return 0; + } + static int mt7530_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *bridge) @@@ -1346,6 -1399,59 +1397,59 @@@ err return 0; }
+ static int + mt7530_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) + { + struct mt7530_priv *priv = ds->priv; + const u8 *addr = mdb->addr; + u16 vid = mdb->vid; + u8 port_mask = 0; + int ret; + + mutex_lock(&priv->reg_mutex); + + mt7530_fdb_write(priv, vid, 0, addr, 0, STATIC_EMP); + if (!mt7530_fdb_cmd(priv, MT7530_FDB_READ, NULL)) + port_mask = (mt7530_read(priv, MT7530_ATRD) >> PORT_MAP) + & PORT_MAP_MASK; + + port_mask |= BIT(port); + mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_ENT); + ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, NULL); + + mutex_unlock(&priv->reg_mutex); + + return ret; + } + + static int + mt7530_port_mdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) + { + struct mt7530_priv *priv = ds->priv; + const u8 *addr = mdb->addr; + u16 vid = mdb->vid; + u8 port_mask = 0; + int ret; + + mutex_lock(&priv->reg_mutex); + + mt7530_fdb_write(priv, vid, 0, addr, 0, STATIC_EMP); + if (!mt7530_fdb_cmd(priv, MT7530_FDB_READ, NULL)) + port_mask = (mt7530_read(priv, MT7530_ATRD) >> PORT_MAP) + & PORT_MAP_MASK; + + port_mask &= ~BIT(port); + mt7530_fdb_write(priv, vid, port_mask, addr, -1, + port_mask ? STATIC_ENT : STATIC_EMP); + ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, NULL); + + mutex_unlock(&priv->reg_mutex); + + return ret; + } + static int mt7530_vlan_cmd(struct mt7530_priv *priv, enum mt7530_vlan_cmd cmd, u16 vid) { @@@ -1818,9 -1924,12 +1922,12 @@@ mt7530_setup(struct dsa_switch *ds ret = mt753x_cpu_port_enable(ds, i); if (ret) return ret; - } else + } else { mt7530_port_disable(ds, i);
+ /* Disable learning by default on all user ports */ + mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + } /* Enable consistent egress tag */ mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK, PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); @@@ -1982,9 -2091,13 +2089,13 @@@ mt7531_setup(struct dsa_switch *ds ret = mt753x_cpu_port_enable(ds, i); if (ret) return ret; - } else + } else { mt7530_port_disable(ds, i);
+ /* Disable learning by default on all user ports */ + mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + } + /* Enable consistent egress tag */ mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK, PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); @@@ -2706,11 -2819,16 +2817,16 @@@ static const struct dsa_switch_ops mt75 .port_change_mtu = mt7530_port_change_mtu, .port_max_mtu = mt7530_port_max_mtu, .port_stp_state_set = mt7530_stp_state_set, + .port_pre_bridge_flags = mt7530_port_pre_bridge_flags, + .port_bridge_flags = mt7530_port_bridge_flags, + .port_set_mrouter = mt7530_port_set_mrouter, .port_bridge_join = mt7530_port_bridge_join, .port_bridge_leave = mt7530_port_bridge_leave, .port_fdb_add = mt7530_port_fdb_add, .port_fdb_del = mt7530_port_fdb_del, .port_fdb_dump = mt7530_port_fdb_dump, + .port_mdb_add = mt7530_port_mdb_add, + .port_mdb_del = mt7530_port_mdb_del, .port_vlan_filtering = mt7530_port_vlan_filtering, .port_vlan_add = mt7530_port_vlan_add, .port_vlan_del = mt7530_port_vlan_del, diff --combined drivers/net/ethernet/intel/e1000e/netdev.c index a0948002ddf8,31b8726fd69b..88e9035b75cf --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@@ -25,6 -25,7 +25,7 @@@ #include <linux/pm_runtime.h> #include <linux/aer.h> #include <linux/prefetch.h> + #include <linux/suspend.h>
#include "e1000.h"
@@@ -5974,23 -5975,19 +5975,23 @@@ static void e1000_reset_task(struct wor struct e1000_adapter *adapter; adapter = container_of(work, struct e1000_adapter, reset_task);
+ rtnl_lock(); /* don't run the task if already down */ - if (test_bit(__E1000_DOWN, &adapter->state)) + if (test_bit(__E1000_DOWN, &adapter->state)) { + rtnl_unlock(); return; + }
if (!(adapter->flags & FLAG_RESTART_NOW)) { e1000e_dump(adapter); e_err("Reset adapter unexpectedly\n"); } e1000e_reinit_locked(adapter); + rtnl_unlock(); }
/** - * e1000_get_stats64 - Get System Network Statistics + * e1000e_get_stats64 - Get System Network Statistics * @netdev: network interface device structure * @stats: rtnl_link_stats64 pointer * @@@ -6163,7 -6160,7 +6164,7 @@@ static int e1000_mii_ioctl(struct net_d }
/** - * e1000e_hwtstamp_ioctl - control hardware time stamping + * e1000e_hwtstamp_set - control hardware time stamping * @netdev: network interface device structure * @ifr: interface request * @@@ -6821,7 -6818,7 +6822,7 @@@ static void e1000e_disable_aspm(struct }
/** - * e1000e_disable_aspm_locked Disable ASPM states. + * e1000e_disable_aspm_locked - Disable ASPM states. * @pdev: pointer to PCI device struct * @state: bit-mask of ASPM states to disable * @@@ -6922,6 -6919,12 +6923,12 @@@ static int __e1000_resume(struct pci_de return 0; }
+ static __maybe_unused int e1000e_pm_prepare(struct device *dev) + { + return pm_runtime_suspended(dev) && + pm_suspend_via_firmware(); + } + static __maybe_unused int e1000e_pm_suspend(struct device *dev) { struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); @@@ -7630,9 -7633,9 +7637,9 @@@ static int e1000_probe(struct pci_dev *
e1000_print_device_info(adapter);
- dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE);
- if (pci_dev_run_wake(pdev) && hw->mac.type < e1000_pch_cnp) + if (pci_dev_run_wake(pdev) && hw->mac.type != e1000_pch_cnp) pm_runtime_put_noidle(&pdev->dev);
return 0; @@@ -7855,6 -7858,7 +7862,7 @@@ MODULE_DEVICE_TABLE(pci, e1000_pci_tbl)
static const struct dev_pm_ops e1000_pm_ops = { #ifdef CONFIG_PM_SLEEP + .prepare = e1000e_pm_prepare, .suspend = e1000e_pm_suspend, .resume = e1000e_pm_resume, .freeze = e1000e_pm_freeze, diff --combined drivers/net/ethernet/intel/i40e/i40e_main.c index 17f3b800640e,14a1bad9af74..0f84ed0143e4 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@@ -2023,7 -2023,7 +2023,7 @@@ static void i40e_undo_add_filter_entrie }
/** - * i40e_next_entry - Get the next non-broadcast filter from a list + * i40e_next_filter - Get the next non-broadcast filter from a list * @next: pointer to filter in list * * Returns the next non-broadcast filter in the list. Required so that we @@@ -3258,17 -3258,6 +3258,17 @@@ static int i40e_configure_tx_ring(struc return 0; }
+/** + * i40e_rx_offset - Return expected offset into page to access data + * @rx_ring: Ring we are requesting offset of + * + * Returns the offset value for ring into the data buffer. + */ +static unsigned int i40e_rx_offset(struct i40e_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0; +} + /** * i40e_configure_rx_ring - Configure a receive ring context * @ring: The Rx ring to configure @@@ -3380,8 -3369,6 +3380,8 @@@ static int i40e_configure_rx_ring(struc else set_ring_build_skb_enabled(ring);
+ ring->rx_offset = i40e_rx_offset(ring); + /* cache tail for quicker writes, and clear the reg before use */ ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q); writel(0, ring->tail); @@@ -5204,7 -5191,7 +5204,7 @@@ static u8 i40e_pf_get_num_tc(struct i40 }
/** - * i40e_pf_get_pf_tc_map - Get bitmap for enabled traffic classes + * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes * @pf: PF being queried * * Return a bitmap for enabled traffic classes for this PF. @@@ -9467,7 -9454,7 +9467,7 @@@ static void i40e_fdir_flush_and_replay( }
/** - * i40e_get_current_atr_count - Get the count of total FD ATR filters programmed + * i40e_get_current_atr_cnt - Get the count of total FD ATR filters programmed * @pf: board private structure **/ u32 i40e_get_current_atr_cnt(struct i40e_pf *pf) diff --combined drivers/net/ethernet/intel/i40e/i40e_txrx.c index 5747a99122fb,895f59a06fdb..8b5820921377 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@@ -1569,6 -1569,17 +1569,6 @@@ void i40e_free_rx_resources(struct i40e } }
-/** - * i40e_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. - */ -static unsigned int i40e_rx_offset(struct i40e_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0; -} - /** * i40e_setup_rx_descriptors - Allocate Rx descriptors * @rx_ring: Rx descriptor ring (for a specific queue) to setup @@@ -1597,6 -1608,7 +1597,6 @@@ int i40e_setup_rx_descriptors(struct i4 rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; - rx_ring->rx_offset = i40e_rx_offset(rx_ring);
/* XDP RX-queue info only needed for RX rings exposed to XDP */ if (rx_ring->vsi->type == I40E_VSI_MAIN) { @@@ -3333,7 -3345,7 +3333,7 @@@ static int i40e_tx_enable_csum(struct s }
/** - * i40e_create_tx_ctx Build the Tx context descriptor + * i40e_create_tx_ctx - Build the Tx context descriptor * @tx_ring: ring to create the descriptor on * @cd_type_cmd_tso_mss: Quad Word 1 * @cd_tunneling: Quad Word 0 - bits 0-31 diff --combined drivers/net/ethernet/intel/ice/ice_txrx.c index b91dcfd12727,6d87dd9d456e..3148e78adc36 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@@ -443,6 -443,22 +443,6 @@@ void ice_free_rx_ring(struct ice_ring * } }
-/** - * ice_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. - */ -static unsigned int ice_rx_offset(struct ice_ring *rx_ring) -{ - if (ice_ring_uses_build_skb(rx_ring)) - return ICE_SKB_PAD; - else if (ice_is_xdp_ena_vsi(rx_ring->vsi)) - return XDP_PACKET_HEADROOM; - - return 0; -} - /** * ice_setup_rx_ring - Allocate the Rx descriptors * @rx_ring: the Rx ring to set up @@@ -477,6 -493,7 +477,6 @@@ int ice_setup_rx_ring(struct ice_ring *
rx_ring->next_to_use = 0; rx_ring->next_to_clean = 0; - rx_ring->rx_offset = ice_rx_offset(rx_ring);
if (ice_is_xdp_ena_vsi(rx_ring->vsi)) WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog); @@@ -1098,6 -1115,11 +1098,11 @@@ int ice_clean_rx_irq(struct ice_ring *r dma_rmb();
if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) { + struct ice_vsi *ctrl_vsi = rx_ring->vsi; + + if (rx_desc->wb.rxdid == FDIR_DESC_RXDID && + ctrl_vsi->vf_id != ICE_INVAL_VFID) + ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc); ice_put_rx_buf(rx_ring, NULL, 0); cleaned_count++; continue; diff --combined drivers/net/ethernet/intel/ice/ice_xsk.c index 9f94d9159acd,727f277e9d75..17ab8ef024ad --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@@ -358,18 -358,18 +358,18 @@@ xsk_pool_if_up * This function allocates a number of Rx buffers from the fill ring * or the internal recycle mechanism and places them on the Rx ring. * - * Returns false if all allocations were successful, true if any fail. + * Returns true if all allocations were successful, false if any fail. */ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) { union ice_32b_rx_flex_desc *rx_desc; u16 ntu = rx_ring->next_to_use; struct ice_rx_buf *rx_buf; - bool ret = false; + bool ok = true; dma_addr_t dma;
if (!count) - return false; + return true;
rx_desc = ICE_RX_DESC(rx_ring, ntu); rx_buf = &rx_ring->rx_buf[ntu]; @@@ -377,7 -377,7 +377,7 @@@ do { rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_pool); if (!rx_buf->xdp) { - ret = true; + ok = false; break; }
@@@ -402,7 -402,7 +402,7 @@@ ice_release_rx_desc(rx_ring, ntu); }
- return ret; + return ok; }
/** @@@ -473,6 -473,14 +473,14 @@@ ice_run_xdp_zc(struct ice_ring *rx_ring xdp_prog = READ_ONCE(rx_ring->xdp_prog);
act = bpf_prog_run_xdp(xdp_prog, xdp); + + if (likely(act == XDP_REDIRECT)) { + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED; + rcu_read_unlock(); + return result; + } + switch (act) { case XDP_PASS: break; @@@ -480,10 -488,6 +488,6 @@@ xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index]; result = ice_xmit_xdp_buff(xdp, xdp_ring); break; - case XDP_REDIRECT: - err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED; - break; default: bpf_warn_invalid_xdp_action(act); fallthrough; diff --combined drivers/net/ethernet/intel/igb/igb_main.c index a45cd2b416c8,854d19fbf4a4..b83966aa6647 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@@ -2037,7 -2037,7 +2037,7 @@@ static void igb_power_down_link(struct }
/** - * Detect and switch function for Media Auto Sense + * igb_check_swap_media - Detect and switch function for Media Auto Sense * @adapter: address of the board private structure **/ static void igb_check_swap_media(struct igb_adapter *adapter) @@@ -3115,7 -3115,7 +3115,7 @@@ static s32 igb_init_i2c(struct igb_adap return 0;
/* Initialize the i2c bus which is controlled by the registers. - * This bus will use the i2c_algo_bit structue that implements + * This bus will use the i2c_algo_bit structure that implements * the protocol through toggling of the 4 bits in the register. */ adapter->i2c_adap.owner = THIS_MODULE; @@@ -4020,7 -4020,7 +4020,7 @@@ static int igb_sw_init(struct igb_adapt }
/** - * igb_open - Called when a network interface is made active + * __igb_open - Called when a network interface is made active * @netdev: network interface device structure * @resuming: indicates whether we are in a resume call * @@@ -4138,7 -4138,7 +4138,7 @@@ int igb_open(struct net_device *netdev }
/** - * igb_close - Disables a network interface + * __igb_close - Disables a network interface * @netdev: network interface device structure * @suspending: indicates we are in a suspend call * @@@ -5856,7 -5856,7 +5856,7 @@@ static void igb_tx_ctxtdesc(struct igb_ */ if (tx_ring->launchtime_enable) { ts = ktime_to_timespec64(first->skb->tstamp); - first->skb->tstamp = ktime_set(0, 0); + skb_txtime_consumed(first->skb); context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32); } else { context_desc->seqnum_seed = 0; @@@ -8214,8 -8214,7 +8214,8 @@@ static void igb_reuse_rx_page(struct ig new_buff->pagecnt_bias = old_buff->pagecnt_bias; }
-static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer) +static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, + int rx_buf_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@@ -8226,7 -8225,7 +8226,7 @@@
#if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1)) return false; #else #define IGB_LAST_OFFSET \ @@@ -8302,10 -8301,9 +8302,10 @@@ static struct sk_buff *igb_construct_sk return NULL;
if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb); - xdp->data += IGB_TS_HDR_LEN; - size -= IGB_TS_HDR_LEN; + if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb)) { + xdp->data += IGB_TS_HDR_LEN; + size -= IGB_TS_HDR_LEN; + } }
/* Determine available headroom for copy */ @@@ -8366,8 -8364,8 +8366,8 @@@ static struct sk_buff *igb_build_skb(st
/* pull timestamp out of packet data */ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb); - __skb_pull(skb, IGB_TS_HDR_LEN); + if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb)) + __skb_pull(skb, IGB_TS_HDR_LEN); }
/* update buffer offset */ @@@ -8616,17 -8614,11 +8616,17 @@@ static unsigned int igb_rx_offset(struc }
static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring, - const unsigned int size) + const unsigned int size, int *rx_buf_pgcnt) { struct igb_rx_buffer *rx_buffer;
rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + *rx_buf_pgcnt = +#if (PAGE_SIZE < 8192) + page_count(rx_buffer->page); +#else + 0; +#endif prefetchw(rx_buffer->page);
/* we are reusing so sync this buffer for CPU use */ @@@ -8642,9 -8634,9 +8642,9 @@@ }
static void igb_put_rx_buffer(struct igb_ring *rx_ring, - struct igb_rx_buffer *rx_buffer) + struct igb_rx_buffer *rx_buffer, int rx_buf_pgcnt) { - if (igb_can_reuse_rx_page(rx_buffer)) { + if (igb_can_reuse_rx_page(rx_buffer, rx_buf_pgcnt)) { /* hand second half of page back to the ring */ igb_reuse_rx_page(rx_ring, rx_buffer); } else { @@@ -8672,7 -8664,6 +8672,7 @@@ static int igb_clean_rx_irq(struct igb_ unsigned int xdp_xmit = 0; struct xdp_buff xdp; u32 frame_sz = 0; + int rx_buf_pgcnt;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ #if (PAGE_SIZE < 8192) @@@ -8702,7 -8693,7 +8702,7 @@@ */ dma_rmb();
- rx_buffer = igb_get_rx_buffer(rx_ring, size); + rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt);
/* retrieve a buffer from the ring */ if (!skb) { @@@ -8745,7 -8736,7 +8745,7 @@@ break; }
- igb_put_rx_buffer(rx_ring, rx_buffer); + igb_put_rx_buffer(rx_ring, rx_buffer, rx_buf_pgcnt); cleaned_count++;
/* fetch next buffer in frame if non-eop */ diff --combined drivers/net/ethernet/intel/igb/igb_ptp.c index 86a576201f5f,f3ff565da0a1..ba61fe9bfaf4 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@@ -856,9 -856,6 +856,9 @@@ static void igb_ptp_tx_hwtstamp(struct dev_kfree_skb_any(skb); }
+#define IGB_RET_PTP_DISABLED 1 +#define IGB_RET_PTP_INVALID 2 + /** * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp * @q_vector: Pointer to interrupt specific structure @@@ -867,29 -864,19 +867,29 @@@ * * This function is meant to retrieve a timestamp from the first buffer of an * incoming frame. The value is stored in little endian format starting on - * byte 8. + * byte 8 + * + * Returns: 0 if success, nonzero if failure **/ -void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, - struct sk_buff *skb) +int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, + struct sk_buff *skb) { - __le64 *regval = (__le64 *)va; struct igb_adapter *adapter = q_vector->adapter; + __le64 *regval = (__le64 *)va; int adjust = 0;
+ if (!(adapter->ptp_flags & IGB_PTP_ENABLED)) + return IGB_RET_PTP_DISABLED; + /* The timestamp is recorded in little endian format. * DWORD: 0 1 2 3 * Field: Reserved Reserved SYSTIML SYSTIMH */ + + /* check reserved dwords are zero, be/le doesn't matter for zero */ + if (regval[0]) + return IGB_RET_PTP_INVALID; + igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), le64_to_cpu(regval[1]));
@@@ -909,8 -896,6 +909,8 @@@ } skb_hwtstamps(skb)->hwtstamp = ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); + + return 0; }
/** @@@ -921,15 -906,13 +921,15 @@@ * This function is meant to retrieve a timestamp from the internal registers * of the adapter and store it in the skb. **/ -void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, - struct sk_buff *skb) +void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb) { struct igb_adapter *adapter = q_vector->adapter; struct e1000_hw *hw = &adapter->hw; - u64 regval; int adjust = 0; + u64 regval; + + if (!(adapter->ptp_flags & IGB_PTP_ENABLED)) + return;
/* If this bit is set, then the RX registers contain the time stamp. No * other packet will be time stamped until we read these registers, so @@@ -1025,6 -1008,7 +1025,7 @@@ static int igb_ptp_set_timestamp_mode(s switch (config->tx_type) { case HWTSTAMP_TX_OFF: tsync_tx_ctl = 0; + break; case HWTSTAMP_TX_ON: break; default: diff --combined drivers/net/ethernet/intel/igc/igc_main.c index 4d989ebc9713,a476837eafca..baa45a1f3a65 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@@ -941,7 -941,7 +941,7 @@@ static void igc_tx_ctxtdesc(struct igc_ struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); ktime_t txtime = first->skb->tstamp;
- first->skb->tstamp = ktime_set(0, 0); + skb_txtime_consumed(first->skb); context_desc->launch_time = igc_tx_launchtime(adapter, txtime); } else { @@@ -3580,7 -3580,7 +3580,7 @@@ void igc_up(struct igc_adapter *adapter netif_tx_start_all_queues(adapter->netdev);
/* start the watchdog. */ - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; schedule_work(&adapter->watchdog_task); }
@@@ -3831,19 -3831,10 +3831,19 @@@ static void igc_reset_task(struct work_
adapter = container_of(work, struct igc_adapter, reset_task);
+ rtnl_lock(); + /* If we're already down or resetting, just bail */ + if (test_bit(__IGC_DOWN, &adapter->state) || + test_bit(__IGC_RESETTING, &adapter->state)) { + rtnl_unlock(); + return; + } + igc_rings_dump(adapter); igc_regs_dump(adapter); netdev_err(adapter->netdev, "Reset adapter\n"); igc_reinit_locked(adapter); + rtnl_unlock(); }
/** @@@ -4009,7 -4000,7 +4009,7 @@@ static irqreturn_t igc_msix_other(int i }
if (icr & IGC_ICR_LSC) { - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; /* guard against interrupt when we're going down */ if (!test_bit(__IGC_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); @@@ -4387,7 -4378,7 +4387,7 @@@ static irqreturn_t igc_intr_msi(int irq }
if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; if (!test_bit(__IGC_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); } @@@ -4429,7 -4420,7 +4429,7 @@@ static irqreturn_t igc_intr(int irq, vo }
if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; /* guard against interrupt when we're going down */ if (!test_bit(__IGC_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); @@@ -4583,7 -4574,7 +4583,7 @@@ static int __igc_open(struct net_devic netif_tx_start_all_queues(netdev);
/* start the watchdog. */ - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; schedule_work(&adapter->watchdog_task);
return IGC_SUCCESS; @@@ -4924,7 -4915,7 +4924,7 @@@ int igc_set_spd_dplx(struct igc_adapte { struct igc_mac_info *mac = &adapter->hw.mac;
- mac->autoneg = 0; + mac->autoneg = false;
/* Make sure dplx is at most 1 bit and lsb of speed is not set * for the switch() below to work @@@ -4946,13 -4937,13 +4946,13 @@@ mac->forced_speed_duplex = ADVERTISE_100_FULL; break; case SPEED_1000 + DUPLEX_FULL: - mac->autoneg = 1; + mac->autoneg = true; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case SPEED_1000 + DUPLEX_HALF: /* not supported */ goto err_inval; case SPEED_2500 + DUPLEX_FULL: - mac->autoneg = 1; + mac->autoneg = true; adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL; break; case SPEED_2500 + DUPLEX_HALF: /* not supported */ diff --combined drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 03d9aad516d4,4c90f83fd6ce..19fe21116fe8 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@@ -225,7 -225,7 +225,7 @@@ static s32 ixgbe_get_parent_bus_info(st }
/** - * ixgbe_check_from_parent - Determine whether PCIe info should come from parent + * ixgbe_pcie_from_parent - Determine whether PCIe info should come from parent * @hw: hw specific details * * This function is used by probe to determine whether a device's PCI-Express @@@ -4118,8 -4118,6 +4118,8 @@@ void ixgbe_configure_rx_ring(struct ixg #endif }
+ ring->rx_offset = ixgbe_rx_offset(ring); + if (ring->xsk_pool && hw->mac.type != ixgbe_mac_82599EB) { u32 xsk_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
@@@ -6158,7 -6156,7 +6158,7 @@@ void ixgbe_down(struct ixgbe_adapter *a }
/** - * ixgbe_eee_capable - helper function to determine EEE support on X550 + * ixgbe_set_eee_capable - helper function to determine EEE support on X550 * @adapter: board private structure */ static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter) @@@ -6580,6 -6578,7 +6580,6 @@@ int ixgbe_setup_rx_resources(struct ixg
rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; - rx_ring->rx_offset = ixgbe_rx_offset(rx_ring);
/* XDP RX-queue info */ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, diff --combined drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 76f399229ddb,baaba01bd8c5..c2cc4806d13c --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@@ -548,6 -548,12 +548,12 @@@ static inline int is_afvf(u16 pcifunc return !(pcifunc & ~RVU_PFVF_FUNC_MASK); }
+ /* check if PF_FUNC is AF */ + static inline bool is_pffunc_af(u16 pcifunc) + { + return !pcifunc; + } + static inline bool is_rvu_fwdata_valid(struct rvu *rvu) { return (rvu->fwdata->header_magic == RVU_FWDATA_HEADER_MAGIC) && @@@ -640,7 -646,8 +646,8 @@@ int npc_config_ts_kpuaction(struct rvu void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan, u8 *mac_addr); void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, - int nixlf, u64 chan, bool allmulti); + int nixlf, u64 chan, u8 chan_cnt, + bool allmulti); void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, @@@ -665,9 -672,6 +672,6 @@@ int rvu_npc_get_tx_nibble_cfg(struct rv int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel); int npc_flow_steering_init(struct rvu *rvu, int blkaddr); const char *npc_get_field_name(u8 hdr); - bool rvu_npc_write_default_rule(struct rvu *rvu, int blkaddr, int nixlf, - u16 pcifunc, u8 intf, struct mcam_entry *entry, - int *entry_index); int npc_get_bank(struct npc_mcam *mcam, int index); void npc_mcam_enable_flows(struct rvu *rvu, u16 target); void npc_mcam_disable_flows(struct rvu *rvu, u16 target); @@@ -678,8 -682,12 +682,13 @@@ void npc_read_mcam_entry(struct rvu *rv u8 *intf, u8 *ena); bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature); u32 rvu_cgx_get_fifolen(struct rvu *rvu); +void *rvu_first_cgx_pdata(struct rvu *rvu);
+ int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, + int type); + bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, + int index); + /* CPT APIs */ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot);
diff --combined drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index b4c53b19f535,741da112fdf0..8ec17ee72b5d --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@@ -234,14 -234,12 +234,14 @@@ static ssize_t rvu_dbg_rsrc_attach_stat char __user *buffer, size_t count, loff_t *ppos) { - int index, off = 0, flag = 0, go_back = 0, off_prev; + int index, off = 0, flag = 0, go_back = 0, len = 0; struct rvu *rvu = filp->private_data; int lf, pf, vf, pcifunc; struct rvu_block block; int bytes_not_copied; + int lf_str_size = 12; int buf_size = 2048; + char *lfs; char *buf;
/* don't allow partial reads */ @@@ -251,18 -249,12 +251,18 @@@ buf = kzalloc(buf_size, GFP_KERNEL); if (!buf) return -ENOSPC; - off += scnprintf(&buf[off], buf_size - 1 - off, "\npcifunc\t\t"); + + lfs = kzalloc(lf_str_size, GFP_KERNEL); + if (!lfs) + return -ENOMEM; + off += scnprintf(&buf[off], buf_size - 1 - off, "%-*s", lf_str_size, + "pcifunc"); for (index = 0; index < BLK_COUNT; index++) - if (strlen(rvu->hw->block[index].name)) - off += scnprintf(&buf[off], buf_size - 1 - off, - "%*s\t", (index - 1) * 2, - rvu->hw->block[index].name); + if (strlen(rvu->hw->block[index].name)) { + off += scnprintf(&buf[off], buf_size - 1 - off, + "%-*s", lf_str_size, + rvu->hw->block[index].name); + } off += scnprintf(&buf[off], buf_size - 1 - off, "\n"); for (pf = 0; pf < rvu->hw->total_pfs; pf++) { for (vf = 0; vf <= rvu->hw->total_vfs; vf++) { @@@ -271,15 -263,14 +271,15 @@@ continue;
if (vf) { + sprintf(lfs, "PF%d:VF%d", pf, vf - 1); go_back = scnprintf(&buf[off], buf_size - 1 - off, - "PF%d:VF%d\t\t", pf, - vf - 1); + "%-*s", lf_str_size, lfs); } else { + sprintf(lfs, "PF%d", pf); go_back = scnprintf(&buf[off], buf_size - 1 - off, - "PF%d\t\t", pf); + "%-*s", lf_str_size, lfs); }
off += go_back; @@@ -287,22 -278,20 +287,22 @@@ block = rvu->hw->block[index]; if (!strlen(block.name)) continue; - off_prev = off; + len = 0; + lfs[len] = '\0'; for (lf = 0; lf < block.lf.max; lf++) { if (block.fn_map[lf] != pcifunc) continue; flag = 1; - off += scnprintf(&buf[off], buf_size - 1 - - off, "%3d,", lf); + len += sprintf(&lfs[len], "%d,", lf); } - if (flag && off_prev != off) - off--; - else - go_back++; + + if (flag) + len--; + lfs[len] = '\0'; off += scnprintf(&buf[off], buf_size - 1 - off, - "\t"); + "%-*s", lf_str_size, lfs); + if (!strlen(lfs)) + go_back += lf_str_size; } if (!flag) off -= go_back; @@@ -314,7 -303,6 +314,7 @@@ }
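The rewrite above switches the debugfs table from tab-based alignment to fixed-width columns built in a scratch string, so a long LF list can no longer shift the columns that follow it. The printf mechanics in isolation (lf_str_size = 12 is the width chosen in this hunk):

    char buf[32];

    /* "%-*s" left-justifies and pads to the given field width */
    scnprintf(buf, sizeof(buf), "%-*s", 12, "PF1:VF3");
    /* buf now holds "PF1:VF3     " -- exactly 12 characters, so every column
     * starts at a multiple of lf_str_size regardless of the entry length.
     */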
bytes_not_copied = copy_to_user(buffer, buf, off); + kfree(lfs); kfree(buf);
if (bytes_not_copied) @@@ -331,6 -319,7 +331,6 @@@ static int rvu_dbg_rvu_pf_cgx_map_displ struct rvu *rvu = filp->private; struct pci_dev *pdev = NULL; struct mac_ops *mac_ops; - int rvu_def_cgx_id = 0; char cgx[10], lmac[10]; struct rvu_pfvf *pfvf; int pf, domain, blkid; @@@ -338,10 -327,7 +338,10 @@@ u16 pcifunc;
domain = 2; - mac_ops = get_mac_ops(rvu_cgx_pdata(rvu_def_cgx_id, rvu)); + mac_ops = get_mac_ops(rvu_first_cgx_pdata(rvu)); + /* There can be no CGX devices at all */ + if (!mac_ops) + return 0; seq_printf(filp, "PCI dev\t\tRVU PF Func\tNIX block\t%s\tLMAC\n", mac_ops->name); for (pf = 0; pf < rvu->hw->total_pfs; pf++) { @@@ -1832,6 -1818,7 +1832,6 @@@ static void rvu_dbg_cgx_init(struct rv { struct mac_ops *mac_ops; unsigned long lmac_bmap; - int rvu_def_cgx_id = 0; int i, lmac_id; char dname[20]; void *cgx; @@@ -1839,7 -1826,7 +1839,7 @@@ if (!cgx_get_cgxcnt_max()) return;
- mac_ops = get_mac_ops(rvu_cgx_pdata(rvu_def_cgx_id, rvu)); + mac_ops = get_mac_ops(rvu_first_cgx_pdata(rvu)); if (!mac_ops) return;
@@@ -2015,7 -2002,7 +2015,7 @@@ static void rvu_dbg_npc_mcam_show_flows seq_printf(s, "mask 0x%x\n", ntohs(rule->mask.etype)); break; case NPC_OUTER_VID: - seq_printf(s, "%d ", ntohs(rule->packet.vlan_tci)); + seq_printf(s, "0x%x ", ntohs(rule->packet.vlan_tci)); seq_printf(s, "mask 0x%x\n", ntohs(rule->mask.vlan_tci)); break; @@@ -2158,7 -2145,7 +2158,7 @@@ static int rvu_dbg_npc_mcam_show_rules( seq_printf(s, "\tmcam entry: %d\n", iter->entry);
rvu_dbg_npc_mcam_show_flows(s, iter); - if (iter->intf == NIX_INTF_RX) { + if (is_npc_intf_rx(iter->intf)) { target = iter->rx_action.pf_func; pf = (target >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK; seq_printf(s, "\tForward to: PF%d ", pf); diff --combined drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 3d068b7d46bd,a87104121344..0a8bd667cb11 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@@ -273,7 -273,8 +273,8 @@@ static int nix_interface_init(struct rv pfvf->rx_chan_cnt = 1; pfvf->tx_chan_cnt = 1; rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, - pfvf->rx_chan_base, false); + pfvf->rx_chan_base, + pfvf->rx_chan_cnt, false); break; }
@@@ -2629,7 -2630,7 +2630,7 @@@ static int set_flowkey_fields(struct ni struct nix_rx_flowkey_alg *field; struct nix_rx_flowkey_alg tmp; u32 key_type, valid_key; - int l4_key_offset; + int l4_key_offset = 0;
if (!alg) return -EINVAL; @@@ -3088,7 -3089,8 +3089,8 @@@ int rvu_mbox_handler_nix_set_rx_mode(st rvu_npc_disable_promisc_entry(rvu, pcifunc, nixlf); else rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, - pfvf->rx_chan_base, allmulti); + pfvf->rx_chan_base, + pfvf->rx_chan_cnt, allmulti); return 0; }
@@@ -3635,9 -3637,7 +3637,7 @@@ int rvu_mbox_handler_nix_lf_stop_rx(str if (err) return err;
- rvu_npc_disable_default_entries(rvu, pcifunc, nixlf); - - npc_mcam_disable_flows(rvu, pcifunc); + rvu_npc_disable_mcam_entries(rvu, pcifunc, nixlf);
return rvu_cgx_start_stop_io(rvu, pcifunc, false); } diff --combined drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 0bd49c7080a6,16d7797b7a14..0bc4529691ec --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@@ -22,10 -22,6 +22,6 @@@ #define RSVD_MCAM_ENTRIES_PER_PF 2 /* Bcast & Promisc */ #define RSVD_MCAM_ENTRIES_PER_NIXLF 1 /* Ucast for LFs */
- #define NIXLF_UCAST_ENTRY 0 - #define NIXLF_BCAST_ENTRY 1 - #define NIXLF_PROMISC_ENTRY 2 - #define NPC_PARSE_RESULT_DMAC_OFFSET 8 #define NPC_HW_TSTAMP_OFFSET 8 #define NPC_KEX_CHAN_MASK 0xFFFULL @@@ -96,6 -92,10 +92,10 @@@ int npc_mcam_verify_channel(struct rvu if (is_npc_intf_tx(intf)) return 0;
+ /* return in case of AF installed rules */ + if (is_pffunc_af(pcifunc)) + return 0; + if (is_afvf(pcifunc)) { end = rvu_get_num_lbk_chans(); if (end < 0) @@@ -196,8 -196,8 +196,8 @@@ static int npc_get_ucast_mcam_index(str return mcam->nixlf_offset + (max + nixlf) * RSVD_MCAM_ENTRIES_PER_NIXLF; }
- static int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, - u16 pcifunc, int nixlf, int type) + int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, + u16 pcifunc, int nixlf, int type) { int pf = rvu_get_pf(pcifunc); int index; @@@ -230,8 -230,8 +230,8 @@@ int npc_get_bank(struct npc_mcam *mcam return bank; }
- static bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, - int blkaddr, int index) + bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, int index) { int bank = npc_get_bank(mcam, index); u64 cfg; @@@ -647,13 -647,17 +647,17 @@@ void rvu_npc_install_ucast_entry(struc }
void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, - int nixlf, u64 chan, bool allmulti) + int nixlf, u64 chan, u8 chan_cnt, + bool allmulti) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); + struct npc_install_flow_req req = { 0 }; + struct npc_install_flow_rsp rsp = { 0 }; struct npc_mcam *mcam = &rvu->hw->mcam; - int blkaddr, ucast_idx, index, kwi; - struct mcam_entry entry = { {0} }; - struct nix_rx_action action = { }; + int blkaddr, ucast_idx, index; + u8 mac_addr[ETH_ALEN] = { 0 }; + struct nix_rx_action action; + u64 relaxed_mask;
/* Only PF or AF VF can add a promiscuous entry */ if ((pcifunc & RVU_PFVF_FUNC_MASK) && !is_afvf(pcifunc)) @@@ -663,24 -667,15 +667,15 @@@ if (blkaddr < 0) return;
+ *(u64 *)&action = 0x00; index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_PROMISC_ENTRY);
- entry.kw[0] = chan; - entry.kw_mask[0] = 0xFFFULL; - - if (allmulti) { - kwi = NPC_KEXOF_DMAC / sizeof(u64); - entry.kw[kwi] = BIT_ULL(40); /* LSB bit of 1st byte in DMAC */ - entry.kw_mask[kwi] = BIT_ULL(40); - } - - ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, - nixlf, NIXLF_UCAST_ENTRY); - /* If the corresponding PF's ucast action is RSS, * use the same action for promisc also */ + ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, + nixlf, NIXLF_UCAST_ENTRY); if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx)) *(u64 *)&action = npc_get_mcam_action(rvu, mcam, blkaddr, ucast_idx); @@@ -691,9 -686,36 +686,36 @@@ action.pf_func = pcifunc; }
- entry.action = *(u64 *)&action; - npc_config_mcam_entry(rvu, mcam, blkaddr, index, - pfvf->nix_rx_intf, &entry, true); + if (allmulti) { + mac_addr[0] = 0x01; /* LSB bit of 1st byte in DMAC */ + ether_addr_copy(req.packet.dmac, mac_addr); + ether_addr_copy(req.mask.dmac, mac_addr); + req.features = BIT_ULL(NPC_DMAC); + } + + req.chan_mask = 0xFFFU; + if (chan_cnt > 1) { + if (!is_power_of_2(chan_cnt)) { + dev_err(rvu->dev, + "%s: channel count more than 1, must be power of 2\n", __func__); + return; + } + relaxed_mask = GENMASK_ULL(BITS_PER_LONG_LONG - 1, + ilog2(chan_cnt)); + req.chan_mask &= relaxed_mask; + } + + req.channel = chan; + req.intf = pfvf->nix_rx_intf; + req.entry = index; + req.op = action.op; + req.hdr.pcifunc = 0; /* AF is requester */ + req.vf = pcifunc; + req.index = action.index; + req.match_id = action.match_id; + req.flow_key_alg = action.flow_key_alg; + + rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); }
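The chan_mask relaxation above lets a single promiscuous MCAM entry cover a range of consecutive channels instead of needing one entry per channel. A worked example, assuming (as the power-of-2 restriction implies) that the base channel is naturally aligned to chan_cnt:

    /*
     * chan_cnt = 16                        ->  ilog2(chan_cnt) = 4
     * relaxed_mask = GENMASK_ULL(63, 4)    =   0xfffffffffffffff0
     * chan_mask    = 0xFFF & relaxed_mask  =   0xFF0
     *
     * One entry with channel = chan, mask = 0xFF0 then matches
     * chan, chan + 1, ..., chan + 15. Non-power-of-2 counts are rejected
     * because they cannot be expressed as a single prefix mask.
     */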
static void npc_enadis_promisc_entry(struct rvu *rvu, u16 pcifunc, @@@ -728,12 -750,14 +750,14 @@@ void rvu_npc_enable_promisc_entry(struc void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan) { + struct rvu_pfvf *pfvf; + struct npc_install_flow_req req = { 0 }; + struct npc_install_flow_rsp rsp = { 0 }; struct npc_mcam *mcam = &rvu->hw->mcam; - struct mcam_entry entry = { {0} }; struct rvu_hwinfo *hw = rvu->hw; - struct nix_rx_action action; - struct rvu_pfvf *pfvf; int blkaddr, index; + u32 req_index = 0; + u8 op;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) @@@ -755,32 -779,29 +779,29 @@@ index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_BCAST_ENTRY);
- /* Match ingress channel */ - entry.kw[0] = chan; - entry.kw_mask[0] = 0xfffull; - - /* Match broadcast MAC address. - * DMAC is extracted at 0th bit of PARSE_KEX::KW1 - */ - entry.kw[1] = 0xffffffffffffull; - entry.kw_mask[1] = 0xffffffffffffull; - - *(u64 *)&action = 0x00; if (!hw->cap.nix_rx_multicast) { /* Early silicon doesn't support pkt replication, * so install entry with UCAST action, so that PF * receives all broadcast packets. */ - action.op = NIX_RX_ACTIONOP_UCAST; - action.pf_func = pcifunc; + op = NIX_RX_ACTIONOP_UCAST; } else { - action.index = pfvf->bcast_mce_idx; - action.op = NIX_RX_ACTIONOP_MCAST; + op = NIX_RX_ACTIONOP_MCAST; + req_index = pfvf->bcast_mce_idx; }
- entry.action = *(u64 *)&action; - npc_config_mcam_entry(rvu, mcam, blkaddr, index, - pfvf->nix_rx_intf, &entry, true); + eth_broadcast_addr((u8 *)&req.packet.dmac); + eth_broadcast_addr((u8 *)&req.mask.dmac); + req.features = BIT_ULL(NPC_DMAC); + req.channel = chan; + req.intf = pfvf->nix_rx_intf; + req.entry = index; + req.op = op; + req.hdr.pcifunc = 0; /* AF is requester */ + req.vf = pcifunc; + req.index = req_index; + + rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); }
void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable) @@@ -967,7 -988,7 +988,7 @@@ void rvu_npc_disable_mcam_entries(struc { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); struct npc_mcam *mcam = &rvu->hw->mcam; - struct rvu_npc_mcam_rule *rule; + struct rvu_npc_mcam_rule *rule, *tmp; int blkaddr;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); @@@ -977,15 -998,18 +998,18 @@@ mutex_lock(&mcam->lock);
/* Disable MCAM entries directing traffic to this 'pcifunc' */ - list_for_each_entry(rule, &mcam->mcam_rules, list) { + list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) { if (is_npc_intf_rx(rule->intf) && rule->rx_action.pf_func == pcifunc) { npc_enable_mcam_entry(rvu, mcam, blkaddr, rule->entry, false); rule->enable = false; /* Indicate that default rule is disabled */ - if (rule->default_rule) + if (rule->default_rule) { pfvf->def_ucast_rule = NULL; + list_del(&rule->list); + kfree(rule); + } } }
@@@ -1674,6 -1698,9 +1698,9 @@@ void rvu_npc_get_mcam_counter_alloc_inf static int npc_mcam_verify_entry(struct npc_mcam *mcam, u16 pcifunc, int entry) { + /* verify AF installed entries */ + if (is_pffunc_af(pcifunc)) + return 0; /* Verify if entry is valid and if it is indeed * allocated to the requesting PFFUNC. */ @@@ -2268,6 -2295,10 +2295,10 @@@ int rvu_mbox_handler_npc_mcam_write_ent goto exit; }
+ /* For AF installed rules, the nix_intf should be set to target NIX */ + if (is_pffunc_af(req->hdr.pcifunc)) + nix_intf = req->intf; + npc_config_mcam_entry(rvu, mcam, blkaddr, req->entry, nix_intf, &req->entry_data, req->enable_entry);
@@@ -2490,10 -2521,10 +2521,10 @@@ int rvu_mbox_handler_npc_mcam_free_coun index = find_next_bit(mcam->bmap, mcam->bmap_entries, entry); if (index >= mcam->bmap_entries) break; + entry = index + 1; if (mcam->entry2cntr_map[index] != req->cntr) continue;
- entry = index + 1; npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr, index, req->cntr); } @@@ -2730,30 -2761,6 +2761,6 @@@ int rvu_mbox_handler_npc_get_kex_cfg(st return 0; }
- bool rvu_npc_write_default_rule(struct rvu *rvu, int blkaddr, int nixlf, - u16 pcifunc, u8 intf, struct mcam_entry *entry, - int *index) - { - struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); - struct npc_mcam *mcam = &rvu->hw->mcam; - bool enable; - u8 nix_intf; - - if (is_npc_intf_tx(intf)) - nix_intf = pfvf->nix_tx_intf; - else - nix_intf = pfvf->nix_rx_intf; - - *index = npc_get_nixlf_mcam_index(mcam, pcifunc, - nixlf, NIXLF_UCAST_ENTRY); - /* dont force enable unicast entry */ - enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, *index); - npc_config_mcam_entry(rvu, mcam, blkaddr, *index, nix_intf, - entry, enable); - - return enable; - } - int rvu_mbox_handler_npc_read_base_steer_rule(struct rvu *rvu, struct msg_req *req, struct npc_mcam_read_base_rule_rsp *rsp) @@@ -2799,3 -2806,42 +2806,42 @@@ read_entry out: return rc; } + + int rvu_mbox_handler_npc_mcam_entry_stats(struct rvu *rvu, + struct npc_mcam_get_stats_req *req, + struct npc_mcam_get_stats_rsp *rsp) + { + struct npc_mcam *mcam = &rvu->hw->mcam; + u16 index, cntr; + int blkaddr; + u64 regval; + u32 bank; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return NPC_MCAM_INVALID_REQ; + + mutex_lock(&mcam->lock); + + index = req->entry & (mcam->banksize - 1); + bank = npc_get_bank(mcam, req->entry); + + /* read MCAM entry STAT_ACT register */ + regval = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank)); + + if (!(regval & BIT_ULL(9))) { + rsp->stat_ena = 0; + mutex_unlock(&mcam->lock); + return 0; + } + + cntr = regval & 0x1FF; + + rsp->stat_ena = 1; + rsp->stat = rvu_read64(rvu, blkaddr, NPC_AF_MATCH_STATX(cntr)); + rsp->stat &= BIT_ULL(48) - 1; + + mutex_unlock(&mcam->lock); + + return 0; + } diff --combined drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index dc1778420978,fa7a46aa15ef..0b4fa92ba821 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@@ -57,10 -57,13 +57,13 @@@ int otx2_alloc_mcam_entries(struct otx2 flow_cfg->ntuple_max_flows = rsp->count; flow_cfg->ntuple_offset = 0; pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT; + flow_cfg->tc_max_flows = flow_cfg->ntuple_max_flows; + pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT; } else { flow_cfg->vf_vlan_offset = 0; flow_cfg->ntuple_offset = flow_cfg->vf_vlan_offset + vf_vlan_max_flows; + flow_cfg->tc_flower_offset = flow_cfg->ntuple_offset; flow_cfg->unicast_offset = flow_cfg->ntuple_offset + OTX2_MAX_NTUPLE_FLOWS; flow_cfg->rx_vlan_offset = flow_cfg->unicast_offset + @@@ -69,6 -72,7 +72,7 @@@ pfvf->flags |= OTX2_FLAG_UCAST_FLTR_SUPPORT; pfvf->flags |= OTX2_FLAG_RX_VLAN_SUPPORT; pfvf->flags |= OTX2_FLAG_VF_VLAN_SUPPORT; + pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT; }
for (i = 0; i < rsp->count; i++) @@@ -93,6 -97,7 +97,7 @@@ int otx2_mcam_flow_init(struct otx2_ni INIT_LIST_HEAD(&pf->flow_cfg->flow_list);
pf->flow_cfg->ntuple_max_flows = OTX2_MAX_NTUPLE_FLOWS; + pf->flow_cfg->tc_max_flows = pf->flow_cfg->ntuple_max_flows;
err = otx2_alloc_mcam_entries(pf); if (err) @@@ -257,19 -262,17 +262,19 @@@ int otx2_get_flow(struct otx2_nic *pfvf int otx2_get_all_flows(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc, u32 *rule_locs) { + u32 rule_cnt = nfc->rule_cnt; u32 location = 0; int idx = 0; int err = 0;
nfc->data = pfvf->flow_cfg->ntuple_max_flows; - while ((!err || err == -ENOENT) && idx < nfc->rule_cnt) { + while ((!err || err == -ENOENT) && idx < rule_cnt) { err = otx2_get_flow(pfvf, nfc, location); if (!err) rule_locs[idx++] = location; location++; } + nfc->rule_cnt = rule_cnt;
return err; } @@@ -303,6 -306,35 +308,35 @@@ static int otx2_prepare_ipv4_flow(struc sizeof(pmask->ip4dst)); req->features |= BIT_ULL(NPC_DIP_IPV4); } + if (ipv4_usr_mask->tos) { + pkt->tos = ipv4_usr_hdr->tos; + pmask->tos = ipv4_usr_mask->tos; + req->features |= BIT_ULL(NPC_TOS); + } + if (ipv4_usr_mask->proto) { + switch (ipv4_usr_hdr->proto) { + case IPPROTO_ICMP: + req->features |= BIT_ULL(NPC_IPPROTO_ICMP); + break; + case IPPROTO_TCP: + req->features |= BIT_ULL(NPC_IPPROTO_TCP); + break; + case IPPROTO_UDP: + req->features |= BIT_ULL(NPC_IPPROTO_UDP); + break; + case IPPROTO_SCTP: + req->features |= BIT_ULL(NPC_IPPROTO_SCTP); + break; + case IPPROTO_AH: + req->features |= BIT_ULL(NPC_IPPROTO_AH); + break; + case IPPROTO_ESP: + req->features |= BIT_ULL(NPC_IPPROTO_ESP); + break; + default: + return -EOPNOTSUPP; + } + } pkt->etype = cpu_to_be16(ETH_P_IP); pmask->etype = cpu_to_be16(0xFFFF); req->features |= BIT_ULL(NPC_ETYPE); @@@ -327,6 -359,11 +361,11 @@@ sizeof(pmask->ip4dst)); req->features |= BIT_ULL(NPC_DIP_IPV4); } + if (ipv4_l4_mask->tos) { + pkt->tos = ipv4_l4_hdr->tos; + pmask->tos = ipv4_l4_mask->tos; + req->features |= BIT_ULL(NPC_TOS); + } if (ipv4_l4_mask->psrc) { memcpy(&pkt->sport, &ipv4_l4_hdr->psrc, sizeof(pkt->sport)); @@@ -377,10 -414,14 +416,14 @@@ sizeof(pmask->ip4dst)); req->features |= BIT_ULL(NPC_DIP_IPV4); } + if (ah_esp_mask->tos) { + pkt->tos = ah_esp_hdr->tos; + pmask->tos = ah_esp_mask->tos; + req->features |= BIT_ULL(NPC_TOS); + }
/* NPC profile doesn't extract AH/ESP header fields */ - if ((ah_esp_mask->spi & ah_esp_hdr->spi) || - (ah_esp_mask->tos & ah_esp_mask->tos)) + if (ah_esp_mask->spi & ah_esp_hdr->spi) return -EOPNOTSUPP;
if (flow_type == AH_V4_FLOW) diff --combined drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 2fd3d235d292,772a29ba8503..03004fdac0c6 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@@ -1672,7 -1672,6 +1672,7 @@@ int otx2_stop(struct net_device *netdev struct otx2_nic *pf = netdev_priv(netdev); struct otx2_cq_poll *cq_poll = NULL; struct otx2_qset *qset = &pf->qset; + struct otx2_rss_info *rss; int qidx, vec, wrk;
netif_carrier_off(netdev); @@@ -1685,10 -1684,6 +1685,10 @@@ /* First stop packet Rx/Tx */ otx2_rxtx_enable(pf, false);
+ /* Clear RSS enable flag */ + rss = &pf->hw.rss_info; + rss->enable = false; + /* Cleanup Queue IRQ */ vec = pci_irq_vector(pf->pdev, pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START); @@@ -1765,6 -1760,24 +1765,24 @@@ static netdev_tx_t otx2_xmit(struct sk_ return NETDEV_TX_OK; }
+ static netdev_features_t otx2_fix_features(struct net_device *dev, + netdev_features_t features) + { + /* check if n-tuple filters are ON */ + if ((features & NETIF_F_HW_TC) && (dev->features & NETIF_F_NTUPLE)) { + netdev_info(dev, "Disabling n-tuple filters\n"); + features &= ~NETIF_F_NTUPLE; + } + + /* check if tc hw offload is ON */ + if ((features & NETIF_F_NTUPLE) && (dev->features & NETIF_F_HW_TC)) { + netdev_info(dev, "Disabling TC hardware offload\n"); + features &= ~NETIF_F_HW_TC; + } + + return features; + } + static void otx2_set_rx_mode(struct net_device *netdev) { struct otx2_nic *pf = netdev_priv(netdev); @@@ -1827,6 -1840,12 +1845,12 @@@ static int otx2_set_features(struct net if ((changed & NETIF_F_NTUPLE) && !ntuple) otx2_destroy_ntuple_flows(pf);
+ if ((netdev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) && + pf->tc_info.num_entries) { + netdev_err(netdev, "Can't disable TC hardware offload while flows are active\n"); + return -EBUSY; + } + return 0; }
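Together, otx2_fix_features() and the new check in otx2_set_features() make NETIF_F_NTUPLE and NETIF_F_HW_TC mutually exclusive. A behavioral trace of the code above, assuming the usual netdev core ordering in which ndo_fix_features runs before ndo_set_features:

    /*
     * ethtool -K eth0 ntuple on       (hw-tc-offload currently enabled)
     *   otx2_fix_features():  requested set has NTUPLE, dev has HW_TC
     *                         -> "Disabling TC hardware offload", HW_TC cleared
     *   otx2_set_features():  HW_TC is being dropped; if pf->tc_info.num_entries
     *                         is non-zero the request fails with -EBUSY,
     *                         otherwise ntuple comes up and tc offload goes down.
     */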
@@@ -2225,6 -2244,7 +2249,7 @@@ static const struct net_device_ops otx2 .ndo_open = otx2_open, .ndo_stop = otx2_stop, .ndo_start_xmit = otx2_xmit, + .ndo_fix_features = otx2_fix_features, .ndo_set_mac_address = otx2_set_mac_address, .ndo_change_mtu = otx2_change_mtu, .ndo_set_rx_mode = otx2_set_rx_mode, @@@ -2235,6 -2255,7 +2260,7 @@@ .ndo_set_vf_mac = otx2_set_vf_mac, .ndo_set_vf_vlan = otx2_set_vf_vlan, .ndo_get_vf_config = otx2_get_vf_config, + .ndo_setup_tc = otx2_setup_tc, };
static int otx2_wq_init(struct otx2_nic *pf) @@@ -2454,6 -2475,10 +2480,10 @@@ static int otx2_probe(struct pci_dev *p NETIF_F_HW_VLAN_STAG_RX; netdev->features |= netdev->hw_features;
+ /* HW supports tc offload but mutually exclusive with n-tuple filters */ + if (pf->flags & OTX2_FLAG_TC_FLOWER_SUPPORT) + netdev->hw_features |= NETIF_F_HW_TC; + netdev->gso_max_segs = OTX2_MAX_GSO_SEGS; netdev->watchdog_timeo = OTX2_TX_TIMEOUT;
@@@ -2475,6 -2500,10 +2505,10 @@@
otx2_set_ethtool_ops(netdev);
+ err = otx2_init_tc(pf); + if (err) + goto err_mcam_flow_del; + /* Enable link notifications */ otx2_cgx_config_linkevents(pf, true);
@@@ -2484,6 -2513,8 +2518,8 @@@
return 0;
+ err_mcam_flow_del: + otx2_mcam_flow_del(pf); err_unreg_netdev: unregister_netdev(netdev); err_del_mcam_entries: @@@ -2651,6 -2682,7 +2687,7 @@@ static void otx2_remove(struct pci_dev
otx2_ptp_destroy(pf); otx2_mcam_flow_del(pf); + otx2_shutdown_tc(pf); otx2_detach_resources(&pf->mbox); if (pf->hw.lmt_base) iounmap(pf->hw.lmt_base); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en.h index 304b296fe8b9,1f5bc4d91060..9ea3f3befe74 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@@ -92,15 -92,14 +92,15 @@@ struct page_pool MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
-#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2) +#define MLX5_ALIGN_MTTS(mtts) (ALIGN(mtts, 8)) +#define MLX5_ALIGNED_MTTS_OCTW(mtts) ((mtts) / 2) +#define MLX5_MTT_OCTW(mtts) (MLX5_ALIGNED_MTTS_OCTW(MLX5_ALIGN_MTTS(mtts))) /* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between * WQEs, This page will absorb write overflow by the hardware, when * receiving packets larger than MTU. These oversize packets are * dropped by the driver at a later stage. */ -#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8)) -#define MLX5E_LOG_ALIGNED_MPWQE_PPW (ilog2(MLX5E_REQUIRED_WQE_MTTS)) +#define MLX5E_REQUIRED_WQE_MTTS (MLX5_ALIGN_MTTS(MLX5_MPWRQ_PAGES_PER_WQE + 1)) #define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS) #define MLX5E_MAX_RQ_NUM_MTTS \ ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */ @@@ -881,7 -880,6 +881,6 @@@ struct mlx5e_priv #endif struct devlink_health_reporter *tx_reporter; struct devlink_health_reporter *rx_reporter; - struct devlink_port dl_port; struct mlx5e_xsk xsk; #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) struct mlx5e_hv_vhca_stats_agent stats_agent; @@@ -1175,6 -1173,7 +1174,7 @@@ void mlx5e_detach_netdev(struct mlx5e_p void mlx5e_destroy_netdev(struct mlx5e_priv *priv); int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, const struct mlx5e_profile *new_profile, void *new_ppriv); + void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index b2cd29847a37,5e3d31b888ce..df13e5094034 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@@ -695,7 -695,7 +695,7 @@@ mlx5_tc_ct_entry_add_rule(struct mlx5_t
zone_rule->nat = nat;
- spec = kzalloc(sizeof(*spec), GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return -ENOMEM;
@@@ -737,7 -737,7 +737,7 @@@
zone_rule->attr = attr;
- kfree(spec); + kvfree(spec); ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
return 0; @@@ -749,7 -749,7 +749,7 @@@ err_rule err_mod_hdr: kfree(attr); err_attr: - kfree(spec); + kvfree(spec); return err; }
@@@ -1181,8 -1181,7 +1181,8 @@@ int mlx5_tc_ct_add_no_trk_match(struct
mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG, &ctstate, &ctstate_mask); - if (ctstate_mask) + + if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT) return -EOPNOTSUPP;
ctstate_mask |= MLX5_CT_STATE_TRK_BIT; @@@ -1540,6 -1539,14 +1540,14 @@@ mlx5_tc_ct_free_pre_ct_tables(struct ml mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); }
+ /* To avoid false lock dependency warning set the ct_entries_ht lock + * class different than the lock class of the ht being used when deleting + * last flow from a group and then deleting a group, we get into del_sw_flow_group() + * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but + * it's different than the ht->mutex here. + */ + static struct lock_class_key ct_entries_ht_lock_key; + static struct mlx5_ct_ft * mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, struct nf_flowtable *nf_ft) @@@ -1574,6 -1581,8 +1582,8 @@@ if (err) goto err_init;
+ lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key); + err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, zone_params); if (err) @@@ -1675,10 -1684,10 +1685,10 @@@ __mlx5_tc_ct_flow_offload(struct mlx5_t struct mlx5_ct_ft *ft; u32 fte_id = 1;
- post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL); + post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL); ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); if (!post_ct_spec || !ct_flow) { - kfree(post_ct_spec); + kvfree(post_ct_spec); kfree(ct_flow); return ERR_PTR(-ENOMEM); } @@@ -1788,6 -1797,10 +1798,10 @@@ ct_flow->post_ct_attr->prio = 0; ct_flow->post_ct_attr->ft = ct_priv->post_ct;
+ /* Splits were handled before CT */ + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) + ct_flow->post_ct_attr->esw_attr->split_count = 0; + ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE; ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE; ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); @@@ -1813,7 -1826,7 +1827,7 @@@
attr->ct_attr.ct_flow = ct_flow; dealloc_mod_hdr_actions(&pre_mod_acts); - kfree(post_ct_spec); + kvfree(post_ct_spec);
return rule;
@@@ -1834,7 -1847,7 +1848,7 @@@ err_alloc_pre err_idr: mlx5_tc_ct_del_ft_cb(ct_priv, ft); err_ft: - kfree(post_ct_spec); + kvfree(post_ct_spec); kfree(ct_flow); netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); return ERR_PTR(err); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 7f7b0f6dcdf9,32d06fe94acc..01d435e15ad3 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@@ -2,6 -2,7 +2,7 @@@ /* Copyright (c) 2021 Mellanox Technologies. */
#include <net/fib_notifier.h> + #include <net/nexthop.h> #include "tc_tun_encap.h" #include "en_tc.h" #include "tc_tun.h" @@@ -89,7 -90,6 +90,7 @@@ int mlx5e_tc_set_attr_rx_tun(struct mlx * required to establish routing. */ flow_flag_set(flow, TUN_RX); + flow->attr->tun_ip_version = ip_version; return 0; }
@@@ -1092,7 -1092,7 +1093,7 @@@ int mlx5e_attach_decap_route(struct mlx if (err || !esw_attr->rx_tun_attr->decap_vport) goto out;
- key.ip_version = attr->ip_version; + key.ip_version = attr->tun_ip_version; if (key.ip_version == 4) key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4; else diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 158f947a8503,9c08f0bd1fcc..d40fc2672530 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@@ -302,7 -302,7 +302,7 @@@ static int mlx5e_create_umr_mkey(struc MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); mlx5e_mkey_set_relaxed_ordering(mdev, mkc); MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn); + MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); MLX5_SET64(mkc, mkc, len, npages << page_shift); MLX5_SET(mkc, mkc, translations_octword_size, MLX5_MTT_OCTW(npages)); @@@ -334,9 -334,9 +334,9 @@@ static int mlx5e_create_rq_umr_mkey(str rq->wqe_overflow.addr); }
-static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix) +static u64 mlx5e_get_mpwqe_offset(u16 wqe_ix) { - return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT; + return MLX5E_REQUIRED_MTTS(wqe_ix) << PAGE_SHIFT; }
static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) @@@ -577,7 -577,7 +577,7 @@@ static int mlx5e_alloc_rq(struct mlx5e_ mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i); u32 byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; - u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i); + u64 dma_offset = mlx5e_get_mpwqe_offset(i);
wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom); wqe->data[0].byte_count = cpu_to_be32(byte_count); @@@ -1019,7 -1019,7 +1019,7 @@@ static int mlx5e_alloc_xdpsq(struct mlx sq->pdev = c->pdev; sq->mkey_be = c->mkey_be; sq->channel = c; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->xsk_pool = xsk_pool; @@@ -1090,7 -1090,7 +1090,7 @@@ static int mlx5e_alloc_icosq(struct mlx int err;
sq->channel = c; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@@ -1174,7 -1174,7 +1174,7 @@@ static int mlx5e_alloc_txqsq(struct mlx sq->priv = c->priv; sq->ch_ix = c->ix; sq->txq_ix = txq_ix; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); @@@ -1257,7 -1257,7 +1257,7 @@@ static int mlx5e_create_sq(struct mlx5_ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index); + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index); MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); @@@ -2032,7 -2032,7 +2032,7 @@@ static int mlx5e_open_channel(struct ml c->cpu = cpu; c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key); c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; @@@ -2217,7 -2217,7 +2217,7 @@@ void mlx5e_build_rq_param(struct mlx5e_ MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); - MLX5_SET(wq, wq, pd, mdev->mlx5e_res.pdn); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); @@@ -2248,7 -2248,7 +2248,7 @@@ void mlx5e_build_sq_param_common(struc void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); - MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); + MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.hw_objs.pdn);
param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev)); } @@@ -2368,9 -2368,8 +2368,9 @@@ static u8 mlx5e_build_icosq_log_wq_sz(s { switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return order_base_2(MLX5E_UMR_WQEBBS) + - mlx5e_get_rq_log_wq_sz(rqp->rqc); + return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, + order_base_2(MLX5E_UMR_WQEBBS) + + mlx5e_get_rq_log_wq_sz(rqp->rqc)); default: /* MLX5_WQ_TYPE_CYCLIC */ return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; } @@@ -2503,10 -2502,8 +2503,10 @@@ void mlx5e_close_channels(struct mlx5e_ { int i;
- if (chs->port_ptp) + if (chs->port_ptp) { mlx5e_port_ptp_close(chs->port_ptp); + chs->port_ptp = NULL; + }
for (i = 0; i < chs->num; i++) mlx5e_close_channel(chs->c[i]); @@@ -3424,10 -3421,10 +3424,10 @@@ int mlx5e_create_tis(struct mlx5_core_d { void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
- MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
if (MLX5_GET(tisc, tisc, tls_en)) - MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn); + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
if (mlx5_lag_is_lacp_owner(mdev)) MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); @@@ -3497,7 -3494,7 +3497,7 @@@ static void mlx5e_cleanup_nic_tx(struc static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) { - MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.hw_objs.td.tdn); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, rqtn); MLX5_SET(tirc, tirc, tunneled_offload_en, @@@ -3772,8 -3769,16 +3772,16 @@@ static int mlx5e_setup_tc(struct net_de void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + bool tc_unbind = false; int err;
+ if (type == TC_SETUP_BLOCK && + ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND) + tc_unbind = true; + + if (!netif_device_present(dev) && !tc_unbind) + return -ENODEV; + switch (type) { case TC_SETUP_BLOCK: { struct flow_block_offload *f = type_data; @@@ -3813,15 -3818,6 +3821,15 @@@ void mlx5e_fold_sw_stats64(struct mlx5e for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
+ s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_dropped += sq_stats->dropped; + } + } + if (priv->port_ptp_opened) { + for (i = 0; i < priv->max_opened_tc; i++) { + struct mlx5e_sq_stats *sq_stats = &priv->port_ptp_stats.sq[i]; + s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; s->tx_dropped += sq_stats->dropped; @@@ -3835,6 -3831,9 +3843,9 @@@ mlx5e_get_stats(struct net_device *dev struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ if (!netif_device_present(dev)) + return; + /* In switchdev mode, monitor counters doesn't monitor * rx/tx stats of 802_3. The update stats mechanism * should keep the 802_3 layout counters updated @@@ -3846,17 -3845,10 +3857,17 @@@ }
if (mlx5e_is_uplink_rep(priv)) { + struct mlx5e_vport_stats *vstats = &priv->stats.vport; + stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); + + /* vport multicast also counts packets that are dropped due to steering + * or rx out of buffer + */ + stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); } else { mlx5e_fold_sw_stats64(priv, stats); } @@@ -3876,11 -3868,19 +3887,19 @@@ stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; }
+ static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv) + { + if (mlx5e_is_uplink_rep(priv)) + return; /* no rx mode for uplink rep */ + + queue_work(priv->wq, &priv->set_rx_mode_work); + } + static void mlx5e_set_rx_mode(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev);
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv); }
static int mlx5e_set_mac(struct net_device *netdev, void *addr) @@@ -3895,7 -3895,7 +3914,7 @@@ ether_addr_copy(netdev->dev_addr, saddr->sa_data); netif_addr_unlock_bh(netdev);
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv);
return 0; } @@@ -4433,6 -4433,9 +4452,9 @@@ static int mlx5e_set_vf_link_state(stru struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev;
+ if (mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, mlx5_ifla_link2vport(link_state)); } @@@ -4444,6 -4447,9 +4466,9 @@@ int mlx5e_get_vf_config(struct net_devi struct mlx5_core_dev *mdev = priv->mdev; int err;
+ if (!netif_device_present(dev)) + return -EOPNOTSUPP; + err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); if (err) return err; @@@ -4460,6 -4466,32 +4485,32 @@@ int mlx5e_get_vf_stats(struct net_devic return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, vf_stats); } + + static bool + mlx5e_has_offload_stats(const struct net_device *dev, int attr_id) + { + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!netif_device_present(dev)) + return false; + + if (!mlx5e_is_uplink_rep(priv)) + return false; + + return mlx5e_rep_has_offload_stats(dev, attr_id); + } + + static int + mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) + { + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + + return mlx5e_rep_get_offload_stats(attr_id, dev, sp); + } #endif
static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type) @@@ -4702,10 -4734,8 +4753,10 @@@ static int mlx5e_xdp_set(struct net_dev struct mlx5e_channel *c = priv->channels.c[i];
mlx5e_rq_replace_xdp_prog(&c->rq, prog); - if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) { + bpf_prog_inc(prog); mlx5e_rq_replace_xdp_prog(&c->xskrq, prog); + } }
unlock: @@@ -4818,6 -4848,8 +4869,8 @@@ const struct net_device_ops mlx5e_netde .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_has_offload_stats = mlx5e_has_offload_stats, + .ndo_get_offload_stats = mlx5e_get_offload_stats, #endif .ndo_get_devlink_port = mlx5e_get_devlink_port, }; @@@ -4979,11 -5011,6 +5032,11 @@@ void mlx5e_build_nic_params(struct mlx5 priv->max_nch); params->num_tc = 1;
+ /* Set an initial non-zero value, so that mlx5e_select_queue won't + * divide by zero if called before first activating channels. + */ + priv->num_tc_x_num_ch = params->num_channels * params->num_tc; + /* SQ */ params->log_sq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : @@@ -5279,10 -5306,6 +5332,6 @@@ static int mlx5e_nic_init(struct mlx5_c if (err) mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
- err = mlx5e_devlink_port_register(priv); - if (err) - mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); - mlx5e_health_create_reporters(priv);
return 0; @@@ -5291,7 -5314,6 +5340,6 @@@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_health_destroy_reporters(priv); - mlx5e_devlink_port_unregister(priv); mlx5e_tls_cleanup(priv); mlx5e_ipsec_cleanup(priv); } @@@ -5431,7 -5453,7 +5479,7 @@@ static void mlx5e_nic_enable(struct mlx return; mlx5e_dcbnl_init_app(priv);
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv);
rtnl_lock(); if (netif_running(netdev)) @@@ -5454,7 -5476,7 +5502,7 @@@ static void mlx5e_nic_disable(struct ml netif_device_detach(priv->netdev); rtnl_unlock();
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv);
mlx5e_hv_vhca_stats_destroy(priv); if (mlx5e_monitor_counter_supported(priv)) @@@ -5500,6 -5522,8 +5548,6 @@@ int mlx5e_priv_init(struct mlx5e_priv * struct net_device *netdev, struct mlx5_core_dev *mdev) { - memset(priv, 0, sizeof(*priv)); - /* priv init */ priv->mdev = mdev; priv->netdev = netdev; @@@ -5532,18 -5556,12 +5580,18 @@@ void mlx5e_priv_cleanup(struct mlx5e_pr { int i;
+ /* bail if change profile failed and also rollback failed */ + if (!priv->mdev) + return; + destroy_workqueue(priv->wq); free_cpumask_var(priv->scratchpad.cpumask);
for (i = 0; i < priv->htb.max_qos_sqs; i++) kfree(priv->htb.qos_sq_stats[i]); kvfree(priv->htb.qos_sq_stats); + + memset(priv, 0, sizeof(*priv)); }
struct net_device * @@@ -5660,10 -5678,11 +5708,10 @@@ void mlx5e_detach_netdev(struct mlx5e_p }
static int -mlx5e_netdev_attach_profile(struct mlx5e_priv *priv, +mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, const struct mlx5e_profile *new_profile, void *new_ppriv) { - struct net_device *netdev = priv->netdev; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_priv *priv = netdev_priv(netdev); int err;
err = mlx5e_priv_init(priv, netdev, mdev); @@@ -5676,16 -5695,10 +5724,16 @@@ priv->ppriv = new_ppriv; err = new_profile->init(priv->mdev, priv->netdev); if (err) - return err; + goto priv_cleanup; err = mlx5e_attach_netdev(priv); if (err) - new_profile->cleanup(priv); + goto profile_cleanup; + return err; + +profile_cleanup: + new_profile->cleanup(priv); +priv_cleanup: + mlx5e_priv_cleanup(priv); return err; }
@@@ -5694,14 -5707,13 +5742,14 @@@ int mlx5e_netdev_change_profile(struct { unsigned int new_max_nch = mlx5e_calc_max_nch(priv, new_profile); const struct mlx5e_profile *orig_profile = priv->profile; + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; void *orig_ppriv = priv->ppriv; int err, rollback_err;
/* sanity */ if (new_max_nch != priv->max_nch) { - netdev_warn(priv->netdev, - "%s: Replacing profile with different max channels\n", + netdev_warn(netdev, "%s: Replacing profile with different max channels\n", __func__); return -EINVAL; } @@@ -5711,22 -5723,30 +5759,27 @@@ priv->profile->cleanup(priv); mlx5e_priv_cleanup(priv);
- err = mlx5e_netdev_attach_profile(priv, new_profile, new_ppriv); + err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv); if (err) { /* roll back to original profile */ - netdev_warn(priv->netdev, "%s: new profile init failed, %d\n", - __func__, err); + netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err); goto rollback; }
return 0;
rollback: - rollback_err = mlx5e_netdev_attach_profile(priv, orig_profile, orig_ppriv); - if (rollback_err) { - netdev_err(priv->netdev, - "%s: failed to rollback to orig profile, %d\n", + rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv); + if (rollback_err) + netdev_err(netdev, "%s: failed to rollback to orig profile, %d\n", __func__, rollback_err); - } return err; }
+ void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv) + { + mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL); + } + void mlx5e_destroy_netdev(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; @@@ -5809,10 -5829,17 +5862,17 @@@ static int mlx5e_probe(struct auxiliary
priv->profile = profile; priv->ppriv = NULL; + + err = mlx5e_devlink_port_register(priv); + if (err) { + mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); + goto err_destroy_netdev; + } + err = profile->init(mdev, netdev); if (err) { mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); - goto err_destroy_netdev; + goto err_devlink_cleanup; }
err = mlx5e_resume(adev); @@@ -5830,12 -5857,15 +5890,15 @@@ mlx5e_devlink_port_type_eth_set(priv);
mlx5e_dcbnl_init_app(priv); + mlx5_uplink_netdev_set(mdev, netdev); return 0;
err_resume: mlx5e_suspend(adev, state); err_profile_cleanup: profile->cleanup(priv); + err_devlink_cleanup: + mlx5e_devlink_port_unregister(priv); err_destroy_netdev: mlx5e_destroy_netdev(priv); return err; @@@ -5850,6 -5880,7 +5913,7 @@@ static void mlx5e_remove(struct auxilia unregister_netdev(priv->netdev); mlx5e_suspend(adev, state); priv->profile->cleanup(priv); + mlx5e_devlink_port_unregister(priv); mlx5e_destroy_netdev(priv); }
@@@ -5875,18 -5906,18 +5939,18 @@@ int mlx5e_init(void
mlx5e_ipsec_build_inverse_table(); mlx5e_build_ptys2ethtool_map(); - ret = mlx5e_rep_init(); + ret = auxiliary_driver_register(&mlx5e_driver); if (ret) return ret;
- ret = auxiliary_driver_register(&mlx5e_driver); + ret = mlx5e_rep_init(); if (ret) - mlx5e_rep_cleanup(); + auxiliary_driver_unregister(&mlx5e_driver); return ret; }
void mlx5e_cleanup(void) { - auxiliary_driver_unregister(&mlx5e_driver); mlx5e_rep_cleanup(); + auxiliary_driver_unregister(&mlx5e_driver); } diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 249d8905e644,b0604b113530..f90894eea9e0 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@@ -52,6 -52,7 +52,7 @@@ #include "en/health.h" #include "en/params.h" #include "devlink.h" + #include "en/devlink.h"
static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, @@@ -500,6 -501,7 +501,6 @@@ static int mlx5e_alloc_rx_mpwqe(struct struct mlx5e_icosq *sq = rq->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *umr_wqe; - u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1); u16 pi; int err; int i; @@@ -530,8 -532,7 +531,8 @@@ umr_wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); - umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset); + umr_wqe->uctrl.xlt_offset = + cpu_to_be16(MLX5_ALIGNED_MTTS_OCTW(MLX5E_REQUIRED_MTTS(ix)));
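The xlt_offset above is now expressed with the helpers added in en.h: MLX5E_REQUIRED_MTTS(ix) gives the first MTT index of WQE ix and MLX5_ALIGNED_MTTS_OCTW() converts it to octwords, presumably because one 16-byte octword holds two 8-byte MTT entries. A worked example of the arithmetic -- the 64 pages-per-WQE figure is an assumption here, since it depends on PAGE_SIZE:

    /*
     * MLX5E_REQUIRED_WQE_MTTS = MLX5_ALIGN_MTTS(64 + 1) = ALIGN(65, 8) = 72
     * For ix = 3:
     *   MLX5E_REQUIRED_MTTS(3)        = 3 * 72  = 216
     *   MLX5_ALIGNED_MTTS_OCTW(216)   = 216 / 2 = 108   -> xlt_offset
     *
     * The generic form MLX5_MTT_OCTW(mtts) = ALIGN(mtts, 8) / 2 gives the
     * same result as the old single macro, e.g. MLX5_MTT_OCTW(5) = 4.
     */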
sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, @@@ -669,6 -670,7 +670,7 @@@ int mlx5e_poll_ico_cq(struct mlx5e_cq * get_cqe_opcode(cqe)); mlx5e_dump_error_cqe(&sq->cq, sq->sqn, (struct mlx5_err_cqe *)cqe); + mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) queue_work(cq->priv->wq, &sq->recover_work); break; @@@ -1822,6 -1824,7 +1824,7 @@@ static void mlx5e_trap_handle_rx_cqe(st struct mlx5e_priv *priv = netdev_priv(rq->netdev); struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; + struct devlink_port *dl_port; struct sk_buff *skb; u32 cqe_bcnt; u16 trap_id; @@@ -1844,7 -1847,8 +1847,8 @@@ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); skb_push(skb, ETH_HLEN);
- mlx5_devlink_trap_report(rq->mdev, trap_id, skb, &priv->dl_port); + dl_port = mlx5e_devlink_get_dl_port(priv); + mlx5_devlink_trap_report(rq->mdev, trap_id, skb, dl_port); dev_kfree_skb_any(skb);
free_wqe: diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index df2a0af854bb,730f33ada90a..3a82e2c64a3e --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@@ -445,12 -445,16 +445,16 @@@ static void mlx5e_hairpin_destroy_trans mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); }
- static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) + static int mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) { - u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn; + u32 *indirection_rqt, rqn; struct mlx5e_priv *priv = hp->func_priv; int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
+ indirection_rqt = kzalloc(sz, GFP_KERNEL); + if (!indirection_rqt) + return -ENOMEM; + mlx5e_build_default_indir_rqt(indirection_rqt, sz, hp->num_channels);
@@@ -462,6 -466,9 +466,9 @@@ rqn = hp->pair->rqn[ix]; MLX5_SET(rqtc, rqtc, rq_num[i], rqn); } + + kfree(indirection_rqt); + return 0; }
static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) @@@ -482,12 -489,15 +489,15 @@@ MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
- mlx5e_hairpin_fill_rqt_rqns(hp, rqtc); + err = mlx5e_hairpin_fill_rqt_rqns(hp, rqtc); + if (err) + goto out;
err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn); if (!err) hp->indir_rqt.enabled = true;
+ out: kvfree(in); return err; } @@@ -1077,19 -1087,23 +1087,23 @@@ mlx5e_tc_offload_fdb_rules(struct mlx5_ if (flow_flag_test(flow, CT)) { mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
- return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv), + rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv), flow, spec, attr, mod_hdr_acts); + } else { + rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); }
- rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); if (IS_ERR(rule)) return rule;
if (attr->esw_attr->split_count) { flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); if (IS_ERR(flow->rule[1])) { - mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + if (flow_flag_test(flow, CT)) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); + else + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); return flow->rule[1]; } } @@@ -1947,6 -1961,10 +1961,10 @@@ static int __parse_cls_flower(struct ml misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_3); + void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_3); struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; @@@ -1976,6 -1994,7 +1994,7 @@@ BIT(FLOW_DISSECTOR_KEY_CT) | BIT(FLOW_DISSECTOR_KEY_ENC_IP) | BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | + BIT(FLOW_DISSECTOR_KEY_ICMP) | BIT(FLOW_DISSECTOR_KEY_MPLS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n", @@@ -2295,17 -2314,49 +2314,60 @@@ if (match.mask->flags) *match_level = MLX5_MATCH_L4; } + + /* Currenlty supported only for MPLS over UDP */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) && + !netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on MPLS is supported only for MPLS over UDP"); + netdev_err(priv->netdev, + "Matching on MPLS is supported only for MPLS over UDP\n"); + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) { + struct flow_match_icmp match; + + flow_rule_match_icmp(rule, &match); + switch (ip_proto) { + case IPPROTO_ICMP: + if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_ICMP)) + return -EOPNOTSUPP; + MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type, + match.mask->type); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type, + match.key->type); + MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code, + match.mask->code); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code, + match.key->code); + break; + case IPPROTO_ICMPV6: + if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_ICMPV6)) + return -EOPNOTSUPP; + MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type, + match.mask->type); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type, + match.key->type); + MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code, + match.mask->code); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code, + match.key->code); + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Code and type matching only with ICMP and ICMPv6"); + netdev_err(priv->netdev, + "Code and type matching only with ICMP and ICMPv6\n"); + return -EINVAL; + } + if (match.mask->code || match.mask->type) { + *match_level = MLX5_MATCH_L4; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; + } + } return 0; }
@@@ -2909,37 -2960,6 +2971,37 @@@ static int is_action_keys_supported(con return 0; }
+static bool modify_tuple_supported(bool modify_tuple, bool ct_clear, + bool ct_flow, struct netlink_ext_ack *extack, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec) +{ + if (!modify_tuple || ct_clear) + return true; + + if (ct_flow) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload tuple modification with non-clear ct()"); + netdev_info(priv->netdev, + "can't offload tuple modification with non-clear ct()"); + return false; + } + + /* Add ct_state=-trk match so it will be offloaded for non ct flows + * (or after clear action), as otherwise, since the tuple is changed, + * we can't restore ct state + */ + if (mlx5_tc_ct_add_no_trk_match(spec)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload tuple modification with ct matches and no ct(clear) action"); + netdev_info(priv->netdev, + "can't offload tuple modification with ct matches and no ct(clear) action"); + return false; + } + + return true; +} + static bool modify_header_match_supported(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_action *flow_action, @@@ -2978,9 -2998,18 +3040,9 @@@ return err; }
- /* Add ct_state=-trk match so it will be offloaded for non ct flows - * (or after clear action), as otherwise, since the tuple is changed, - * we can't restore ct state - */ - if (!ct_clear && modify_tuple && - mlx5_tc_ct_add_no_trk_match(spec)) { - NL_SET_ERR_MSG_MOD(extack, - "can't offload tuple modify header with ct matches"); - netdev_info(priv->netdev, - "can't offload tuple modify header with ct matches"); + if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack, + priv, spec)) return false; - }
ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); if (modify_ip_header && ip_proto != IPPROTO_TCP && @@@ -3011,7 -3040,8 +3073,8 @@@ static bool actions_match_supported(str actions = flow->attr->action;
if (mlx5e_is_eswitch_flow(flow)) { - if (flow->attr->esw_attr->split_count && ct_flow) { + if (flow->attr->esw_attr->split_count && ct_flow && + !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) { /* All registers used by ct are cleared when using * split rules. */ @@@ -3811,6 -3841,7 +3874,7 @@@ static int parse_tc_fdb_actions(struct return err;
flow_flag_set(flow, CT); + esw_attr->split_count = esw_attr->out_count; break; default: NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); @@@ -3873,11 -3904,6 +3937,6 @@@ return -EOPNOTSUPP; }
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - NL_SET_ERR_MSG_MOD(extack, - "Mirroring goto chain rules isn't supported"); - return -EOPNOTSUPP; - } attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; }
@@@ -4297,6 -4323,11 +4356,11 @@@ int mlx5e_configure_flower(struct net_d struct mlx5e_tc_flow *flow; int err = 0;
+ if (!mlx5_esw_hold(priv->mdev)) + return -EAGAIN; + + mlx5_esw_get(priv->mdev); + rcu_read_lock(); flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); if (flow) { @@@ -4334,11 -4365,14 +4398,14 @@@ rcu_unlock if (err) goto err_free;
+ mlx5_esw_release(priv->mdev); return 0;
err_free: mlx5e_flow_put(priv, flow); out: + mlx5_esw_put(priv->mdev); + mlx5_esw_release(priv->mdev); return err; }
@@@ -4378,6 -4412,7 +4445,7 @@@ int mlx5e_delete_flower(struct net_devi trace_mlx5e_delete_flower(f); mlx5e_flow_put(priv, flow);
+ mlx5_esw_put(priv->mdev); return 0;
errout: @@@ -4477,8 -4512,7 +4545,8 @@@ static int apply_police_params(struct m */ if (rate) { rate = (rate * BITS_PER_BYTE) + 500000; - rate_mbps = max_t(u64, do_div(rate, 1000000), 1); + do_div(rate, 1000000); + rate_mbps = max_t(u32, rate, 1); }
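For orientation on the apply_police_params() change above: the kernel's do_div() divides its 64-bit argument in place and returns the remainder, so the old line fed the remainder (not the quotient) into max_t(). A minimal userspace sketch of the corrected conversion — illustrative only; demo_do_div() is a stand-in for the kernel macro and the rate value is made up:

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's do_div(): divides *n in place
 * and returns the remainder, matching the real macro's behaviour. */
static uint32_t demo_do_div(uint64_t *n, uint32_t base)
{
	uint32_t rem = (uint32_t)(*n % base);

	*n /= base;
	return rem;
}

int main(void)
{
	uint64_t rate = 1250000;	/* bytes/s from tc police (demo value) */
	uint64_t tmp;
	uint32_t wrong, rate_mbps;

	rate = rate * 8 + 500000;	/* to bits/s, rounded to nearest Mbit */

	/* Old pattern: do_div()'s return value is the remainder. */
	tmp = rate;
	wrong = demo_do_div(&tmp, 1000000);

	/* Fixed pattern: divide first, then clamp the quotient to >= 1. */
	demo_do_div(&rate, 1000000);
	rate_mbps = rate ? (uint32_t)rate : 1;

	printf("wrong=%u right=%u\n", wrong, rate_mbps);	/* wrong=500000 right=10 */
	return 0;
}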
err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); @@@ -4513,6 -4547,10 +4581,10 @@@ static int scan_tc_matchall_fdb_actions flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_POLICE: + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } err = apply_police_params(priv, act->police.rate_bytes_ps, extack); if (err) return err; @@@ -4679,10 -4717,6 +4751,6 @@@ int mlx5e_tc_nic_init(struct mlx5e_pri
tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr, MLX5_FLOW_NAMESPACE_KERNEL); - if (IS_ERR(tc->ct)) { - err = PTR_ERR(tc->ct); - goto err_ct; - }
tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; err = register_netdevice_notifier_dev_net(priv->netdev, @@@ -4698,7 -4732,6 +4766,6 @@@
err_reg: mlx5_tc_ct_clean(tc->ct); - err_ct: mlx5_chains_destroy(tc->chains); err_chains: rhashtable_destroy(&tc->ht); @@@ -4757,8 -4790,6 +4824,6 @@@ int mlx5e_tc_esw_init(struct rhashtabl esw_chains(esw), &esw->offloads.mod_hdr, MLX5_FLOW_NAMESPACE_FDB); - if (IS_ERR(uplink_priv->ct_priv)) - goto err_ct;
mapping = mapping_create(sizeof(struct tunnel_match_key), TUNNEL_INFO_BITS_MASK, true); @@@ -4798,7 -4829,6 +4863,6 @@@ err_enc_opts_mapping mapping_destroy(uplink_priv->tunnel_mapping); err_tun_mapping: mlx5_tc_ct_clean(uplink_priv->ct_priv); - err_ct: netdev_warn(priv->netdev, "Failed to initialize tc (eswitch), err: %d", err); return err; @@@ -4871,9 -4901,17 +4935,17 @@@ static int mlx5e_setup_tc_cls_flower(st int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { - unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD); + unsigned long flags = MLX5_TC_FLAG(INGRESS); struct mlx5e_priv *priv = cb_priv;
+ if (!priv->netdev || !netif_device_present(priv->netdev)) + return -EOPNOTSUPP; + + if (mlx5e_is_uplink_rep(priv)) + flags |= MLX5_TC_FLAG(ESW_OFFLOAD); + else + flags |= MLX5_TC_FLAG(NIC_OFFLOAD); + switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_setup_tc_cls_flower(priv, type_data, flags); diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8694b83968b4,ab2694835246..d5de6bf622ce --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@@ -40,7 -40,6 +40,6 @@@ #include "eswitch.h" #include "esw/indir_table.h" #include "esw/acl/ofld.h" - #include "esw/indir_table.h" #include "rdma.h" #include "en.h" #include "fs_core.h" @@@ -551,8 -550,7 +550,8 @@@ esw_setup_dests(struct mlx5_flow_destin
if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) && MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve) && - mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
if (attr->dest_ft) { @@@ -1447,7 -1445,7 +1446,7 @@@ esw_add_restore_rule(struct mlx5_eswitc if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) return ERR_PTR(-EOPNOTSUPP);
- spec = kzalloc(sizeof(*spec), GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return ERR_PTR(-ENOMEM);
@@@ -1470,7 -1468,7 +1469,7 @@@ dest.ft = esw->offloads.ft_offloads;
flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); - kfree(spec); + kvfree(spec);
if (IS_ERR(flow_rule)) esw_warn(esw->dev, @@@ -1855,6 -1853,7 +1854,7 @@@ static void esw_destroy_offloads_fdb_ta /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, MLX5_FLOW_STEERING_MODE_DMFS); + atomic64_set(&esw->user_count, 0); }
static int esw_create_offloads_table(struct mlx5_eswitch *esw) @@@ -2260,9 -2259,11 +2260,11 @@@ int esw_offloads_load_rep(struct mlx5_e if (esw->mode != MLX5_ESWITCH_OFFLOADS) return 0;
- err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); - if (err) - return err; + if (vport_num != MLX5_VPORT_UPLINK) { + err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); + if (err) + return err; + }
err = mlx5_esw_offloads_rep_load(esw, vport_num); if (err) @@@ -2270,7 -2271,8 +2272,8 @@@ return err;
load_err: - mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + if (vport_num != MLX5_VPORT_UPLINK) + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); return err; }
@@@ -2280,7 -2282,9 +2283,9 @@@ void esw_offloads_unload_rep(struct mlx return;
mlx5_esw_offloads_rep_unload(esw, vport_num); - mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + + if (vport_num != MLX5_VPORT_UPLINK) + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); }
#define ESW_OFFLOADS_DEVCOM_PAIR (0) @@@ -2555,6 -2559,9 +2560,9 @@@ static int esw_create_uplink_offloads_a struct mlx5_vport *vport;
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(vport)) + return PTR_ERR(vport); + return esw_vport_create_offloads_acl_tables(esw, vport); }
@@@ -2563,6 -2570,9 +2571,9 @@@ static void esw_destroy_uplink_offloads struct mlx5_vport *vport;
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(vport)) + return; + esw_vport_destroy_offloads_acl_tables(esw, vport); }
@@@ -2574,6 -2584,7 +2585,7 @@@ static int esw_offloads_steering_init(s memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.vports.lock); hash_init(esw->fdb_table.offloads.vports.table); + atomic64_set(&esw->user_count, 0);
indir = mlx5_esw_indir_table_init(); if (IS_ERR(indir)) { @@@ -2915,8 -2926,14 +2927,14 @@@ int mlx5_devlink_eswitch_mode_set(struc if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL;
- mutex_lock(&esw->mode_lock); - cur_mlx5_mode = esw->mode; + err = mlx5_esw_try_lock(esw); + if (err < 0) { + NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy"); + return err; + } + cur_mlx5_mode = err; + err = 0; + if (cur_mlx5_mode == mlx5_mode) goto unlock;
@@@ -2928,7 -2945,7 +2946,7 @@@ err = -EINVAL;
unlock: - mutex_unlock(&esw->mode_lock); + mlx5_esw_unlock(esw); return err; }
@@@ -2941,14 -2958,14 +2959,14 @@@ int mlx5_devlink_eswitch_mode_get(struc if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock;
err = esw_mode_to_devlink(esw->mode, mode); unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -2964,7 -2981,7 +2982,7 @@@ int mlx5_devlink_eswitch_inline_mode_se if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto out; @@@ -3003,7 -3020,7 +3021,7 @@@ }
esw->offloads.inline_mode = mlx5_mode; - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return 0;
revert_inline_mode: @@@ -3013,7 -3030,7 +3031,7 @@@ vport, esw->offloads.inline_mode); out: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -3026,14 -3043,14 +3044,14 @@@ int mlx5_devlink_eswitch_inline_mode_ge if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock;
err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -3049,7 -3066,7 +3067,7 @@@ int mlx5_devlink_eswitch_encap_mode_set if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; @@@ -3095,7 -3112,7 +3113,7 @@@ }
unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -3110,14 -3127,14 +3128,14 @@@ int mlx5_devlink_eswitch_encap_mode_get return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock;
*encap = esw->offloads.encap; unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return 0; }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 6f7cef47e04c,0fc055cdf221..48303286c133 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@@ -233,7 -233,6 +233,7 @@@ int mlx5i_create_underlay_qp(struct mlx }
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(priv->mdev)); MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, @@@ -695,7 -694,6 +695,7 @@@ static int mlx5i_check_required_hca_cap static void mlx5_rdma_netdev_free(struct net_device *netdev) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; struct mlx5i_priv *ipriv = priv->ppriv; const struct mlx5e_profile *profile = priv->profile;
@@@ -704,13 -702,13 +704,13 @@@
if (!ipriv->sub_interface) { mlx5i_pkey_qpn_ht_cleanup(netdev); - mlx5e_destroy_mdev_resources(priv->mdev); + mlx5e_destroy_mdev_resources(mdev); } }
static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev) { - return mdev->mlx5e_res.pdn != 0; + return mdev->mlx5e_res.hw_objs.pdn != 0; }
static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev) diff --combined drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c index a5a0f60bef66,3c8a00dd573a..699d615e4e2a --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c @@@ -5,8 -5,7 +5,7 @@@ #include "priv.h" #include "sf.h" #include "mlx5_ifc_vhca_event.h" - #include "vhca_event.h" -#include "ecpf.h" +#include "mlx5_core.h"
struct mlx5_sf_hw { u32 usr_sfnum; @@@ -18,6 -17,7 +17,6 @@@ struct mlx5_sf_hw_table struct mlx5_core_dev *dev; struct mlx5_sf_hw *sfs; int max_local_functions; - u8 ecpu: 1; struct mutex table_lock; /* Serializes sf deletion and vhca state change handler. */ struct notifier_block vhca_nb; }; @@@ -63,7 -63,7 +62,7 @@@ int mlx5_sf_hw_table_sf_alloc(struct ml } if (sw_id == -ENOSPC) { err = -ENOSPC; - goto err; + goto exist_err; }
hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sw_id); @@@ -71,7 -71,7 +70,7 @@@ if (err) goto err;
- err = mlx5_modify_vhca_sw_id(dev, hw_fn_id, table->ecpu, usr_sfnum); + err = mlx5_modify_vhca_sw_id(dev, hw_fn_id, usr_sfnum); if (err) goto vhca_err;
@@@ -117,7 -117,7 +116,7 @@@ void mlx5_sf_hw_table_sf_deferred_free(
hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id); mutex_lock(&table->table_lock); - err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, table->ecpu, out, sizeof(out)); + err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, out, sizeof(out)); if (err) goto err; state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); @@@ -163,6 -163,7 +162,6 @@@ int mlx5_sf_hw_table_init(struct mlx5_c table->dev = dev; table->sfs = sfs; table->max_local_functions = max_functions; - table->ecpu = mlx5_read_embedded_cpu(dev); dev->priv.sf_hw_table = table; mlx5_core_dbg(dev, "SF HW table: max sfs = %d\n", max_functions); return 0; diff --combined drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index 9143ec326ebf,815951617e7c..616ebc38381a --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@@ -264,8 -264,8 +264,8 @@@ static void dr_ste_v1_set_miss_addr(u8 static u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p) { u64 index = - (MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) | - MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32) << 26); + ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) | + ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32)) << 26);
return index << 6; } @@@ -437,21 -437,6 +437,6 @@@ static void dr_ste_v1_set_rx_decap(u8 * dr_ste_v1_set_reparse(hw_ste_p); }
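The dr_ste_v1_get_miss_addr() fix above matters because MLX5_GET() yields a 32-bit value: without the (u64) casts, the << 26 shift is evaluated in 32-bit arithmetic and the upper miss-address bits are lost before the result ever reaches the 64-bit index. A standalone sketch of the truncation (the field values are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t lo = 0x02345678;	/* stands in for miss_address_31_6 */
	uint32_t hi = 0xAB;		/* stands in for miss_address_39_32 */

	/* 32-bit arithmetic: bits shifted past position 31 are gone. */
	uint64_t truncated = (uint64_t)(lo | hi << 26);

	/* Promote to 64 bits first, as the fixed code does. */
	uint64_t full = (uint64_t)lo | ((uint64_t)hi << 26);

	printf("truncated=%#llx full=%#llx\n",
	       (unsigned long long)(truncated << 6),
	       (unsigned long long)(full << 6));
	return 0;
}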
- static void dr_ste_v1_set_rx_decap_l3(u8 *hw_ste_p, - u8 *s_action, - u16 decap_actions, - u32 decap_index) - { - MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id, - DR_STE_V1_ACTION_ID_MODIFY_LIST); - MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions, - decap_actions); - MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr, - decap_index); - - dr_ste_v1_set_reparse(hw_ste_p); - } - static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p, u8 *s_action, u16 num_of_actions, @@@ -571,9 -556,6 +556,6 @@@ static void dr_ste_v1_set_actions_rx(st bool allow_ctr = true;
if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) { - dr_ste_v1_set_rx_decap_l3(last_ste, action, - attr->decap_actions, - attr->decap_index); dr_ste_v1_set_rewrite_actions(last_ste, action, attr->decap_actions, attr->decap_index); @@@ -1532,6 -1514,7 +1514,7 @@@ static void dr_ste_v1_build_src_gvmi_qp
DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_gvmi, misc_mask, source_port); DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_qp, misc_mask, source_sqn); + misc_mask->source_eswitch_owner_vhca_id = 0; }
static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, diff --combined drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 4087311f7082,03e00a6c413a..4fa1ae7db13e --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@@ -10,12 -10,6 +10,6 @@@ #include "ionic_lif.h" #include "ionic_txrx.h"
- static void ionic_rx_clean(struct ionic_queue *q, - struct ionic_desc_info *desc_info, - struct ionic_cq_info *cq_info, - void *cb_arg); - - static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
@@@ -40,72 -34,149 +34,149 @@@ static inline struct netdev_queue *q_to return netdev_get_tx_queue(q->lif->netdev, q->index); }
- static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q, - unsigned int len, bool frags) + static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info) + { + buf_info->page = NULL; + buf_info->page_offset = 0; + buf_info->dma_addr = 0; + } + + static int ionic_rx_page_alloc(struct ionic_queue *q, + struct ionic_buf_info *buf_info) { - struct ionic_lif *lif = q->lif; + struct net_device *netdev = q->lif->netdev; struct ionic_rx_stats *stats; - struct net_device *netdev; - struct sk_buff *skb; + struct device *dev;
- netdev = lif->netdev; - stats = &q->lif->rxqstats[q->index]; + dev = q->dev; + stats = q_to_rx_stats(q);
- if (frags) - skb = napi_get_frags(&q_to_qcq(q)->napi); - else - skb = netdev_alloc_skb_ip_align(netdev, len); + if (unlikely(!buf_info)) { + net_err_ratelimited("%s: %s invalid buf_info in alloc\n", + netdev->name, q->name); + return -EINVAL; + }
- if (unlikely(!skb)) { - net_warn_ratelimited("%s: SKB alloc failed on %s!\n", - netdev->name, q->name); + buf_info->page = alloc_pages(IONIC_PAGE_GFP_MASK, 0); + if (unlikely(!buf_info->page)) { + net_err_ratelimited("%s: %s page alloc failed\n", + netdev->name, q->name); stats->alloc_err++; - return NULL; + return -ENOMEM; } + buf_info->page_offset = 0;
- return skb; + buf_info->dma_addr = dma_map_page(dev, buf_info->page, buf_info->page_offset, + IONIC_PAGE_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) { + __free_pages(buf_info->page, 0); + ionic_rx_buf_reset(buf_info); + net_err_ratelimited("%s: %s dma map failed\n", + netdev->name, q->name); + stats->dma_map_err++; + return -EIO; + } + + return 0; + } + + static void ionic_rx_page_free(struct ionic_queue *q, + struct ionic_buf_info *buf_info) + { + struct net_device *netdev = q->lif->netdev; + struct device *dev = q->dev; + + if (unlikely(!buf_info)) { + net_err_ratelimited("%s: %s invalid buf_info in free\n", + netdev->name, q->name); + return; + } + + if (!buf_info->page) + return; + + dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE); + __free_pages(buf_info->page, 0); + ionic_rx_buf_reset(buf_info); + } + + static bool ionic_rx_buf_recycle(struct ionic_queue *q, + struct ionic_buf_info *buf_info, u32 used) + { + u32 size; + + /* don't re-use pages allocated in low-mem condition */ + if (page_is_pfmemalloc(buf_info->page)) + return false; + + /* don't re-use buffers from non-local numa nodes */ + if (page_to_nid(buf_info->page) != numa_mem_id()) + return false; + + size = ALIGN(used, IONIC_PAGE_SPLIT_SZ); + buf_info->page_offset += size; + if (buf_info->page_offset >= IONIC_PAGE_SIZE) + return false; + + get_page(buf_info->page); + + return true; }
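To see what the new rx buffer handling buys: ionic_rx_page_alloc() maps a whole page once, ionic_rx_frags()/ionic_rx_fill() hand out chunks starting at page_offset, and ionic_rx_buf_recycle() advances the offset and takes an extra page reference so the rest of the page can be reused without another allocation or DMA mapping. A small userspace model of that offset/refcount bookkeeping — the struct and constant values are demo assumptions, not the driver's definitions:

#include <stdio.h>

#define DEMO_PAGE_SIZE	4096u	/* stands in for IONIC_PAGE_SIZE */
#define DEMO_SPLIT_SZ	2048u	/* stands in for IONIC_PAGE_SPLIT_SZ */
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

struct demo_buf {
	unsigned int page_offset;	/* where the next rx buffer starts */
	int refs;			/* models get_page()/put_page() */
};

/* Returns 1 if the page can serve another buffer, 0 if it is used up and
 * the caller must unmap it and allocate a fresh page. */
static int demo_recycle(struct demo_buf *b, unsigned int used)
{
	b->page_offset += ALIGN_UP(used, DEMO_SPLIT_SZ);
	if (b->page_offset >= DEMO_PAGE_SIZE)
		return 0;
	b->refs++;	/* the network stack still holds the old chunk */
	return 1;
}

int main(void)
{
	struct demo_buf b = { .page_offset = 0, .refs = 1 };

	printf("reuse after 1st buffer: %d (offset %u)\n",
	       demo_recycle(&b, 1500), b.page_offset);	/* 1, 2048 */
	printf("reuse after 2nd buffer: %d (offset %u)\n",
	       demo_recycle(&b, 1500), b.page_offset);	/* 0, 4096 */
	return 0;
}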
static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, struct ionic_desc_info *desc_info, - struct ionic_cq_info *cq_info) + struct ionic_rxq_comp *comp) { - struct ionic_rxq_comp *comp = cq_info->cq_desc; - struct device *dev = q->lif->ionic->dev; - struct ionic_page_info *page_info; + struct net_device *netdev = q->lif->netdev; + struct ionic_buf_info *buf_info; + struct ionic_rx_stats *stats; + struct device *dev = q->dev; struct sk_buff *skb; unsigned int i; u16 frag_len; u16 len;
- page_info = &desc_info->pages[0]; + stats = q_to_rx_stats(q); + + buf_info = &desc_info->bufs[0]; len = le16_to_cpu(comp->len);
- prefetch(page_address(page_info->page) + NET_IP_ALIGN); + prefetch(buf_info->page);
- skb = ionic_rx_skb_alloc(q, len, true); - if (unlikely(!skb)) + skb = napi_get_frags(&q_to_qcq(q)->napi); + if (unlikely(!skb)) { + net_warn_ratelimited("%s: SKB alloc failed on %s!\n", + netdev->name, q->name); + stats->alloc_err++; return NULL; + }
i = comp->num_sg_elems + 1; do { - if (unlikely(!page_info->page)) { - struct napi_struct *napi = &q_to_qcq(q)->napi; - - napi->skb = NULL; + if (unlikely(!buf_info->page)) { dev_kfree_skb(skb); return NULL; }
- frag_len = min(len, (u16)PAGE_SIZE); + frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset); len -= frag_len;
- dma_unmap_page(dev, dma_unmap_addr(page_info, dma_addr), - PAGE_SIZE, DMA_FROM_DEVICE); + dma_sync_single_for_cpu(dev, + buf_info->dma_addr + buf_info->page_offset, + frag_len, DMA_FROM_DEVICE); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - page_info->page, 0, frag_len, PAGE_SIZE); - page_info->page = NULL; - page_info++; + buf_info->page, buf_info->page_offset, frag_len, + IONIC_PAGE_SIZE); + + if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) { + dma_unmap_page(dev, buf_info->dma_addr, + IONIC_PAGE_SIZE, DMA_FROM_DEVICE); + ionic_rx_buf_reset(buf_info); + } + + buf_info++; + i--; } while (i > 0);
@@@ -114,30 -185,37 +185,37 @@@
static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q, struct ionic_desc_info *desc_info, - struct ionic_cq_info *cq_info) + struct ionic_rxq_comp *comp) { - struct ionic_rxq_comp *comp = cq_info->cq_desc; - struct device *dev = q->lif->ionic->dev; - struct ionic_page_info *page_info; + struct net_device *netdev = q->lif->netdev; + struct ionic_buf_info *buf_info; + struct ionic_rx_stats *stats; + struct device *dev = q->dev; struct sk_buff *skb; u16 len;
- page_info = &desc_info->pages[0]; + stats = q_to_rx_stats(q); + + buf_info = &desc_info->bufs[0]; len = le16_to_cpu(comp->len);
- skb = ionic_rx_skb_alloc(q, len, false); - if (unlikely(!skb)) + skb = napi_alloc_skb(&q_to_qcq(q)->napi, len); + if (unlikely(!skb)) { + net_warn_ratelimited("%s: SKB alloc failed on %s!\n", + netdev->name, q->name); + stats->alloc_err++; return NULL; + }
- if (unlikely(!page_info->page)) { + if (unlikely(!buf_info->page)) { dev_kfree_skb(skb); return NULL; }
- dma_sync_single_for_cpu(dev, dma_unmap_addr(page_info, dma_addr), + dma_sync_single_for_cpu(dev, buf_info->dma_addr + buf_info->page_offset, len, DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, page_address(page_info->page), len); - dma_sync_single_for_device(dev, dma_unmap_addr(page_info, dma_addr), + skb_copy_to_linear_data(skb, page_address(buf_info->page) + buf_info->page_offset, len); + dma_sync_single_for_device(dev, buf_info->dma_addr + buf_info->page_offset, len, DMA_FROM_DEVICE);
skb_put(skb, len); @@@ -151,14 -229,13 +229,13 @@@ static void ionic_rx_clean(struct ionic struct ionic_cq_info *cq_info, void *cb_arg) { - struct ionic_rxq_comp *comp = cq_info->cq_desc; + struct ionic_rxq_comp *comp = cq_info->rxcq; + struct net_device *netdev = q->lif->netdev; struct ionic_qcq *qcq = q_to_qcq(q); struct ionic_rx_stats *stats; - struct net_device *netdev; struct sk_buff *skb;
stats = q_to_rx_stats(q); - netdev = q->lif->netdev;
if (comp->status) { stats->dropped++; @@@ -169,9 -246,9 +246,9 @@@ stats->bytes += le16_to_cpu(comp->len);
if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak) - skb = ionic_rx_copybreak(q, desc_info, cq_info); + skb = ionic_rx_copybreak(q, desc_info, comp); else - skb = ionic_rx_frags(q, desc_info, cq_info); + skb = ionic_rx_frags(q, desc_info, comp);
if (unlikely(!skb)) { stats->dropped++; @@@ -227,7 -304,7 +304,7 @@@
static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) { - struct ionic_rxq_comp *comp = cq_info->cq_desc; + struct ionic_rxq_comp *comp = cq_info->rxcq; struct ionic_queue *q = cq->bound_q; struct ionic_desc_info *desc_info;
@@@ -253,138 -330,75 +330,75 @@@ return true; }
- static int ionic_rx_page_alloc(struct ionic_queue *q, - struct ionic_page_info *page_info) - { - struct ionic_lif *lif = q->lif; - struct ionic_rx_stats *stats; - struct net_device *netdev; - struct device *dev; - - netdev = lif->netdev; - dev = lif->ionic->dev; - stats = q_to_rx_stats(q); - - if (unlikely(!page_info)) { - net_err_ratelimited("%s: %s invalid page_info in alloc\n", - netdev->name, q->name); - return -EINVAL; - } - - page_info->page = dev_alloc_page(); - if (unlikely(!page_info->page)) { - net_err_ratelimited("%s: %s page alloc failed\n", - netdev->name, q->name); - stats->alloc_err++; - return -ENOMEM; - } - - page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) { - put_page(page_info->page); - page_info->dma_addr = 0; - page_info->page = NULL; - net_err_ratelimited("%s: %s dma map failed\n", - netdev->name, q->name); - stats->dma_map_err++; - return -EIO; - } - - return 0; - } - - static void ionic_rx_page_free(struct ionic_queue *q, - struct ionic_page_info *page_info) - { - struct ionic_lif *lif = q->lif; - struct net_device *netdev; - struct device *dev; - - netdev = lif->netdev; - dev = lif->ionic->dev; - - if (unlikely(!page_info)) { - net_err_ratelimited("%s: %s invalid page_info in free\n", - netdev->name, q->name); - return; - } - - if (unlikely(!page_info->page)) { - net_err_ratelimited("%s: %s invalid page in free\n", - netdev->name, q->name); - return; - } - - dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE); - - put_page(page_info->page); - page_info->dma_addr = 0; - page_info->page = NULL; - } - void ionic_rx_fill(struct ionic_queue *q) { struct net_device *netdev = q->lif->netdev; struct ionic_desc_info *desc_info; - struct ionic_page_info *page_info; struct ionic_rxq_sg_desc *sg_desc; struct ionic_rxq_sg_elem *sg_elem; + struct ionic_buf_info *buf_info; struct ionic_rxq_desc *desc; unsigned int remain_len; - unsigned int seg_len; + unsigned int frag_len; unsigned int nfrags; unsigned int i, j; unsigned int len;
len = netdev->mtu + ETH_HLEN + VLAN_HLEN; - nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE;
for (i = ionic_q_space_avail(q); i; i--) { + nfrags = 0; remain_len = len; desc_info = &q->info[q->head_idx]; desc = desc_info->desc; - sg_desc = desc_info->sg_desc; - page_info = &desc_info->pages[0]; + buf_info = &desc_info->bufs[0];
- if (page_info->page) { /* recycle the buffer */ - ionic_rxq_post(q, false, ionic_rx_clean, NULL); - continue; - } - - /* fill main descriptor - pages[0] */ - desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG : - IONIC_RXQ_DESC_OPCODE_SIMPLE; - desc_info->npages = nfrags; - if (unlikely(ionic_rx_page_alloc(q, page_info))) { - desc->addr = 0; - desc->len = 0; - return; + if (!buf_info->page) { /* alloc a new buffer? */ + if (unlikely(ionic_rx_page_alloc(q, buf_info))) { + desc->addr = 0; + desc->len = 0; + return; + } } - desc->addr = cpu_to_le64(page_info->dma_addr); - seg_len = min_t(unsigned int, PAGE_SIZE, len); - desc->len = cpu_to_le16(seg_len); - remain_len -= seg_len; - page_info++;
- /* fill sg descriptors - pages[1..n] */ - for (j = 0; j < nfrags - 1; j++) { - if (page_info->page) /* recycle the sg buffer */ - continue; + /* fill main descriptor - buf[0] */ + desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset); + frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset); + desc->len = cpu_to_le16(frag_len); + remain_len -= frag_len; + buf_info++; + nfrags++;
+ /* fill sg descriptors - buf[1..n] */ + sg_desc = desc_info->sg_desc; + for (j = 0; remain_len > 0 && j < q->max_sg_elems; j++) { sg_elem = &sg_desc->elems[j]; - if (unlikely(ionic_rx_page_alloc(q, page_info))) { - sg_elem->addr = 0; - sg_elem->len = 0; - return; + if (!buf_info->page) { /* alloc a new sg buffer? */ + if (unlikely(ionic_rx_page_alloc(q, buf_info))) { + sg_elem->addr = 0; + sg_elem->len = 0; + return; + } } - sg_elem->addr = cpu_to_le64(page_info->dma_addr); - seg_len = min_t(unsigned int, PAGE_SIZE, remain_len); - sg_elem->len = cpu_to_le16(seg_len); - remain_len -= seg_len; - page_info++; + + sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset); + frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE - buf_info->page_offset); + sg_elem->len = cpu_to_le16(frag_len); + remain_len -= frag_len; + buf_info++; + nfrags++; + } + + /* clear end sg element as a sentinel */ + if (j < q->max_sg_elems) { + sg_elem = &sg_desc->elems[j]; + memset(sg_elem, 0, sizeof(*sg_elem)); }
+ desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG : + IONIC_RXQ_DESC_OPCODE_SIMPLE; + desc_info->nbufs = nfrags; + ionic_rxq_post(q, false, ionic_rx_clean, NULL); }
@@@ -395,21 -409,24 +409,24 @@@ void ionic_rx_empty(struct ionic_queue *q) { struct ionic_desc_info *desc_info; - struct ionic_page_info *page_info; + struct ionic_buf_info *buf_info; unsigned int i, j;
for (i = 0; i < q->num_descs; i++) { desc_info = &q->info[i]; for (j = 0; j < IONIC_RX_MAX_SG_ELEMS + 1; j++) { - page_info = &desc_info->pages[j]; - if (page_info->page) - ionic_rx_page_free(q, page_info); + buf_info = &desc_info->bufs[j]; + if (buf_info->page) + ionic_rx_page_free(q, buf_info); }
- desc_info->npages = 0; + desc_info->nbufs = 0; desc_info->cb = NULL; desc_info->cb_arg = NULL; } + + q->head_idx = 0; + q->tail_idx = 0; }
static void ionic_dim_update(struct ionic_qcq *qcq) @@@ -525,7 -542,7 +542,7 @@@ int ionic_txrx_napi(struct napi_struct idev = &lif->ionic->idev; txcq = &lif->txqcqs[qi]->cq;
- tx_work_done = ionic_cq_service(txcq, lif->tx_budget, + tx_work_done = ionic_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT, ionic_tx_service, NULL, NULL);
rx_work_done = ionic_cq_service(rxcq, budget, @@@ -558,7 -575,7 +575,7 @@@ static dma_addr_t ionic_tx_map_single(s void *data, size_t len) { struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; + struct device *dev = q->dev; dma_addr_t dma_addr;
dma_addr = dma_map_single(dev, data, len, DMA_TO_DEVICE); @@@ -576,7 -593,7 +593,7 @@@ static dma_addr_t ionic_tx_map_frag(str size_t offset, size_t len) { struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; + struct device *dev = q->dev; dma_addr_t dma_addr;
dma_addr = skb_frag_dma_map(dev, frag, offset, len, DMA_TO_DEVICE); @@@ -588,42 -605,72 +605,72 @@@ return dma_addr; }
+ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) + { + struct ionic_buf_info *buf_info = desc_info->bufs; + struct device *dev = q->dev; + dma_addr_t dma_addr; + unsigned int nfrags; + skb_frag_t *frag; + int frag_idx; + + dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); + if (dma_mapping_error(dev, dma_addr)) + return -EIO; + buf_info->dma_addr = dma_addr; + buf_info->len = skb_headlen(skb); + buf_info++; + + frag = skb_shinfo(skb)->frags; + nfrags = skb_shinfo(skb)->nr_frags; + for (frag_idx = 0; frag_idx < nfrags; frag_idx++, frag++) { + dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag)); + if (dma_mapping_error(dev, dma_addr)) + goto dma_fail; + buf_info->dma_addr = dma_addr; + buf_info->len = skb_frag_size(frag); + buf_info++; + } + + desc_info->nbufs = 1 + nfrags; + + return 0; + + dma_fail: + /* unwind the frag mappings and the head mapping */ + while (frag_idx > 0) { + frag_idx--; + buf_info--; + dma_unmap_page(dev, buf_info->dma_addr, + buf_info->len, DMA_TO_DEVICE); + } + dma_unmap_single(dev, buf_info->dma_addr, buf_info->len, DMA_TO_DEVICE); + return -EIO; + } + static void ionic_tx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info, struct ionic_cq_info *cq_info, void *cb_arg) { - struct ionic_txq_sg_desc *sg_desc = desc_info->sg_desc; - struct ionic_txq_sg_elem *elem = sg_desc->elems; + struct ionic_buf_info *buf_info = desc_info->bufs; struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct ionic_txq_desc *desc = desc_info->desc; - struct device *dev = q->lif->ionic->dev; - u8 opcode, flags, nsge; + struct device *dev = q->dev; u16 queue_index; unsigned int i; - u64 addr; - - decode_txq_desc_cmd(le64_to_cpu(desc->cmd), - &opcode, &flags, &nsge, &addr); - - /* use unmap_single only if either this is not TSO, - * or this is first descriptor of a TSO - */ - if (opcode != IONIC_TXQ_DESC_OPCODE_TSO || - flags & IONIC_TXQ_DESC_FLAG_TSO_SOT) - dma_unmap_single(dev, (dma_addr_t)addr, - le16_to_cpu(desc->len), DMA_TO_DEVICE); - else - dma_unmap_page(dev, (dma_addr_t)addr, - le16_to_cpu(desc->len), DMA_TO_DEVICE);
- for (i = 0; i < nsge; i++, elem++) - dma_unmap_page(dev, (dma_addr_t)le64_to_cpu(elem->addr), - le16_to_cpu(elem->len), DMA_TO_DEVICE); + if (desc_info->nbufs) { + dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr, + buf_info->len, DMA_TO_DEVICE); + buf_info++; + for (i = 1; i < desc_info->nbufs; i++, buf_info++) + dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr, + buf_info->len, DMA_TO_DEVICE); + }
if (cb_arg) { struct sk_buff *skb = cb_arg; - u32 len = skb->len;
queue_index = skb_get_queue_mapping(skb); if (unlikely(__netif_subqueue_stopped(q->lif->netdev, @@@ -631,17 -678,21 +678,21 @@@ netif_wake_subqueue(q->lif->netdev, queue_index); q->wake++; } - dev_kfree_skb_any(skb); + + desc_info->bytes = skb->len; stats->clean++; - netdev_tx_completed_queue(q_to_ndq(q), 1, len); + + dev_consume_skb_any(skb); } }
static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) { - struct ionic_txq_comp *comp = cq_info->cq_desc; + struct ionic_txq_comp *comp = cq_info->txcq; struct ionic_queue *q = cq->bound_q; struct ionic_desc_info *desc_info; + int bytes = 0; + int pkts = 0; u16 index;
if (!color_match(comp->color, cq->done_color)) @@@ -652,13 -703,21 +703,21 @@@ */ do { desc_info = &q->info[q->tail_idx]; + desc_info->bytes = 0; index = q->tail_idx; q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); ionic_tx_clean(q, desc_info, cq_info, desc_info->cb_arg); + if (desc_info->cb_arg) { + pkts++; + bytes += desc_info->bytes; + } desc_info->cb = NULL; desc_info->cb_arg = NULL; } while (index != le16_to_cpu(comp->comp_index));
+ if (pkts && bytes) + netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes); + return true; }
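For context on the ionic_tx_service() change just above: ionic_tx_clean() now stashes the skb length in desc_info->bytes, and the service loop sums packets and bytes so byte-queue limits get one netdev_tx_completed_queue() call per cleanup pass instead of one per skb. A userspace model of that batched accounting (the struct and field names here are purely illustrative):

#include <stdio.h>

struct demo_desc {
	int has_skb;		/* models desc_info->cb_arg being set */
	unsigned int bytes;	/* models desc_info->bytes from tx_clean */
};

/* Walk a batch of completed descriptors and report the totals once. */
static void demo_complete(const struct demo_desc *descs, int n)
{
	unsigned int bytes = 0;
	int pkts = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (!descs[i].has_skb)	/* descriptor without an skb attached */
			continue;
		pkts++;
		bytes += descs[i].bytes;
	}
	if (pkts && bytes)
		printf("netdev_tx_completed_queue(q, %d, %u)\n", pkts, bytes);
}

int main(void)
{
	struct demo_desc ring[] = {
		{ .has_skb = 1, .bytes = 1514 },
		{ .has_skb = 0, .bytes = 0 },
		{ .has_skb = 1, .bytes = 60 },
	};

	demo_complete(ring, 3);	/* prints: netdev_tx_completed_queue(q, 2, 1574) */
	return 0;
}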
@@@ -677,15 -736,25 +736,25 @@@ void ionic_tx_flush(struct ionic_cq *cq void ionic_tx_empty(struct ionic_queue *q) { struct ionic_desc_info *desc_info; + int bytes = 0; + int pkts = 0;
/* walk the not completed tx entries, if any */ while (q->head_idx != q->tail_idx) { desc_info = &q->info[q->tail_idx]; + desc_info->bytes = 0; q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); ionic_tx_clean(q, desc_info, NULL, desc_info->cb_arg); + if (desc_info->cb_arg) { + pkts++; + bytes += desc_info->bytes; + } desc_info->cb = NULL; desc_info->cb_arg = NULL; } + + if (pkts && bytes) + netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes); }
static int ionic_tx_tcp_inner_pseudo_csum(struct sk_buff *skb) @@@ -756,50 -825,33 +825,33 @@@ static void ionic_tx_tso_post(struct io desc->hdr_len = cpu_to_le16(hdrlen); desc->mss = cpu_to_le16(mss);
- if (done) { + if (start) { skb_tx_timestamp(skb); netdev_tx_sent_queue(q_to_ndq(q), skb->len); - ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb); + ionic_txq_post(q, false, ionic_tx_clean, skb); } else { - ionic_txq_post(q, false, ionic_tx_clean, NULL); + ionic_txq_post(q, done, NULL, NULL); } }
- static struct ionic_txq_desc *ionic_tx_tso_next(struct ionic_queue *q, - struct ionic_txq_sg_elem **elem) - { - struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc; - struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc; - - *elem = sg_desc->elems; - return desc; - } - static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) { struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct ionic_desc_info *rewind_desc_info; - struct device *dev = q->lif->ionic->dev; + struct ionic_desc_info *desc_info; + struct ionic_buf_info *buf_info; struct ionic_txq_sg_elem *elem; struct ionic_txq_desc *desc; - unsigned int frag_left = 0; - unsigned int offset = 0; - u16 abort = q->head_idx; - unsigned int len_left; + unsigned int chunk_len; + unsigned int frag_rem; + unsigned int tso_rem; + unsigned int seg_rem; dma_addr_t desc_addr; + dma_addr_t frag_addr; unsigned int hdrlen; - unsigned int nfrags; - unsigned int seglen; - u64 total_bytes = 0; - u64 total_pkts = 0; - u16 rewind = abort; - unsigned int left; unsigned int len; unsigned int mss; - skb_frag_t *frag; bool start, done; bool outer_csum; - dma_addr_t addr; bool has_vlan; u16 desc_len; u8 desc_nsge; @@@ -807,9 -859,14 +859,14 @@@ bool encap; int err;
+ desc_info = &q->info[q->head_idx]; + buf_info = desc_info->bufs; + + if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) + return -EIO; + + len = skb->len; mss = skb_shinfo(skb)->gso_size; - nfrags = skb_shinfo(skb)->nr_frags; - len_left = skb->len - skb_headlen(skb); outer_csum = (skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM) || (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); has_vlan = !!skb_vlan_tag_present(skb); @@@ -834,125 -891,75 +891,75 @@@ else hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
- seglen = hdrlen + mss; - left = skb_headlen(skb); + tso_rem = len; + seg_rem = min(tso_rem, hdrlen + mss);
- desc = ionic_tx_tso_next(q, &elem); - start = true; + frag_addr = 0; + frag_rem = 0;
- /* Chop skb->data up into desc segments */ + start = true;
- while (left > 0) { - len = min(seglen, left); - frag_left = seglen - len; - desc_addr = ionic_tx_map_single(q, skb->data + offset, len); - if (dma_mapping_error(dev, desc_addr)) - goto err_out_abort; - desc_len = len; + while (tso_rem > 0) { + desc = NULL; + elem = NULL; + desc_addr = 0; + desc_len = 0; desc_nsge = 0; - left -= len; - offset += len; - if (nfrags > 0 && frag_left > 0) - continue; - done = (nfrags == 0 && left == 0); - ionic_tx_tso_post(q, desc, skb, - desc_addr, desc_nsge, desc_len, - hdrlen, mss, - outer_csum, - vlan_tci, has_vlan, - start, done); - total_pkts++; - total_bytes += start ? len : len + hdrlen; - desc = ionic_tx_tso_next(q, &elem); - start = false; - seglen = mss; - } - - /* Chop skb frags into desc segments */ - - for (frag = skb_shinfo(skb)->frags; len_left; frag++) { - offset = 0; - left = skb_frag_size(frag); - len_left -= left; - nfrags--; - stats->frags++; - - while (left > 0) { - if (frag_left > 0) { - len = min(frag_left, left); - frag_left -= len; - addr = ionic_tx_map_frag(q, frag, offset, len); - if (dma_mapping_error(dev, addr)) - goto err_out_abort; - elem->addr = cpu_to_le64(addr); - elem->len = cpu_to_le16(len); + /* use fragments until we have enough to post a single descriptor */ + while (seg_rem > 0) { + /* if the fragment is exhausted then move to the next one */ + if (frag_rem == 0) { + /* grab the next fragment */ + frag_addr = buf_info->dma_addr; + frag_rem = buf_info->len; + buf_info++; + } + chunk_len = min(frag_rem, seg_rem); + if (!desc) { + /* fill main descriptor */ + desc = desc_info->txq_desc; + elem = desc_info->txq_sg_desc->elems; + desc_addr = frag_addr; + desc_len = chunk_len; + } else { + /* fill sg descriptor */ + elem->addr = cpu_to_le64(frag_addr); + elem->len = cpu_to_le16(chunk_len); elem++; desc_nsge++; - left -= len; - offset += len; - if (nfrags > 0 && frag_left > 0) - continue; - done = (nfrags == 0 && left == 0); - ionic_tx_tso_post(q, desc, skb, desc_addr, - desc_nsge, desc_len, - hdrlen, mss, outer_csum, - vlan_tci, has_vlan, - start, done); - total_pkts++; - total_bytes += start ? len : len + hdrlen; - desc = ionic_tx_tso_next(q, &elem); - start = false; - } else { - len = min(mss, left); - frag_left = mss - len; - desc_addr = ionic_tx_map_frag(q, frag, - offset, len); - if (dma_mapping_error(dev, desc_addr)) - goto err_out_abort; - desc_len = len; - desc_nsge = 0; - left -= len; - offset += len; - if (nfrags > 0 && frag_left > 0) - continue; - done = (nfrags == 0 && left == 0); - ionic_tx_tso_post(q, desc, skb, desc_addr, - desc_nsge, desc_len, - hdrlen, mss, outer_csum, - vlan_tci, has_vlan, - start, done); - total_pkts++; - total_bytes += start ? len : len + hdrlen; - desc = ionic_tx_tso_next(q, &elem); - start = false; } + frag_addr += chunk_len; + frag_rem -= chunk_len; + tso_rem -= chunk_len; + seg_rem -= chunk_len; } + seg_rem = min(tso_rem, mss); + done = (tso_rem == 0); + /* post descriptor */ + ionic_tx_tso_post(q, desc, skb, + desc_addr, desc_nsge, desc_len, + hdrlen, mss, outer_csum, vlan_tci, has_vlan, + start, done); + start = false; + /* Buffer information is stored with the first tso descriptor */ + desc_info = &q->info[q->head_idx]; + desc_info->nbufs = 0; }
- stats->pkts += total_pkts; - stats->bytes += total_bytes; + stats->pkts += DIV_ROUND_UP(len - hdrlen, mss); + stats->bytes += len; stats->tso++; - stats->tso_bytes += total_bytes; + stats->tso_bytes = len;
return 0; - - err_out_abort: - while (rewind != q->head_idx) { - rewind_desc_info = &q->info[rewind]; - ionic_tx_clean(q, rewind_desc_info, NULL, NULL); - rewind = (rewind + 1) & (q->num_descs - 1); - } - q->head_idx = abort; - - return -ENOMEM; }
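The rewritten ionic_tx_tso() above maps the whole skb once via ionic_tx_map_skb() and then carves descriptor-sized segments (hdrlen + mss for the first, mss afterwards) out of the mapped buffer list, crossing fragment boundaries as needed. A compact userspace model of that carving loop — buffer lengths, hdrlen and mss are made-up demo values:

#include <stdio.h>

int main(void)
{
	unsigned int frags[] = { 200, 3000, 1800 };	/* mapped buffer lengths */
	unsigned int fi = 0, frag_rem = 0;
	unsigned int hdrlen = 54, mss = 1400;
	unsigned int tso_rem = 200 + 3000 + 1800;
	unsigned int seg_rem = (tso_rem < hdrlen + mss) ? tso_rem : hdrlen + mss;
	int seg = 0;

	while (tso_rem > 0) {
		unsigned int seg_len = 0;

		while (seg_rem > 0) {
			unsigned int chunk;

			if (frag_rem == 0)	/* move to the next mapped buffer */
				frag_rem = frags[fi++];
			chunk = (frag_rem < seg_rem) ? frag_rem : seg_rem;
			seg_len += chunk;	/* one descriptor/sg element per chunk */
			frag_rem -= chunk;
			tso_rem -= chunk;
			seg_rem -= chunk;
		}
		printf("segment %d: %u bytes\n", seg++, seg_len);
		seg_rem = (tso_rem < mss) ? tso_rem : mss;
	}
	return 0;	/* prints segments of 1454, 1400, 1400 and 746 bytes */
}

With all DMA mappings done up front in ionic_tx_map_skb() (which unwinds its own failures), the old err_out_abort rewind path becomes unnecessary, which is why it disappears in this hunk.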
- static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb) + static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { - struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc; + struct ionic_txq_desc *desc = desc_info->txq_desc; + struct ionic_buf_info *buf_info = desc_info->bufs; struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; - dma_addr_t dma_addr; bool has_vlan; u8 flags = 0; bool encap; @@@ -961,23 -968,22 +968,22 @@@ has_vlan = !!skb_vlan_tag_present(skb); encap = skb->encapsulation;
- dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); - if (dma_mapping_error(dev, dma_addr)) - return -ENOMEM; - flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0; flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL, - flags, skb_shinfo(skb)->nr_frags, dma_addr); + flags, skb_shinfo(skb)->nr_frags, + buf_info->dma_addr); desc->cmd = cpu_to_le64(cmd); - desc->len = cpu_to_le16(skb_headlen(skb)); - desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb)); - desc->csum_offset = cpu_to_le16(skb->csum_offset); + desc->len = cpu_to_le16(buf_info->len); if (has_vlan) { desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb)); stats->vlan_inserted++; + } else { + desc->vlan_tci = 0; } + desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb)); + desc->csum_offset = cpu_to_le16(skb->csum_offset);
if (skb_csum_is_sctp(skb)) stats->crc32_csum++; @@@ -987,12 -993,12 +993,12 @@@ return 0; }
- static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb) + static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { - struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc; + struct ionic_txq_desc *desc = desc_info->txq_desc; + struct ionic_buf_info *buf_info = desc_info->bufs; struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; - dma_addr_t dma_addr; bool has_vlan; u8 flags = 0; bool encap; @@@ -1001,67 -1007,66 +1007,66 @@@ has_vlan = !!skb_vlan_tag_present(skb); encap = skb->encapsulation;
- dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); - if (dma_mapping_error(dev, dma_addr)) - return -ENOMEM; - flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0; flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_NONE, - flags, skb_shinfo(skb)->nr_frags, dma_addr); + flags, skb_shinfo(skb)->nr_frags, + buf_info->dma_addr); desc->cmd = cpu_to_le64(cmd); - desc->len = cpu_to_le16(skb_headlen(skb)); + desc->len = cpu_to_le16(buf_info->len); if (has_vlan) { desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb)); stats->vlan_inserted++; + } else { + desc->vlan_tci = 0; } + desc->csum_start = 0; + desc->csum_offset = 0;
stats->csum_none++;
return 0; }
- static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb) + static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { - struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc; - unsigned int len_left = skb->len - skb_headlen(skb); + struct ionic_txq_sg_desc *sg_desc = desc_info->txq_sg_desc; + struct ionic_buf_info *buf_info = &desc_info->bufs[1]; struct ionic_txq_sg_elem *elem = sg_desc->elems; struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; - dma_addr_t dma_addr; - skb_frag_t *frag; - u16 len; + unsigned int i;
- for (frag = skb_shinfo(skb)->frags; len_left; frag++, elem++) { - len = skb_frag_size(frag); - elem->len = cpu_to_le16(len); - dma_addr = ionic_tx_map_frag(q, frag, 0, len); - if (dma_mapping_error(dev, dma_addr)) - return -ENOMEM; - elem->addr = cpu_to_le64(dma_addr); - len_left -= len; - stats->frags++; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, buf_info++, elem++) { + elem->addr = cpu_to_le64(buf_info->dma_addr); + elem->len = cpu_to_le16(buf_info->len); }
+ stats->frags += skb_shinfo(skb)->nr_frags; + return 0; }
static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb) { + struct ionic_desc_info *desc_info = &q->info[q->head_idx]; struct ionic_tx_stats *stats = q_to_tx_stats(q); int err;
+ if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) + return -EIO; + /* set up the initial descriptor */ if (skb->ip_summed == CHECKSUM_PARTIAL) - err = ionic_tx_calc_csum(q, skb); + err = ionic_tx_calc_csum(q, skb, desc_info); else - err = ionic_tx_calc_no_csum(q, skb); + err = ionic_tx_calc_no_csum(q, skb, desc_info); if (err) return err;
/* add frags */ - err = ionic_tx_skb_frags(q, skb); + err = ionic_tx_skb_frags(q, skb, desc_info); if (err) return err;
@@@ -1077,19 -1082,16 +1082,18 @@@
static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) { - int sg_elems = q->lif->qtype_info[IONIC_QTYPE_TXQ].max_sg_elems; struct ionic_tx_stats *stats = q_to_tx_stats(q); + int ndescs; int err;
- /* If TSO, need roundup(skb->len/mss) descs */ + /* Each desc is mss long max, so a descriptor for each gso_seg */ if (skb_is_gso(skb)) - return (skb->len / skb_shinfo(skb)->gso_size) + 1; + ndescs = skb_shinfo(skb)->gso_segs; + else + ndescs = 1;
- if (skb_shinfo(skb)->nr_frags <= sg_elems) - /* If non-TSO, just need 1 desc and nr_frags sg elems */ + if (skb_shinfo(skb)->nr_frags <= q->max_sg_elems) - return 1; + return ndescs;
/* Too many frags, so linearize */ err = skb_linearize(skb); @@@ -1098,7 -1100,8 +1102,7 @@@
stats->linearize++;
- /* Need 1 desc and zero sg elems */ - return 1; + return ndescs; }
static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs) diff --combined drivers/net/ethernet/realtek/r8169_main.c index 581a92fc3292,66d10aa47c08..7a8bb7e833f3 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@@ -1586,12 -1586,10 +1586,10 @@@ DECLARE_RTL_COND(rtl_counters_cond
static void rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd) { - dma_addr_t paddr = tp->counters_phys_addr; - u32 cmd; + u32 cmd = lower_32_bits(tp->counters_phys_addr);
- RTL_W32(tp, CounterAddrHigh, (u64)paddr >> 32); + RTL_W32(tp, CounterAddrHigh, upper_32_bits(tp->counters_phys_addr)); rtl_pci_commit(tp); - cmd = (u64)paddr & DMA_BIT_MASK(32); RTL_W32(tp, CounterAddrLow, cmd); RTL_W32(tp, CounterAddrLow, cmd | counter_cmd);
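upper_32_bits()/lower_32_bits() in the new rtl8169_do_counters() are the stock kernel helpers for splitting a 64-bit value, replacing the open-coded shift and DMA_BIT_MASK(32) masking. A trivial userspace equivalent (the address value is made up):

#include <stdint.h>
#include <stdio.h>

static uint32_t demo_lower_32_bits(uint64_t n) { return (uint32_t)n; }
static uint32_t demo_upper_32_bits(uint64_t n) { return (uint32_t)(n >> 32); }

int main(void)
{
	uint64_t paddr = 0x0000001234abc000ull;	/* counters DMA address (demo) */

	printf("CounterAddrHigh=%#x CounterAddrLow=%#x\n",
	       demo_upper_32_bits(paddr), demo_lower_32_bits(paddr));
	/* CounterAddrHigh=0x12 CounterAddrLow=0x34abc000 */
	return 0;
}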
@@@ -1903,6 -1901,15 +1901,15 @@@ static int rtl8169_set_eee(struct net_d return ret; }
+ static void rtl8169_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *data) + { + data->rx_max_pending = NUM_RX_DESC; + data->rx_pending = NUM_RX_DESC; + data->tx_max_pending = NUM_TX_DESC; + data->tx_pending = NUM_TX_DESC; + } + static const struct ethtool_ops rtl8169_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, @@@ -1923,6 -1930,7 +1930,7 @@@ .set_eee = rtl8169_set_eee, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_ringparam = rtl8169_get_ringparam, };
static void rtl_enable_eee(struct rtl8169_private *tp) @@@ -4646,9 -4654,6 +4654,9 @@@ static void rtl8169_down(struct rtl8169
rtl8169_update_counters(tp);
+ pci_clear_master(tp->pci_dev); + rtl_pci_commit(tp); + rtl8169_cleanup(tp, true);
rtl_prepare_power_down(tp); @@@ -4656,7 -4661,6 +4664,7 @@@
static void rtl8169_up(struct rtl8169_private *tp) { + pci_set_master(tp->pci_dev); phy_resume(tp->phydev); rtl8169_init_phy(tp); napi_enable(&tp->napi); @@@ -5311,6 -5315,8 +5319,6 @@@ static int rtl_init_one(struct pci_dev
rtl_hw_reset(tp);
- pci_set_master(pdev); - rc = rtl_alloc_irq(tp); if (rc < 0) { dev_err(&pdev->dev, "Can't allocate interrupt\n"); diff --combined drivers/net/ipa/ipa_qmi.c index e594bf3b600f,af8666b89b37..f3746ed9b797 --- a/drivers/net/ipa/ipa_qmi.c +++ b/drivers/net/ipa/ipa_qmi.c @@@ -249,7 -249,6 +249,7 @@@ static const struct qmi_msg_handler ipa .decoded_size = IPA_QMI_DRIVER_INIT_COMPLETE_REQ_SZ, .fn = ipa_server_driver_init_complete, }, + { }, };
/* Handle an INIT_DRIVER response message from the modem. */ @@@ -270,7 -269,6 +270,7 @@@ static const struct qmi_msg_handler ipa .decoded_size = IPA_QMI_INIT_DRIVER_RSP_SZ, .fn = ipa_client_init_driver, }, + { }, };
/* Return a pointer to an init modem driver request structure, which contains @@@ -380,7 -378,7 +380,7 @@@ init_modem_driver_req(struct ipa_qmi *i /* None of the stats fields are valid (IPA v4.0 and above) */
if (ipa->version != IPA_VERSION_3_5_1) { - mem = &ipa->mem[IPA_MEM_STATS_QUOTA]; + mem = &ipa->mem[IPA_MEM_STATS_QUOTA_MODEM]; if (mem->size) { req.hw_stats_quota_base_addr_valid = 1; req.hw_stats_quota_base_addr = diff --combined drivers/net/phy/phylink.c index dc2800beacc3,12a047d47dec..96d8e88b4e46 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@@ -271,8 -271,9 +271,9 @@@ static int phylink_parse_mode(struct ph pl->cfg_link_an_mode = MLO_AN_FIXED; fwnode_handle_put(dn);
- if (fwnode_property_read_string(fwnode, "managed", &managed) == 0 && - strcmp(managed, "in-band-status") == 0) { + if ((fwnode_property_read_string(fwnode, "managed", &managed) == 0 && + strcmp(managed, "in-band-status") == 0) || + pl->config->ovr_an_inband) { if (pl->cfg_link_an_mode == MLO_AN_FIXED) { phylink_err(pl, "can't use both fixed-link and in-band-status\n"); @@@ -476,7 -477,7 +477,7 @@@ static void phylink_major_config(struc err = pl->mac_ops->mac_finish(pl->config, pl->cur_link_an_mode, state->interface); if (err < 0) - phylink_err(pl, "mac_prepare failed: %pe\n", + phylink_err(pl, "mac_finish failed: %pe\n", ERR_PTR(err)); } } diff --combined drivers/s390/net/qeth_core_main.c index 175b82b98f36,6954d4e831a3..a1f08e9aa064 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@@ -369,7 -369,8 +369,7 @@@ static int qeth_cq_init(struct qeth_car QDIO_MAX_BUFFERS_PER_Q); card->qdio.c_q->next_buf_to_init = 127; rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, - card->qdio.no_in_queues - 1, 0, - 127); + card->qdio.no_in_queues - 1, 0, 127, NULL); if (rc) { QETH_CARD_TEXT_(card, 2, "1err%d", rc); goto out; @@@ -382,22 -383,48 +382,22 @@@ out
static int qeth_alloc_cq(struct qeth_card *card) { - int rc; - if (card->options.cq == QETH_CQ_ENABLED) { - int i; - struct qdio_outbuf_state *outbuf_states; - QETH_CARD_TEXT(card, 2, "cqon"); card->qdio.c_q = qeth_alloc_qdio_queue(); if (!card->qdio.c_q) { - rc = -1; - goto kmsg_out; + dev_err(&card->gdev->dev, "Failed to create completion queue\n"); + return -ENOMEM; } + card->qdio.no_in_queues = 2; - card->qdio.out_bufstates = - kcalloc(card->qdio.no_out_queues * - QDIO_MAX_BUFFERS_PER_Q, - sizeof(struct qdio_outbuf_state), - GFP_KERNEL); - outbuf_states = card->qdio.out_bufstates; - if (outbuf_states == NULL) { - rc = -1; - goto free_cq_out; - } - for (i = 0; i < card->qdio.no_out_queues; ++i) { - card->qdio.out_qs[i]->bufstates = outbuf_states; - outbuf_states += QDIO_MAX_BUFFERS_PER_Q; - } } else { QETH_CARD_TEXT(card, 2, "nocq"); card->qdio.c_q = NULL; card->qdio.no_in_queues = 1; } QETH_CARD_TEXT_(card, 2, "iqc%d", card->qdio.no_in_queues); - rc = 0; -out: - return rc; -free_cq_out: - qeth_free_qdio_queue(card->qdio.c_q); - card->qdio.c_q = NULL; -kmsg_out: - dev_err(&card->gdev->dev, "Failed to create completion queue\n"); - goto out; + return 0; }
static void qeth_free_cq(struct qeth_card *card) @@@ -407,6 -434,8 +407,6 @@@ qeth_free_qdio_queue(card->qdio.c_q); card->qdio.c_q = NULL; } - kfree(card->qdio.out_bufstates); - card->qdio.out_bufstates = NULL; }
static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15, @@@ -458,12 -487,12 +458,12 @@@ static void qeth_qdio_handle_aob(struc switch (atomic_xchg(&buffer->state, new_state)) { case QETH_QDIO_BUF_PRIMED: /* Faster than TX completion code, let it handle the async - * completion for us. + * completion for us. It will also recycle the QAOB. */ break; case QETH_QDIO_BUF_PENDING: /* TX completion code is active and will handle the async - * completion for us. + * completion for us. It will also recycle the QAOB. */ break; case QETH_QDIO_BUF_NEED_QAOB: @@@ -472,7 -501,7 +472,7 @@@ qeth_notify_skbs(buffer->q, buffer, notification);
/* Free dangling allocations. The attached skbs are handled by - * qeth_tx_complete_pending_bufs(). + * qeth_tx_complete_pending_bufs(), and so is the QAOB. */ for (i = 0; i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card); @@@ -491,6 -520,8 +491,6 @@@ default: WARN_ON_ONCE(1); } - - qdio_release_aob(aob); }
static void qeth_setup_ccw(struct ccw1 *ccw, u8 cmd_code, u8 flags, u32 len, @@@ -1420,16 -1451,9 +1420,16 @@@ static void qeth_clear_output_buffer(st atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY); }
+static void qeth_free_out_buf(struct qeth_qdio_out_buffer *buf) +{ + if (buf->aob) + qdio_release_aob(buf->aob); + kmem_cache_free(qeth_qdio_outbuf_cache, buf); +} + static void qeth_tx_complete_pending_bufs(struct qeth_card *card, struct qeth_qdio_out_q *queue, - bool drain) + bool drain, int budget) { struct qeth_qdio_out_buffer *buf, *tmp;
@@@ -1441,10 -1465,10 +1441,10 @@@ if (drain) qeth_notify_skbs(queue, buf, TX_NOTIFY_GENERALERROR); - qeth_tx_complete_buf(buf, drain, 0); + qeth_tx_complete_buf(buf, drain, budget);
list_del(&buf->list_entry); - kmem_cache_free(qeth_qdio_outbuf_cache, buf); + qeth_free_out_buf(buf); } } } @@@ -1453,7 -1477,7 +1453,7 @@@ static void qeth_drain_output_queue(str { int j;
- qeth_tx_complete_pending_bufs(q->card, q, true); + qeth_tx_complete_pending_bufs(q->card, q, true, 0);
for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { if (!q->bufs[j]) @@@ -1461,7 -1485,7 +1461,7 @@@
qeth_clear_output_buffer(q, q->bufs[j], true, 0); if (free) { - kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[j]); + qeth_free_out_buf(q->bufs[j]); q->bufs[j] = NULL; } } @@@ -2566,11 -2590,12 +2566,12 @@@ static int qeth_ulp_setup(struct qeth_c return qeth_send_control_data(card, iob, qeth_ulp_setup_cb, NULL); }
- static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx) + static int qeth_alloc_out_buf(struct qeth_qdio_out_q *q, unsigned int bidx, + gfp_t gfp) { struct qeth_qdio_out_buffer *newbuf;
- newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, GFP_ATOMIC); + newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, gfp); if (!newbuf) return -ENOMEM;
@@@ -2605,7 -2630,7 +2606,7 @@@ static struct qeth_qdio_out_q *qeth_all goto err_qdio_bufs;
for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) { - if (qeth_init_qdio_out_buf(q, i)) + if (qeth_alloc_out_buf(q, i, GFP_KERNEL)) goto err_out_bufs; }
@@@ -2613,7 -2638,7 +2614,7 @@@
err_out_bufs: while (i > 0) - kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[--i]); + qeth_free_out_buf(q->bufs[--i]); qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); err_qdio_bufs: kfree(q); @@@ -3000,8 -3025,7 +3001,8 @@@ static int qeth_init_qdio_queues(struc }
card->qdio.in_q->next_buf_to_init = QDIO_BUFNR(rx_bufs); - rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0, rx_bufs); + rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0, rx_bufs, + NULL); if (rc) { QETH_CARD_TEXT_(card, 2, "1err%d", rc); return rc; @@@ -3493,7 -3517,7 +3494,7 @@@ static unsigned int qeth_rx_refill_queu }
rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, - queue->next_buf_to_init, count); + queue->next_buf_to_init, count, NULL); if (rc) { QETH_CARD_TEXT(card, 2, "qinberr"); } @@@ -3602,7 -3626,6 +3603,7 @@@ static void qeth_flush_buffers(struct q struct qeth_qdio_out_buffer *buf = queue->bufs[index]; unsigned int qdio_flags = QDIO_FLAG_SYNC_OUTPUT; struct qeth_card *card = queue->card; + struct qaob *aob = NULL; int rc; int i;
@@@ -3615,24 -3638,16 +3616,24 @@@ SBAL_EFLAGS_LAST_ENTRY; queue->coalesced_frames += buf->frames;
- if (queue->bufstates) - queue->bufstates[bidx].user = buf; - if (IS_IQD(card)) { skb_queue_walk(&buf->skb_list, skb) skb_tx_timestamp(skb); } }
- if (!IS_IQD(card)) { + if (IS_IQD(card)) { + if (card->options.cq == QETH_CQ_ENABLED && + !qeth_iqd_is_mcast_queue(card, queue) && + count == 1) { + if (!buf->aob) + buf->aob = qdio_allocate_aob(); + if (buf->aob) { + aob = buf->aob; + aob->user1 = (u64) buf; + } + } + } else { if (!queue->do_pack) { if ((atomic_read(&queue->used_buffers) >= (QETH_HIGH_WATERMARK_PACK - @@@ -3663,8 -3678,8 +3664,8 @@@ }
QETH_TXQ_STAT_INC(queue, doorbell); - rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags, - queue->queue_no, index, count); + rc = do_QDIO(CARD_DDEV(card), qdio_flags, queue->queue_no, index, count, + aob);
switch (rc) { case 0: @@@ -3800,7 -3815,8 +3801,7 @@@ static void qeth_qdio_cq_handler(struc qeth_scrub_qdio_buffer(buffer, QDIO_MAX_ELEMENTS_PER_BUFFER); } rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, queue, - card->qdio.c_q->next_buf_to_init, - count); + cq->next_buf_to_init, count, NULL); if (rc) { dev_warn(&card->gdev->dev, "QDIO reported an error, rc=%i\n", rc); @@@ -5255,6 -5271,7 +5256,6 @@@ static int qeth_qdio_establish(struct q init_data.int_parm = (unsigned long) card; init_data.input_sbal_addr_array = in_sbal_ptrs; init_data.output_sbal_addr_array = out_sbal_ptrs; - init_data.output_sbal_state_array = card->qdio.out_bufstates; init_data.scan_threshold = IS_IQD(card) ? 0 : 32;
if (atomic_cmpxchg(&card->qdio.state, QETH_QDIO_ALLOCATED, @@@ -6053,15 -6070,7 +6054,15 @@@ static void qeth_iqd_tx_complete(struc bool error = !!qdio_error;
if (qdio_error == QDIO_ERROR_SLSB_PENDING) { - WARN_ON_ONCE(card->options.cq != QETH_CQ_ENABLED); + struct qaob *aob = buffer->aob; + + if (!aob) { + netdev_WARN_ONCE(card->dev, + "Pending TX buffer %#x without QAOB on TX queue %u\n", + bidx, queue->queue_no); + qeth_schedule_recovery(card); + return; + }
QETH_CARD_TEXT_(card, 5, "pel%u", bidx);
@@@ -6080,7 -6089,8 +6081,8 @@@
/* Prepare the queue slot for immediate re-use: */ qeth_scrub_qdio_buffer(buffer->buffer, queue->max_elements); - if (qeth_init_qdio_out_buf(queue, bidx)) { + if (qeth_alloc_out_buf(queue, bidx, + GFP_ATOMIC)) { QETH_CARD_TEXT(card, 2, "outofbuf"); qeth_schedule_recovery(card); } @@@ -6117,8 -6127,6 +6119,8 @@@ default: WARN_ON_ONCE(1); } + + memset(aob, 0, sizeof(*aob)); } else if (card->options.cq == QETH_CQ_ENABLED) { qeth_notify_skbs(queue, buffer, qeth_compute_cq_notification(sflags, 0)); @@@ -6144,7 -6152,7 +6146,7 @@@ static int qeth_tx_poll(struct napi_str unsigned int bytes = 0; int completed;
- qeth_tx_complete_pending_bufs(card, queue, false); + qeth_tx_complete_pending_bufs(card, queue, false, budget);
if (qeth_out_queue_is_empty(queue)) { napi_complete(napi); diff --combined include/linux/bpf.h index 3625f019767d,a25730eaa148..39dce9d3c3a5 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@@ -21,7 -21,6 +21,7 @@@ #include <linux/capability.h> #include <linux/sched/mm.h> #include <linux/slab.h> +#include <linux/percpu-refcount.h>
struct bpf_verifier_env; struct bpf_verifier_log; @@@ -40,6 -39,7 +40,7 @@@ struct bpf_local_storage struct bpf_local_storage_map; struct kobject; struct mem_cgroup; + struct bpf_func_state;
extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@@ -118,6 -118,9 +119,9 @@@ struct bpf_map_ops void *owner, u32 size); struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
+ /* Misc helpers.*/ + int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags); + /* map_meta_equal must be implemented for maps that can be * used as an inner map. It is a runtime check to ensure * an inner map can be inserted to an outer map. @@@ -130,6 -133,13 +134,13 @@@ bool (*map_meta_equal)(const struct bpf_map *meta0, const struct bpf_map *meta1);
+ + int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn, + void *callback_ctx, u64 flags); + /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; @@@ -296,6 -306,8 +307,8 @@@ enum bpf_arg_type ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ + ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ + ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ __BPF_ARG_TYPE_MAX, };
@@@ -412,6 -424,8 +425,8 @@@ enum bpf_reg_type PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ + PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_MAP_KEY, /* reg points to a map element key */ };
/* The information passed from prog-specific *_is_valid_access @@@ -507,6 -521,11 +522,11 @@@ enum bpf_cgroup_storage_type */ #define MAX_BPF_FUNC_ARGS 12
+ /* The maximum number of arguments passed through registers + * a single function may have. + */ + #define MAX_BPF_FUNC_REG_ARGS 5 + struct btf_func_model { u8 ret_size; u8 nr_args; @@@ -557,8 -576,7 +577,8 @@@ struct bpf_tramp_progs * fentry = a set of program to run before calling original function * fexit = a set of program to run after original function */ -int arch_prepare_bpf_trampoline(void *image, void *image_end, +struct bpf_tramp_image; +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_progs *tprogs, void *orig_call); @@@ -567,8 -585,6 +587,8 @@@ u64 notrace __bpf_prog_enter(struct bpf void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog); void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start); +void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); +void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
struct bpf_ksym { unsigned long start; @@@ -587,18 -603,6 +607,18 @@@ enum bpf_tramp_prog_type BPF_TRAMP_REPLACE, /* more than MAX */ };
+struct bpf_tramp_image { + void *image; + struct bpf_ksym ksym; + struct percpu_ref pcref; + void *ip_after_call; + void *ip_epilogue; + union { + struct rcu_head rcu; + struct work_struct work; + }; +}; + struct bpf_trampoline { /* hlist for trampoline_table */ struct hlist_node hlist; @@@ -621,8 -625,9 +641,8 @@@ /* Number of attached programs. A counter per kind. */ int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ - void *image; + struct bpf_tramp_image *cur_image; u64 selector; - struct bpf_ksym ksym; };
struct bpf_attach_target_info { @@@ -706,8 -711,6 +726,8 @@@ void bpf_image_ksym_add(void *data, str void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); +int bpf_jit_charge_modmem(u32 pages); +void bpf_jit_uncharge_modmem(u32 pages); #else static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) @@@ -804,6 -807,7 +824,6 @@@ struct bpf_prog_aux bool func_proto_unreliable; bool sleepable; bool tail_call_reachable; - enum bpf_tramp_prog_type trampoline_prog_type; struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; @@@ -1109,7 -1113,7 +1129,7 @@@ int bpf_prog_array_copy(struct bpf_prog _ret; \ })
-#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ +#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage) \ ({ \ struct bpf_prog_array_item *_item; \ struct bpf_prog *_prog; \ @@@ -1122,8 -1126,7 +1142,8 @@@ goto _out; \ _item = &_array->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ - bpf_cgroup_storage_set(_item->cgroup_storage); \ + if (set_cg_storage) \ + bpf_cgroup_storage_set(_item->cgroup_storage); \ _ret &= func(_prog, ctx); \ _item++; \ } \ @@@ -1170,10 -1173,10 +1190,10 @@@ _out: })
#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ - __BPF_PROG_RUN_ARRAY(array, ctx, func, false) + __BPF_PROG_RUN_ARRAY(array, ctx, func, false, true)
#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ - __BPF_PROG_RUN_ARRAY(array, ctx, func, true) + __BPF_PROG_RUN_ARRAY(array, ctx, func, true, false)
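The extra set_cg_storage argument means only BPF_PROG_RUN_ARRAY installs per-program cgroup storage before each run, while the _CHECK variant now skips it. A minimal caller sketch, purely for illustration (the function name is made up; BPF_PROG_RUN is the per-program dispatch macro of this kernel generation):

	/* Hypothetical caller, for illustration only. */
	static int run_skb_progs(struct bpf_prog_array __rcu *effective,
				 struct sk_buff *skb)
	{
		/* Runs every program in @effective on @skb. With the change
		 * above, the non-_CHECK variant passes set_cg_storage == true,
		 * so bpf_cgroup_storage_set() is called before each program.
		 * The result is the logical AND of all program return codes.
		 */
		return BPF_PROG_RUN_ARRAY(effective, skb, BPF_PROG_RUN);
	}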
#ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); @@@ -1397,6 -1400,10 +1417,10 @@@ void bpf_iter_map_show_fdinfo(const str int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info);
+ int map_set_for_each_callback_args(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, @@@ -1446,9 -1453,9 +1470,9 @@@ struct btf *bpf_get_btf_vmlinux(void) /* Map specifics */ struct xdp_buff; struct sk_buff; + struct bpf_dtab_netdev; + struct bpf_cpu_map_entry;
- struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); - struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); void __dev_flush(void); int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, struct net_device *dev_rx); @@@ -1458,7 -1465,6 +1482,6 @@@ int dev_map_generic_redirect(struct bpf struct bpf_prog *xdp_prog); bool dev_map_can_have_prog(struct bpf_map *map);
- struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_flush(void); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); @@@ -1487,6 -1493,9 +1510,9 @@@ int bpf_prog_test_run_flow_dissector(st int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); + int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@@ -1516,6 -1525,7 +1542,7 @@@ struct bpf_prog *bpf_prog_by_id(u32 id) struct bpf_link *bpf_link_by_id(u32 id);
const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); + void bpf_task_storage_free(struct task_struct *task); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@@ -1585,17 -1595,6 +1612,6 @@@ static inline int bpf_obj_get_user(cons return -EOPNOTSUPP; }
- static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, - u32 key) - { - return NULL; - } - - static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map, - u32 key) - { - return NULL; - } static inline bool dev_map_can_have_prog(struct bpf_map *map) { return false; @@@ -1607,6 -1606,7 +1623,7 @@@ static inline void __dev_flush(void
struct xdp_buff; struct bpf_dtab_netdev; + struct bpf_cpu_map_entry;
static inline int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, @@@ -1631,12 -1631,6 +1648,6 @@@ static inline int dev_map_generic_redir return 0; }
- static inline - struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) - { - return NULL; - } - static inline void __cpu_map_flush(void) { } @@@ -1687,6 -1681,13 +1698,13 @@@ static inline int bpf_prog_test_run_flo return -ENOTSUPP; }
+ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) + { + return -ENOTSUPP; + } + static inline void bpf_map_put(struct bpf_map *map) { } @@@ -1701,6 -1702,10 +1719,10 @@@ bpf_base_func_proto(enum bpf_func_id fu { return NULL; } + + static inline void bpf_task_storage_free(struct task_struct *task) + { + } #endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@@ -1785,22 -1790,24 +1807,24 @@@ static inline void bpf_map_offload_map_ } #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
- #if defined(CONFIG_BPF_STREAM_PARSER) - int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which); + #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); + + void bpf_sk_reuseport_detach(struct sock *sk); + int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, + void *value); + int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags); #else - static inline int sock_map_prog_update(struct bpf_map *map, - struct bpf_prog *prog, - struct bpf_prog *old, u32 which) + static inline void bpf_sk_reuseport_detach(struct sock *sk) { - return -EOPNOTSUPP; }
+ #ifdef CONFIG_BPF_SYSCALL static inline int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) { @@@ -1818,20 -1825,7 +1842,7 @@@ static inline int sock_map_update_elem_ { return -EOPNOTSUPP; } - #endif /* CONFIG_BPF_STREAM_PARSER */
- #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) - void bpf_sk_reuseport_detach(struct sock *sk); - int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, - void *value); - int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags); - #else - static inline void bpf_sk_reuseport_detach(struct sock *sk) - { - } - - #ifdef CONFIG_BPF_SYSCALL static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, void *value) { @@@ -1903,6 -1897,9 +1914,9 @@@ extern const struct bpf_func_proto bpf_ extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; + extern const struct bpf_func_proto bpf_task_storage_get_proto; + extern const struct bpf_func_proto bpf_task_storage_delete_proto; + extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --combined include/linux/mlx5/driver.h index ab07f09f2bad,23bb01d7c9b9..3d146f1b2e62 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@@ -644,10 -644,14 +644,14 @@@ struct mlx5_td };
struct mlx5e_resources { - u32 pdn; - struct mlx5_td td; - struct mlx5_core_mkey mkey; - struct mlx5_sq_bfreg bfreg; + struct mlx5e_hw_objs { + u32 pdn; + struct mlx5_td td; + struct mlx5_core_mkey mkey; + struct mlx5_sq_bfreg bfreg; + } hw_objs; + struct devlink_port dl_port; + struct net_device *uplink_netdev; };
enum mlx5_sw_icm_type { @@@ -1226,7 -1230,7 +1230,7 @@@ enum MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, };
-static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) +static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev) { struct devlink *devlink = priv_to_devlink(dev); union devlink_param_value val; diff --combined include/linux/netdevice.h index 87a5d186faff,7005ad80e8d1..da39991ecf8b --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@@ -360,7 -360,6 +360,7 @@@ enum NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ + NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ };
enum { @@@ -373,7 -372,6 +373,7 @@@ NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), + NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), };
enum gro_result { @@@ -756,6 -754,13 +756,13 @@@ struct rx_queue_attribute const char *buf, size_t len); };
+ /* XPS map type and offset of the xps map within net_device->xps_maps[]. */ + enum xps_map_type { + XPS_CPUS = 0, + XPS_RXQS, + XPS_MAPS_MAX, + }; + #ifdef CONFIG_XPS /* * This structure holds an XPS map which can be of variable length. The @@@ -773,9 -778,19 +780,19 @@@ struct xps_map
/* * This structure holds all XPS maps for device. Maps are indexed by CPU. + * + * We keep track of the number of cpus/rxqs used when the struct is allocated, + * in nr_ids. This helps avoid accessing out-of-bounds memory. + * + * We keep track of the number of traffic classes used when the struct is + * allocated, in num_tc. This will be used to navigate the maps, to ensure we're + * not crossing their upper bound, as the original dev->num_tc can be updated in + * the meantime. */ struct xps_dev_maps { struct rcu_head rcu; + unsigned int nr_ids; + s16 num_tc; struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */ };
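The new nr_ids and num_tc fields carry the bounds that later lookups must respect. A minimal sketch of a bounds-checked lookup, assuming the flattened [attr_id][tc] layout described above (the helper name is hypothetical, not the actual net/core/dev.c code):

	static struct xps_map *example_xps_lookup(struct xps_dev_maps *dev_maps,
						  unsigned int attr_id, int tc)
	{
		/* attr_id is a CPU id or an RX queue id, depending on which of
		 * the two maps this is; reject anything outside the recorded
		 * bounds instead of trusting the current dev->num_tc.
		 */
		if (!dev_maps || attr_id >= dev_maps->nr_ids ||
		    tc < 0 || tc >= dev_maps->num_tc)
			return NULL;

		/* Maps are assumed flattened as attr_map[attr_id * num_tc + tc]. */
		return rcu_dereference(dev_maps->attr_map[attr_id * dev_maps->num_tc + tc]);
	}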
@@@ -1520,6 -1535,8 +1537,8 @@@ struct net_device_ops * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running + * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with + * skb_headlen(skb) == 0 (data starts from frag0) */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@@ -1553,6 -1570,7 +1572,7 @@@ IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_LIVE_RENAME_OK = 1<<30, + IFF_TX_SKB_NO_LINEAR = 1<<31, };
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@@ -1579,12 -1597,14 +1599,14 @@@ #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE #define IFF_TEAM IFF_TEAM #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED + #define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM #define IFF_MACSEC IFF_MACSEC #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER #define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK + #define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR
/* Specifies the type of the struct net_device::ml_priv pointer */ enum netdev_ml_priv_type { @@@ -1760,8 -1780,7 +1782,7 @@@ * @tx_queue_len: Max frames per queue allowed * @tx_global_lock: XXX: need comments on this one * @xdp_bulkq: XDP device bulk queue - * @xps_cpus_map: all CPUs map for XPS device - * @xps_rxqs_map: all RXQs map for XPS device + * @xps_maps: all CPUs/RXQs maps for XPS device * * @xps_maps: XXX: need comments on this one * @miniq_egress: clsact qdisc specific data for @@@ -1773,6 -1792,7 +1794,7 @@@ * * @proto_down_reason: reason a netdev interface is held down * @pcpu_refcnt: Number of references to this device + * @dev_refcnt: Number of references to this device * @todo_list: Delayed register/unregister * @link_watch_list: XXX: need comments on this one * @@@ -2057,8 -2077,7 +2079,7 @@@ struct net_device struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
#ifdef CONFIG_XPS - struct xps_dev_maps __rcu *xps_cpus_map; - struct xps_dev_maps __rcu *xps_rxqs_map; + struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; #endif #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_egress; @@@ -2074,7 -2093,12 +2095,12 @@@ u32 proto_down_reason;
struct list_head todo_list; + + #ifdef CONFIG_PCPU_DEV_REFCNT int __percpu *pcpu_refcnt; + #else + refcount_t dev_refcnt; + #endif
struct list_head link_watch_list;
@@@ -3423,6 -3447,24 +3449,24 @@@ netif_xmit_frozen_or_drv_stopped(const return dev_queue->state & QUEUE_STATE_DRV_XOFF_OR_FROZEN; }
+ /** + * netdev_queue_set_dql_min_limit - set dql minimum limit + * @dev_queue: pointer to transmit queue + * @min_limit: dql minimum limit + * + * Forces xmit_more() to return true until the minimum threshold + * defined by @min_limit is reached (or until the tx queue is + * empty). Warning: to be used with care; misuse will impact + * latency. + */ + static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue, + unsigned int min_limit) + { + #ifdef CONFIG_BQL + dev_queue->dql.min_limit = min_limit; + #endif + } + /** + * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write + * @dev_queue: pointer to transmit queue @@@ -3688,7 -3730,7 +3732,7 @@ static inline void netif_wake_subqueue( int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index); int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, - u16 index, bool is_rxqs_map); + u16 index, enum xps_map_type type);
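netdev_queue_set_dql_min_limit() gives drivers a knob to keep xmit_more() batching going until a byte floor has been queued. A minimal usage sketch (the loop placement in a setup path and the 1024-byte floor are illustrative assumptions):

	/* Hypothetical driver setup path, for illustration only. */
	static void example_set_tx_batching_floor(struct net_device *dev)
	{
		unsigned int i;

		for (i = 0; i < dev->real_num_tx_queues; i++) {
			struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

			/* Keep xmit_more() batching until at least 1024 bytes
			 * are queued; this trades latency for throughput.
			 */
			netdev_queue_set_dql_min_limit(txq, 1024);
		}
	}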
/** * netif_attr_test_mask - Test a CPU or Rx queue set in a mask @@@ -3783,7 -3825,7 +3827,7 @@@ static inline int netif_set_xps_queue(s
static inline int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, - u16 index, bool is_rxqs_map) + u16 index, enum xps_map_type type) { return 0; } @@@ -4026,7 -4068,11 +4070,11 @@@ void netdev_run_todo(void) */ static inline void dev_put(struct net_device *dev) { + #ifdef CONFIG_PCPU_DEV_REFCNT this_cpu_dec(*dev->pcpu_refcnt); + #else + refcount_dec(&dev->dev_refcnt); + #endif }
/** @@@ -4037,7 -4083,11 +4085,11 @@@ */ static inline void dev_hold(struct net_device *dev) { + #ifdef CONFIG_PCPU_DEV_REFCNT this_cpu_inc(*dev->pcpu_refcnt); + #else + refcount_inc(&dev->dev_refcnt); + #endif }
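dev_hold()/dev_put() now hide whether the build uses the per-cpu counter (CONFIG_PCPU_DEV_REFCNT) or a plain refcount_t; callers keep the same pairing discipline. A minimal sketch of that pairing (struct and function names are made up):

	struct foo_binding {
		struct net_device *dev;
	};

	/* Take a long-lived reference on @dev while it is stored in @b. */
	static void foo_bind(struct foo_binding *b, struct net_device *dev)
	{
		dev_hold(dev);
		b->dev = dev;
	}

	/* Drop the reference once the stored pointer is no longer used. */
	static void foo_unbind(struct foo_binding *b)
	{
		dev_put(b->dev);
		b->dev = NULL;
	}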
/* Carrier loss detection, dial on demand. The functions netif_carrier_on @@@ -4172,7 -4222,7 +4224,7 @@@ static inline bool netif_oper_up(const * * Check if device has not been removed from system. */ - static inline bool netif_device_present(struct net_device *dev) + static inline bool netif_device_present(const struct net_device *dev) { return test_bit(__LINK_STATE_PRESENT, &dev->state); } @@@ -5287,6 -5337,9 +5339,9 @@@ do { #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
+ extern struct list_head ptype_all __read_mostly; + extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; + extern struct net_device *blackhole_netdev;
#endif /* _LINUX_NETDEVICE_H */ diff --combined include/linux/skbuff.h index f2c9ee71cb2c,ecc029674ae4..c8def85fcc22 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@@ -285,7 -285,6 +285,7 @@@ struct nf_bridge_info struct tc_skb_ext { __u32 chain; __u16 mru; + bool post_ct; }; #endif
@@@ -657,6 -656,7 +657,7 @@@ typedef unsigned char *sk_buff_data_t * @protocol: Packet protocol from driver * @destructor: Destruct function * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue) + * @_sk_redir: socket redirection information for skmsg * @_nfct: Associated connection, if any (with nfctinfo bits) * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on @@@ -756,6 -756,9 +757,9 @@@ struct sk_buff void (*destructor)(struct sk_buff *skb); }; struct list_head tcp_tsorted_anchor; + #ifdef CONFIG_NET_SOCK_MSG + unsigned long _sk_redir; + #endif };
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@@ -1137,7 -1140,7 +1141,7 @@@ static inline bool skb_fclone_busy(cons
return skb->fclone == SKB_FCLONE_ORIG && refcount_read(&fclones->fclone_ref) > 1 && - fclones->skb2.sk == sk; + READ_ONCE(fclones->skb2.sk) == sk; }
/** @@@ -1289,10 -1292,10 +1293,10 @@@ __skb_set_sw_hash(struct sk_buff *skb, void __skb_get_hash(struct sk_buff *skb); u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); - u32 __skb_get_poff(const struct sk_buff *skb, void *data, + u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - void *data, int hlen_proto); + const void *data, int hlen_proto);
static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) @@@ -1311,9 -1314,8 +1315,8 @@@ bool bpf_flow_dissect(struct bpf_prog * bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, - void *data, __be16 proto, int nhoff, int hlen, - unsigned int flags); + void *target_container, const void *data, + __be16 proto, int nhoff, int hlen, unsigned int flags);
static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, @@@ -1335,9 -1337,9 +1338,9 @@@ static inline bool skb_flow_dissect_flo static inline bool skb_flow_dissect_flow_keys_basic(const struct net *net, const struct sk_buff *skb, - struct flow_keys_basic *flow, void *data, - __be16 proto, int nhoff, int hlen, - unsigned int flags) + struct flow_keys_basic *flow, + const void *data, __be16 proto, + int nhoff, int hlen, unsigned int flags) { memset(flow, 0, sizeof(*flow)); return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow, @@@ -3675,14 -3677,13 +3678,13 @@@ __wsum skb_checksum(const struct sk_buf __wsum csum);
static inline void * __must_check - __skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *data, int hlen, void *buffer) + __skb_header_pointer(const struct sk_buff *skb, int offset, int len, + const void *data, int hlen, void *buffer) { - if (hlen - offset >= len) - return data + offset; + if (likely(hlen - offset >= len)) + return (void *)data + offset;
- if (!skb || - skb_copy_bits(skb, offset, buffer, len) < 0) + if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0)) return NULL;
return buffer; diff --combined include/net/netfilter/nf_tables.h index 5aaced6bf13e,67bc36f7f4fb..0cef5ad9768a --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@@ -1498,13 -1498,16 +1498,16 @@@ struct nft_trans_chain
struct nft_trans_table { bool update; - bool enable; + u8 state; + u32 flags; };
#define nft_trans_table_update(trans) \ (((struct nft_trans_table *)trans->data)->update) - #define nft_trans_table_enable(trans) \ - (((struct nft_trans_table *)trans->data)->enable) + #define nft_trans_table_state(trans) \ + (((struct nft_trans_table *)trans->data)->state) + #define nft_trans_table_flags(trans) \ + (((struct nft_trans_table *)trans->data)->flags)
struct nft_trans_elem { struct nft_set *set; @@@ -1536,7 -1539,6 +1539,7 @@@ struct nft_trans_flowtable struct nft_flowtable *flowtable; bool update; struct list_head hook_list; + u32 flags; };
#define nft_trans_flowtable(trans) \ @@@ -1545,8 -1547,6 +1548,8 @@@ (((struct nft_trans_flowtable *)trans->data)->update) #define nft_trans_flowtable_hooks(trans) \ (((struct nft_trans_flowtable *)trans->data)->hook_list) +#define nft_trans_flowtable_flags(trans) \ + (((struct nft_trans_flowtable *)trans->data)->flags)
int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); diff --combined include/net/nexthop.h index a10a319d7eb2,ba94868a21d5..28145f714801 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@@ -40,6 -40,12 +40,12 @@@ struct nh_config
struct nlattr *nh_grp; u16 nh_grp_type; + u16 nh_grp_res_num_buckets; + unsigned long nh_grp_res_idle_timer; + unsigned long nh_grp_res_unbalanced_timer; + bool nh_grp_res_has_num_buckets; + bool nh_grp_res_has_idle_timer; + bool nh_grp_res_has_unbalanced_timer;
struct nlattr *nh_encap; u16 nh_encap_type; @@@ -63,6 -69,32 +69,32 @@@ struct nh_info }; };
+ struct nh_res_bucket { + struct nh_grp_entry __rcu *nh_entry; + atomic_long_t used_time; + unsigned long migrated_time; + bool occupied; + u8 nh_flags; + }; + + struct nh_res_table { + struct net *net; + u32 nhg_id; + struct delayed_work upkeep_dw; + + /* List of NHGEs that have too few buckets ("uw" for underweight). + * Reclaimed buckets will be given to entries in this list. + */ + struct list_head uw_nh_entries; + unsigned long unbalanced_since; + + u32 idle_timer; + u32 unbalanced_timer; + + u16 num_nh_buckets; + struct nh_res_bucket nh_buckets[]; + }; + struct nh_grp_entry { struct nexthop *nh; u8 weight; @@@ -71,6 -103,13 +103,13 @@@ struct { atomic_t upper_bound; } mpath; + struct { + /* Member on uw_nh_entries. */ + struct list_head uw_nh_entry; + + u16 count_buckets; + u16 wants_buckets; + } res; };
struct list_head nh_list; @@@ -80,9 -119,13 +119,13 @@@ struct nh_group { struct nh_group *spare; /* spare group for removals */ u16 num_nh; + bool is_multipath; bool mpath; + bool resilient; bool fdb_nh; bool has_v4; + + struct nh_res_table __rcu *res_table; struct nh_grp_entry nh_entries[]; };
@@@ -112,11 -155,15 +155,15 @@@ struct nexthop enum nexthop_event_type { NEXTHOP_EVENT_DEL, NEXTHOP_EVENT_REPLACE, + NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, + NEXTHOP_EVENT_BUCKET_REPLACE, };
enum nh_notifier_info_type { NH_NOTIFIER_INFO_TYPE_SINGLE, NH_NOTIFIER_INFO_TYPE_GRP, + NH_NOTIFIER_INFO_TYPE_RES_TABLE, + NH_NOTIFIER_INFO_TYPE_RES_BUCKET, };
struct nh_notifier_single_info { @@@ -143,6 -190,19 +190,19 @@@ struct nh_notifier_grp_info struct nh_notifier_grp_entry_info nh_entries[]; };
+ struct nh_notifier_res_bucket_info { + u16 bucket_index; + unsigned int idle_timer_ms; + bool force; + struct nh_notifier_single_info old_nh; + struct nh_notifier_single_info new_nh; + }; + + struct nh_notifier_res_table_info { + u16 num_nh_buckets; + struct nh_notifier_single_info nhs[]; + }; + struct nh_notifier_info { struct net *net; struct netlink_ext_ack *extack; @@@ -151,6 -211,8 +211,8 @@@ union { struct nh_notifier_single_info *nh; struct nh_notifier_grp_info *nh_grp; + struct nh_notifier_res_table_info *nh_res_table; + struct nh_notifier_res_bucket_info *nh_res_bucket; }; };
@@@ -158,6 -220,10 +220,10 @@@ int register_nexthop_notifier(struct ne struct netlink_ext_ack *extack); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); + void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, + bool offload, bool trap); + void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, + unsigned long *activity);
/* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); @@@ -212,7 -278,7 +278,7 @@@ static inline bool nexthop_is_multipath struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp); - return nh_grp->mpath; + return nh_grp->is_multipath; } return false; } @@@ -227,7 -293,7 +293,7 @@@ static inline unsigned int nexthop_num_ struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp); - if (nh_grp->mpath) + if (nh_grp->is_multipath) rc = nh_grp->num_nh; }
@@@ -308,7 -374,7 +374,7 @@@ struct fib_nh_common *nexthop_fib_nhc(s struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp); - if (nh_grp->mpath) { + if (nh_grp->is_multipath) { nh = nexthop_mpath_select(nh_grp, nhsel); if (!nh) return NULL; @@@ -410,7 -476,6 +476,7 @@@ static inline struct fib_nh *fib_info_n int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, struct netlink_ext_ack *extack);
+/* Caller should either hold rcu_read_lock(), or RTNL. */ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) { struct nh_info *nhi; @@@ -431,29 -496,6 +497,29 @@@ return NULL; }
+/* Variant of nexthop_fib6_nh(). + * Caller should either hold rcu_read_lock_bh(), or RTNL. + */ +static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh) +{ + struct nh_info *nhi; + + if (nh->is_group) { + struct nh_group *nh_grp; + + nh_grp = rcu_dereference_bh_rtnl(nh->nh_grp); + nh = nexthop_mpath_select(nh_grp, 0); + if (!nh) + return NULL; + } + + nhi = rcu_dereference_bh_rtnl(nh->nh_info); + if (nhi->family == AF_INET6) + return &nhi->fib6_nh; + + return NULL; +} + static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) { struct fib6_nh *fib6_nh; diff --combined include/uapi/linux/bpf.h index 4ba4ef0ff63a,2d3036e292a9..008edc1dc8c1 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@@ -93,7 -93,717 +93,717 @@@ union bpf_iter_link_info } map; };
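The nexthop_fib6_nh_bh() variant added in the nexthop.h hunk above is for callers that already run with BH disabled and hold rcu_read_lock_bh() rather than plain RCU or RTNL. A minimal usage sketch (the surrounding function is hypothetical):

	/* Hypothetical lookup running in softirq context. */
	static int example_nh_mtu_bh(struct nexthop *nh)
	{
		struct fib6_nh *fib6_nh;
		int mtu = 0;

		rcu_read_lock_bh();
		fib6_nh = nexthop_fib6_nh_bh(nh);
		if (fib6_nh && fib6_nh->fib_nh_dev)
			mtu = READ_ONCE(fib6_nh->fib_nh_dev->mtu);
		rcu_read_unlock_bh();

		return mtu;
	}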
- /* BPF syscall commands, see bpf(2) man-page for details. */ + /* BPF syscall commands, see bpf(2) man-page for more details. */ + /** + * DOC: eBPF Syscall Preamble + * + * The operation to be performed by the **bpf**\ () system call is determined + * by the *cmd* argument. Each operation takes an accompanying argument, + * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see + * below). The size argument is the size of the union pointed to by *attr*. + */ + /** + * DOC: eBPF Syscall Commands + * + * BPF_MAP_CREATE + * Description + * Create a map and return a file descriptor that refers to the + * map. The close-on-exec file descriptor flag (see **fcntl**\ (2)) + * is automatically enabled for the new file descriptor. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_MAP_CREATE** will delete the map (but see NOTES). + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_MAP_LOOKUP_ELEM + * Description + * Look up an element with a given *key* in the map referred to + * by the file descriptor *map_fd*. + * + * The *flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_UPDATE_ELEM + * Description + * Create or update an element (key/value pair) in a specified map. + * + * The *flags* argument should be specified as one of the + * following: + * + * **BPF_ANY** + * Create a new element or update an existing element. + * **BPF_NOEXIST** + * Create a new element only if it did not exist. + * **BPF_EXIST** + * Update an existing element. + * **BPF_F_LOCK** + * Update a spin_lock-ed map element. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, + * **E2BIG**, **EEXIST**, or **ENOENT**. + * + * **E2BIG** + * The number of elements in the map reached the + * *max_entries* limit specified at map creation time. + * **EEXIST** + * If *flags* specifies **BPF_NOEXIST** and the element + * with *key* already exists in the map. + * **ENOENT** + * If *flags* specifies **BPF_EXIST** and the element with + * *key* does not exist in the map. + * + * BPF_MAP_DELETE_ELEM + * Description + * Look up and delete an element by key in a specified map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_GET_NEXT_KEY + * Description + * Look up an element by key in a specified map and return the key + * of the next element. Can be used to iterate over all elements + * in the map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * The following cases can be used to iterate over all elements of + * the map: + * + * * If *key* is not found, the operation returns zero and sets + * the *next_key* pointer to the key of the first element. + * * If *key* is found, the operation returns zero and sets the + * *next_key* pointer to the key of the next element. + * * If *key* is the last element, returns -1 and *errno* is set + * to **ENOENT**. + * + * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or + * **EINVAL** on error. 
+ * + * BPF_PROG_LOAD + * Description + * Verify and load an eBPF program, returning a new file + * descriptor associated with the program. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES). + * + * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is + * automatically enabled for the new file descriptor. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_OBJ_PIN + * Description + * Pin an eBPF program or map referred by the specified *bpf_fd* + * to the provided *pathname* on the filesystem. + * + * The *pathname* argument must not contain a dot ("."). + * + * On success, *pathname* retains a reference to the eBPF object, + * preventing deallocation of the object when the original + * *bpf_fd* is closed. This allow the eBPF object to live beyond + * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent + * process. + * + * Applying **unlink**\ (2) or similar calls to the *pathname* + * unpins the object from the filesystem, removing the reference. + * If no other file descriptors or filesystem nodes refer to the + * same object, it will be deallocated (see NOTES). + * + * The filesystem type for the parent directory of *pathname* must + * be **BPF_FS_MAGIC**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_OBJ_GET + * Description + * Open a file descriptor for the eBPF object pinned to the + * specified *pathname*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_PROG_ATTACH + * Description + * Attach an eBPF program to a *target_fd* at the specified + * *attach_type* hook. + * + * The *attach_type* specifies the eBPF attachment point to + * attach the program to, and must be one of *bpf_attach_type* + * (see below). + * + * The *attach_bpf_fd* must be a valid file descriptor for a + * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap + * or sock_ops type corresponding to the specified *attach_type*. + * + * The *target_fd* must be a valid file descriptor for a kernel + * object which depends on the attach type of *attach_bpf_fd*: + * + * **BPF_PROG_TYPE_CGROUP_DEVICE**, + * **BPF_PROG_TYPE_CGROUP_SKB**, + * **BPF_PROG_TYPE_CGROUP_SOCK**, + * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, + * **BPF_PROG_TYPE_CGROUP_SYSCTL**, + * **BPF_PROG_TYPE_SOCK_OPS** + * + * Control Group v2 hierarchy with the eBPF controller + * enabled. Requires the kernel to be compiled with + * **CONFIG_CGROUP_BPF**. + * + * **BPF_PROG_TYPE_FLOW_DISSECTOR** + * + * Network namespace (eg /proc/self/ns/net). + * + * **BPF_PROG_TYPE_LIRC_MODE2** + * + * LIRC device path (eg /dev/lircN). Requires the kernel + * to be compiled with **CONFIG_BPF_LIRC_MODE2**. + * + * **BPF_PROG_TYPE_SK_SKB**, + * **BPF_PROG_TYPE_SK_MSG** + * + * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**). + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_DETACH + * Description + * Detach the eBPF program associated with the *target_fd* at the + * hook specified by *attach_type*. The program must have been + * previously attached using **BPF_PROG_ATTACH**. + * + * Return + * Returns zero on success. 
On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_TEST_RUN + * Description + * Run the eBPF program associated with the *prog_fd* a *repeat* + * number of times against a provided program context *ctx_in* and + * data *data_in*, and return the modified program context + * *ctx_out*, *data_out* (for example, packet data), result of the + * execution *retval*, and *duration* of the test run. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * **ENOSPC** + * Either *data_size_out* or *ctx_size_out* is too small. + * **ENOTSUPP** + * This command is not supported by the program type of + * the program referred to by *prog_fd*. + * + * BPF_PROG_GET_NEXT_ID + * Description + * Fetch the next eBPF program currently loaded into the kernel. + * + * Looks for the eBPF program with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF programs + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_MAP_GET_NEXT_ID + * Description + * Fetch the next eBPF map currently loaded into the kernel. + * + * Looks for the eBPF map with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF maps + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_PROG_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF program corresponding to + * *prog_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_MAP_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF map corresponding to + * *map_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_OBJ_GET_INFO_BY_FD + * Description + * Obtain information about the eBPF object corresponding to + * *bpf_fd*. + * + * Populates up to *info_len* bytes of *info*, which will be in + * one of the following formats depending on the eBPF object type + * of *bpf_fd*: + * + * * **struct bpf_prog_info** + * * **struct bpf_map_info** + * * **struct bpf_btf_info** + * * **struct bpf_link_info** + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_QUERY + * Description + * Obtain information about eBPF programs associated with the + * specified *attach_type* hook. + * + * The *target_fd* must be a valid file descriptor for a kernel + * object which depends on the attach type of *attach_bpf_fd*: + * + * **BPF_PROG_TYPE_CGROUP_DEVICE**, + * **BPF_PROG_TYPE_CGROUP_SKB**, + * **BPF_PROG_TYPE_CGROUP_SOCK**, + * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, + * **BPF_PROG_TYPE_CGROUP_SYSCTL**, + * **BPF_PROG_TYPE_SOCK_OPS** + * + * Control Group v2 hierarchy with the eBPF controller + * enabled. Requires the kernel to be compiled with + * **CONFIG_CGROUP_BPF**. + * + * **BPF_PROG_TYPE_FLOW_DISSECTOR** + * + * Network namespace (eg /proc/self/ns/net). + * + * **BPF_PROG_TYPE_LIRC_MODE2** + * + * LIRC device path (eg /dev/lircN). 
Requires the kernel + * to be compiled with **CONFIG_BPF_LIRC_MODE2**. + * + * **BPF_PROG_QUERY** always fetches the number of programs + * attached and the *attach_flags* which were used to attach those + * programs. Additionally, if *prog_ids* is nonzero and the number + * of attached programs is less than *prog_cnt*, populates + * *prog_ids* with the eBPF program ids of the programs attached + * at *target_fd*. + * + * The following flags may alter the result: + * + * **BPF_F_QUERY_EFFECTIVE** + * Only return information regarding programs which are + * currently effective at the specified *target_fd*. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_RAW_TRACEPOINT_OPEN + * Description + * Attach an eBPF program to a tracepoint *name* to access kernel + * internal arguments of the tracepoint in their raw form. + * + * The *prog_fd* must be a valid file descriptor associated with + * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**. + * + * No ABI guarantees are made about the content of tracepoint + * arguments exposed to the corresponding eBPF program. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES). + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_BTF_LOAD + * Description + * Verify and load BPF Type Format (BTF) metadata into the kernel, + * returning a new file descriptor associated with the metadata. + * BTF is described in more detail at + * https://www.kernel.org/doc/html/latest/bpf/btf.html. + * + * The *btf* parameter must point to valid memory providing + * *btf_size* bytes of BTF binary metadata. + * + * The returned file descriptor can be passed to other **bpf**\ () + * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to + * associate the BTF with those objects. + * + * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional + * parameters to specify a *btf_log_buf*, *btf_log_size* and + * *btf_log_level* which allow the kernel to return freeform log + * output regarding the BTF verification process. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_BTF_GET_FD_BY_ID + * Description + * Open a file descriptor for the BPF Type Format (BTF) + * corresponding to *btf_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_TASK_FD_QUERY + * Description + * Obtain information about eBPF programs associated with the + * target process identified by *pid* and *fd*. + * + * If the *pid* and *fd* are associated with a tracepoint, kprobe + * or uprobe perf event, then the *prog_id* and *fd_type* will + * be populated with the eBPF program id and file descriptor type + * of type **bpf_task_fd_type**. If associated with a kprobe or + * uprobe, the *probe_offset* and *probe_addr* will also be + * populated. Optionally, if *buf* is provided, then up to + * *buf_len* bytes of *buf* will be populated with the name of + * the tracepoint, kprobe or uprobe. + * + * The resulting *prog_id* may be introspected in deeper detail + * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. 
+ * + * BPF_MAP_LOOKUP_AND_DELETE_ELEM + * Description + * Look up an element with the given *key* in the map referred to + * by the file descriptor *fd*, and if found, delete the element. + * + * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types + * implement this command as a "pop" operation, deleting the top + * element rather than one corresponding to *key*. + * The *key* and *key_len* parameters should be zeroed when + * issuing this operation for these map types. + * + * This command is only valid for the following map types: + * * **BPF_MAP_TYPE_QUEUE** + * * **BPF_MAP_TYPE_STACK** + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_FREEZE + * Description + * Freeze the permissions of the specified map. + * + * Write permissions may be frozen by passing zero *flags*. + * Upon success, no future syscall invocations may alter the + * map state of *map_fd*. Write operations from eBPF programs + * are still possible for a frozen map. + * + * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_BTF_GET_NEXT_ID + * Description + * Fetch the next BPF Type Format (BTF) object currently loaded + * into the kernel. + * + * Looks for the BTF object with an id greater than *start_id* + * and updates *next_id* on success. If no other BTF objects + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_MAP_LOOKUP_BATCH + * Description + * Iterate and fetch multiple elements in a map. + * + * Two opaque values are used to manage batch operations, + * *in_batch* and *out_batch*. Initially, *in_batch* must be set + * to NULL to begin the batched operation. After each subsequent + * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant + * *out_batch* as the *in_batch* for the next operation to + * continue iteration from the current point. + * + * The *keys* and *values* are output parameters which must point + * to memory large enough to hold *count* items based on the key + * and value size of the map *map_fd*. The *keys* buffer must be + * of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * The *elem_flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * On success, *count* elements from the map are copied into the + * user buffer, with the keys copied into *keys* and the values + * copied into the corresponding indices in *values*. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **ENOSPC** to indicate that *keys* or + * *values* is too small to dump an entire bucket during + * iteration of a hash-based map type. + * + * BPF_MAP_LOOKUP_AND_DELETE_BATCH + * Description + * Iterate and delete all elements in a map. 
+ * + * This operation has the same behavior as + * **BPF_MAP_LOOKUP_BATCH** with two exceptions: + * + * * Every element that is successfully returned is also deleted + * from the map. This is at least *count* elements. Note that + * *count* is both an input and an output parameter. + * * Upon returning with *errno* set to **EFAULT**, up to + * *count* elements may be deleted without returning the keys + * and values of the deleted elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_UPDATE_BATCH + * Description + * Update multiple elements in a map by *key*. + * + * The *keys* and *values* are input parameters which must point + * to memory large enough to hold *count* items based on the key + * and value size of the map *map_fd*. The *keys* buffer must be + * of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * Each element specified in *keys* is sequentially updated to the + * value in the corresponding index in *values*. The *in_batch* + * and *out_batch* parameters are ignored and should be zeroed. + * + * The *elem_flags* argument should be specified as one of the + * following: + * + * **BPF_ANY** + * Create new elements or update a existing elements. + * **BPF_NOEXIST** + * Create new elements only if they do not exist. + * **BPF_EXIST** + * Update existing elements. + * **BPF_F_LOCK** + * Update spin_lock-ed map elements. This must be + * specified if the map value contains a spinlock. + * + * On success, *count* elements from the map are updated. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or + * **E2BIG**. **E2BIG** indicates that the number of elements in + * the map reached the *max_entries* limit specified at map + * creation time. + * + * May set *errno* to one of the following error codes under + * specific circumstances: + * + * **EEXIST** + * If *flags* specifies **BPF_NOEXIST** and the element + * with *key* already exists in the map. + * **ENOENT** + * If *flags* specifies **BPF_EXIST** and the element with + * *key* does not exist in the map. + * + * BPF_MAP_DELETE_BATCH + * Description + * Delete multiple elements in a map by *key*. + * + * The *keys* parameter is an input parameter which must point + * to memory large enough to hold *count* items based on the key + * size of the map *map_fd*, that is, *key_size* * *count*. + * + * Each element specified in *keys* is sequentially deleted. The + * *in_batch*, *out_batch*, and *values* parameters are ignored + * and should be zeroed. + * + * The *elem_flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * On success, *count* elements from the map are updated. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. If + * *errno* is **EFAULT**, up to *count* elements may be been + * deleted. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. 
+ * + * BPF_LINK_CREATE + * Description + * Attach an eBPF program to a *target_fd* at the specified + * *attach_type* hook and return a file descriptor handle for + * managing the link. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_UPDATE + * Description + * Update the eBPF program in the specified *link_fd* to + * *new_prog_fd*. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_LINK_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF Link corresponding to + * *link_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_GET_NEXT_ID + * Description + * Fetch the next eBPF link currently loaded into the kernel. + * + * Looks for the eBPF link with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF links + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_ENABLE_STATS + * Description + * Enable eBPF runtime statistics gathering. + * + * Runtime statistics gathering for the eBPF runtime is disabled + * by default to minimize the corresponding performance overhead. + * This command enables statistics globally. + * + * Multiple programs may independently enable statistics. + * After gathering the desired statistics, eBPF runtime statistics + * may be disabled again by calling **close**\ (2) for the file + * descriptor returned by this function. Statistics will only be + * disabled system-wide when all outstanding file descriptors + * returned by prior calls for this subcommand are closed. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_ITER_CREATE + * Description + * Create an iterator on top of the specified *link_fd* (as + * previously created using **BPF_LINK_CREATE**) and return a + * file descriptor that can be used to trigger the iteration. + * + * If the resulting file descriptor is pinned to the filesystem + * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls + * for that path will trigger the iterator to read kernel state + * using the eBPF program attached to *link_fd*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_DETACH + * Description + * Forcefully detach the specified *link_fd* from its + * corresponding attachment point. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_BIND_MAP + * Description + * Bind a map to the lifetime of an eBPF program. + * + * The map identified by *map_fd* is bound to the program + * identified by *prog_fd* and only released when *prog_fd* is + * released. This may be used in cases where metadata should be + * associated with a program which otherwise does not contain any + * references to the map (for example, embedded in the eBPF + * program instructions). + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. 
+ * + * NOTES + * eBPF objects (maps and programs) can be shared between processes. + * + * * After **fork**\ (2), the child inherits file descriptors + * referring to the same eBPF objects. + * * File descriptors referring to eBPF objects can be transferred over + * **unix**\ (7) domain sockets. + * * File descriptors referring to eBPF objects can be duplicated in the + * usual way, using **dup**\ (2) and similar calls. + * * File descriptors referring to eBPF objects can be pinned to the + * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2). + * + * An eBPF object is deallocated only after all file descriptors referring + * to the object have been closed and no references remain pinned to the + * filesystem or attached (for example, bound to a program or device). + */ enum bpf_cmd { BPF_MAP_CREATE, BPF_MAP_LOOKUP_ELEM, @@@ -393,6 -1103,15 +1103,15 @@@ enum bpf_link_type * is struct/union. */ #define BPF_PSEUDO_BTF_ID 3 + /* insn[0].src_reg: BPF_PSEUDO_FUNC + * insn[0].imm: insn offset to the func + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the function + * verifier type: PTR_TO_FUNC. + */ + #define BPF_PSEUDO_FUNC 4
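The NOTES above describe how an eBPF object's lifetime follows its file descriptors; pinning with BPF_OBJ_PIN is the usual way to keep an object alive past process exit. A minimal sketch, assuming a mounted bpffs and an illustrative path:

    /* Hedged sketch: pin an existing map fd to bpffs so the map outlives this
     * process, then re-open it by path with BPF_OBJ_GET.
     */
    #include <linux/bpf.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int pin_and_reopen(int map_fd)
    {
        const char *path = "/sys/fs/bpf/example_map";    /* illustrative path */
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.pathname = (__u64)(unsigned long)path;
        attr.bpf_fd   = map_fd;
        if (syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr)) < 0)
            return -1;

        /* Any process with access to the path can now obtain a new fd. */
        memset(&attr, 0, sizeof(attr));
        attr.pathname = (__u64)(unsigned long)path;
        return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
    }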
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function @@@ -720,7 -1439,7 +1439,7 @@@ union bpf_attr * parsed and used to produce a manual page. The workflow is the following, * and requires the rst2man utility: * - * $ ./scripts/bpf_helpers_doc.py \ + * $ ./scripts/bpf_doc.py \ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 * $ man /tmp/bpf-helpers.7 @@@ -1765,6 -2484,10 +2484,10 @@@ * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@@ -3850,7 -4573,7 +4573,7 @@@ * * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) * Description - * Check ctx packet size against exceeding MTU of net device (based + * Check packet size against exceeding MTU of net device (based * on *ifindex*). This helper will likely be used in combination * with helpers that adjust/change the packet size. * @@@ -3867,14 -4590,6 +4590,14 @@@ * against the current net device. This is practical if this isn't * used prior to redirect. * + * On input *mtu_len* must be a valid pointer, else verifier will + * reject BPF program. If the value *mtu_len* is initialized to + * zero then the ctx packet size is used. When value *mtu_len* is + * provided as input this specifies the L3 length that the MTU check + * is done against. Remember XDP and TC length operate at L2, but + * this value is L3 as this correlates to MTU and IP-header tot_len + * values which are L3 (similar behavior as bpf_fib_lookup). + * * The Linux kernel route table can configure MTUs on a more * specific per route level, which is not provided by this helper. * For route level MTU checks use the **bpf_fib_lookup**\ () @@@ -3899,9 -4614,11 +4622,9 @@@ * * On return *mtu_len* pointer contains the MTU value of the net * device. Remember the net device configured MTU is the L3 size, - * which is returned here and XDP and TX length operate at L2. + * which is returned here and XDP and TC length operate at L2. * Helper take this into account for you, but remember when using - * MTU value in your BPF-code. On input *mtu_len* must be a valid - * pointer and be initialized (to zero), else verifier will reject - * BPF program. + * MTU value in your BPF-code. * * Return * * 0 on success, and populate MTU value in *mtu_len* pointer. @@@ -3915,6 -4632,34 +4638,34 @@@ * * **BPF_MTU_CHK_RET_FRAG_NEEDED** * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** * + * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For each element in **map**, call **callback_fn** function with + * **map**, **callback_ctx** and other map-specific parameters. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0.
+ * + * The following is a list of supported map types and their + * respective expected callback signatures: + * + * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH, + * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, + * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY + * + * long (*callback_fn)(struct bpf_map *map, const void *key, void *value, void *ctx); + * + * For per_cpu maps, the map_value is the value on the cpu where the + * bpf_prog is running. + * + * If **callback_fn** returns 0, the helper will continue to the next + * element. If the return value is 1, the helper will skip the rest of + * the elements and return. Other return values are not used now. + * + * Return + * The number of traversed map elements for success, **-EINVAL** for + * invalid **flags**. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@@ -4081,6 -4826,7 +4832,7 @@@ FN(ima_inode_hash), \ FN(sock_from_file), \ FN(check_mtu), \ + FN(for_each_map_elem), \ /* */
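Putting the bpf_for_each_map_elem() documentation together, here is a hedged BPF-C sketch of a program that sums an array map's values with a static callback matching the signature listed above. Map, section, and function names are illustrative, and a kernel plus libbpf recent enough to expose the new helper is assumed.

    /* Hedged sketch (BPF side): sum the values of an array map with
     * bpf_for_each_map_elem(). The callback is a static function and
     * callback_ctx points into the caller's stack, as required above.
     */
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 16);
        __type(key, __u32);
        __type(value, long);
    } values SEC(".maps");

    struct cb_ctx {
        long sum;
    };

    static long sum_elem(struct bpf_map *map, const void *key, void *value,
                         void *ctx)
    {
        struct cb_ctx *c = ctx;

        c->sum += *(long *)value;
        return 0;    /* 0 = keep iterating, 1 = stop early */
    }

    SEC("xdp")
    int sum_values(struct xdp_md *xdp)
    {
        struct cb_ctx c = { .sum = 0 };

        bpf_for_each_map_elem(&values, sum_elem, &c, 0);    /* flags must be 0 */
        bpf_printk("sum=%ld", c.sum);
        return XDP_PASS;
    }

    char LICENSE[] SEC("license") = "GPL";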
/* integer value in 'imm' field of BPF_CALL instruction selects which helper @@@ -4174,6 -4920,7 +4926,7 @@@ enum BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), };
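The new BPF_F_ADJ_ROOM_ENCAP_L2_ETH flag slots into the existing bpf_skb_adjust_room() encapsulation flags. A hedged TC-BPF sketch that reserves room for an outer IPv4/UDP tunnel carrying an inner Ethernet frame; the header sizes are illustrative and writing the actual outer headers is left out.

    /* Hedged sketch (BPF side): grow packet headroom for an outer
     * IPv4 + UDP + Ethernet encapsulation, telling the stack that the inner
     * L2 header is Ethernet via BPF_F_ADJ_ROOM_ENCAP_L2_ETH. The program is
     * expected to fill in the new outer headers afterwards (not shown).
     */
    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <linux/ip.h>
    #include <linux/udp.h>
    #include <linux/pkt_cls.h>
    #include <bpf/bpf_helpers.h>

    SEC("tc")
    int add_l2_encap_room(struct __sk_buff *skb)
    {
        __s32 grow = sizeof(struct iphdr) + sizeof(struct udphdr) + ETH_HLEN;
        __u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO |
                      BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
                      BPF_F_ADJ_ROOM_ENCAP_L4_UDP |
                      BPF_F_ADJ_ROOM_ENCAP_L2_ETH |
                      BPF_F_ADJ_ROOM_ENCAP_L2(ETH_HLEN);

        if (bpf_skb_adjust_room(skb, grow, BPF_ADJ_ROOM_MAC, flags))
            return TC_ACT_SHOT;

        /* ...write the new outer Ethernet/IP/UDP headers here... */
        return TC_ACT_OK;
    }

    char LICENSE[] SEC("license") = "GPL";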
enum { @@@ -5211,7 -5958,10 +5964,10 @@@ struct bpf_pidns_info
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */ struct bpf_sk_lookup { - __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + union { + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */ + };
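The new cookie member is only meaningful under BPF_PROG_TEST_RUN. A hedged userspace sketch that runs an already loaded SK_LOOKUP program against a synthetic lookup context and reads back the cookie of whichever socket the program selected; prog_fd, address, and port are assumptions for the example.

    /* Hedged sketch: test-run a BPF_PROG_TYPE_SK_LOOKUP program and read the
     * cookie of the selected socket (zero means no socket was selected).
     */
    #include <arpa/inet.h>
    #include <linux/bpf.h>
    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static __u64 test_run_sk_lookup(int prog_fd)
    {
        struct bpf_sk_lookup ctx;
        union bpf_attr attr;

        memset(&ctx, 0, sizeof(ctx));
        ctx.family     = AF_INET;
        ctx.protocol   = IPPROTO_TCP;
        ctx.local_ip4  = htonl(INADDR_LOOPBACK);   /* network byte order */
        ctx.local_port = 8080;                     /* host byte order */

        memset(&attr, 0, sizeof(attr));
        attr.test.prog_fd      = prog_fd;
        attr.test.ctx_in       = (__u64)(unsigned long)&ctx;
        attr.test.ctx_size_in  = sizeof(ctx);
        attr.test.ctx_out      = (__u64)(unsigned long)&ctx;
        attr.test.ctx_size_out = sizeof(ctx);
        attr.test.repeat       = 1;

        if (syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr)) < 0)
            return 0;

        return ctx.cookie;   /* non-zero iff the program assigned a socket */
    }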
__u32 family; /* Protocol family (AF_INET, AF_INET6) */ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ diff --combined init/Kconfig index 5f5c776ef192,2c9cbd8e368c..5deae45b8d81 --- a/init/Kconfig +++ b/init/Kconfig @@@ -20,10 -20,10 +20,10 @@@ config CC_VERSION_TEX When the compiler is updated, Kconfig will be invoked.
- Ensure full rebuild when the compiler is updated - include/linux/kconfig.h contains this option in the comment line so - fixdep adds include/config/cc/version/text.h into the auto-generated - dependency. When the compiler is updated, syncconfig will touch it - and then every file will be rebuilt. + include/linux/compiler-version.h contains this option in the comment + line so fixdep adds include/config/cc/version/text.h into the + auto-generated dependency. When the compiler is updated, syncconfig + will touch it and then every file will be rebuilt.
config CC_IS_GCC def_bool $(success,test "$(cc-name)" = GCC) @@@ -119,7 -119,8 +119,7 @@@ config INIT_ENV_ARG_LIMI
config COMPILE_TEST bool "Compile also drivers which will not load" - depends on !UML && !S390 - default n + depends on HAS_IOMEM help Some drivers can be compiled on a different platform than they are intended to be run on. Despite they cannot be loaded there (or even @@@ -1708,6 -1709,7 +1708,7 @@@ config BPF_SYSCAL select BPF select IRQ_WORK select TASKS_TRACE_RCU + select NET_SOCK_MSG if INET default n help Enable the bpf() system call that allows to manipulate eBPF diff --combined kernel/bpf/bpf_inode_storage.c index b58b2efb9b43,da753721457c..2921ca39a93e --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@@ -109,7 -109,7 +109,7 @@@ static void *bpf_fd_inode_storage_looku fd = *(int *)key; f = fget_raw(fd); if (!f) - return NULL; + return ERR_PTR(-EBADF);
sdata = inode_storage_lookup(f->f_inode, map, true); fput(f); @@@ -237,7 -237,7 +237,7 @@@ static void inode_storage_map_free(stru
smap = (struct bpf_local_storage_map *)map; bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx); - bpf_local_storage_map_free(smap); + bpf_local_storage_map_free(smap, NULL); }
static int inode_storage_map_btf_id; diff --combined kernel/bpf/verifier.c index 44e4ec1640f1,f9096b049cd6..999bf36ffeb1 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@@ -234,6 -234,12 +234,12 @@@ static bool bpf_pseudo_call(const struc insn->src_reg == BPF_PSEUDO_CALL; }
+ static bool bpf_pseudo_func(const struct bpf_insn *insn) + { + return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && + insn->src_reg == BPF_PSEUDO_FUNC; + } + struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; @@@ -248,6 -254,7 +254,7 @@@ u32 btf_id; struct btf *ret_btf; u32 ret_btf_id; + u32 subprogno; };
struct btf *btf_vmlinux; @@@ -390,6 -397,24 +397,24 @@@ __printf(3, 4) static void verbose_linf env->prev_linfo = linfo; }
+ static void verbose_invalid_scalar(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + struct tnum *range, const char *ctx, + const char *reg_name) + { + char tn_buf[48]; + + verbose(env, "At %s the register %s ", ctx, reg_name); + if (!tnum_is_unknown(reg->var_off)) { + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "has value %s", tn_buf); + } else { + verbose(env, "has unknown scalar value"); + } + tnum_strn(tn_buf, sizeof(tn_buf), *range); + verbose(env, " should have been in %s\n", tn_buf); + } + static bool type_is_pkt_pointer(enum bpf_reg_type type) { return type == PTR_TO_PACKET || @@@ -409,6 -434,7 +434,7 @@@ static bool reg_type_not_null(enum bpf_ return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || type == PTR_TO_MAP_VALUE || + type == PTR_TO_MAP_KEY || type == PTR_TO_SOCK_COMMON; }
@@@ -451,7 -477,8 +477,8 @@@ static bool arg_type_may_be_null(enum b type == ARG_PTR_TO_MEM_OR_NULL || type == ARG_PTR_TO_CTX_OR_NULL || type == ARG_PTR_TO_SOCKET_OR_NULL || - type == ARG_PTR_TO_ALLOC_MEM_OR_NULL; + type == ARG_PTR_TO_ALLOC_MEM_OR_NULL || + type == ARG_PTR_TO_STACK_OR_NULL; }
/* Determine whether the function releases some resources allocated by another @@@ -541,6 -568,8 +568,8 @@@ static const char * const reg_type_str[ [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", [PTR_TO_RDWR_BUF] = "rdwr_buf", [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", + [PTR_TO_FUNC] = "func", + [PTR_TO_MAP_KEY] = "map_key", };
static char slot_type_char[] = { @@@ -612,6 -641,7 +641,7 @@@ static void print_verifier_state(struc if (type_is_pkt_pointer(t)) verbose(env, ",r=%d", reg->range); else if (t == CONST_PTR_TO_MAP || + t == PTR_TO_MAP_KEY || t == PTR_TO_MAP_VALUE || t == PTR_TO_MAP_VALUE_OR_NULL) verbose(env, ",ks=%d,vs=%d", @@@ -1519,7 -1549,7 +1549,7 @@@ static int add_subprog(struct bpf_verif } ret = find_subprog(env, off); if (ret >= 0) - return 0; + return ret; if (env->subprog_cnt >= BPF_MAX_SUBPROGS) { verbose(env, "too many subprograms\n"); return -E2BIG; @@@ -1527,7 -1557,7 +1557,7 @@@ env->subprog_info[env->subprog_cnt++].start = off; sort(env->subprog_info, env->subprog_cnt, sizeof(env->subprog_info[0]), cmp_subprogs, NULL); - return 0; + return env->subprog_cnt - 1; }
static int check_subprogs(struct bpf_verifier_env *env) @@@ -1544,6 -1574,19 +1574,19 @@@
/* determine subprog starts. The end is one before the next starts */ for (i = 0; i < insn_cnt; i++) { + if (bpf_pseudo_func(insn + i)) { + if (!env->bpf_capable) { + verbose(env, + "function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); + return -EPERM; + } + ret = add_subprog(env, i + insn[i].imm + 1); + if (ret < 0) + return ret; + /* remember subprog */ + insn[i + 1].imm = ret; + continue; + } if (!bpf_pseudo_call(insn + i)) continue; if (!env->bpf_capable) { @@@ -2295,6 -2338,8 +2338,8 @@@ static bool is_spillable_regtype(enum b case PTR_TO_PERCPU_BTF_ID: case PTR_TO_MEM: case PTR_TO_MEM_OR_NULL: + case PTR_TO_FUNC: + case PTR_TO_MAP_KEY: return true; default: return false; @@@ -2899,6 -2944,10 +2944,10 @@@ static int __check_mem_access(struct bp
reg = &cur_regs(env)[regno]; switch (reg->type) { + case PTR_TO_MAP_KEY: + verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n", + mem_size, off, size); + break; case PTR_TO_MAP_VALUE: verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", mem_size, off, size); @@@ -3304,6 -3353,9 +3353,9 @@@ static int check_ptr_alignment(struct b case PTR_TO_FLOW_KEYS: pointer_desc = "flow keys "; break; + case PTR_TO_MAP_KEY: + pointer_desc = "key "; + break; case PTR_TO_MAP_VALUE: pointer_desc = "value "; break; @@@ -3405,7 -3457,7 +3457,7 @@@ process_func continue_func: subprog_end = subprog[idx + 1].start; for (; i < subprog_end; i++) { - if (!bpf_pseudo_call(insn + i)) + if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) continue; /* remember insn and function to return to */ ret_insn[frame] = i + 1; @@@ -3842,7 -3894,19 +3894,19 @@@ static int check_mem_access(struct bpf_ /* for access checks, reg->off is just part of off */ off += reg->off;
- if (reg->type == PTR_TO_MAP_VALUE) { + if (reg->type == PTR_TO_MAP_KEY) { + if (t == BPF_WRITE) { + verbose(env, "write to change key R%d not allowed\n", regno); + return -EACCES; + } + + err = check_mem_region_access(env, regno, off, size, + reg->map_ptr->key_size, false); + if (err) + return err; + if (value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_MAP_VALUE) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose(env, "R%d leaks addr into map\n", value_regno); @@@ -4258,6 -4322,9 +4322,9 @@@ static int check_helper_mem_access(stru case PTR_TO_PACKET_META: return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); + case PTR_TO_MAP_KEY: + return check_mem_region_access(env, regno, reg->off, access_size, + reg->map_ptr->key_size, false); case PTR_TO_MAP_VALUE: if (check_map_access_type(env, regno, reg->off, access_size, meta && meta->raw_mode ? BPF_WRITE : @@@ -4474,6 -4541,7 +4541,7 @@@ static const struct bpf_reg_types map_k PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, }, }; @@@ -4505,6 -4573,7 +4573,7 @@@ static const struct bpf_reg_types mem_t PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, PTR_TO_RDONLY_BUF, @@@ -4517,6 -4586,7 +4586,7 @@@ static const struct bpf_reg_types int_p PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, }, }; @@@ -4529,6 -4599,8 +4599,8 @@@ static const struct bpf_reg_types const static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } }; + static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; + static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@@ -4557,6 -4629,8 +4629,8 @@@ [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, + [ARG_PTR_TO_FUNC] = &func_ptr_types, + [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, };
static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@@ -4738,6 -4812,8 +4812,8 @@@ skip_type_check verbose(env, "verifier internal error\n"); return -EFAULT; } + } else if (arg_type == ARG_PTR_TO_FUNC) { + meta->subprogno = reg->subprogno; } else if (arg_type_is_mem_ptr(arg_type)) { /* The access to this pointer is only checked when we hit the * next is_mem_size argument below. @@@ -5258,13 -5334,19 +5334,19 @@@ static void clear_caller_saved_regs(str } }
- static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx) + typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx); + + static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx, int subprog, + set_callee_state_fn set_callee_state_cb) { struct bpf_verifier_state *state = env->cur_state; struct bpf_func_info_aux *func_info_aux; struct bpf_func_state *caller, *callee; - int i, err, subprog, target_insn; + int err; bool is_global = false;
if (state->curframe + 1 >= MAX_CALL_FRAMES) { @@@ -5273,14 -5355,6 +5355,6 @@@ return -E2BIG; }
- target_insn = *insn_idx + insn->imm; - subprog = find_subprog(env, target_insn + 1); - if (subprog < 0) { - verbose(env, "verifier bug. No program starts at insn %d\n", - target_insn + 1); - return -EFAULT; - } - caller = state->frame[state->curframe]; if (state->frame[state->curframe + 1]) { verbose(env, "verifier bug. Frame %d already allocated\n", @@@ -5335,11 -5409,9 +5409,9 @@@ if (err) return err;
- /* copy r1 - r5 args that callee can access. The copy includes parent - * pointers, which connects us up to the liveness chain - */ - for (i = BPF_REG_1; i <= BPF_REG_5; i++) - callee->regs[i] = caller->regs[i]; + err = set_callee_state_cb(env, caller, callee, *insn_idx); + if (err) + return err;
clear_caller_saved_regs(env, caller->regs);
@@@ -5347,7 -5419,7 +5419,7 @@@ state->curframe++;
/* and go analyze first insn of the callee */ - *insn_idx = target_insn; + *insn_idx = env->subprog_info[subprog].start - 1;
if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "caller:\n"); @@@ -5358,6 -5430,92 +5430,92 @@@ return 0; }
+ int map_set_for_each_callback_args(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee) + { + /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, + * void *callback_ctx, u64 flags); + * callback_fn(struct bpf_map *map, void *key, void *value, + * void *callback_ctx); + */ + callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1]; + + callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY; + __mark_reg_known_zero(&callee->regs[BPF_REG_2]); + callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr; + + callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE; + __mark_reg_known_zero(&callee->regs[BPF_REG_3]); + callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr; + + /* pointer to stack or null */ + callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3]; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + return 0; + } + + static int set_callee_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, int insn_idx) + { + int i; + + /* copy r1 - r5 args that callee can access. The copy includes parent + * pointers, which connects us up to the liveness chain + */ + for (i = BPF_REG_1; i <= BPF_REG_5; i++) + callee->regs[i] = caller->regs[i]; + return 0; + } + + static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx) + { + int subprog, target_insn; + + target_insn = *insn_idx + insn->imm + 1; + subprog = find_subprog(env, target_insn); + if (subprog < 0) { + verbose(env, "verifier bug. No program starts at insn %d\n", + target_insn); + return -EFAULT; + } + + return __check_func_call(env, insn, insn_idx, subprog, set_callee_state); + } + + static int set_map_elem_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) + { + struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx]; + struct bpf_map *map; + int err; + + if (bpf_map_ptr_poisoned(insn_aux)) { + verbose(env, "tail_call abusing map_ptr\n"); + return -EINVAL; + } + + map = BPF_MAP_PTR(insn_aux->map_ptr_state); + if (!map->ops->map_set_for_each_callback_args || + !map->ops->map_for_each_callback) { + verbose(env, "callback function not allowed for map\n"); + return -ENOTSUPP; + } + + err = map->ops->map_set_for_each_callback_args(env, caller, callee); + if (err) + return err; + + callee->in_callback_fn = true; + return 0; + } + static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) { struct bpf_verifier_state *state = env->cur_state; @@@ -5380,8 -5538,22 +5538,22 @@@
state->curframe--; caller = state->frame[state->curframe]; - /* return to the caller whatever r0 had in the callee */ - caller->regs[BPF_REG_0] = *r0; + if (callee->in_callback_fn) { + /* enforce R0 return value range [0, 1]. */ + struct tnum range = tnum_range(0, 1); + + if (r0->type != SCALAR_VALUE) { + verbose(env, "R0 not a scalar value\n"); + return -EACCES; + } + if (!tnum_in(range, r0->var_off)) { + verbose_invalid_scalar(env, r0, &range, "callback return", "R0"); + return -EINVAL; + } + } else { + /* return to the caller whatever r0 had in the callee */ + caller->regs[BPF_REG_0] = *r0; + }
/* Transfer references to the caller */ err = transfer_reference_state(caller, callee); @@@ -5436,7 -5608,9 +5608,9 @@@ record_func_map(struct bpf_verifier_en func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_map_push_elem && func_id != BPF_FUNC_map_pop_elem && - func_id != BPF_FUNC_map_peek_elem) + func_id != BPF_FUNC_map_peek_elem && + func_id != BPF_FUNC_for_each_map_elem && + func_id != BPF_FUNC_redirect_map) return 0;
if (map == NULL) { @@@ -5517,15 -5691,18 +5691,18 @@@ static int check_reference_leak(struct return state->acquired_refs ? -EINVAL : 0; }
- static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) + static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx_p) { const struct bpf_func_proto *fn = NULL; struct bpf_reg_state *regs; struct bpf_call_arg_meta meta; + int insn_idx = *insn_idx_p; bool changes_data; - int i, err; + int i, err, func_id;
/* find function prototype */ + func_id = insn->imm; if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id); @@@ -5571,7 -5748,7 +5748,7 @@@
meta.func_id = func_id; /* check args */ - for (i = 0; i < 5; i++) { + for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { err = check_func_arg(env, i, &meta, fn); if (err) return err; @@@ -5621,6 -5798,13 +5798,13 @@@ return -EINVAL; }
+ if (func_id == BPF_FUNC_for_each_map_elem) { + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_map_elem_callback_state); + if (err < 0) + return -EINVAL; + } + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); @@@ -5861,14 -6045,10 +6045,14 @@@ static int retrieve_ptr_limit(const str { bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg); - u32 off; + u32 off, max;
switch (ptr_reg->type) { case PTR_TO_STACK: + /* Offset 0 is out-of-bounds, but acceptable start for the + * left direction, see BPF_REG_FP. + */ + max = MAX_BPF_STACK + mask_to_left; /* Indirect variable offset stack access is prohibited in * unprivileged mode so it's not handled here. */ @@@ -5876,17 -6056,29 +6060,30 @@@ if (mask_to_left) *ptr_limit = MAX_BPF_STACK + off; else - *ptr_limit = -off; - return 0; + *ptr_limit = -off - 1; + return *ptr_limit >= max ? -ERANGE : 0; + case PTR_TO_MAP_KEY: + /* Currently, this code is not exercised as the only use + * is bpf_for_each_map_elem() helper which requires + * bpf_capable. The code has been tested manually for + * future use. + */ + if (mask_to_left) { + *ptr_limit = ptr_reg->umax_value + ptr_reg->off; + } else { + off = ptr_reg->smin_value + ptr_reg->off; + *ptr_limit = ptr_reg->map_ptr->key_size - off; + } + return 0; case PTR_TO_MAP_VALUE: + max = ptr_reg->map_ptr->value_size; if (mask_to_left) { *ptr_limit = ptr_reg->umax_value + ptr_reg->off; } else { off = ptr_reg->smin_value + ptr_reg->off; - *ptr_limit = ptr_reg->map_ptr->value_size - off; + *ptr_limit = ptr_reg->map_ptr->value_size - off - 1; } - return 0; + return *ptr_limit >= max ? -ERANGE : 0; default: return -EINVAL; } @@@ -5909,7 -6101,7 +6106,7 @@@ static int update_alu_sanitation_state( aux->alu_limit != alu_limit)) return -EACCES;
- /* Corresponding fixup done in fixup_bpf_calls(). */ + /* Corresponding fixup done in do_misc_fixups(). */ aux->alu_state = alu_state; aux->alu_limit = alu_limit; return 0; @@@ -5939,7 -6131,6 +6136,7 @@@ static int sanitize_ptr_alu(struct bpf_ u32 alu_state, alu_limit; struct bpf_reg_state tmp; bool ret; + int err;
if (can_skip_alu_sanitation(env, insn)) return 0; @@@ -5955,13 -6146,10 +6152,13 @@@ alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
- if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) - return 0; - if (update_alu_sanitation_state(aux, alu_state, alu_limit)) - return -EACCES; + err = retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg); + if (err < 0) + return err; + + err = update_alu_sanitation_state(aux, alu_state, alu_limit); + if (err < 0) + return err; do_sim: /* Simulate and find potential out-of-bounds access under * speculative execution from truncation as a result of @@@ -6084,6 -6272,7 +6281,7 @@@ static int adjust_ptr_min_max_vals(stru verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; + case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", @@@ -6112,7 -6301,7 +6310,7 @@@ case BPF_ADD: ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); if (ret < 0) { - verbose(env, "R%d tried to add from different maps or paths\n", dst); + verbose(env, "R%d tried to add from different maps, paths, or prohibited types\n", dst); return ret; } /* We can take a fixed offset as long as it doesn't overflow @@@ -6167,7 -6356,7 +6365,7 @@@ case BPF_SUB: ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); if (ret < 0) { - verbose(env, "R%d tried to sub from different maps or paths\n", dst); + verbose(env, "R%d tried to sub from different maps, paths, or prohibited types\n", dst); return ret; } if (dst_reg == off_reg) { @@@ -8263,6 -8452,24 +8461,24 @@@ static int check_ld_imm(struct bpf_veri return 0; }
+ if (insn->src_reg == BPF_PSEUDO_FUNC) { + struct bpf_prog_aux *aux = env->prog->aux; + u32 subprogno = insn[1].imm; + + if (!aux->func_info) { + verbose(env, "missing btf func_info\n"); + return -EINVAL; + } + if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) { + verbose(env, "callback function not static\n"); + return -EINVAL; + } + + dst_reg->type = PTR_TO_FUNC; + dst_reg->subprogno = subprogno; + return 0; + } + map = env->used_maps[aux->map_index]; mark_reg_known_zero(env, regs, insn->dst_reg); dst_reg->map_ptr = map; @@@ -8491,17 -8698,7 +8707,7 @@@ static int check_return_code(struct bpf }
if (!tnum_in(range, reg->var_off)) { - char tn_buf[48]; - - verbose(env, "At program exit the register R0 "); - if (!tnum_is_unknown(reg->var_off)) { - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "has value %s", tn_buf); - } else { - verbose(env, "has unknown scalar value"); - } - tnum_strn(tn_buf, sizeof(tn_buf), range); - verbose(env, " should have been in %s\n", tn_buf); + verbose_invalid_scalar(env, reg, &range, "program exit", "R0"); return -EINVAL; }
@@@ -8628,6 -8825,27 +8834,27 @@@ static int push_insn(int t, int w, int return DONE_EXPLORING; }
+ static int visit_func_call_insn(int t, int insn_cnt, + struct bpf_insn *insns, + struct bpf_verifier_env *env, + bool visit_callee) + { + int ret; + + ret = push_insn(t, t + 1, FALLTHROUGH, env, false); + if (ret) + return ret; + + if (t + 1 < insn_cnt) + init_explored_state(env, t + 1); + if (visit_callee) { + init_explored_state(env, t); + ret = push_insn(t, t + insns[t].imm + 1, BRANCH, + env, false); + } + return ret; + } + /* Visits the instruction at index t and returns one of the following: * < 0 - an error occurred * DONE_EXPLORING - the instruction was fully explored @@@ -8638,6 -8856,9 +8865,9 @@@ static int visit_insn(int t, int insn_c struct bpf_insn *insns = env->prog->insnsi; int ret;
+ if (bpf_pseudo_func(insns + t)) + return visit_func_call_insn(t, insn_cnt, insns, env, true); + /* All non-branch instructions have a single fall-through edge. */ if (BPF_CLASS(insns[t].code) != BPF_JMP && BPF_CLASS(insns[t].code) != BPF_JMP32) @@@ -8648,18 -8869,8 +8878,8 @@@ return DONE_EXPLORING;
case BPF_CALL: - ret = push_insn(t, t + 1, FALLTHROUGH, env, false); - if (ret) - return ret; - - if (t + 1 < insn_cnt) - init_explored_state(env, t + 1); - if (insns[t].src_reg == BPF_PSEUDO_CALL) { - init_explored_state(env, t); - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, - env, false); - } - return ret; + return visit_func_call_insn(t, insn_cnt, insns, env, + insns[t].src_reg == BPF_PSEUDO_CALL);
case BPF_JA: if (BPF_SRC(insns[t].code) != BPF_K) @@@ -9065,10 -9276,6 +9285,10 @@@ static int check_btf_info(struct bpf_ve btf = btf_get_by_fd(attr->prog_btf_fd); if (IS_ERR(btf)) return PTR_ERR(btf); + if (btf_is_kernel(btf)) { + btf_put(btf); + return -EACCES; + } env->prog->aux->btf = btf;
err = check_btf_func(env, attr, uattr); @@@ -9272,6 -9479,7 +9492,7 @@@ static bool regsafe(struct bpf_reg_stat */ return false; } + case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. @@@ -10118,10 -10326,9 +10339,9 @@@ static int do_check(struct bpf_verifier if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); else - err = check_helper_call(env, insn->imm, env->insn_idx); + err = check_helper_call(env, insn, &env->insn_idx); if (err) return err; - } else if (opcode == BPF_JA) { if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || @@@ -10550,6 -10757,12 +10770,12 @@@ static int resolve_pseudo_ldimm64(struc goto next_insn; }
+ if (insn[0].src_reg == BPF_PSEUDO_FUNC) { + aux = &env->insn_aux_data[i]; + aux->ptr_type = PTR_TO_FUNC; + goto next_insn; + } + /* In final convert_pseudo_ld_imm64() step, this is * converted into regular 64-bit imm load insn. */ @@@ -10682,9 -10895,13 +10908,13 @@@ static void convert_pseudo_ld_imm64(str int insn_cnt = env->prog->len; int i;
- for (i = 0; i < insn_cnt; i++, insn++) - if (insn->code == (BPF_LD | BPF_IMM | BPF_DW)) - insn->src_reg = 0; + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) + continue; + if (insn->src_reg == BPF_PSEUDO_FUNC) + continue; + insn->src_reg = 0; + } }
/* single env->prog->insni[off] instruction was replaced with the range @@@ -11323,6 -11540,12 +11553,12 @@@ static int jit_subprogs(struct bpf_veri return 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + env->insn_aux_data[i].call_imm = insn->imm; + /* subprog is encoded in insn[1].imm */ + continue; + } + if (!bpf_pseudo_call(insn)) continue; /* Upon error here we cannot fall back to interpreter but @@@ -11452,6 -11675,12 +11688,12 @@@ for (i = 0; i < env->subprog_cnt; i++) { insn = func[i]->insnsi; for (j = 0; j < func[i]->len; j++, insn++) { + if (bpf_pseudo_func(insn)) { + subprog = insn[1].imm; + insn[0].imm = (u32)(long)func[subprog]->bpf_func; + insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32; + continue; + } if (!bpf_pseudo_call(insn)) continue; subprog = insn->off; @@@ -11497,6 -11726,11 +11739,11 @@@ * later look the same as if they were interpreted only. */ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + insn[0].imm = env->insn_aux_data[i].call_imm; + insn[1].imm = find_subprog(env, i + insn[0].imm + 1); + continue; + } if (!bpf_pseudo_call(insn)) continue; insn->off = env->insn_aux_data[i].call_imm; @@@ -11561,6 -11795,14 +11808,14 @@@ static int fixup_call_args(struct bpf_v return -EINVAL; } for (i = 0; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + /* When JIT fails the progs with callback calls + * have to be rejected, since interpreter doesn't support them yet. + */ + verbose(env, "callbacks are not allowed in non-JITed programs\n"); + return -EINVAL; + } + if (!bpf_pseudo_call(insn)) continue; depth = get_callee_stack_depth(env, insn, i); @@@ -11573,12 -11815,10 +11828,10 @@@ return err; }
- /* fixup insn->imm field of bpf_call instructions - * and inline eligible helpers as explicit sequence of BPF instructions - * - * this function is called after eBPF program passed verification + /* Do various post-verification rewrites in a single program pass. + * These rewrites simplify JIT and interpreter implementations. */ - static int fixup_bpf_calls(struct bpf_verifier_env *env) + static int do_misc_fixups(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; bool expect_blinding = bpf_jit_blinding_enabled(prog); @@@ -11593,6 -11833,7 +11846,7 @@@ int i, ret, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) { + /* Make divide-by-zero exceptions impossible. */ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || insn->code == (BPF_ALU | BPF_MOD | BPF_X) || @@@ -11633,6 -11874,7 +11887,7 @@@ continue; }
+ /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */ if (BPF_CLASS(insn->code) == BPF_LD && (BPF_MODE(insn->code) == BPF_ABS || BPF_MODE(insn->code) == BPF_IND)) { @@@ -11652,6 -11894,7 +11907,7 @@@ continue; }
+ /* Rewrite pointer arithmetic to mitigate speculation attacks. */ if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; @@@ -11673,7 -11916,7 +11929,7 @@@ off_reg = issrc ? insn->src_reg : insn->dst_reg; if (isneg) *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); - *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); + *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); @@@ -11800,7 -12043,8 +12056,8 @@@ insn->imm == BPF_FUNC_map_delete_elem || insn->imm == BPF_FUNC_map_push_elem || insn->imm == BPF_FUNC_map_pop_elem || - insn->imm == BPF_FUNC_map_peek_elem)) { + insn->imm == BPF_FUNC_map_peek_elem || + insn->imm == BPF_FUNC_redirect_map)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@@ -11842,6 -12086,9 +12099,9 @@@ (int (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_peek_elem, (int (*)(struct bpf_map *map, void *value))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_redirect, + (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL)); + patch_map_ops_generic: switch (insn->imm) { case BPF_FUNC_map_lookup_elem: @@@ -11868,11 -12115,16 +12128,16 @@@ insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - __bpf_call_base; continue; + case BPF_FUNC_redirect_map: + insn->imm = BPF_CAST_CALL(ops->map_redirect) - + __bpf_call_base; + continue; }
goto patch_call_imm; }
+ /* Implement bpf_jiffies64 inline. */ if (prog->jit_requested && BITS_PER_LONG == 64 && insn->imm == BPF_FUNC_jiffies64) { struct bpf_insn ld_jiffies_addr[2] = { @@@ -12683,7 -12935,7 +12948,7 @@@ skip_full_check ret = convert_ctx_accesses(env);
if (ret == 0) - ret = fixup_bpf_calls(env); + ret = do_misc_fixups(env);
/* do 32-bit optimization after insn patching has done so those patched * insns could be handled correctly. diff --combined kernel/fork.c index 54cc905e5fe0,b94391a58708..50209691f21a --- a/kernel/fork.c +++ b/kernel/fork.c @@@ -96,6 -96,7 +96,7 @@@ #include <linux/kasan.h> #include <linux/scs.h> #include <linux/io_uring.h> + #include <linux/bpf.h>
#include <asm/pgalloc.h> #include <linux/uaccess.h> @@@ -734,6 -735,7 +735,7 @@@ void __put_task_struct(struct task_stru cgroup_free(tsk); task_numa_free(tsk, true); security_task_free(tsk); + bpf_task_storage_free(tsk); exit_creds(tsk); delayacct_tsk_free(tsk); put_signal_struct(tsk->signal); @@@ -994,13 -996,6 +996,13 @@@ static void mm_init_owner(struct mm_str #endif }
+static void mm_init_pasid(struct mm_struct *mm) +{ +#ifdef CONFIG_IOMMU_SUPPORT + mm->pasid = INIT_PASID; +#endif +} + static void mm_init_uprobes_state(struct mm_struct *mm) { #ifdef CONFIG_UPROBES @@@ -1031,7 -1026,6 +1033,7 @@@ static struct mm_struct *mm_init(struc mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); + mm_init_pasid(mm); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); @@@ -2072,6 -2066,9 +2074,9 @@@ static __latent_entropy struct task_str p->sequential_io = 0; p->sequential_io_avg = 0; #endif + #ifdef CONFIG_BPF_SYSCALL + RCU_INIT_POINTER(p->bpf_storage, NULL); + #endif
/* Perform scheduler related setup. Assign this task to a CPU. */ retval = sched_fork(clone_flags, p); diff --combined net/core/dev.c index 0f72ff5d34ba,c9a496f5e687..40699957e882 --- a/net/core/dev.c +++ b/net/core/dev.c @@@ -1184,18 -1184,6 +1184,18 @@@ static int __dev_alloc_name(struct net return -ENOMEM;
for_each_netdev(net, d) { + struct netdev_name_node *name_node; + list_for_each_entry(name_node, &d->name_node->list, list) { + if (!sscanf(name_node->name, name, &i)) + continue; + if (i < 0 || i >= max_netdevices) + continue; + + /* avoid cases where sscanf is not exact inverse of printf */ + snprintf(buf, IFNAMSIZ, name, i); + if (!strncmp(buf, name_node->name, IFNAMSIZ)) + set_bit(i, inuse); + } if (!sscanf(d->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) @@@ -2463,16 -2451,14 +2463,14 @@@ int netdev_txq_to_tc(struct net_device EXPORT_SYMBOL(netdev_txq_to_tc);
#ifdef CONFIG_XPS - struct static_key xps_needed __read_mostly; - EXPORT_SYMBOL(xps_needed); - struct static_key xps_rxqs_needed __read_mostly; - EXPORT_SYMBOL(xps_rxqs_needed); + static struct static_key xps_needed __read_mostly; + static struct static_key xps_rxqs_needed __read_mostly; static DEFINE_MUTEX(xps_map_mutex); #define xmap_dereference(P) \ rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
static bool remove_xps_queue(struct xps_dev_maps *dev_maps, - int tci, u16 index) + struct xps_dev_maps *old_maps, int tci, u16 index) { struct xps_map *map = NULL; int pos; @@@ -2491,6 -2477,8 +2489,8 @@@ break; }
+ if (old_maps) + RCU_INIT_POINTER(old_maps->attr_map[tci], NULL); RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL); kfree_rcu(map, rcu); return false; @@@ -2503,7 -2491,7 +2503,7 @@@ static bool remove_xps_queue_cpu(struc struct xps_dev_maps *dev_maps, int cpu, u16 offset, u16 count) { - int num_tc = dev->num_tc ? : 1; + int num_tc = dev_maps->num_tc; bool active = false; int tci;
@@@ -2511,7 -2499,7 +2511,7 @@@ int i, j;
for (i = count, j = offset; i--; j++) { - if (!remove_xps_queue(dev_maps, tci, j)) + if (!remove_xps_queue(dev_maps, NULL, tci, j)) break; }
@@@ -2523,74 -2511,54 +2523,54 @@@
static void reset_xps_maps(struct net_device *dev, struct xps_dev_maps *dev_maps, - bool is_rxqs_map) + enum xps_map_type type) { - if (is_rxqs_map) { - static_key_slow_dec_cpuslocked(&xps_rxqs_needed); - RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); - } else { - RCU_INIT_POINTER(dev->xps_cpus_map, NULL); - } static_key_slow_dec_cpuslocked(&xps_needed); + if (type == XPS_RXQS) + static_key_slow_dec_cpuslocked(&xps_rxqs_needed); + + RCU_INIT_POINTER(dev->xps_maps[type], NULL); + kfree_rcu(dev_maps, rcu); }
- static void clean_xps_maps(struct net_device *dev, const unsigned long *mask, - struct xps_dev_maps *dev_maps, unsigned int nr_ids, - u16 offset, u16 count, bool is_rxqs_map) + static void clean_xps_maps(struct net_device *dev, enum xps_map_type type, + u16 offset, u16 count) { + struct xps_dev_maps *dev_maps; bool active = false; int i, j;
- for (j = -1; j = netif_attrmask_next(j, mask, nr_ids), - j < nr_ids;) - active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, - count); + dev_maps = xmap_dereference(dev->xps_maps[type]); + if (!dev_maps) + return; + + for (j = 0; j < dev_maps->nr_ids; j++) + active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count); if (!active) - reset_xps_maps(dev, dev_maps, is_rxqs_map); + reset_xps_maps(dev, dev_maps, type);
- if (!is_rxqs_map) { - for (i = offset + (count - 1); count--; i--) { + if (type == XPS_CPUS) { + for (i = offset + (count - 1); count--; i--) netdev_queue_numa_node_write( - netdev_get_tx_queue(dev, i), - NUMA_NO_NODE); - } + netdev_get_tx_queue(dev, i), NUMA_NO_NODE); } }
static void netif_reset_xps_queues(struct net_device *dev, u16 offset, u16 count) { - const unsigned long *possible_mask = NULL; - struct xps_dev_maps *dev_maps; - unsigned int nr_ids; - if (!static_key_false(&xps_needed)) return;
cpus_read_lock(); mutex_lock(&xps_map_mutex);
- if (static_key_false(&xps_rxqs_needed)) { - dev_maps = xmap_dereference(dev->xps_rxqs_map); - if (dev_maps) { - nr_ids = dev->num_rx_queues; - clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, - offset, count, true); - } - } + if (static_key_false(&xps_rxqs_needed)) + clean_xps_maps(dev, XPS_RXQS, offset, count);
- dev_maps = xmap_dereference(dev->xps_cpus_map); - if (!dev_maps) - goto out_no_maps; - - if (num_possible_cpus() > 1) - possible_mask = cpumask_bits(cpu_possible_mask); - nr_ids = nr_cpu_ids; - clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count, - false); + clean_xps_maps(dev, XPS_CPUS, offset, count);
- out_no_maps: mutex_unlock(&xps_map_mutex); cpus_read_unlock(); } @@@ -2640,16 -2608,35 +2620,35 @@@ static struct xps_map *expand_xps_map(s return new_map; }
+ /* Copy xps maps at a given index */ + static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps, + struct xps_dev_maps *new_dev_maps, int index, + int tc, bool skip_tc) + { + int i, tci = index * dev_maps->num_tc; + struct xps_map *map; + + /* copy maps belonging to foreign traffic classes */ + for (i = 0; i < dev_maps->num_tc; i++, tci++) { + if (i == tc && skip_tc) + continue; + + /* fill in the new device map from the old device map */ + map = xmap_dereference(dev_maps->attr_map[tci]); + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); + } + } + /* Must be called under cpus_read_lock */ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, - u16 index, bool is_rxqs_map) + u16 index, enum xps_map_type type) { - const unsigned long *online_mask = NULL, *possible_mask = NULL; - struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; + struct xps_dev_maps *dev_maps, *new_dev_maps = NULL, *old_dev_maps = NULL; + const unsigned long *online_mask = NULL; + bool active = false, copy = false; int i, j, tci, numa_node_id = -2; int maps_sz, num_tc = 1, tc = 0; struct xps_map *map, *new_map; - bool active = false; unsigned int nr_ids;
if (dev->num_tc) { @@@ -2667,38 -2654,48 +2666,48 @@@ }
mutex_lock(&xps_map_mutex); - if (is_rxqs_map) { + + dev_maps = xmap_dereference(dev->xps_maps[type]); + if (type == XPS_RXQS) { maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues); - dev_maps = xmap_dereference(dev->xps_rxqs_map); nr_ids = dev->num_rx_queues; } else { maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc); - if (num_possible_cpus() > 1) { + if (num_possible_cpus() > 1) online_mask = cpumask_bits(cpu_online_mask); - possible_mask = cpumask_bits(cpu_possible_mask); - } - dev_maps = xmap_dereference(dev->xps_cpus_map); nr_ids = nr_cpu_ids; }
if (maps_sz < L1_CACHE_BYTES) maps_sz = L1_CACHE_BYTES;
+ /* The old dev_maps could be larger or smaller than the one we're + * setting up now, as dev->num_tc or nr_ids could have been updated in + * between. We could try to be smart, but let's be safe instead and only + * copy foreign traffic classes if the two map sizes match. + */ + if (dev_maps && + dev_maps->num_tc == num_tc && dev_maps->nr_ids == nr_ids) + copy = true; + /* allocate memory for queue storage */ for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids), j < nr_ids;) { - if (!new_dev_maps) - new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); if (!new_dev_maps) { - mutex_unlock(&xps_map_mutex); - return -ENOMEM; + new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); + if (!new_dev_maps) { + mutex_unlock(&xps_map_mutex); + return -ENOMEM; + } + + new_dev_maps->nr_ids = nr_ids; + new_dev_maps->num_tc = num_tc; }
tci = j * num_tc + tc; - map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) : - NULL; + map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL;
- map = expand_xps_map(map, j, index, is_rxqs_map); + map = expand_xps_map(map, j, index, type == XPS_RXQS); if (!map) goto error;
@@@ -2711,29 -2708,21 +2720,21 @@@ if (!dev_maps) { /* Increment static keys at most once per type */ static_key_slow_inc_cpuslocked(&xps_needed); - if (is_rxqs_map) + if (type == XPS_RXQS) static_key_slow_inc_cpuslocked(&xps_rxqs_needed); }
- for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { - /* copy maps belonging to foreign traffic classes */ - for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) { - /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->attr_map[tci]); - RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); - } + for (j = 0; j < nr_ids; j++) { + bool skip_tc = false;
- /* We need to explicitly update tci as prevous loop - * could break out early if dev_maps is NULL. - */ tci = j * num_tc + tc; - if (netif_attr_test_mask(j, mask, nr_ids) && netif_attr_test_online(j, online_mask, nr_ids)) { /* add tx-queue to CPU/rx-queue maps */ int pos = 0;
+ skip_tc = true; + map = xmap_dereference(new_dev_maps->attr_map[tci]); while ((pos < map->len) && (map->queues[pos] != index)) pos++; @@@ -2741,78 -2730,81 +2742,81 @@@ if (pos == map->len) map->queues[map->len++] = index; #ifdef CONFIG_NUMA - if (!is_rxqs_map) { + if (type == XPS_CPUS) { if (numa_node_id == -2) numa_node_id = cpu_to_node(j); else if (numa_node_id != cpu_to_node(j)) numa_node_id = -1; } #endif - } else if (dev_maps) { - /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->attr_map[tci]); - RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); }
- /* copy maps belonging to foreign traffic classes */ - for (i = num_tc - tc, tci++; dev_maps && --i; tci++) { - /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->attr_map[tci]); - RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); - } + if (copy) + xps_copy_dev_maps(dev_maps, new_dev_maps, j, tc, + skip_tc); }
- if (is_rxqs_map) - rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps); - else - rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps); + rcu_assign_pointer(dev->xps_maps[type], new_dev_maps);
/* Cleanup old maps */ if (!dev_maps) goto out_no_old_maps;
- for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { - for (i = num_tc, tci = j * num_tc; i--; tci++) { - new_map = xmap_dereference(new_dev_maps->attr_map[tci]); + for (j = 0; j < dev_maps->nr_ids; j++) { + for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) { map = xmap_dereference(dev_maps->attr_map[tci]); - if (map && map != new_map) - kfree_rcu(map, rcu); + if (!map) + continue; + + if (copy) { + new_map = xmap_dereference(new_dev_maps->attr_map[tci]); + if (map == new_map) + continue; + } + + RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL); + kfree_rcu(map, rcu); } }
- kfree_rcu(dev_maps, rcu); + old_dev_maps = dev_maps;
out_no_old_maps: dev_maps = new_dev_maps; active = true;
out_no_new_maps: - if (!is_rxqs_map) { + if (type == XPS_CPUS) /* update Tx queue numa node */ netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), (numa_node_id >= 0) ? numa_node_id : NUMA_NO_NODE); - }
if (!dev_maps) goto out_no_maps;
/* removes tx-queue from unused CPUs/rx-queues */ - for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { - for (i = tc, tci = j * num_tc; i--; tci++) - active |= remove_xps_queue(dev_maps, tci, index); - if (!netif_attr_test_mask(j, mask, nr_ids) || - !netif_attr_test_online(j, online_mask, nr_ids)) - active |= remove_xps_queue(dev_maps, tci, index); - for (i = num_tc - tc, tci++; --i; tci++) - active |= remove_xps_queue(dev_maps, tci, index); + for (j = 0; j < dev_maps->nr_ids; j++) { + tci = j * dev_maps->num_tc; + + for (i = 0; i < dev_maps->num_tc; i++, tci++) { + if (i == tc && + netif_attr_test_mask(j, mask, dev_maps->nr_ids) && + netif_attr_test_online(j, online_mask, dev_maps->nr_ids)) + continue; + + active |= remove_xps_queue(dev_maps, + copy ? old_dev_maps : NULL, + tci, index); + } }
+ if (old_dev_maps) + kfree_rcu(old_dev_maps, rcu); + /* free map if not active */ if (!active) - reset_xps_maps(dev, dev_maps, is_rxqs_map); + reset_xps_maps(dev, dev_maps, type);
out_no_maps: mutex_unlock(&xps_map_mutex); @@@ -2820,11 -2812,10 +2824,10 @@@ return 0; error: /* remove any maps that we added */ - for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { + for (j = 0; j < nr_ids; j++) { for (i = num_tc, tci = j * num_tc; i--; tci++) { new_map = xmap_dereference(new_dev_maps->attr_map[tci]); - map = dev_maps ? + map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL; if (new_map && new_map != map) @@@ -2845,7 -2836,7 +2848,7 @@@ int netif_set_xps_queue(struct net_devi int ret;
cpus_read_lock(); - ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, false); + ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, XPS_CPUS); cpus_read_unlock();
return ret; @@@ -3956,13 -3947,15 +3959,15 @@@ sch_handle_egress(struct sk_buff *skb, static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb, struct xps_dev_maps *dev_maps, unsigned int tci) { + int tc = netdev_get_prio_tc_map(dev, skb->priority); struct xps_map *map; int queue_index = -1;
- if (dev->num_tc) { - tci *= dev->num_tc; - tci += netdev_get_prio_tc_map(dev, skb->priority); - } + if (tc >= dev_maps->num_tc || tci >= dev_maps->nr_ids) + return queue_index; + + tci *= dev_maps->num_tc; + tci += tc;
map = rcu_dereference(dev_maps->attr_map[tci]); if (map) { @@@ -3993,18 -3986,18 +3998,18 @@@ static int get_xps_queue(struct net_dev if (!static_key_false(&xps_rxqs_needed)) goto get_cpus_map;
- dev_maps = rcu_dereference(sb_dev->xps_rxqs_map); + dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_RXQS]); if (dev_maps) { int tci = sk_rx_queue_get(sk);
- if (tci >= 0 && tci < dev->num_rx_queues) + if (tci >= 0) queue_index = __get_xps_queue_idx(dev, skb, dev_maps, tci); }
get_cpus_map: if (queue_index < 0) { - dev_maps = rcu_dereference(sb_dev->xps_cpus_map); + dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_CPUS]); if (dev_maps) { unsigned int tci = skb->sender_cpu - 1;
@@@ -4306,13 -4299,6 +4311,13 @@@ static inline void ____napi_schedule(st */ thread = READ_ONCE(napi->thread); if (thread) { + /* Avoid doing set_bit() if the thread is in + * INTERRUPTIBLE state, cause napi_thread_wait() + * makes sure to proceed with napi polling + * if the thread is explicitly woken from here. + */ + if (READ_ONCE(thread->state) != TASK_INTERRUPTIBLE) + set_bit(NAPI_STATE_SCHED_THREADED, &napi->state); wake_up_process(thread); return; } @@@ -5284,6 -5270,7 +5289,7 @@@ skip_classify goto another_round; case RX_HANDLER_EXACT: deliver_exact = true; + break; case RX_HANDLER_PASS: break; default: @@@ -5876,15 -5863,13 +5882,13 @@@ void napi_gro_flush(struct napi_struct } EXPORT_SYMBOL(napi_gro_flush);
- static struct list_head *gro_list_prepare(struct napi_struct *napi, - struct sk_buff *skb) + static void gro_list_prepare(const struct list_head *head, + const struct sk_buff *skb) { unsigned int maclen = skb->dev->hard_header_len; u32 hash = skb_get_hash_raw(skb); - struct list_head *head; struct sk_buff *p;
- head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list; list_for_each_entry(p, head, list) { unsigned long diffs;
@@@ -5910,8 -5895,6 +5914,6 @@@ maclen); NAPI_GRO_CB(p)->same_flow = !diffs; } - - return head; }
static void skb_gro_reset_offset(struct sk_buff *skb) @@@ -5974,11 -5957,11 +5976,11 @@@ static void gro_flush_oldest(struct nap
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { - u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); + u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); + struct gro_list *gro_list = &napi->gro_hash[bucket]; struct list_head *head = &offload_base; struct packet_offload *ptype; __be16 type = skb->protocol; - struct list_head *gro_head; struct sk_buff *pp = NULL; enum gro_result ret; int same_flow; @@@ -5987,7 -5970,7 +5989,7 @@@ if (netif_elide_gro(skb->dev)) goto normal;
- gro_head = gro_list_prepare(napi, skb); + gro_list_prepare(&gro_list->list, skb);
rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@@ -6023,7 -6006,7 +6025,7 @@@
pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, ipv6_gro_receive, inet_gro_receive, - gro_head, skb); + &gro_list->list, skb); break; } rcu_read_unlock(); @@@ -6042,7 -6025,7 +6044,7 @@@ if (pp) { skb_list_del_init(pp); napi_gro_complete(napi, pp); - napi->gro_hash[hash].count--; + gro_list->count--; }
if (same_flow) @@@ -6051,16 -6034,16 +6053,16 @@@ if (NAPI_GRO_CB(skb)->flush) goto normal;
- if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) { - gro_flush_oldest(napi, gro_head); - } else { - napi->gro_hash[hash].count++; - } + if (unlikely(gro_list->count >= MAX_GRO_SKBS)) + gro_flush_oldest(napi, &gro_list->list); + else + gro_list->count++; + NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->age = jiffies; NAPI_GRO_CB(skb)->last = skb; skb_shinfo(skb)->gso_size = skb_gro_len(skb); - list_add(&skb->list, gro_head); + list_add(&skb->list, &gro_list->list); ret = GRO_HELD;
pull: @@@ -6068,11 -6051,11 +6070,11 @@@ if (grow > 0) gro_pull_from_frag0(skb, grow); ok: - if (napi->gro_hash[hash].count) { - if (!test_bit(hash, &napi->gro_bitmask)) - __set_bit(hash, &napi->gro_bitmask); - } else if (test_bit(hash, &napi->gro_bitmask)) { - __clear_bit(hash, &napi->gro_bitmask); + if (gro_list->count) { + if (!test_bit(bucket, &napi->gro_bitmask)) + __set_bit(bucket, &napi->gro_bitmask); + } else if (test_bit(bucket, &napi->gro_bitmask)) { + __clear_bit(bucket, &napi->gro_bitmask); }
return ret; @@@ -6505,7 -6488,6 +6507,7 @@@ bool napi_complete_done(struct napi_str WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED | + NAPIF_STATE_SCHED_THREADED | NAPIF_STATE_PREFER_BUSY_POLL);
/* If STATE_MISSED was set, leave STATE_SCHED set, @@@ -6789,6 -6771,7 +6791,7 @@@ int dev_set_threaded(struct net_device
return err; } + EXPORT_SYMBOL(dev_set_threaded);
void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) @@@ -6988,25 -6971,16 +6991,25 @@@ static int napi_poll(struct napi_struc
static int napi_thread_wait(struct napi_struct *napi) { + bool woken = false; + set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop() && !napi_disable_pending(napi)) { - if (test_bit(NAPI_STATE_SCHED, &napi->state)) { + /* Testing SCHED_THREADED bit here to make sure the current + * kthread owns this napi and could poll on this napi. + * Testing SCHED bit is not enough because SCHED bit might be + * set by some other busy poll thread or by napi_disable(). + */ + if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) { WARN_ON(!list_empty(&napi->poll_list)); __set_current_state(TASK_RUNNING); return 0; }
schedule(); + /* woken being true indicates this thread owns this napi. */ + woken = true; set_current_state(TASK_INTERRUPTIBLE); } __set_current_state(TASK_RUNNING); @@@ -10336,11 -10310,15 +10339,15 @@@ EXPORT_SYMBOL(register_netdev)
int netdev_refcnt_read(const struct net_device *dev) { + #ifdef CONFIG_PCPU_DEV_REFCNT int i, refcnt = 0;
for_each_possible_cpu(i) refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); return refcnt; + #else + return refcount_read(&dev->dev_refcnt); + #endif } EXPORT_SYMBOL(netdev_refcnt_read);
@@@ -10368,7 -10346,7 +10375,7 @@@ static void netdev_wait_allrefs(struct rebroadcast_time = warning_time = jiffies; refcnt = netdev_refcnt_read(dev);
- while (refcnt != 0) { + while (refcnt != 1) { if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { rtnl_lock();
@@@ -10405,7 -10383,7 +10412,7 @@@
refcnt = netdev_refcnt_read(dev);
- if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) { + if (refcnt != 1 && time_after(jiffies, warning_time + 10 * HZ)) { pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", dev->name, refcnt); warning_time = jiffies; @@@ -10481,7 -10459,7 +10488,7 @@@ void netdev_run_todo(void netdev_wait_allrefs(dev);
/* paranoia */ - BUG_ON(netdev_refcnt_read(dev)); + BUG_ON(netdev_refcnt_read(dev) != 1); BUG_ON(!list_empty(&dev->ptype_all)); BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); @@@ -10698,9 -10676,14 +10705,14 @@@ struct net_device *alloc_netdev_mqs(in dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p;
+ #ifdef CONFIG_PCPU_DEV_REFCNT dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) goto free_dev; + dev_hold(dev); + #else + refcount_set(&dev->dev_refcnt, 1); + #endif
if (dev_addr_init(dev)) goto free_pcpu; @@@ -10764,8 -10747,10 +10776,10 @@@ free_all return NULL;
free_pcpu: + #ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); free_dev: + #endif netdev_freemem(dev); return NULL; } @@@ -10807,8 -10792,10 +10821,10 @@@ void free_netdev(struct net_device *dev list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p);
+ #ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; + #endif free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL;
@@@ -11375,7 -11362,7 +11391,7 @@@ static void __net_exit default_device_e continue;
/* Leave virtual devices for the generic cleanup */ - if (dev->rtnl_link_ops) + if (dev->rtnl_link_ops && !dev->rtnl_link_ops->netns_refund) continue;
/* Push remaining network devices to init_net */ diff --combined net/core/drop_monitor.c index db65ce62b625,1eb02c2236f2..ead2a8aa57b4 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@@ -1053,20 -1053,6 +1053,20 @@@ static int net_dm_hw_monitor_start(stru return 0;
err_module_put: + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); + struct sk_buff *skb; + + del_timer_sync(&hw_data->send_timer); + cancel_work_sync(&hw_data->dm_alert_work); + while ((skb = __skb_dequeue(&hw_data->drop_queue))) { + struct devlink_trap_metadata *hw_metadata; + + hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; + net_dm_hw_metadata_free(hw_metadata); + consume_skb(skb); + } + } module_put(THIS_MODULE); return rc; } @@@ -1148,15 -1134,6 +1148,15 @@@ static int net_dm_trace_on_set(struct n err_unregister_trace: unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL); err_module_put: + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); + struct sk_buff *skb; + + del_timer_sync(&data->send_timer); + cancel_work_sync(&data->dm_alert_work); + while ((skb = __skb_dequeue(&data->drop_queue))) + consume_skb(skb); + } module_put(THIS_MODULE); return rc; } @@@ -1754,7 -1731,7 +1754,7 @@@ static void exit_net_drop_monitor(void
/* * Because of the module_get/put we do in the trace state change path - * we are guarnateed not to have any current users when we get here + * we are guaranteed not to have any current users when we get here */
for_each_possible_cpu(cpu) { diff --combined net/core/filter.c index 9323d34d34cc,b6732000d8a2..f5eeebf6a16f --- a/net/core/filter.c +++ b/net/core/filter.c @@@ -1863,10 -1863,7 +1863,7 @@@ static const struct bpf_func_proto bpf_ static inline int sk_skb_try_make_writable(struct sk_buff *skb, unsigned int write_len) { - int err = __bpf_try_make_writable(skb, write_len); - - bpf_compute_data_end_sk_skb(skb); - return err; + return __bpf_try_make_writable(skb, write_len); }
BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len) @@@ -3412,6 -3409,7 +3409,7 @@@ static u32 bpf_skb_net_base_len(const s BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ + BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \ BPF_F_ADJ_ROOM_ENCAP_L2( \ BPF_ADJ_ROOM_ENCAP_L2_MASK))
@@@ -3448,6 -3446,10 +3446,10 @@@ static int bpf_skb_net_grow(struct sk_b flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) return -EINVAL;
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH && + inner_mac_len < ETH_HLEN) + return -EINVAL; + if (skb->encapsulation) return -EALREADY;
@@@ -3466,7 -3468,11 +3468,11 @@@ skb->inner_mac_header = inner_net - inner_mac_len; skb->inner_network_header = inner_net; skb->inner_transport_header = inner_trans; - skb_set_inner_protocol(skb, skb->protocol); + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH) + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + else + skb_set_inner_protocol(skb, skb->protocol);
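BPF_F_ADJ_ROOM_ENCAP_L2_ETH tells the stack that the bytes reserved with BPF_F_ADJ_ROOM_ENCAP_L2() start with an inner Ethernet header, so the inner protocol is recorded as ETH_P_TEB. A rough tc-program sketch of requesting such a layout (the section name, header sizes and the idea of filling the outer headers afterwards are illustrative assumptions, not part of this patch):

	// SPDX-License-Identifier: GPL-2.0
	#include <linux/bpf.h>
	#include <linux/if_ether.h>
	#include <linux/ip.h>
	#include <linux/udp.h>
	#include <linux/pkt_cls.h>
	#include <bpf/bpf_helpers.h>

	SEC("tc")
	int push_l2_tunnel(struct __sk_buff *skb)
	{
		__u32 room = sizeof(struct iphdr) + sizeof(struct udphdr) + ETH_HLEN;
		__u64 flags = BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
			      BPF_F_ADJ_ROOM_ENCAP_L4_UDP  |
			      BPF_F_ADJ_ROOM_ENCAP_L2(ETH_HLEN) |
			      BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		/* grow headroom at the MAC layer; the outer IPv4/UDP/Ethernet
		 * headers still have to be written by the program (not shown)
		 */
		if (bpf_skb_adjust_room(skb, room, BPF_ADJ_ROOM_MAC, flags))
			return TC_ACT_SHOT;

		return TC_ACT_OK;
	}

	char LICENSE[] SEC("license") = "GPL";

Note the kernel-side check added above: the L2 length passed via BPF_F_ADJ_ROOM_ENCAP_L2() must be at least ETH_HLEN when the new flag is set.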
skb->encapsulation = 1; skb_set_network_header(skb, mac_len); @@@ -3577,7 -3583,6 +3583,6 @@@ BPF_CALL_4(sk_skb_adjust_room, struct s return -ENOMEM; __skb_pull(skb, len_diff_abs); } - bpf_compute_data_end_sk_skb(skb); if (tls_sw_has_ctx_rx(skb->sk)) { struct strp_msg *rxm = strp_msg(skb);
@@@ -3742,10 -3747,7 +3747,7 @@@ static const struct bpf_func_proto bpf_ BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len, u64, flags) { - int ret = __bpf_skb_change_tail(skb, new_len, flags); - - bpf_compute_data_end_sk_skb(skb); - return ret; + return __bpf_skb_change_tail(skb, new_len, flags); }
static const struct bpf_func_proto sk_skb_change_tail_proto = { @@@ -3808,10 -3810,7 +3810,7 @@@ static const struct bpf_func_proto bpf_ BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room, u64, flags) { - int ret = __bpf_skb_change_head(skb, head_room, flags); - - bpf_compute_data_end_sk_skb(skb); - return ret; + return __bpf_skb_change_head(skb, head_room, flags); }
static const struct bpf_func_proto sk_skb_change_head_proto = { @@@ -3919,23 -3918,6 +3918,6 @@@ static const struct bpf_func_proto bpf_ .arg2_type = ARG_ANYTHING, };
- static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, - struct bpf_map *map, struct xdp_buff *xdp) - { - switch (map->map_type) { - case BPF_MAP_TYPE_DEVMAP: - case BPF_MAP_TYPE_DEVMAP_HASH: - return dev_map_enqueue(fwd, xdp, dev_rx); - case BPF_MAP_TYPE_CPUMAP: - return cpu_map_enqueue(fwd, xdp, dev_rx); - case BPF_MAP_TYPE_XSKMAP: - return __xsk_map_redirect(fwd, xdp); - default: - return -EBADRQC; - } - return 0; - } - void xdp_do_flush(void) { __dev_flush(); @@@ -3944,71 -3926,52 +3926,52 @@@ } EXPORT_SYMBOL_GPL(xdp_do_flush);
- static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) - { - switch (map->map_type) { - case BPF_MAP_TYPE_DEVMAP: - return __dev_map_lookup_elem(map, index); - case BPF_MAP_TYPE_DEVMAP_HASH: - return __dev_map_hash_lookup_elem(map, index); - case BPF_MAP_TYPE_CPUMAP: - return __cpu_map_lookup_elem(map, index); - case BPF_MAP_TYPE_XSKMAP: - return __xsk_map_lookup_elem(map, index); - default: - return NULL; - } - } - - void bpf_clear_redirect_map(struct bpf_map *map) - { - struct bpf_redirect_info *ri; - int cpu; - - for_each_possible_cpu(cpu) { - ri = per_cpu_ptr(&bpf_redirect_info, cpu); - /* Avoid polluting remote cacheline due to writes if - * not needed. Once we pass this test, we need the - * cmpxchg() to make sure it hasn't been changed in - * the meantime by remote CPU. - */ - if (unlikely(READ_ONCE(ri->map) == map)) - cmpxchg(&ri->map, map, NULL); - } - } - int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - struct bpf_map *map = READ_ONCE(ri->map); - u32 index = ri->tgt_index; + enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; + u32 map_id = ri->map_id; int err;
- ri->tgt_index = 0; - ri->tgt_value = NULL; - WRITE_ONCE(ri->map, NULL); + ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->map_type = BPF_MAP_TYPE_UNSPEC;
- if (unlikely(!map)) { - fwd = dev_get_by_index_rcu(dev_net(dev), index); - if (unlikely(!fwd)) { - err = -EINVAL; - goto err; + switch (map_type) { + case BPF_MAP_TYPE_DEVMAP: + fallthrough; + case BPF_MAP_TYPE_DEVMAP_HASH: + err = dev_map_enqueue(fwd, xdp, dev); + break; + case BPF_MAP_TYPE_CPUMAP: + err = cpu_map_enqueue(fwd, xdp, dev); + break; + case BPF_MAP_TYPE_XSKMAP: + err = __xsk_map_redirect(fwd, xdp); + break; + case BPF_MAP_TYPE_UNSPEC: + if (map_id == INT_MAX) { + fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index); + if (unlikely(!fwd)) { + err = -EINVAL; + break; + } + err = dev_xdp_enqueue(fwd, xdp, dev); + break; } - - err = dev_xdp_enqueue(fwd, xdp, dev); - } else { - err = __bpf_tx_xdp_map(dev, fwd, map, xdp); + fallthrough; + default: + err = -EBADRQC; }
if (unlikely(err)) goto err;
- _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); + _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); return 0; err: - _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); return err; } EXPORT_SYMBOL_GPL(xdp_do_redirect); @@@ -4017,41 -3980,36 +3980,36 @@@ static int xdp_do_generic_redirect_map( struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog, - struct bpf_map *map) + void *fwd, + enum bpf_map_type map_type, u32 map_id) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - u32 index = ri->tgt_index; - void *fwd = ri->tgt_value; - int err = 0; - - ri->tgt_index = 0; - ri->tgt_value = NULL; - WRITE_ONCE(ri->map, NULL); - - if (map->map_type == BPF_MAP_TYPE_DEVMAP || - map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { - struct bpf_dtab_netdev *dst = fwd; + int err;
- err = dev_map_generic_redirect(dst, skb, xdp_prog); + switch (map_type) { + case BPF_MAP_TYPE_DEVMAP: + fallthrough; + case BPF_MAP_TYPE_DEVMAP_HASH: + err = dev_map_generic_redirect(fwd, skb, xdp_prog); if (unlikely(err)) goto err; - } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { - struct xdp_sock *xs = fwd; - - err = xsk_generic_rcv(xs, xdp); + break; + case BPF_MAP_TYPE_XSKMAP: + err = xsk_generic_rcv(fwd, xdp); if (err) goto err; consume_skb(skb); - } else { + break; + default: /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ err = -EBADRQC; goto err; }
- _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); + _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); return 0; err: - _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); return err; }
@@@ -4059,31 -4017,34 +4017,34 @@@ int xdp_do_generic_redirect(struct net_ struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - struct bpf_map *map = READ_ONCE(ri->map); - u32 index = ri->tgt_index; - struct net_device *fwd; - int err = 0; - - if (map) - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, - map); - ri->tgt_index = 0; - fwd = dev_get_by_index_rcu(dev_net(dev), index); - if (unlikely(!fwd)) { - err = -EINVAL; - goto err; - } + enum bpf_map_type map_type = ri->map_type; + void *fwd = ri->tgt_value; + u32 map_id = ri->map_id; + int err;
- err = xdp_ok_fwd_dev(fwd, skb->len); - if (unlikely(err)) - goto err; + ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->map_type = BPF_MAP_TYPE_UNSPEC;
- skb->dev = fwd; - _trace_xdp_redirect(dev, xdp_prog, index); - generic_xdp_tx(skb, xdp_prog); - return 0; + if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { + fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index); + if (unlikely(!fwd)) { + err = -EINVAL; + goto err; + } + + err = xdp_ok_fwd_dev(fwd, skb->len); + if (unlikely(err)) + goto err; + + skb->dev = fwd; + _trace_xdp_redirect(dev, xdp_prog, ri->tgt_index); + generic_xdp_tx(skb, xdp_prog); + return 0; + } + + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id); err: - _trace_xdp_redirect_err(dev, xdp_prog, index, err); + _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err); return err; }
@@@ -4094,10 -4055,12 +4055,12 @@@ BPF_CALL_2(bpf_xdp_redirect, u32, ifind if (unlikely(flags)) return XDP_ABORTED;
- ri->flags = flags; + /* NB! Map type UNSPEC and map_id == INT_MAX (never generated + * by map_idr) is used for ifindex based XDP redirect. + */ ri->tgt_index = ifindex; - ri->tgt_value = NULL; - WRITE_ONCE(ri->map, NULL); + ri->map_id = INT_MAX; + ri->map_type = BPF_MAP_TYPE_UNSPEC;
return XDP_REDIRECT; } @@@ -4113,28 -4076,7 +4076,7 @@@ static const struct bpf_func_proto bpf_ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - - /* Lower bits of the flags are used as return code on lookup failure */ - if (unlikely(flags > XDP_TX)) - return XDP_ABORTED; - - ri->tgt_value = __xdp_map_lookup_elem(map, ifindex); - if (unlikely(!ri->tgt_value)) { - /* If the lookup fails we want to clear out the state in the - * redirect_info struct completely, so that if an eBPF program - * performs multiple lookups, the last one always takes - * precedence. - */ - WRITE_ONCE(ri->map, NULL); - return flags; - } - - ri->flags = flags; - ri->tgt_index = ifindex; - WRITE_ONCE(ri->map, map); - - return XDP_REDIRECT; + return map->ops->map_redirect(map, ifindex, flags); }
static const struct bpf_func_proto bpf_xdp_redirect_map_proto = { @@@ -5658,7 -5600,7 +5600,7 @@@ BPF_CALL_5(bpf_skb_check_mtu, struct sk if (unlikely(flags & ~(BPF_MTU_CHK_SEGS))) return -EINVAL;
- if (unlikely(flags & BPF_MTU_CHK_SEGS && len_diff)) + if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len))) return -EINVAL;
dev = __dev_via_ifindex(dev, ifindex); @@@ -5668,11 -5610,7 +5610,11 @@@ mtu = READ_ONCE(dev->mtu);
dev_len = mtu + dev->hard_header_len; - skb_len = skb->len + len_diff; /* minus result pass check */ + + /* If set use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */ + skb_len = *mtu_len ? *mtu_len + dev->hard_header_len : skb->len; + + skb_len += len_diff; /* minus result pass check */ if (skb_len <= dev_len) { ret = BPF_MTU_CHK_RET_SUCCESS; goto out; @@@ -5717,10 -5655,6 +5659,10 @@@ BPF_CALL_5(bpf_xdp_check_mtu, struct xd /* Add L2-header as dev MTU is L3 size */ dev_len = mtu + dev->hard_header_len;
+ /* Use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */ + if (*mtu_len) + xdp_len = *mtu_len + dev->hard_header_len; + xdp_len += len_diff; /* minus result pass check */ if (xdp_len > dev_len) ret = BPF_MTU_CHK_RET_FRAG_NEEDED; @@@ -9663,22 -9597,40 +9605,40 @@@ static u32 sock_ops_convert_ctx_access( return insn - insn_buf; }
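With this change *mtu_len doubles as an input for both bpf_check_mtu() flavours: a non-zero value is taken as the L3 length to test (like iph->tot_len in fib_lookup) instead of the packet length, and the device MTU is written back on return. A hedged tc sketch of the calling convention (treating ifindex 0 as "use the current device" is an assumption based on the helper's lookup fallback):

	// SPDX-License-Identifier: GPL-2.0
	#include <linux/bpf.h>
	#include <linux/pkt_cls.h>
	#include <bpf/bpf_helpers.h>

	SEC("tc")
	int mtu_probe(struct __sk_buff *skb)
	{
		__u32 mtu_len = 1500;	/* non-zero: checked as the L3 length */
		int ret;

		/* ifindex 0, len_diff 0, no flags; mtu_len holds the device
		 * MTU after the call
		 */
		ret = bpf_check_mtu(skb, 0, &mtu_len, 0, 0);
		if (ret)	/* e.g. BPF_MTU_CHK_RET_FRAG_NEEDED */
			return TC_ACT_SHOT;

		return TC_ACT_OK;
	}

	char LICENSE[] SEC("license") = "GPL";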
+ /* data_end = skb->data + skb_headlen() */ + static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si, + struct bpf_insn *insn) + { + /* si->dst_reg = skb->data */ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, data)); + /* AX = skb->len */ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len), + BPF_REG_AX, si->src_reg, + offsetof(struct sk_buff, len)); + /* si->dst_reg = skb->data + skb->len */ + *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); + /* AX = skb->data_len */ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len), + BPF_REG_AX, si->src_reg, + offsetof(struct sk_buff, data_len)); + /* si->dst_reg = skb->data + skb->len - skb->data_len */ + *insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX); + + return insn; + } + static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; - int off;
switch (si->off) { case offsetof(struct __sk_buff, data_end): - off = si->off; - off -= offsetof(struct __sk_buff, data_end); - off += offsetof(struct sk_buff, cb); - off += offsetof(struct tcp_skb_cb, bpf.data_end); - *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, - si->src_reg, off); + insn = bpf_convert_data_end_access(si, insn); break; default: return bpf_convert_ctx_access(type, si, insn_buf, prog, @@@ -10457,6 -10409,7 +10417,7 @@@ static u32 sk_lookup_convert_ctx_access }
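The rewritten access no longer reads a data_end value cached in tcp_skb_cb; the generated instructions recompute it from skb->data, skb->len and skb->data_len, i.e. the end of the linear area. Conceptually (toy struct for illustration, not the kernel's sk_buff):

	#include <stdio.h>

	/* toy mirror of the three sk_buff fields the generated code reads */
	struct toy_skb {
		unsigned char *data;	/* start of the linear area */
		unsigned int len;	/* total length, linear + paged */
		unsigned int data_len;	/* bytes held in paged fragments */
	};

	/* data_end = data + skb_headlen(), where skb_headlen() = len - data_len */
	static unsigned char *toy_data_end(const struct toy_skb *skb)
	{
		return skb->data + (skb->len - skb->data_len);
	}

	int main(void)
	{
		unsigned char buf[2048];
		struct toy_skb skb = { .data = buf, .len = 1400, .data_len = 300 };

		/* only the first 1100 bytes are directly addressable */
		printf("headlen = %td\n", toy_data_end(&skb) - skb.data);
		return 0;
	}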
const struct bpf_prog_ops sk_lookup_prog_ops = { + .test_run = bpf_prog_test_run_sk_lookup, };
const struct bpf_verifier_ops sk_lookup_verifier_ops = { diff --combined net/core/flow_dissector.c index a96a4f5de0ce,2ed380d096ce..5985029e43d4 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@@ -114,7 -114,7 +114,7 @@@ int flow_dissector_bpf_prog_attach_chec * is the protocol port offset returned from proto_ports_offset */ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - void *data, int hlen) + const void *data, int hlen) { int poff = proto_ports_offset(ip_proto);
@@@ -161,7 -161,7 +161,7 @@@ static bool icmp_has_id(u8 type */ void skb_flow_get_icmp_tci(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, - void *data, int thoff, int hlen) + const void *data, int thoff, int hlen) { struct icmphdr *ih, _ih;
@@@ -176,7 -176,7 +176,7 @@@ * avoid confusion with packets without such field */ if (icmp_has_id(ih->type)) - key_icmp->id = ih->un.echo.id ? : 1; + key_icmp->id = ih->un.echo.id ? ntohs(ih->un.echo.id) : 1; else key_icmp->id = 0; } @@@ -187,8 -187,8 +187,8 @@@ EXPORT_SYMBOL(skb_flow_get_icmp_tci) */ static void __skb_flow_dissect_icmp(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, - void *data, int thoff, int hlen) + void *target_container, const void *data, + int thoff, int hlen) { struct flow_dissector_key_icmp *key_icmp;
@@@ -409,8 -409,8 +409,8 @@@ EXPORT_SYMBOL(skb_flow_dissect_hash) static enum flow_dissect_ret __skb_flow_dissect_mpls(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, void *data, int nhoff, int hlen, - int lse_index, bool *entropy_label) + void *target_container, const void *data, int nhoff, + int hlen, int lse_index, bool *entropy_label) { struct mpls_label *hdr, _hdr; u32 entry, label, bos; @@@ -467,7 -467,8 +467,8 @@@ static enum flow_dissect_ret __skb_flow_dissect_arp(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, void *data, int nhoff, int hlen) + void *target_container, const void *data, + int nhoff, int hlen) { struct flow_dissector_key_arp *key_arp; struct { @@@ -523,7 -524,7 +524,7 @@@ static enum flow_dissect_re __skb_flow_dissect_gre(const struct sk_buff *skb, struct flow_dissector_key_control *key_control, struct flow_dissector *flow_dissector, - void *target_container, void *data, + void *target_container, const void *data, __be16 *p_proto, int *p_nhoff, int *p_hlen, unsigned int flags) { @@@ -663,8 -664,8 +664,8 @@@ static enum flow_dissect_ret __skb_flow_dissect_batadv(const struct sk_buff *skb, struct flow_dissector_key_control *key_control, - void *data, __be16 *p_proto, int *p_nhoff, int hlen, - unsigned int flags) + const void *data, __be16 *p_proto, int *p_nhoff, + int hlen, unsigned int flags) { struct { struct batadv_unicast_packet batadv_unicast; @@@ -695,7 -696,8 +696,8 @@@ static void __skb_flow_dissect_tcp(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, void *data, int thoff, int hlen) + void *target_container, const void *data, + int thoff, int hlen) { struct flow_dissector_key_tcp *key_tcp; struct tcphdr *th, _th; @@@ -719,8 -721,8 +721,8 @@@ static void __skb_flow_dissect_ports(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, void *data, int nhoff, - u8 ip_proto, int hlen) + void *target_container, const void *data, + int nhoff, u8 ip_proto, int hlen) { enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX; struct flow_dissector_key_ports *key_ports; @@@ -744,7 -746,8 +746,8 @@@ static void __skb_flow_dissect_ipv4(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, void *data, const struct iphdr *iph) + void *target_container, const void *data, + const struct iphdr *iph) { struct flow_dissector_key_ip *key_ip;
@@@ -761,7 -764,8 +764,8 @@@ static void __skb_flow_dissect_ipv6(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, void *data, const struct ipv6hdr *iph) + void *target_container, const void *data, + const struct ipv6hdr *iph) { struct flow_dissector_key_ip *key_ip;
@@@ -908,9 -912,8 +912,8 @@@ bool bpf_flow_dissect(struct bpf_prog * bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, - void *data, __be16 proto, int nhoff, int hlen, - unsigned int flags) + void *target_container, const void *data, + __be16 proto, int nhoff, int hlen, unsigned int flags) { struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; @@@ -1642,7 -1645,7 +1645,7 @@@ __u32 skb_get_hash_perturb(const struc } EXPORT_SYMBOL(skb_get_hash_perturb);
- u32 __skb_get_poff(const struct sk_buff *skb, void *data, + u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen) { u32 poff = keys->control.thoff; diff --combined net/ipv4/route.c index bba150fdd265,0470442ff61d..fa68c2612252 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@@ -21,7 -21,7 +21,7 @@@ * Alan Cox : Added BSD route gw semantics * Alan Cox : Super /proc >4K * Alan Cox : MTU in route table - * Alan Cox : MSS actually. Also added the window + * Alan Cox : MSS actually. Also added the window * clamper. * Sam Lantinga : Fixed route matching in rt_del() * Alan Cox : Routing cache support. @@@ -41,7 -41,7 +41,7 @@@ * Olaf Erb : irtt wasn't being copied right. * Bjorn Ekwall : Kerneld route support. * Alan Cox : Multicast fixed (I hope) - * Pavel Krauz : Limited broadcast fixed + * Pavel Krauz : Limited broadcast fixed * Mike McLagan : Routing by source * Alexey Kuznetsov : End of old history. Split to fib.c and * route.c and rewritten from scratch. @@@ -54,8 -54,8 +54,8 @@@ * Robert Olsson : Added rt_cache statistics * Arnaldo C. Melo : Convert proc stuff to seq_file * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. - * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect - * Ilia Sotnikov : Removed TOS from hash calculations + * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect + * Ilia Sotnikov : Removed TOS from hash calculations */
#define pr_fmt(fmt) "IPv4: " fmt @@@ -234,19 -234,6 +234,6 @@@ static const struct seq_operations rt_c .show = rt_cache_seq_show, };
- static int rt_cache_seq_open(struct inode *inode, struct file *file) - { - return seq_open(file, &rt_cache_seq_ops); - } - - static const struct proc_ops rt_cache_proc_ops = { - .proc_open = rt_cache_seq_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = seq_release, - }; - - static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) { int cpu; @@@ -324,19 -311,6 +311,6 @@@ static const struct seq_operations rt_c .show = rt_cpu_seq_show, };
- - static int rt_cpu_seq_open(struct inode *inode, struct file *file) - { - return seq_open(file, &rt_cpu_seq_ops); - } - - static const struct proc_ops rt_cpu_proc_ops = { - .proc_open = rt_cpu_seq_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = seq_release, - }; - #ifdef CONFIG_IP_ROUTE_CLASSID static int rt_acct_proc_show(struct seq_file *m, void *v) { @@@ -367,13 -341,13 +341,13 @@@ static int __net_init ip_rt_do_proc_ini { struct proc_dir_entry *pde;
- pde = proc_create("rt_cache", 0444, net->proc_net, - &rt_cache_proc_ops); + pde = proc_create_seq("rt_cache", 0444, net->proc_net, + &rt_cache_seq_ops); if (!pde) goto err1;
- pde = proc_create("rt_cache", 0444, - net->proc_net_stat, &rt_cpu_proc_ops); + pde = proc_create_seq("rt_cache", 0444, net->proc_net_stat, + &rt_cpu_seq_ops); if (!pde) goto err2;
@@@ -722,6 -696,7 +696,7 @@@ static void update_or_create_fnhe(struc
for_each_possible_cpu(i) { struct rtable __rcu **prt; + prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); rt = rcu_dereference(*prt); if (rt) @@@ -1258,12 -1233,12 +1233,12 @@@ static int ip_rt_bug(struct net *net, s }
/* - We do not cache source address of outgoing interface, - because it is used only by IP RR, TS and SRR options, - so that it out of fast path. - - BTW remember: "addr" is allowed to be not aligned - in IP options! + * We do not cache source address of outgoing interface, + * because it is used only by IP RR, TS and SRR options, + * so that it out of fast path. + * + * BTW remember: "addr" is allowed to be not aligned + * in IP options! */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) @@@ -2108,7 -2083,7 +2083,7 @@@ static int ip_route_input_slow(struct s goto out;
/* Check for the most weird martians, which can be not detected - by fib_lookup. + * by fib_lookup. */
tun_info = skb_tunnel_info(skb); @@@ -2246,7 -2221,7 +2221,7 @@@ local_input if (res->type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; - rth->rt_flags &= ~RTCF_LOCAL; + rth->rt_flags &= ~RTCF_LOCAL; }
if (do_cache) { @@@ -2317,15 -2292,15 +2292,15 @@@ int ip_route_input_rcu(struct sk_buff * u8 tos, struct net_device *dev, struct fib_result *res) { /* Multicast recognition logic is moved from route cache to here. - The problem was that too many Ethernet cards have broken/missing - hardware multicast filters :-( As result the host on multicasting - network acquires a lot of useless route cache entries, sort of - SDR messages from all the world. Now we try to get rid of them. - Really, provided software IP multicast filter is organized - reasonably (at least, hashed), it does not result in a slowdown - comparing with route cache reject entries. - Note, that multicast routers are not affected, because - route cache entry is created eventually. + * The problem was that too many Ethernet cards have broken/missing + * hardware multicast filters :-( As result the host on multicasting + * network acquires a lot of useless route cache entries, sort of + * SDR messages from all the world. Now we try to get rid of them. + * Really, provided software IP multicast filter is organized + * reasonably (at least, hashed), it does not result in a slowdown + * comparing with route cache reject entries. + * Note, that multicast routers are not affected, because + * route cache entry is created eventually. */ if (ipv4_is_multicast(daddr)) { struct in_device *in_dev = __in_dev_get_rcu(dev); @@@ -2537,11 -2512,11 +2512,11 @@@ struct rtable *ip_route_output_key_hash rth = ERR_PTR(-ENETUNREACH);
/* I removed check for oif == dev_out->oif here. - It was wrong for two reasons: - 1. ip_dev_find(net, saddr) can return wrong iface, if saddr - is assigned to multiple interfaces. - 2. Moreover, we are allowed to send packets with saddr - of another iface. --ANK + * It was wrong for two reasons: + * 1. ip_dev_find(net, saddr) can return wrong iface, if saddr + * is assigned to multiple interfaces. + * 2. Moreover, we are allowed to send packets with saddr + * of another iface. --ANK */
if (fl4->flowi4_oif == 0 && @@@ -2553,18 -2528,18 +2528,18 @@@ goto out;
/* Special hack: user can direct multicasts - and limited broadcast via necessary interface - without fiddling with IP_MULTICAST_IF or IP_PKTINFO. - This hack is not just for fun, it allows - vic,vat and friends to work. - They bind socket to loopback, set ttl to zero - and expect that it will work. - From the viewpoint of routing cache they are broken, - because we are not allowed to build multicast path - with loopback source addr (look, routing cache - cannot know, that ttl is zero, so that packet - will not leave this host and route is valid). - Luckily, this hack is good workaround. + * and limited broadcast via necessary interface + * without fiddling with IP_MULTICAST_IF or IP_PKTINFO. + * This hack is not just for fun, it allows + * vic,vat and friends to work. + * They bind socket to loopback, set ttl to zero + * and expect that it will work. + * From the viewpoint of routing cache they are broken, + * because we are not allowed to build multicast path + * with loopback source addr (look, routing cache + * cannot know, that ttl is zero, so that packet + * will not leave this host and route is valid). + * Luckily, this hack is good workaround. */
fl4->flowi4_oif = dev_out->ifindex; @@@ -2627,21 -2602,21 +2602,21 @@@ (ipv4_is_multicast(fl4->daddr) || !netif_index_is_l3_master(net, fl4->flowi4_oif))) { /* Apparently, routing tables are wrong. Assume, - that the destination is on link. - - WHY? DW. - Because we are allowed to send to iface - even if it has NO routes and NO assigned - addresses. When oif is specified, routing - tables are looked up with only one purpose: - to catch if destination is gatewayed, rather than - direct. Moreover, if MSG_DONTROUTE is set, - we send packet, ignoring both routing tables - and ifaddr state. --ANK - - - We could make it even if oif is unknown, - likely IPv6, but we do not. + * that the destination is on link. + * + * WHY? DW. + * Because we are allowed to send to iface + * even if it has NO routes and NO assigned + * addresses. When oif is specified, routing + * tables are looked up with only one purpose: + * to catch if destination is gatewayed, rather than + * direct. Moreover, if MSG_DONTROUTE is set, + * we send packet, ignoring both routing tables + * and ifaddr state. --ANK + * + * + * We could make it even if oif is unknown, + * likely IPv6, but we do not. */
if (fl4->saddr == 0) @@@ -2687,15 -2662,44 +2662,15 @@@ out return rth; }
-static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) -{ - return NULL; -} - -static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) -{ - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); - - return mtu ? : dst->dev->mtu; -} - -static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu, - bool confirm_neigh) -{ -} - -static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb) -{ -} - -static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, - unsigned long old) -{ - return NULL; -} - static struct dst_ops ipv4_dst_blackhole_ops = { - .family = AF_INET, - .check = ipv4_blackhole_dst_check, - .mtu = ipv4_blackhole_mtu, - .default_advmss = ipv4_default_advmss, - .update_pmtu = ipv4_rt_blackhole_update_pmtu, - .redirect = ipv4_rt_blackhole_redirect, - .cow_metrics = ipv4_rt_blackhole_cow_metrics, - .neigh_lookup = ipv4_neigh_lookup, + .family = AF_INET, + .default_advmss = ipv4_default_advmss, + .neigh_lookup = ipv4_neigh_lookup, + .check = dst_blackhole_check, + .cow_metrics = dst_blackhole_cow_metrics, + .update_pmtu = dst_blackhole_update_pmtu, + .redirect = dst_blackhole_redirect, + .mtu = dst_blackhole_mtu, };
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) diff --combined net/ipv6/route.c index 1056b0229ffd,60058f3dcc48..ebb7519bec2a --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@@ -260,16 -260,34 +260,16 @@@ static struct dst_ops ip6_dst_ops_templ .confirm_neigh = ip6_confirm_neigh, };
-static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) -{ - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); - - return mtu ? : dst->dev->mtu; -} - -static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu, - bool confirm_neigh) -{ -} - -static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb) -{ -} - static struct dst_ops ip6_dst_blackhole_ops = { - .family = AF_INET6, - .destroy = ip6_dst_destroy, - .check = ip6_dst_check, - .mtu = ip6_blackhole_mtu, - .default_advmss = ip6_default_advmss, - .update_pmtu = ip6_rt_blackhole_update_pmtu, - .redirect = ip6_rt_blackhole_redirect, - .cow_metrics = dst_cow_metrics_generic, - .neigh_lookup = ip6_dst_neigh_lookup, + .family = AF_INET6, + .default_advmss = ip6_default_advmss, + .neigh_lookup = ip6_dst_neigh_lookup, + .check = ip6_dst_check, + .destroy = ip6_dst_destroy, + .cow_metrics = dst_cow_metrics_generic, + .update_pmtu = dst_blackhole_update_pmtu, + .redirect = dst_blackhole_redirect, + .mtu = dst_blackhole_mtu, };
static const u32 ip6_template_metrics[RTAX_MAX] = { @@@ -2360,7 -2378,7 +2360,7 @@@ u32 rt6_multipath_hash(const struct ne
memset(&hash_keys, 0, sizeof(hash_keys));
- if (!flkeys) { + if (!flkeys) { skb_flow_dissect_flow_keys(skb, &keys, flag); flkeys = &keys; } @@@ -2500,20 -2518,20 +2500,20 @@@ struct dst_entry *ip6_route_output_flag struct flowi6 *fl6, int flags) { - struct dst_entry *dst; - struct rt6_info *rt6; + struct dst_entry *dst; + struct rt6_info *rt6;
- rcu_read_lock(); - dst = ip6_route_output_flags_noref(net, sk, fl6, flags); - rt6 = (struct rt6_info *)dst; - /* For dst cached in uncached_list, refcnt is already taken. */ - if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) { - dst = &net->ipv6.ip6_null_entry->dst; - dst_hold(dst); - } - rcu_read_unlock(); + rcu_read_lock(); + dst = ip6_route_output_flags_noref(net, sk, fl6, flags); + rt6 = (struct rt6_info *)dst; + /* For dst cached in uncached_list, refcnt is already taken. */ + if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) { + dst = &net->ipv6.ip6_null_entry->dst; + dst_hold(dst); + } + rcu_read_unlock();
- return dst; + return dst; } EXPORT_SYMBOL_GPL(ip6_route_output_flags);
diff --combined net/mptcp/options.c index 89a4225ed321,5fabf3e9a38d..2b7eec93c9f5 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@@ -26,6 -26,7 +26,7 @@@ static void mptcp_parse_option(const st int expected_opsize; u8 version; u8 flags; + u8 i;
switch (subtype) { case MPTCPOPT_MP_CAPABLE: @@@ -272,14 -273,17 +273,17 @@@ break;
case MPTCPOPT_RM_ADDR: - if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE) + if (opsize < TCPOLEN_MPTCP_RM_ADDR_BASE + 1 || + opsize > TCPOLEN_MPTCP_RM_ADDR_BASE + MPTCP_RM_IDS_MAX) break;
ptr++;
mp_opt->rm_addr = 1; - mp_opt->rm_id = *ptr++; - pr_debug("RM_ADDR: id=%d", mp_opt->rm_id); + mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE; + for (i = 0; i < mp_opt->rm_list.nr; i++) + mp_opt->rm_list.ids[i] = *ptr++; + pr_debug("RM_ADDR: rm_list_nr=%d", mp_opt->rm_list.nr); break;
case MPTCPOPT_MP_PRIO: @@@ -567,15 -571,15 +571,15 @@@ static bool mptcp_established_options_d }
static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, - struct in_addr *addr) + struct in_addr *addr, u16 port) { u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[7];
msg[0] = addr_id; memcpy(&msg[1], &addr->s_addr, 4); - msg[5] = 0; - msg[6] = 0; + msg[5] = port >> 8; + msg[6] = port & 0xFF;
mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac);
@@@ -584,15 -588,15 +588,15 @@@
#if IS_ENABLED(CONFIG_MPTCP_IPV6) static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, - struct in6_addr *addr) + struct in6_addr *addr, u16 port) { u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[19];
msg[0] = addr_id; memcpy(&msg[1], &addr->s6_addr, 16); - msg[17] = 0; - msg[18] = 0; + msg[17] = port >> 8; + msg[18] = port & 0xFF;
mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac);
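The ADD_ADDR HMAC now covers the advertised port as two big-endian bytes appended after the address, for both the IPv4 (7-byte) and IPv6 (19-byte) messages. A standalone sketch of the IPv4 message layout (values are arbitrary):

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* 7-byte message hashed for an IPv4 ADD_ADDR: id | addr | port */
	static void addaddr_msg(uint8_t addr_id, struct in_addr addr,
				uint16_t port, uint8_t msg[7])
	{
		msg[0] = addr_id;
		memcpy(&msg[1], &addr.s_addr, 4);
		msg[5] = port >> 8;	/* network byte order: high byte first */
		msg[6] = port & 0xff;
	}

	int main(void)
	{
		struct in_addr a;
		uint8_t msg[7];
		int i;

		inet_pton(AF_INET, "10.0.0.2", &a);
		addaddr_msg(1, a, 8080, msg);
		for (i = 0; i < 7; i++)
			printf("%02x ", msg[i]);
		printf("\n");	/* 01 0a 00 00 02 1f 90 */
		return 0;
	}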
@@@ -646,8 -650,7 +650,8 @@@ static bool mptcp_established_options_a opts->ahmac = add_addr_generate_hmac(msk->local_key, msk->remote_key, opts->addr_id, - &opts->addr); + &opts->addr, + opts->port); } } #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@@ -658,8 -661,7 +662,8 @@@ opts->ahmac = add_addr6_generate_hmac(msk->local_key, msk->remote_key, opts->addr_id, - &opts->addr6); + &opts->addr6, + opts->port); } } #endif @@@ -676,20 -678,25 +680,25 @@@ static bool mptcp_established_options_r { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - u8 rm_id; + struct mptcp_rm_list rm_list; + int i, len;
if (!mptcp_pm_should_rm_signal(msk) || - !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_id))) + !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_list))) return false;
- if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE) + len = mptcp_rm_addr_len(&rm_list); + if (len < 0) + return false; + if (remaining < len) return false;
- *size = TCPOLEN_MPTCP_RM_ADDR_BASE; + *size = len; opts->suboptions |= OPTION_MPTCP_RM_ADDR; - opts->rm_id = rm_id; + opts->rm_list = rm_list;
- pr_debug("rm_id=%d", opts->rm_id); + for (i = 0; i < opts->rm_list.nr; i++) + pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]);
return true; } @@@ -964,14 -971,12 +973,14 @@@ static bool add_addr_hmac_valid(struct if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) hmac = add_addr_generate_hmac(msk->remote_key, msk->local_key, - mp_opt->addr_id, &mp_opt->addr); + mp_opt->addr_id, &mp_opt->addr, + mp_opt->port); #if IS_ENABLED(CONFIG_MPTCP_IPV6) else hmac = add_addr6_generate_hmac(msk->remote_key, msk->local_key, - mp_opt->addr_id, &mp_opt->addr6); + mp_opt->addr_id, &mp_opt->addr6, + mp_opt->port); #endif
pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n", @@@ -1042,7 -1047,7 +1051,7 @@@ void mptcp_incoming_options(struct soc }
if (mp_opt.rm_addr) { - mptcp_pm_rm_addr_received(msk, mp_opt.rm_id); + mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list); mp_opt.rm_addr = 0; }
@@@ -1221,9 -1226,23 +1230,23 @@@ mp_capable_done }
if (OPTION_MPTCP_RM_ADDR & opts->suboptions) { + u8 i = 1; + *ptr++ = mptcp_option(MPTCPOPT_RM_ADDR, - TCPOLEN_MPTCP_RM_ADDR_BASE, - 0, opts->rm_id); + TCPOLEN_MPTCP_RM_ADDR_BASE + opts->rm_list.nr, + 0, opts->rm_list.ids[0]); + + while (i < opts->rm_list.nr) { + u8 id1, id2, id3, id4; + + id1 = opts->rm_list.ids[i]; + id2 = i + 1 < opts->rm_list.nr ? opts->rm_list.ids[i + 1] : TCPOPT_NOP; + id3 = i + 2 < opts->rm_list.nr ? opts->rm_list.ids[i + 2] : TCPOPT_NOP; + id4 = i + 3 < opts->rm_list.nr ? opts->rm_list.ids[i + 3] : TCPOPT_NOP; + put_unaligned_be32(id1 << 24 | id2 << 16 | id3 << 8 | id4, ptr); + ptr += 1; + i += 4; + } }
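The RM_ADDR option now carries a list of address ids: the first id travels in the base option word and any further ids are packed four per 32-bit word, padded with TCPOPT_NOP. A userspace sketch of the same packing (array contents are arbitrary):

	#include <stdint.h>
	#include <stdio.h>

	#define TCPOPT_NOP 1

	/* pack ids[1..nr-1] four per word, NOP-padded, like the option writer */
	static unsigned int pack_rm_ids(const uint8_t *ids, unsigned int nr,
					uint32_t *out)
	{
		unsigned int i = 1, words = 0;

		while (i < nr) {
			uint8_t id1 = ids[i];
			uint8_t id2 = i + 1 < nr ? ids[i + 1] : TCPOPT_NOP;
			uint8_t id3 = i + 2 < nr ? ids[i + 2] : TCPOPT_NOP;
			uint8_t id4 = i + 3 < nr ? ids[i + 3] : TCPOPT_NOP;

			out[words++] = (uint32_t)id1 << 24 | id2 << 16 | id3 << 8 | id4;
			i += 4;
		}
		return words;	/* extra 32-bit words after the base option */
	}

	int main(void)
	{
		uint8_t ids[] = { 3, 4, 5, 6 };	/* ids[0] rides in the base option */
		uint32_t words[2];
		unsigned int n = pack_rm_ids(ids, 4, words);

		/* prints: 1 extra word(s), first = 0x04050601 (ids 4,5,6 + NOP pad) */
		printf("%u extra word(s), first = 0x%08x\n", n, words[0]);
		return 0;
	}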
if (OPTION_MPTCP_PRIO & opts->suboptions) { diff --combined net/netfilter/nf_flow_table_core.c index c77ba8690ed8,8ffd3f3c288c..d61bbe469761 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@@ -389,29 -389,20 +389,20 @@@ static void nf_flow_offload_work_gc(str queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); }
- - static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, - __be16 port, __be16 new_port) + static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, + __be16 port, __be16 new_port) { struct tcphdr *tcph;
- if (skb_try_make_writable(skb, thoff + sizeof(*tcph))) - return -1; - tcph = (void *)(skb_network_header(skb) + thoff); inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false); - - return 0; }
- static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, - __be16 port, __be16 new_port) + static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, + __be16 port, __be16 new_port) { struct udphdr *udph;
- if (skb_try_make_writable(skb, thoff + sizeof(*udph))) - return -1; - udph = (void *)(skb_network_header(skb) + thoff); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace2(&udph->check, skb, port, @@@ -419,37 -410,28 +410,28 @@@ if (!udph->check) udph->check = CSUM_MANGLED_0; } - - return 0; }
- static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, - u8 protocol, __be16 port, __be16 new_port) + static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, + u8 protocol, __be16 port, __be16 new_port) { switch (protocol) { case IPPROTO_TCP: - if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0) - return NF_DROP; + nf_flow_nat_port_tcp(skb, thoff, port, new_port); break; case IPPROTO_UDP: - if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0) - return NF_DROP; + nf_flow_nat_port_udp(skb, thoff, port, new_port); break; } - - return 0; }
- int nf_flow_snat_port(const struct flow_offload *flow, - struct sk_buff *skb, unsigned int thoff, - u8 protocol, enum flow_offload_tuple_dir dir) + void nf_flow_snat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir) { struct flow_ports *hdr; __be16 port, new_port;
- if (skb_try_make_writable(skb, thoff + sizeof(*hdr))) - return -1; - hdr = (void *)(skb_network_header(skb) + thoff);
switch (dir) { @@@ -463,24 -445,19 +445,19 @@@ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; hdr->dest = new_port; break; - default: - return -1; }
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port); + nf_flow_nat_port(skb, thoff, protocol, port, new_port); } EXPORT_SYMBOL_GPL(nf_flow_snat_port);
- int nf_flow_dnat_port(const struct flow_offload *flow, - struct sk_buff *skb, unsigned int thoff, - u8 protocol, enum flow_offload_tuple_dir dir) + void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb, + unsigned int thoff, u8 protocol, + enum flow_offload_tuple_dir dir) { struct flow_ports *hdr; __be16 port, new_port;
- if (skb_try_make_writable(skb, thoff + sizeof(*hdr))) - return -1; - hdr = (void *)(skb_network_header(skb) + thoff);
switch (dir) { @@@ -494,11 -471,9 +471,9 @@@ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; hdr->source = new_port; break; - default: - return -1; }
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port); + nf_flow_nat_port(skb, thoff, protocol, port, new_port); } EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
@@@ -506,7 -481,7 +481,7 @@@ int nf_flow_table_init(struct nf_flowta { int err;
- INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); + INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); flow_block_init(&flowtable->flow_block); init_rwsem(&flowtable->flow_block_lock);
diff --combined net/netfilter/nf_tables_api.c index f57f1a6ba96f,bd5e8122ea5e..fc2526b8bd55 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@@ -900,6 -900,12 +900,12 @@@ static void nf_tables_table_disable(str nft_table_disable(net, table, 0); }
+ enum { + NFT_TABLE_STATE_UNCHANGED = 0, + NFT_TABLE_STATE_DORMANT, + NFT_TABLE_STATE_WAKEUP + }; + static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; @@@ -929,19 -935,17 +935,17 @@@
if ((flags & NFT_TABLE_F_DORMANT) && !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { - nft_trans_table_enable(trans) = false; + nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ctx->table->flags &= ~NFT_TABLE_F_DORMANT; ret = nf_tables_table_enable(ctx->net, ctx->table); if (ret >= 0) - nft_trans_table_enable(trans) = true; - else - ctx->table->flags |= NFT_TABLE_F_DORMANT; + nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP; } if (ret < 0) goto err;
+ nft_trans_table_flags(trans) = flags; nft_trans_table_update(trans) = true; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@@ -6783,9 -6787,6 +6787,9 @@@ static int nft_register_flowtable_net_h
list_for_each_entry(hook, hook_list, list) { list_for_each_entry(ft, &table->flowtables, list) { + if (!nft_is_active_next(net, ft)) + continue; + list_for_each_entry(hook2, &ft->hook_list, list) { if (hook->ops.dev == hook2->ops.dev && hook->ops.pf == hook2->ops.pf) { @@@ -6845,7 -6846,6 +6849,7 @@@ static int nft_flowtable_update(struct struct nft_hook *hook, *next; struct nft_trans *trans; bool unregister = false; + u32 flags; int err;
err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK], @@@ -6860,17 -6860,6 +6864,17 @@@ } }
+ if (nla[NFTA_FLOWTABLE_FLAGS]) { + flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); + if (flags & ~NFT_FLOWTABLE_MASK) + return -EOPNOTSUPP; + if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^ + (flags & NFT_FLOWTABLE_HW_OFFLOAD)) + return -EOPNOTSUPP; + } else { + flags = flowtable->data.flags; + } + err = nft_register_flowtable_net_hooks(ctx->net, ctx->table, &flowtable_hook.list, flowtable); if (err < 0) @@@ -6884,7 -6873,6 +6888,7 @@@ goto err_flowtable_update_hook; }
+ nft_trans_flowtable_flags(trans) = flags; nft_trans_flowtable(trans) = flowtable; nft_trans_flowtable_update(trans) = true; INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); @@@ -6979,10 -6967,8 +6983,10 @@@ static int nf_tables_newflowtable(struc if (nla[NFTA_FLOWTABLE_FLAGS]) { flowtable->data.flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); - if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK) + if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK) { + err = -EOPNOTSUPP; goto err3; + } }
write_pnet(&flowtable->data.net, net); @@@ -8086,11 -8072,10 +8090,10 @@@ static int nf_tables_commit(struct net switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!nft_trans_table_enable(trans)) { - nf_tables_table_disable(net, - trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } + if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT) + nf_tables_table_disable(net, trans->ctx.table); + + trans->ctx.table->flags = nft_trans_table_flags(trans); } else { nft_clear(net, trans->ctx.table); } @@@ -8194,8 -8179,6 +8197,8 @@@ break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) { + nft_trans_flowtable(trans)->data.flags = + nft_trans_flowtable_flags(trans); nf_tables_flowtable_notify(&trans->ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), @@@ -8303,11 -8286,9 +8306,9 @@@ static int __nf_tables_abort(struct ne switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_enable(trans)) { - nf_tables_table_disable(net, - trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } + if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP) + nf_tables_table_disable(net, trans->ctx.table); + nft_trans_destroy(trans); } else { list_del_rcu(&trans->ctx.table->list); @@@ -8577,6 -8558,7 +8578,7 @@@ static int nf_tables_check_loops(const data->verdict.chain); if (err < 0) return err; + break; default: break; } diff --combined net/sched/cls_api.c index 13341e7fb077,ca8e177bf31b..d3db70865d66 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@@ -1629,7 -1629,6 +1629,7 @@@ int tcf_classify_ingress(struct sk_buf return TC_ACT_SHOT; ext->chain = last_executed_chain; ext->mru = qdisc_skb_cb(skb)->mru; + ext->post_ct = qdisc_skb_cb(skb)->post_ct; }
return ret; @@@ -3662,6 -3661,9 +3662,9 @@@ int tc_setup_flow_action(struct flow_ac entry->police.burst = tcf_police_burst(act); entry->police.rate_bytes_ps = tcf_police_rate_bytes_ps(act); + entry->police.burst_pkt = tcf_police_burst_pkt(act); + entry->police.rate_pkt_ps = + tcf_police_rate_pkt_ps(act); entry->police.mtu = tcf_police_tcfp_mtu(act); entry->police.index = act->tcfa_index; } else if (is_tcf_ct(act)) { diff --combined net/sched/cls_flower.c index c69a4ba9c33f,9736df97e04d..d7869a984881 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@@ -209,16 -209,16 +209,16 @@@ static bool fl_range_port_dst_cmp(struc struct fl_flow_key *key, struct fl_flow_key *mkey) { - __be16 min_mask, max_mask, min_val, max_val; + u16 min_mask, max_mask, min_val, max_val;
- min_mask = htons(filter->mask->key.tp_range.tp_min.dst); - max_mask = htons(filter->mask->key.tp_range.tp_max.dst); - min_val = htons(filter->key.tp_range.tp_min.dst); - max_val = htons(filter->key.tp_range.tp_max.dst); + min_mask = ntohs(filter->mask->key.tp_range.tp_min.dst); + max_mask = ntohs(filter->mask->key.tp_range.tp_max.dst); + min_val = ntohs(filter->key.tp_range.tp_min.dst); + max_val = ntohs(filter->key.tp_range.tp_max.dst);
if (min_mask && max_mask) { - if (htons(key->tp_range.tp.dst) < min_val || - htons(key->tp_range.tp.dst) > max_val) + if (ntohs(key->tp_range.tp.dst) < min_val || + ntohs(key->tp_range.tp.dst) > max_val) return false;
/* skb does not have min and max values */ @@@ -232,16 -232,16 +232,16 @@@ static bool fl_range_port_src_cmp(struc struct fl_flow_key *key, struct fl_flow_key *mkey) { - __be16 min_mask, max_mask, min_val, max_val; + u16 min_mask, max_mask, min_val, max_val;
- min_mask = htons(filter->mask->key.tp_range.tp_min.src); - max_mask = htons(filter->mask->key.tp_range.tp_max.src); - min_val = htons(filter->key.tp_range.tp_min.src); - max_val = htons(filter->key.tp_range.tp_max.src); + min_mask = ntohs(filter->mask->key.tp_range.tp_min.src); + max_mask = ntohs(filter->mask->key.tp_range.tp_max.src); + min_val = ntohs(filter->key.tp_range.tp_min.src); + max_val = ntohs(filter->key.tp_range.tp_max.src);
if (min_mask && max_mask) { - if (htons(key->tp_range.tp.src) < min_val || - htons(key->tp_range.tp.src) > max_val) + if (ntohs(key->tp_range.tp.src) < min_val || + ntohs(key->tp_range.tp.src) > max_val) return false;
/* skb does not have min and max values */ @@@ -783,16 -783,16 +783,16 @@@ static int fl_set_key_port_range(struc TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst && - htons(key->tp_range.tp_max.dst) <= - htons(key->tp_range.tp_min.dst)) { + ntohs(key->tp_range.tp_max.dst) <= + ntohs(key->tp_range.tp_min.dst)) { NL_SET_ERR_MSG_ATTR(extack, tb[TCA_FLOWER_KEY_PORT_DST_MIN], "Invalid destination port range (min must be strictly smaller than max)"); return -EINVAL; } if (mask->tp_range.tp_min.src && mask->tp_range.tp_max.src && - htons(key->tp_range.tp_max.src) <= - htons(key->tp_range.tp_min.src)) { + ntohs(key->tp_range.tp_max.src) <= + ntohs(key->tp_range.tp_min.src)) { NL_SET_ERR_MSG_ATTR(extack, tb[TCA_FLOWER_KEY_PORT_SRC_MIN], "Invalid source port range (min must be strictly smaller than max)"); @@@ -1044,8 -1044,8 +1044,8 @@@ static int fl_set_key_flags(struct nlat return -EINVAL; }
- key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS])); - mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK])); + key = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS])); + mask = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
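The htons-to-ntohs switch in the flower port-range code matters because range checks must be done in host byte order; comparing raw __be16 values inverts the ordering on little-endian hosts. A small demonstration:

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint16_t min = 80, max = 443, probe = 256;	/* 256 lies inside [80, 443] */
		uint16_t be_min = htons(min), be_max = htons(max), be_probe = htons(probe);

		/* correct: convert to host order before comparing (prints 1) */
		printf("ntohs   : in range = %d\n",
		       ntohs(be_probe) >= ntohs(be_min) && ntohs(be_probe) <= ntohs(be_max));

		/* broken on little-endian: htons(256) = 0x0001 sorts below
		 * htons(80) = 0x5000, so this prints 0
		 */
		printf("raw be16: in range = %d\n",
		       be_probe >= be_min && be_probe <= be_max);
		return 0;
	}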
*flags_key = 0; *flags_mask = 0; @@@ -1451,7 -1451,7 +1451,7 @@@ static int fl_set_key_ct(struct nlattr &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK, sizeof(key->ct_state));
- err = fl_validate_ct_state(mask->ct_state, + err = fl_validate_ct_state(key->ct_state & mask->ct_state, tb[TCA_FLOWER_KEY_CT_STATE_MASK], extack); if (err) diff --combined net/tipc/node.c index 136338b85504,0daf3be11ed1..61c38eaaa298 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@@ -372,42 -372,49 +372,49 @@@ static struct tipc_node *tipc_node_find }
static void tipc_node_read_lock(struct tipc_node *n) + __acquires(n->lock) { read_lock_bh(&n->lock); }
static void tipc_node_read_unlock(struct tipc_node *n) + __releases(n->lock) { read_unlock_bh(&n->lock); }
static void tipc_node_write_lock(struct tipc_node *n) + __acquires(n->lock) { write_lock_bh(&n->lock); }
static void tipc_node_write_unlock_fast(struct tipc_node *n) + __releases(n->lock) { write_unlock_bh(&n->lock); }
static void tipc_node_write_unlock(struct tipc_node *n) + __releases(n->lock) { + struct tipc_socket_addr sk; struct net *net = n->net; - u32 addr = 0; u32 flags = n->action_flags; - u32 link_id = 0; - u32 bearer_id; struct list_head *publ_list; + struct tipc_uaddr ua; + u32 bearer_id;
if (likely(!flags)) { write_unlock_bh(&n->lock); return; }
- addr = n->addr; - link_id = n->link_id; - bearer_id = link_id & 0xffff; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, + TIPC_LINK_STATE, n->addr, n->addr); + sk.ref = n->link_id; + sk.node = n->addr; + bearer_id = n->link_id & 0xffff; publ_list = &n->publ_list;
n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | @@@ -416,20 -423,18 +423,18 @@@ write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, addr, n->capabilities); + tipc_publ_notify(net, publ_list, n->addr, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, addr, n->capabilities); + tipc_named_node_up(net, n->addr, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) { - tipc_mon_peer_up(net, addr, bearer_id); - tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, - TIPC_NODE_SCOPE, link_id, link_id); + tipc_mon_peer_up(net, n->addr, bearer_id); + tipc_nametbl_publish(net, &ua, &sk, n->link_id); } if (flags & TIPC_NOTIFY_LINK_DOWN) { - tipc_mon_peer_down(net, addr, bearer_id); - tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, - addr, link_id); + tipc_mon_peer_down(net, n->addr, bearer_id); + tipc_nametbl_withdraw(net, &ua, &sk, n->link_id); } }
@@@ -2895,22 -2900,17 +2900,22 @@@ int tipc_nl_node_dump_monitor_peer(stru
#ifdef CONFIG_TIPC_CRYPTO static int tipc_nl_retrieve_key(struct nlattr **attrs, - struct tipc_aead_key **key) + struct tipc_aead_key **pkey) { struct nlattr *attr = attrs[TIPC_NLA_NODE_KEY]; + struct tipc_aead_key *key;
if (!attr) return -ENODATA;
- *key = (struct tipc_aead_key *)nla_data(attr); - if (nla_len(attr) < tipc_aead_key_size(*key)) + if (nla_len(attr) < sizeof(*key)) + return -EINVAL; + key = (struct tipc_aead_key *)nla_data(attr); + if (key->keylen > TIPC_AEAD_KEYLEN_MAX || + nla_len(attr) < tipc_aead_key_size(key)) return -EINVAL;
+ *pkey = key; return 0; }
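The key attribute is a fixed header followed by keylen bytes of key material, so it has to be validated in two steps: first that the header itself fits, then that the declared keylen is bounded and fully covered by the attribute. The same pattern outside the kernel (toy types, not TIPC's):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define KEYLEN_MAX 36	/* stand-in for TIPC_AEAD_KEYLEN_MAX */

	/* toy mirror of a header-plus-payload attribute */
	struct toy_key {
		char alg_name[32];
		uint32_t keylen;
		uint8_t key[];		/* keylen bytes follow */
	};

	static int validate_key(const void *attr, size_t attr_len)
	{
		const struct toy_key *key = attr;

		/* step 1: the fixed header must fit before keylen is read at all */
		if (attr_len < sizeof(*key))
			return -1;
		/* step 2: keylen must be bounded and covered by the attribute */
		if (key->keylen > KEYLEN_MAX ||
		    attr_len < sizeof(*key) + key->keylen)
			return -1;
		return 0;
	}

	int main(void)
	{
		uint8_t buf[sizeof(struct toy_key) + 16] __attribute__((aligned(4))) = { 0 };
		struct toy_key *key = (struct toy_key *)buf;

		strcpy(key->alg_name, "gcm(aes)");
		key->keylen = 16;
		printf("ok=%d, truncated=%d\n",
		       validate_key(buf, sizeof(buf)),		/* 0  */
		       validate_key(buf, sizeof(*key) + 8));	/* -1 */
		return 0;
	}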
diff --combined tools/lib/bpf/Makefile index e9eb6a6e80d2,8170f88e8ea6..87b14b74d62f --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@@ -158,7 -158,7 +158,7 @@@ $(BPF_IN_STATIC): force $(BPF_HELPER_DE $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
$(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h - $(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \ + $(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \ --file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) @@@ -215,7 -215,7 +215,7 @@@ define do_instal if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' + $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' endef
install_lib: all_cmd diff --combined tools/lib/bpf/btf_dump.c index 0911aea4cdbe,5e957fcceee6..7b53a484860f --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@@ -279,6 -279,7 +279,7 @@@ static int btf_dump_mark_referenced(str case BTF_KIND_INT: case BTF_KIND_ENUM: case BTF_KIND_FWD: + case BTF_KIND_FLOAT: break;
case BTF_KIND_VOLATILE: @@@ -453,6 -454,7 +454,7 @@@ static int btf_dump_order_type(struct b
switch (btf_kind(t)) { case BTF_KIND_INT: + case BTF_KIND_FLOAT: tstate->order_state = ORDERED; return 0;
@@@ -462,7 -464,7 +464,7 @@@ return err;
case BTF_KIND_ARRAY: - return btf_dump_order_type(d, btf_array(t)->type, through_ptr); + return btf_dump_order_type(d, btf_array(t)->type, false);
case BTF_KIND_STRUCT: case BTF_KIND_UNION: { @@@ -1133,6 -1135,7 +1135,7 @@@ skip_mod case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_TYPEDEF: + case BTF_KIND_FLOAT: goto done; default: pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", @@@ -1247,6 -1250,7 +1250,7 @@@ static void btf_dump_emit_type_chain(st
switch (kind) { case BTF_KIND_INT: + case BTF_KIND_FLOAT: btf_dump_emit_mods(d, decls); name = btf_name_of(d, t->name_off); btf_dump_printf(d, "%s", name); diff --combined tools/lib/bpf/libbpf.c index 4181d178ee7b,2f351d3ad3e7..8d610259f4be --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@@ -178,6 -178,8 +178,8 @@@ enum kern_feature_id FEAT_PROG_BIND_MAP, /* Kernel support for module BTFs */ FEAT_MODULE_BTF, + /* BTF_KIND_FLOAT support */ + FEAT_BTF_FLOAT, __FEAT_CNT, };
@@@ -188,6 -190,7 +190,7 @@@ enum reloc_type RELO_CALL, RELO_DATA, RELO_EXTERN, + RELO_SUBPROG_ADDR, };
struct reloc_desc { @@@ -574,6 -577,16 +577,16 @@@ static bool insn_is_subprog_call(const insn->off == 0; }
+ static bool is_ldimm64(struct bpf_insn *insn) + { + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); + } + + static bool insn_is_pseudo_func(struct bpf_insn *insn) + { + return is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; + } + static int bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, const char *name, size_t sec_idx, const char *sec_name, @@@ -1181,8 -1194,7 +1194,8 @@@ static int bpf_object__elf_init(struct if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) { pr_warn("elf: failed to get section names strings from %s: %s\n", obj->path, elf_errmsg(-1)); - return -LIBBPF_ERRNO__FORMAT; + err = -LIBBPF_ERRNO__FORMAT; + goto errout; }
/* Old LLVM set e_machine to EM_NONE */ @@@ -1936,6 -1948,7 +1949,7 @@@ static const char *btf_kind_str(const s case BTF_KIND_FUNC_PROTO: return "func_proto"; case BTF_KIND_VAR: return "var"; case BTF_KIND_DATASEC: return "datasec"; + case BTF_KIND_FLOAT: return "float"; default: return "unknown"; } } @@@ -2385,15 -2398,17 +2399,17 @@@ static bool btf_needs_sanitization(stru { bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); + bool has_float = kernel_supports(FEAT_BTF_FLOAT); bool has_func = kernel_supports(FEAT_BTF_FUNC);
- return !has_func || !has_datasec || !has_func_global; + return !has_func || !has_datasec || !has_func_global || !has_float; }
static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) { bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); + bool has_float = kernel_supports(FEAT_BTF_FLOAT); bool has_func = kernel_supports(FEAT_BTF_FUNC); struct btf_type *t; int i, j, vlen; @@@ -2446,6 -2461,13 +2462,13 @@@ } else if (!has_func_global && btf_is_func(t)) { /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */ t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0); + } else if (!has_float && btf_is_float(t)) { + /* replace FLOAT with an equally-sized empty STRUCT; + * since C compilers do not accept e.g. "float" as a + * valid struct name, make it anonymous + */ + t->name_off = 0; + t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); } } } @@@ -2975,6 -2997,23 +2998,23 @@@ static bool sym_is_extern(const GElf_Sy GELF_ST_TYPE(sym->st_info) == STT_NOTYPE; }
+ static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx) + { + int bind = GELF_ST_BIND(sym->st_info); + int type = GELF_ST_TYPE(sym->st_info); + + /* in .text section */ + if (sym->st_shndx != text_shndx) + return false; + + /* local function */ + if (bind == STB_LOCAL && type == STT_SECTION) + return true; + + /* global function */ + return bind == STB_GLOBAL && type == STT_FUNC; + } + static int find_extern_btf_id(const struct btf *btf, const char *ext_name) { const struct btf_type *t; @@@ -3396,7 -3435,7 +3436,7 @@@ static int bpf_program__record_reloc(st return 0; }
- if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) { + if (!is_ldimm64(insn)) { pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", prog->name, sym_name, insn_idx, insn->code); return -LIBBPF_ERRNO__RELOC; @@@ -3431,6 -3470,23 +3471,23 @@@ return -LIBBPF_ERRNO__RELOC; }
+ /* loading subprog addresses */ + if (sym_is_subprog(sym, obj->efile.text_shndx)) { + /* global_func: sym->st_value = offset in the section, insn->imm = 0. + * local_func: sym->st_value = 0, insn->imm = offset in the section. + */ + if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) { + pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n", + prog->name, sym_name, (size_t)sym->st_value, insn->imm); + return -LIBBPF_ERRNO__RELOC; + } + + reloc_desc->type = RELO_SUBPROG_ADDR; + reloc_desc->insn_idx = insn_idx; + reloc_desc->sym_off = sym->st_value; + return 0; + } + type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
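The two symbol flavours accepted by sym_is_subprog() split the byte offset between symbol and instruction: for a global function sym->st_value holds the offset into .text and insn->imm is 0, for a static function it is the other way around. The later code in bpf_object__reloc_code() therefore computes the target instruction as (sym_off + imm) / BPF_INSN_SZ. A worked example (offsets assumed for illustration):

	/* assume the referenced subprog starts 64 bytes into .text */
	size_t sym_off = 64;	/* global func: section offset, imm == 0 */
	int imm = 0;		/* a static func would have sym_off == 0, imm == 64 */
	size_t sub_insn_idx = (sym_off + imm) / BPF_INSN_SZ;	/* 64 / 8 == 8 */
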
@@@ -3883,6 -3939,18 +3940,18 @@@ static int probe_kern_btf_datasec(void strs, sizeof(strs))); }
+ static int probe_kern_btf_float(void) + { + static const char strs[] = "\0float"; + __u32 types[] = { + /* float */ + BTF_TYPE_FLOAT_ENC(1, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); + } + static int probe_kern_array_mmap(void) { struct bpf_create_map_attr attr = { @@@ -4062,6 -4130,9 +4131,9 @@@ static struct kern_feature_desc [FEAT_MODULE_BTF] = { "module BTF support", probe_module_btf, }, + [FEAT_BTF_FLOAT] = { + "BTF_KIND_FLOAT support", probe_kern_btf_float, + }, };
static bool kernel_supports(enum kern_feature_id feat_id) @@@ -5567,11 -5638,6 +5639,6 @@@ static void bpf_core_poison_insn(struc insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ }
- static bool is_ldimm64(struct bpf_insn *insn) - { - return insn->code == (BPF_LD | BPF_IMM | BPF_DW); - } - static int insn_bpf_size_to_bytes(struct bpf_insn *insn) { switch (BPF_SIZE(insn->code)) { @@@ -6173,6 -6239,10 +6240,10 @@@ bpf_object__relocate_data(struct bpf_ob } relo->processed = true; break; + case RELO_SUBPROG_ADDR: + insn[0].src_reg = BPF_PSEUDO_FUNC; + /* will be handled as a follow up pass */ + break; case RELO_CALL: /* will be handled as a follow up pass */ break; @@@ -6359,11 -6429,11 +6430,11 @@@ bpf_object__reloc_code(struct bpf_objec
for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) { insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; - if (!insn_is_subprog_call(insn)) + if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn)) continue;
relo = find_prog_insn_relo(prog, insn_idx); - if (relo && relo->type != RELO_CALL) { + if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) { pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n", prog->name, insn_idx, relo->type); return -LIBBPF_ERRNO__RELOC; @@@ -6375,8 -6445,22 +6446,22 @@@ * call always has imm = -1, but for static functions * relocation is against STT_SECTION and insn->imm * points to a start of a static function + * + * for subprog addr relocation, the relo->sym_off + insn->imm is + * the byte offset in the corresponding section. */ - sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1; + if (relo->type == RELO_CALL) + sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1; + else + sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ; + } else if (insn_is_pseudo_func(insn)) { + /* + * RELO_SUBPROG_ADDR relo is always emitted even if both + * functions are in the same section, so it shouldn't reach here. + */ + pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n", + prog->name, insn_idx); + return -LIBBPF_ERRNO__RELOC; } else { /* if subprogram call is to a static function within * the same ELF section, there won't be any relocation diff --combined tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 3ac0c9afc35a,12b40dc81e14..8aaa24a00322 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@@ -174,12 -174,6 +174,12 @@@ struct struct_in_struct }; };
+struct struct_in_array {}; + +struct struct_in_array_typed {}; + +typedef struct struct_in_array_typed struct_in_array_t[2]; + struct struct_with_embedded_stuff { int a; struct { @@@ -209,10 -203,14 +209,16 @@@ } r[5]; struct struct_in_struct s[10]; int t[11]; + struct struct_in_array (*u)[2]; + struct_in_array_t *v; };
+ struct float_struct { + float f; + const double *d; + volatile long double *ld; + }; + struct root_struct { enum e1 _1; enum e2 _2; @@@ -227,6 -225,7 +233,7 @@@ union_fwd_t *_12; union_fwd_ptr_t _13; struct struct_with_embedded_stuff _14; + struct float_struct _15; };
/* ------ END-EXPECTED-OUTPUT ------ */ diff --combined tools/testing/selftests/net/mptcp/mptcp_join.sh index ad32240fbfda,191303b652a6..fe990d8696a9 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@@ -11,7 -11,6 +11,7 @@@ ksft_skip= timeout=30 mptcp_connect="" capture=0 +do_all_tests=1
TEST_COUNT=0
@@@ -122,6 -121,12 +122,6 @@@ reset_with_add_addr_timeout( -j DROP }
-for arg in "$@"; do - if [ "$arg" = "-c" ]; then - capture=1 - fi -done - ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" @@@ -279,14 -284,19 +279,19 @@@ do_transfer( let rm_nr_ns1=-addr_nr_ns1 if [ $rm_nr_ns1 -lt 8 ]; then counter=1 - sleep 1 - - while [ $counter -le $rm_nr_ns1 ] - do - ip netns exec ${listener_ns} ./pm_nl_ctl del $counter + dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`) + if [ ${#dump[@]} -gt 0 ]; then + id=${dump[1]} sleep 1 - let counter+=1 - done + + while [ $counter -le $rm_nr_ns1 ] + do + ip netns exec ${listener_ns} ./pm_nl_ctl del $id + sleep 1 + let counter+=1 + let id+=1 + done + fi else sleep 1 ip netns exec ${listener_ns} ./pm_nl_ctl flush @@@ -313,14 -323,19 +318,19 @@@ let rm_nr_ns2=-addr_nr_ns2 if [ $rm_nr_ns2 -lt 8 ]; then counter=1 - sleep 1 - - while [ $counter -le $rm_nr_ns2 ] - do - ip netns exec ${connector_ns} ./pm_nl_ctl del $counter + dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`) + if [ ${#dump[@]} -gt 0 ]; then + id=${dump[1]} sleep 1 - let counter+=1 - done + + while [ $counter -le $rm_nr_ns2 ] + do + ip netns exec ${connector_ns} ./pm_nl_ctl del $id + sleep 1 + let counter+=1 + let id+=1 + done + fi else sleep 1 ip netns exec ${connector_ns} ./pm_nl_ctl flush @@@ -605,11 -620,22 +615,22 @@@ chk_rm_nr( { local rm_addr_nr=$1 local rm_subflow_nr=$2 + local invert=${3:-""} local count local dump_stats + local addr_ns + local subflow_ns + + if [ -z $invert ]; then + addr_ns=$ns1 + subflow_ns=$ns2 + elif [ $invert = "invert" ]; then + addr_ns=$ns2 + subflow_ns=$ns1 + fi
printf "%-39s %s" " " "rm " - count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'` + count=`ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'` [ -z "$count" ] && count=0 if [ "$count" != "$rm_addr_nr" ]; then echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" @@@ -620,7 -646,7 +641,7 @@@ fi
echo -n " - sf " - count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'` + count=`ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'` [ -z "$count" ] && count=0 if [ "$count" != "$rm_subflow_nr" ]; then echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr" @@@ -828,7 -854,7 +849,7 @@@ remove_tests( run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow chk_join_nr "remove single address" 1 1 1 chk_add_nr 1 1 - chk_rm_nr 0 0 + chk_rm_nr 1 1 invert
# subflow and signal, remove reset @@@ -864,6 -890,29 +885,29 @@@ chk_join_nr "flush subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 + + # subflows flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow id 150 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + chk_join_nr "flush subflows" 3 3 3 + chk_rm_nr 3 3 + + # addresses flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + chk_join_nr "flush addresses" 3 3 3 + chk_add_nr 3 3 + chk_rm_nr 3 3 invert }
add_tests() @@@ -940,7 -989,7 +984,7 @@@ ipv6_tests( run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow chk_join_nr "remove single address IPv6" 1 1 1 chk_add_nr 1 1 - chk_rm_nr 0 0 + chk_rm_nr 1 1 invert
# subflow and signal IPv6, remove reset @@@ -1083,7 -1132,7 +1127,7 @@@ add_addr_ports_tests( run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow chk_join_nr "remove single address with port" 1 1 1 chk_add_nr 1 1 1 - chk_rm_nr 0 0 + chk_rm_nr 1 1 invert
# subflow and signal with port, remove reset @@@ -1216,8 -1265,7 +1260,8 @@@ usage( echo " -4 v4mapped_tests" echo " -b backup_tests" echo " -p add_addr_ports_tests" - echo " -c syncookies_tests" + echo " -k syncookies_tests" + echo " -c capture pcap files" echo " -h help" }
@@@ -1231,24 -1279,12 +1275,24 @@@ make_file "$cin" "client" make_file "$sin" "server" 1 trap cleanup EXIT
-if [ -z $1 ]; then +for arg in "$@"; do + # check for "capture" arg before launching tests + if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then + capture=1 + fi + + # exception for the capture option, the rest means: a part of the tests + if [ "${arg}" != "-c" ]; then + do_all_tests=0 + fi +done + +if [ $do_all_tests -eq 1 ]; then all_tests exit $ret fi
-while getopts 'fsltra64bpch' opt; do +while getopts 'fsltra64bpkch' opt; do case $opt in f) subflows_tests @@@ -1280,11 -1316,9 +1324,11 @@@ p) add_addr_ports_tests ;; - c) + k) syncookies_tests ;; + c) + ;; h | *) usage ;;