The following commit has been merged in the master branch:

commit 0efb8a790ef4f4ef2fc3d1c265ed125a386fcfd1
Merge: 6132ececa50042edad15794eb5cb7a2e6831e98b 4c94fe88cde4bb5c8e1baa01106c4e6db1c75738
Author: Stephen Rothwell <sfr@canb.auug.org.au>
Date:   Wed Mar 24 12:21:24 2021 +1100
Merge remote-tracking branch 'net-next/master'
# Conflicts:
#	drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
#	drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
#	drivers/net/ethernet/pensando/ionic/ionic_txrx.c
#	kernel/bpf/verifier.c
diff --combined MAINTAINERS
index 4941a9ba6fc3,ad214621655f..97085cb1828f
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -261,8 -261,8 +261,8 @@@ ABI/AP
L: linux-api@vger.kernel.org
F: include/linux/syscalls.h
F: kernel/sys_ni.c
-F: include/uapi/
-F: arch/*/include/uapi/
+X: include/uapi/
+X: arch/*/include/uapi/
ABIT UGURU 1,2 HARDWARE MONITOR DRIVER M: Hans de Goede hdegoede@redhat.com @@@ -1181,7 -1181,7 +1181,7 @@@ M: Joel Fernandes <joel@joelfernandes.o M: Christian Brauner christian@brauner.io M: Hridya Valsaraju hridya@google.com M: Suren Baghdasaryan surenb@google.com -L: devel@driverdev.osuosl.org +L: linux-kernel@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git F: drivers/android/ @@@ -1530,6 -1530,7 +1530,7 @@@ F: Documentation/devicetree/bindings/dm F: Documentation/devicetree/bindings/i2c/i2c-owl.yaml F: Documentation/devicetree/bindings/interrupt-controller/actions,owl-sirq.yaml F: Documentation/devicetree/bindings/mmc/owl-mmc.yaml + F: Documentation/devicetree/bindings/net/actions,owl-emac.yaml F: Documentation/devicetree/bindings/pinctrl/actions,* F: Documentation/devicetree/bindings/power/actions,owl-sps.txt F: Documentation/devicetree/bindings/timer/actions,owl-timer.txt @@@ -1542,6 -1543,7 +1543,7 @@@ F: drivers/dma/owl-dma. F: drivers/i2c/busses/i2c-owl.c F: drivers/irqchip/irq-owl-sirq.c F: drivers/mmc/host/owl-mmc.c + F: drivers/net/ethernet/actions/ F: drivers/pinctrl/actions/* F: drivers/soc/actions/ F: include/dt-bindings/power/owl-* @@@ -2375,7 -2377,7 +2377,7 @@@ F: sound/soc/rockchip N: rockchip
ARM/SAMSUNG S3C, S5P AND EXYNOS ARM ARCHITECTURES -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org S: Maintained @@@ -2489,7 -2491,7 +2491,7 @@@ N: sc27x N: sc2731
ARM/STI ARCHITECTURE -M: Patrice Chotard patrice.chotard@st.com +M: Patrice Chotard patrice.chotard@foss.st.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained W: http://www.stlinux.com @@@ -2522,7 -2524,7 +2524,7 @@@ F: include/linux/remoteproc/st_slim_rpr
ARM/STM32 ARCHITECTURE M: Maxime Coquelin mcoquelin.stm32@gmail.com -M: Alexandre Torgue alexandre.torgue@st.com +M: Alexandre Torgue alexandre.torgue@foss.st.com L: linux-stm32@st-md-mailman.stormreply.com (moderated for non-subscribers) L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@@ -3115,7 -3117,7 +3117,7 @@@ C: irc://irc.oftc.net/bcach F: drivers/md/bcache/
BDISP ST MEDIA DRIVER -M: Fabien Dessenne fabien.dessenne@st.com +M: Fabien Dessenne fabien.dessenne@foss.st.com L: linux-media@vger.kernel.org S: Supported W: https://linuxtv.org @@@ -3233,6 -3235,7 +3235,7 @@@ T: git git://git.kernel.org/pub/scm/lin T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git F: Documentation/bpf/ F: Documentation/networking/filter.rst + F: Documentation/userspace-api/ebpf/ F: arch/*/net/* F: include/linux/bpf* F: include/linux/filter.h @@@ -3247,6 -3250,7 +3250,7 @@@ F: net/core/filter. F: net/sched/act_bpf.c F: net/sched/cls_bpf.c F: samples/bpf/ + F: scripts/bpf_doc.py F: tools/bpf/ F: tools/lib/bpf/ F: tools/testing/selftests/bpf/ @@@ -3675,7 -3679,7 +3679,7 @@@ M: bcm-kernel-feedback-list@broadcom.co L: linux-pm@vger.kernel.org S: Maintained T: git git://github.com/broadcom/stblinux.git -F: drivers/soc/bcm/bcm-pmb.c +F: drivers/soc/bcm/bcm63xx/bcm-pmb.c F: include/dt-bindings/soc/bcm-pmb.h
BROADCOM SPECIFIC AMBA DRIVER (BCMA) @@@ -4181,18 -4185,9 +4185,18 @@@ X: drivers/char/tpm CHECKPATCH M: Andy Whitcroft apw@canonical.com M: Joe Perches joe@perches.com +R: Dwaipayan Ray dwaipayanray1@gmail.com +R: Lukas Bulwahn lukas.bulwahn@gmail.com S: Maintained F: scripts/checkpatch.pl
+CHECKPATCH DOCUMENTATION
+M: Dwaipayan Ray dwaipayanray1@gmail.com
+M: Lukas Bulwahn lukas.bulwahn@gmail.com
+R: Joe Perches joe@perches.com
+S: Maintained
+F: Documentation/dev-tools/checkpatch.rst
+
CHINESE DOCUMENTATION
M: Harry Wei harryxiyou@gmail.com
M: Alex Shi alex.shi@linux.alibaba.com
@@@ -4443,7 -4438,7 +4447,7 @@@ F: include/linux/clk
F: include/linux/of_clk.h
X: drivers/clk/clkdev.c
-COMMON INTERNET FILE SYSTEM (CIFS) +COMMON INTERNET FILE SYSTEM CLIENT (CIFS) M: Steve French sfrench@samba.org L: linux-cifs@vger.kernel.org L: samba-technical@lists.samba.org (moderated for non-subscribers) @@@ -4453,16 -4448,6 +4457,16 @@@ T: git git://git.samba.org/sfrench/cifs F: Documentation/admin-guide/cifs/ F: fs/cifs/
+COMMON INTERNET FILE SYSTEM SERVER (CIFSD)
+M: Namjae Jeon namjae.jeon@samsung.com
+M: Sergey Senozhatsky sergey.senozhatsky@gmail.com
+M: Steve French sfrench@samba.org
+M: Hyunchul Lee hyc.lee@gmail.com
+L: linux-cifs@vger.kernel.org
+L: linux-cifsd-devel@lists.sourceforge.net
+S: Maintained
+F: fs/cifsd/
+
COMPACTPCI HOTPLUG CORE
M: Scott Murray scott@spiteful.org
L: linux-pci@vger.kernel.org
@@@ -5099,7 -5084,7 +5103,7 @@@ S: Maintaine
F: drivers/platform/x86/dell/dell-wmi.c
DELTA ST MEDIA DRIVER -M: Hugues Fruchet hugues.fruchet@st.com +M: Hugues Fruchet hugues.fruchet@foss.st.com L: linux-media@vger.kernel.org S: Supported W: https://linuxtv.org @@@ -5397,7 -5382,7 +5401,7 @@@ F: drivers/hwmon/dme1737. DMI/SMBIOS SUPPORT M: Jean Delvare jdelvare@suse.com S: Maintained -T: quilt http://jdelvare.nerim.net/devel/linux/jdelvare-dmi/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging.git dmi-for-next F: Documentation/ABI/testing/sysfs-firmware-dmi-tables F: drivers/firmware/dmi-id.c F: drivers/firmware/dmi_scan.c @@@ -5490,11 -5475,11 +5494,11 @@@ F: drivers/net/ethernet/freescale/dpaa2 F: drivers/net/ethernet/freescale/dpaa2/dpni*
DPAA2 ETHERNET SWITCH DRIVER - M: Ioana Radulescu ruxandra.radulescu@nxp.com M: Ioana Ciornei ioana.ciornei@nxp.com - L: linux-kernel@vger.kernel.org + L: netdev@vger.kernel.org S: Maintained - F: drivers/staging/fsl-dpaa2/ethsw + F: drivers/net/ethernet/freescale/dpaa2/dpaa2-switch* + F: drivers/net/ethernet/freescale/dpaa2/dpsw*
DPT_I2O SCSI RAID DRIVER M: Adaptec OEM Raid Solutions aacraid@microsemi.com @@@ -5854,7 -5839,7 +5858,7 @@@ M: David Airlie <airlied@linux.ie M: Daniel Vetter daniel@ffwll.ch L: dri-devel@lists.freedesktop.org S: Maintained -B: https://bugs.freedesktop.org/ +B: https://gitlab.freedesktop.org/drm C: irc://chat.freenode.net/dri-devel T: git git://anongit.freedesktop.org/drm/drm F: Documentation/devicetree/bindings/display/ @@@ -6025,6 -6010,7 +6029,6 @@@ F: drivers/gpu/drm/rockchip
DRM DRIVERS FOR STI M: Benjamin Gaignard benjamin.gaignard@linaro.org -M: Vincent Abriou vincent.abriou@st.com L: dri-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc @@@ -6032,9 -6018,10 +6036,9 @@@ F: Documentation/devicetree/bindings/di F: drivers/gpu/drm/sti
DRM DRIVERS FOR STM -M: Yannick Fertre yannick.fertre@st.com -M: Philippe Cornu philippe.cornu@st.com +M: Yannick Fertre yannick.fertre@foss.st.com +M: Philippe Cornu philippe.cornu@foss.st.com M: Benjamin Gaignard benjamin.gaignard@linaro.org -M: Vincent Abriou vincent.abriou@st.com L: dri-devel@lists.freedesktop.org S: Maintained T: git git://anongit.freedesktop.org/drm/drm-misc @@@ -7345,13 -7332,6 +7349,13 @@@ F: fs/verity F: include/linux/fsverity.h F: include/uapi/linux/fsverity.h
+FT260 FTDI USB-HID TO I2C BRIDGE DRIVER
+M: Michael Zaidman michael.zaidman@gmail.com
+L: linux-i2c@vger.kernel.org
+L: linux-input@vger.kernel.org
+S: Maintained
+F: drivers/hid/hid-ft260.c
+
FUJITSU LAPTOP EXTRAS
M: Jonathan Woithe jwoithe@just42.net
L: platform-driver-x86@vger.kernel.org
@@@ -7500,9 -7480,8 +7504,9 @@@ F: include/uapi/asm-generic
GENERIC PHY FRAMEWORK
M: Kishon Vijay Abraham I kishon@ti.com
M: Vinod Koul vkoul@kernel.org
-L: linux-kernel@vger.kernel.org
+L: linux-phy@lists.infradead.org
S: Supported
+Q: https://patchwork.kernel.org/project/linux-phy/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git
F: Documentation/devicetree/bindings/phy/
F: drivers/phy/
@@@ -8141,6 -8120,7 +8145,6 @@@ F: drivers/crypto/hisilicon/sec2/sec_ma
HISILICON STAGING DRIVERS FOR HIKEY 960/970 M: Mauro Carvalho Chehab mchehab+huawei@kernel.org -L: devel@driverdev.osuosl.org S: Maintained F: drivers/staging/hikey9xx/
@@@ -8255,7 -8235,7 +8259,7 @@@ F: include/linux/hugetlb. F: mm/hugetlb.c
HVA ST MEDIA DRIVER -M: Jean-Christophe Trotin jean-christophe.trotin@st.com +M: Jean-Christophe Trotin jean-christophe.trotin@foss.st.com L: linux-media@vger.kernel.org S: Supported W: https://linuxtv.org @@@ -8545,7 -8525,6 +8549,7 @@@ IBM Power SRIOV Virtual NIC Device Driv M: Dany Madden drt@linux.ibm.com M: Lijun Pan ljp@linux.ibm.com M: Sukadev Bhattiprolu sukadev@linux.ibm.com +R: Thomas Falcon tlfalcon@linux.ibm.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmvnic.* @@@ -8717,12 -8696,6 +8721,12 @@@ S: Maintaine F: Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.txt F: drivers/iio/multiplexer/iio-mux.c
+IIO SCMI BASED DRIVER
+M: Jyoti Bhayana jbhayana@google.com
+L: linux-iio@vger.kernel.org
+S: Maintained
+F: drivers/iio/common/scmi_sensors/scmi_iio.c
+
IIO SUBSYSTEM AND DRIVERS
M: Jonathan Cameron jic23@kernel.org
R: Lars-Peter Clausen lars@metafoo.de
@@@ -10900,7 -10873,7 +10904,7 @@@ F: drivers/regulator/max77802-regulator
F: include/dt-bindings/*/*max77802.h
MAXIM MUIC CHARGER DRIVERS FOR EXYNOS BASED BOARDS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com L: linux-pm@vger.kernel.org S: Supported @@@ -10909,7 -10882,7 +10913,7 @@@ F: drivers/power/supply/max77693_charge
MAXIM PMIC AND MUIC DRIVERS FOR EXYNOS BASED BOARDS M: Chanwoo Choi cw00.choi@samsung.com -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com L: linux-kernel@vger.kernel.org S: Supported @@@ -11198,7 -11171,7 +11202,7 @@@ T: git git://linuxtv.org/media_tree.gi F: drivers/media/dvb-frontends/stv6111*
MEDIA DRIVERS FOR STM32 - DCMI -M: Hugues Fruchet hugues.fruchet@st.com +M: Hugues Fruchet hugues.fruchet@foss.st.com L: linux-media@vger.kernel.org S: Supported T: git git://linuxtv.org/media_tree.git @@@ -11561,7 -11534,7 +11565,7 @@@ F: include/linux/memblock. F: mm/memblock.c
MEMORY CONTROLLER DRIVERS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux-mem-ctrl.git @@@ -12569,7 -12542,7 +12573,7 @@@ NETWORKING [MPTCP M: Mat Martineau mathew.j.martineau@linux.intel.com M: Matthieu Baerts matthieu.baerts@tessares.net L: netdev@vger.kernel.org -L: mptcp@lists.01.org +L: mptcp@lists.linux.dev S: Maintained W: https://github.com/multipath-tcp/mptcp_net-next/wiki B: https://github.com/multipath-tcp/mptcp_net-next/issues @@@ -12899,7 -12872,7 +12903,7 @@@ F: Documentation/devicetree/bindings/re F: drivers/regulator/pf8x00-regulator.c
NXP PTN5150A CC LOGIC AND EXTCON DRIVER -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/extcon/extcon-ptn5150.yaml @@@ -12942,21 -12915,6 +12946,21 @@@ L: linux-nfc@lists.01.org (moderated fo S: Supported F: drivers/nfc/nxp-nci
+NXP i.MX 8QXP/8QM JPEG V4L2 DRIVER
+M: Mirela Rabulea mirela.rabulea@nxp.com
+R: NXP Linux Team linux-imx@nxp.com
+L: linux-media@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/media/imx8-jpeg.yaml
+F: drivers/media/platform/imx-jpeg
+
+NZXT-KRAKEN2 HARDWARE MONITORING DRIVER
+M: Jonas Malaco jonas@protocubo.io
+L: linux-hwmon@vger.kernel.org
+S: Maintained
+F: Documentation/hwmon/nzxt-kraken2.rst
+F: drivers/hwmon/nzxt-kraken2.c
+
OBJAGG
M: Jiri Pirko jiri@nvidia.com
L: netdev@vger.kernel.org
@@@ -13890,7 -13848,7 +13894,7 @@@ M: Lorenzo Pieralisi <lorenzo.pieralisi
R: Rob Herring robh@kernel.org
L: linux-pci@vger.kernel.org
S: Supported
-Q: http://patchwork.ozlabs.org/project/linux-pci/list/
+Q: http://patchwork.kernel.org/project/linux-pci/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/pci.git/
F: drivers/pci/controller/
@@@ -13898,7 -13856,7 +13902,7 @@@ PCI SUBSYSTE M: Bjorn Helgaas bhelgaas@google.com L: linux-pci@vger.kernel.org S: Supported -Q: http://patchwork.ozlabs.org/project/linux-pci/list/ +Q: http://patchwork.kernel.org/project/linux-pci/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci.git F: Documentation/PCI/ F: Documentation/devicetree/bindings/pci/ @@@ -14205,7 -14163,7 +14209,7 @@@ F: drivers/pinctrl/renesas
PIN CONTROLLER - SAMSUNG M: Tomasz Figa tomasz.figa@gmail.com -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Sylwester Nawrocki s.nawrocki@samsung.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: linux-samsung-soc@vger.kernel.org @@@ -14361,15 -14319,6 +14365,15 @@@ F: include/linux/pm_ F: include/linux/powercap.h F: kernel/configs/nopm.config
+DYNAMIC THERMAL POWER MANAGEMENT (DTPM)
+M: Daniel Lezcano daniel.lezcano@kernel.org
+L: linux-pm@vger.kernel.org
+S: Supported
+B: https://bugzilla.kernel.org
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
+F: drivers/powercap/dtpm*
+F: include/linux/dtpm.h
+
POWER STATE COORDINATION INTERFACE (PSCI)
M: Mark Rutland mark.rutland@arm.com
M: Lorenzo Pieralisi lorenzo.pieralisi@arm.com
@@@ -14447,7 -14396,7 +14451,7 @@@ F: kernel/sched/psi.
PRINTK M: Petr Mladek pmladek@suse.com -M: Sergey Senozhatsky sergey.senozhatsky@gmail.com +M: Sergey Senozhatsky senozhatsky@chromium.org R: Steven Rostedt rostedt@goodmis.org R: John Ogness john.ogness@linutronix.de S: Maintained @@@ -14764,11 -14713,15 +14768,11 @@@ F: drivers/net/ethernet/qlogic/qlcnic QLOGIC QLGE 10Gb ETHERNET DRIVER M: Manish Chopra manishc@marvell.com M: GR-Linux-NIC-Dev@marvell.com -L: netdev@vger.kernel.org -S: Supported -F: drivers/staging/qlge/ - -QLOGIC QLGE 10Gb ETHERNET DRIVER M: Coiby Xu coiby.xu@gmail.com L: netdev@vger.kernel.org -S: Maintained +S: Supported F: Documentation/networking/device_drivers/qlogic/qlge.rst +F: drivers/staging/qlge/
QM1D1B0004 MEDIA DRIVER M: Akihiro Tsukada tskd08@gmail.com @@@ -14839,7 -14792,7 +14843,7 @@@ M: Todor Tomov <todor.too@gmail.com L: linux-media@vger.kernel.org S: Maintained F: Documentation/admin-guide/media/qcom_camss.rst -F: Documentation/devicetree/bindings/media/qcom,camss.txt +F: Documentation/devicetree/bindings/media/*camss* F: drivers/media/platform/qcom/camss/
QUALCOMM CORE POWER REDUCTION (CPR) AVS DRIVER @@@ -15769,7 -15722,7 +15773,7 @@@ F: Documentation/admin-guide/LSM/SafeSe F: security/safesetid/
SAMSUNG AUDIO (ASoC) DRIVERS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Sylwester Nawrocki s.nawrocki@samsung.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Supported @@@ -15777,7 -15730,7 +15781,7 @@@ F: Documentation/devicetree/bindings/so F: sound/soc/samsung/
SAMSUNG EXYNOS PSEUDO RANDOM NUMBER GENERATOR (RNG) DRIVER -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com L: linux-crypto@vger.kernel.org L: linux-samsung-soc@vger.kernel.org S: Maintained @@@ -15812,7 -15765,7 +15816,7 @@@ S: Maintaine F: drivers/platform/x86/samsung-laptop.c
SAMSUNG MULTIFUNCTION PMIC DEVICE DRIVERS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com L: linux-kernel@vger.kernel.org L: linux-samsung-soc@vger.kernel.org @@@ -15837,7 -15790,7 +15841,7 @@@ F: drivers/media/platform/s3c-camif F: include/media/drv-intf/s3c_camif.h
SAMSUNG S3FWRN5 NFC DRIVER -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Krzysztof Opasiak k.opasiak@samsung.com L: linux-nfc@lists.01.org (moderated for non-subscribers) S: Maintained @@@ -15857,7 -15810,7 +15861,7 @@@ S: Supporte F: drivers/media/i2c/s5k5baf.c
SAMSUNG S5P Security SubSystem (SSS) DRIVER -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Vladimir Zapolskiy vz@mleia.com L: linux-crypto@vger.kernel.org L: linux-samsung-soc@vger.kernel.org @@@ -15889,7 -15842,7 +15893,7 @@@ F: include/linux/clk/samsung. F: include/linux/platform_data/clk-s3c2410.h
SAMSUNG SPI DRIVERS -M: Krzysztof Kozlowski krzk@kernel.org +M: Krzysztof Kozlowski krzysztof.kozlowski@canonical.com M: Andi Shyti andi@etezian.org L: linux-spi@vger.kernel.org L: linux-samsung-soc@vger.kernel.org @@@ -16652,13 -16605,6 +16656,13 @@@ F: drivers/firmware/arm_sdei. F: include/linux/arm_sdei.h F: include/uapi/linux/arm_sdei.h
+SOFTWARE NODES
+R: Andy Shevchenko andriy.shevchenko@linux.intel.com
+R: Heikki Krogerus heikki.krogerus@linux.intel.com
+L: linux-acpi@vger.kernel.org
+S: Maintained
+F: drivers/base/swnode.c
+
SOFTWARE RAID (Multiple Disks) SUPPORT
M: Song Liu song@kernel.org
L: linux-raid@vger.kernel.org
@@@ -16945,10 -16891,8 +16949,10 @@@ F: tools/spi
SPIDERNET NETWORK DRIVER for CELL M: Ishizaki Kou kou.ishizaki@toshiba.co.jp +M: Geoff Levand geoff@infradead.org L: netdev@vger.kernel.org -S: Supported +L: linuxppc-dev@lists.ozlabs.org +S: Maintained F: Documentation/networking/device_drivers/ethernet/toshiba/spider_net.rst F: drivers/net/ethernet/toshiba/spider_net*
@@@ -17002,19 -16946,11 +17006,19 @@@ F: Documentation/devicetree/bindings/me F: drivers/media/i2c/st-mipid02.c
ST STM32 I2C/SMBUS DRIVER -M: Pierre-Yves MORDRET pierre-yves.mordret@st.com +M: Pierre-Yves MORDRET pierre-yves.mordret@foss.st.com +M: Alain Volmat alain.volmat@foss.st.com L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-stm32*
+ST STPDDC60 DRIVER
+M: Daniel Nilsson daniel.nilsson@flex.com
+L: linux-hwmon@vger.kernel.org
+S: Maintained
+F: Documentation/hwmon/stpddc60.rst
+F: drivers/hwmon/pmbus/stpddc60.c
+
ST VL53L0X ToF RANGER(I2C) IIO DRIVER
M: Song Qiang songqiang1304521@gmail.com
L: linux-iio@vger.kernel.org
@@@ -17108,7 -17044,7 +17112,7 @@@ F: drivers/staging/vt665?
STAGING SUBSYSTEM M: Greg Kroah-Hartman gregkh@linuxfoundation.org -L: devel@driverdev.osuosl.org +L: linux-staging@lists.linux.dev S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git F: drivers/staging/ @@@ -17135,7 -17071,7 +17139,7 @@@ F: kernel/jump_label. F: kernel/static_call.c
STI AUDIO (ASoC) DRIVERS -M: Arnaud Pouliquen arnaud.pouliquen@st.com +M: Arnaud Pouliquen arnaud.pouliquen@foss.st.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt @@@ -17155,15 -17091,15 +17159,15 @@@ T: git git://linuxtv.org/media_tree.gi F: drivers/media/usb/stk1160/
STM32 AUDIO (ASoC) DRIVERS -M: Olivier Moysan olivier.moysan@st.com -M: Arnaud Pouliquen arnaud.pouliquen@st.com +M: Olivier Moysan olivier.moysan@foss.st.com +M: Arnaud Pouliquen arnaud.pouliquen@foss.st.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/iio/adc/st,stm32-*.yaml F: sound/soc/stm/
STM32 TIMER/LPTIMER DRIVERS -M: Fabrice Gasnier fabrice.gasnier@st.com +M: Fabrice Gasnier fabrice.gasnier@foss.st.com S: Maintained F: Documentation/ABI/testing/*timer-stm32 F: Documentation/devicetree/bindings/*/*stm32-*timer* @@@ -17173,7 -17109,7 +17177,7 @@@ F: include/linux/*/stm32-*tim
STMMAC ETHERNET DRIVER M: Giuseppe Cavallaro peppe.cavallaro@st.com -M: Alexandre Torgue alexandre.torgue@st.com +M: Alexandre Torgue alexandre.torgue@foss.st.com M: Jose Abreu joabreu@synopsys.com L: netdev@vger.kernel.org S: Supported @@@ -19203,7 -19139,7 +19207,7 @@@ VME SUBSYSTE M: Martyn Welch martyn@welchs.me.uk M: Manohar Vanga manohar.vanga@gmail.com M: Greg Kroah-Hartman gregkh@linuxfoundation.org -L: devel@driverdev.osuosl.org +L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git F: Documentation/driver-api/vme.rst @@@ -19234,7 -19170,7 +19238,7 @@@ S: Maintaine F: drivers/infiniband/hw/vmw_pvrdma/
VMware PVSCSI driver -M: Jim Gill jgill@vmware.com +M: Vishal Bhakta vbhakta@vmware.com M: VMware PV-Drivers pv-drivers@vmware.com L: linux-scsi@vger.kernel.org S: Maintained @@@ -19293,7 -19229,7 +19297,7 @@@ F: drivers/net/vrf. VSPRINTF M: Petr Mladek pmladek@suse.com M: Steven Rostedt rostedt@goodmis.org -M: Sergey Senozhatsky sergey.senozhatsky@gmail.com +M: Sergey Senozhatsky senozhatsky@chromium.org R: Andy Shevchenko andriy.shevchenko@linux.intel.com R: Rasmus Villemoes linux@rasmusvillemoes.dk S: Maintained @@@ -19944,7 -19880,7 +19948,7 @@@ F: drivers/staging/media/zoran ZRAM COMPRESSED RAM BLOCK DEVICE DRVIER M: Minchan Kim minchan@kernel.org M: Nitin Gupta ngupta@vflare.org -R: Sergey Senozhatsky sergey.senozhatsky.work@gmail.com +R: Sergey Senozhatsky senozhatsky@chromium.org L: linux-kernel@vger.kernel.org S: Maintained F: Documentation/admin-guide/blockdev/zram.rst @@@ -19958,7 -19894,7 +19962,7 @@@ F: drivers/tty/serial/zs. ZSMALLOC COMPRESSED SLAB MEMORY ALLOCATOR M: Minchan Kim minchan@kernel.org M: Nitin Gupta ngupta@vflare.org -R: Sergey Senozhatsky sergey.senozhatsky.work@gmail.com +R: Sergey Senozhatsky senozhatsky@chromium.org L: linux-mm@kvack.org S: Maintained F: Documentation/vm/zsmalloc.rst diff --combined drivers/atm/fore200e.c index 495fd0a1f040,0b9c99c3d218..b508df2ecada --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@@ -21,7 -21,6 +21,6 @@@ #include <linux/module.h> #include <linux/atmdev.h> #include <linux/sonet.h> - #include <linux/atm_suni.h> #include <linux/dma-mapping.h> #include <linux/delay.h> #include <linux/firmware.h> @@@ -100,6 -99,8 +99,6 @@@ static LIST_HEAD(fore200e_boards)
MODULE_AUTHOR("Christophe Lizzi - credits to Uwe Dannowski and Heikki Vatiainen"); MODULE_DESCRIPTION("FORE Systems 200E-series ATM driver - version " FORE200E_VERSION); -MODULE_SUPPORTED_DEVICE("PCA-200E, SBA-200E"); -
static const int fore200e_rx_buf_nbr[ BUFFER_SCHEME_NBR ][ BUFFER_MAGN_NBR ] = { { BUFFER_S1_NBR, BUFFER_L1_NBR }, diff --combined drivers/net/dsa/b53/b53_common.c index eb443721c58e,8d5a82dedce8..3ca6b394dd5f --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@@ -349,7 -349,7 +349,7 @@@ static void b53_set_forwarding(struct b b53_write8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, mgmt); }
- static void b53_enable_vlan(struct b53_device *dev, bool enable, + static void b53_enable_vlan(struct b53_device *dev, int port, bool enable, bool enable_filtering) { u8 mgmt, vc0, vc1, vc4 = 0, vc5; @@@ -431,6 -431,9 +431,9 @@@ b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, mgmt);
dev->vlan_enabled = enable; + + dev_dbg(dev->dev, "Port %d VLAN enabled: %d, filtering: %d\n", + port, enable, enable_filtering); }
static int b53_set_jumbo(struct b53_device *dev, bool enable, bool allow_10_100) @@@ -743,7 -746,7 +746,7 @@@ int b53_configure_vlan(struct dsa_switc b53_do_vlan_op(dev, VTA_CMD_CLEAR); }
- b53_enable_vlan(dev, dev->vlan_enabled, ds->vlan_filtering); + b53_enable_vlan(dev, -1, dev->vlan_enabled, ds->vlan_filtering);
b53_for_each_port(dev, i) b53_write16(dev, B53_VLAN_PAGE, @@@ -1105,6 -1108,13 +1108,6 @@@ static int b53_setup(struct dsa_switch b53_disable_port(ds, port); }
- /* Let DSA handle the case were multiple bridges span the same switch - * device and different VLAN awareness settings are requested, which - * would be breaking filtering semantics for any of the other bridge - * devices. (not hardware supported) - */ - ds->vlan_filtering_is_global = true; - return b53_setup_devlink_resources(ds); }
@@@ -1422,7 -1432,7 +1425,7 @@@ int b53_vlan_filtering(struct dsa_switc { struct b53_device *dev = ds->priv;
- b53_enable_vlan(dev, dev->vlan_enabled, vlan_filtering); + b53_enable_vlan(dev, port, dev->vlan_enabled, vlan_filtering);
return 0; } @@@ -1447,7 -1457,7 +1450,7 @@@ static int b53_vlan_prepare(struct dsa_ if (vlan->vid >= dev->num_vlans) return -ERANGE;
- b53_enable_vlan(dev, true, ds->vlan_filtering); + b53_enable_vlan(dev, port, true, ds->vlan_filtering);
return 0; } @@@ -2045,15 -2055,17 +2048,17 @@@ enum dsa_tag_protocol b53_get_tag_proto { struct b53_device *dev = ds->priv;
- /* Older models (5325, 5365) support a different tag format that we do - * not support in net/dsa/tag_brcm.c yet. - */ - if (is5325(dev) || is5365(dev) || - !b53_can_enable_brcm_tags(ds, port, mprot)) { + if (!b53_can_enable_brcm_tags(ds, port, mprot)) { dev->tag_protocol = DSA_TAG_PROTO_NONE; goto out; }
+ /* Older models require a different 6 byte tag */ + if (is5325(dev) || is5365(dev) || is63xx(dev)) { + dev->tag_protocol = DSA_TAG_PROTO_BRCM_LEGACY; + goto out; + } + /* Broadcom BCM58xx chips have a flow accelerator on Port 8 * which requires us to use the prepended Broadcom tag type */ @@@ -2657,13 -2669,6 +2662,13 @@@ struct b53_device *b53_switch_alloc(str ds->ops = &b53_switch_ops; ds->untag_bridge_pvid = true; dev->vlan_enabled = true; + /* Let DSA handle the case were multiple bridges span the same switch + * device and different VLAN awareness settings are requested, which + * would be breaking filtering semantics for any of the other bridge + * devices. (not hardware supported) + */ + ds->vlan_filtering_is_global = true; + mutex_init(&dev->reg_mutex); mutex_init(&dev->stats_mutex);
diff --combined drivers/net/dsa/bcm_sf2.c index ba5d546d06aa,7e0ca8012983..9150038b60cb --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@@ -32,6 -32,36 +32,36 @@@ #include "b53/b53_priv.h" #include "b53/b53_regs.h"
+ static u16 bcm_sf2_reg_rgmii_cntrl(struct bcm_sf2_priv *priv, int port) + { + switch (priv->type) { + case BCM4908_DEVICE_ID: + switch (port) { + case 7: + return REG_RGMII_11_CNTRL; + default: + break; + } + break; + default: + switch (port) { + case 0: + return REG_RGMII_0_CNTRL; + case 1: + return REG_RGMII_1_CNTRL; + case 2: + return REG_RGMII_2_CNTRL; + default: + break; + } + } + + WARN_ONCE(1, "Unsupported port %d\n", port); + + /* RO fallback reg */ + return REG_SWITCH_STATUS; + } + /* Return the number of active ports, not counting the IMP (CPU) port */ static unsigned int bcm_sf2_num_active_ports(struct dsa_switch *ds) { @@@ -114,10 -144,7 +144,10 @@@ static void bcm_sf2_imp_setup(struct ds /* Force link status for IMP port */ reg = core_readl(priv, offset); reg |= (MII_SW_OR | LINK_STS); - reg &= ~GMII_SPEED_UP_2G; + if (priv->type == BCM4908_DEVICE_ID) + reg |= GMII_SPEED_UP_2G; + else + reg &= ~GMII_SPEED_UP_2G; core_writel(priv, reg, offset);
/* Enable Broadcast, Multicast, Unicast forwarding to IMP port */ @@@ -435,6 -462,44 +465,44 @@@ static int bcm_sf2_sw_rst(struct bcm_sf return 0; }
+ static void bcm_sf2_crossbar_setup(struct bcm_sf2_priv *priv) + { + struct device *dev = priv->dev->ds->dev; + int shift; + u32 mask; + u32 reg; + int i; + + mask = BIT(priv->num_crossbar_int_ports) - 1; + + reg = reg_readl(priv, REG_CROSSBAR); + switch (priv->type) { + case BCM4908_DEVICE_ID: + shift = CROSSBAR_BCM4908_INT_P7 * priv->num_crossbar_int_ports; + reg &= ~(mask << shift); + if (0) /* FIXME */ + reg |= CROSSBAR_BCM4908_EXT_SERDES << shift; + else if (priv->int_phy_mask & BIT(7)) + reg |= CROSSBAR_BCM4908_EXT_GPHY4 << shift; + else if (phy_interface_mode_is_rgmii(priv->port_sts[7].mode)) + reg |= CROSSBAR_BCM4908_EXT_RGMII << shift; + else if (WARN(1, "Invalid port mode\n")) + return; + break; + default: + return; + } + reg_writel(priv, reg, REG_CROSSBAR); + + reg = reg_readl(priv, REG_CROSSBAR); + for (i = 0; i < priv->num_crossbar_int_ports; i++) { + shift = i * priv->num_crossbar_int_ports; + + dev_dbg(dev, "crossbar int port #%d - ext port #%d\n", i, + (reg >> shift) & mask); + } + } + static void bcm_sf2_intr_disable(struct bcm_sf2_priv *priv) { intrl2_0_mask_set(priv, 0xffffffff); @@@ -446,10 -511,11 +514,11 @@@ static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv, struct device_node *dn) { + struct device *dev = priv->dev->ds->dev; + struct bcm_sf2_port_status *port_st; struct device_node *port; unsigned int port_num; struct property *prop; - phy_interface_t mode; int err;
priv->moca_port = -1; @@@ -458,19 -524,26 +527,26 @@@ if (of_property_read_u32(port, "reg", &port_num)) continue;
+ if (port_num >= DSA_MAX_PORTS) { + dev_err(dev, "Invalid port number %d\n", port_num); + continue; + } + + port_st = &priv->port_sts[port_num]; + /* Internal PHYs get assigned a specific 'phy-mode' property * value: "internal" to help flag them before MDIO probing * has completed, since they might be turned off at that * time */ - err = of_get_phy_mode(port, &mode); + err = of_get_phy_mode(port, &port_st->mode); if (err) continue;
- if (mode == PHY_INTERFACE_MODE_INTERNAL) + if (port_st->mode == PHY_INTERFACE_MODE_INTERNAL) priv->int_phy_mask |= 1 << port_num;
- if (mode == PHY_INTERFACE_MODE_MOCA) + if (port_st->mode == PHY_INTERFACE_MODE_MOCA) priv->moca_port = port_num;
if (of_property_read_bool(port, "brcm,use-bcm-hdr")) @@@ -588,10 -661,8 +664,10 @@@ static u32 bcm_sf2_sw_get_phy_flags(str * in bits 15:8 and the patch level in bits 7:0 which is exactly what * the REG_PHY_REVISION register layout is. */ - - return priv->hw_params.gphy_rev; + if (priv->int_phy_mask & BIT(port)) + return priv->hw_params.gphy_rev; + else + return 0; }
static void bcm_sf2_sw_validate(struct dsa_switch *ds, int port, @@@ -647,6 -718,7 +723,7 @@@ static void bcm_sf2_sw_mac_config(struc { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 id_mode_dis = 0, port_mode; + u32 reg_rgmii_ctrl; u32 reg;
if (port == core_readl(priv, CORE_IMP0_PRT_ID)) @@@ -670,10 -742,12 +747,12 @@@ return; }
+ reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port); + /* Clear id_mode_dis bit, and the existing port mode, let * RGMII_MODE_EN bet set by mac_link_{up,down} */ - reg = reg_readl(priv, REG_RGMII_CNTRL_P(port)); + reg = reg_readl(priv, reg_rgmii_ctrl); reg &= ~ID_MODE_DIS; reg &= ~(PORT_MODE_MASK << PORT_MODE_SHIFT);
@@@ -681,13 -755,14 +760,14 @@@ if (id_mode_dis) reg |= ID_MODE_DIS;
- reg_writel(priv, reg, REG_RGMII_CNTRL_P(port)); + reg_writel(priv, reg, reg_rgmii_ctrl); }
static void bcm_sf2_sw_mac_link_set(struct dsa_switch *ds, int port, phy_interface_t interface, bool link) { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); + u32 reg_rgmii_ctrl; u32 reg;
if (!phy_interface_mode_is_rgmii(interface) && @@@ -695,13 -770,15 +775,15 @@@ interface != PHY_INTERFACE_MODE_REVMII) return;
+ reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port); + /* If the link is down, just disable the interface to conserve power */ - reg = reg_readl(priv, REG_RGMII_CNTRL_P(port)); + reg = reg_readl(priv, reg_rgmii_ctrl); if (link) reg |= RGMII_MODE_EN; else reg &= ~RGMII_MODE_EN; - reg_writel(priv, reg, REG_RGMII_CNTRL_P(port)); + reg_writel(priv, reg, reg_rgmii_ctrl); }
static void bcm_sf2_sw_mac_link_down(struct dsa_switch *ds, int port, @@@ -735,11 -812,15 +817,15 @@@ static void bcm_sf2_sw_mac_link_up(stru { struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_eee *p = &priv->dev->ports[port].eee; - u32 reg, offset;
bcm_sf2_sw_mac_link_set(ds, port, interface, true);
if (port != core_readl(priv, CORE_IMP0_PRT_ID)) { + u32 reg_rgmii_ctrl; + u32 reg, offset; + + reg_rgmii_ctrl = bcm_sf2_reg_rgmii_cntrl(priv, port); + if (priv->type == BCM4908_DEVICE_ID || priv->type == BCM7445_DEVICE_ID) offset = CORE_STS_OVERRIDE_GMIIP_PORT(port); @@@ -750,7 -831,7 +836,7 @@@ interface == PHY_INTERFACE_MODE_RGMII_TXID || interface == PHY_INTERFACE_MODE_MII || interface == PHY_INTERFACE_MODE_REVMII) { - reg = reg_readl(priv, REG_RGMII_CNTRL_P(port)); + reg = reg_readl(priv, reg_rgmii_ctrl); reg &= ~(RX_PAUSE_EN | TX_PAUSE_EN);
if (tx_pause) @@@ -758,7 -839,7 +844,7 @@@ if (rx_pause) reg |= RX_PAUSE_EN;
- reg_writel(priv, reg, REG_RGMII_CNTRL_P(port)); + reg_writel(priv, reg, reg_rgmii_ctrl); }
reg = SW_OVERRIDE | LINK_STS; @@@ -861,6 -942,8 +947,8 @@@ static int bcm_sf2_sw_resume(struct dsa return ret; }
+ bcm_sf2_crossbar_setup(priv); + ret = bcm_sf2_cfp_resume(ds); if (ret) return ret; @@@ -1133,6 -1216,7 +1221,7 @@@ struct bcm_sf2_of_data const u16 *reg_offsets; unsigned int core_reg_align; unsigned int num_cfp_rules; + unsigned int num_crossbar_int_ports; };
static const u16 bcm_sf2_4908_reg_offsets[] = { @@@ -1144,9 -1228,7 +1233,7 @@@ [REG_PHY_REVISION] = 0x14, [REG_SPHY_CNTRL] = 0x24, [REG_CROSSBAR] = 0xc8, - [REG_RGMII_0_CNTRL] = 0xe0, - [REG_RGMII_1_CNTRL] = 0xec, - [REG_RGMII_2_CNTRL] = 0xf8, + [REG_RGMII_11_CNTRL] = 0x014c, [REG_LED_0_CNTRL] = 0x40, [REG_LED_1_CNTRL] = 0x4c, [REG_LED_2_CNTRL] = 0x58, @@@ -1156,7 -1238,8 +1243,8 @@@ static const struct bcm_sf2_of_data bcm .type = BCM4908_DEVICE_ID, .core_reg_align = 0, .reg_offsets = bcm_sf2_4908_reg_offsets, - .num_cfp_rules = 0, /* FIXME */ + .num_cfp_rules = 256, + .num_crossbar_int_ports = 2, };
/* Register offsets for the SWITCH_REG_* block */ @@@ -1267,6 -1350,7 +1355,7 @@@ static int bcm_sf2_sw_probe(struct plat priv->reg_offsets = data->reg_offsets; priv->core_reg_align = data->core_reg_align; priv->num_cfp_rules = data->num_cfp_rules; + priv->num_crossbar_int_ports = data->num_crossbar_int_ports;
priv->rcdev = devm_reset_control_get_optional_exclusive(&pdev->dev, "switch"); @@@ -1340,6 -1424,8 +1429,8 @@@ goto out_clk_mdiv; }
+ bcm_sf2_crossbar_setup(priv); + bcm_sf2_gphy_enable_set(priv->dev->ds, true);
ret = bcm_sf2_mdio_register(ds); diff --combined drivers/net/dsa/mt7530.c index 9871d7cff93a,127856823a3b..c442a5885fca --- a/drivers/net/dsa/mt7530.c +++ b/drivers/net/dsa/mt7530.c @@@ -436,32 -436,34 +436,32 @@@ mt7530_pad_clk_setup(struct dsa_switch TD_DM_DRVP(8) | TD_DM_DRVN(8));
/* Setup core clock for MT7530 */ - if (!trgint) { - /* Disable MT7530 core clock */ - core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); - - /* Disable PLL, since phy_device has not yet been created - * provided for phy_[read,write]_mmd_indirect is called, we - * provide our own core_write_mmd_indirect to complete this - * function. - */ - core_write_mmd_indirect(priv, - CORE_GSWPLL_GRP1, - MDIO_MMD_VEND2, - 0); - - /* Set core clock into 500Mhz */ - core_write(priv, CORE_GSWPLL_GRP2, - RG_GSWPLL_POSDIV_500M(1) | - RG_GSWPLL_FBKDIV_500M(25)); - - /* Enable PLL */ - core_write(priv, CORE_GSWPLL_GRP1, - RG_GSWPLL_EN_PRE | - RG_GSWPLL_POSDIV_200M(2) | - RG_GSWPLL_FBKDIV_200M(32)); - - /* Enable MT7530 core clock */ - core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); - } + /* Disable MT7530 core clock */ + core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); + + /* Disable PLL, since phy_device has not yet been created + * provided for phy_[read,write]_mmd_indirect is called, we + * provide our own core_write_mmd_indirect to complete this + * function. + */ + core_write_mmd_indirect(priv, + CORE_GSWPLL_GRP1, + MDIO_MMD_VEND2, + 0); + + /* Set core clock into 500Mhz */ + core_write(priv, CORE_GSWPLL_GRP2, + RG_GSWPLL_POSDIV_500M(1) | + RG_GSWPLL_FBKDIV_500M(25)); + + /* Enable PLL */ + core_write(priv, CORE_GSWPLL_GRP1, + RG_GSWPLL_EN_PRE | + RG_GSWPLL_POSDIV_200M(2) | + RG_GSWPLL_FBKDIV_200M(32)); + + /* Enable MT7530 core clock */ + core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
/* Setup the MT7530 TRGMII Tx Clock */ core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN); @@@ -997,8 -999,9 +997,9 @@@ mt753x_cpu_port_enable(struct dsa_switc mt7530_write(priv, MT7530_PVC_P(port), PORT_SPEC_TAG);
- /* Unknown multicast frame forwarding to the cpu port */ - mt7530_rmw(priv, MT7530_MFC, UNM_FFP_MASK, UNM_FFP(BIT(port))); + /* Disable flooding by default */ + mt7530_rmw(priv, MT7530_MFC, BC_FFP_MASK | UNM_FFP_MASK | UNU_FFP_MASK, + BC_FFP(BIT(port)) | UNM_FFP(BIT(port)) | UNU_FFP(BIT(port)));
/* Set CPU port number */ if (priv->id == ID_MT7621) @@@ -1135,6 -1138,56 +1136,56 @@@ mt7530_stp_state_set(struct dsa_switch mt7530_rmw(priv, MT7530_SSP_P(port), FID_PST_MASK, stp_state); }
+ static int + mt7530_port_pre_bridge_flags(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) + { + if (flags.mask & ~(BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | + BR_BCAST_FLOOD)) + return -EINVAL; + + return 0; + } + + static int + mt7530_port_bridge_flags(struct dsa_switch *ds, int port, + struct switchdev_brport_flags flags, + struct netlink_ext_ack *extack) + { + struct mt7530_priv *priv = ds->priv; + + if (flags.mask & BR_LEARNING) + mt7530_rmw(priv, MT7530_PSC_P(port), SA_DIS, + flags.val & BR_LEARNING ? 0 : SA_DIS); + + if (flags.mask & BR_FLOOD) + mt7530_rmw(priv, MT7530_MFC, UNU_FFP(BIT(port)), + flags.val & BR_FLOOD ? UNU_FFP(BIT(port)) : 0); + + if (flags.mask & BR_MCAST_FLOOD) + mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)), + flags.val & BR_MCAST_FLOOD ? UNM_FFP(BIT(port)) : 0); + + if (flags.mask & BR_BCAST_FLOOD) + mt7530_rmw(priv, MT7530_MFC, BC_FFP(BIT(port)), + flags.val & BR_BCAST_FLOOD ? BC_FFP(BIT(port)) : 0); + + return 0; + } + + static int + mt7530_port_set_mrouter(struct dsa_switch *ds, int port, bool mrouter, + struct netlink_ext_ack *extack) + { + struct mt7530_priv *priv = ds->priv; + + mt7530_rmw(priv, MT7530_MFC, UNM_FFP(BIT(port)), + mrouter ? UNM_FFP(BIT(port)) : 0); + + return 0; + } + static int mt7530_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *bridge) @@@ -1346,6 -1399,59 +1397,59 @@@ err return 0; }
+ static int + mt7530_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) + { + struct mt7530_priv *priv = ds->priv; + const u8 *addr = mdb->addr; + u16 vid = mdb->vid; + u8 port_mask = 0; + int ret; + + mutex_lock(&priv->reg_mutex); + + mt7530_fdb_write(priv, vid, 0, addr, 0, STATIC_EMP); + if (!mt7530_fdb_cmd(priv, MT7530_FDB_READ, NULL)) + port_mask = (mt7530_read(priv, MT7530_ATRD) >> PORT_MAP) + & PORT_MAP_MASK; + + port_mask |= BIT(port); + mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_ENT); + ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, NULL); + + mutex_unlock(&priv->reg_mutex); + + return ret; + } + + static int + mt7530_port_mdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) + { + struct mt7530_priv *priv = ds->priv; + const u8 *addr = mdb->addr; + u16 vid = mdb->vid; + u8 port_mask = 0; + int ret; + + mutex_lock(&priv->reg_mutex); + + mt7530_fdb_write(priv, vid, 0, addr, 0, STATIC_EMP); + if (!mt7530_fdb_cmd(priv, MT7530_FDB_READ, NULL)) + port_mask = (mt7530_read(priv, MT7530_ATRD) >> PORT_MAP) + & PORT_MAP_MASK; + + port_mask &= ~BIT(port); + mt7530_fdb_write(priv, vid, port_mask, addr, -1, + port_mask ? STATIC_ENT : STATIC_EMP); + ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, NULL); + + mutex_unlock(&priv->reg_mutex); + + return ret; + } + static int mt7530_vlan_cmd(struct mt7530_priv *priv, enum mt7530_vlan_cmd cmd, u16 vid) { @@@ -1818,9 -1924,12 +1922,12 @@@ mt7530_setup(struct dsa_switch *ds ret = mt753x_cpu_port_enable(ds, i); if (ret) return ret; - } else + } else { mt7530_port_disable(ds, i);
+ /* Disable learning by default on all user ports */ + mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + } /* Enable consistent egress tag */ mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK, PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); @@@ -1982,9 -2091,13 +2089,13 @@@ mt7531_setup(struct dsa_switch *ds ret = mt753x_cpu_port_enable(ds, i); if (ret) return ret; - } else + } else { mt7530_port_disable(ds, i);
+ /* Disable learning by default on all user ports */ + mt7530_set(priv, MT7530_PSC_P(i), SA_DIS); + } + /* Enable consistent egress tag */ mt7530_rmw(priv, MT7530_PVC_P(i), PVC_EG_TAG_MASK, PVC_EG_TAG(MT7530_VLAN_EG_CONSISTENT)); @@@ -2706,11 -2819,16 +2817,16 @@@ static const struct dsa_switch_ops mt75 .port_change_mtu = mt7530_port_change_mtu, .port_max_mtu = mt7530_port_max_mtu, .port_stp_state_set = mt7530_stp_state_set, + .port_pre_bridge_flags = mt7530_port_pre_bridge_flags, + .port_bridge_flags = mt7530_port_bridge_flags, + .port_set_mrouter = mt7530_port_set_mrouter, .port_bridge_join = mt7530_port_bridge_join, .port_bridge_leave = mt7530_port_bridge_leave, .port_fdb_add = mt7530_port_fdb_add, .port_fdb_del = mt7530_port_fdb_del, .port_fdb_dump = mt7530_port_fdb_dump, + .port_mdb_add = mt7530_port_mdb_add, + .port_mdb_del = mt7530_port_mdb_del, .port_vlan_filtering = mt7530_port_vlan_filtering, .port_vlan_add = mt7530_port_vlan_add, .port_vlan_del = mt7530_port_vlan_del, diff --combined drivers/net/ethernet/intel/e1000e/netdev.c index a0948002ddf8,31b8726fd69b..88e9035b75cf --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@@ -25,6 -25,7 +25,7 @@@ #include <linux/pm_runtime.h> #include <linux/aer.h> #include <linux/prefetch.h> + #include <linux/suspend.h>
#include "e1000.h"
@@@ -5974,23 -5975,19 +5975,23 @@@ static void e1000_reset_task(struct wor struct e1000_adapter *adapter; adapter = container_of(work, struct e1000_adapter, reset_task);
+ rtnl_lock(); /* don't run the task if already down */ - if (test_bit(__E1000_DOWN, &adapter->state)) + if (test_bit(__E1000_DOWN, &adapter->state)) { + rtnl_unlock(); return; + }
if (!(adapter->flags & FLAG_RESTART_NOW)) { e1000e_dump(adapter); e_err("Reset adapter unexpectedly\n"); } e1000e_reinit_locked(adapter); + rtnl_unlock(); }
/** - * e1000_get_stats64 - Get System Network Statistics + * e1000e_get_stats64 - Get System Network Statistics * @netdev: network interface device structure * @stats: rtnl_link_stats64 pointer * @@@ -6163,7 -6160,7 +6164,7 @@@ static int e1000_mii_ioctl(struct net_d }
/** - * e1000e_hwtstamp_ioctl - control hardware time stamping + * e1000e_hwtstamp_set - control hardware time stamping * @netdev: network interface device structure * @ifr: interface request * @@@ -6821,7 -6818,7 +6822,7 @@@ static void e1000e_disable_aspm(struct }
/** - * e1000e_disable_aspm_locked Disable ASPM states. + * e1000e_disable_aspm_locked - Disable ASPM states. * @pdev: pointer to PCI device struct * @state: bit-mask of ASPM states to disable * @@@ -6922,6 -6919,12 +6923,12 @@@ static int __e1000_resume(struct pci_de return 0; }
+ static __maybe_unused int e1000e_pm_prepare(struct device *dev) + { + return pm_runtime_suspended(dev) && + pm_suspend_via_firmware(); + } + static __maybe_unused int e1000e_pm_suspend(struct device *dev) { struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev)); @@@ -7630,9 -7633,9 +7637,9 @@@ static int e1000_probe(struct pci_dev *
e1000_print_device_info(adapter);
- dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE); + dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_SMART_PREPARE);
- if (pci_dev_run_wake(pdev) && hw->mac.type < e1000_pch_cnp) + if (pci_dev_run_wake(pdev) && hw->mac.type != e1000_pch_cnp) pm_runtime_put_noidle(&pdev->dev);
return 0; @@@ -7855,6 -7858,7 +7862,7 @@@ MODULE_DEVICE_TABLE(pci, e1000_pci_tbl)
static const struct dev_pm_ops e1000_pm_ops = { #ifdef CONFIG_PM_SLEEP + .prepare = e1000e_pm_prepare, .suspend = e1000e_pm_suspend, .resume = e1000e_pm_resume, .freeze = e1000e_pm_freeze, diff --combined drivers/net/ethernet/intel/i40e/i40e_main.c index 17f3b800640e,14a1bad9af74..0f84ed0143e4 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@@ -2023,7 -2023,7 +2023,7 @@@ static void i40e_undo_add_filter_entrie }
/** - * i40e_next_entry - Get the next non-broadcast filter from a list + * i40e_next_filter - Get the next non-broadcast filter from a list * @next: pointer to filter in list * * Returns the next non-broadcast filter in the list. Required so that we @@@ -3258,17 -3258,6 +3258,17 @@@ static int i40e_configure_tx_ring(struc return 0; }
+/** + * i40e_rx_offset - Return expected offset into page to access data + * @rx_ring: Ring we are requesting offset of + * + * Returns the offset value for ring into the data buffer. + */ +static unsigned int i40e_rx_offset(struct i40e_ring *rx_ring) +{ + return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0; +} + /** * i40e_configure_rx_ring - Configure a receive ring context * @ring: The Rx ring to configure @@@ -3380,8 -3369,6 +3380,8 @@@ static int i40e_configure_rx_ring(struc else set_ring_build_skb_enabled(ring);
+ ring->rx_offset = i40e_rx_offset(ring); + /* cache tail for quicker writes, and clear the reg before use */ ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q); writel(0, ring->tail); @@@ -5204,7 -5191,7 +5204,7 @@@ static u8 i40e_pf_get_num_tc(struct i40 }
/** - * i40e_pf_get_pf_tc_map - Get bitmap for enabled traffic classes + * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes * @pf: PF being queried * * Return a bitmap for enabled traffic classes for this PF. @@@ -9467,7 -9454,7 +9467,7 @@@ static void i40e_fdir_flush_and_replay( }
/** - * i40e_get_current_atr_count - Get the count of total FD ATR filters programmed + * i40e_get_current_atr_cnt - Get the count of total FD ATR filters programmed * @pf: board private structure **/ u32 i40e_get_current_atr_cnt(struct i40e_pf *pf) diff --combined drivers/net/ethernet/intel/i40e/i40e_txrx.c index 5747a99122fb,895f59a06fdb..8b5820921377 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@@ -1569,6 -1569,17 +1569,6 @@@ void i40e_free_rx_resources(struct i40e } }
-/** - * i40e_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. - */ -static unsigned int i40e_rx_offset(struct i40e_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0; -} - /** * i40e_setup_rx_descriptors - Allocate Rx descriptors * @rx_ring: Rx descriptor ring (for a specific queue) to setup @@@ -1597,6 -1608,7 +1597,6 @@@ int i40e_setup_rx_descriptors(struct i4 rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; - rx_ring->rx_offset = i40e_rx_offset(rx_ring);
/* XDP RX-queue info only needed for RX rings exposed to XDP */ if (rx_ring->vsi->type == I40E_VSI_MAIN) { @@@ -3333,7 -3345,7 +3333,7 @@@ static int i40e_tx_enable_csum(struct s }
/** - * i40e_create_tx_ctx Build the Tx context descriptor + * i40e_create_tx_ctx - Build the Tx context descriptor * @tx_ring: ring to create the descriptor on * @cd_type_cmd_tso_mss: Quad Word 1 * @cd_tunneling: Quad Word 0 - bits 0-31 diff --combined drivers/net/ethernet/intel/ice/ice_txrx.c index b91dcfd12727,6d87dd9d456e..3148e78adc36 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@@ -443,6 -443,22 +443,6 @@@ void ice_free_rx_ring(struct ice_ring * } }
-/** - * ice_rx_offset - Return expected offset into page to access data - * @rx_ring: Ring we are requesting offset of - * - * Returns the offset value for ring into the data buffer. - */ -static unsigned int ice_rx_offset(struct ice_ring *rx_ring) -{ - if (ice_ring_uses_build_skb(rx_ring)) - return ICE_SKB_PAD; - else if (ice_is_xdp_ena_vsi(rx_ring->vsi)) - return XDP_PACKET_HEADROOM; - - return 0; -} - /** * ice_setup_rx_ring - Allocate the Rx descriptors * @rx_ring: the Rx ring to set up @@@ -477,6 -493,7 +477,6 @@@ int ice_setup_rx_ring(struct ice_ring *
rx_ring->next_to_use = 0; rx_ring->next_to_clean = 0; - rx_ring->rx_offset = ice_rx_offset(rx_ring);
if (ice_is_xdp_ena_vsi(rx_ring->vsi)) WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog); @@@ -1098,6 -1115,11 +1098,11 @@@ int ice_clean_rx_irq(struct ice_ring *r dma_rmb();
if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) { + struct ice_vsi *ctrl_vsi = rx_ring->vsi; + + if (rx_desc->wb.rxdid == FDIR_DESC_RXDID && + ctrl_vsi->vf_id != ICE_INVAL_VFID) + ice_vc_fdir_irq_handler(ctrl_vsi, rx_desc); ice_put_rx_buf(rx_ring, NULL, 0); cleaned_count++; continue; diff --combined drivers/net/ethernet/intel/ice/ice_xsk.c index 9f94d9159acd,727f277e9d75..17ab8ef024ad --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@@ -358,18 -358,18 +358,18 @@@ xsk_pool_if_up * This function allocates a number of Rx buffers from the fill ring * or the internal recycle mechanism and places them on the Rx ring. * - * Returns false if all allocations were successful, true if any fail. + * Returns true if all allocations were successful, false if any fail. */ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) { union ice_32b_rx_flex_desc *rx_desc; u16 ntu = rx_ring->next_to_use; struct ice_rx_buf *rx_buf; - bool ret = false; + bool ok = true; dma_addr_t dma;
if (!count) - return false; + return true;
rx_desc = ICE_RX_DESC(rx_ring, ntu); rx_buf = &rx_ring->rx_buf[ntu]; @@@ -377,7 -377,7 +377,7 @@@ do { rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_pool); if (!rx_buf->xdp) { - ret = true; + ok = false; break; }
@@@ -402,7 -402,7 +402,7 @@@ ice_release_rx_desc(rx_ring, ntu); }
- return ret; + return ok; }
/** @@@ -473,6 -473,14 +473,14 @@@ ice_run_xdp_zc(struct ice_ring *rx_ring xdp_prog = READ_ONCE(rx_ring->xdp_prog);
act = bpf_prog_run_xdp(xdp_prog, xdp); + + if (likely(act == XDP_REDIRECT)) { + err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); + result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED; + rcu_read_unlock(); + return result; + } + switch (act) { case XDP_PASS: break; @@@ -480,10 -488,6 +488,6 @@@ xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index]; result = ice_xmit_xdp_buff(xdp, xdp_ring); break; - case XDP_REDIRECT: - err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); - result = !err ? ICE_XDP_REDIR : ICE_XDP_CONSUMED; - break; default: bpf_warn_invalid_xdp_action(act); fallthrough; diff --combined drivers/net/ethernet/intel/igb/igb_main.c index a45cd2b416c8,854d19fbf4a4..b83966aa6647 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@@ -2037,7 -2037,7 +2037,7 @@@ static void igb_power_down_link(struct }
/** - * Detect and switch function for Media Auto Sense + * igb_check_swap_media - Detect and switch function for Media Auto Sense * @adapter: address of the board private structure **/ static void igb_check_swap_media(struct igb_adapter *adapter) @@@ -3115,7 -3115,7 +3115,7 @@@ static s32 igb_init_i2c(struct igb_adap return 0;
/* Initialize the i2c bus which is controlled by the registers. - * This bus will use the i2c_algo_bit structue that implements + * This bus will use the i2c_algo_bit structure that implements * the protocol through toggling of the 4 bits in the register. */ adapter->i2c_adap.owner = THIS_MODULE; @@@ -4020,7 -4020,7 +4020,7 @@@ static int igb_sw_init(struct igb_adapt }
/** - * igb_open - Called when a network interface is made active + * __igb_open - Called when a network interface is made active * @netdev: network interface device structure * @resuming: indicates whether we are in a resume call * @@@ -4138,7 -4138,7 +4138,7 @@@ int igb_open(struct net_device *netdev }
/** - * igb_close - Disables a network interface + * __igb_close - Disables a network interface * @netdev: network interface device structure * @suspending: indicates we are in a suspend call * @@@ -5856,7 -5856,7 +5856,7 @@@ static void igb_tx_ctxtdesc(struct igb_ */ if (tx_ring->launchtime_enable) { ts = ktime_to_timespec64(first->skb->tstamp); - first->skb->tstamp = ktime_set(0, 0); + skb_txtime_consumed(first->skb); context_desc->seqnum_seed = cpu_to_le32(ts.tv_nsec / 32); } else { context_desc->seqnum_seed = 0; @@@ -8214,8 -8214,7 +8214,8 @@@ static void igb_reuse_rx_page(struct ig new_buff->pagecnt_bias = old_buff->pagecnt_bias; }
-static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer) +static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, + int rx_buf_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@@ -8226,7 -8225,7 +8226,7 @@@
#if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1)) return false; #else #define IGB_LAST_OFFSET \ @@@ -8302,10 -8301,9 +8302,10 @@@ static struct sk_buff *igb_construct_sk return NULL;
if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb); - xdp->data += IGB_TS_HDR_LEN; - size -= IGB_TS_HDR_LEN; + if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb)) { + xdp->data += IGB_TS_HDR_LEN; + size -= IGB_TS_HDR_LEN; + } }
/* Determine available headroom for copy */ @@@ -8366,8 -8364,8 +8366,8 @@@ static struct sk_buff *igb_build_skb(st
/* pull timestamp out of packet data */ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb); - __skb_pull(skb, IGB_TS_HDR_LEN); + if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb)) + __skb_pull(skb, IGB_TS_HDR_LEN); }
/* update buffer offset */ @@@ -8616,17 -8614,11 +8616,17 @@@ static unsigned int igb_rx_offset(struc }
static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring, - const unsigned int size) + const unsigned int size, int *rx_buf_pgcnt) { struct igb_rx_buffer *rx_buffer;
rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + *rx_buf_pgcnt = +#if (PAGE_SIZE < 8192) + page_count(rx_buffer->page); +#else + 0; +#endif prefetchw(rx_buffer->page);
/* we are reusing so sync this buffer for CPU use */ @@@ -8642,9 -8634,9 +8642,9 @@@ }
static void igb_put_rx_buffer(struct igb_ring *rx_ring, - struct igb_rx_buffer *rx_buffer) + struct igb_rx_buffer *rx_buffer, int rx_buf_pgcnt) { - if (igb_can_reuse_rx_page(rx_buffer)) { + if (igb_can_reuse_rx_page(rx_buffer, rx_buf_pgcnt)) { /* hand second half of page back to the ring */ igb_reuse_rx_page(rx_ring, rx_buffer); } else { @@@ -8672,7 -8664,6 +8672,7 @@@ static int igb_clean_rx_irq(struct igb_ unsigned int xdp_xmit = 0; struct xdp_buff xdp; u32 frame_sz = 0; + int rx_buf_pgcnt;
/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ #if (PAGE_SIZE < 8192) @@@ -8702,7 -8693,7 +8702,7 @@@ */ dma_rmb();
- rx_buffer = igb_get_rx_buffer(rx_ring, size); + rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt);
/* retrieve a buffer from the ring */ if (!skb) { @@@ -8745,7 -8736,7 +8745,7 @@@ break; }
- igb_put_rx_buffer(rx_ring, rx_buffer); + igb_put_rx_buffer(rx_ring, rx_buffer, rx_buf_pgcnt); cleaned_count++;
/* fetch next buffer in frame if non-eop */ diff --combined drivers/net/ethernet/intel/igb/igb_ptp.c index 86a576201f5f,f3ff565da0a1..ba61fe9bfaf4 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@@ -856,9 -856,6 +856,9 @@@ static void igb_ptp_tx_hwtstamp(struct dev_kfree_skb_any(skb); }
+#define IGB_RET_PTP_DISABLED 1 +#define IGB_RET_PTP_INVALID 2 + /** * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp * @q_vector: Pointer to interrupt specific structure @@@ -867,29 -864,19 +867,29 @@@ * * This function is meant to retrieve a timestamp from the first buffer of an * incoming frame. The value is stored in little endian format starting on - * byte 8. + * byte 8 + * + * Returns: 0 if success, nonzero if failure **/ -void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, - struct sk_buff *skb) +int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, + struct sk_buff *skb) { - __le64 *regval = (__le64 *)va; struct igb_adapter *adapter = q_vector->adapter; + __le64 *regval = (__le64 *)va; int adjust = 0;
+ if (!(adapter->ptp_flags & IGB_PTP_ENABLED)) + return IGB_RET_PTP_DISABLED; + /* The timestamp is recorded in little endian format. * DWORD: 0 1 2 3 * Field: Reserved Reserved SYSTIML SYSTIMH */ + + /* check reserved dwords are zero, be/le doesn't matter for zero */ + if (regval[0]) + return IGB_RET_PTP_INVALID; + igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), le64_to_cpu(regval[1]));
@@@ -909,8 -896,6 +909,8 @@@ } skb_hwtstamps(skb)->hwtstamp = ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); + + return 0; }
/** @@@ -921,15 -906,13 +921,15 @@@ * This function is meant to retrieve a timestamp from the internal registers * of the adapter and store it in the skb. **/ -void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, - struct sk_buff *skb) +void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb) { struct igb_adapter *adapter = q_vector->adapter; struct e1000_hw *hw = &adapter->hw; - u64 regval; int adjust = 0; + u64 regval; + + if (!(adapter->ptp_flags & IGB_PTP_ENABLED)) + return;
/* If this bit is set, then the RX registers contain the time stamp. No * other packet will be time stamped until we read these registers, so @@@ -1025,6 -1008,7 +1025,7 @@@ static int igb_ptp_set_timestamp_mode(s switch (config->tx_type) { case HWTSTAMP_TX_OFF: tsync_tx_ctl = 0; + break; case HWTSTAMP_TX_ON: break; default: diff --combined drivers/net/ethernet/intel/igc/igc_main.c index 4d989ebc9713,a476837eafca..baa45a1f3a65 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@@ -941,7 -941,7 +941,7 @@@ static void igc_tx_ctxtdesc(struct igc_ struct igc_adapter *adapter = netdev_priv(tx_ring->netdev); ktime_t txtime = first->skb->tstamp;
- first->skb->tstamp = ktime_set(0, 0); + skb_txtime_consumed(first->skb); context_desc->launch_time = igc_tx_launchtime(adapter, txtime); } else { @@@ -3580,7 -3580,7 +3580,7 @@@ void igc_up(struct igc_adapter *adapter netif_tx_start_all_queues(adapter->netdev);
/* start the watchdog. */ - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; schedule_work(&adapter->watchdog_task); }
@@@ -3831,19 -3831,10 +3831,19 @@@ static void igc_reset_task(struct work_
adapter = container_of(work, struct igc_adapter, reset_task);
+ rtnl_lock(); + /* If we're already down or resetting, just bail */ + if (test_bit(__IGC_DOWN, &adapter->state) || + test_bit(__IGC_RESETTING, &adapter->state)) { + rtnl_unlock(); + return; + } + igc_rings_dump(adapter); igc_regs_dump(adapter); netdev_err(adapter->netdev, "Reset adapter\n"); igc_reinit_locked(adapter); + rtnl_unlock(); }
/** @@@ -4009,7 -4000,7 +4009,7 @@@ static irqreturn_t igc_msix_other(int i }
if (icr & IGC_ICR_LSC) { - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; /* guard against interrupt when we're going down */ if (!test_bit(__IGC_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); @@@ -4387,7 -4378,7 +4387,7 @@@ static irqreturn_t igc_intr_msi(int irq }
if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; if (!test_bit(__IGC_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); } @@@ -4429,7 -4420,7 +4429,7 @@@ static irqreturn_t igc_intr(int irq, vo }
if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) { - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; /* guard against interrupt when we're going down */ if (!test_bit(__IGC_DOWN, &adapter->state)) mod_timer(&adapter->watchdog_timer, jiffies + 1); @@@ -4583,7 -4574,7 +4583,7 @@@ static int __igc_open(struct net_devic netif_tx_start_all_queues(netdev);
/* start the watchdog. */ - hw->mac.get_link_status = 1; + hw->mac.get_link_status = true; schedule_work(&adapter->watchdog_task);
return IGC_SUCCESS; @@@ -4924,7 -4915,7 +4924,7 @@@ int igc_set_spd_dplx(struct igc_adapte { struct igc_mac_info *mac = &adapter->hw.mac;
- mac->autoneg = 0; + mac->autoneg = false;
/* Make sure dplx is at most 1 bit and lsb of speed is not set * for the switch() below to work @@@ -4946,13 -4937,13 +4946,13 @@@ mac->forced_speed_duplex = ADVERTISE_100_FULL; break; case SPEED_1000 + DUPLEX_FULL: - mac->autoneg = 1; + mac->autoneg = true; adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; break; case SPEED_1000 + DUPLEX_HALF: /* not supported */ goto err_inval; case SPEED_2500 + DUPLEX_FULL: - mac->autoneg = 1; + mac->autoneg = true; adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL; break; case SPEED_2500 + DUPLEX_HALF: /* not supported */ diff --combined drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 03d9aad516d4,4c90f83fd6ce..19fe21116fe8 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@@ -225,7 -225,7 +225,7 @@@ static s32 ixgbe_get_parent_bus_info(st }
/** - * ixgbe_check_from_parent - Determine whether PCIe info should come from parent + * ixgbe_pcie_from_parent - Determine whether PCIe info should come from parent * @hw: hw specific details * * This function is used by probe to determine whether a device's PCI-Express @@@ -4118,8 -4118,6 +4118,8 @@@ void ixgbe_configure_rx_ring(struct ixg #endif }
+ ring->rx_offset = ixgbe_rx_offset(ring); + if (ring->xsk_pool && hw->mac.type != ixgbe_mac_82599EB) { u32 xsk_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
@@@ -6158,7 -6156,7 +6158,7 @@@ void ixgbe_down(struct ixgbe_adapter *a }
/** - * ixgbe_eee_capable - helper function to determine EEE support on X550 + * ixgbe_set_eee_capable - helper function to determine EEE support on X550 * @adapter: board private structure */ static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter) @@@ -6580,6 -6578,7 +6580,6 @@@ int ixgbe_setup_rx_resources(struct ixg
rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; - rx_ring->rx_offset = ixgbe_rx_offset(rx_ring);
/* XDP RX-queue info */ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, diff --combined drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 76f399229ddb,baaba01bd8c5..c2cc4806d13c --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@@ -548,6 -548,12 +548,12 @@@ static inline int is_afvf(u16 pcifunc return !(pcifunc & ~RVU_PFVF_FUNC_MASK); }
+ /* check if PF_FUNC is AF */ + static inline bool is_pffunc_af(u16 pcifunc) + { + return !pcifunc; + } + static inline bool is_rvu_fwdata_valid(struct rvu *rvu) { return (rvu->fwdata->header_magic == RVU_FWDATA_HEADER_MAGIC) && @@@ -640,7 -646,8 +646,8 @@@ int npc_config_ts_kpuaction(struct rvu void rvu_npc_install_ucast_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan, u8 *mac_addr); void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, - int nixlf, u64 chan, bool allmulti); + int nixlf, u64 chan, u8 chan_cnt, + bool allmulti); void rvu_npc_disable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_enable_promisc_entry(struct rvu *rvu, u16 pcifunc, int nixlf); void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, @@@ -665,9 -672,6 +672,6 @@@ int rvu_npc_get_tx_nibble_cfg(struct rv int npc_mcam_verify_channel(struct rvu *rvu, u16 pcifunc, u8 intf, u16 channel); int npc_flow_steering_init(struct rvu *rvu, int blkaddr); const char *npc_get_field_name(u8 hdr); - bool rvu_npc_write_default_rule(struct rvu *rvu, int blkaddr, int nixlf, - u16 pcifunc, u8 intf, struct mcam_entry *entry, - int *entry_index); int npc_get_bank(struct npc_mcam *mcam, int index); void npc_mcam_enable_flows(struct rvu *rvu, u16 target); void npc_mcam_disable_flows(struct rvu *rvu, u16 target); @@@ -678,8 -682,12 +682,13 @@@ void npc_read_mcam_entry(struct rvu *rv u8 *intf, u8 *ena); bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature); u32 rvu_cgx_get_fifolen(struct rvu *rvu); +void *rvu_first_cgx_pdata(struct rvu *rvu);
+ int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, u16 pcifunc, int nixlf, + int type); + bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, int blkaddr, + int index); + /* CPT APIs */ int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot);
diff --combined drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index b4c53b19f535,741da112fdf0..8ec17ee72b5d --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@@ -234,14 -234,12 +234,14 @@@ static ssize_t rvu_dbg_rsrc_attach_stat char __user *buffer, size_t count, loff_t *ppos) { - int index, off = 0, flag = 0, go_back = 0, off_prev; + int index, off = 0, flag = 0, go_back = 0, len = 0; struct rvu *rvu = filp->private_data; int lf, pf, vf, pcifunc; struct rvu_block block; int bytes_not_copied; + int lf_str_size = 12; int buf_size = 2048; + char *lfs; char *buf;
/* don't allow partial reads */ @@@ -251,18 -249,12 +251,18 @@@ buf = kzalloc(buf_size, GFP_KERNEL); if (!buf) return -ENOSPC; - off += scnprintf(&buf[off], buf_size - 1 - off, "\npcifunc\t\t"); + + lfs = kzalloc(lf_str_size, GFP_KERNEL); + if (!lfs) + return -ENOMEM; + off += scnprintf(&buf[off], buf_size - 1 - off, "%-*s", lf_str_size, + "pcifunc"); for (index = 0; index < BLK_COUNT; index++) - if (strlen(rvu->hw->block[index].name)) - off += scnprintf(&buf[off], buf_size - 1 - off, - "%*s\t", (index - 1) * 2, - rvu->hw->block[index].name); + if (strlen(rvu->hw->block[index].name)) { + off += scnprintf(&buf[off], buf_size - 1 - off, + "%-*s", lf_str_size, + rvu->hw->block[index].name); + } off += scnprintf(&buf[off], buf_size - 1 - off, "\n"); for (pf = 0; pf < rvu->hw->total_pfs; pf++) { for (vf = 0; vf <= rvu->hw->total_vfs; vf++) { @@@ -271,15 -263,14 +271,15 @@@ continue;
if (vf) { + sprintf(lfs, "PF%d:VF%d", pf, vf - 1); go_back = scnprintf(&buf[off], buf_size - 1 - off, - "PF%d:VF%d\t\t", pf, - vf - 1); + "%-*s", lf_str_size, lfs); } else { + sprintf(lfs, "PF%d", pf); go_back = scnprintf(&buf[off], buf_size - 1 - off, - "PF%d\t\t", pf); + "%-*s", lf_str_size, lfs); }
off += go_back; @@@ -287,22 -278,20 +287,22 @@@ block = rvu->hw->block[index]; if (!strlen(block.name)) continue; - off_prev = off; + len = 0; + lfs[len] = '\0'; for (lf = 0; lf < block.lf.max; lf++) { if (block.fn_map[lf] != pcifunc) continue; flag = 1; - off += scnprintf(&buf[off], buf_size - 1 - - off, "%3d,", lf); + len += sprintf(&lfs[len], "%d,", lf); } - if (flag && off_prev != off) - off--; - else - go_back++; + + if (flag) + len--; + lfs[len] = '\0'; off += scnprintf(&buf[off], buf_size - 1 - off, - "\t"); + "%-*s", lf_str_size, lfs); + if (!strlen(lfs)) + go_back += lf_str_size; } if (!flag) off -= go_back; @@@ -314,7 -303,6 +314,7 @@@ }
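The rewrite above switches the debugfs table from tab-based alignment to fixed-width columns built in a scratch string, so a long LF list can no longer shift the columns that follow it. The printf mechanics in isolation (lf_str_size = 12 is the width chosen in this hunk):

    char buf[32];

    /* "%-*s" left-justifies and pads to the given field width */
    scnprintf(buf, sizeof(buf), "%-*s", 12, "PF1:VF3");
    /* buf now holds "PF1:VF3     " -- exactly 12 characters, so every column
     * starts at a multiple of lf_str_size regardless of the entry length.
     */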
bytes_not_copied = copy_to_user(buffer, buf, off); + kfree(lfs); kfree(buf);
if (bytes_not_copied) @@@ -331,6 -319,7 +331,6 @@@ static int rvu_dbg_rvu_pf_cgx_map_displ struct rvu *rvu = filp->private; struct pci_dev *pdev = NULL; struct mac_ops *mac_ops; - int rvu_def_cgx_id = 0; char cgx[10], lmac[10]; struct rvu_pfvf *pfvf; int pf, domain, blkid; @@@ -338,10 -327,7 +338,10 @@@ u16 pcifunc;
domain = 2; - mac_ops = get_mac_ops(rvu_cgx_pdata(rvu_def_cgx_id, rvu)); + mac_ops = get_mac_ops(rvu_first_cgx_pdata(rvu)); + /* There can be no CGX devices at all */ + if (!mac_ops) + return 0; seq_printf(filp, "PCI dev\t\tRVU PF Func\tNIX block\t%s\tLMAC\n", mac_ops->name); for (pf = 0; pf < rvu->hw->total_pfs; pf++) { @@@ -1832,6 -1818,7 +1832,6 @@@ static void rvu_dbg_cgx_init(struct rv { struct mac_ops *mac_ops; unsigned long lmac_bmap; - int rvu_def_cgx_id = 0; int i, lmac_id; char dname[20]; void *cgx; @@@ -1839,7 -1826,7 +1839,7 @@@ if (!cgx_get_cgxcnt_max()) return;
- mac_ops = get_mac_ops(rvu_cgx_pdata(rvu_def_cgx_id, rvu)); + mac_ops = get_mac_ops(rvu_first_cgx_pdata(rvu)); if (!mac_ops) return;
@@@ -2015,7 -2002,7 +2015,7 @@@ static void rvu_dbg_npc_mcam_show_flows seq_printf(s, "mask 0x%x\n", ntohs(rule->mask.etype)); break; case NPC_OUTER_VID: - seq_printf(s, "%d ", ntohs(rule->packet.vlan_tci)); + seq_printf(s, "0x%x ", ntohs(rule->packet.vlan_tci)); seq_printf(s, "mask 0x%x\n", ntohs(rule->mask.vlan_tci)); break; @@@ -2158,7 -2145,7 +2158,7 @@@ static int rvu_dbg_npc_mcam_show_rules( seq_printf(s, "\tmcam entry: %d\n", iter->entry);
rvu_dbg_npc_mcam_show_flows(s, iter); - if (iter->intf == NIX_INTF_RX) { + if (is_npc_intf_rx(iter->intf)) { target = iter->rx_action.pf_func; pf = (target >> RVU_PFVF_PF_SHIFT) & RVU_PFVF_PF_MASK; seq_printf(s, "\tForward to: PF%d ", pf); diff --combined drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 3d068b7d46bd,a87104121344..0a8bd667cb11 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@@ -273,7 -273,8 +273,8 @@@ static int nix_interface_init(struct rv pfvf->rx_chan_cnt = 1; pfvf->tx_chan_cnt = 1; rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, - pfvf->rx_chan_base, false); + pfvf->rx_chan_base, + pfvf->rx_chan_cnt, false); break; }
@@@ -2629,7 -2630,7 +2630,7 @@@ static int set_flowkey_fields(struct ni struct nix_rx_flowkey_alg *field; struct nix_rx_flowkey_alg tmp; u32 key_type, valid_key; - int l4_key_offset; + int l4_key_offset = 0;
if (!alg) return -EINVAL; @@@ -3088,7 -3089,8 +3089,8 @@@ int rvu_mbox_handler_nix_set_rx_mode(st rvu_npc_disable_promisc_entry(rvu, pcifunc, nixlf); else rvu_npc_install_promisc_entry(rvu, pcifunc, nixlf, - pfvf->rx_chan_base, allmulti); + pfvf->rx_chan_base, + pfvf->rx_chan_cnt, allmulti); return 0; }
@@@ -3635,9 -3637,7 +3637,7 @@@ int rvu_mbox_handler_nix_lf_stop_rx(str if (err) return err;
- rvu_npc_disable_default_entries(rvu, pcifunc, nixlf); - - npc_mcam_disable_flows(rvu, pcifunc); + rvu_npc_disable_mcam_entries(rvu, pcifunc, nixlf);
return rvu_cgx_start_stop_io(rvu, pcifunc, false); } diff --combined drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 0bd49c7080a6,16d7797b7a14..0bc4529691ec --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@@ -22,10 -22,6 +22,6 @@@ #define RSVD_MCAM_ENTRIES_PER_PF 2 /* Bcast & Promisc */ #define RSVD_MCAM_ENTRIES_PER_NIXLF 1 /* Ucast for LFs */
- #define NIXLF_UCAST_ENTRY 0 - #define NIXLF_BCAST_ENTRY 1 - #define NIXLF_PROMISC_ENTRY 2 - #define NPC_PARSE_RESULT_DMAC_OFFSET 8 #define NPC_HW_TSTAMP_OFFSET 8 #define NPC_KEX_CHAN_MASK 0xFFFULL @@@ -96,6 -92,10 +92,10 @@@ int npc_mcam_verify_channel(struct rvu if (is_npc_intf_tx(intf)) return 0;
+ /* return in case of AF installed rules */ + if (is_pffunc_af(pcifunc)) + return 0; + if (is_afvf(pcifunc)) { end = rvu_get_num_lbk_chans(); if (end < 0) @@@ -196,8 -196,8 +196,8 @@@ static int npc_get_ucast_mcam_index(str return mcam->nixlf_offset + (max + nixlf) * RSVD_MCAM_ENTRIES_PER_NIXLF; }
- static int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, - u16 pcifunc, int nixlf, int type) + int npc_get_nixlf_mcam_index(struct npc_mcam *mcam, + u16 pcifunc, int nixlf, int type) { int pf = rvu_get_pf(pcifunc); int index; @@@ -230,8 -230,8 +230,8 @@@ int npc_get_bank(struct npc_mcam *mcam return bank; }
- static bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, - int blkaddr, int index) + bool is_mcam_entry_enabled(struct rvu *rvu, struct npc_mcam *mcam, + int blkaddr, int index) { int bank = npc_get_bank(mcam, index); u64 cfg; @@@ -647,13 -647,17 +647,17 @@@ void rvu_npc_install_ucast_entry(struc }
void rvu_npc_install_promisc_entry(struct rvu *rvu, u16 pcifunc, - int nixlf, u64 chan, bool allmulti) + int nixlf, u64 chan, u8 chan_cnt, + bool allmulti) { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); + struct npc_install_flow_req req = { 0 }; + struct npc_install_flow_rsp rsp = { 0 }; struct npc_mcam *mcam = &rvu->hw->mcam; - int blkaddr, ucast_idx, index, kwi; - struct mcam_entry entry = { {0} }; - struct nix_rx_action action = { }; + int blkaddr, ucast_idx, index; + u8 mac_addr[ETH_ALEN] = { 0 }; + struct nix_rx_action action; + u64 relaxed_mask;
/* Only PF or AF VF can add a promiscuous entry */ if ((pcifunc & RVU_PFVF_FUNC_MASK) && !is_afvf(pcifunc)) @@@ -663,24 -667,15 +667,15 @@@ if (blkaddr < 0) return;
+ *(u64 *)&action = 0x00; index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_PROMISC_ENTRY);
- entry.kw[0] = chan; - entry.kw_mask[0] = 0xFFFULL; - - if (allmulti) { - kwi = NPC_KEXOF_DMAC / sizeof(u64); - entry.kw[kwi] = BIT_ULL(40); /* LSB bit of 1st byte in DMAC */ - entry.kw_mask[kwi] = BIT_ULL(40); - } - - ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, - nixlf, NIXLF_UCAST_ENTRY); - /* If the corresponding PF's ucast action is RSS, * use the same action for promisc also */ + ucast_idx = npc_get_nixlf_mcam_index(mcam, pcifunc, + nixlf, NIXLF_UCAST_ENTRY); if (is_mcam_entry_enabled(rvu, mcam, blkaddr, ucast_idx)) *(u64 *)&action = npc_get_mcam_action(rvu, mcam, blkaddr, ucast_idx); @@@ -691,9 -686,36 +686,36 @@@ action.pf_func = pcifunc; }
- entry.action = *(u64 *)&action; - npc_config_mcam_entry(rvu, mcam, blkaddr, index, - pfvf->nix_rx_intf, &entry, true); + if (allmulti) { + mac_addr[0] = 0x01; /* LSB bit of 1st byte in DMAC */ + ether_addr_copy(req.packet.dmac, mac_addr); + ether_addr_copy(req.mask.dmac, mac_addr); + req.features = BIT_ULL(NPC_DMAC); + } + + req.chan_mask = 0xFFFU; + if (chan_cnt > 1) { + if (!is_power_of_2(chan_cnt)) { + dev_err(rvu->dev, + "%s: channel count more than 1, must be power of 2\n", __func__); + return; + } + relaxed_mask = GENMASK_ULL(BITS_PER_LONG_LONG - 1, + ilog2(chan_cnt)); + req.chan_mask &= relaxed_mask; + } + + req.channel = chan; + req.intf = pfvf->nix_rx_intf; + req.entry = index; + req.op = action.op; + req.hdr.pcifunc = 0; /* AF is requester */ + req.vf = pcifunc; + req.index = action.index; + req.match_id = action.match_id; + req.flow_key_alg = action.flow_key_alg; + + rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); }
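The chan_mask relaxation above lets a single promiscuous MCAM entry cover a range of consecutive channels instead of needing one entry per channel. A worked example, assuming (as the power-of-2 restriction implies) that the base channel is naturally aligned to chan_cnt:

    /*
     * chan_cnt = 16                        ->  ilog2(chan_cnt) = 4
     * relaxed_mask = GENMASK_ULL(63, 4)    =   0xfffffffffffffff0
     * chan_mask    = 0xFFF & relaxed_mask  =   0xFF0
     *
     * One entry with channel = chan, mask = 0xFF0 then matches
     * chan, chan + 1, ..., chan + 15. Non-power-of-2 counts are rejected
     * because they cannot be expressed as a single prefix mask.
     */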
static void npc_enadis_promisc_entry(struct rvu *rvu, u16 pcifunc, @@@ -728,12 -750,14 +750,14 @@@ void rvu_npc_enable_promisc_entry(struc void rvu_npc_install_bcast_match_entry(struct rvu *rvu, u16 pcifunc, int nixlf, u64 chan) { + struct rvu_pfvf *pfvf; + struct npc_install_flow_req req = { 0 }; + struct npc_install_flow_rsp rsp = { 0 }; struct npc_mcam *mcam = &rvu->hw->mcam; - struct mcam_entry entry = { {0} }; struct rvu_hwinfo *hw = rvu->hw; - struct nix_rx_action action; - struct rvu_pfvf *pfvf; int blkaddr, index; + u32 req_index = 0; + u8 op;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); if (blkaddr < 0) @@@ -755,32 -779,29 +779,29 @@@ index = npc_get_nixlf_mcam_index(mcam, pcifunc, nixlf, NIXLF_BCAST_ENTRY);
- /* Match ingress channel */ - entry.kw[0] = chan; - entry.kw_mask[0] = 0xfffull; - - /* Match broadcast MAC address. - * DMAC is extracted at 0th bit of PARSE_KEX::KW1 - */ - entry.kw[1] = 0xffffffffffffull; - entry.kw_mask[1] = 0xffffffffffffull; - - *(u64 *)&action = 0x00; if (!hw->cap.nix_rx_multicast) { /* Early silicon doesn't support pkt replication, * so install entry with UCAST action, so that PF * receives all broadcast packets. */ - action.op = NIX_RX_ACTIONOP_UCAST; - action.pf_func = pcifunc; + op = NIX_RX_ACTIONOP_UCAST; } else { - action.index = pfvf->bcast_mce_idx; - action.op = NIX_RX_ACTIONOP_MCAST; + op = NIX_RX_ACTIONOP_MCAST; + req_index = pfvf->bcast_mce_idx; }
- entry.action = *(u64 *)&action; - npc_config_mcam_entry(rvu, mcam, blkaddr, index, - pfvf->nix_rx_intf, &entry, true); + eth_broadcast_addr((u8 *)&req.packet.dmac); + eth_broadcast_addr((u8 *)&req.mask.dmac); + req.features = BIT_ULL(NPC_DMAC); + req.channel = chan; + req.intf = pfvf->nix_rx_intf; + req.entry = index; + req.op = op; + req.hdr.pcifunc = 0; /* AF is requester */ + req.vf = pcifunc; + req.index = req_index; + + rvu_mbox_handler_npc_install_flow(rvu, &req, &rsp); }
void rvu_npc_enable_bcast_entry(struct rvu *rvu, u16 pcifunc, bool enable) @@@ -967,7 -988,7 +988,7 @@@ void rvu_npc_disable_mcam_entries(struc { struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); struct npc_mcam *mcam = &rvu->hw->mcam; - struct rvu_npc_mcam_rule *rule; + struct rvu_npc_mcam_rule *rule, *tmp; int blkaddr;
blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); @@@ -977,15 -998,18 +998,18 @@@ mutex_lock(&mcam->lock);
/* Disable MCAM entries directing traffic to this 'pcifunc' */ - list_for_each_entry(rule, &mcam->mcam_rules, list) { + list_for_each_entry_safe(rule, tmp, &mcam->mcam_rules, list) { if (is_npc_intf_rx(rule->intf) && rule->rx_action.pf_func == pcifunc) { npc_enable_mcam_entry(rvu, mcam, blkaddr, rule->entry, false); rule->enable = false; /* Indicate that default rule is disabled */ - if (rule->default_rule) + if (rule->default_rule) { pfvf->def_ucast_rule = NULL; + list_del(&rule->list); + kfree(rule); + } } }
@@@ -1674,6 -1698,9 +1698,9 @@@ void rvu_npc_get_mcam_counter_alloc_inf static int npc_mcam_verify_entry(struct npc_mcam *mcam, u16 pcifunc, int entry) { + /* verify AF installed entries */ + if (is_pffunc_af(pcifunc)) + return 0; /* Verify if entry is valid and if it is indeed * allocated to the requesting PFFUNC. */ @@@ -2268,6 -2295,10 +2295,10 @@@ int rvu_mbox_handler_npc_mcam_write_ent goto exit; }
+ /* For AF installed rules, the nix_intf should be set to target NIX */ + if (is_pffunc_af(req->hdr.pcifunc)) + nix_intf = req->intf; + npc_config_mcam_entry(rvu, mcam, blkaddr, req->entry, nix_intf, &req->entry_data, req->enable_entry);
@@@ -2490,10 -2521,10 +2521,10 @@@ int rvu_mbox_handler_npc_mcam_free_coun index = find_next_bit(mcam->bmap, mcam->bmap_entries, entry); if (index >= mcam->bmap_entries) break; + entry = index + 1; if (mcam->entry2cntr_map[index] != req->cntr) continue;
- entry = index + 1; npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr, index, req->cntr); } @@@ -2730,30 -2761,6 +2761,6 @@@ int rvu_mbox_handler_npc_get_kex_cfg(st return 0; }
- bool rvu_npc_write_default_rule(struct rvu *rvu, int blkaddr, int nixlf, - u16 pcifunc, u8 intf, struct mcam_entry *entry, - int *index) - { - struct rvu_pfvf *pfvf = rvu_get_pfvf(rvu, pcifunc); - struct npc_mcam *mcam = &rvu->hw->mcam; - bool enable; - u8 nix_intf; - - if (is_npc_intf_tx(intf)) - nix_intf = pfvf->nix_tx_intf; - else - nix_intf = pfvf->nix_rx_intf; - - *index = npc_get_nixlf_mcam_index(mcam, pcifunc, - nixlf, NIXLF_UCAST_ENTRY); - /* dont force enable unicast entry */ - enable = is_mcam_entry_enabled(rvu, mcam, blkaddr, *index); - npc_config_mcam_entry(rvu, mcam, blkaddr, *index, nix_intf, - entry, enable); - - return enable; - } - int rvu_mbox_handler_npc_read_base_steer_rule(struct rvu *rvu, struct msg_req *req, struct npc_mcam_read_base_rule_rsp *rsp) @@@ -2799,3 -2806,42 +2806,42 @@@ read_entry out: return rc; } + + int rvu_mbox_handler_npc_mcam_entry_stats(struct rvu *rvu, + struct npc_mcam_get_stats_req *req, + struct npc_mcam_get_stats_rsp *rsp) + { + struct npc_mcam *mcam = &rvu->hw->mcam; + u16 index, cntr; + int blkaddr; + u64 regval; + u32 bank; + + blkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPC, 0); + if (blkaddr < 0) + return NPC_MCAM_INVALID_REQ; + + mutex_lock(&mcam->lock); + + index = req->entry & (mcam->banksize - 1); + bank = npc_get_bank(mcam, req->entry); + + /* read MCAM entry STAT_ACT register */ + regval = rvu_read64(rvu, blkaddr, NPC_AF_MCAMEX_BANKX_STAT_ACT(index, bank)); + + if (!(regval & BIT_ULL(9))) { + rsp->stat_ena = 0; + mutex_unlock(&mcam->lock); + return 0; + } + + cntr = regval & 0x1FF; + + rsp->stat_ena = 1; + rsp->stat = rvu_read64(rvu, blkaddr, NPC_AF_MATCH_STATX(cntr)); + rsp->stat &= BIT_ULL(48) - 1; + + mutex_unlock(&mcam->lock); + + return 0; + } diff --combined drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c index dc1778420978,fa7a46aa15ef..0b4fa92ba821 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c @@@ -57,10 -57,13 +57,13 @@@ int otx2_alloc_mcam_entries(struct otx2 flow_cfg->ntuple_max_flows = rsp->count; flow_cfg->ntuple_offset = 0; pfvf->flags |= OTX2_FLAG_NTUPLE_SUPPORT; + flow_cfg->tc_max_flows = flow_cfg->ntuple_max_flows; + pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT; } else { flow_cfg->vf_vlan_offset = 0; flow_cfg->ntuple_offset = flow_cfg->vf_vlan_offset + vf_vlan_max_flows; + flow_cfg->tc_flower_offset = flow_cfg->ntuple_offset; flow_cfg->unicast_offset = flow_cfg->ntuple_offset + OTX2_MAX_NTUPLE_FLOWS; flow_cfg->rx_vlan_offset = flow_cfg->unicast_offset + @@@ -69,6 -72,7 +72,7 @@@ pfvf->flags |= OTX2_FLAG_UCAST_FLTR_SUPPORT; pfvf->flags |= OTX2_FLAG_RX_VLAN_SUPPORT; pfvf->flags |= OTX2_FLAG_VF_VLAN_SUPPORT; + pfvf->flags |= OTX2_FLAG_TC_FLOWER_SUPPORT; }
for (i = 0; i < rsp->count; i++) @@@ -93,6 -97,7 +97,7 @@@ int otx2_mcam_flow_init(struct otx2_ni INIT_LIST_HEAD(&pf->flow_cfg->flow_list);
pf->flow_cfg->ntuple_max_flows = OTX2_MAX_NTUPLE_FLOWS; + pf->flow_cfg->tc_max_flows = pf->flow_cfg->ntuple_max_flows;
err = otx2_alloc_mcam_entries(pf); if (err) @@@ -257,19 -262,17 +262,19 @@@ int otx2_get_flow(struct otx2_nic *pfvf int otx2_get_all_flows(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc, u32 *rule_locs) { + u32 rule_cnt = nfc->rule_cnt; u32 location = 0; int idx = 0; int err = 0;
nfc->data = pfvf->flow_cfg->ntuple_max_flows; - while ((!err || err == -ENOENT) && idx < nfc->rule_cnt) { + while ((!err || err == -ENOENT) && idx < rule_cnt) { err = otx2_get_flow(pfvf, nfc, location); if (!err) rule_locs[idx++] = location; location++; } + nfc->rule_cnt = rule_cnt;
return err; } @@@ -303,6 -306,35 +308,35 @@@ static int otx2_prepare_ipv4_flow(struc sizeof(pmask->ip4dst)); req->features |= BIT_ULL(NPC_DIP_IPV4); } + if (ipv4_usr_mask->tos) { + pkt->tos = ipv4_usr_hdr->tos; + pmask->tos = ipv4_usr_mask->tos; + req->features |= BIT_ULL(NPC_TOS); + } + if (ipv4_usr_mask->proto) { + switch (ipv4_usr_hdr->proto) { + case IPPROTO_ICMP: + req->features |= BIT_ULL(NPC_IPPROTO_ICMP); + break; + case IPPROTO_TCP: + req->features |= BIT_ULL(NPC_IPPROTO_TCP); + break; + case IPPROTO_UDP: + req->features |= BIT_ULL(NPC_IPPROTO_UDP); + break; + case IPPROTO_SCTP: + req->features |= BIT_ULL(NPC_IPPROTO_SCTP); + break; + case IPPROTO_AH: + req->features |= BIT_ULL(NPC_IPPROTO_AH); + break; + case IPPROTO_ESP: + req->features |= BIT_ULL(NPC_IPPROTO_ESP); + break; + default: + return -EOPNOTSUPP; + } + } pkt->etype = cpu_to_be16(ETH_P_IP); pmask->etype = cpu_to_be16(0xFFFF); req->features |= BIT_ULL(NPC_ETYPE); @@@ -327,6 -359,11 +361,11 @@@ sizeof(pmask->ip4dst)); req->features |= BIT_ULL(NPC_DIP_IPV4); } + if (ipv4_l4_mask->tos) { + pkt->tos = ipv4_l4_hdr->tos; + pmask->tos = ipv4_l4_mask->tos; + req->features |= BIT_ULL(NPC_TOS); + } if (ipv4_l4_mask->psrc) { memcpy(&pkt->sport, &ipv4_l4_hdr->psrc, sizeof(pkt->sport)); @@@ -377,10 -414,14 +416,14 @@@ sizeof(pmask->ip4dst)); req->features |= BIT_ULL(NPC_DIP_IPV4); } + if (ah_esp_mask->tos) { + pkt->tos = ah_esp_hdr->tos; + pmask->tos = ah_esp_mask->tos; + req->features |= BIT_ULL(NPC_TOS); + }
/* NPC profile doesn't extract AH/ESP header fields */ - if ((ah_esp_mask->spi & ah_esp_hdr->spi) || - (ah_esp_mask->tos & ah_esp_mask->tos)) + if (ah_esp_mask->spi & ah_esp_hdr->spi) return -EOPNOTSUPP;
if (flow_type == AH_V4_FLOW) diff --combined drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 2fd3d235d292,772a29ba8503..03004fdac0c6 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@@ -1672,7 -1672,6 +1672,7 @@@ int otx2_stop(struct net_device *netdev struct otx2_nic *pf = netdev_priv(netdev); struct otx2_cq_poll *cq_poll = NULL; struct otx2_qset *qset = &pf->qset; + struct otx2_rss_info *rss; int qidx, vec, wrk;
netif_carrier_off(netdev); @@@ -1685,10 -1684,6 +1685,10 @@@ /* First stop packet Rx/Tx */ otx2_rxtx_enable(pf, false);
+ /* Clear RSS enable flag */ + rss = &pf->hw.rss_info; + rss->enable = false; + /* Cleanup Queue IRQ */ vec = pci_irq_vector(pf->pdev, pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START); @@@ -1765,6 -1760,24 +1765,24 @@@ static netdev_tx_t otx2_xmit(struct sk_ return NETDEV_TX_OK; }
+ static netdev_features_t otx2_fix_features(struct net_device *dev, + netdev_features_t features) + { + /* check if n-tuple filters are ON */ + if ((features & NETIF_F_HW_TC) && (dev->features & NETIF_F_NTUPLE)) { + netdev_info(dev, "Disabling n-tuple filters\n"); + features &= ~NETIF_F_NTUPLE; + } + + /* check if tc hw offload is ON */ + if ((features & NETIF_F_NTUPLE) && (dev->features & NETIF_F_HW_TC)) { + netdev_info(dev, "Disabling TC hardware offload\n"); + features &= ~NETIF_F_HW_TC; + } + + return features; + } + static void otx2_set_rx_mode(struct net_device *netdev) { struct otx2_nic *pf = netdev_priv(netdev); @@@ -1827,6 -1840,12 +1845,12 @@@ static int otx2_set_features(struct net if ((changed & NETIF_F_NTUPLE) && !ntuple) otx2_destroy_ntuple_flows(pf);
+ if ((netdev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC) && + pf->tc_info.num_entries) { + netdev_err(netdev, "Can't disable TC hardware offload while flows are active\n"); + return -EBUSY; + } + return 0; }
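Together, otx2_fix_features() and the new check in otx2_set_features() make NETIF_F_NTUPLE and NETIF_F_HW_TC mutually exclusive. A behavioral trace of the code above, assuming the usual netdev core ordering in which ndo_fix_features runs before ndo_set_features:

    /*
     * ethtool -K eth0 ntuple on       (hw-tc-offload currently enabled)
     *   otx2_fix_features():  requested set has NTUPLE, dev has HW_TC
     *                         -> "Disabling TC hardware offload", HW_TC cleared
     *   otx2_set_features():  HW_TC is being dropped; if pf->tc_info.num_entries
     *                         is non-zero the request fails with -EBUSY,
     *                         otherwise ntuple comes up and tc offload goes down.
     */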
@@@ -2225,6 -2244,7 +2249,7 @@@ static const struct net_device_ops otx2 .ndo_open = otx2_open, .ndo_stop = otx2_stop, .ndo_start_xmit = otx2_xmit, + .ndo_fix_features = otx2_fix_features, .ndo_set_mac_address = otx2_set_mac_address, .ndo_change_mtu = otx2_change_mtu, .ndo_set_rx_mode = otx2_set_rx_mode, @@@ -2235,6 -2255,7 +2260,7 @@@ .ndo_set_vf_mac = otx2_set_vf_mac, .ndo_set_vf_vlan = otx2_set_vf_vlan, .ndo_get_vf_config = otx2_get_vf_config, + .ndo_setup_tc = otx2_setup_tc, };
static int otx2_wq_init(struct otx2_nic *pf) @@@ -2454,6 -2475,10 +2480,10 @@@ static int otx2_probe(struct pci_dev *p NETIF_F_HW_VLAN_STAG_RX; netdev->features |= netdev->hw_features;
+ /* HW supports tc offload but mutually exclusive with n-tuple filters */ + if (pf->flags & OTX2_FLAG_TC_FLOWER_SUPPORT) + netdev->hw_features |= NETIF_F_HW_TC; + netdev->gso_max_segs = OTX2_MAX_GSO_SEGS; netdev->watchdog_timeo = OTX2_TX_TIMEOUT;
@@@ -2475,6 -2500,10 +2505,10 @@@
otx2_set_ethtool_ops(netdev);
+ err = otx2_init_tc(pf); + if (err) + goto err_mcam_flow_del; + /* Enable link notifications */ otx2_cgx_config_linkevents(pf, true);
@@@ -2484,6 -2513,8 +2518,8 @@@
return 0;
+ err_mcam_flow_del: + otx2_mcam_flow_del(pf); err_unreg_netdev: unregister_netdev(netdev); err_del_mcam_entries: @@@ -2651,6 -2682,7 +2687,7 @@@ static void otx2_remove(struct pci_dev
otx2_ptp_destroy(pf); otx2_mcam_flow_del(pf); + otx2_shutdown_tc(pf); otx2_detach_resources(&pf->mbox); if (pf->hw.lmt_base) iounmap(pf->hw.lmt_base); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en.h index 304b296fe8b9,1f5bc4d91060..9ea3f3befe74 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@@ -92,15 -92,14 +92,15 @@@ struct page_pool MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
-#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2) +#define MLX5_ALIGN_MTTS(mtts) (ALIGN(mtts, 8)) +#define MLX5_ALIGNED_MTTS_OCTW(mtts) ((mtts) / 2) +#define MLX5_MTT_OCTW(mtts) (MLX5_ALIGNED_MTTS_OCTW(MLX5_ALIGN_MTTS(mtts))) /* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between * WQEs, This page will absorb write overflow by the hardware, when * receiving packets larger than MTU. These oversize packets are * dropped by the driver at a later stage. */ -#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8)) -#define MLX5E_LOG_ALIGNED_MPWQE_PPW (ilog2(MLX5E_REQUIRED_WQE_MTTS)) +#define MLX5E_REQUIRED_WQE_MTTS (MLX5_ALIGN_MTTS(MLX5_MPWRQ_PAGES_PER_WQE + 1)) #define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS) #define MLX5E_MAX_RQ_NUM_MTTS \ ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */ @@@ -881,7 -880,6 +881,6 @@@ struct mlx5e_priv #endif struct devlink_health_reporter *tx_reporter; struct devlink_health_reporter *rx_reporter; - struct devlink_port dl_port; struct mlx5e_xsk xsk; #if IS_ENABLED(CONFIG_PCI_HYPERV_INTERFACE) struct mlx5e_hv_vhca_stats_agent stats_agent; @@@ -1175,6 -1173,7 +1174,7 @@@ void mlx5e_detach_netdev(struct mlx5e_p void mlx5e_destroy_netdev(struct mlx5e_priv *priv); int mlx5e_netdev_change_profile(struct mlx5e_priv *priv, const struct mlx5e_profile *new_profile, void *new_ppriv); + void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv); void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv); void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu); void mlx5e_build_rq_params(struct mlx5_core_dev *mdev, diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index b2cd29847a37,5e3d31b888ce..df13e5094034 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@@ -695,7 -695,7 +695,7 @@@ mlx5_tc_ct_entry_add_rule(struct mlx5_t
zone_rule->nat = nat;
- spec = kzalloc(sizeof(*spec), GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return -ENOMEM;
@@@ -737,7 -737,7 +737,7 @@@
zone_rule->attr = attr;
- kfree(spec); + kvfree(spec); ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);
return 0; @@@ -749,7 -749,7 +749,7 @@@ err_rule err_mod_hdr: kfree(attr); err_attr: - kfree(spec); + kvfree(spec); return err; }
@@@ -1181,8 -1181,7 +1181,8 @@@ int mlx5_tc_ct_add_no_trk_match(struct
mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG, &ctstate, &ctstate_mask); - if (ctstate_mask) + + if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT) return -EOPNOTSUPP;
ctstate_mask |= MLX5_CT_STATE_TRK_BIT; @@@ -1540,6 -1539,14 +1540,14 @@@ mlx5_tc_ct_free_pre_ct_tables(struct ml mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct); }
+ /* To avoid false lock dependency warning set the ct_entries_ht lock + * class different than the lock class of the ht being used when deleting + * last flow from a group and then deleting a group, we get into del_sw_flow_group() + * which call rhashtable_destroy on fg->ftes_hash which will take ht->mutex but + * it's different than the ht->mutex here. + */ + static struct lock_class_key ct_entries_ht_lock_key; + static struct mlx5_ct_ft * mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, struct nf_flowtable *nf_ft) @@@ -1574,6 -1581,8 +1582,8 @@@ if (err) goto err_init;
+ lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key); + err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node, zone_params); if (err) @@@ -1675,10 -1684,10 +1685,10 @@@ __mlx5_tc_ct_flow_offload(struct mlx5_t struct mlx5_ct_ft *ft; u32 fte_id = 1;
- post_ct_spec = kzalloc(sizeof(*post_ct_spec), GFP_KERNEL); + post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL); ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL); if (!post_ct_spec || !ct_flow) { - kfree(post_ct_spec); + kvfree(post_ct_spec); kfree(ct_flow); return ERR_PTR(-ENOMEM); } @@@ -1788,6 -1797,10 +1798,10 @@@ ct_flow->post_ct_attr->prio = 0; ct_flow->post_ct_attr->ft = ct_priv->post_ct;
+ /* Splits were handled before CT */ + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) + ct_flow->post_ct_attr->esw_attr->split_count = 0; + ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE; ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE; ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP); @@@ -1813,7 -1826,7 +1827,7 @@@
attr->ct_attr.ct_flow = ct_flow; dealloc_mod_hdr_actions(&pre_mod_acts); - kfree(post_ct_spec); + kvfree(post_ct_spec);
return rule;
@@@ -1834,7 -1847,7 +1848,7 @@@ err_alloc_pre err_idr: mlx5_tc_ct_del_ft_cb(ct_priv, ft); err_ft: - kfree(post_ct_spec); + kvfree(post_ct_spec); kfree(ct_flow); netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err); return ERR_PTR(err); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 7f7b0f6dcdf9,32d06fe94acc..01d435e15ad3 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@@ -2,6 -2,7 +2,7 @@@ /* Copyright (c) 2021 Mellanox Technologies. */
#include <net/fib_notifier.h> + #include <net/nexthop.h> #include "tc_tun_encap.h" #include "en_tc.h" #include "tc_tun.h" @@@ -89,7 -90,6 +90,7 @@@ int mlx5e_tc_set_attr_rx_tun(struct mlx * required to establish routing. */ flow_flag_set(flow, TUN_RX); + flow->attr->tun_ip_version = ip_version; return 0; }
@@@ -1092,7 -1092,7 +1093,7 @@@ int mlx5e_attach_decap_route(struct mlx if (err || !esw_attr->rx_tun_attr->decap_vport) goto out;
- key.ip_version = attr->ip_version; + key.ip_version = attr->tun_ip_version; if (key.ip_version == 4) key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4; else diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 158f947a8503,9c08f0bd1fcc..d40fc2672530 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@@ -302,7 -302,7 +302,7 @@@ static int mlx5e_create_umr_mkey(struc MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); mlx5e_mkey_set_relaxed_ordering(mdev, mkc); MLX5_SET(mkc, mkc, qpn, 0xffffff); - MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn); + MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn); MLX5_SET64(mkc, mkc, len, npages << page_shift); MLX5_SET(mkc, mkc, translations_octword_size, MLX5_MTT_OCTW(npages)); @@@ -334,9 -334,9 +334,9 @@@ static int mlx5e_create_rq_umr_mkey(str rq->wqe_overflow.addr); }
-static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix) +static u64 mlx5e_get_mpwqe_offset(u16 wqe_ix) { - return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT; + return MLX5E_REQUIRED_MTTS(wqe_ix) << PAGE_SHIFT; }
static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) @@@ -577,7 -577,7 +577,7 @@@ static int mlx5e_alloc_rq(struct mlx5e_ mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i); u32 byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; - u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i); + u64 dma_offset = mlx5e_get_mpwqe_offset(i);
wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom); wqe->data[0].byte_count = cpu_to_be32(byte_count); @@@ -1019,7 -1019,7 +1019,7 @@@ static int mlx5e_alloc_xdpsq(struct mlx sq->pdev = c->pdev; sq->mkey_be = c->mkey_be; sq->channel = c; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->xsk_pool = xsk_pool; @@@ -1090,7 -1090,7 +1090,7 @@@ static int mlx5e_alloc_icosq(struct mlx int err;
sq->channel = c; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map;
param->wq.db_numa_node = cpu_to_node(c->cpu); err = mlx5_wq_cyc_create(mdev, ¶m->wq, sqc_wq, wq, &sq->wq_ctrl); @@@ -1174,7 -1174,7 +1174,7 @@@ static int mlx5e_alloc_txqsq(struct mlx sq->priv = c->priv; sq->ch_ix = c->ix; sq->txq_ix = txq_ix; - sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); @@@ -1257,7 -1257,7 +1257,7 @@@ static int mlx5e_create_sq(struct mlx5_ MLX5_SET(sqc, sqc, flush_in_error_en, 1);
MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.bfreg.index); + MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index); MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); @@@ -2032,7 -2032,7 +2032,7 @@@ static int mlx5e_open_channel(struct ml c->cpu = cpu; c->pdev = mlx5_core_dma_dev(priv->mdev); c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.hw_objs.mkey.key); c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; c->stats = &priv->channel_stats[ix].ch; @@@ -2217,7 -2217,7 +2217,7 @@@ void mlx5e_build_rq_param(struct mlx5e_ MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, mlx5e_get_rqwq_log_stride(params->rq_wq_type, ndsegs)); - MLX5_SET(wq, wq, pd, mdev->mlx5e_res.pdn); + MLX5_SET(wq, wq, pd, mdev->mlx5e_res.hw_objs.pdn); MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); MLX5_SET(rqc, rqc, vsd, params->vlan_strip_disable); MLX5_SET(rqc, rqc, scatter_fcs, params->scatter_fcs_en); @@@ -2248,7 -2248,7 +2248,7 @@@ void mlx5e_build_sq_param_common(struc void *wq = MLX5_ADDR_OF(sqc, sqc, wq);
MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); - MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); + MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.hw_objs.pdn);
param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(priv->mdev)); } @@@ -2368,9 -2368,8 +2368,9 @@@ static u8 mlx5e_build_icosq_log_wq_sz(s { switch (params->rq_wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - return order_base_2(MLX5E_UMR_WQEBBS) + - mlx5e_get_rq_log_wq_sz(rqp->rqc); + return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, + order_base_2(MLX5E_UMR_WQEBBS) + + mlx5e_get_rq_log_wq_sz(rqp->rqc)); default: /* MLX5_WQ_TYPE_CYCLIC */ return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; } @@@ -2503,10 -2502,8 +2503,10 @@@ void mlx5e_close_channels(struct mlx5e_ { int i;
- if (chs->port_ptp) + if (chs->port_ptp) { mlx5e_port_ptp_close(chs->port_ptp); + chs->port_ptp = NULL; + }
for (i = 0; i < chs->num; i++) mlx5e_close_channel(chs->c[i]); @@@ -3424,10 -3421,10 +3424,10 @@@ int mlx5e_create_tis(struct mlx5_core_d { void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
- MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn);
if (MLX5_GET(tisc, tisc, tls_en)) - MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.pdn); + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn);
if (mlx5_lag_is_lacp_owner(mdev)) MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); @@@ -3497,7 -3494,7 +3497,7 @@@ static void mlx5e_cleanup_nic_tx(struc static void mlx5e_build_indir_tir_ctx_common(struct mlx5e_priv *priv, u32 rqtn, u32 *tirc) { - MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.hw_objs.td.tdn); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); MLX5_SET(tirc, tirc, indirect_table, rqtn); MLX5_SET(tirc, tirc, tunneled_offload_en, @@@ -3772,8 -3769,16 +3772,16 @@@ static int mlx5e_setup_tc(struct net_de void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + bool tc_unbind = false; int err;
+ if (type == TC_SETUP_BLOCK && + ((struct flow_block_offload *)type_data)->command == FLOW_BLOCK_UNBIND) + tc_unbind = true; + + if (!netif_device_present(dev) && !tc_unbind) + return -ENODEV; + switch (type) { case TC_SETUP_BLOCK: { struct flow_block_offload *f = type_data; @@@ -3813,15 -3818,6 +3821,15 @@@ void mlx5e_fold_sw_stats64(struct mlx5e for (j = 0; j < priv->max_opened_tc; j++) { struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
+ s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + s->tx_dropped += sq_stats->dropped; + } + } + if (priv->port_ptp_opened) { + for (i = 0; i < priv->max_opened_tc; i++) { + struct mlx5e_sq_stats *sq_stats = &priv->port_ptp_stats.sq[i]; + s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; s->tx_dropped += sq_stats->dropped; @@@ -3835,6 -3831,9 +3843,9 @@@ mlx5e_get_stats(struct net_device *dev struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_pport_stats *pstats = &priv->stats.pport;
+ if (!netif_device_present(dev)) + return; + /* In switchdev mode, monitor counters doesn't monitor * rx/tx stats of 802_3. The update stats mechanism * should keep the 802_3 layout counters updated @@@ -3846,17 -3845,10 +3857,17 @@@ }
if (mlx5e_is_uplink_rep(priv)) { + struct mlx5e_vport_stats *vstats = &priv->stats.vport; + stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); + + /* vport multicast also counts packets that are dropped due to steering + * or rx out of buffer + */ + stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); } else { mlx5e_fold_sw_stats64(priv, stats); } @@@ -3876,11 -3868,19 +3887,19 @@@ stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors; }
+ static void mlx5e_nic_set_rx_mode(struct mlx5e_priv *priv) + { + if (mlx5e_is_uplink_rep(priv)) + return; /* no rx mode for uplink rep */ + + queue_work(priv->wq, &priv->set_rx_mode_work); + } + static void mlx5e_set_rx_mode(struct net_device *dev) { struct mlx5e_priv *priv = netdev_priv(dev);
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv); }
static int mlx5e_set_mac(struct net_device *netdev, void *addr) @@@ -3895,7 -3895,7 +3914,7 @@@ ether_addr_copy(netdev->dev_addr, saddr->sa_data); netif_addr_unlock_bh(netdev);
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv);
return 0; } @@@ -4433,6 -4433,9 +4452,9 @@@ static int mlx5e_set_vf_link_state(stru struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev;
+ if (mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + return mlx5_eswitch_set_vport_state(mdev->priv.eswitch, vf + 1, mlx5_ifla_link2vport(link_state)); } @@@ -4444,6 -4447,9 +4466,9 @@@ int mlx5e_get_vf_config(struct net_devi struct mlx5_core_dev *mdev = priv->mdev; int err;
+ if (!netif_device_present(dev)) + return -EOPNOTSUPP; + err = mlx5_eswitch_get_vport_config(mdev->priv.eswitch, vf + 1, ivi); if (err) return err; @@@ -4460,6 -4466,32 +4485,32 @@@ int mlx5e_get_vf_stats(struct net_devic return mlx5_eswitch_get_vport_stats(mdev->priv.eswitch, vf + 1, vf_stats); } + + static bool + mlx5e_has_offload_stats(const struct net_device *dev, int attr_id) + { + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!netif_device_present(dev)) + return false; + + if (!mlx5e_is_uplink_rep(priv)) + return false; + + return mlx5e_rep_has_offload_stats(dev, attr_id); + } + + static int + mlx5e_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) + { + struct mlx5e_priv *priv = netdev_priv(dev); + + if (!mlx5e_is_uplink_rep(priv)) + return -EOPNOTSUPP; + + return mlx5e_rep_get_offload_stats(attr_id, dev, sp); + } #endif
static bool mlx5e_tunnel_proto_supported_tx(struct mlx5_core_dev *mdev, u8 proto_type) @@@ -4702,10 -4734,8 +4753,10 @@@ static int mlx5e_xdp_set(struct net_dev struct mlx5e_channel *c = priv->channels.c[i];
mlx5e_rq_replace_xdp_prog(&c->rq, prog); - if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) { + bpf_prog_inc(prog); mlx5e_rq_replace_xdp_prog(&c->xskrq, prog); + } }
unlock: @@@ -4818,6 -4848,8 +4869,8 @@@ const struct net_device_ops mlx5e_netde .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, + .ndo_has_offload_stats = mlx5e_has_offload_stats, + .ndo_get_offload_stats = mlx5e_get_offload_stats, #endif .ndo_get_devlink_port = mlx5e_get_devlink_port, }; @@@ -4979,11 -5011,6 +5032,11 @@@ void mlx5e_build_nic_params(struct mlx5 priv->max_nch); params->num_tc = 1;
+ /* Set an initial non-zero value, so that mlx5e_select_queue won't + * divide by zero if called before first activating channels. + */ + priv->num_tc_x_num_ch = params->num_channels * params->num_tc; + /* SQ */ params->log_sq_size = is_kdump_kernel() ? MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE : @@@ -5279,10 -5306,6 +5332,6 @@@ static int mlx5e_nic_init(struct mlx5_c if (err) mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
- err = mlx5e_devlink_port_register(priv); - if (err) - mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); - mlx5e_health_create_reporters(priv);
return 0; @@@ -5291,7 -5314,6 +5340,6 @@@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_health_destroy_reporters(priv); - mlx5e_devlink_port_unregister(priv); mlx5e_tls_cleanup(priv); mlx5e_ipsec_cleanup(priv); } @@@ -5431,7 -5453,7 +5479,7 @@@ static void mlx5e_nic_enable(struct mlx return; mlx5e_dcbnl_init_app(priv);
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv);
rtnl_lock(); if (netif_running(netdev)) @@@ -5454,7 -5476,7 +5502,7 @@@ static void mlx5e_nic_disable(struct ml netif_device_detach(priv->netdev); rtnl_unlock();
- queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_nic_set_rx_mode(priv);
mlx5e_hv_vhca_stats_destroy(priv); if (mlx5e_monitor_counter_supported(priv)) @@@ -5500,6 -5522,8 +5548,6 @@@ int mlx5e_priv_init(struct mlx5e_priv * struct net_device *netdev, struct mlx5_core_dev *mdev) { - memset(priv, 0, sizeof(*priv)); - /* priv init */ priv->mdev = mdev; priv->netdev = netdev; @@@ -5532,18 -5556,12 +5580,18 @@@ void mlx5e_priv_cleanup(struct mlx5e_pr { int i;
+ /* bail if change profile failed and also rollback failed */ + if (!priv->mdev) + return; + destroy_workqueue(priv->wq); free_cpumask_var(priv->scratchpad.cpumask);
for (i = 0; i < priv->htb.max_qos_sqs; i++) kfree(priv->htb.qos_sq_stats[i]); kvfree(priv->htb.qos_sq_stats); + + memset(priv, 0, sizeof(*priv)); }
struct net_device * @@@ -5660,10 -5678,11 +5708,10 @@@ void mlx5e_detach_netdev(struct mlx5e_p }
static int -mlx5e_netdev_attach_profile(struct mlx5e_priv *priv, +mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev, const struct mlx5e_profile *new_profile, void *new_ppriv) { - struct net_device *netdev = priv->netdev; - struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_priv *priv = netdev_priv(netdev); int err;
err = mlx5e_priv_init(priv, netdev, mdev); @@@ -5676,16 -5695,10 +5724,16 @@@ priv->ppriv = new_ppriv; err = new_profile->init(priv->mdev, priv->netdev); if (err) - return err; + goto priv_cleanup; err = mlx5e_attach_netdev(priv); if (err) - new_profile->cleanup(priv); + goto profile_cleanup; + return err; + +profile_cleanup: + new_profile->cleanup(priv); +priv_cleanup: + mlx5e_priv_cleanup(priv); return err; }
@@@ -5694,14 -5707,13 +5742,14 @@@ int mlx5e_netdev_change_profile(struct { unsigned int new_max_nch = mlx5e_calc_max_nch(priv, new_profile); const struct mlx5e_profile *orig_profile = priv->profile; + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; void *orig_ppriv = priv->ppriv; int err, rollback_err;
/* sanity */ if (new_max_nch != priv->max_nch) { - netdev_warn(priv->netdev, - "%s: Replacing profile with different max channels\n", + netdev_warn(netdev, "%s: Replacing profile with different max channels\n", __func__); return -EINVAL; } @@@ -5711,22 -5723,30 +5759,27 @@@ priv->profile->cleanup(priv); mlx5e_priv_cleanup(priv);
- err = mlx5e_netdev_attach_profile(priv, new_profile, new_ppriv); + err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv); if (err) { /* roll back to original profile */ - netdev_warn(priv->netdev, "%s: new profile init failed, %d\n", - __func__, err); + netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err); goto rollback; }
return 0;
rollback: - rollback_err = mlx5e_netdev_attach_profile(priv, orig_profile, orig_ppriv); - if (rollback_err) { - netdev_err(priv->netdev, - "%s: failed to rollback to orig profile, %d\n", + rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv); + if (rollback_err) + netdev_err(netdev, "%s: failed to rollback to orig profile, %d\n", __func__, rollback_err); - } return err; }
+ void mlx5e_netdev_attach_nic_profile(struct mlx5e_priv *priv) + { + mlx5e_netdev_change_profile(priv, &mlx5e_nic_profile, NULL); + } + void mlx5e_destroy_netdev(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; @@@ -5809,10 -5829,17 +5862,17 @@@ static int mlx5e_probe(struct auxiliary
priv->profile = profile; priv->ppriv = NULL; + + err = mlx5e_devlink_port_register(priv); + if (err) { + mlx5_core_err(mdev, "mlx5e_devlink_port_register failed, %d\n", err); + goto err_destroy_netdev; + } + err = profile->init(mdev, netdev); if (err) { mlx5_core_err(mdev, "mlx5e_nic_profile init failed, %d\n", err); - goto err_destroy_netdev; + goto err_devlink_cleanup; }
err = mlx5e_resume(adev); @@@ -5830,12 -5857,15 +5890,15 @@@ mlx5e_devlink_port_type_eth_set(priv);
mlx5e_dcbnl_init_app(priv); + mlx5_uplink_netdev_set(mdev, netdev); return 0;
err_resume: mlx5e_suspend(adev, state); err_profile_cleanup: profile->cleanup(priv); + err_devlink_cleanup: + mlx5e_devlink_port_unregister(priv); err_destroy_netdev: mlx5e_destroy_netdev(priv); return err; @@@ -5850,6 -5880,7 +5913,7 @@@ static void mlx5e_remove(struct auxilia unregister_netdev(priv->netdev); mlx5e_suspend(adev, state); priv->profile->cleanup(priv); + mlx5e_devlink_port_unregister(priv); mlx5e_destroy_netdev(priv); }
@@@ -5875,18 -5906,18 +5939,18 @@@ int mlx5e_init(void
mlx5e_ipsec_build_inverse_table(); mlx5e_build_ptys2ethtool_map(); - ret = mlx5e_rep_init(); + ret = auxiliary_driver_register(&mlx5e_driver); if (ret) return ret;
- ret = auxiliary_driver_register(&mlx5e_driver); + ret = mlx5e_rep_init(); if (ret) - mlx5e_rep_cleanup(); + auxiliary_driver_unregister(&mlx5e_driver); return ret; }
void mlx5e_cleanup(void) { - auxiliary_driver_unregister(&mlx5e_driver); mlx5e_rep_cleanup(); + auxiliary_driver_unregister(&mlx5e_driver); } diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 249d8905e644,b0604b113530..f90894eea9e0 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@@ -52,6 -52,7 +52,7 @@@ #include "en/health.h" #include "en/params.h" #include "devlink.h" + #include "en/devlink.h"
static struct sk_buff * mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, @@@ -500,6 -501,7 +501,6 @@@ static int mlx5e_alloc_rx_mpwqe(struct struct mlx5e_icosq *sq = rq->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *umr_wqe; - u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1); u16 pi; int err; int i; @@@ -530,8 -532,7 +531,8 @@@ umr_wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); - umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset); + umr_wqe->uctrl.xlt_offset = + cpu_to_be16(MLX5_ALIGNED_MTTS_OCTW(MLX5E_REQUIRED_MTTS(ix)));
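The xlt_offset above is now expressed with the helpers added in en.h: MLX5E_REQUIRED_MTTS(ix) gives the first MTT index of WQE ix and MLX5_ALIGNED_MTTS_OCTW() converts it to octwords, presumably because one 16-byte octword holds two 8-byte MTT entries. A worked example of the arithmetic -- the 64 pages-per-WQE figure is an assumption here, since it depends on PAGE_SIZE:

    /*
     * MLX5E_REQUIRED_WQE_MTTS = MLX5_ALIGN_MTTS(64 + 1) = ALIGN(65, 8) = 72
     * For ix = 3:
     *   MLX5E_REQUIRED_MTTS(3)        = 3 * 72  = 216
     *   MLX5_ALIGNED_MTTS_OCTW(216)   = 216 / 2 = 108   -> xlt_offset
     *
     * The generic form MLX5_MTT_OCTW(mtts) = ALIGN(mtts, 8) / 2 gives the
     * same result as the old single macro, e.g. MLX5_MTT_OCTW(5) = 4.
     */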
sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, @@@ -669,6 -670,7 +670,7 @@@ int mlx5e_poll_ico_cq(struct mlx5e_cq * get_cqe_opcode(cqe)); mlx5e_dump_error_cqe(&sq->cq, sq->sqn, (struct mlx5_err_cqe *)cqe); + mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs); if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) queue_work(cq->priv->wq, &sq->recover_work); break; @@@ -1822,6 -1824,7 +1824,7 @@@ static void mlx5e_trap_handle_rx_cqe(st struct mlx5e_priv *priv = netdev_priv(rq->netdev); struct mlx5_wq_cyc *wq = &rq->wqe.wq; struct mlx5e_wqe_frag_info *wi; + struct devlink_port *dl_port; struct sk_buff *skb; u32 cqe_bcnt; u16 trap_id; @@@ -1844,7 -1847,8 +1847,8 @@@ mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); skb_push(skb, ETH_HLEN);
- mlx5_devlink_trap_report(rq->mdev, trap_id, skb, &priv->dl_port); + dl_port = mlx5e_devlink_get_dl_port(priv); + mlx5_devlink_trap_report(rq->mdev, trap_id, skb, dl_port); dev_kfree_skb_any(skb);
free_wqe: diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index df2a0af854bb,730f33ada90a..3a82e2c64a3e --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@@ -445,12 -445,16 +445,16 @@@ static void mlx5e_hairpin_destroy_trans mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn); }
- static void mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) + static int mlx5e_hairpin_fill_rqt_rqns(struct mlx5e_hairpin *hp, void *rqtc) { - u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE], rqn; + u32 *indirection_rqt, rqn; struct mlx5e_priv *priv = hp->func_priv; int i, ix, sz = MLX5E_INDIR_RQT_SIZE;
+ indirection_rqt = kzalloc(sz, GFP_KERNEL); + if (!indirection_rqt) + return -ENOMEM; + mlx5e_build_default_indir_rqt(indirection_rqt, sz, hp->num_channels);
@@@ -462,6 -466,9 +466,9 @@@ rqn = hp->pair->rqn[ix]; MLX5_SET(rqtc, rqtc, rq_num[i], rqn); } + + kfree(indirection_rqt); + return 0; }
static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) @@@ -482,12 -489,15 +489,15 @@@ MLX5_SET(rqtc, rqtc, rqt_actual_size, sz); MLX5_SET(rqtc, rqtc, rqt_max_size, sz);
- mlx5e_hairpin_fill_rqt_rqns(hp, rqtc); + err = mlx5e_hairpin_fill_rqt_rqns(hp, rqtc); + if (err) + goto out;
err = mlx5_core_create_rqt(mdev, in, inlen, &hp->indir_rqt.rqtn); if (!err) hp->indir_rqt.enabled = true;
+ out: kvfree(in); return err; } @@@ -1077,19 -1087,23 +1087,23 @@@ mlx5e_tc_offload_fdb_rules(struct mlx5_ if (flow_flag_test(flow, CT)) { mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
- return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv), + rule = mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv), flow, spec, attr, mod_hdr_acts); + } else { + rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); }
- rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); if (IS_ERR(rule)) return rule;
if (attr->esw_attr->split_count) { flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr); if (IS_ERR(flow->rule[1])) { - mlx5_eswitch_del_offloaded_rule(esw, rule, attr); + if (flow_flag_test(flow, CT)) + mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), flow, attr); + else + mlx5_eswitch_del_offloaded_rule(esw, rule, attr); return flow->rule[1]; } } @@@ -1947,6 -1961,10 +1961,10 @@@ static int __parse_cls_flower(struct ml misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, + misc_parameters_3); + void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value, + misc_parameters_3); struct flow_rule *rule = flow_cls_offload_flow_rule(f); struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; @@@ -1976,6 -1994,7 +1994,7 @@@ BIT(FLOW_DISSECTOR_KEY_CT) | BIT(FLOW_DISSECTOR_KEY_ENC_IP) | BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) | + BIT(FLOW_DISSECTOR_KEY_ICMP) | BIT(FLOW_DISSECTOR_KEY_MPLS))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n", @@@ -2295,17 -2314,49 +2314,60 @@@ if (match.mask->flags) *match_level = MLX5_MATCH_L4; } + + /* Currenlty supported only for MPLS over UDP */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) && + !netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on MPLS is supported only for MPLS over UDP"); + netdev_err(priv->netdev, + "Matching on MPLS is supported only for MPLS over UDP\n"); + return -EOPNOTSUPP; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) { + struct flow_match_icmp match; + + flow_rule_match_icmp(rule, &match); + switch (ip_proto) { + case IPPROTO_ICMP: + if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_ICMP)) + return -EOPNOTSUPP; + MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type, + match.mask->type); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type, + match.key->type); + MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code, + match.mask->code); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code, + match.key->code); + break; + case IPPROTO_ICMPV6: + if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) & + MLX5_FLEX_PROTO_ICMPV6)) + return -EOPNOTSUPP; + MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type, + match.mask->type); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type, + match.key->type); + MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code, + match.mask->code); + MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code, + match.key->code); + break; + default: + NL_SET_ERR_MSG_MOD(extack, + "Code and type matching only with ICMP and ICMPv6"); + netdev_err(priv->netdev, + "Code and type matching only with ICMP and ICMPv6\n"); + return -EINVAL; + } + if (match.mask->code || match.mask->type) { + *match_level = MLX5_MATCH_L4; + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3; + } + } return 0; }
@@@ -2909,37 -2960,6 +2971,37 @@@ static int is_action_keys_supported(con return 0; }
+static bool modify_tuple_supported(bool modify_tuple, bool ct_clear, + bool ct_flow, struct netlink_ext_ack *extack, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec) +{ + if (!modify_tuple || ct_clear) + return true; + + if (ct_flow) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload tuple modification with non-clear ct()"); + netdev_info(priv->netdev, + "can't offload tuple modification with non-clear ct()"); + return false; + } + + /* Add ct_state=-trk match so it will be offloaded for non ct flows + * (or after clear action), as otherwise, since the tuple is changed, + * we can't restore ct state + */ + if (mlx5_tc_ct_add_no_trk_match(spec)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload tuple modification with ct matches and no ct(clear) action"); + netdev_info(priv->netdev, + "can't offload tuple modification with ct matches and no ct(clear) action"); + return false; + } + + return true; +} + static bool modify_header_match_supported(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_action *flow_action, @@@ -2978,9 -2998,18 +3040,9 @@@ return err; }
- /* Add ct_state=-trk match so it will be offloaded for non ct flows - * (or after clear action), as otherwise, since the tuple is changed, - * we can't restore ct state - */ - if (!ct_clear && modify_tuple && - mlx5_tc_ct_add_no_trk_match(spec)) { - NL_SET_ERR_MSG_MOD(extack, - "can't offload tuple modify header with ct matches"); - netdev_info(priv->netdev, - "can't offload tuple modify header with ct matches"); + if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack, + priv, spec)) return false; - }
ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); if (modify_ip_header && ip_proto != IPPROTO_TCP && @@@ -3011,7 -3040,8 +3073,8 @@@ static bool actions_match_supported(str actions = flow->attr->action;
if (mlx5e_is_eswitch_flow(flow)) { - if (flow->attr->esw_attr->split_count && ct_flow) { + if (flow->attr->esw_attr->split_count && ct_flow && + !MLX5_CAP_GEN(flow->attr->esw_attr->in_mdev, reg_c_preserve)) { /* All registers used by ct are cleared when using * split rules. */ @@@ -3811,6 -3841,7 +3874,7 @@@ static int parse_tc_fdb_actions(struct return err;
flow_flag_set(flow, CT); + esw_attr->split_count = esw_attr->out_count; break; default: NL_SET_ERR_MSG_MOD(extack, "The offload action is not supported"); @@@ -3873,11 -3904,6 +3937,6 @@@ return -EOPNOTSUPP; }
- if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { - NL_SET_ERR_MSG_MOD(extack, - "Mirroring goto chain rules isn't supported"); - return -EOPNOTSUPP; - } attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; }
@@@ -4297,6 -4323,11 +4356,11 @@@ int mlx5e_configure_flower(struct net_d struct mlx5e_tc_flow *flow; int err = 0;
+ if (!mlx5_esw_hold(priv->mdev)) + return -EAGAIN; + + mlx5_esw_get(priv->mdev); + rcu_read_lock(); flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); if (flow) { @@@ -4334,11 -4365,14 +4398,14 @@@ rcu_unlock if (err) goto err_free;
+ mlx5_esw_release(priv->mdev); return 0;
err_free: mlx5e_flow_put(priv, flow); out: + mlx5_esw_put(priv->mdev); + mlx5_esw_release(priv->mdev); return err; }
@@@ -4378,6 -4412,7 +4445,7 @@@ int mlx5e_delete_flower(struct net_devi trace_mlx5e_delete_flower(f); mlx5e_flow_put(priv, flow);
+ mlx5_esw_put(priv->mdev); return 0;
errout: @@@ -4477,8 -4512,7 +4545,8 @@@ static int apply_police_params(struct m */ if (rate) { rate = (rate * BITS_PER_BYTE) + 500000; - rate_mbps = max_t(u64, do_div(rate, 1000000), 1); + do_div(rate, 1000000); + rate_mbps = max_t(u32, rate, 1); }
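For orientation on the apply_police_params() change above: the kernel's do_div() divides its 64-bit argument in place and returns the remainder, so the old line fed the remainder (not the quotient) into max_t(). A minimal userspace sketch of the corrected conversion — illustrative only; demo_do_div() is a stand-in for the kernel macro and the rate value is made up:

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's do_div(): divides *n in place
 * and returns the remainder, matching the real macro's behaviour. */
static uint32_t demo_do_div(uint64_t *n, uint32_t base)
{
	uint32_t rem = (uint32_t)(*n % base);

	*n /= base;
	return rem;
}

int main(void)
{
	uint64_t rate = 1250000;	/* bytes/s from tc police (demo value) */
	uint64_t tmp;
	uint32_t wrong, rate_mbps;

	rate = rate * 8 + 500000;	/* to bits/s, rounded to nearest Mbit */

	/* Old pattern: do_div()'s return value is the remainder. */
	tmp = rate;
	wrong = demo_do_div(&tmp, 1000000);

	/* Fixed pattern: divide first, then clamp the quotient to >= 1. */
	demo_do_div(&rate, 1000000);
	rate_mbps = rate ? (uint32_t)rate : 1;

	printf("wrong=%u right=%u\n", wrong, rate_mbps);	/* wrong=500000 right=10 */
	return 0;
}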
err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); @@@ -4513,6 -4547,10 +4581,10 @@@ static int scan_tc_matchall_fdb_actions flow_action_for_each(i, act, flow_action) { switch (act->id) { case FLOW_ACTION_POLICE: + if (act->police.rate_pkt_ps) { + NL_SET_ERR_MSG_MOD(extack, "QoS offload not support packets per second"); + return -EOPNOTSUPP; + } err = apply_police_params(priv, act->police.rate_bytes_ps, extack); if (err) return err; @@@ -4679,10 -4717,6 +4751,6 @@@ int mlx5e_tc_nic_init(struct mlx5e_pri
tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr, MLX5_FLOW_NAMESPACE_KERNEL); - if (IS_ERR(tc->ct)) { - err = PTR_ERR(tc->ct); - goto err_ct; - }
tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event; err = register_netdevice_notifier_dev_net(priv->netdev, @@@ -4698,7 -4732,6 +4766,6 @@@
err_reg: mlx5_tc_ct_clean(tc->ct); - err_ct: mlx5_chains_destroy(tc->chains); err_chains: rhashtable_destroy(&tc->ht); @@@ -4757,8 -4790,6 +4824,6 @@@ int mlx5e_tc_esw_init(struct rhashtabl esw_chains(esw), &esw->offloads.mod_hdr, MLX5_FLOW_NAMESPACE_FDB); - if (IS_ERR(uplink_priv->ct_priv)) - goto err_ct;
mapping = mapping_create(sizeof(struct tunnel_match_key), TUNNEL_INFO_BITS_MASK, true); @@@ -4798,7 -4829,6 +4863,6 @@@ err_enc_opts_mapping mapping_destroy(uplink_priv->tunnel_mapping); err_tun_mapping: mlx5_tc_ct_clean(uplink_priv->ct_priv); - err_ct: netdev_warn(priv->netdev, "Failed to initialize tc (eswitch), err: %d", err); return err; @@@ -4871,9 -4901,17 +4935,17 @@@ static int mlx5e_setup_tc_cls_flower(st int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { - unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD); + unsigned long flags = MLX5_TC_FLAG(INGRESS); struct mlx5e_priv *priv = cb_priv;
+ if (!priv->netdev || !netif_device_present(priv->netdev)) + return -EOPNOTSUPP; + + if (mlx5e_is_uplink_rep(priv)) + flags |= MLX5_TC_FLAG(ESW_OFFLOAD); + else + flags |= MLX5_TC_FLAG(NIC_OFFLOAD); + switch (type) { case TC_SETUP_CLSFLOWER: return mlx5e_setup_tc_cls_flower(priv, type_data, flags); diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8694b83968b4,ab2694835246..d5de6bf622ce --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@@ -40,7 -40,6 +40,6 @@@ #include "eswitch.h" #include "esw/indir_table.h" #include "esw/acl/ofld.h" - #include "esw/indir_table.h" #include "rdma.h" #include "en.h" #include "fs_core.h" @@@ -551,8 -550,7 +550,8 @@@ esw_setup_dests(struct mlx5_flow_destin
if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) && MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve) && - mlx5_eswitch_vport_match_metadata_enabled(esw)) + mlx5_eswitch_vport_match_metadata_enabled(esw) && + MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
if (attr->dest_ft) { @@@ -1447,7 -1445,7 +1446,7 @@@ esw_add_restore_rule(struct mlx5_eswitc if (!mlx5_eswitch_reg_c1_loopback_supported(esw)) return ERR_PTR(-EOPNOTSUPP);
- spec = kzalloc(sizeof(*spec), GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) return ERR_PTR(-ENOMEM);
@@@ -1470,7 -1468,7 +1469,7 @@@ dest.ft = esw->offloads.ft_offloads;
flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1); - kfree(spec); + kvfree(spec);
if (IS_ERR(flow_rule)) esw_warn(esw->dev, @@@ -1855,6 -1853,7 +1854,7 @@@ static void esw_destroy_offloads_fdb_ta /* Holds true only as long as DMFS is the default */ mlx5_flow_namespace_set_mode(esw->fdb_table.offloads.ns, MLX5_FLOW_STEERING_MODE_DMFS); + atomic64_set(&esw->user_count, 0); }
static int esw_create_offloads_table(struct mlx5_eswitch *esw) @@@ -2260,9 -2259,11 +2260,11 @@@ int esw_offloads_load_rep(struct mlx5_e if (esw->mode != MLX5_ESWITCH_OFFLOADS) return 0;
- err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); - if (err) - return err; + if (vport_num != MLX5_VPORT_UPLINK) { + err = mlx5_esw_offloads_devlink_port_register(esw, vport_num); + if (err) + return err; + }
err = mlx5_esw_offloads_rep_load(esw, vport_num); if (err) @@@ -2270,7 -2271,8 +2272,8 @@@ return err;
load_err: - mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + if (vport_num != MLX5_VPORT_UPLINK) + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); return err; }
@@@ -2280,7 -2282,9 +2283,9 @@@ void esw_offloads_unload_rep(struct mlx return;
mlx5_esw_offloads_rep_unload(esw, vport_num); - mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); + + if (vport_num != MLX5_VPORT_UPLINK) + mlx5_esw_offloads_devlink_port_unregister(esw, vport_num); }
#define ESW_OFFLOADS_DEVCOM_PAIR (0) @@@ -2555,6 -2559,9 +2560,9 @@@ static int esw_create_uplink_offloads_a struct mlx5_vport *vport;
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(vport)) + return PTR_ERR(vport); + return esw_vport_create_offloads_acl_tables(esw, vport); }
@@@ -2563,6 -2570,9 +2571,9 @@@ static void esw_destroy_uplink_offloads struct mlx5_vport *vport;
vport = mlx5_eswitch_get_vport(esw, MLX5_VPORT_UPLINK); + if (IS_ERR(vport)) + return; + esw_vport_destroy_offloads_acl_tables(esw, vport); }
@@@ -2574,6 -2584,7 +2585,7 @@@ static int esw_offloads_steering_init(s memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb)); mutex_init(&esw->fdb_table.offloads.vports.lock); hash_init(esw->fdb_table.offloads.vports.table); + atomic64_set(&esw->user_count, 0);
indir = mlx5_esw_indir_table_init(); if (IS_ERR(indir)) { @@@ -2915,8 -2926,14 +2927,14 @@@ int mlx5_devlink_eswitch_mode_set(struc if (esw_mode_from_devlink(mode, &mlx5_mode)) return -EINVAL;
- mutex_lock(&esw->mode_lock); - cur_mlx5_mode = esw->mode; + err = mlx5_esw_try_lock(esw); + if (err < 0) { + NL_SET_ERR_MSG_MOD(extack, "Can't change mode, E-Switch is busy"); + return err; + } + cur_mlx5_mode = err; + err = 0; + if (cur_mlx5_mode == mlx5_mode) goto unlock;
@@@ -2928,7 -2945,7 +2946,7 @@@ err = -EINVAL;
unlock: - mutex_unlock(&esw->mode_lock); + mlx5_esw_unlock(esw); return err; }
@@@ -2941,14 -2958,14 +2959,14 @@@ int mlx5_devlink_eswitch_mode_get(struc if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock;
err = esw_mode_to_devlink(esw->mode, mode); unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -2964,7 -2981,7 +2982,7 @@@ int mlx5_devlink_eswitch_inline_mode_se if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto out; @@@ -3003,7 -3020,7 +3021,7 @@@ }
esw->offloads.inline_mode = mlx5_mode; - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return 0;
revert_inline_mode: @@@ -3013,7 -3030,7 +3031,7 @@@ vport, esw->offloads.inline_mode); out: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -3026,14 -3043,14 +3044,14 @@@ int mlx5_devlink_eswitch_inline_mode_ge if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock;
err = esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode); unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -3049,7 -3066,7 +3067,7 @@@ int mlx5_devlink_eswitch_encap_mode_set if (IS_ERR(esw)) return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock; @@@ -3095,7 -3112,7 +3113,7 @@@ }
unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return err; }
@@@ -3110,14 -3127,14 +3128,14 @@@ int mlx5_devlink_eswitch_encap_mode_get return PTR_ERR(esw);
- mutex_lock(&esw->mode_lock); + down_write(&esw->mode_lock); err = eswitch_devlink_esw_mode_check(esw); if (err) goto unlock;
*encap = esw->offloads.encap; unlock: - mutex_unlock(&esw->mode_lock); + up_write(&esw->mode_lock); return 0; }
diff --combined drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index 6f7cef47e04c,0fc055cdf221..48303286c133 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@@ -233,7 -233,6 +233,7 @@@ int mlx5i_create_underlay_qp(struct mlx }
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(priv->mdev)); MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, @@@ -695,7 -694,6 +695,7 @@@ static int mlx5i_check_required_hca_cap static void mlx5_rdma_netdev_free(struct net_device *netdev) { struct mlx5e_priv *priv = mlx5i_epriv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; struct mlx5i_priv *ipriv = priv->ppriv; const struct mlx5e_profile *profile = priv->profile;
@@@ -704,13 -702,13 +704,13 @@@
if (!ipriv->sub_interface) { mlx5i_pkey_qpn_ht_cleanup(netdev); - mlx5e_destroy_mdev_resources(priv->mdev); + mlx5e_destroy_mdev_resources(mdev); } }
static bool mlx5_is_sub_interface(struct mlx5_core_dev *mdev) { - return mdev->mlx5e_res.pdn != 0; + return mdev->mlx5e_res.hw_objs.pdn != 0; }
static const struct mlx5e_profile *mlx5_get_profile(struct mlx5_core_dev *mdev) diff --combined drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c index a5a0f60bef66,3c8a00dd573a..699d615e4e2a --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c @@@ -5,8 -5,7 +5,7 @@@ #include "priv.h" #include "sf.h" #include "mlx5_ifc_vhca_event.h" - #include "vhca_event.h" -#include "ecpf.h" +#include "mlx5_core.h"
struct mlx5_sf_hw { u32 usr_sfnum; @@@ -18,6 -17,7 +17,6 @@@ struct mlx5_sf_hw_table struct mlx5_core_dev *dev; struct mlx5_sf_hw *sfs; int max_local_functions; - u8 ecpu: 1; struct mutex table_lock; /* Serializes sf deletion and vhca state change handler. */ struct notifier_block vhca_nb; }; @@@ -63,7 -63,7 +62,7 @@@ int mlx5_sf_hw_table_sf_alloc(struct ml } if (sw_id == -ENOSPC) { err = -ENOSPC; - goto err; + goto exist_err; }
hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sw_id); @@@ -71,7 -71,7 +70,7 @@@ if (err) goto err;
- err = mlx5_modify_vhca_sw_id(dev, hw_fn_id, table->ecpu, usr_sfnum); + err = mlx5_modify_vhca_sw_id(dev, hw_fn_id, usr_sfnum); if (err) goto vhca_err;
@@@ -117,7 -117,7 +116,7 @@@ void mlx5_sf_hw_table_sf_deferred_free(
hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id); mutex_lock(&table->table_lock); - err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, table->ecpu, out, sizeof(out)); + err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, out, sizeof(out)); if (err) goto err; state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state); @@@ -163,6 -163,7 +162,6 @@@ int mlx5_sf_hw_table_init(struct mlx5_c table->dev = dev; table->sfs = sfs; table->max_local_functions = max_functions; - table->ecpu = mlx5_read_embedded_cpu(dev); dev->priv.sf_hw_table = table; mlx5_core_dbg(dev, "SF HW table: max sfs = %d\n", max_functions); return 0; diff --combined drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c index 9143ec326ebf,815951617e7c..616ebc38381a --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c @@@ -264,8 -264,8 +264,8 @@@ static void dr_ste_v1_set_miss_addr(u8 static u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p) { u64 index = - (MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) | - MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32) << 26); + ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) | + ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32)) << 26);
return index << 6; } @@@ -437,21 -437,6 +437,6 @@@ static void dr_ste_v1_set_rx_decap(u8 * dr_ste_v1_set_reparse(hw_ste_p); }
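The dr_ste_v1_get_miss_addr() fix above matters because MLX5_GET() yields a 32-bit value: without the (u64) casts, the << 26 shift is evaluated in 32-bit arithmetic and the upper miss-address bits are lost before the result ever reaches the 64-bit index. A standalone sketch of the truncation (the field values are made up):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t lo = 0x02345678;	/* stands in for miss_address_31_6 */
	uint32_t hi = 0xAB;		/* stands in for miss_address_39_32 */

	/* 32-bit arithmetic: bits shifted past position 31 are gone. */
	uint64_t truncated = (uint64_t)(lo | hi << 26);

	/* Promote to 64 bits first, as the fixed code does. */
	uint64_t full = (uint64_t)lo | ((uint64_t)hi << 26);

	printf("truncated=%#llx full=%#llx\n",
	       (unsigned long long)(truncated << 6),
	       (unsigned long long)(full << 6));
	return 0;
}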
- static void dr_ste_v1_set_rx_decap_l3(u8 *hw_ste_p, - u8 *s_action, - u16 decap_actions, - u32 decap_index) - { - MLX5_SET(ste_single_action_modify_list_v1, s_action, action_id, - DR_STE_V1_ACTION_ID_MODIFY_LIST); - MLX5_SET(ste_single_action_modify_list_v1, s_action, num_of_modify_actions, - decap_actions); - MLX5_SET(ste_single_action_modify_list_v1, s_action, modify_actions_ptr, - decap_index); - - dr_ste_v1_set_reparse(hw_ste_p); - } - static void dr_ste_v1_set_rewrite_actions(u8 *hw_ste_p, u8 *s_action, u16 num_of_actions, @@@ -571,9 -556,6 +556,6 @@@ static void dr_ste_v1_set_actions_rx(st bool allow_ctr = true;
if (action_type_set[DR_ACTION_TYP_TNL_L3_TO_L2]) { - dr_ste_v1_set_rx_decap_l3(last_ste, action, - attr->decap_actions, - attr->decap_index); dr_ste_v1_set_rewrite_actions(last_ste, action, attr->decap_actions, attr->decap_index); @@@ -1532,6 -1514,7 +1514,7 @@@ static void dr_ste_v1_build_src_gvmi_qp
DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_gvmi, misc_mask, source_port); DR_STE_SET_ONES(src_gvmi_qp_v1, bit_mask, source_qp, misc_mask, source_sqn); + misc_mask->source_eswitch_owner_vhca_id = 0; }
static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value, diff --combined drivers/net/ethernet/pensando/ionic/ionic_txrx.c index 4087311f7082,03e00a6c413a..4fa1ae7db13e --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@@ -10,12 -10,6 +10,6 @@@ #include "ionic_lif.h" #include "ionic_txrx.h"
- static void ionic_rx_clean(struct ionic_queue *q, - struct ionic_desc_info *desc_info, - struct ionic_cq_info *cq_info, - void *cb_arg); - - static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
@@@ -40,72 -34,149 +34,149 @@@ static inline struct netdev_queue *q_to return netdev_get_tx_queue(q->lif->netdev, q->index); }
- static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q, - unsigned int len, bool frags) + static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info) + { + buf_info->page = NULL; + buf_info->page_offset = 0; + buf_info->dma_addr = 0; + } + + static int ionic_rx_page_alloc(struct ionic_queue *q, + struct ionic_buf_info *buf_info) { - struct ionic_lif *lif = q->lif; + struct net_device *netdev = q->lif->netdev; struct ionic_rx_stats *stats; - struct net_device *netdev; - struct sk_buff *skb; + struct device *dev;
- netdev = lif->netdev; - stats = &q->lif->rxqstats[q->index]; + dev = q->dev; + stats = q_to_rx_stats(q);
- if (frags) - skb = napi_get_frags(&q_to_qcq(q)->napi); - else - skb = netdev_alloc_skb_ip_align(netdev, len); + if (unlikely(!buf_info)) { + net_err_ratelimited("%s: %s invalid buf_info in alloc\n", + netdev->name, q->name); + return -EINVAL; + }
- if (unlikely(!skb)) { - net_warn_ratelimited("%s: SKB alloc failed on %s!\n", - netdev->name, q->name); + buf_info->page = alloc_pages(IONIC_PAGE_GFP_MASK, 0); + if (unlikely(!buf_info->page)) { + net_err_ratelimited("%s: %s page alloc failed\n", + netdev->name, q->name); stats->alloc_err++; - return NULL; + return -ENOMEM; } + buf_info->page_offset = 0;
- return skb; + buf_info->dma_addr = dma_map_page(dev, buf_info->page, buf_info->page_offset, + IONIC_PAGE_SIZE, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) { + __free_pages(buf_info->page, 0); + ionic_rx_buf_reset(buf_info); + net_err_ratelimited("%s: %s dma map failed\n", + netdev->name, q->name); + stats->dma_map_err++; + return -EIO; + } + + return 0; + } + + static void ionic_rx_page_free(struct ionic_queue *q, + struct ionic_buf_info *buf_info) + { + struct net_device *netdev = q->lif->netdev; + struct device *dev = q->dev; + + if (unlikely(!buf_info)) { + net_err_ratelimited("%s: %s invalid buf_info in free\n", + netdev->name, q->name); + return; + } + + if (!buf_info->page) + return; + + dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE); + __free_pages(buf_info->page, 0); + ionic_rx_buf_reset(buf_info); + } + + static bool ionic_rx_buf_recycle(struct ionic_queue *q, + struct ionic_buf_info *buf_info, u32 used) + { + u32 size; + + /* don't re-use pages allocated in low-mem condition */ + if (page_is_pfmemalloc(buf_info->page)) + return false; + + /* don't re-use buffers from non-local numa nodes */ + if (page_to_nid(buf_info->page) != numa_mem_id()) + return false; + + size = ALIGN(used, IONIC_PAGE_SPLIT_SZ); + buf_info->page_offset += size; + if (buf_info->page_offset >= IONIC_PAGE_SIZE) + return false; + + get_page(buf_info->page); + + return true; }
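To see what the new rx buffer handling buys: ionic_rx_page_alloc() maps a whole page once, ionic_rx_frags()/ionic_rx_fill() hand out chunks starting at page_offset, and ionic_rx_buf_recycle() advances the offset and takes an extra page reference so the rest of the page can be reused without another allocation or DMA mapping. A small userspace model of that offset/refcount bookkeeping — the struct and constant values are demo assumptions, not the driver's definitions:

#include <stdio.h>

#define DEMO_PAGE_SIZE	4096u	/* stands in for IONIC_PAGE_SIZE */
#define DEMO_SPLIT_SZ	2048u	/* stands in for IONIC_PAGE_SPLIT_SZ */
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

struct demo_buf {
	unsigned int page_offset;	/* where the next rx buffer starts */
	int refs;			/* models get_page()/put_page() */
};

/* Returns 1 if the page can serve another buffer, 0 if it is used up and
 * the caller must unmap it and allocate a fresh page. */
static int demo_recycle(struct demo_buf *b, unsigned int used)
{
	b->page_offset += ALIGN_UP(used, DEMO_SPLIT_SZ);
	if (b->page_offset >= DEMO_PAGE_SIZE)
		return 0;
	b->refs++;	/* the network stack still holds the old chunk */
	return 1;
}

int main(void)
{
	struct demo_buf b = { .page_offset = 0, .refs = 1 };

	printf("reuse after 1st buffer: %d (offset %u)\n",
	       demo_recycle(&b, 1500), b.page_offset);	/* 1, 2048 */
	printf("reuse after 2nd buffer: %d (offset %u)\n",
	       demo_recycle(&b, 1500), b.page_offset);	/* 0, 4096 */
	return 0;
}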
static struct sk_buff *ionic_rx_frags(struct ionic_queue *q, struct ionic_desc_info *desc_info, - struct ionic_cq_info *cq_info) + struct ionic_rxq_comp *comp) { - struct ionic_rxq_comp *comp = cq_info->cq_desc; - struct device *dev = q->lif->ionic->dev; - struct ionic_page_info *page_info; + struct net_device *netdev = q->lif->netdev; + struct ionic_buf_info *buf_info; + struct ionic_rx_stats *stats; + struct device *dev = q->dev; struct sk_buff *skb; unsigned int i; u16 frag_len; u16 len;
- page_info = &desc_info->pages[0]; + stats = q_to_rx_stats(q); + + buf_info = &desc_info->bufs[0]; len = le16_to_cpu(comp->len);
- prefetch(page_address(page_info->page) + NET_IP_ALIGN); + prefetch(buf_info->page);
- skb = ionic_rx_skb_alloc(q, len, true); - if (unlikely(!skb)) + skb = napi_get_frags(&q_to_qcq(q)->napi); + if (unlikely(!skb)) { + net_warn_ratelimited("%s: SKB alloc failed on %s!\n", + netdev->name, q->name); + stats->alloc_err++; return NULL; + }
i = comp->num_sg_elems + 1; do { - if (unlikely(!page_info->page)) { - struct napi_struct *napi = &q_to_qcq(q)->napi; - - napi->skb = NULL; + if (unlikely(!buf_info->page)) { dev_kfree_skb(skb); return NULL; }
- frag_len = min(len, (u16)PAGE_SIZE); + frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset); len -= frag_len;
- dma_unmap_page(dev, dma_unmap_addr(page_info, dma_addr), - PAGE_SIZE, DMA_FROM_DEVICE); + dma_sync_single_for_cpu(dev, + buf_info->dma_addr + buf_info->page_offset, + frag_len, DMA_FROM_DEVICE); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - page_info->page, 0, frag_len, PAGE_SIZE); - page_info->page = NULL; - page_info++; + buf_info->page, buf_info->page_offset, frag_len, + IONIC_PAGE_SIZE); + + if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) { + dma_unmap_page(dev, buf_info->dma_addr, + IONIC_PAGE_SIZE, DMA_FROM_DEVICE); + ionic_rx_buf_reset(buf_info); + } + + buf_info++; + i--; } while (i > 0);
@@@ -114,30 -185,37 +185,37 @@@
static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q, struct ionic_desc_info *desc_info, - struct ionic_cq_info *cq_info) + struct ionic_rxq_comp *comp) { - struct ionic_rxq_comp *comp = cq_info->cq_desc; - struct device *dev = q->lif->ionic->dev; - struct ionic_page_info *page_info; + struct net_device *netdev = q->lif->netdev; + struct ionic_buf_info *buf_info; + struct ionic_rx_stats *stats; + struct device *dev = q->dev; struct sk_buff *skb; u16 len;
- page_info = &desc_info->pages[0]; + stats = q_to_rx_stats(q); + + buf_info = &desc_info->bufs[0]; len = le16_to_cpu(comp->len);
- skb = ionic_rx_skb_alloc(q, len, false); - if (unlikely(!skb)) + skb = napi_alloc_skb(&q_to_qcq(q)->napi, len); + if (unlikely(!skb)) { + net_warn_ratelimited("%s: SKB alloc failed on %s!\n", + netdev->name, q->name); + stats->alloc_err++; return NULL; + }
- if (unlikely(!page_info->page)) { + if (unlikely(!buf_info->page)) { dev_kfree_skb(skb); return NULL; }
- dma_sync_single_for_cpu(dev, dma_unmap_addr(page_info, dma_addr), + dma_sync_single_for_cpu(dev, buf_info->dma_addr + buf_info->page_offset, len, DMA_FROM_DEVICE); - skb_copy_to_linear_data(skb, page_address(page_info->page), len); - dma_sync_single_for_device(dev, dma_unmap_addr(page_info, dma_addr), + skb_copy_to_linear_data(skb, page_address(buf_info->page) + buf_info->page_offset, len); + dma_sync_single_for_device(dev, buf_info->dma_addr + buf_info->page_offset, len, DMA_FROM_DEVICE);
skb_put(skb, len); @@@ -151,14 -229,13 +229,13 @@@ static void ionic_rx_clean(struct ionic struct ionic_cq_info *cq_info, void *cb_arg) { - struct ionic_rxq_comp *comp = cq_info->cq_desc; + struct ionic_rxq_comp *comp = cq_info->rxcq; + struct net_device *netdev = q->lif->netdev; struct ionic_qcq *qcq = q_to_qcq(q); struct ionic_rx_stats *stats; - struct net_device *netdev; struct sk_buff *skb;
stats = q_to_rx_stats(q); - netdev = q->lif->netdev;
if (comp->status) { stats->dropped++; @@@ -169,9 -246,9 +246,9 @@@ stats->bytes += le16_to_cpu(comp->len);
if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak) - skb = ionic_rx_copybreak(q, desc_info, cq_info); + skb = ionic_rx_copybreak(q, desc_info, comp); else - skb = ionic_rx_frags(q, desc_info, cq_info); + skb = ionic_rx_frags(q, desc_info, comp);
if (unlikely(!skb)) { stats->dropped++; @@@ -227,7 -304,7 +304,7 @@@
static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) { - struct ionic_rxq_comp *comp = cq_info->cq_desc; + struct ionic_rxq_comp *comp = cq_info->rxcq; struct ionic_queue *q = cq->bound_q; struct ionic_desc_info *desc_info;
@@@ -253,138 -330,75 +330,75 @@@ return true; }
- static int ionic_rx_page_alloc(struct ionic_queue *q, - struct ionic_page_info *page_info) - { - struct ionic_lif *lif = q->lif; - struct ionic_rx_stats *stats; - struct net_device *netdev; - struct device *dev; - - netdev = lif->netdev; - dev = lif->ionic->dev; - stats = q_to_rx_stats(q); - - if (unlikely(!page_info)) { - net_err_ratelimited("%s: %s invalid page_info in alloc\n", - netdev->name, q->name); - return -EINVAL; - } - - page_info->page = dev_alloc_page(); - if (unlikely(!page_info->page)) { - net_err_ratelimited("%s: %s page alloc failed\n", - netdev->name, q->name); - stats->alloc_err++; - return -ENOMEM; - } - - page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) { - put_page(page_info->page); - page_info->dma_addr = 0; - page_info->page = NULL; - net_err_ratelimited("%s: %s dma map failed\n", - netdev->name, q->name); - stats->dma_map_err++; - return -EIO; - } - - return 0; - } - - static void ionic_rx_page_free(struct ionic_queue *q, - struct ionic_page_info *page_info) - { - struct ionic_lif *lif = q->lif; - struct net_device *netdev; - struct device *dev; - - netdev = lif->netdev; - dev = lif->ionic->dev; - - if (unlikely(!page_info)) { - net_err_ratelimited("%s: %s invalid page_info in free\n", - netdev->name, q->name); - return; - } - - if (unlikely(!page_info->page)) { - net_err_ratelimited("%s: %s invalid page in free\n", - netdev->name, q->name); - return; - } - - dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE); - - put_page(page_info->page); - page_info->dma_addr = 0; - page_info->page = NULL; - } - void ionic_rx_fill(struct ionic_queue *q) { struct net_device *netdev = q->lif->netdev; struct ionic_desc_info *desc_info; - struct ionic_page_info *page_info; struct ionic_rxq_sg_desc *sg_desc; struct ionic_rxq_sg_elem *sg_elem; + struct ionic_buf_info *buf_info; struct ionic_rxq_desc *desc; unsigned int remain_len; - unsigned int seg_len; + unsigned int frag_len; unsigned int nfrags; unsigned int i, j; unsigned int len;
len = netdev->mtu + ETH_HLEN + VLAN_HLEN; - nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE;
for (i = ionic_q_space_avail(q); i; i--) { + nfrags = 0; remain_len = len; desc_info = &q->info[q->head_idx]; desc = desc_info->desc; - sg_desc = desc_info->sg_desc; - page_info = &desc_info->pages[0]; + buf_info = &desc_info->bufs[0];
- if (page_info->page) { /* recycle the buffer */ - ionic_rxq_post(q, false, ionic_rx_clean, NULL); - continue; - } - - /* fill main descriptor - pages[0] */ - desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG : - IONIC_RXQ_DESC_OPCODE_SIMPLE; - desc_info->npages = nfrags; - if (unlikely(ionic_rx_page_alloc(q, page_info))) { - desc->addr = 0; - desc->len = 0; - return; + if (!buf_info->page) { /* alloc a new buffer? */ + if (unlikely(ionic_rx_page_alloc(q, buf_info))) { + desc->addr = 0; + desc->len = 0; + return; + } } - desc->addr = cpu_to_le64(page_info->dma_addr); - seg_len = min_t(unsigned int, PAGE_SIZE, len); - desc->len = cpu_to_le16(seg_len); - remain_len -= seg_len; - page_info++;
- /* fill sg descriptors - pages[1..n] */ - for (j = 0; j < nfrags - 1; j++) { - if (page_info->page) /* recycle the sg buffer */ - continue; + /* fill main descriptor - buf[0] */ + desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset); + frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset); + desc->len = cpu_to_le16(frag_len); + remain_len -= frag_len; + buf_info++; + nfrags++;
+ /* fill sg descriptors - buf[1..n] */ + sg_desc = desc_info->sg_desc; + for (j = 0; remain_len > 0 && j < q->max_sg_elems; j++) { sg_elem = &sg_desc->elems[j]; - if (unlikely(ionic_rx_page_alloc(q, page_info))) { - sg_elem->addr = 0; - sg_elem->len = 0; - return; + if (!buf_info->page) { /* alloc a new sg buffer? */ + if (unlikely(ionic_rx_page_alloc(q, buf_info))) { + sg_elem->addr = 0; + sg_elem->len = 0; + return; + } } - sg_elem->addr = cpu_to_le64(page_info->dma_addr); - seg_len = min_t(unsigned int, PAGE_SIZE, remain_len); - sg_elem->len = cpu_to_le16(seg_len); - remain_len -= seg_len; - page_info++; + + sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset); + frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE - buf_info->page_offset); + sg_elem->len = cpu_to_le16(frag_len); + remain_len -= frag_len; + buf_info++; + nfrags++; + } + + /* clear end sg element as a sentinel */ + if (j < q->max_sg_elems) { + sg_elem = &sg_desc->elems[j]; + memset(sg_elem, 0, sizeof(*sg_elem)); }
+ desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG : + IONIC_RXQ_DESC_OPCODE_SIMPLE; + desc_info->nbufs = nfrags; + ionic_rxq_post(q, false, ionic_rx_clean, NULL); }
@@@ -395,21 -409,24 +409,24 @@@ void ionic_rx_empty(struct ionic_queue *q) { struct ionic_desc_info *desc_info; - struct ionic_page_info *page_info; + struct ionic_buf_info *buf_info; unsigned int i, j;
for (i = 0; i < q->num_descs; i++) { desc_info = &q->info[i]; for (j = 0; j < IONIC_RX_MAX_SG_ELEMS + 1; j++) { - page_info = &desc_info->pages[j]; - if (page_info->page) - ionic_rx_page_free(q, page_info); + buf_info = &desc_info->bufs[j]; + if (buf_info->page) + ionic_rx_page_free(q, buf_info); }
- desc_info->npages = 0; + desc_info->nbufs = 0; desc_info->cb = NULL; desc_info->cb_arg = NULL; } + + q->head_idx = 0; + q->tail_idx = 0; }
static void ionic_dim_update(struct ionic_qcq *qcq) @@@ -525,7 -542,7 +542,7 @@@ int ionic_txrx_napi(struct napi_struct idev = &lif->ionic->idev; txcq = &lif->txqcqs[qi]->cq;
- tx_work_done = ionic_cq_service(txcq, lif->tx_budget, + tx_work_done = ionic_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT, ionic_tx_service, NULL, NULL);
rx_work_done = ionic_cq_service(rxcq, budget, @@@ -558,7 -575,7 +575,7 @@@ static dma_addr_t ionic_tx_map_single(s void *data, size_t len) { struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; + struct device *dev = q->dev; dma_addr_t dma_addr;
dma_addr = dma_map_single(dev, data, len, DMA_TO_DEVICE); @@@ -576,7 -593,7 +593,7 @@@ static dma_addr_t ionic_tx_map_frag(str size_t offset, size_t len) { struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; + struct device *dev = q->dev; dma_addr_t dma_addr;
dma_addr = skb_frag_dma_map(dev, frag, offset, len, DMA_TO_DEVICE); @@@ -588,42 -605,72 +605,72 @@@ return dma_addr; }
+ static int ionic_tx_map_skb(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) + { + struct ionic_buf_info *buf_info = desc_info->bufs; + struct device *dev = q->dev; + dma_addr_t dma_addr; + unsigned int nfrags; + skb_frag_t *frag; + int frag_idx; + + dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); + if (dma_mapping_error(dev, dma_addr)) + return -EIO; + buf_info->dma_addr = dma_addr; + buf_info->len = skb_headlen(skb); + buf_info++; + + frag = skb_shinfo(skb)->frags; + nfrags = skb_shinfo(skb)->nr_frags; + for (frag_idx = 0; frag_idx < nfrags; frag_idx++, frag++) { + dma_addr = ionic_tx_map_frag(q, frag, 0, skb_frag_size(frag)); + if (dma_mapping_error(dev, dma_addr)) + goto dma_fail; + buf_info->dma_addr = dma_addr; + buf_info->len = skb_frag_size(frag); + buf_info++; + } + + desc_info->nbufs = 1 + nfrags; + + return 0; + + dma_fail: + /* unwind the frag mappings and the head mapping */ + while (frag_idx > 0) { + frag_idx--; + buf_info--; + dma_unmap_page(dev, buf_info->dma_addr, + buf_info->len, DMA_TO_DEVICE); + } + dma_unmap_single(dev, buf_info->dma_addr, buf_info->len, DMA_TO_DEVICE); + return -EIO; + } + static void ionic_tx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info, struct ionic_cq_info *cq_info, void *cb_arg) { - struct ionic_txq_sg_desc *sg_desc = desc_info->sg_desc; - struct ionic_txq_sg_elem *elem = sg_desc->elems; + struct ionic_buf_info *buf_info = desc_info->bufs; struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct ionic_txq_desc *desc = desc_info->desc; - struct device *dev = q->lif->ionic->dev; - u8 opcode, flags, nsge; + struct device *dev = q->dev; u16 queue_index; unsigned int i; - u64 addr; - - decode_txq_desc_cmd(le64_to_cpu(desc->cmd), - &opcode, &flags, &nsge, &addr); - - /* use unmap_single only if either this is not TSO, - * or this is first descriptor of a TSO - */ - if (opcode != IONIC_TXQ_DESC_OPCODE_TSO || - flags & IONIC_TXQ_DESC_FLAG_TSO_SOT) - dma_unmap_single(dev, (dma_addr_t)addr, - le16_to_cpu(desc->len), DMA_TO_DEVICE); - else - dma_unmap_page(dev, (dma_addr_t)addr, - le16_to_cpu(desc->len), DMA_TO_DEVICE);
- for (i = 0; i < nsge; i++, elem++) - dma_unmap_page(dev, (dma_addr_t)le64_to_cpu(elem->addr), - le16_to_cpu(elem->len), DMA_TO_DEVICE); + if (desc_info->nbufs) { + dma_unmap_single(dev, (dma_addr_t)buf_info->dma_addr, + buf_info->len, DMA_TO_DEVICE); + buf_info++; + for (i = 1; i < desc_info->nbufs; i++, buf_info++) + dma_unmap_page(dev, (dma_addr_t)buf_info->dma_addr, + buf_info->len, DMA_TO_DEVICE); + }
if (cb_arg) { struct sk_buff *skb = cb_arg; - u32 len = skb->len;
queue_index = skb_get_queue_mapping(skb); if (unlikely(__netif_subqueue_stopped(q->lif->netdev, @@@ -631,17 -678,21 +678,21 @@@ netif_wake_subqueue(q->lif->netdev, queue_index); q->wake++; } - dev_kfree_skb_any(skb); + + desc_info->bytes = skb->len; stats->clean++; - netdev_tx_completed_queue(q_to_ndq(q), 1, len); + + dev_consume_skb_any(skb); } }
static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info) { - struct ionic_txq_comp *comp = cq_info->cq_desc; + struct ionic_txq_comp *comp = cq_info->txcq; struct ionic_queue *q = cq->bound_q; struct ionic_desc_info *desc_info; + int bytes = 0; + int pkts = 0; u16 index;
if (!color_match(comp->color, cq->done_color)) @@@ -652,13 -703,21 +703,21 @@@ */ do { desc_info = &q->info[q->tail_idx]; + desc_info->bytes = 0; index = q->tail_idx; q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); ionic_tx_clean(q, desc_info, cq_info, desc_info->cb_arg); + if (desc_info->cb_arg) { + pkts++; + bytes += desc_info->bytes; + } desc_info->cb = NULL; desc_info->cb_arg = NULL; } while (index != le16_to_cpu(comp->comp_index));
+ if (pkts && bytes) + netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes); + return true; }
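For context on the ionic_tx_service() change just above: ionic_tx_clean() now stashes the skb length in desc_info->bytes, and the service loop sums packets and bytes so byte-queue limits get one netdev_tx_completed_queue() call per cleanup pass instead of one per skb. A userspace model of that batched accounting (the struct and field names here are purely illustrative):

#include <stdio.h>

struct demo_desc {
	int has_skb;		/* models desc_info->cb_arg being set */
	unsigned int bytes;	/* models desc_info->bytes from tx_clean */
};

/* Walk a batch of completed descriptors and report the totals once. */
static void demo_complete(const struct demo_desc *descs, int n)
{
	unsigned int bytes = 0;
	int pkts = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (!descs[i].has_skb)	/* descriptor without an skb attached */
			continue;
		pkts++;
		bytes += descs[i].bytes;
	}
	if (pkts && bytes)
		printf("netdev_tx_completed_queue(q, %d, %u)\n", pkts, bytes);
}

int main(void)
{
	struct demo_desc ring[] = {
		{ .has_skb = 1, .bytes = 1514 },
		{ .has_skb = 0, .bytes = 0 },
		{ .has_skb = 1, .bytes = 60 },
	};

	demo_complete(ring, 3);	/* prints: netdev_tx_completed_queue(q, 2, 1574) */
	return 0;
}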
@@@ -677,15 -736,25 +736,25 @@@ void ionic_tx_flush(struct ionic_cq *cq void ionic_tx_empty(struct ionic_queue *q) { struct ionic_desc_info *desc_info; + int bytes = 0; + int pkts = 0;
/* walk the not completed tx entries, if any */ while (q->head_idx != q->tail_idx) { desc_info = &q->info[q->tail_idx]; + desc_info->bytes = 0; q->tail_idx = (q->tail_idx + 1) & (q->num_descs - 1); ionic_tx_clean(q, desc_info, NULL, desc_info->cb_arg); + if (desc_info->cb_arg) { + pkts++; + bytes += desc_info->bytes; + } desc_info->cb = NULL; desc_info->cb_arg = NULL; } + + if (pkts && bytes) + netdev_tx_completed_queue(q_to_ndq(q), pkts, bytes); }
static int ionic_tx_tcp_inner_pseudo_csum(struct sk_buff *skb) @@@ -756,50 -825,33 +825,33 @@@ static void ionic_tx_tso_post(struct io desc->hdr_len = cpu_to_le16(hdrlen); desc->mss = cpu_to_le16(mss);
- if (done) { + if (start) { skb_tx_timestamp(skb); netdev_tx_sent_queue(q_to_ndq(q), skb->len); - ionic_txq_post(q, !netdev_xmit_more(), ionic_tx_clean, skb); + ionic_txq_post(q, false, ionic_tx_clean, skb); } else { - ionic_txq_post(q, false, ionic_tx_clean, NULL); + ionic_txq_post(q, done, NULL, NULL); } }
- static struct ionic_txq_desc *ionic_tx_tso_next(struct ionic_queue *q, - struct ionic_txq_sg_elem **elem) - { - struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc; - struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc; - - *elem = sg_desc->elems; - return desc; - } - static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb) { struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct ionic_desc_info *rewind_desc_info; - struct device *dev = q->lif->ionic->dev; + struct ionic_desc_info *desc_info; + struct ionic_buf_info *buf_info; struct ionic_txq_sg_elem *elem; struct ionic_txq_desc *desc; - unsigned int frag_left = 0; - unsigned int offset = 0; - u16 abort = q->head_idx; - unsigned int len_left; + unsigned int chunk_len; + unsigned int frag_rem; + unsigned int tso_rem; + unsigned int seg_rem; dma_addr_t desc_addr; + dma_addr_t frag_addr; unsigned int hdrlen; - unsigned int nfrags; - unsigned int seglen; - u64 total_bytes = 0; - u64 total_pkts = 0; - u16 rewind = abort; - unsigned int left; unsigned int len; unsigned int mss; - skb_frag_t *frag; bool start, done; bool outer_csum; - dma_addr_t addr; bool has_vlan; u16 desc_len; u8 desc_nsge; @@@ -807,9 -859,14 +859,14 @@@ bool encap; int err;
+ desc_info = &q->info[q->head_idx]; + buf_info = desc_info->bufs; + + if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) + return -EIO; + + len = skb->len; mss = skb_shinfo(skb)->gso_size; - nfrags = skb_shinfo(skb)->nr_frags; - len_left = skb->len - skb_headlen(skb); outer_csum = (skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM) || (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM); has_vlan = !!skb_vlan_tag_present(skb); @@@ -834,125 -891,75 +891,75 @@@ else hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
- seglen = hdrlen + mss; - left = skb_headlen(skb); + tso_rem = len; + seg_rem = min(tso_rem, hdrlen + mss);
- desc = ionic_tx_tso_next(q, &elem); - start = true; + frag_addr = 0; + frag_rem = 0;
- /* Chop skb->data up into desc segments */ + start = true;
- while (left > 0) { - len = min(seglen, left); - frag_left = seglen - len; - desc_addr = ionic_tx_map_single(q, skb->data + offset, len); - if (dma_mapping_error(dev, desc_addr)) - goto err_out_abort; - desc_len = len; + while (tso_rem > 0) { + desc = NULL; + elem = NULL; + desc_addr = 0; + desc_len = 0; desc_nsge = 0; - left -= len; - offset += len; - if (nfrags > 0 && frag_left > 0) - continue; - done = (nfrags == 0 && left == 0); - ionic_tx_tso_post(q, desc, skb, - desc_addr, desc_nsge, desc_len, - hdrlen, mss, - outer_csum, - vlan_tci, has_vlan, - start, done); - total_pkts++; - total_bytes += start ? len : len + hdrlen; - desc = ionic_tx_tso_next(q, &elem); - start = false; - seglen = mss; - } - - /* Chop skb frags into desc segments */ - - for (frag = skb_shinfo(skb)->frags; len_left; frag++) { - offset = 0; - left = skb_frag_size(frag); - len_left -= left; - nfrags--; - stats->frags++; - - while (left > 0) { - if (frag_left > 0) { - len = min(frag_left, left); - frag_left -= len; - addr = ionic_tx_map_frag(q, frag, offset, len); - if (dma_mapping_error(dev, addr)) - goto err_out_abort; - elem->addr = cpu_to_le64(addr); - elem->len = cpu_to_le16(len); + /* use fragments until we have enough to post a single descriptor */ + while (seg_rem > 0) { + /* if the fragment is exhausted then move to the next one */ + if (frag_rem == 0) { + /* grab the next fragment */ + frag_addr = buf_info->dma_addr; + frag_rem = buf_info->len; + buf_info++; + } + chunk_len = min(frag_rem, seg_rem); + if (!desc) { + /* fill main descriptor */ + desc = desc_info->txq_desc; + elem = desc_info->txq_sg_desc->elems; + desc_addr = frag_addr; + desc_len = chunk_len; + } else { + /* fill sg descriptor */ + elem->addr = cpu_to_le64(frag_addr); + elem->len = cpu_to_le16(chunk_len); elem++; desc_nsge++; - left -= len; - offset += len; - if (nfrags > 0 && frag_left > 0) - continue; - done = (nfrags == 0 && left == 0); - ionic_tx_tso_post(q, desc, skb, desc_addr, - desc_nsge, desc_len, - hdrlen, mss, outer_csum, - vlan_tci, has_vlan, - start, done); - total_pkts++; - total_bytes += start ? len : len + hdrlen; - desc = ionic_tx_tso_next(q, &elem); - start = false; - } else { - len = min(mss, left); - frag_left = mss - len; - desc_addr = ionic_tx_map_frag(q, frag, - offset, len); - if (dma_mapping_error(dev, desc_addr)) - goto err_out_abort; - desc_len = len; - desc_nsge = 0; - left -= len; - offset += len; - if (nfrags > 0 && frag_left > 0) - continue; - done = (nfrags == 0 && left == 0); - ionic_tx_tso_post(q, desc, skb, desc_addr, - desc_nsge, desc_len, - hdrlen, mss, outer_csum, - vlan_tci, has_vlan, - start, done); - total_pkts++; - total_bytes += start ? len : len + hdrlen; - desc = ionic_tx_tso_next(q, &elem); - start = false; } + frag_addr += chunk_len; + frag_rem -= chunk_len; + tso_rem -= chunk_len; + seg_rem -= chunk_len; } + seg_rem = min(tso_rem, mss); + done = (tso_rem == 0); + /* post descriptor */ + ionic_tx_tso_post(q, desc, skb, + desc_addr, desc_nsge, desc_len, + hdrlen, mss, outer_csum, vlan_tci, has_vlan, + start, done); + start = false; + /* Buffer information is stored with the first tso descriptor */ + desc_info = &q->info[q->head_idx]; + desc_info->nbufs = 0; }
- stats->pkts += total_pkts; - stats->bytes += total_bytes; + stats->pkts += DIV_ROUND_UP(len - hdrlen, mss); + stats->bytes += len; stats->tso++; - stats->tso_bytes += total_bytes; + stats->tso_bytes = len;
return 0; - - err_out_abort: - while (rewind != q->head_idx) { - rewind_desc_info = &q->info[rewind]; - ionic_tx_clean(q, rewind_desc_info, NULL, NULL); - rewind = (rewind + 1) & (q->num_descs - 1); - } - q->head_idx = abort; - - return -ENOMEM; }
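The rewritten ionic_tx_tso() above maps the whole skb once via ionic_tx_map_skb() and then carves descriptor-sized segments (hdrlen + mss for the first, mss afterwards) out of the mapped buffer list, crossing fragment boundaries as needed. A compact userspace model of that carving loop — buffer lengths, hdrlen and mss are made-up demo values:

#include <stdio.h>

int main(void)
{
	unsigned int frags[] = { 200, 3000, 1800 };	/* mapped buffer lengths */
	unsigned int fi = 0, frag_rem = 0;
	unsigned int hdrlen = 54, mss = 1400;
	unsigned int tso_rem = 200 + 3000 + 1800;
	unsigned int seg_rem = (tso_rem < hdrlen + mss) ? tso_rem : hdrlen + mss;
	int seg = 0;

	while (tso_rem > 0) {
		unsigned int seg_len = 0;

		while (seg_rem > 0) {
			unsigned int chunk;

			if (frag_rem == 0)	/* move to the next mapped buffer */
				frag_rem = frags[fi++];
			chunk = (frag_rem < seg_rem) ? frag_rem : seg_rem;
			seg_len += chunk;	/* one descriptor/sg element per chunk */
			frag_rem -= chunk;
			tso_rem -= chunk;
			seg_rem -= chunk;
		}
		printf("segment %d: %u bytes\n", seg++, seg_len);
		seg_rem = (tso_rem < mss) ? tso_rem : mss;
	}
	return 0;	/* prints segments of 1454, 1400, 1400 and 746 bytes */
}

With all DMA mappings done up front in ionic_tx_map_skb() (which unwinds its own failures), the old err_out_abort rewind path becomes unnecessary, which is why it disappears in this hunk.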
- static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb) + static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { - struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc; + struct ionic_txq_desc *desc = desc_info->txq_desc; + struct ionic_buf_info *buf_info = desc_info->bufs; struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; - dma_addr_t dma_addr; bool has_vlan; u8 flags = 0; bool encap; @@@ -961,23 -968,22 +968,22 @@@ has_vlan = !!skb_vlan_tag_present(skb); encap = skb->encapsulation;
- dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); - if (dma_mapping_error(dev, dma_addr)) - return -ENOMEM; - flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0; flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL, - flags, skb_shinfo(skb)->nr_frags, dma_addr); + flags, skb_shinfo(skb)->nr_frags, + buf_info->dma_addr); desc->cmd = cpu_to_le64(cmd); - desc->len = cpu_to_le16(skb_headlen(skb)); - desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb)); - desc->csum_offset = cpu_to_le16(skb->csum_offset); + desc->len = cpu_to_le16(buf_info->len); if (has_vlan) { desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb)); stats->vlan_inserted++; + } else { + desc->vlan_tci = 0; } + desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb)); + desc->csum_offset = cpu_to_le16(skb->csum_offset);
if (skb_csum_is_sctp(skb)) stats->crc32_csum++; @@@ -987,12 -993,12 +993,12 @@@ return 0; }
- static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb) + static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { - struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc; + struct ionic_txq_desc *desc = desc_info->txq_desc; + struct ionic_buf_info *buf_info = desc_info->bufs; struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; - dma_addr_t dma_addr; bool has_vlan; u8 flags = 0; bool encap; @@@ -1001,67 -1007,66 +1007,66 @@@ has_vlan = !!skb_vlan_tag_present(skb); encap = skb->encapsulation;
- dma_addr = ionic_tx_map_single(q, skb->data, skb_headlen(skb)); - if (dma_mapping_error(dev, dma_addr)) - return -ENOMEM; - flags |= has_vlan ? IONIC_TXQ_DESC_FLAG_VLAN : 0; flags |= encap ? IONIC_TXQ_DESC_FLAG_ENCAP : 0;
cmd = encode_txq_desc_cmd(IONIC_TXQ_DESC_OPCODE_CSUM_NONE, - flags, skb_shinfo(skb)->nr_frags, dma_addr); + flags, skb_shinfo(skb)->nr_frags, + buf_info->dma_addr); desc->cmd = cpu_to_le64(cmd); - desc->len = cpu_to_le16(skb_headlen(skb)); + desc->len = cpu_to_le16(buf_info->len); if (has_vlan) { desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb)); stats->vlan_inserted++; + } else { + desc->vlan_tci = 0; } + desc->csum_start = 0; + desc->csum_offset = 0;
stats->csum_none++;
return 0; }
- static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb) + static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb, + struct ionic_desc_info *desc_info) { - struct ionic_txq_sg_desc *sg_desc = q->info[q->head_idx].txq_sg_desc; - unsigned int len_left = skb->len - skb_headlen(skb); + struct ionic_txq_sg_desc *sg_desc = desc_info->txq_sg_desc; + struct ionic_buf_info *buf_info = &desc_info->bufs[1]; struct ionic_txq_sg_elem *elem = sg_desc->elems; struct ionic_tx_stats *stats = q_to_tx_stats(q); - struct device *dev = q->lif->ionic->dev; - dma_addr_t dma_addr; - skb_frag_t *frag; - u16 len; + unsigned int i;
- for (frag = skb_shinfo(skb)->frags; len_left; frag++, elem++) { - len = skb_frag_size(frag); - elem->len = cpu_to_le16(len); - dma_addr = ionic_tx_map_frag(q, frag, 0, len); - if (dma_mapping_error(dev, dma_addr)) - return -ENOMEM; - elem->addr = cpu_to_le64(dma_addr); - len_left -= len; - stats->frags++; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, buf_info++, elem++) { + elem->addr = cpu_to_le64(buf_info->dma_addr); + elem->len = cpu_to_le16(buf_info->len); }
+ stats->frags += skb_shinfo(skb)->nr_frags; + return 0; }
static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb) { + struct ionic_desc_info *desc_info = &q->info[q->head_idx]; struct ionic_tx_stats *stats = q_to_tx_stats(q); int err;
+ if (unlikely(ionic_tx_map_skb(q, skb, desc_info))) + return -EIO; + /* set up the initial descriptor */ if (skb->ip_summed == CHECKSUM_PARTIAL) - err = ionic_tx_calc_csum(q, skb); + err = ionic_tx_calc_csum(q, skb, desc_info); else - err = ionic_tx_calc_no_csum(q, skb); + err = ionic_tx_calc_no_csum(q, skb, desc_info); if (err) return err;
/* add frags */ - err = ionic_tx_skb_frags(q, skb); + err = ionic_tx_skb_frags(q, skb, desc_info); if (err) return err;
@@@ -1077,19 -1082,16 +1082,18 @@@
static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) { - int sg_elems = q->lif->qtype_info[IONIC_QTYPE_TXQ].max_sg_elems; struct ionic_tx_stats *stats = q_to_tx_stats(q); + int ndescs; int err;
- /* If TSO, need roundup(skb->len/mss) descs */ + /* Each desc is mss long max, so a descriptor for each gso_seg */ if (skb_is_gso(skb)) - return (skb->len / skb_shinfo(skb)->gso_size) + 1; + ndescs = skb_shinfo(skb)->gso_segs; + else + ndescs = 1;
- if (skb_shinfo(skb)->nr_frags <= sg_elems) - /* If non-TSO, just need 1 desc and nr_frags sg elems */ + if (skb_shinfo(skb)->nr_frags <= q->max_sg_elems) - return 1; + return ndescs;
/* Too many frags, so linearize */ err = skb_linearize(skb); @@@ -1098,7 -1100,8 +1102,7 @@@
stats->linearize++;
- /* Need 1 desc and zero sg elems */ - return 1; + return ndescs; }
static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs) diff --combined drivers/net/ethernet/realtek/r8169_main.c index 581a92fc3292,66d10aa47c08..7a8bb7e833f3 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@@ -1586,12 -1586,10 +1586,10 @@@ DECLARE_RTL_COND(rtl_counters_cond
static void rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd) { - dma_addr_t paddr = tp->counters_phys_addr; - u32 cmd; + u32 cmd = lower_32_bits(tp->counters_phys_addr);
- RTL_W32(tp, CounterAddrHigh, (u64)paddr >> 32); + RTL_W32(tp, CounterAddrHigh, upper_32_bits(tp->counters_phys_addr)); rtl_pci_commit(tp); - cmd = (u64)paddr & DMA_BIT_MASK(32); RTL_W32(tp, CounterAddrLow, cmd); RTL_W32(tp, CounterAddrLow, cmd | counter_cmd);
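upper_32_bits()/lower_32_bits() in the new rtl8169_do_counters() are the stock kernel helpers for splitting a 64-bit value, replacing the open-coded shift and DMA_BIT_MASK(32) masking. A trivial userspace equivalent (the address value is made up):

#include <stdint.h>
#include <stdio.h>

static uint32_t demo_lower_32_bits(uint64_t n) { return (uint32_t)n; }
static uint32_t demo_upper_32_bits(uint64_t n) { return (uint32_t)(n >> 32); }

int main(void)
{
	uint64_t paddr = 0x0000001234abc000ull;	/* counters DMA address (demo) */

	printf("CounterAddrHigh=%#x CounterAddrLow=%#x\n",
	       demo_upper_32_bits(paddr), demo_lower_32_bits(paddr));
	/* CounterAddrHigh=0x12 CounterAddrLow=0x34abc000 */
	return 0;
}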
@@@ -1903,6 -1901,15 +1901,15 @@@ static int rtl8169_set_eee(struct net_d return ret; }
+ static void rtl8169_get_ringparam(struct net_device *dev, + struct ethtool_ringparam *data) + { + data->rx_max_pending = NUM_RX_DESC; + data->rx_pending = NUM_RX_DESC; + data->tx_max_pending = NUM_TX_DESC; + data->tx_pending = NUM_TX_DESC; + } + static const struct ethtool_ops rtl8169_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, @@@ -1923,6 -1930,7 +1930,7 @@@ .set_eee = rtl8169_set_eee, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_ringparam = rtl8169_get_ringparam, };
static void rtl_enable_eee(struct rtl8169_private *tp) @@@ -4646,9 -4654,6 +4654,9 @@@ static void rtl8169_down(struct rtl8169
rtl8169_update_counters(tp);
+ pci_clear_master(tp->pci_dev); + rtl_pci_commit(tp); + rtl8169_cleanup(tp, true);
rtl_prepare_power_down(tp); @@@ -4656,7 -4661,6 +4664,7 @@@
static void rtl8169_up(struct rtl8169_private *tp) { + pci_set_master(tp->pci_dev); phy_resume(tp->phydev); rtl8169_init_phy(tp); napi_enable(&tp->napi); @@@ -5311,6 -5315,8 +5319,6 @@@ static int rtl_init_one(struct pci_dev
rtl_hw_reset(tp);
- pci_set_master(pdev); - rc = rtl_alloc_irq(tp); if (rc < 0) { dev_err(&pdev->dev, "Can't allocate interrupt\n"); diff --combined drivers/net/ipa/ipa_qmi.c index e594bf3b600f,af8666b89b37..f3746ed9b797 --- a/drivers/net/ipa/ipa_qmi.c +++ b/drivers/net/ipa/ipa_qmi.c @@@ -249,7 -249,6 +249,7 @@@ static const struct qmi_msg_handler ipa .decoded_size = IPA_QMI_DRIVER_INIT_COMPLETE_REQ_SZ, .fn = ipa_server_driver_init_complete, }, + { }, };
/* Handle an INIT_DRIVER response message from the modem. */ @@@ -270,7 -269,6 +270,7 @@@ static const struct qmi_msg_handler ipa .decoded_size = IPA_QMI_INIT_DRIVER_RSP_SZ, .fn = ipa_client_init_driver, }, + { }, };
/* Return a pointer to an init modem driver request structure, which contains @@@ -380,7 -378,7 +380,7 @@@ init_modem_driver_req(struct ipa_qmi *i /* None of the stats fields are valid (IPA v4.0 and above) */
if (ipa->version != IPA_VERSION_3_5_1) { - mem = &ipa->mem[IPA_MEM_STATS_QUOTA]; + mem = &ipa->mem[IPA_MEM_STATS_QUOTA_MODEM]; if (mem->size) { req.hw_stats_quota_base_addr_valid = 1; req.hw_stats_quota_base_addr = diff --combined drivers/net/phy/phylink.c index dc2800beacc3,12a047d47dec..96d8e88b4e46 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@@ -271,8 -271,9 +271,9 @@@ static int phylink_parse_mode(struct ph pl->cfg_link_an_mode = MLO_AN_FIXED; fwnode_handle_put(dn);
- if (fwnode_property_read_string(fwnode, "managed", &managed) == 0 && - strcmp(managed, "in-band-status") == 0) { + if ((fwnode_property_read_string(fwnode, "managed", &managed) == 0 && + strcmp(managed, "in-band-status") == 0) || + pl->config->ovr_an_inband) { if (pl->cfg_link_an_mode == MLO_AN_FIXED) { phylink_err(pl, "can't use both fixed-link and in-band-status\n"); @@@ -476,7 -477,7 +477,7 @@@ static void phylink_major_config(struc err = pl->mac_ops->mac_finish(pl->config, pl->cur_link_an_mode, state->interface); if (err < 0) - phylink_err(pl, "mac_prepare failed: %pe\n", + phylink_err(pl, "mac_finish failed: %pe\n", ERR_PTR(err)); } } diff --combined drivers/s390/net/qeth_core_main.c index 175b82b98f36,6954d4e831a3..a1f08e9aa064 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@@ -369,7 -369,8 +369,7 @@@ static int qeth_cq_init(struct qeth_car QDIO_MAX_BUFFERS_PER_Q); card->qdio.c_q->next_buf_to_init = 127; rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, - card->qdio.no_in_queues - 1, 0, - 127); + card->qdio.no_in_queues - 1, 0, 127, NULL); if (rc) { QETH_CARD_TEXT_(card, 2, "1err%d", rc); goto out; @@@ -382,22 -383,48 +382,22 @@@ out
static int qeth_alloc_cq(struct qeth_card *card) { - int rc; - if (card->options.cq == QETH_CQ_ENABLED) { - int i; - struct qdio_outbuf_state *outbuf_states; - QETH_CARD_TEXT(card, 2, "cqon"); card->qdio.c_q = qeth_alloc_qdio_queue(); if (!card->qdio.c_q) { - rc = -1; - goto kmsg_out; + dev_err(&card->gdev->dev, "Failed to create completion queue\n"); + return -ENOMEM; } + card->qdio.no_in_queues = 2; - card->qdio.out_bufstates = - kcalloc(card->qdio.no_out_queues * - QDIO_MAX_BUFFERS_PER_Q, - sizeof(struct qdio_outbuf_state), - GFP_KERNEL); - outbuf_states = card->qdio.out_bufstates; - if (outbuf_states == NULL) { - rc = -1; - goto free_cq_out; - } - for (i = 0; i < card->qdio.no_out_queues; ++i) { - card->qdio.out_qs[i]->bufstates = outbuf_states; - outbuf_states += QDIO_MAX_BUFFERS_PER_Q; - } } else { QETH_CARD_TEXT(card, 2, "nocq"); card->qdio.c_q = NULL; card->qdio.no_in_queues = 1; } QETH_CARD_TEXT_(card, 2, "iqc%d", card->qdio.no_in_queues); - rc = 0; -out: - return rc; -free_cq_out: - qeth_free_qdio_queue(card->qdio.c_q); - card->qdio.c_q = NULL; -kmsg_out: - dev_err(&card->gdev->dev, "Failed to create completion queue\n"); - goto out; + return 0; }
static void qeth_free_cq(struct qeth_card *card) @@@ -407,6 -434,8 +407,6 @@@ qeth_free_qdio_queue(card->qdio.c_q); card->qdio.c_q = NULL; } - kfree(card->qdio.out_bufstates); - card->qdio.out_bufstates = NULL; }
static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15, @@@ -458,12 -487,12 +458,12 @@@ static void qeth_qdio_handle_aob(struc switch (atomic_xchg(&buffer->state, new_state)) { case QETH_QDIO_BUF_PRIMED: /* Faster than TX completion code, let it handle the async - * completion for us. + * completion for us. It will also recycle the QAOB. */ break; case QETH_QDIO_BUF_PENDING: /* TX completion code is active and will handle the async - * completion for us. + * completion for us. It will also recycle the QAOB. */ break; case QETH_QDIO_BUF_NEED_QAOB: @@@ -472,7 -501,7 +472,7 @@@ qeth_notify_skbs(buffer->q, buffer, notification);
/* Free dangling allocations. The attached skbs are handled by - * qeth_tx_complete_pending_bufs(). + * qeth_tx_complete_pending_bufs(), and so is the QAOB. */ for (i = 0; i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card); @@@ -491,6 -520,8 +491,6 @@@ default: WARN_ON_ONCE(1); } - - qdio_release_aob(aob); }
static void qeth_setup_ccw(struct ccw1 *ccw, u8 cmd_code, u8 flags, u32 len, @@@ -1420,16 -1451,9 +1420,16 @@@ static void qeth_clear_output_buffer(st atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY); }
+static void qeth_free_out_buf(struct qeth_qdio_out_buffer *buf) +{ + if (buf->aob) + qdio_release_aob(buf->aob); + kmem_cache_free(qeth_qdio_outbuf_cache, buf); +} + static void qeth_tx_complete_pending_bufs(struct qeth_card *card, struct qeth_qdio_out_q *queue, - bool drain) + bool drain, int budget) { struct qeth_qdio_out_buffer *buf, *tmp;
@@@ -1441,10 -1465,10 +1441,10 @@@ if (drain) qeth_notify_skbs(queue, buf, TX_NOTIFY_GENERALERROR); - qeth_tx_complete_buf(buf, drain, 0); + qeth_tx_complete_buf(buf, drain, budget);
list_del(&buf->list_entry); - kmem_cache_free(qeth_qdio_outbuf_cache, buf); + qeth_free_out_buf(buf); } } } @@@ -1453,7 -1477,7 +1453,7 @@@ static void qeth_drain_output_queue(str { int j;
- qeth_tx_complete_pending_bufs(q->card, q, true); + qeth_tx_complete_pending_bufs(q->card, q, true, 0);
for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { if (!q->bufs[j]) @@@ -1461,7 -1485,7 +1461,7 @@@
qeth_clear_output_buffer(q, q->bufs[j], true, 0); if (free) { - kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[j]); + qeth_free_out_buf(q->bufs[j]); q->bufs[j] = NULL; } } @@@ -2566,11 -2590,12 +2566,12 @@@ static int qeth_ulp_setup(struct qeth_c return qeth_send_control_data(card, iob, qeth_ulp_setup_cb, NULL); }
- static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx) + static int qeth_alloc_out_buf(struct qeth_qdio_out_q *q, unsigned int bidx, + gfp_t gfp) { struct qeth_qdio_out_buffer *newbuf;
- newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, GFP_ATOMIC); + newbuf = kmem_cache_zalloc(qeth_qdio_outbuf_cache, gfp); if (!newbuf) return -ENOMEM;
@@@ -2605,7 -2630,7 +2606,7 @@@ static struct qeth_qdio_out_q *qeth_all goto err_qdio_bufs;
for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) { - if (qeth_init_qdio_out_buf(q, i)) + if (qeth_alloc_out_buf(q, i, GFP_KERNEL)) goto err_out_bufs; }
@@@ -2613,7 -2638,7 +2614,7 @@@
err_out_bufs: while (i > 0) - kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[--i]); + qeth_free_out_buf(q->bufs[--i]); qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); err_qdio_bufs: kfree(q); @@@ -3000,8 -3025,7 +3001,8 @@@ static int qeth_init_qdio_queues(struc }
card->qdio.in_q->next_buf_to_init = QDIO_BUFNR(rx_bufs); - rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0, rx_bufs); + rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, 0, rx_bufs, + NULL); if (rc) { QETH_CARD_TEXT_(card, 2, "1err%d", rc); return rc; @@@ -3493,7 -3517,7 +3494,7 @@@ static unsigned int qeth_rx_refill_queu }
rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, 0, - queue->next_buf_to_init, count); + queue->next_buf_to_init, count, NULL); if (rc) { QETH_CARD_TEXT(card, 2, "qinberr"); } @@@ -3602,7 -3626,6 +3603,7 @@@ static void qeth_flush_buffers(struct q struct qeth_qdio_out_buffer *buf = queue->bufs[index]; unsigned int qdio_flags = QDIO_FLAG_SYNC_OUTPUT; struct qeth_card *card = queue->card; + struct qaob *aob = NULL; int rc; int i;
@@@ -3615,24 -3638,16 +3616,24 @@@ SBAL_EFLAGS_LAST_ENTRY; queue->coalesced_frames += buf->frames;
- if (queue->bufstates) - queue->bufstates[bidx].user = buf; - if (IS_IQD(card)) { skb_queue_walk(&buf->skb_list, skb) skb_tx_timestamp(skb); } }
- if (!IS_IQD(card)) { + if (IS_IQD(card)) { + if (card->options.cq == QETH_CQ_ENABLED && + !qeth_iqd_is_mcast_queue(card, queue) && + count == 1) { + if (!buf->aob) + buf->aob = qdio_allocate_aob(); + if (buf->aob) { + aob = buf->aob; + aob->user1 = (u64) buf; + } + } + } else { if (!queue->do_pack) { if ((atomic_read(&queue->used_buffers) >= (QETH_HIGH_WATERMARK_PACK - @@@ -3663,8 -3678,8 +3664,8 @@@ }
QETH_TXQ_STAT_INC(queue, doorbell); - rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags, - queue->queue_no, index, count); + rc = do_QDIO(CARD_DDEV(card), qdio_flags, queue->queue_no, index, count, + aob);
switch (rc) { case 0: @@@ -3800,7 -3815,8 +3801,7 @@@ static void qeth_qdio_cq_handler(struc qeth_scrub_qdio_buffer(buffer, QDIO_MAX_ELEMENTS_PER_BUFFER); } rc = do_QDIO(CARD_DDEV(card), QDIO_FLAG_SYNC_INPUT, queue, - card->qdio.c_q->next_buf_to_init, - count); + cq->next_buf_to_init, count, NULL); if (rc) { dev_warn(&card->gdev->dev, "QDIO reported an error, rc=%i\n", rc); @@@ -5255,6 -5271,7 +5256,6 @@@ static int qeth_qdio_establish(struct q init_data.int_parm = (unsigned long) card; init_data.input_sbal_addr_array = in_sbal_ptrs; init_data.output_sbal_addr_array = out_sbal_ptrs; - init_data.output_sbal_state_array = card->qdio.out_bufstates; init_data.scan_threshold = IS_IQD(card) ? 0 : 32;
if (atomic_cmpxchg(&card->qdio.state, QETH_QDIO_ALLOCATED, @@@ -6053,15 -6070,7 +6054,15 @@@ static void qeth_iqd_tx_complete(struc bool error = !!qdio_error;
if (qdio_error == QDIO_ERROR_SLSB_PENDING) { - WARN_ON_ONCE(card->options.cq != QETH_CQ_ENABLED); + struct qaob *aob = buffer->aob; + + if (!aob) { + netdev_WARN_ONCE(card->dev, + "Pending TX buffer %#x without QAOB on TX queue %u\n", + bidx, queue->queue_no); + qeth_schedule_recovery(card); + return; + }
QETH_CARD_TEXT_(card, 5, "pel%u", bidx);
@@@ -6080,7 -6089,8 +6081,8 @@@
/* Prepare the queue slot for immediate re-use: */ qeth_scrub_qdio_buffer(buffer->buffer, queue->max_elements); - if (qeth_init_qdio_out_buf(queue, bidx)) { + if (qeth_alloc_out_buf(queue, bidx, + GFP_ATOMIC)) { QETH_CARD_TEXT(card, 2, "outofbuf"); qeth_schedule_recovery(card); } @@@ -6117,8 -6127,6 +6119,8 @@@ default: WARN_ON_ONCE(1); } + + memset(aob, 0, sizeof(*aob)); } else if (card->options.cq == QETH_CQ_ENABLED) { qeth_notify_skbs(queue, buffer, qeth_compute_cq_notification(sflags, 0)); @@@ -6144,7 -6152,7 +6146,7 @@@ static int qeth_tx_poll(struct napi_str unsigned int bytes = 0; int completed;
- qeth_tx_complete_pending_bufs(card, queue, false); + qeth_tx_complete_pending_bufs(card, queue, false, budget);
if (qeth_out_queue_is_empty(queue)) { napi_complete(napi); diff --combined include/linux/bpf.h index 3625f019767d,a25730eaa148..39dce9d3c3a5 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@@ -21,7 -21,6 +21,7 @@@ #include <linux/capability.h> #include <linux/sched/mm.h> #include <linux/slab.h> +#include <linux/percpu-refcount.h>
struct bpf_verifier_env; struct bpf_verifier_log; @@@ -40,6 -39,7 +40,7 @@@ struct bpf_local_storage struct bpf_local_storage_map; struct kobject; struct mem_cgroup; + struct bpf_func_state;
extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@@ -118,6 -118,9 +119,9 @@@ struct bpf_map_ops void *owner, u32 size); struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
+ /* Misc helpers.*/ + int (*map_redirect)(struct bpf_map *map, u32 ifindex, u64 flags); + /* map_meta_equal must be implemented for maps that can be * used as an inner map. It is a runtime check to ensure * an inner map can be inserted to an outer map. @@@ -130,6 -133,13 +134,13 @@@ bool (*map_meta_equal)(const struct bpf_map *meta0, const struct bpf_map *meta1);
+ + int (*map_set_for_each_callback_args)(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int (*map_for_each_callback)(struct bpf_map *map, void *callback_fn, + void *callback_ctx, u64 flags); + /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; @@@ -296,6 -306,8 +307,8 @@@ enum bpf_arg_type ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ + ARG_PTR_TO_FUNC, /* pointer to a bpf program function */ + ARG_PTR_TO_STACK_OR_NULL, /* pointer to stack or NULL */ __BPF_ARG_TYPE_MAX, };
@@@ -412,6 -424,8 +425,8 @@@ enum bpf_reg_type PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ + PTR_TO_FUNC, /* reg points to a bpf program function */ + PTR_TO_MAP_KEY, /* reg points to a map element key */ };
/* The information passed from prog-specific *_is_valid_access @@@ -507,6 -521,11 +522,11 @@@ enum bpf_cgroup_storage_type */ #define MAX_BPF_FUNC_ARGS 12
+ /* The maximum number of arguments passed through registers + * a single function may have. + */ + #define MAX_BPF_FUNC_REG_ARGS 5 + struct btf_func_model { u8 ret_size; u8 nr_args; @@@ -557,8 -576,7 +577,8 @@@ struct bpf_tramp_progs * fentry = a set of program to run before calling original function * fexit = a set of program to run after original function */ -int arch_prepare_bpf_trampoline(void *image, void *image_end, +struct bpf_tramp_image; +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_progs *tprogs, void *orig_call); @@@ -567,8 -585,6 +587,8 @@@ u64 notrace __bpf_prog_enter(struct bpf void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog); void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start); +void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); +void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
struct bpf_ksym { unsigned long start; @@@ -587,18 -603,6 +607,18 @@@ enum bpf_tramp_prog_type BPF_TRAMP_REPLACE, /* more than MAX */ };
+struct bpf_tramp_image { + void *image; + struct bpf_ksym ksym; + struct percpu_ref pcref; + void *ip_after_call; + void *ip_epilogue; + union { + struct rcu_head rcu; + struct work_struct work; + }; +}; + struct bpf_trampoline { /* hlist for trampoline_table */ struct hlist_node hlist; @@@ -621,8 -625,9 +641,8 @@@ /* Number of attached programs. A counter per kind. */ int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ - void *image; + struct bpf_tramp_image *cur_image; u64 selector; - struct bpf_ksym ksym; };
struct bpf_attach_target_info { @@@ -706,8 -711,6 +726,8 @@@ void bpf_image_ksym_add(void *data, str void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); +int bpf_jit_charge_modmem(u32 pages); +void bpf_jit_uncharge_modmem(u32 pages); #else static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) @@@ -804,6 -807,7 +824,6 @@@ struct bpf_prog_aux bool func_proto_unreliable; bool sleepable; bool tail_call_reachable; - enum bpf_tramp_prog_type trampoline_prog_type; struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; @@@ -1109,7 -1113,7 +1129,7 @@@ int bpf_prog_array_copy(struct bpf_prog _ret; \ })
-#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ +#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage) \ ({ \ struct bpf_prog_array_item *_item; \ struct bpf_prog *_prog; \ @@@ -1122,8 -1126,7 +1142,8 @@@ goto _out; \ _item = &_array->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ - bpf_cgroup_storage_set(_item->cgroup_storage); \ + if (set_cg_storage) \ + bpf_cgroup_storage_set(_item->cgroup_storage); \ _ret &= func(_prog, ctx); \ _item++; \ } \ @@@ -1170,10 -1173,10 +1190,10 @@@ _out: })
#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ - __BPF_PROG_RUN_ARRAY(array, ctx, func, false) + __BPF_PROG_RUN_ARRAY(array, ctx, func, false, true)
#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ - __BPF_PROG_RUN_ARRAY(array, ctx, func, true) + __BPF_PROG_RUN_ARRAY(array, ctx, func, true, false)
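The extra set_cg_storage argument means only BPF_PROG_RUN_ARRAY installs per-program cgroup storage before each run, while the _CHECK variant now skips it. A minimal caller sketch, purely for illustration (the function name is made up; BPF_PROG_RUN is the per-program dispatch macro of this kernel generation):

	/* Hypothetical caller, for illustration only. */
	static int run_skb_progs(struct bpf_prog_array __rcu *effective,
				 struct sk_buff *skb)
	{
		/* Runs every program in @effective on @skb. With the change
		 * above, the non-_CHECK variant passes set_cg_storage == true,
		 * so bpf_cgroup_storage_set() is called before each program.
		 * The result is the logical AND of all program return codes.
		 */
		return BPF_PROG_RUN_ARRAY(effective, skb, BPF_PROG_RUN);
	}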
#ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); @@@ -1397,6 -1400,10 +1417,10 @@@ void bpf_iter_map_show_fdinfo(const str int bpf_iter_map_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info);
+ int map_set_for_each_callback_args(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee); + int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value, @@@ -1446,9 -1453,9 +1470,9 @@@ struct btf *bpf_get_btf_vmlinux(void) /* Map specifics */ struct xdp_buff; struct sk_buff; + struct bpf_dtab_netdev; + struct bpf_cpu_map_entry;
- struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); - struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); void __dev_flush(void); int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, struct net_device *dev_rx); @@@ -1458,7 -1465,6 +1482,6 @@@ int dev_map_generic_redirect(struct bpf struct bpf_prog *xdp_prog); bool dev_map_can_have_prog(struct bpf_map *map);
- struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); void __cpu_map_flush(void); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); @@@ -1487,6 -1493,9 +1510,9 @@@ int bpf_prog_test_run_flow_dissector(st int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); + int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info); @@@ -1516,6 -1525,7 +1542,7 @@@ struct bpf_prog *bpf_prog_by_id(u32 id) struct bpf_link *bpf_link_by_id(u32 id);
const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); + void bpf_task_storage_free(struct task_struct *task); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@@ -1585,17 -1595,6 +1612,6 @@@ static inline int bpf_obj_get_user(cons return -EOPNOTSUPP; }
- static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, - u32 key) - { - return NULL; - } - - static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map, - u32 key) - { - return NULL; - } static inline bool dev_map_can_have_prog(struct bpf_map *map) { return false; @@@ -1607,6 -1606,7 +1623,7 @@@ static inline void __dev_flush(void
struct xdp_buff; struct bpf_dtab_netdev; + struct bpf_cpu_map_entry;
static inline int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp, @@@ -1631,12 -1631,6 +1648,6 @@@ static inline int dev_map_generic_redir return 0; }
- static inline - struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) - { - return NULL; - } - static inline void __cpu_map_flush(void) { } @@@ -1687,6 -1681,13 +1698,13 @@@ static inline int bpf_prog_test_run_flo return -ENOTSUPP; }
+ static inline int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) + { + return -ENOTSUPP; + } + static inline void bpf_map_put(struct bpf_map *map) { } @@@ -1701,6 -1702,10 +1719,10 @@@ bpf_base_func_proto(enum bpf_func_id fu { return NULL; } + + static inline void bpf_task_storage_free(struct task_struct *task) + { + } #endif /* CONFIG_BPF_SYSCALL */
void __bpf_free_used_btfs(struct bpf_prog_aux *aux, @@@ -1785,22 -1790,24 +1807,24 @@@ static inline void bpf_map_offload_map_ } #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
- #if defined(CONFIG_BPF_STREAM_PARSER) - int sock_map_prog_update(struct bpf_map *map, struct bpf_prog *prog, - struct bpf_prog *old, u32 which); + #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog); int sock_map_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype); int sock_map_update_elem_sys(struct bpf_map *map, void *key, void *value, u64 flags); void sock_map_unhash(struct sock *sk); void sock_map_close(struct sock *sk, long timeout); + + void bpf_sk_reuseport_detach(struct sock *sk); + int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, + void *value); + int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, + void *value, u64 map_flags); #else - static inline int sock_map_prog_update(struct bpf_map *map, - struct bpf_prog *prog, - struct bpf_prog *old, u32 which) + static inline void bpf_sk_reuseport_detach(struct sock *sk) { - return -EOPNOTSUPP; }
+ #ifdef CONFIG_BPF_SYSCALL static inline int sock_map_get_from_fd(const union bpf_attr *attr, struct bpf_prog *prog) { @@@ -1818,20 -1825,7 +1842,7 @@@ static inline int sock_map_update_elem_ { return -EOPNOTSUPP; } - #endif /* CONFIG_BPF_STREAM_PARSER */
- #if defined(CONFIG_INET) && defined(CONFIG_BPF_SYSCALL) - void bpf_sk_reuseport_detach(struct sock *sk); - int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, - void *value); - int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, void *key, - void *value, u64 map_flags); - #else - static inline void bpf_sk_reuseport_detach(struct sock *sk) - { - } - - #ifdef CONFIG_BPF_SYSCALL static inline int bpf_fd_reuseport_array_lookup_elem(struct bpf_map *map, void *key, void *value) { @@@ -1903,6 -1897,9 +1914,9 @@@ extern const struct bpf_func_proto bpf_ extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto; extern const struct bpf_func_proto bpf_sock_from_file_proto; extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto; + extern const struct bpf_func_proto bpf_task_storage_get_proto; + extern const struct bpf_func_proto bpf_task_storage_delete_proto; + extern const struct bpf_func_proto bpf_for_each_map_elem_proto;
const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); diff --combined include/linux/mlx5/driver.h index ab07f09f2bad,23bb01d7c9b9..3d146f1b2e62 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@@ -644,10 -644,14 +644,14 @@@ struct mlx5_td };
struct mlx5e_resources { - u32 pdn; - struct mlx5_td td; - struct mlx5_core_mkey mkey; - struct mlx5_sq_bfreg bfreg; + struct mlx5e_hw_objs { + u32 pdn; + struct mlx5_td td; + struct mlx5_core_mkey mkey; + struct mlx5_sq_bfreg bfreg; + } hw_objs; + struct devlink_port dl_port; + struct net_device *uplink_netdev; };
enum mlx5_sw_icm_type { @@@ -1226,7 -1230,7 +1230,7 @@@ enum MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, };
-static inline bool mlx5_is_roce_enabled(struct mlx5_core_dev *dev) +static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev) { struct devlink *devlink = priv_to_devlink(dev); union devlink_param_value val; diff --combined include/linux/netdevice.h index 87a5d186faff,7005ad80e8d1..da39991ecf8b --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@@ -360,7 -360,6 +360,7 @@@ enum NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ NAPI_STATE_THREADED, /* The poll is performed inside its own thread*/ + NAPI_STATE_SCHED_THREADED, /* Napi is currently scheduled in threaded mode */ };
enum { @@@ -373,7 -372,6 +373,7 @@@ NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), NAPIF_STATE_PREFER_BUSY_POLL = BIT(NAPI_STATE_PREFER_BUSY_POLL), NAPIF_STATE_THREADED = BIT(NAPI_STATE_THREADED), + NAPIF_STATE_SCHED_THREADED = BIT(NAPI_STATE_SCHED_THREADED), };
enum gro_result { @@@ -756,6 -754,13 +756,13 @@@ struct rx_queue_attribute const char *buf, size_t len); };
+ /* XPS map type and offset of the xps map within net_device->xps_maps[]. */ + enum xps_map_type { + XPS_CPUS = 0, + XPS_RXQS, + XPS_MAPS_MAX, + }; + #ifdef CONFIG_XPS /* * This structure holds an XPS map which can be of variable length. The @@@ -773,9 -778,19 +780,19 @@@ struct xps_map
/* * This structure holds all XPS maps for device. Maps are indexed by CPU. + * + * We keep track of the number of cpus/rxqs used when the struct is allocated, + * in nr_ids. This helps avoid accessing out-of-bounds memory. + * + * We keep track of the number of traffic classes used when the struct is + * allocated, in num_tc. This will be used to navigate the maps, to ensure we're + * not crossing their upper bound, as the original dev->num_tc can be updated in + * the meantime. */ struct xps_dev_maps { struct rcu_head rcu; + unsigned int nr_ids; + s16 num_tc; struct xps_map __rcu *attr_map[]; /* Either CPUs map or RXQs map */ };
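The new nr_ids and num_tc fields carry the bounds that later lookups must respect. A minimal sketch of a bounds-checked lookup, assuming the flattened [attr_id][tc] layout described above (the helper name is hypothetical, not the actual net/core/dev.c code):

	static struct xps_map *example_xps_lookup(struct xps_dev_maps *dev_maps,
						  unsigned int attr_id, int tc)
	{
		/* attr_id is a CPU id or an RX queue id, depending on which of
		 * the two maps this is; reject anything outside the recorded
		 * bounds instead of trusting the current dev->num_tc.
		 */
		if (!dev_maps || attr_id >= dev_maps->nr_ids ||
		    tc < 0 || tc >= dev_maps->num_tc)
			return NULL;

		/* Maps are assumed flattened as attr_map[attr_id * num_tc + tc]. */
		return rcu_dereference(dev_maps->attr_map[attr_id * dev_maps->num_tc + tc]);
	}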
@@@ -1520,6 -1535,8 +1537,8 @@@ struct net_device_ops * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device * @IFF_LIVE_RENAME_OK: rename is allowed while device is up and running + * @IFF_TX_SKB_NO_LINEAR: device/driver is capable of xmitting frames with + * skb_headlen(skb) == 0 (data starts from frag0) */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@@ -1553,6 -1570,7 +1572,7 @@@ IFF_FAILOVER_SLAVE = 1<<28, IFF_L3MDEV_RX_HANDLER = 1<<29, IFF_LIVE_RENAME_OK = 1<<30, + IFF_TX_SKB_NO_LINEAR = 1<<31, };
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@@ -1579,12 -1597,14 +1599,14 @@@ #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE #define IFF_TEAM IFF_TEAM #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED + #define IFF_PHONY_HEADROOM IFF_PHONY_HEADROOM #define IFF_MACSEC IFF_MACSEC #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER #define IFF_LIVE_RENAME_OK IFF_LIVE_RENAME_OK + #define IFF_TX_SKB_NO_LINEAR IFF_TX_SKB_NO_LINEAR
/* Specifies the type of the struct net_device::ml_priv pointer */ enum netdev_ml_priv_type { @@@ -1760,8 -1780,7 +1782,7 @@@ * @tx_queue_len: Max frames per queue allowed * @tx_global_lock: XXX: need comments on this one * @xdp_bulkq: XDP device bulk queue - * @xps_cpus_map: all CPUs map for XPS device - * @xps_rxqs_map: all RXQs map for XPS device + * @xps_maps: all CPUs/RXQs maps for XPS device * * @xps_maps: XXX: need comments on this one * @miniq_egress: clsact qdisc specific data for @@@ -1773,6 -1792,7 +1794,7 @@@ * * @proto_down_reason: reason a netdev interface is held down * @pcpu_refcnt: Number of references to this device + * @dev_refcnt: Number of references to this device * @todo_list: Delayed register/unregister * @link_watch_list: XXX: need comments on this one * @@@ -2057,8 -2077,7 +2079,7 @@@ struct net_device struct xdp_dev_bulk_queue __percpu *xdp_bulkq;
#ifdef CONFIG_XPS - struct xps_dev_maps __rcu *xps_cpus_map; - struct xps_dev_maps __rcu *xps_rxqs_map; + struct xps_dev_maps __rcu *xps_maps[XPS_MAPS_MAX]; #endif #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_egress; @@@ -2074,7 -2093,12 +2095,12 @@@ u32 proto_down_reason;
struct list_head todo_list; + + #ifdef CONFIG_PCPU_DEV_REFCNT int __percpu *pcpu_refcnt; + #else + refcount_t dev_refcnt; + #endif
struct list_head link_watch_list;
@@@ -3423,6 -3447,24 +3449,24 @@@ netif_xmit_frozen_or_drv_stopped(const return dev_queue->state & QUEUE_STATE_DRV_XOFF_OR_FROZEN; }
+ /** + * netdev_queue_set_dql_min_limit - set dql minimum limit + * @dev_queue: pointer to transmit queue + * @min_limit: dql minimum limit + * + * Forces xmit_more() to return true until the minimum threshold + * defined by @min_limit is reached (or until the tx queue is + * empty). Warning: to be used with care; misuse will impact + * latency. + */ + static inline void netdev_queue_set_dql_min_limit(struct netdev_queue *dev_queue, + unsigned int min_limit) + { + #ifdef CONFIG_BQL + dev_queue->dql.min_limit = min_limit; + #endif + } + /** + * netdev_txq_bql_enqueue_prefetchw - prefetch bql data for write + * @dev_queue: pointer to transmit queue @@@ -3688,7 -3730,7 +3732,7 @@ static inline void netif_wake_subqueue( int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, u16 index); int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, - u16 index, bool is_rxqs_map); + u16 index, enum xps_map_type type);
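netdev_queue_set_dql_min_limit() gives drivers a knob to keep xmit_more() batching going until a byte floor has been queued. A minimal usage sketch (the loop placement in a setup path and the 1024-byte floor are illustrative assumptions):

	/* Hypothetical driver setup path, for illustration only. */
	static void example_set_tx_batching_floor(struct net_device *dev)
	{
		unsigned int i;

		for (i = 0; i < dev->real_num_tx_queues; i++) {
			struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

			/* Keep xmit_more() batching until at least 1024 bytes
			 * are queued; this trades latency for throughput.
			 */
			netdev_queue_set_dql_min_limit(txq, 1024);
		}
	}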
/** * netif_attr_test_mask - Test a CPU or Rx queue set in a mask @@@ -3783,7 -3825,7 +3827,7 @@@ static inline int netif_set_xps_queue(s
static inline int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, - u16 index, bool is_rxqs_map) + u16 index, enum xps_map_type type) { return 0; } @@@ -4026,7 -4068,11 +4070,11 @@@ void netdev_run_todo(void) */ static inline void dev_put(struct net_device *dev) { + #ifdef CONFIG_PCPU_DEV_REFCNT this_cpu_dec(*dev->pcpu_refcnt); + #else + refcount_dec(&dev->dev_refcnt); + #endif }
/** @@@ -4037,7 -4083,11 +4085,11 @@@ */ static inline void dev_hold(struct net_device *dev) { + #ifdef CONFIG_PCPU_DEV_REFCNT this_cpu_inc(*dev->pcpu_refcnt); + #else + refcount_inc(&dev->dev_refcnt); + #endif }
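dev_hold()/dev_put() now hide whether the build uses the per-cpu counter (CONFIG_PCPU_DEV_REFCNT) or a plain refcount_t; callers keep the same pairing discipline. A minimal sketch of that pairing (struct and function names are made up):

	struct foo_binding {
		struct net_device *dev;
	};

	/* Take a long-lived reference on @dev while it is stored in @b. */
	static void foo_bind(struct foo_binding *b, struct net_device *dev)
	{
		dev_hold(dev);
		b->dev = dev;
	}

	/* Drop the reference once the stored pointer is no longer used. */
	static void foo_unbind(struct foo_binding *b)
	{
		dev_put(b->dev);
		b->dev = NULL;
	}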
/* Carrier loss detection, dial on demand. The functions netif_carrier_on @@@ -4172,7 -4222,7 +4224,7 @@@ static inline bool netif_oper_up(const * * Check if device has not been removed from system. */ - static inline bool netif_device_present(struct net_device *dev) + static inline bool netif_device_present(const struct net_device *dev) { return test_bit(__LINK_STATE_PRESENT, &dev->state); } @@@ -5287,6 -5337,9 +5339,9 @@@ do { #define PTYPE_HASH_SIZE (16) #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
+ extern struct list_head ptype_all __read_mostly; + extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; + extern struct net_device *blackhole_netdev;
#endif /* _LINUX_NETDEVICE_H */ diff --combined include/linux/skbuff.h index f2c9ee71cb2c,ecc029674ae4..c8def85fcc22 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@@ -285,7 -285,6 +285,7 @@@ struct nf_bridge_info struct tc_skb_ext { __u32 chain; __u16 mru; + bool post_ct; }; #endif
@@@ -657,6 -656,7 +657,7 @@@ typedef unsigned char *sk_buff_data_t * @protocol: Packet protocol from driver * @destructor: Destruct function * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue) + * @_sk_redir: socket redirection information for skmsg * @_nfct: Associated connection, if any (with nfctinfo bits) * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on @@@ -756,6 -756,9 +757,9 @@@ struct sk_buff void (*destructor)(struct sk_buff *skb); }; struct list_head tcp_tsorted_anchor; + #ifdef CONFIG_NET_SOCK_MSG + unsigned long _sk_redir; + #endif };
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@@ -1137,7 -1140,7 +1141,7 @@@ static inline bool skb_fclone_busy(cons
return skb->fclone == SKB_FCLONE_ORIG && refcount_read(&fclones->fclone_ref) > 1 && - fclones->skb2.sk == sk; + READ_ONCE(fclones->skb2.sk) == sk; }
/** @@@ -1289,10 -1292,10 +1293,10 @@@ __skb_set_sw_hash(struct sk_buff *skb, void __skb_get_hash(struct sk_buff *skb); u32 __skb_get_hash_symmetric(const struct sk_buff *skb); u32 skb_get_poff(const struct sk_buff *skb); - u32 __skb_get_poff(const struct sk_buff *skb, void *data, + u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen); __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - void *data, int hlen_proto); + const void *data, int hlen_proto);
static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) @@@ -1311,9 -1314,8 +1315,8 @@@ bool bpf_flow_dissect(struct bpf_prog * bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, - void *data, __be16 proto, int nhoff, int hlen, - unsigned int flags); + void *target_container, const void *data, + __be16 proto, int nhoff, int hlen, unsigned int flags);
static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, @@@ -1335,9 -1337,9 +1338,9 @@@ static inline bool skb_flow_dissect_flo static inline bool skb_flow_dissect_flow_keys_basic(const struct net *net, const struct sk_buff *skb, - struct flow_keys_basic *flow, void *data, - __be16 proto, int nhoff, int hlen, - unsigned int flags) + struct flow_keys_basic *flow, + const void *data, __be16 proto, + int nhoff, int hlen, unsigned int flags) { memset(flow, 0, sizeof(*flow)); return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow, @@@ -3675,14 -3677,13 +3678,13 @@@ __wsum skb_checksum(const struct sk_buf __wsum csum);
static inline void * __must_check - __skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *data, int hlen, void *buffer) + __skb_header_pointer(const struct sk_buff *skb, int offset, int len, + const void *data, int hlen, void *buffer) { - if (hlen - offset >= len) - return data + offset; + if (likely(hlen - offset >= len)) + return (void *)data + offset;
- if (!skb || - skb_copy_bits(skb, offset, buffer, len) < 0) + if (!skb || unlikely(skb_copy_bits(skb, offset, buffer, len) < 0)) return NULL;
return buffer; diff --combined include/net/netfilter/nf_tables.h index 5aaced6bf13e,67bc36f7f4fb..0cef5ad9768a --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@@ -1498,13 -1498,16 +1498,16 @@@ struct nft_trans_chain
struct nft_trans_table { bool update; - bool enable; + u8 state; + u32 flags; };
#define nft_trans_table_update(trans) \ (((struct nft_trans_table *)trans->data)->update) - #define nft_trans_table_enable(trans) \ - (((struct nft_trans_table *)trans->data)->enable) + #define nft_trans_table_state(trans) \ + (((struct nft_trans_table *)trans->data)->state) + #define nft_trans_table_flags(trans) \ + (((struct nft_trans_table *)trans->data)->flags)
struct nft_trans_elem { struct nft_set *set; @@@ -1536,7 -1539,6 +1539,7 @@@ struct nft_trans_flowtable struct nft_flowtable *flowtable; bool update; struct list_head hook_list; + u32 flags; };
#define nft_trans_flowtable(trans) \ @@@ -1545,8 -1547,6 +1548,8 @@@ (((struct nft_trans_flowtable *)trans->data)->update) #define nft_trans_flowtable_hooks(trans) \ (((struct nft_trans_flowtable *)trans->data)->hook_list) +#define nft_trans_flowtable_flags(trans) \ + (((struct nft_trans_flowtable *)trans->data)->flags)
int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); diff --combined include/net/nexthop.h index a10a319d7eb2,ba94868a21d5..28145f714801 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@@ -40,6 -40,12 +40,12 @@@ struct nh_config
struct nlattr *nh_grp; u16 nh_grp_type; + u16 nh_grp_res_num_buckets; + unsigned long nh_grp_res_idle_timer; + unsigned long nh_grp_res_unbalanced_timer; + bool nh_grp_res_has_num_buckets; + bool nh_grp_res_has_idle_timer; + bool nh_grp_res_has_unbalanced_timer;
struct nlattr *nh_encap; u16 nh_encap_type; @@@ -63,6 -69,32 +69,32 @@@ struct nh_info }; };
+ struct nh_res_bucket { + struct nh_grp_entry __rcu *nh_entry; + atomic_long_t used_time; + unsigned long migrated_time; + bool occupied; + u8 nh_flags; + }; + + struct nh_res_table { + struct net *net; + u32 nhg_id; + struct delayed_work upkeep_dw; + + /* List of NHGEs that have too few buckets ("uw" for underweight). + * Reclaimed buckets will be given to entries in this list. + */ + struct list_head uw_nh_entries; + unsigned long unbalanced_since; + + u32 idle_timer; + u32 unbalanced_timer; + + u16 num_nh_buckets; + struct nh_res_bucket nh_buckets[]; + }; + struct nh_grp_entry { struct nexthop *nh; u8 weight; @@@ -71,6 -103,13 +103,13 @@@ struct { atomic_t upper_bound; } mpath; + struct { + /* Member on uw_nh_entries. */ + struct list_head uw_nh_entry; + + u16 count_buckets; + u16 wants_buckets; + } res; };
struct list_head nh_list; @@@ -80,9 -119,13 +119,13 @@@ struct nh_group { struct nh_group *spare; /* spare group for removals */ u16 num_nh; + bool is_multipath; bool mpath; + bool resilient; bool fdb_nh; bool has_v4; + + struct nh_res_table __rcu *res_table; struct nh_grp_entry nh_entries[]; };
@@@ -112,11 -155,15 +155,15 @@@ struct nexthop enum nexthop_event_type { NEXTHOP_EVENT_DEL, NEXTHOP_EVENT_REPLACE, + NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE, + NEXTHOP_EVENT_BUCKET_REPLACE, };
enum nh_notifier_info_type { NH_NOTIFIER_INFO_TYPE_SINGLE, NH_NOTIFIER_INFO_TYPE_GRP, + NH_NOTIFIER_INFO_TYPE_RES_TABLE, + NH_NOTIFIER_INFO_TYPE_RES_BUCKET, };
struct nh_notifier_single_info { @@@ -143,6 -190,19 +190,19 @@@ struct nh_notifier_grp_info struct nh_notifier_grp_entry_info nh_entries[]; };
+ struct nh_notifier_res_bucket_info { + u16 bucket_index; + unsigned int idle_timer_ms; + bool force; + struct nh_notifier_single_info old_nh; + struct nh_notifier_single_info new_nh; + }; + + struct nh_notifier_res_table_info { + u16 num_nh_buckets; + struct nh_notifier_single_info nhs[]; + }; + struct nh_notifier_info { struct net *net; struct netlink_ext_ack *extack; @@@ -151,6 -211,8 +211,8 @@@ union { struct nh_notifier_single_info *nh; struct nh_notifier_grp_info *nh_grp; + struct nh_notifier_res_table_info *nh_res_table; + struct nh_notifier_res_bucket_info *nh_res_bucket; }; };
@@@ -158,6 -220,10 +220,10 @@@ int register_nexthop_notifier(struct ne struct netlink_ext_ack *extack); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); + void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, + bool offload, bool trap); + void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, + unsigned long *activity);
/* caller is holding rcu or rtnl; no reference taken to nexthop */ struct nexthop *nexthop_find_by_id(struct net *net, u32 id); @@@ -212,7 -278,7 +278,7 @@@ static inline bool nexthop_is_multipath struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp); - return nh_grp->mpath; + return nh_grp->is_multipath; } return false; } @@@ -227,7 -293,7 +293,7 @@@ static inline unsigned int nexthop_num_ struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp); - if (nh_grp->mpath) + if (nh_grp->is_multipath) rc = nh_grp->num_nh; }
@@@ -308,7 -374,7 +374,7 @@@ struct fib_nh_common *nexthop_fib_nhc(s struct nh_group *nh_grp;
nh_grp = rcu_dereference_rtnl(nh->nh_grp); - if (nh_grp->mpath) { + if (nh_grp->is_multipath) { nh = nexthop_mpath_select(nh_grp, nhsel); if (!nh) return NULL; @@@ -410,7 -476,6 +476,7 @@@ static inline struct fib_nh *fib_info_n int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, struct netlink_ext_ack *extack);
+/* Caller should either hold rcu_read_lock(), or RTNL. */ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) { struct nh_info *nhi; @@@ -431,29 -496,6 +497,29 @@@ return NULL; }
+/* Variant of nexthop_fib6_nh(). + * Caller should either hold rcu_read_lock_bh(), or RTNL. + */ +static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh) +{ + struct nh_info *nhi; + + if (nh->is_group) { + struct nh_group *nh_grp; + + nh_grp = rcu_dereference_bh_rtnl(nh->nh_grp); + nh = nexthop_mpath_select(nh_grp, 0); + if (!nh) + return NULL; + } + + nhi = rcu_dereference_bh_rtnl(nh->nh_info); + if (nhi->family == AF_INET6) + return &nhi->fib6_nh; + + return NULL; +} + static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) { struct fib6_nh *fib6_nh; diff --combined include/uapi/linux/bpf.h index 4ba4ef0ff63a,2d3036e292a9..008edc1dc8c1 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@@ -93,7 -93,717 +93,717 @@@ union bpf_iter_link_info } map; };
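The nexthop_fib6_nh_bh() variant added in the nexthop.h hunk above is for callers that already run with BH disabled and hold rcu_read_lock_bh() rather than plain RCU or RTNL. A minimal usage sketch (the surrounding function is hypothetical):

	/* Hypothetical lookup running in softirq context. */
	static int example_nh_mtu_bh(struct nexthop *nh)
	{
		struct fib6_nh *fib6_nh;
		int mtu = 0;

		rcu_read_lock_bh();
		fib6_nh = nexthop_fib6_nh_bh(nh);
		if (fib6_nh && fib6_nh->fib_nh_dev)
			mtu = READ_ONCE(fib6_nh->fib_nh_dev->mtu);
		rcu_read_unlock_bh();

		return mtu;
	}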
- /* BPF syscall commands, see bpf(2) man-page for details. */ + /* BPF syscall commands, see bpf(2) man-page for more details. */ + /** + * DOC: eBPF Syscall Preamble + * + * The operation to be performed by the **bpf**\ () system call is determined + * by the *cmd* argument. Each operation takes an accompanying argument, + * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see + * below). The size argument is the size of the union pointed to by *attr*. + */ + /** + * DOC: eBPF Syscall Commands + * + * BPF_MAP_CREATE + * Description + * Create a map and return a file descriptor that refers to the + * map. The close-on-exec file descriptor flag (see **fcntl**\ (2)) + * is automatically enabled for the new file descriptor. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_MAP_CREATE** will delete the map (but see NOTES). + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_MAP_LOOKUP_ELEM + * Description + * Look up an element with a given *key* in the map referred to + * by the file descriptor *map_fd*. + * + * The *flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_UPDATE_ELEM + * Description + * Create or update an element (key/value pair) in a specified map. + * + * The *flags* argument should be specified as one of the + * following: + * + * **BPF_ANY** + * Create a new element or update an existing element. + * **BPF_NOEXIST** + * Create a new element only if it did not exist. + * **BPF_EXIST** + * Update an existing element. + * **BPF_F_LOCK** + * Update a spin_lock-ed map element. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, + * **E2BIG**, **EEXIST**, or **ENOENT**. + * + * **E2BIG** + * The number of elements in the map reached the + * *max_entries* limit specified at map creation time. + * **EEXIST** + * If *flags* specifies **BPF_NOEXIST** and the element + * with *key* already exists in the map. + * **ENOENT** + * If *flags* specifies **BPF_EXIST** and the element with + * *key* does not exist in the map. + * + * BPF_MAP_DELETE_ELEM + * Description + * Look up and delete an element by key in a specified map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_GET_NEXT_KEY + * Description + * Look up an element by key in a specified map and return the key + * of the next element. Can be used to iterate over all elements + * in the map. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * The following cases can be used to iterate over all elements of + * the map: + * + * * If *key* is not found, the operation returns zero and sets + * the *next_key* pointer to the key of the first element. + * * If *key* is found, the operation returns zero and sets the + * *next_key* pointer to the key of the next element. + * * If *key* is the last element, returns -1 and *errno* is set + * to **ENOENT**. + * + * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or + * **EINVAL** on error. 
+ * + * BPF_PROG_LOAD + * Description + * Verify and load an eBPF program, returning a new file + * descriptor associated with the program. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES). + * + * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is + * automatically enabled for the new file descriptor. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_OBJ_PIN + * Description + * Pin an eBPF program or map referred by the specified *bpf_fd* + * to the provided *pathname* on the filesystem. + * + * The *pathname* argument must not contain a dot ("."). + * + * On success, *pathname* retains a reference to the eBPF object, + * preventing deallocation of the object when the original + * *bpf_fd* is closed. This allow the eBPF object to live beyond + * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent + * process. + * + * Applying **unlink**\ (2) or similar calls to the *pathname* + * unpins the object from the filesystem, removing the reference. + * If no other file descriptors or filesystem nodes refer to the + * same object, it will be deallocated (see NOTES). + * + * The filesystem type for the parent directory of *pathname* must + * be **BPF_FS_MAGIC**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_OBJ_GET + * Description + * Open a file descriptor for the eBPF object pinned to the + * specified *pathname*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_PROG_ATTACH + * Description + * Attach an eBPF program to a *target_fd* at the specified + * *attach_type* hook. + * + * The *attach_type* specifies the eBPF attachment point to + * attach the program to, and must be one of *bpf_attach_type* + * (see below). + * + * The *attach_bpf_fd* must be a valid file descriptor for a + * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap + * or sock_ops type corresponding to the specified *attach_type*. + * + * The *target_fd* must be a valid file descriptor for a kernel + * object which depends on the attach type of *attach_bpf_fd*: + * + * **BPF_PROG_TYPE_CGROUP_DEVICE**, + * **BPF_PROG_TYPE_CGROUP_SKB**, + * **BPF_PROG_TYPE_CGROUP_SOCK**, + * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, + * **BPF_PROG_TYPE_CGROUP_SYSCTL**, + * **BPF_PROG_TYPE_SOCK_OPS** + * + * Control Group v2 hierarchy with the eBPF controller + * enabled. Requires the kernel to be compiled with + * **CONFIG_CGROUP_BPF**. + * + * **BPF_PROG_TYPE_FLOW_DISSECTOR** + * + * Network namespace (eg /proc/self/ns/net). + * + * **BPF_PROG_TYPE_LIRC_MODE2** + * + * LIRC device path (eg /dev/lircN). Requires the kernel + * to be compiled with **CONFIG_BPF_LIRC_MODE2**. + * + * **BPF_PROG_TYPE_SK_SKB**, + * **BPF_PROG_TYPE_SK_MSG** + * + * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**). + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_DETACH + * Description + * Detach the eBPF program associated with the *target_fd* at the + * hook specified by *attach_type*. The program must have been + * previously attached using **BPF_PROG_ATTACH**. + * + * Return + * Returns zero on success. 
On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_TEST_RUN + * Description + * Run the eBPF program associated with the *prog_fd* a *repeat* + * number of times against a provided program context *ctx_in* and + * data *data_in*, and return the modified program context + * *ctx_out*, *data_out* (for example, packet data), result of the + * execution *retval*, and *duration* of the test run. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * **ENOSPC** + * Either *data_size_out* or *ctx_size_out* is too small. + * **ENOTSUPP** + * This command is not supported by the program type of + * the program referred to by *prog_fd*. + * + * BPF_PROG_GET_NEXT_ID + * Description + * Fetch the next eBPF program currently loaded into the kernel. + * + * Looks for the eBPF program with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF programs + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_MAP_GET_NEXT_ID + * Description + * Fetch the next eBPF map currently loaded into the kernel. + * + * Looks for the eBPF map with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF maps + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_PROG_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF program corresponding to + * *prog_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_MAP_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF map corresponding to + * *map_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_OBJ_GET_INFO_BY_FD + * Description + * Obtain information about the eBPF object corresponding to + * *bpf_fd*. + * + * Populates up to *info_len* bytes of *info*, which will be in + * one of the following formats depending on the eBPF object type + * of *bpf_fd*: + * + * * **struct bpf_prog_info** + * * **struct bpf_map_info** + * * **struct bpf_btf_info** + * * **struct bpf_link_info** + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_QUERY + * Description + * Obtain information about eBPF programs associated with the + * specified *attach_type* hook. + * + * The *target_fd* must be a valid file descriptor for a kernel + * object which depends on the attach type of *attach_bpf_fd*: + * + * **BPF_PROG_TYPE_CGROUP_DEVICE**, + * **BPF_PROG_TYPE_CGROUP_SKB**, + * **BPF_PROG_TYPE_CGROUP_SOCK**, + * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**, + * **BPF_PROG_TYPE_CGROUP_SOCKOPT**, + * **BPF_PROG_TYPE_CGROUP_SYSCTL**, + * **BPF_PROG_TYPE_SOCK_OPS** + * + * Control Group v2 hierarchy with the eBPF controller + * enabled. Requires the kernel to be compiled with + * **CONFIG_CGROUP_BPF**. + * + * **BPF_PROG_TYPE_FLOW_DISSECTOR** + * + * Network namespace (eg /proc/self/ns/net). + * + * **BPF_PROG_TYPE_LIRC_MODE2** + * + * LIRC device path (eg /dev/lircN). 
Requires the kernel + * to be compiled with **CONFIG_BPF_LIRC_MODE2**. + * + * **BPF_PROG_QUERY** always fetches the number of programs + * attached and the *attach_flags* which were used to attach those + * programs. Additionally, if *prog_ids* is nonzero and the number + * of attached programs is less than *prog_cnt*, populates + * *prog_ids* with the eBPF program ids of the programs attached + * at *target_fd*. + * + * The following flags may alter the result: + * + * **BPF_F_QUERY_EFFECTIVE** + * Only return information regarding programs which are + * currently effective at the specified *target_fd*. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_RAW_TRACEPOINT_OPEN + * Description + * Attach an eBPF program to a tracepoint *name* to access kernel + * internal arguments of the tracepoint in their raw form. + * + * The *prog_fd* must be a valid file descriptor associated with + * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**. + * + * No ABI guarantees are made about the content of tracepoint + * arguments exposed to the corresponding eBPF program. + * + * Applying **close**\ (2) to the file descriptor returned by + * **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES). + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_BTF_LOAD + * Description + * Verify and load BPF Type Format (BTF) metadata into the kernel, + * returning a new file descriptor associated with the metadata. + * BTF is described in more detail at + * https://www.kernel.org/doc/html/latest/bpf/btf.html. + * + * The *btf* parameter must point to valid memory providing + * *btf_size* bytes of BTF binary metadata. + * + * The returned file descriptor can be passed to other **bpf**\ () + * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to + * associate the BTF with those objects. + * + * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional + * parameters to specify a *btf_log_buf*, *btf_log_size* and + * *btf_log_level* which allow the kernel to return freeform log + * output regarding the BTF verification process. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_BTF_GET_FD_BY_ID + * Description + * Open a file descriptor for the BPF Type Format (BTF) + * corresponding to *btf_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_TASK_FD_QUERY + * Description + * Obtain information about eBPF programs associated with the + * target process identified by *pid* and *fd*. + * + * If the *pid* and *fd* are associated with a tracepoint, kprobe + * or uprobe perf event, then the *prog_id* and *fd_type* will + * be populated with the eBPF program id and file descriptor type + * of type **bpf_task_fd_type**. If associated with a kprobe or + * uprobe, the *probe_offset* and *probe_addr* will also be + * populated. Optionally, if *buf* is provided, then up to + * *buf_len* bytes of *buf* will be populated with the name of + * the tracepoint, kprobe or uprobe. + * + * The resulting *prog_id* may be introspected in deeper detail + * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. 
+ * + * BPF_MAP_LOOKUP_AND_DELETE_ELEM + * Description + * Look up an element with the given *key* in the map referred to + * by the file descriptor *fd*, and if found, delete the element. + * + * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types + * implement this command as a "pop" operation, deleting the top + * element rather than one corresponding to *key*. + * The *key* and *key_len* parameters should be zeroed when + * issuing this operation for these map types. + * + * This command is only valid for the following map types: + * * **BPF_MAP_TYPE_QUEUE** + * * **BPF_MAP_TYPE_STACK** + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_FREEZE + * Description + * Freeze the permissions of the specified map. + * + * Write permissions may be frozen by passing zero *flags*. + * Upon success, no future syscall invocations may alter the + * map state of *map_fd*. Write operations from eBPF programs + * are still possible for a frozen map. + * + * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_BTF_GET_NEXT_ID + * Description + * Fetch the next BPF Type Format (BTF) object currently loaded + * into the kernel. + * + * Looks for the BTF object with an id greater than *start_id* + * and updates *next_id* on success. If no other BTF objects + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_MAP_LOOKUP_BATCH + * Description + * Iterate and fetch multiple elements in a map. + * + * Two opaque values are used to manage batch operations, + * *in_batch* and *out_batch*. Initially, *in_batch* must be set + * to NULL to begin the batched operation. After each subsequent + * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant + * *out_batch* as the *in_batch* for the next operation to + * continue iteration from the current point. + * + * The *keys* and *values* are output parameters which must point + * to memory large enough to hold *count* items based on the key + * and value size of the map *map_fd*. The *keys* buffer must be + * of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * The *elem_flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * On success, *count* elements from the map are copied into the + * user buffer, with the keys copied into *keys* and the values + * copied into the corresponding indices in *values*. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **ENOSPC** to indicate that *keys* or + * *values* is too small to dump an entire bucket during + * iteration of a hash-based map type. + * + * BPF_MAP_LOOKUP_AND_DELETE_BATCH + * Description + * Iterate and delete all elements in a map. 
+ * + * This operation has the same behavior as + * **BPF_MAP_LOOKUP_BATCH** with two exceptions: + * + * * Every element that is successfully returned is also deleted + * from the map. This is at least *count* elements. Note that + * *count* is both an input and an output parameter. + * * Upon returning with *errno* set to **EFAULT**, up to + * *count* elements may be deleted without returning the keys + * and values of the deleted elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_MAP_UPDATE_BATCH + * Description + * Update multiple elements in a map by *key*. + * + * The *keys* and *values* are input parameters which must point + * to memory large enough to hold *count* items based on the key + * and value size of the map *map_fd*. The *keys* buffer must be + * of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * Each element specified in *keys* is sequentially updated to the + * value in the corresponding index in *values*. The *in_batch* + * and *out_batch* parameters are ignored and should be zeroed. + * + * The *elem_flags* argument should be specified as one of the + * following: + * + * **BPF_ANY** + * Create new elements or update a existing elements. + * **BPF_NOEXIST** + * Create new elements only if they do not exist. + * **BPF_EXIST** + * Update existing elements. + * **BPF_F_LOCK** + * Update spin_lock-ed map elements. This must be + * specified if the map value contains a spinlock. + * + * On success, *count* elements from the map are updated. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or + * **E2BIG**. **E2BIG** indicates that the number of elements in + * the map reached the *max_entries* limit specified at map + * creation time. + * + * May set *errno* to one of the following error codes under + * specific circumstances: + * + * **EEXIST** + * If *flags* specifies **BPF_NOEXIST** and the element + * with *key* already exists in the map. + * **ENOENT** + * If *flags* specifies **BPF_EXIST** and the element with + * *key* does not exist in the map. + * + * BPF_MAP_DELETE_BATCH + * Description + * Delete multiple elements in a map by *key*. + * + * The *keys* parameter is an input parameter which must point + * to memory large enough to hold *count* items based on the key + * size of the map *map_fd*, that is, *key_size* * *count*. + * + * Each element specified in *keys* is sequentially deleted. The + * *in_batch*, *out_batch*, and *values* parameters are ignored + * and should be zeroed. + * + * The *elem_flags* argument may be specified as one of the + * following: + * + * **BPF_F_LOCK** + * Look up the value of a spin-locked map without + * returning the lock. This must be specified if the + * elements contain a spinlock. + * + * On success, *count* elements from the map are updated. + * + * If an error is returned and *errno* is not **EFAULT**, *count* + * is set to the number of successfully processed elements. If + * *errno* is **EFAULT**, up to *count* elements may be been + * deleted. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. 
+ * + * BPF_LINK_CREATE + * Description + * Attach an eBPF program to a *target_fd* at the specified + * *attach_type* hook and return a file descriptor handle for + * managing the link. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_UPDATE + * Description + * Update the eBPF program in the specified *link_fd* to + * *new_prog_fd*. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_LINK_GET_FD_BY_ID + * Description + * Open a file descriptor for the eBPF Link corresponding to + * *link_id*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_GET_NEXT_ID + * Description + * Fetch the next eBPF link currently loaded into the kernel. + * + * Looks for the eBPF link with an id greater than *start_id* + * and updates *next_id* on success. If no other eBPF links + * remain with ids higher than *start_id*, returns -1 and sets + * *errno* to **ENOENT**. + * + * Return + * Returns zero on success. On error, or when no id remains, -1 + * is returned and *errno* is set appropriately. + * + * BPF_ENABLE_STATS + * Description + * Enable eBPF runtime statistics gathering. + * + * Runtime statistics gathering for the eBPF runtime is disabled + * by default to minimize the corresponding performance overhead. + * This command enables statistics globally. + * + * Multiple programs may independently enable statistics. + * After gathering the desired statistics, eBPF runtime statistics + * may be disabled again by calling **close**\ (2) for the file + * descriptor returned by this function. Statistics will only be + * disabled system-wide when all outstanding file descriptors + * returned by prior calls for this subcommand are closed. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_ITER_CREATE + * Description + * Create an iterator on top of the specified *link_fd* (as + * previously created using **BPF_LINK_CREATE**) and return a + * file descriptor that can be used to trigger the iteration. + * + * If the resulting file descriptor is pinned to the filesystem + * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls + * for that path will trigger the iterator to read kernel state + * using the eBPF program attached to *link_fd*. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * + * BPF_LINK_DETACH + * Description + * Forcefully detach the specified *link_fd* from its + * corresponding attachment point. + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. + * + * BPF_PROG_BIND_MAP + * Description + * Bind a map to the lifetime of an eBPF program. + * + * The map identified by *map_fd* is bound to the program + * identified by *prog_fd* and only released when *prog_fd* is + * released. This may be used in cases where metadata should be + * associated with a program which otherwise does not contain any + * references to the map (for example, embedded in the eBPF + * program instructions). + * + * Return + * Returns zero on success. On error, -1 is returned and *errno* + * is set appropriately. 
+ * + * NOTES + * eBPF objects (maps and programs) can be shared between processes. + * + * * After **fork**\ (2), the child inherits file descriptors + * referring to the same eBPF objects. + * * File descriptors referring to eBPF objects can be transferred over + * **unix**\ (7) domain sockets. + * * File descriptors referring to eBPF objects can be duplicated in the + * usual way, using **dup**\ (2) and similar calls. + * * File descriptors referring to eBPF objects can be pinned to the + * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2). + * + * An eBPF object is deallocated only after all file descriptors referring + * to the object have been closed and no references remain pinned to the + * filesystem or attached (for example, bound to a program or device). + */ enum bpf_cmd { BPF_MAP_CREATE, BPF_MAP_LOOKUP_ELEM, @@@ -393,6 -1103,15 +1103,15 @@@ enum bpf_link_type * is struct/union. */ #define BPF_PSEUDO_BTF_ID 3 + /* insn[0].src_reg: BPF_PSEUDO_FUNC + * insn[0].imm: insn offset to the func + * insn[1].imm: 0 + * insn[0].off: 0 + * insn[1].off: 0 + * ldimm64 rewrite: address of the function + * verifier type: PTR_TO_FUNC. + */ + #define BPF_PSEUDO_FUNC 4
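The NOTES above describe how an eBPF object's lifetime follows its file descriptors; pinning with BPF_OBJ_PIN is the usual way to keep an object alive past process exit. A minimal sketch, assuming a mounted bpffs and an illustrative path:

    /* Hedged sketch: pin an existing map fd to bpffs so the map outlives this
     * process, then re-open it by path with BPF_OBJ_GET.
     */
    #include <linux/bpf.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int pin_and_reopen(int map_fd)
    {
        const char *path = "/sys/fs/bpf/example_map";    /* illustrative path */
        union bpf_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.pathname = (__u64)(unsigned long)path;
        attr.bpf_fd   = map_fd;
        if (syscall(__NR_bpf, BPF_OBJ_PIN, &attr, sizeof(attr)) < 0)
            return -1;

        /* Any process with access to the path can now obtain a new fd. */
        memset(&attr, 0, sizeof(attr));
        attr.pathname = (__u64)(unsigned long)path;
        return syscall(__NR_bpf, BPF_OBJ_GET, &attr, sizeof(attr));
    }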
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative * offset to another bpf function @@@ -720,7 -1439,7 +1439,7 @@@ union bpf_attr * parsed and used to produce a manual page. The workflow is the following, * and requires the rst2man utility: * - * $ ./scripts/bpf_helpers_doc.py \ + * $ ./scripts/bpf_doc.py \ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7 * $ man /tmp/bpf-helpers.7 @@@ -1765,6 -2484,10 +2484,10 @@@ * Use with ENCAP_L3/L4 flags to further specify the tunnel * type; *len* is the length of the inner MAC header. * + * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**: + * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the + * L2 type as Ethernet. + * * A call to this helper is susceptible to change the underlying * packet buffer. Therefore, at load time, all checks on pointers * previously done by the verifier are invalidated and must be @@@ -3850,7 -4573,7 +4573,7 @@@ * * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags) * Description - * Check ctx packet size against exceeding MTU of net device (based + * Check packet size against exceeding MTU of net device (based * on *ifindex*). This helper will likely be used in combination * with helpers that adjust/change the packet size. * @@@ -3867,14 -4590,6 +4590,14 @@@ * against the current net device. This is practical if this isn't * used prior to redirect. * + * On input *mtu_len* must be a valid pointer, else verifier will + * reject BPF program. If the value *mtu_len* is initialized to + * zero then the ctx packet size is used. When value *mtu_len* is + * provided as input this specifies the L3 length that the MTU check + * is done against. Remember XDP and TC length operate at L2, but + * this value is L3 as this correlates to MTU and IP-header tot_len + * values which are L3 (similar behavior as bpf_fib_lookup). + * * The Linux kernel route table can configure MTUs on a more * specific per route level, which is not provided by this helper. * For route level MTU checks use the **bpf_fib_lookup**\ () @@@ -3899,9 -4614,11 +4622,9 @@@ * * On return *mtu_len* pointer contains the MTU value of the net * device. Remember the net device configured MTU is the L3 size, - * which is returned here and XDP and TX length operate at L2. + * which is returned here and XDP and TC length operate at L2. * Helper take this into account for you, but remember when using - * MTU value in your BPF-code. On input *mtu_len* must be a valid - * pointer and be initialized (to zero), else verifier will reject - * BPF program. + * MTU value in your BPF-code. * * Return * * 0 on success, and populate MTU value in *mtu_len* pointer. @@@ -3915,6 -4632,34 +4638,34 @@@ * * **BPF_MTU_CHK_RET_FRAG_NEEDED** * * **BPF_MTU_CHK_RET_SEGS_TOOBIG** * + * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags) + * Description + * For each element in **map**, call **callback_fn** function with + * **map**, **callback_ctx** and other map-specific parameters. + * The **callback_fn** should be a static function and + * the **callback_ctx** should be a pointer to the stack. + * The **flags** is used to control certain aspects of the helper. + * Currently, the **flags** must be 0.
+ * + * The following is a list of supported map types and their + * respective expected callback signatures: + * + * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH, + * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH, + * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY + * + * long (*callback_fn)(struct bpf_map *map, const void *key, void *value, void *ctx); + * + * For per_cpu maps, the map_value is the value on the cpu where the + * bpf_prog is running. + * + * If **callback_fn** returns 0, the helper will continue to the next + * element. If the return value is 1, the helper will skip the rest of + * the elements and return. Other return values are not used now. + * + * Return + * The number of traversed map elements for success, **-EINVAL** for + * invalid **flags**. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@@ -4081,6 -4826,7 +4832,7 @@@ FN(ima_inode_hash), \ FN(sock_from_file), \ FN(check_mtu), \ + FN(for_each_map_elem), \ /* */
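Putting the bpf_for_each_map_elem() documentation together, here is a hedged BPF-C sketch of a program that sums an array map's values with a static callback matching the signature listed above. Map, section, and function names are illustrative, and a kernel plus libbpf recent enough to expose the new helper is assumed.

    /* Hedged sketch (BPF side): sum the values of an array map with
     * bpf_for_each_map_elem(). The callback is a static function and
     * callback_ctx points into the caller's stack, as required above.
     */
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 16);
        __type(key, __u32);
        __type(value, long);
    } values SEC(".maps");

    struct cb_ctx {
        long sum;
    };

    static long sum_elem(struct bpf_map *map, const void *key, void *value,
                         void *ctx)
    {
        struct cb_ctx *c = ctx;

        c->sum += *(long *)value;
        return 0;    /* 0 = keep iterating, 1 = stop early */
    }

    SEC("xdp")
    int sum_values(struct xdp_md *xdp)
    {
        struct cb_ctx c = { .sum = 0 };

        bpf_for_each_map_elem(&values, sum_elem, &c, 0);    /* flags must be 0 */
        bpf_printk("sum=%ld", c.sum);
        return XDP_PASS;
    }

    char LICENSE[] SEC("license") = "GPL";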
/* integer value in 'imm' field of BPF_CALL instruction selects which helper @@@ -4174,6 -4920,7 +4926,7 @@@ enum BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5), + BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6), };
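The new BPF_F_ADJ_ROOM_ENCAP_L2_ETH flag slots into the existing bpf_skb_adjust_room() encapsulation flags. A hedged TC-BPF sketch that reserves room for an outer IPv4/UDP tunnel carrying an inner Ethernet frame; the header sizes are illustrative and writing the actual outer headers is left out.

    /* Hedged sketch (BPF side): grow packet headroom for an outer
     * IPv4 + UDP + Ethernet encapsulation, telling the stack that the inner
     * L2 header is Ethernet via BPF_F_ADJ_ROOM_ENCAP_L2_ETH. The program is
     * expected to fill in the new outer headers afterwards (not shown).
     */
    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <linux/ip.h>
    #include <linux/udp.h>
    #include <linux/pkt_cls.h>
    #include <bpf/bpf_helpers.h>

    SEC("tc")
    int add_l2_encap_room(struct __sk_buff *skb)
    {
        __s32 grow = sizeof(struct iphdr) + sizeof(struct udphdr) + ETH_HLEN;
        __u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO |
                      BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
                      BPF_F_ADJ_ROOM_ENCAP_L4_UDP |
                      BPF_F_ADJ_ROOM_ENCAP_L2_ETH |
                      BPF_F_ADJ_ROOM_ENCAP_L2(ETH_HLEN);

        if (bpf_skb_adjust_room(skb, grow, BPF_ADJ_ROOM_MAC, flags))
            return TC_ACT_SHOT;

        /* ...write the new outer Ethernet/IP/UDP headers here... */
        return TC_ACT_OK;
    }

    char LICENSE[] SEC("license") = "GPL";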
enum { @@@ -5211,7 -5958,10 +5964,10 @@@ struct bpf_pidns_info
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */ struct bpf_sk_lookup { - __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + union { + __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */ + __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */ + };
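The new cookie member is only meaningful under BPF_PROG_TEST_RUN. A hedged userspace sketch that runs an already loaded SK_LOOKUP program against a synthetic lookup context and reads back the cookie of whichever socket the program selected; prog_fd, address, and port are assumptions for the example.

    /* Hedged sketch: test-run a BPF_PROG_TYPE_SK_LOOKUP program and read the
     * cookie of the selected socket (zero means no socket was selected).
     */
    #include <arpa/inet.h>
    #include <linux/bpf.h>
    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static __u64 test_run_sk_lookup(int prog_fd)
    {
        struct bpf_sk_lookup ctx;
        union bpf_attr attr;

        memset(&ctx, 0, sizeof(ctx));
        ctx.family     = AF_INET;
        ctx.protocol   = IPPROTO_TCP;
        ctx.local_ip4  = htonl(INADDR_LOOPBACK);   /* network byte order */
        ctx.local_port = 8080;                     /* host byte order */

        memset(&attr, 0, sizeof(attr));
        attr.test.prog_fd      = prog_fd;
        attr.test.ctx_in       = (__u64)(unsigned long)&ctx;
        attr.test.ctx_size_in  = sizeof(ctx);
        attr.test.ctx_out      = (__u64)(unsigned long)&ctx;
        attr.test.ctx_size_out = sizeof(ctx);
        attr.test.repeat       = 1;

        if (syscall(__NR_bpf, BPF_PROG_TEST_RUN, &attr, sizeof(attr)) < 0)
            return 0;

        return ctx.cookie;   /* non-zero iff the program assigned a socket */
    }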
__u32 family; /* Protocol family (AF_INET, AF_INET6) */ __u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */ diff --combined init/Kconfig index 5f5c776ef192,2c9cbd8e368c..5deae45b8d81 --- a/init/Kconfig +++ b/init/Kconfig @@@ -20,10 -20,10 +20,10 @@@ config CC_VERSION_TEX When the compiler is updated, Kconfig will be invoked.
- Ensure full rebuild when the compiler is updated - include/linux/kconfig.h contains this option in the comment line so - fixdep adds include/config/cc/version/text.h into the auto-generated - dependency. When the compiler is updated, syncconfig will touch it - and then every file will be rebuilt. + include/linux/compiler-version.h contains this option in the comment + line so fixdep adds include/config/cc/version/text.h into the + auto-generated dependency. When the compiler is updated, syncconfig + will touch it and then every file will be rebuilt.
config CC_IS_GCC def_bool $(success,test "$(cc-name)" = GCC) @@@ -119,7 -119,8 +119,7 @@@ config INIT_ENV_ARG_LIMI
config COMPILE_TEST bool "Compile also drivers which will not load" - depends on !UML && !S390 - default n + depends on HAS_IOMEM help Some drivers can be compiled on a different platform than they are intended to be run on. Despite they cannot be loaded there (or even @@@ -1708,6 -1709,7 +1708,7 @@@ config BPF_SYSCAL select BPF select IRQ_WORK select TASKS_TRACE_RCU + select NET_SOCK_MSG if INET default n help Enable the bpf() system call that allows to manipulate eBPF diff --combined kernel/bpf/bpf_inode_storage.c index b58b2efb9b43,da753721457c..2921ca39a93e --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@@ -109,7 -109,7 +109,7 @@@ static void *bpf_fd_inode_storage_looku fd = *(int *)key; f = fget_raw(fd); if (!f) - return NULL; + return ERR_PTR(-EBADF);
sdata = inode_storage_lookup(f->f_inode, map, true); fput(f); @@@ -237,7 -237,7 +237,7 @@@ static void inode_storage_map_free(stru
smap = (struct bpf_local_storage_map *)map; bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx); - bpf_local_storage_map_free(smap); + bpf_local_storage_map_free(smap, NULL); }
static int inode_storage_map_btf_id; diff --combined kernel/bpf/verifier.c index 44e4ec1640f1,f9096b049cd6..999bf36ffeb1 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@@ -234,6 -234,12 +234,12 @@@ static bool bpf_pseudo_call(const struc insn->src_reg == BPF_PSEUDO_CALL; }
+ static bool bpf_pseudo_func(const struct bpf_insn *insn) + { + return insn->code == (BPF_LD | BPF_IMM | BPF_DW) && + insn->src_reg == BPF_PSEUDO_FUNC; + } + struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; @@@ -248,6 -254,7 +254,7 @@@ u32 btf_id; struct btf *ret_btf; u32 ret_btf_id; + u32 subprogno; };
struct btf *btf_vmlinux; @@@ -390,6 -397,24 +397,24 @@@ __printf(3, 4) static void verbose_linf env->prev_linfo = linfo; }
+ static void verbose_invalid_scalar(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + struct tnum *range, const char *ctx, + const char *reg_name) + { + char tn_buf[48]; + + verbose(env, "At %s the register %s ", ctx, reg_name); + if (!tnum_is_unknown(reg->var_off)) { + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "has value %s", tn_buf); + } else { + verbose(env, "has unknown scalar value"); + } + tnum_strn(tn_buf, sizeof(tn_buf), *range); + verbose(env, " should have been in %s\n", tn_buf); + } + static bool type_is_pkt_pointer(enum bpf_reg_type type) { return type == PTR_TO_PACKET || @@@ -409,6 -434,7 +434,7 @@@ static bool reg_type_not_null(enum bpf_ return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || type == PTR_TO_MAP_VALUE || + type == PTR_TO_MAP_KEY || type == PTR_TO_SOCK_COMMON; }
@@@ -451,7 -477,8 +477,8 @@@ static bool arg_type_may_be_null(enum b type == ARG_PTR_TO_MEM_OR_NULL || type == ARG_PTR_TO_CTX_OR_NULL || type == ARG_PTR_TO_SOCKET_OR_NULL || - type == ARG_PTR_TO_ALLOC_MEM_OR_NULL; + type == ARG_PTR_TO_ALLOC_MEM_OR_NULL || + type == ARG_PTR_TO_STACK_OR_NULL; }
/* Determine whether the function releases some resources allocated by another @@@ -541,6 -568,8 +568,8 @@@ static const char * const reg_type_str[ [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", [PTR_TO_RDWR_BUF] = "rdwr_buf", [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", + [PTR_TO_FUNC] = "func", + [PTR_TO_MAP_KEY] = "map_key", };
static char slot_type_char[] = { @@@ -612,6 -641,7 +641,7 @@@ static void print_verifier_state(struc if (type_is_pkt_pointer(t)) verbose(env, ",r=%d", reg->range); else if (t == CONST_PTR_TO_MAP || + t == PTR_TO_MAP_KEY || t == PTR_TO_MAP_VALUE || t == PTR_TO_MAP_VALUE_OR_NULL) verbose(env, ",ks=%d,vs=%d", @@@ -1519,7 -1549,7 +1549,7 @@@ static int add_subprog(struct bpf_verif } ret = find_subprog(env, off); if (ret >= 0) - return 0; + return ret; if (env->subprog_cnt >= BPF_MAX_SUBPROGS) { verbose(env, "too many subprograms\n"); return -E2BIG; @@@ -1527,7 -1557,7 +1557,7 @@@ env->subprog_info[env->subprog_cnt++].start = off; sort(env->subprog_info, env->subprog_cnt, sizeof(env->subprog_info[0]), cmp_subprogs, NULL); - return 0; + return env->subprog_cnt - 1; }
static int check_subprogs(struct bpf_verifier_env *env) @@@ -1544,6 -1574,19 +1574,19 @@@
/* determine subprog starts. The end is one before the next starts */ for (i = 0; i < insn_cnt; i++) { + if (bpf_pseudo_func(insn + i)) { + if (!env->bpf_capable) { + verbose(env, + "function pointers are allowed for CAP_BPF and CAP_SYS_ADMIN\n"); + return -EPERM; + } + ret = add_subprog(env, i + insn[i].imm + 1); + if (ret < 0) + return ret; + /* remember subprog */ + insn[i + 1].imm = ret; + continue; + } if (!bpf_pseudo_call(insn + i)) continue; if (!env->bpf_capable) { @@@ -2295,6 -2338,8 +2338,8 @@@ static bool is_spillable_regtype(enum b case PTR_TO_PERCPU_BTF_ID: case PTR_TO_MEM: case PTR_TO_MEM_OR_NULL: + case PTR_TO_FUNC: + case PTR_TO_MAP_KEY: return true; default: return false; @@@ -2899,6 -2944,10 +2944,10 @@@ static int __check_mem_access(struct bp
reg = &cur_regs(env)[regno]; switch (reg->type) { + case PTR_TO_MAP_KEY: + verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n", + mem_size, off, size); + break; case PTR_TO_MAP_VALUE: verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", mem_size, off, size); @@@ -3304,6 -3353,9 +3353,9 @@@ static int check_ptr_alignment(struct b case PTR_TO_FLOW_KEYS: pointer_desc = "flow keys "; break; + case PTR_TO_MAP_KEY: + pointer_desc = "key "; + break; case PTR_TO_MAP_VALUE: pointer_desc = "value "; break; @@@ -3405,7 -3457,7 +3457,7 @@@ process_func continue_func: subprog_end = subprog[idx + 1].start; for (; i < subprog_end; i++) { - if (!bpf_pseudo_call(insn + i)) + if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i)) continue; /* remember insn and function to return to */ ret_insn[frame] = i + 1; @@@ -3842,7 -3894,19 +3894,19 @@@ static int check_mem_access(struct bpf_ /* for access checks, reg->off is just part of off */ off += reg->off;
- if (reg->type == PTR_TO_MAP_VALUE) { + if (reg->type == PTR_TO_MAP_KEY) { + if (t == BPF_WRITE) { + verbose(env, "write to change key R%d not allowed\n", regno); + return -EACCES; + } + + err = check_mem_region_access(env, regno, off, size, + reg->map_ptr->key_size, false); + if (err) + return err; + if (value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_MAP_VALUE) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose(env, "R%d leaks addr into map\n", value_regno); @@@ -4258,6 -4322,9 +4322,9 @@@ static int check_helper_mem_access(stru case PTR_TO_PACKET_META: return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); + case PTR_TO_MAP_KEY: + return check_mem_region_access(env, regno, reg->off, access_size, + reg->map_ptr->key_size, false); case PTR_TO_MAP_VALUE: if (check_map_access_type(env, regno, reg->off, access_size, meta && meta->raw_mode ? BPF_WRITE : @@@ -4474,6 -4541,7 +4541,7 @@@ static const struct bpf_reg_types map_k PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, }, }; @@@ -4505,6 -4573,7 +4573,7 @@@ static const struct bpf_reg_types mem_t PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, PTR_TO_MEM, PTR_TO_RDONLY_BUF, @@@ -4517,6 -4586,7 +4586,7 @@@ static const struct bpf_reg_types int_p PTR_TO_STACK, PTR_TO_PACKET, PTR_TO_PACKET_META, + PTR_TO_MAP_KEY, PTR_TO_MAP_VALUE, }, }; @@@ -4529,6 -4599,8 +4599,8 @@@ static const struct bpf_reg_types const static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } }; + static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } }; + static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, @@@ -4557,6 -4629,8 +4629,8 @@@ [ARG_PTR_TO_INT] = &int_ptr_types, [ARG_PTR_TO_LONG] = &int_ptr_types, [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, + [ARG_PTR_TO_FUNC] = &func_ptr_types, + [ARG_PTR_TO_STACK_OR_NULL] = &stack_ptr_types, };
static int check_reg_type(struct bpf_verifier_env *env, u32 regno, @@@ -4738,6 -4812,8 +4812,8 @@@ skip_type_check verbose(env, "verifier internal error\n"); return -EFAULT; } + } else if (arg_type == ARG_PTR_TO_FUNC) { + meta->subprogno = reg->subprogno; } else if (arg_type_is_mem_ptr(arg_type)) { /* The access to this pointer is only checked when we hit the * next is_mem_size argument below. @@@ -5258,13 -5334,19 +5334,19 @@@ static void clear_caller_saved_regs(str } }
- static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, - int *insn_idx) + typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx); + + static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx, int subprog, + set_callee_state_fn set_callee_state_cb) { struct bpf_verifier_state *state = env->cur_state; struct bpf_func_info_aux *func_info_aux; struct bpf_func_state *caller, *callee; - int i, err, subprog, target_insn; + int err; bool is_global = false;
if (state->curframe + 1 >= MAX_CALL_FRAMES) { @@@ -5273,14 -5355,6 +5355,6 @@@ return -E2BIG; }
- target_insn = *insn_idx + insn->imm; - subprog = find_subprog(env, target_insn + 1); - if (subprog < 0) { - verbose(env, "verifier bug. No program starts at insn %d\n", - target_insn + 1); - return -EFAULT; - } - caller = state->frame[state->curframe]; if (state->frame[state->curframe + 1]) { verbose(env, "verifier bug. Frame %d already allocated\n", @@@ -5335,11 -5409,9 +5409,9 @@@ if (err) return err;
- /* copy r1 - r5 args that callee can access. The copy includes parent - * pointers, which connects us up to the liveness chain - */ - for (i = BPF_REG_1; i <= BPF_REG_5; i++) - callee->regs[i] = caller->regs[i]; + err = set_callee_state_cb(env, caller, callee, *insn_idx); + if (err) + return err;
clear_caller_saved_regs(env, caller->regs);
@@@ -5347,7 -5419,7 +5419,7 @@@ state->curframe++;
/* and go analyze first insn of the callee */ - *insn_idx = target_insn; + *insn_idx = env->subprog_info[subprog].start - 1;
if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "caller:\n"); @@@ -5358,6 -5430,92 +5430,92 @@@ return 0; }
+ int map_set_for_each_callback_args(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee) + { + /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, + * void *callback_ctx, u64 flags); + * callback_fn(struct bpf_map *map, void *key, void *value, + * void *callback_ctx); + */ + callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1]; + + callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY; + __mark_reg_known_zero(&callee->regs[BPF_REG_2]); + callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr; + + callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE; + __mark_reg_known_zero(&callee->regs[BPF_REG_3]); + callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr; + + /* pointer to stack or null */ + callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3]; + + /* unused */ + __mark_reg_not_init(env, &callee->regs[BPF_REG_5]); + return 0; + } + + static int set_callee_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, int insn_idx) + { + int i; + + /* copy r1 - r5 args that callee can access. The copy includes parent + * pointers, which connects us up to the liveness chain + */ + for (i = BPF_REG_1; i <= BPF_REG_5; i++) + callee->regs[i] = caller->regs[i]; + return 0; + } + + static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx) + { + int subprog, target_insn; + + target_insn = *insn_idx + insn->imm + 1; + subprog = find_subprog(env, target_insn); + if (subprog < 0) { + verbose(env, "verifier bug. No program starts at insn %d\n", + target_insn); + return -EFAULT; + } + + return __check_func_call(env, insn, insn_idx, subprog, set_callee_state); + } + + static int set_map_elem_callback_state(struct bpf_verifier_env *env, + struct bpf_func_state *caller, + struct bpf_func_state *callee, + int insn_idx) + { + struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx]; + struct bpf_map *map; + int err; + + if (bpf_map_ptr_poisoned(insn_aux)) { + verbose(env, "tail_call abusing map_ptr\n"); + return -EINVAL; + } + + map = BPF_MAP_PTR(insn_aux->map_ptr_state); + if (!map->ops->map_set_for_each_callback_args || + !map->ops->map_for_each_callback) { + verbose(env, "callback function not allowed for map\n"); + return -ENOTSUPP; + } + + err = map->ops->map_set_for_each_callback_args(env, caller, callee); + if (err) + return err; + + callee->in_callback_fn = true; + return 0; + } + static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) { struct bpf_verifier_state *state = env->cur_state; @@@ -5380,8 -5538,22 +5538,22 @@@
state->curframe--; caller = state->frame[state->curframe]; - /* return to the caller whatever r0 had in the callee */ - caller->regs[BPF_REG_0] = *r0; + if (callee->in_callback_fn) { + /* enforce R0 return value range [0, 1]. */ + struct tnum range = tnum_range(0, 1); + + if (r0->type != SCALAR_VALUE) { + verbose(env, "R0 not a scalar value\n"); + return -EACCES; + } + if (!tnum_in(range, r0->var_off)) { + verbose_invalid_scalar(env, r0, &range, "callback return", "R0"); + return -EINVAL; + } + } else { + /* return to the caller whatever r0 had in the callee */ + caller->regs[BPF_REG_0] = *r0; + }
/* Transfer references to the caller */ err = transfer_reference_state(caller, callee); @@@ -5436,7 -5608,9 +5608,9 @@@ record_func_map(struct bpf_verifier_en func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_map_push_elem && func_id != BPF_FUNC_map_pop_elem && - func_id != BPF_FUNC_map_peek_elem) + func_id != BPF_FUNC_map_peek_elem && + func_id != BPF_FUNC_for_each_map_elem && + func_id != BPF_FUNC_redirect_map) return 0;
if (map == NULL) { @@@ -5517,15 -5691,18 +5691,18 @@@ static int check_reference_leak(struct return state->acquired_refs ? -EINVAL : 0; }
- static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) + static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx_p) { const struct bpf_func_proto *fn = NULL; struct bpf_reg_state *regs; struct bpf_call_arg_meta meta; + int insn_idx = *insn_idx_p; bool changes_data; - int i, err; + int i, err, func_id;
/* find function prototype */ + func_id = insn->imm; if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) { verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id); @@@ -5571,7 -5748,7 +5748,7 @@@
meta.func_id = func_id; /* check args */ - for (i = 0; i < 5; i++) { + for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) { err = check_func_arg(env, i, &meta, fn); if (err) return err; @@@ -5621,6 -5798,13 +5798,13 @@@ return -EINVAL; }
+ if (func_id == BPF_FUNC_for_each_map_elem) { + err = __check_func_call(env, insn, insn_idx_p, meta.subprogno, + set_map_elem_callback_state); + if (err < 0) + return -EINVAL; + } + /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, regs, caller_saved[i]); @@@ -5861,14 -6045,10 +6045,14 @@@ static int retrieve_ptr_limit(const str { bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg); - u32 off; + u32 off, max;
switch (ptr_reg->type) { case PTR_TO_STACK: + /* Offset 0 is out-of-bounds, but acceptable start for the + * left direction, see BPF_REG_FP. + */ + max = MAX_BPF_STACK + mask_to_left; /* Indirect variable offset stack access is prohibited in * unprivileged mode so it's not handled here. */ @@@ -5876,17 -6056,29 +6060,30 @@@ if (mask_to_left) *ptr_limit = MAX_BPF_STACK + off; else - *ptr_limit = -off; - return 0; + *ptr_limit = -off - 1; + return *ptr_limit >= max ? -ERANGE : 0; + case PTR_TO_MAP_KEY: + /* Currently, this code is not exercised as the only use + * is bpf_for_each_map_elem() helper which requires + * bpf_capable. The code has been tested manually for + * future use. + */ + if (mask_to_left) { + *ptr_limit = ptr_reg->umax_value + ptr_reg->off; + } else { + off = ptr_reg->smin_value + ptr_reg->off; + *ptr_limit = ptr_reg->map_ptr->key_size - off; + } + return 0; case PTR_TO_MAP_VALUE: + max = ptr_reg->map_ptr->value_size; if (mask_to_left) { *ptr_limit = ptr_reg->umax_value + ptr_reg->off; } else { off = ptr_reg->smin_value + ptr_reg->off; - *ptr_limit = ptr_reg->map_ptr->value_size - off; + *ptr_limit = ptr_reg->map_ptr->value_size - off - 1; } - return 0; + return *ptr_limit >= max ? -ERANGE : 0; default: return -EINVAL; } @@@ -5909,7 -6101,7 +6106,7 @@@ static int update_alu_sanitation_state( aux->alu_limit != alu_limit)) return -EACCES;
- /* Corresponding fixup done in fixup_bpf_calls(). */ + /* Corresponding fixup done in do_misc_fixups(). */ aux->alu_state = alu_state; aux->alu_limit = alu_limit; return 0; @@@ -5939,7 -6131,6 +6136,7 @@@ static int sanitize_ptr_alu(struct bpf_ u32 alu_state, alu_limit; struct bpf_reg_state tmp; bool ret; + int err;
if (can_skip_alu_sanitation(env, insn)) return 0; @@@ -5955,13 -6146,10 +6152,13 @@@ alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
- if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) - return 0; - if (update_alu_sanitation_state(aux, alu_state, alu_limit)) - return -EACCES; + err = retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg); + if (err < 0) + return err; + + err = update_alu_sanitation_state(aux, alu_state, alu_limit); + if (err < 0) + return err; do_sim: /* Simulate and find potential out-of-bounds access under * speculative execution from truncation as a result of @@@ -6084,6 -6272,7 +6281,7 @@@ static int adjust_ptr_min_max_vals(stru verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str[ptr_reg->type]); return -EACCES; + case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { verbose(env, "R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", @@@ -6112,7 -6301,7 +6310,7 @@@ case BPF_ADD: ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); if (ret < 0) { - verbose(env, "R%d tried to add from different maps or paths\n", dst); + verbose(env, "R%d tried to add from different maps, paths, or prohibited types\n", dst); return ret; } /* We can take a fixed offset as long as it doesn't overflow @@@ -6167,7 -6356,7 +6365,7 @@@ case BPF_SUB: ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); if (ret < 0) { - verbose(env, "R%d tried to sub from different maps or paths\n", dst); + verbose(env, "R%d tried to sub from different maps, paths, or prohibited types\n", dst); return ret; } if (dst_reg == off_reg) { @@@ -8263,6 -8452,24 +8461,24 @@@ static int check_ld_imm(struct bpf_veri return 0; }
+ if (insn->src_reg == BPF_PSEUDO_FUNC) { + struct bpf_prog_aux *aux = env->prog->aux; + u32 subprogno = insn[1].imm; + + if (!aux->func_info) { + verbose(env, "missing btf func_info\n"); + return -EINVAL; + } + if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) { + verbose(env, "callback function not static\n"); + return -EINVAL; + } + + dst_reg->type = PTR_TO_FUNC; + dst_reg->subprogno = subprogno; + return 0; + } + map = env->used_maps[aux->map_index]; mark_reg_known_zero(env, regs, insn->dst_reg); dst_reg->map_ptr = map; @@@ -8491,17 -8698,7 +8707,7 @@@ static int check_return_code(struct bpf }
if (!tnum_in(range, reg->var_off)) { - char tn_buf[48]; - - verbose(env, "At program exit the register R0 "); - if (!tnum_is_unknown(reg->var_off)) { - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "has value %s", tn_buf); - } else { - verbose(env, "has unknown scalar value"); - } - tnum_strn(tn_buf, sizeof(tn_buf), range); - verbose(env, " should have been in %s\n", tn_buf); + verbose_invalid_scalar(env, reg, &range, "program exit", "R0"); return -EINVAL; }
@@@ -8628,6 -8825,27 +8834,27 @@@ static int push_insn(int t, int w, int return DONE_EXPLORING; }
+ static int visit_func_call_insn(int t, int insn_cnt, + struct bpf_insn *insns, + struct bpf_verifier_env *env, + bool visit_callee) + { + int ret; + + ret = push_insn(t, t + 1, FALLTHROUGH, env, false); + if (ret) + return ret; + + if (t + 1 < insn_cnt) + init_explored_state(env, t + 1); + if (visit_callee) { + init_explored_state(env, t); + ret = push_insn(t, t + insns[t].imm + 1, BRANCH, + env, false); + } + return ret; + } + /* Visits the instruction at index t and returns one of the following: * < 0 - an error occurred * DONE_EXPLORING - the instruction was fully explored @@@ -8638,6 -8856,9 +8865,9 @@@ static int visit_insn(int t, int insn_c struct bpf_insn *insns = env->prog->insnsi; int ret;
+ if (bpf_pseudo_func(insns + t)) + return visit_func_call_insn(t, insn_cnt, insns, env, true); + /* All non-branch instructions have a single fall-through edge. */ if (BPF_CLASS(insns[t].code) != BPF_JMP && BPF_CLASS(insns[t].code) != BPF_JMP32) @@@ -8648,18 -8869,8 +8878,8 @@@ return DONE_EXPLORING;
case BPF_CALL: - ret = push_insn(t, t + 1, FALLTHROUGH, env, false); - if (ret) - return ret; - - if (t + 1 < insn_cnt) - init_explored_state(env, t + 1); - if (insns[t].src_reg == BPF_PSEUDO_CALL) { - init_explored_state(env, t); - ret = push_insn(t, t + insns[t].imm + 1, BRANCH, - env, false); - } - return ret; + return visit_func_call_insn(t, insn_cnt, insns, env, + insns[t].src_reg == BPF_PSEUDO_CALL);
case BPF_JA: if (BPF_SRC(insns[t].code) != BPF_K) @@@ -9065,10 -9276,6 +9285,10 @@@ static int check_btf_info(struct bpf_ve btf = btf_get_by_fd(attr->prog_btf_fd); if (IS_ERR(btf)) return PTR_ERR(btf); + if (btf_is_kernel(btf)) { + btf_put(btf); + return -EACCES; + } env->prog->aux->btf = btf;
err = check_btf_func(env, attr, uattr); @@@ -9272,6 -9479,7 +9492,7 @@@ static bool regsafe(struct bpf_reg_stat */ return false; } + case PTR_TO_MAP_KEY: case PTR_TO_MAP_VALUE: /* If the new min/max/var_off satisfy the old ones and * everything else matches, we are OK. @@@ -10118,10 -10326,9 +10339,9 @@@ static int do_check(struct bpf_verifier if (insn->src_reg == BPF_PSEUDO_CALL) err = check_func_call(env, insn, &env->insn_idx); else - err = check_helper_call(env, insn->imm, env->insn_idx); + err = check_helper_call(env, insn, &env->insn_idx); if (err) return err; - } else if (opcode == BPF_JA) { if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || @@@ -10550,6 -10757,12 +10770,12 @@@ static int resolve_pseudo_ldimm64(struc goto next_insn; }
+ if (insn[0].src_reg == BPF_PSEUDO_FUNC) { + aux = &env->insn_aux_data[i]; + aux->ptr_type = PTR_TO_FUNC; + goto next_insn; + } + /* In final convert_pseudo_ld_imm64() step, this is * converted into regular 64-bit imm load insn. */ @@@ -10682,9 -10895,13 +10908,13 @@@ static void convert_pseudo_ld_imm64(str int insn_cnt = env->prog->len; int i;
- for (i = 0; i < insn_cnt; i++, insn++) - if (insn->code == (BPF_LD | BPF_IMM | BPF_DW)) - insn->src_reg = 0; + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) + continue; + if (insn->src_reg == BPF_PSEUDO_FUNC) + continue; + insn->src_reg = 0; + } }
/* single env->prog->insni[off] instruction was replaced with the range @@@ -11323,6 -11540,12 +11553,12 @@@ static int jit_subprogs(struct bpf_veri return 0;
for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + env->insn_aux_data[i].call_imm = insn->imm; + /* subprog is encoded in insn[1].imm */ + continue; + } + if (!bpf_pseudo_call(insn)) continue; /* Upon error here we cannot fall back to interpreter but @@@ -11452,6 -11675,12 +11688,12 @@@ for (i = 0; i < env->subprog_cnt; i++) { insn = func[i]->insnsi; for (j = 0; j < func[i]->len; j++, insn++) { + if (bpf_pseudo_func(insn)) { + subprog = insn[1].imm; + insn[0].imm = (u32)(long)func[subprog]->bpf_func; + insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32; + continue; + } if (!bpf_pseudo_call(insn)) continue; subprog = insn->off; @@@ -11497,6 -11726,11 +11739,11 @@@ * later look the same as if they were interpreted only. */ for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + insn[0].imm = env->insn_aux_data[i].call_imm; + insn[1].imm = find_subprog(env, i + insn[0].imm + 1); + continue; + } if (!bpf_pseudo_call(insn)) continue; insn->off = env->insn_aux_data[i].call_imm; @@@ -11561,6 -11795,14 +11808,14 @@@ static int fixup_call_args(struct bpf_v return -EINVAL; } for (i = 0; i < prog->len; i++, insn++) { + if (bpf_pseudo_func(insn)) { + /* When JIT fails the progs with callback calls + * have to be rejected, since interpreter doesn't support them yet. + */ + verbose(env, "callbacks are not allowed in non-JITed programs\n"); + return -EINVAL; + } + if (!bpf_pseudo_call(insn)) continue; depth = get_callee_stack_depth(env, insn, i); @@@ -11573,12 -11815,10 +11828,10 @@@ return err; }
- /* fixup insn->imm field of bpf_call instructions - * and inline eligible helpers as explicit sequence of BPF instructions - * - * this function is called after eBPF program passed verification + /* Do various post-verification rewrites in a single program pass. + * These rewrites simplify JIT and interpreter implementations. */ - static int fixup_bpf_calls(struct bpf_verifier_env *env) + static int do_misc_fixups(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog; bool expect_blinding = bpf_jit_blinding_enabled(prog); @@@ -11593,6 -11833,7 +11846,7 @@@ int i, ret, cnt, delta = 0;
for (i = 0; i < insn_cnt; i++, insn++) { + /* Make divide-by-zero exceptions impossible. */ if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) || insn->code == (BPF_ALU | BPF_MOD | BPF_X) || @@@ -11633,6 -11874,7 +11887,7 @@@ continue; }
+ /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */ if (BPF_CLASS(insn->code) == BPF_LD && (BPF_MODE(insn->code) == BPF_ABS || BPF_MODE(insn->code) == BPF_IND)) { @@@ -11652,6 -11894,7 +11907,7 @@@ continue; }
+ /* Rewrite pointer arithmetic to mitigate speculation attacks. */ if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; @@@ -11673,7 -11916,7 +11929,7 @@@ off_reg = issrc ? insn->src_reg : insn->dst_reg; if (isneg) *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); - *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); + *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); @@@ -11800,7 -12043,8 +12056,8 @@@ insn->imm == BPF_FUNC_map_delete_elem || insn->imm == BPF_FUNC_map_push_elem || insn->imm == BPF_FUNC_map_pop_elem || - insn->imm == BPF_FUNC_map_peek_elem)) { + insn->imm == BPF_FUNC_map_peek_elem || + insn->imm == BPF_FUNC_redirect_map)) { aux = &env->insn_aux_data[i + delta]; if (bpf_map_ptr_poisoned(aux)) goto patch_call_imm; @@@ -11842,6 -12086,9 +12099,9 @@@ (int (*)(struct bpf_map *map, void *value))NULL)); BUILD_BUG_ON(!__same_type(ops->map_peek_elem, (int (*)(struct bpf_map *map, void *value))NULL)); + BUILD_BUG_ON(!__same_type(ops->map_redirect, + (int (*)(struct bpf_map *map, u32 ifindex, u64 flags))NULL)); + patch_map_ops_generic: switch (insn->imm) { case BPF_FUNC_map_lookup_elem: @@@ -11868,11 -12115,16 +12128,16 @@@ insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - __bpf_call_base; continue; + case BPF_FUNC_redirect_map: + insn->imm = BPF_CAST_CALL(ops->map_redirect) - + __bpf_call_base; + continue; }
goto patch_call_imm; }
+ /* Implement bpf_jiffies64 inline. */ if (prog->jit_requested && BITS_PER_LONG == 64 && insn->imm == BPF_FUNC_jiffies64) { struct bpf_insn ld_jiffies_addr[2] = { @@@ -12683,7 -12935,7 +12948,7 @@@ skip_full_check ret = convert_ctx_accesses(env);
if (ret == 0) - ret = fixup_bpf_calls(env); + ret = do_misc_fixups(env);
/* do 32-bit optimization after insn patching has done so those patched * insns could be handled correctly. diff --combined kernel/fork.c index 54cc905e5fe0,b94391a58708..50209691f21a --- a/kernel/fork.c +++ b/kernel/fork.c @@@ -96,6 -96,7 +96,7 @@@ #include <linux/kasan.h> #include <linux/scs.h> #include <linux/io_uring.h> + #include <linux/bpf.h>
#include <asm/pgalloc.h> #include <linux/uaccess.h> @@@ -734,6 -735,7 +735,7 @@@ void __put_task_struct(struct task_stru cgroup_free(tsk); task_numa_free(tsk, true); security_task_free(tsk); + bpf_task_storage_free(tsk); exit_creds(tsk); delayacct_tsk_free(tsk); put_signal_struct(tsk->signal); @@@ -994,13 -996,6 +996,13 @@@ static void mm_init_owner(struct mm_str #endif }
+static void mm_init_pasid(struct mm_struct *mm) +{ +#ifdef CONFIG_IOMMU_SUPPORT + mm->pasid = INIT_PASID; +#endif +} + static void mm_init_uprobes_state(struct mm_struct *mm) { #ifdef CONFIG_UPROBES @@@ -1031,7 -1026,6 +1033,7 @@@ static struct mm_struct *mm_init(struc mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); + mm_init_pasid(mm); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); @@@ -2072,6 -2066,9 +2074,9 @@@ static __latent_entropy struct task_str p->sequential_io = 0; p->sequential_io_avg = 0; #endif + #ifdef CONFIG_BPF_SYSCALL + RCU_INIT_POINTER(p->bpf_storage, NULL); + #endif
/* Perform scheduler related setup. Assign this task to a CPU. */ retval = sched_fork(clone_flags, p); diff --combined net/core/dev.c index 0f72ff5d34ba,c9a496f5e687..40699957e882 --- a/net/core/dev.c +++ b/net/core/dev.c @@@ -1184,18 -1184,6 +1184,18 @@@ static int __dev_alloc_name(struct net return -ENOMEM;
for_each_netdev(net, d) { + struct netdev_name_node *name_node; + list_for_each_entry(name_node, &d->name_node->list, list) { + if (!sscanf(name_node->name, name, &i)) + continue; + if (i < 0 || i >= max_netdevices) + continue; + + /* avoid cases where sscanf is not exact inverse of printf */ + snprintf(buf, IFNAMSIZ, name, i); + if (!strncmp(buf, name_node->name, IFNAMSIZ)) + set_bit(i, inuse); + } if (!sscanf(d->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) @@@ -2463,16 -2451,14 +2463,14 @@@ int netdev_txq_to_tc(struct net_device EXPORT_SYMBOL(netdev_txq_to_tc);
#ifdef CONFIG_XPS - struct static_key xps_needed __read_mostly; - EXPORT_SYMBOL(xps_needed); - struct static_key xps_rxqs_needed __read_mostly; - EXPORT_SYMBOL(xps_rxqs_needed); + static struct static_key xps_needed __read_mostly; + static struct static_key xps_rxqs_needed __read_mostly; static DEFINE_MUTEX(xps_map_mutex); #define xmap_dereference(P) \ rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
static bool remove_xps_queue(struct xps_dev_maps *dev_maps, - int tci, u16 index) + struct xps_dev_maps *old_maps, int tci, u16 index) { struct xps_map *map = NULL; int pos; @@@ -2491,6 -2477,8 +2489,8 @@@ break; }
+ if (old_maps) + RCU_INIT_POINTER(old_maps->attr_map[tci], NULL); RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL); kfree_rcu(map, rcu); return false; @@@ -2503,7 -2491,7 +2503,7 @@@ static bool remove_xps_queue_cpu(struc struct xps_dev_maps *dev_maps, int cpu, u16 offset, u16 count) { - int num_tc = dev->num_tc ? : 1; + int num_tc = dev_maps->num_tc; bool active = false; int tci;
@@@ -2511,7 -2499,7 +2511,7 @@@ int i, j;
for (i = count, j = offset; i--; j++) { - if (!remove_xps_queue(dev_maps, tci, j)) + if (!remove_xps_queue(dev_maps, NULL, tci, j)) break; }
@@@ -2523,74 -2511,54 +2523,54 @@@
static void reset_xps_maps(struct net_device *dev, struct xps_dev_maps *dev_maps, - bool is_rxqs_map) + enum xps_map_type type) { - if (is_rxqs_map) { - static_key_slow_dec_cpuslocked(&xps_rxqs_needed); - RCU_INIT_POINTER(dev->xps_rxqs_map, NULL); - } else { - RCU_INIT_POINTER(dev->xps_cpus_map, NULL); - } static_key_slow_dec_cpuslocked(&xps_needed); + if (type == XPS_RXQS) + static_key_slow_dec_cpuslocked(&xps_rxqs_needed); + + RCU_INIT_POINTER(dev->xps_maps[type], NULL); + kfree_rcu(dev_maps, rcu); }
- static void clean_xps_maps(struct net_device *dev, const unsigned long *mask, - struct xps_dev_maps *dev_maps, unsigned int nr_ids, - u16 offset, u16 count, bool is_rxqs_map) + static void clean_xps_maps(struct net_device *dev, enum xps_map_type type, + u16 offset, u16 count) { + struct xps_dev_maps *dev_maps; bool active = false; int i, j;
- for (j = -1; j = netif_attrmask_next(j, mask, nr_ids), - j < nr_ids;) - active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, - count); + dev_maps = xmap_dereference(dev->xps_maps[type]); + if (!dev_maps) + return; + + for (j = 0; j < dev_maps->nr_ids; j++) + active |= remove_xps_queue_cpu(dev, dev_maps, j, offset, count); if (!active) - reset_xps_maps(dev, dev_maps, is_rxqs_map); + reset_xps_maps(dev, dev_maps, type);
- if (!is_rxqs_map) { - for (i = offset + (count - 1); count--; i--) { + if (type == XPS_CPUS) { + for (i = offset + (count - 1); count--; i--) netdev_queue_numa_node_write( - netdev_get_tx_queue(dev, i), - NUMA_NO_NODE); - } + netdev_get_tx_queue(dev, i), NUMA_NO_NODE); } }
static void netif_reset_xps_queues(struct net_device *dev, u16 offset, u16 count) { - const unsigned long *possible_mask = NULL; - struct xps_dev_maps *dev_maps; - unsigned int nr_ids; - if (!static_key_false(&xps_needed)) return;
cpus_read_lock(); mutex_lock(&xps_map_mutex);
- if (static_key_false(&xps_rxqs_needed)) { - dev_maps = xmap_dereference(dev->xps_rxqs_map); - if (dev_maps) { - nr_ids = dev->num_rx_queues; - clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, - offset, count, true); - } - } + if (static_key_false(&xps_rxqs_needed)) + clean_xps_maps(dev, XPS_RXQS, offset, count);
- dev_maps = xmap_dereference(dev->xps_cpus_map); - if (!dev_maps) - goto out_no_maps; - - if (num_possible_cpus() > 1) - possible_mask = cpumask_bits(cpu_possible_mask); - nr_ids = nr_cpu_ids; - clean_xps_maps(dev, possible_mask, dev_maps, nr_ids, offset, count, - false); + clean_xps_maps(dev, XPS_CPUS, offset, count);
- out_no_maps: mutex_unlock(&xps_map_mutex); cpus_read_unlock(); } @@@ -2640,16 -2608,35 +2620,35 @@@ static struct xps_map *expand_xps_map(s return new_map; }
+ /* Copy xps maps at a given index */ + static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps, + struct xps_dev_maps *new_dev_maps, int index, + int tc, bool skip_tc) + { + int i, tci = index * dev_maps->num_tc; + struct xps_map *map; + + /* copy maps belonging to foreign traffic classes */ + for (i = 0; i < dev_maps->num_tc; i++, tci++) { + if (i == tc && skip_tc) + continue; + + /* fill in the new device map from the old device map */ + map = xmap_dereference(dev_maps->attr_map[tci]); + RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); + } + } + /* Must be called under cpus_read_lock */ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask, - u16 index, bool is_rxqs_map) + u16 index, enum xps_map_type type) { - const unsigned long *online_mask = NULL, *possible_mask = NULL; - struct xps_dev_maps *dev_maps, *new_dev_maps = NULL; + struct xps_dev_maps *dev_maps, *new_dev_maps = NULL, *old_dev_maps = NULL; + const unsigned long *online_mask = NULL; + bool active = false, copy = false; int i, j, tci, numa_node_id = -2; int maps_sz, num_tc = 1, tc = 0; struct xps_map *map, *new_map; - bool active = false; unsigned int nr_ids;
if (dev->num_tc) { @@@ -2667,38 -2654,48 +2666,48 @@@ }
mutex_lock(&xps_map_mutex); - if (is_rxqs_map) { + + dev_maps = xmap_dereference(dev->xps_maps[type]); + if (type == XPS_RXQS) { maps_sz = XPS_RXQ_DEV_MAPS_SIZE(num_tc, dev->num_rx_queues); - dev_maps = xmap_dereference(dev->xps_rxqs_map); nr_ids = dev->num_rx_queues; } else { maps_sz = XPS_CPU_DEV_MAPS_SIZE(num_tc); - if (num_possible_cpus() > 1) { + if (num_possible_cpus() > 1) online_mask = cpumask_bits(cpu_online_mask); - possible_mask = cpumask_bits(cpu_possible_mask); - } - dev_maps = xmap_dereference(dev->xps_cpus_map); nr_ids = nr_cpu_ids; }
if (maps_sz < L1_CACHE_BYTES) maps_sz = L1_CACHE_BYTES;
+ /* The old dev_maps could be larger or smaller than the one we're + * setting up now, as dev->num_tc or nr_ids could have been updated in + * between. We could try to be smart, but let's be safe instead and only + * copy foreign traffic classes if the two map sizes match. + */ + if (dev_maps && + dev_maps->num_tc == num_tc && dev_maps->nr_ids == nr_ids) + copy = true; + /* allocate memory for queue storage */ for (j = -1; j = netif_attrmask_next_and(j, online_mask, mask, nr_ids), j < nr_ids;) { - if (!new_dev_maps) - new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); if (!new_dev_maps) { - mutex_unlock(&xps_map_mutex); - return -ENOMEM; + new_dev_maps = kzalloc(maps_sz, GFP_KERNEL); + if (!new_dev_maps) { + mutex_unlock(&xps_map_mutex); + return -ENOMEM; + } + + new_dev_maps->nr_ids = nr_ids; + new_dev_maps->num_tc = num_tc; }
tci = j * num_tc + tc; - map = dev_maps ? xmap_dereference(dev_maps->attr_map[tci]) : - NULL; + map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL;
- map = expand_xps_map(map, j, index, is_rxqs_map); + map = expand_xps_map(map, j, index, type == XPS_RXQS); if (!map) goto error;
@@@ -2711,29 -2708,21 +2720,21 @@@ if (!dev_maps) { /* Increment static keys at most once per type */ static_key_slow_inc_cpuslocked(&xps_needed); - if (is_rxqs_map) + if (type == XPS_RXQS) static_key_slow_inc_cpuslocked(&xps_rxqs_needed); }
- for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { - /* copy maps belonging to foreign traffic classes */ - for (i = tc, tci = j * num_tc; dev_maps && i--; tci++) { - /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->attr_map[tci]); - RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); - } + for (j = 0; j < nr_ids; j++) { + bool skip_tc = false;
- /* We need to explicitly update tci as prevous loop - * could break out early if dev_maps is NULL. - */ tci = j * num_tc + tc; - if (netif_attr_test_mask(j, mask, nr_ids) && netif_attr_test_online(j, online_mask, nr_ids)) { /* add tx-queue to CPU/rx-queue maps */ int pos = 0;
+ skip_tc = true; + map = xmap_dereference(new_dev_maps->attr_map[tci]); while ((pos < map->len) && (map->queues[pos] != index)) pos++; @@@ -2741,78 -2730,81 +2742,81 @@@ if (pos == map->len) map->queues[map->len++] = index; #ifdef CONFIG_NUMA - if (!is_rxqs_map) { + if (type == XPS_CPUS) { if (numa_node_id == -2) numa_node_id = cpu_to_node(j); else if (numa_node_id != cpu_to_node(j)) numa_node_id = -1; } #endif - } else if (dev_maps) { - /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->attr_map[tci]); - RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); }
- /* copy maps belonging to foreign traffic classes */ - for (i = num_tc - tc, tci++; dev_maps && --i; tci++) { - /* fill in the new device map from the old device map */ - map = xmap_dereference(dev_maps->attr_map[tci]); - RCU_INIT_POINTER(new_dev_maps->attr_map[tci], map); - } + if (copy) + xps_copy_dev_maps(dev_maps, new_dev_maps, j, tc, + skip_tc); }
- if (is_rxqs_map) - rcu_assign_pointer(dev->xps_rxqs_map, new_dev_maps); - else - rcu_assign_pointer(dev->xps_cpus_map, new_dev_maps); + rcu_assign_pointer(dev->xps_maps[type], new_dev_maps);
/* Cleanup old maps */ if (!dev_maps) goto out_no_old_maps;
- for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { - for (i = num_tc, tci = j * num_tc; i--; tci++) { - new_map = xmap_dereference(new_dev_maps->attr_map[tci]); + for (j = 0; j < dev_maps->nr_ids; j++) { + for (i = num_tc, tci = j * dev_maps->num_tc; i--; tci++) { map = xmap_dereference(dev_maps->attr_map[tci]); - if (map && map != new_map) - kfree_rcu(map, rcu); + if (!map) + continue; + + if (copy) { + new_map = xmap_dereference(new_dev_maps->attr_map[tci]); + if (map == new_map) + continue; + } + + RCU_INIT_POINTER(dev_maps->attr_map[tci], NULL); + kfree_rcu(map, rcu); } }
- kfree_rcu(dev_maps, rcu); + old_dev_maps = dev_maps;
out_no_old_maps: dev_maps = new_dev_maps; active = true;
out_no_new_maps: - if (!is_rxqs_map) { + if (type == XPS_CPUS) /* update Tx queue numa node */ netdev_queue_numa_node_write(netdev_get_tx_queue(dev, index), (numa_node_id >= 0) ? numa_node_id : NUMA_NO_NODE); - }
if (!dev_maps) goto out_no_maps;
/* removes tx-queue from unused CPUs/rx-queues */ - for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { - for (i = tc, tci = j * num_tc; i--; tci++) - active |= remove_xps_queue(dev_maps, tci, index); - if (!netif_attr_test_mask(j, mask, nr_ids) || - !netif_attr_test_online(j, online_mask, nr_ids)) - active |= remove_xps_queue(dev_maps, tci, index); - for (i = num_tc - tc, tci++; --i; tci++) - active |= remove_xps_queue(dev_maps, tci, index); + for (j = 0; j < dev_maps->nr_ids; j++) { + tci = j * dev_maps->num_tc; + + for (i = 0; i < dev_maps->num_tc; i++, tci++) { + if (i == tc && + netif_attr_test_mask(j, mask, dev_maps->nr_ids) && + netif_attr_test_online(j, online_mask, dev_maps->nr_ids)) + continue; + + active |= remove_xps_queue(dev_maps, + copy ? old_dev_maps : NULL, + tci, index); + } }
+ if (old_dev_maps) + kfree_rcu(old_dev_maps, rcu); + /* free map if not active */ if (!active) - reset_xps_maps(dev, dev_maps, is_rxqs_map); + reset_xps_maps(dev, dev_maps, type);
out_no_maps: mutex_unlock(&xps_map_mutex); @@@ -2820,11 -2812,10 +2824,10 @@@ return 0; error: /* remove any maps that we added */ - for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids), - j < nr_ids;) { + for (j = 0; j < nr_ids; j++) { for (i = num_tc, tci = j * num_tc; i--; tci++) { new_map = xmap_dereference(new_dev_maps->attr_map[tci]); - map = dev_maps ? + map = copy ? xmap_dereference(dev_maps->attr_map[tci]) : NULL; if (new_map && new_map != map) @@@ -2845,7 -2836,7 +2848,7 @@@ int netif_set_xps_queue(struct net_devi int ret;
cpus_read_lock(); - ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, false); + ret = __netif_set_xps_queue(dev, cpumask_bits(mask), index, XPS_CPUS); cpus_read_unlock();
return ret; @@@ -3956,13 -3947,15 +3959,15 @@@ sch_handle_egress(struct sk_buff *skb, static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb, struct xps_dev_maps *dev_maps, unsigned int tci) { + int tc = netdev_get_prio_tc_map(dev, skb->priority); struct xps_map *map; int queue_index = -1;
- if (dev->num_tc) { - tci *= dev->num_tc; - tci += netdev_get_prio_tc_map(dev, skb->priority); - } + if (tc >= dev_maps->num_tc || tci >= dev_maps->nr_ids) + return queue_index; + + tci *= dev_maps->num_tc; + tci += tc;
map = rcu_dereference(dev_maps->attr_map[tci]); if (map) { @@@ -3993,18 -3986,18 +3998,18 @@@ static int get_xps_queue(struct net_dev if (!static_key_false(&xps_rxqs_needed)) goto get_cpus_map;
- dev_maps = rcu_dereference(sb_dev->xps_rxqs_map); + dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_RXQS]); if (dev_maps) { int tci = sk_rx_queue_get(sk);
- if (tci >= 0 && tci < dev->num_rx_queues) + if (tci >= 0) queue_index = __get_xps_queue_idx(dev, skb, dev_maps, tci); }
get_cpus_map: if (queue_index < 0) { - dev_maps = rcu_dereference(sb_dev->xps_cpus_map); + dev_maps = rcu_dereference(sb_dev->xps_maps[XPS_CPUS]); if (dev_maps) { unsigned int tci = skb->sender_cpu - 1;
@@@ -4306,13 -4299,6 +4311,13 @@@ static inline void ____napi_schedule(st */ thread = READ_ONCE(napi->thread); if (thread) { + /* Avoid doing set_bit() if the thread is in + * INTERRUPTIBLE state, cause napi_thread_wait() + * makes sure to proceed with napi polling + * if the thread is explicitly woken from here. + */ + if (READ_ONCE(thread->state) != TASK_INTERRUPTIBLE) + set_bit(NAPI_STATE_SCHED_THREADED, &napi->state); wake_up_process(thread); return; } @@@ -5284,6 -5270,7 +5289,7 @@@ skip_classify goto another_round; case RX_HANDLER_EXACT: deliver_exact = true; + break; case RX_HANDLER_PASS: break; default: @@@ -5876,15 -5863,13 +5882,13 @@@ void napi_gro_flush(struct napi_struct } EXPORT_SYMBOL(napi_gro_flush);
- static struct list_head *gro_list_prepare(struct napi_struct *napi, - struct sk_buff *skb) + static void gro_list_prepare(const struct list_head *head, + const struct sk_buff *skb) { unsigned int maclen = skb->dev->hard_header_len; u32 hash = skb_get_hash_raw(skb); - struct list_head *head; struct sk_buff *p;
- head = &napi->gro_hash[hash & (GRO_HASH_BUCKETS - 1)].list; list_for_each_entry(p, head, list) { unsigned long diffs;
@@@ -5910,8 -5895,6 +5914,6 @@@ maclen); NAPI_GRO_CB(p)->same_flow = !diffs; } - - return head; }
static void skb_gro_reset_offset(struct sk_buff *skb) @@@ -5974,11 -5957,11 +5976,11 @@@ static void gro_flush_oldest(struct nap
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { - u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); + u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); + struct gro_list *gro_list = &napi->gro_hash[bucket]; struct list_head *head = &offload_base; struct packet_offload *ptype; __be16 type = skb->protocol; - struct list_head *gro_head; struct sk_buff *pp = NULL; enum gro_result ret; int same_flow; @@@ -5987,7 -5970,7 +5989,7 @@@ if (netif_elide_gro(skb->dev)) goto normal;
- gro_head = gro_list_prepare(napi, skb); + gro_list_prepare(&gro_list->list, skb);
rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@@ -6023,7 -6006,7 +6025,7 @@@
pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive, ipv6_gro_receive, inet_gro_receive, - gro_head, skb); + &gro_list->list, skb); break; } rcu_read_unlock(); @@@ -6042,7 -6025,7 +6044,7 @@@ if (pp) { skb_list_del_init(pp); napi_gro_complete(napi, pp); - napi->gro_hash[hash].count--; + gro_list->count--; }
if (same_flow) @@@ -6051,16 -6034,16 +6053,16 @@@ if (NAPI_GRO_CB(skb)->flush) goto normal;
- if (unlikely(napi->gro_hash[hash].count >= MAX_GRO_SKBS)) { - gro_flush_oldest(napi, gro_head); - } else { - napi->gro_hash[hash].count++; - } + if (unlikely(gro_list->count >= MAX_GRO_SKBS)) + gro_flush_oldest(napi, &gro_list->list); + else + gro_list->count++; + NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->age = jiffies; NAPI_GRO_CB(skb)->last = skb; skb_shinfo(skb)->gso_size = skb_gro_len(skb); - list_add(&skb->list, gro_head); + list_add(&skb->list, &gro_list->list); ret = GRO_HELD;
pull: @@@ -6068,11 -6051,11 +6070,11 @@@ if (grow > 0) gro_pull_from_frag0(skb, grow); ok: - if (napi->gro_hash[hash].count) { - if (!test_bit(hash, &napi->gro_bitmask)) - __set_bit(hash, &napi->gro_bitmask); - } else if (test_bit(hash, &napi->gro_bitmask)) { - __clear_bit(hash, &napi->gro_bitmask); + if (gro_list->count) { + if (!test_bit(bucket, &napi->gro_bitmask)) + __set_bit(bucket, &napi->gro_bitmask); + } else if (test_bit(bucket, &napi->gro_bitmask)) { + __clear_bit(bucket, &napi->gro_bitmask); }
return ret; @@@ -6505,7 -6488,6 +6507,7 @@@ bool napi_complete_done(struct napi_str WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED | + NAPIF_STATE_SCHED_THREADED | NAPIF_STATE_PREFER_BUSY_POLL);
/* If STATE_MISSED was set, leave STATE_SCHED set, @@@ -6789,6 -6771,7 +6791,7 @@@ int dev_set_threaded(struct net_device
return err; } + EXPORT_SYMBOL(dev_set_threaded);
void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) @@@ -6988,25 -6971,16 +6991,25 @@@ static int napi_poll(struct napi_struc
static int napi_thread_wait(struct napi_struct *napi) { + bool woken = false; + set_current_state(TASK_INTERRUPTIBLE);
while (!kthread_should_stop() && !napi_disable_pending(napi)) { - if (test_bit(NAPI_STATE_SCHED, &napi->state)) { + /* Testing SCHED_THREADED bit here to make sure the current + * kthread owns this napi and could poll on this napi. + * Testing SCHED bit is not enough because SCHED bit might be + * set by some other busy poll thread or by napi_disable(). + */ + if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) { WARN_ON(!list_empty(&napi->poll_list)); __set_current_state(TASK_RUNNING); return 0; }
schedule(); + /* woken being true indicates this thread owns this napi. */ + woken = true; set_current_state(TASK_INTERRUPTIBLE); } __set_current_state(TASK_RUNNING); @@@ -10336,11 -10310,15 +10339,15 @@@ EXPORT_SYMBOL(register_netdev)
int netdev_refcnt_read(const struct net_device *dev) { + #ifdef CONFIG_PCPU_DEV_REFCNT int i, refcnt = 0;
for_each_possible_cpu(i) refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i); return refcnt; + #else + return refcount_read(&dev->dev_refcnt); + #endif } EXPORT_SYMBOL(netdev_refcnt_read);
@@@ -10368,7 -10346,7 +10375,7 @@@ static void netdev_wait_allrefs(struct rebroadcast_time = warning_time = jiffies; refcnt = netdev_refcnt_read(dev);
- while (refcnt != 0) { + while (refcnt != 1) { if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { rtnl_lock();
@@@ -10405,7 -10383,7 +10412,7 @@@
refcnt = netdev_refcnt_read(dev);
- if (refcnt && time_after(jiffies, warning_time + 10 * HZ)) { + if (refcnt != 1 && time_after(jiffies, warning_time + 10 * HZ)) { pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", dev->name, refcnt); warning_time = jiffies; @@@ -10481,7 -10459,7 +10488,7 @@@ void netdev_run_todo(void netdev_wait_allrefs(dev);
/* paranoia */ - BUG_ON(netdev_refcnt_read(dev)); + BUG_ON(netdev_refcnt_read(dev) != 1); BUG_ON(!list_empty(&dev->ptype_all)); BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); @@@ -10698,9 -10676,14 +10705,14 @@@ struct net_device *alloc_netdev_mqs(in dev = PTR_ALIGN(p, NETDEV_ALIGN); dev->padded = (char *)dev - (char *)p;
+ #ifdef CONFIG_PCPU_DEV_REFCNT dev->pcpu_refcnt = alloc_percpu(int); if (!dev->pcpu_refcnt) goto free_dev; + dev_hold(dev); + #else + refcount_set(&dev->dev_refcnt, 1); + #endif
if (dev_addr_init(dev)) goto free_pcpu; @@@ -10764,8 -10747,10 +10776,10 @@@ free_all return NULL;
free_pcpu: + #ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); free_dev: + #endif netdev_freemem(dev); return NULL; } @@@ -10807,8 -10792,10 +10821,10 @@@ void free_netdev(struct net_device *dev list_for_each_entry_safe(p, n, &dev->napi_list, dev_list) netif_napi_del(p);
+ #ifdef CONFIG_PCPU_DEV_REFCNT free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; + #endif free_percpu(dev->xdp_bulkq); dev->xdp_bulkq = NULL;
@@@ -11375,7 -11362,7 +11391,7 @@@ static void __net_exit default_device_e continue;
/* Leave virtual devices for the generic cleanup */ - if (dev->rtnl_link_ops) + if (dev->rtnl_link_ops && !dev->rtnl_link_ops->netns_refund) continue;
/* Push remaining network devices to init_net */ diff --combined net/core/drop_monitor.c index db65ce62b625,1eb02c2236f2..ead2a8aa57b4 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@@ -1053,20 -1053,6 +1053,20 @@@ static int net_dm_hw_monitor_start(stru return 0;
err_module_put: + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); + struct sk_buff *skb; + + del_timer_sync(&hw_data->send_timer); + cancel_work_sync(&hw_data->dm_alert_work); + while ((skb = __skb_dequeue(&hw_data->drop_queue))) { + struct devlink_trap_metadata *hw_metadata; + + hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; + net_dm_hw_metadata_free(hw_metadata); + consume_skb(skb); + } + } module_put(THIS_MODULE); return rc; } @@@ -1148,15 -1134,6 +1148,15 @@@ static int net_dm_trace_on_set(struct n err_unregister_trace: unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL); err_module_put: + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); + struct sk_buff *skb; + + del_timer_sync(&data->send_timer); + cancel_work_sync(&data->dm_alert_work); + while ((skb = __skb_dequeue(&data->drop_queue))) + consume_skb(skb); + } module_put(THIS_MODULE); return rc; } @@@ -1754,7 -1731,7 +1754,7 @@@ static void exit_net_drop_monitor(void
/* * Because of the module_get/put we do in the trace state change path - * we are guarnateed not to have any current users when we get here + * we are guaranteed not to have any current users when we get here */
for_each_possible_cpu(cpu) { diff --combined net/core/filter.c index 9323d34d34cc,b6732000d8a2..f5eeebf6a16f --- a/net/core/filter.c +++ b/net/core/filter.c @@@ -1863,10 -1863,7 +1863,7 @@@ static const struct bpf_func_proto bpf_ static inline int sk_skb_try_make_writable(struct sk_buff *skb, unsigned int write_len) { - int err = __bpf_try_make_writable(skb, write_len); - - bpf_compute_data_end_sk_skb(skb); - return err; + return __bpf_try_make_writable(skb, write_len); }
BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len) @@@ -3412,6 -3409,7 +3409,7 @@@ static u32 bpf_skb_net_base_len(const s BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \ BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \ + BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \ BPF_F_ADJ_ROOM_ENCAP_L2( \ BPF_ADJ_ROOM_ENCAP_L2_MASK))
@@@ -3448,6 -3446,10 +3446,10 @@@ static int bpf_skb_net_grow(struct sk_b flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) return -EINVAL;
+ if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH && + inner_mac_len < ETH_HLEN) + return -EINVAL; + if (skb->encapsulation) return -EALREADY;
@@@ -3466,7 -3468,11 +3468,11 @@@ skb->inner_mac_header = inner_net - inner_mac_len; skb->inner_network_header = inner_net; skb->inner_transport_header = inner_trans; - skb_set_inner_protocol(skb, skb->protocol); + + if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH) + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); + else + skb_set_inner_protocol(skb, skb->protocol);
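BPF_F_ADJ_ROOM_ENCAP_L2_ETH tells the stack that the bytes reserved with BPF_F_ADJ_ROOM_ENCAP_L2() start with an inner Ethernet header, so the inner protocol is recorded as ETH_P_TEB. A rough tc-program sketch of requesting such a layout (the section name, header sizes and the idea of filling the outer headers afterwards are illustrative assumptions, not part of this patch):

	// SPDX-License-Identifier: GPL-2.0
	#include <linux/bpf.h>
	#include <linux/if_ether.h>
	#include <linux/ip.h>
	#include <linux/udp.h>
	#include <linux/pkt_cls.h>
	#include <bpf/bpf_helpers.h>

	SEC("tc")
	int push_l2_tunnel(struct __sk_buff *skb)
	{
		__u32 room = sizeof(struct iphdr) + sizeof(struct udphdr) + ETH_HLEN;
		__u64 flags = BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
			      BPF_F_ADJ_ROOM_ENCAP_L4_UDP  |
			      BPF_F_ADJ_ROOM_ENCAP_L2(ETH_HLEN) |
			      BPF_F_ADJ_ROOM_ENCAP_L2_ETH;

		/* grow headroom at the MAC layer; the outer IPv4/UDP/Ethernet
		 * headers still have to be written by the program (not shown)
		 */
		if (bpf_skb_adjust_room(skb, room, BPF_ADJ_ROOM_MAC, flags))
			return TC_ACT_SHOT;

		return TC_ACT_OK;
	}

	char LICENSE[] SEC("license") = "GPL";

Note the kernel-side check added above: the L2 length passed via BPF_F_ADJ_ROOM_ENCAP_L2() must be at least ETH_HLEN when the new flag is set.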
skb->encapsulation = 1; skb_set_network_header(skb, mac_len); @@@ -3577,7 -3583,6 +3583,6 @@@ BPF_CALL_4(sk_skb_adjust_room, struct s return -ENOMEM; __skb_pull(skb, len_diff_abs); } - bpf_compute_data_end_sk_skb(skb); if (tls_sw_has_ctx_rx(skb->sk)) { struct strp_msg *rxm = strp_msg(skb);
@@@ -3742,10 -3747,7 +3747,7 @@@ static const struct bpf_func_proto bpf_ BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len, u64, flags) { - int ret = __bpf_skb_change_tail(skb, new_len, flags); - - bpf_compute_data_end_sk_skb(skb); - return ret; + return __bpf_skb_change_tail(skb, new_len, flags); }
static const struct bpf_func_proto sk_skb_change_tail_proto = { @@@ -3808,10 -3810,7 +3810,7 @@@ static const struct bpf_func_proto bpf_ BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room, u64, flags) { - int ret = __bpf_skb_change_head(skb, head_room, flags); - - bpf_compute_data_end_sk_skb(skb); - return ret; + return __bpf_skb_change_head(skb, head_room, flags); }
static const struct bpf_func_proto sk_skb_change_head_proto = { @@@ -3919,23 -3918,6 +3918,6 @@@ static const struct bpf_func_proto bpf_ .arg2_type = ARG_ANYTHING, };
- static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, - struct bpf_map *map, struct xdp_buff *xdp) - { - switch (map->map_type) { - case BPF_MAP_TYPE_DEVMAP: - case BPF_MAP_TYPE_DEVMAP_HASH: - return dev_map_enqueue(fwd, xdp, dev_rx); - case BPF_MAP_TYPE_CPUMAP: - return cpu_map_enqueue(fwd, xdp, dev_rx); - case BPF_MAP_TYPE_XSKMAP: - return __xsk_map_redirect(fwd, xdp); - default: - return -EBADRQC; - } - return 0; - } - void xdp_do_flush(void) { __dev_flush(); @@@ -3944,71 -3926,52 +3926,52 @@@ } EXPORT_SYMBOL_GPL(xdp_do_flush);
- static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) - { - switch (map->map_type) { - case BPF_MAP_TYPE_DEVMAP: - return __dev_map_lookup_elem(map, index); - case BPF_MAP_TYPE_DEVMAP_HASH: - return __dev_map_hash_lookup_elem(map, index); - case BPF_MAP_TYPE_CPUMAP: - return __cpu_map_lookup_elem(map, index); - case BPF_MAP_TYPE_XSKMAP: - return __xsk_map_lookup_elem(map, index); - default: - return NULL; - } - } - - void bpf_clear_redirect_map(struct bpf_map *map) - { - struct bpf_redirect_info *ri; - int cpu; - - for_each_possible_cpu(cpu) { - ri = per_cpu_ptr(&bpf_redirect_info, cpu); - /* Avoid polluting remote cacheline due to writes if - * not needed. Once we pass this test, we need the - * cmpxchg() to make sure it hasn't been changed in - * the meantime by remote CPU. - */ - if (unlikely(READ_ONCE(ri->map) == map)) - cmpxchg(&ri->map, map, NULL); - } - } - int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - struct bpf_map *map = READ_ONCE(ri->map); - u32 index = ri->tgt_index; + enum bpf_map_type map_type = ri->map_type; void *fwd = ri->tgt_value; + u32 map_id = ri->map_id; int err;
- ri->tgt_index = 0; - ri->tgt_value = NULL; - WRITE_ONCE(ri->map, NULL); + ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->map_type = BPF_MAP_TYPE_UNSPEC;
- if (unlikely(!map)) { - fwd = dev_get_by_index_rcu(dev_net(dev), index); - if (unlikely(!fwd)) { - err = -EINVAL; - goto err; + switch (map_type) { + case BPF_MAP_TYPE_DEVMAP: + fallthrough; + case BPF_MAP_TYPE_DEVMAP_HASH: + err = dev_map_enqueue(fwd, xdp, dev); + break; + case BPF_MAP_TYPE_CPUMAP: + err = cpu_map_enqueue(fwd, xdp, dev); + break; + case BPF_MAP_TYPE_XSKMAP: + err = __xsk_map_redirect(fwd, xdp); + break; + case BPF_MAP_TYPE_UNSPEC: + if (map_id == INT_MAX) { + fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index); + if (unlikely(!fwd)) { + err = -EINVAL; + break; + } + err = dev_xdp_enqueue(fwd, xdp, dev); + break; } - - err = dev_xdp_enqueue(fwd, xdp, dev); - } else { - err = __bpf_tx_xdp_map(dev, fwd, map, xdp); + fallthrough; + default: + err = -EBADRQC; }
if (unlikely(err)) goto err;
- _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); + _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); return 0; err: - _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); return err; } EXPORT_SYMBOL_GPL(xdp_do_redirect); @@@ -4017,41 -3980,36 +3980,36 @@@ static int xdp_do_generic_redirect_map( struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog, - struct bpf_map *map) + void *fwd, + enum bpf_map_type map_type, u32 map_id) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - u32 index = ri->tgt_index; - void *fwd = ri->tgt_value; - int err = 0; - - ri->tgt_index = 0; - ri->tgt_value = NULL; - WRITE_ONCE(ri->map, NULL); - - if (map->map_type == BPF_MAP_TYPE_DEVMAP || - map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { - struct bpf_dtab_netdev *dst = fwd; + int err;
- err = dev_map_generic_redirect(dst, skb, xdp_prog); + switch (map_type) { + case BPF_MAP_TYPE_DEVMAP: + fallthrough; + case BPF_MAP_TYPE_DEVMAP_HASH: + err = dev_map_generic_redirect(fwd, skb, xdp_prog); if (unlikely(err)) goto err; - } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) { - struct xdp_sock *xs = fwd; - - err = xsk_generic_rcv(xs, xdp); + break; + case BPF_MAP_TYPE_XSKMAP: + err = xsk_generic_rcv(fwd, xdp); if (err) goto err; consume_skb(skb); - } else { + break; + default: /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ err = -EBADRQC; goto err; }
- _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); + _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index); return 0; err: - _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err); return err; }
@@@ -4059,31 -4017,34 +4017,34 @@@ int xdp_do_generic_redirect(struct net_ struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - struct bpf_map *map = READ_ONCE(ri->map); - u32 index = ri->tgt_index; - struct net_device *fwd; - int err = 0; - - if (map) - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, - map); - ri->tgt_index = 0; - fwd = dev_get_by_index_rcu(dev_net(dev), index); - if (unlikely(!fwd)) { - err = -EINVAL; - goto err; - } + enum bpf_map_type map_type = ri->map_type; + void *fwd = ri->tgt_value; + u32 map_id = ri->map_id; + int err;
- err = xdp_ok_fwd_dev(fwd, skb->len); - if (unlikely(err)) - goto err; + ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */ + ri->map_type = BPF_MAP_TYPE_UNSPEC;
- skb->dev = fwd; - _trace_xdp_redirect(dev, xdp_prog, index); - generic_xdp_tx(skb, xdp_prog); - return 0; + if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) { + fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index); + if (unlikely(!fwd)) { + err = -EINVAL; + goto err; + } + + err = xdp_ok_fwd_dev(fwd, skb->len); + if (unlikely(err)) + goto err; + + skb->dev = fwd; + _trace_xdp_redirect(dev, xdp_prog, ri->tgt_index); + generic_xdp_tx(skb, xdp_prog); + return 0; + } + + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id); err: - _trace_xdp_redirect_err(dev, xdp_prog, index, err); + _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err); return err; }
@@@ -4094,10 -4055,12 +4055,12 @@@ BPF_CALL_2(bpf_xdp_redirect, u32, ifind if (unlikely(flags)) return XDP_ABORTED;
- ri->flags = flags; + /* NB! Map type UNSPEC and map_id == INT_MAX (never generated + * by map_idr) is used for ifindex based XDP redirect. + */ ri->tgt_index = ifindex; - ri->tgt_value = NULL; - WRITE_ONCE(ri->map, NULL); + ri->map_id = INT_MAX; + ri->map_type = BPF_MAP_TYPE_UNSPEC;
return XDP_REDIRECT; } @@@ -4113,28 -4076,7 +4076,7 @@@ static const struct bpf_func_proto bpf_ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - - /* Lower bits of the flags are used as return code on lookup failure */ - if (unlikely(flags > XDP_TX)) - return XDP_ABORTED; - - ri->tgt_value = __xdp_map_lookup_elem(map, ifindex); - if (unlikely(!ri->tgt_value)) { - /* If the lookup fails we want to clear out the state in the - * redirect_info struct completely, so that if an eBPF program - * performs multiple lookups, the last one always takes - * precedence. - */ - WRITE_ONCE(ri->map, NULL); - return flags; - } - - ri->flags = flags; - ri->tgt_index = ifindex; - WRITE_ONCE(ri->map, map); - - return XDP_REDIRECT; + return map->ops->map_redirect(map, ifindex, flags); }
static const struct bpf_func_proto bpf_xdp_redirect_map_proto = { @@@ -5658,7 -5600,7 +5600,7 @@@ BPF_CALL_5(bpf_skb_check_mtu, struct sk if (unlikely(flags & ~(BPF_MTU_CHK_SEGS))) return -EINVAL;
- if (unlikely(flags & BPF_MTU_CHK_SEGS && len_diff)) + if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len))) return -EINVAL;
dev = __dev_via_ifindex(dev, ifindex); @@@ -5668,11 -5610,7 +5610,11 @@@ mtu = READ_ONCE(dev->mtu);
dev_len = mtu + dev->hard_header_len; - skb_len = skb->len + len_diff; /* minus result pass check */ + + /* If set use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */ + skb_len = *mtu_len ? *mtu_len + dev->hard_header_len : skb->len; + + skb_len += len_diff; /* minus result pass check */ if (skb_len <= dev_len) { ret = BPF_MTU_CHK_RET_SUCCESS; goto out; @@@ -5717,10 -5655,6 +5659,10 @@@ BPF_CALL_5(bpf_xdp_check_mtu, struct xd /* Add L2-header as dev MTU is L3 size */ dev_len = mtu + dev->hard_header_len;
+ /* Use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */ + if (*mtu_len) + xdp_len = *mtu_len + dev->hard_header_len; + xdp_len += len_diff; /* minus result pass check */ if (xdp_len > dev_len) ret = BPF_MTU_CHK_RET_FRAG_NEEDED; @@@ -9663,22 -9597,40 +9605,40 @@@ static u32 sock_ops_convert_ctx_access( return insn - insn_buf; }
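With this change *mtu_len doubles as an input for both bpf_check_mtu() flavours: a non-zero value is taken as the L3 length to test (like iph->tot_len in fib_lookup) instead of the packet length, and the device MTU is written back on return. A hedged tc sketch of the calling convention (treating ifindex 0 as "use the current device" is an assumption based on the helper's lookup fallback):

	// SPDX-License-Identifier: GPL-2.0
	#include <linux/bpf.h>
	#include <linux/pkt_cls.h>
	#include <bpf/bpf_helpers.h>

	SEC("tc")
	int mtu_probe(struct __sk_buff *skb)
	{
		__u32 mtu_len = 1500;	/* non-zero: checked as the L3 length */
		int ret;

		/* ifindex 0, len_diff 0, no flags; mtu_len holds the device
		 * MTU after the call
		 */
		ret = bpf_check_mtu(skb, 0, &mtu_len, 0, 0);
		if (ret)	/* e.g. BPF_MTU_CHK_RET_FRAG_NEEDED */
			return TC_ACT_SHOT;

		return TC_ACT_OK;
	}

	char LICENSE[] SEC("license") = "GPL";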
+ /* data_end = skb->data + skb_headlen() */ + static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si, + struct bpf_insn *insn) + { + /* si->dst_reg = skb->data */ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, data)); + /* AX = skb->len */ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len), + BPF_REG_AX, si->src_reg, + offsetof(struct sk_buff, len)); + /* si->dst_reg = skb->data + skb->len */ + *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); + /* AX = skb->data_len */ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len), + BPF_REG_AX, si->src_reg, + offsetof(struct sk_buff, data_len)); + /* si->dst_reg = skb->data + skb->len - skb->data_len */ + *insn++ = BPF_ALU64_REG(BPF_SUB, si->dst_reg, BPF_REG_AX); + + return insn; + } + static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; - int off;
switch (si->off) { case offsetof(struct __sk_buff, data_end): - off = si->off; - off -= offsetof(struct __sk_buff, data_end); - off += offsetof(struct sk_buff, cb); - off += offsetof(struct tcp_skb_cb, bpf.data_end); - *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, - si->src_reg, off); + insn = bpf_convert_data_end_access(si, insn); break; default: return bpf_convert_ctx_access(type, si, insn_buf, prog, @@@ -10457,6 -10409,7 +10417,7 @@@ static u32 sk_lookup_convert_ctx_access }
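The rewritten access no longer reads a data_end value cached in tcp_skb_cb; the generated instructions recompute it from skb->data, skb->len and skb->data_len, i.e. the end of the linear area. Conceptually (toy struct for illustration, not the kernel's sk_buff):

	#include <stdio.h>

	/* toy mirror of the three sk_buff fields the generated code reads */
	struct toy_skb {
		unsigned char *data;	/* start of the linear area */
		unsigned int len;	/* total length, linear + paged */
		unsigned int data_len;	/* bytes held in paged fragments */
	};

	/* data_end = data + skb_headlen(), where skb_headlen() = len - data_len */
	static unsigned char *toy_data_end(const struct toy_skb *skb)
	{
		return skb->data + (skb->len - skb->data_len);
	}

	int main(void)
	{
		unsigned char buf[2048];
		struct toy_skb skb = { .data = buf, .len = 1400, .data_len = 300 };

		/* only the first 1100 bytes are directly addressable */
		printf("headlen = %td\n", toy_data_end(&skb) - skb.data);
		return 0;
	}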
const struct bpf_prog_ops sk_lookup_prog_ops = { + .test_run = bpf_prog_test_run_sk_lookup, };
const struct bpf_verifier_ops sk_lookup_verifier_ops = { diff --combined net/core/flow_dissector.c index a96a4f5de0ce,2ed380d096ce..5985029e43d4 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@@ -114,7 -114,7 +114,7 @@@ int flow_dissector_bpf_prog_attach_chec * is the protocol port offset returned from proto_ports_offset */ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, - void *data, int hlen) + const void *data, int hlen) { int poff = proto_ports_offset(ip_proto);
@@@ -161,7 -161,7 +161,7 @@@ static bool icmp_has_id(u8 type */ void skb_flow_get_icmp_tci(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp, - void *data, int thoff, int hlen) + const void *data, int thoff, int hlen) { struct icmphdr *ih, _ih;
@@@ -176,7 -176,7 +176,7 @@@ * avoid confusion with packets without such field */ if (icmp_has_id(ih->type)) - key_icmp->id = ih->un.echo.id ? : 1; + key_icmp->id = ih->un.echo.id ? ntohs(ih->un.echo.id) : 1; else key_icmp->id = 0; } @@@ -187,8 -187,8 +187,8 @@@ EXPORT_SYMBOL(skb_flow_get_icmp_tci) */ static void __skb_flow_dissect_icmp(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, - void *data, int thoff, int hlen) + void *target_container, const void *data, + int thoff, int hlen) { struct flow_dissector_key_icmp *key_icmp;
@@@ -409,8 -409,8 +409,8 @@@ EXPORT_SYMBOL(skb_flow_dissect_hash) static enum flow_dissect_ret __skb_flow_dissect_mpls(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, void *data, int nhoff, int hlen, - int lse_index, bool *entropy_label) + void *target_container, const void *data, int nhoff, + int hlen, int lse_index, bool *entropy_label) { struct mpls_label *hdr, _hdr; u32 entry, label, bos; @@@ -467,7 -467,8 +467,8 @@@ static enum flow_dissect_ret __skb_flow_dissect_arp(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, void *data, int nhoff, int hlen) + void *target_container, const void *data, + int nhoff, int hlen) { struct flow_dissector_key_arp *key_arp; struct { @@@ -523,7 -524,7 +524,7 @@@ static enum flow_dissect_re __skb_flow_dissect_gre(const struct sk_buff *skb, struct flow_dissector_key_control *key_control, struct flow_dissector *flow_dissector, - void *target_container, void *data, + void *target_container, const void *data, __be16 *p_proto, int *p_nhoff, int *p_hlen, unsigned int flags) { @@@ -663,8 -664,8 +664,8 @@@ static enum flow_dissect_ret __skb_flow_dissect_batadv(const struct sk_buff *skb, struct flow_dissector_key_control *key_control, - void *data, __be16 *p_proto, int *p_nhoff, int hlen, - unsigned int flags) + const void *data, __be16 *p_proto, int *p_nhoff, + int hlen, unsigned int flags) { struct { struct batadv_unicast_packet batadv_unicast; @@@ -695,7 -696,8 +696,8 @@@ static void __skb_flow_dissect_tcp(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, void *data, int thoff, int hlen) + void *target_container, const void *data, + int thoff, int hlen) { struct flow_dissector_key_tcp *key_tcp; struct tcphdr *th, _th; @@@ -719,8 -721,8 +721,8 @@@ static void __skb_flow_dissect_ports(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, void *data, int nhoff, - u8 ip_proto, int hlen) + void *target_container, const void *data, + int nhoff, u8 ip_proto, int hlen) { enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX; struct flow_dissector_key_ports *key_ports; @@@ -744,7 -746,8 +746,8 @@@ static void __skb_flow_dissect_ipv4(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, void *data, const struct iphdr *iph) + void *target_container, const void *data, + const struct iphdr *iph) { struct flow_dissector_key_ip *key_ip;
@@@ -761,7 -764,8 +764,8 @@@ static void __skb_flow_dissect_ipv6(const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, void *data, const struct ipv6hdr *iph) + void *target_container, const void *data, + const struct ipv6hdr *iph) { struct flow_dissector_key_ip *key_ip;
@@@ -908,9 -912,8 +912,8 @@@ bool bpf_flow_dissect(struct bpf_prog * bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, struct flow_dissector *flow_dissector, - void *target_container, - void *data, __be16 proto, int nhoff, int hlen, - unsigned int flags) + void *target_container, const void *data, + __be16 proto, int nhoff, int hlen, unsigned int flags) { struct flow_dissector_key_control *key_control; struct flow_dissector_key_basic *key_basic; @@@ -1642,7 -1645,7 +1645,7 @@@ __u32 skb_get_hash_perturb(const struc } EXPORT_SYMBOL(skb_get_hash_perturb);
- u32 __skb_get_poff(const struct sk_buff *skb, void *data, + u32 __skb_get_poff(const struct sk_buff *skb, const void *data, const struct flow_keys_basic *keys, int hlen) { u32 poff = keys->control.thoff; diff --combined net/ipv4/route.c index bba150fdd265,0470442ff61d..fa68c2612252 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@@ -21,7 -21,7 +21,7 @@@ * Alan Cox : Added BSD route gw semantics * Alan Cox : Super /proc >4K * Alan Cox : MTU in route table - * Alan Cox : MSS actually. Also added the window + * Alan Cox : MSS actually. Also added the window * clamper. * Sam Lantinga : Fixed route matching in rt_del() * Alan Cox : Routing cache support. @@@ -41,7 -41,7 +41,7 @@@ * Olaf Erb : irtt wasn't being copied right. * Bjorn Ekwall : Kerneld route support. * Alan Cox : Multicast fixed (I hope) - * Pavel Krauz : Limited broadcast fixed + * Pavel Krauz : Limited broadcast fixed * Mike McLagan : Routing by source * Alexey Kuznetsov : End of old history. Split to fib.c and * route.c and rewritten from scratch. @@@ -54,8 -54,8 +54,8 @@@ * Robert Olsson : Added rt_cache statistics * Arnaldo C. Melo : Convert proc stuff to seq_file * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. - * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect - * Ilia Sotnikov : Removed TOS from hash calculations + * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect + * Ilia Sotnikov : Removed TOS from hash calculations */
#define pr_fmt(fmt) "IPv4: " fmt @@@ -234,19 -234,6 +234,6 @@@ static const struct seq_operations rt_c .show = rt_cache_seq_show, };
- static int rt_cache_seq_open(struct inode *inode, struct file *file) - { - return seq_open(file, &rt_cache_seq_ops); - } - - static const struct proc_ops rt_cache_proc_ops = { - .proc_open = rt_cache_seq_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = seq_release, - }; - - static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) { int cpu; @@@ -324,19 -311,6 +311,6 @@@ static const struct seq_operations rt_c .show = rt_cpu_seq_show, };
- - static int rt_cpu_seq_open(struct inode *inode, struct file *file) - { - return seq_open(file, &rt_cpu_seq_ops); - } - - static const struct proc_ops rt_cpu_proc_ops = { - .proc_open = rt_cpu_seq_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = seq_release, - }; - #ifdef CONFIG_IP_ROUTE_CLASSID static int rt_acct_proc_show(struct seq_file *m, void *v) { @@@ -367,13 -341,13 +341,13 @@@ static int __net_init ip_rt_do_proc_ini { struct proc_dir_entry *pde;
- pde = proc_create("rt_cache", 0444, net->proc_net, - &rt_cache_proc_ops); + pde = proc_create_seq("rt_cache", 0444, net->proc_net, + &rt_cache_seq_ops); if (!pde) goto err1;
- pde = proc_create("rt_cache", 0444, - net->proc_net_stat, &rt_cpu_proc_ops); + pde = proc_create_seq("rt_cache", 0444, net->proc_net_stat, + &rt_cpu_seq_ops); if (!pde) goto err2;
@@@ -722,6 -696,7 +696,7 @@@ static void update_or_create_fnhe(struc
for_each_possible_cpu(i) { struct rtable __rcu **prt; + prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); rt = rcu_dereference(*prt); if (rt) @@@ -1258,12 -1233,12 +1233,12 @@@ static int ip_rt_bug(struct net *net, s }
/* - We do not cache source address of outgoing interface, - because it is used only by IP RR, TS and SRR options, - so that it out of fast path. - - BTW remember: "addr" is allowed to be not aligned - in IP options! + * We do not cache source address of outgoing interface, + * because it is used only by IP RR, TS and SRR options, + * so that it out of fast path. + * + * BTW remember: "addr" is allowed to be not aligned + * in IP options! */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) @@@ -2108,7 -2083,7 +2083,7 @@@ static int ip_route_input_slow(struct s goto out;
/* Check for the most weird martians, which can be not detected - by fib_lookup. + * by fib_lookup. */
tun_info = skb_tunnel_info(skb); @@@ -2246,7 -2221,7 +2221,7 @@@ local_input if (res->type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; - rth->rt_flags &= ~RTCF_LOCAL; + rth->rt_flags &= ~RTCF_LOCAL; }
if (do_cache) { @@@ -2317,15 -2292,15 +2292,15 @@@ int ip_route_input_rcu(struct sk_buff * u8 tos, struct net_device *dev, struct fib_result *res) { /* Multicast recognition logic is moved from route cache to here. - The problem was that too many Ethernet cards have broken/missing - hardware multicast filters :-( As result the host on multicasting - network acquires a lot of useless route cache entries, sort of - SDR messages from all the world. Now we try to get rid of them. - Really, provided software IP multicast filter is organized - reasonably (at least, hashed), it does not result in a slowdown - comparing with route cache reject entries. - Note, that multicast routers are not affected, because - route cache entry is created eventually. + * The problem was that too many Ethernet cards have broken/missing + * hardware multicast filters :-( As result the host on multicasting + * network acquires a lot of useless route cache entries, sort of + * SDR messages from all the world. Now we try to get rid of them. + * Really, provided software IP multicast filter is organized + * reasonably (at least, hashed), it does not result in a slowdown + * comparing with route cache reject entries. + * Note, that multicast routers are not affected, because + * route cache entry is created eventually. */ if (ipv4_is_multicast(daddr)) { struct in_device *in_dev = __in_dev_get_rcu(dev); @@@ -2537,11 -2512,11 +2512,11 @@@ struct rtable *ip_route_output_key_hash rth = ERR_PTR(-ENETUNREACH);
/* I removed check for oif == dev_out->oif here. - It was wrong for two reasons: - 1. ip_dev_find(net, saddr) can return wrong iface, if saddr - is assigned to multiple interfaces. - 2. Moreover, we are allowed to send packets with saddr - of another iface. --ANK + * It was wrong for two reasons: + * 1. ip_dev_find(net, saddr) can return wrong iface, if saddr + * is assigned to multiple interfaces. + * 2. Moreover, we are allowed to send packets with saddr + * of another iface. --ANK */
if (fl4->flowi4_oif == 0 && @@@ -2553,18 -2528,18 +2528,18 @@@ goto out;
/* Special hack: user can direct multicasts - and limited broadcast via necessary interface - without fiddling with IP_MULTICAST_IF or IP_PKTINFO. - This hack is not just for fun, it allows - vic,vat and friends to work. - They bind socket to loopback, set ttl to zero - and expect that it will work. - From the viewpoint of routing cache they are broken, - because we are not allowed to build multicast path - with loopback source addr (look, routing cache - cannot know, that ttl is zero, so that packet - will not leave this host and route is valid). - Luckily, this hack is good workaround. + * and limited broadcast via necessary interface + * without fiddling with IP_MULTICAST_IF or IP_PKTINFO. + * This hack is not just for fun, it allows + * vic,vat and friends to work. + * They bind socket to loopback, set ttl to zero + * and expect that it will work. + * From the viewpoint of routing cache they are broken, + * because we are not allowed to build multicast path + * with loopback source addr (look, routing cache + * cannot know, that ttl is zero, so that packet + * will not leave this host and route is valid). + * Luckily, this hack is good workaround. */
fl4->flowi4_oif = dev_out->ifindex; @@@ -2627,21 -2602,21 +2602,21 @@@ (ipv4_is_multicast(fl4->daddr) || !netif_index_is_l3_master(net, fl4->flowi4_oif))) { /* Apparently, routing tables are wrong. Assume, - that the destination is on link. - - WHY? DW. - Because we are allowed to send to iface - even if it has NO routes and NO assigned - addresses. When oif is specified, routing - tables are looked up with only one purpose: - to catch if destination is gatewayed, rather than - direct. Moreover, if MSG_DONTROUTE is set, - we send packet, ignoring both routing tables - and ifaddr state. --ANK - - - We could make it even if oif is unknown, - likely IPv6, but we do not. + * that the destination is on link. + * + * WHY? DW. + * Because we are allowed to send to iface + * even if it has NO routes and NO assigned + * addresses. When oif is specified, routing + * tables are looked up with only one purpose: + * to catch if destination is gatewayed, rather than + * direct. Moreover, if MSG_DONTROUTE is set, + * we send packet, ignoring both routing tables + * and ifaddr state. --ANK + * + * + * We could make it even if oif is unknown, + * likely IPv6, but we do not. */
if (fl4->saddr == 0) @@@ -2687,15 -2662,44 +2662,15 @@@ out return rth; }
-static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) -{ - return NULL; -} - -static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) -{ - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); - - return mtu ? : dst->dev->mtu; -} - -static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu, - bool confirm_neigh) -{ -} - -static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb) -{ -} - -static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, - unsigned long old) -{ - return NULL; -} - static struct dst_ops ipv4_dst_blackhole_ops = { - .family = AF_INET, - .check = ipv4_blackhole_dst_check, - .mtu = ipv4_blackhole_mtu, - .default_advmss = ipv4_default_advmss, - .update_pmtu = ipv4_rt_blackhole_update_pmtu, - .redirect = ipv4_rt_blackhole_redirect, - .cow_metrics = ipv4_rt_blackhole_cow_metrics, - .neigh_lookup = ipv4_neigh_lookup, + .family = AF_INET, + .default_advmss = ipv4_default_advmss, + .neigh_lookup = ipv4_neigh_lookup, + .check = dst_blackhole_check, + .cow_metrics = dst_blackhole_cow_metrics, + .update_pmtu = dst_blackhole_update_pmtu, + .redirect = dst_blackhole_redirect, + .mtu = dst_blackhole_mtu, };
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) diff --combined net/ipv6/route.c index 1056b0229ffd,60058f3dcc48..ebb7519bec2a --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@@ -260,16 -260,34 +260,16 @@@ static struct dst_ops ip6_dst_ops_templ .confirm_neigh = ip6_confirm_neigh, };
-static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) -{ - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); - - return mtu ? : dst->dev->mtu; -} - -static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu, - bool confirm_neigh) -{ -} - -static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb) -{ -} - static struct dst_ops ip6_dst_blackhole_ops = { - .family = AF_INET6, - .destroy = ip6_dst_destroy, - .check = ip6_dst_check, - .mtu = ip6_blackhole_mtu, - .default_advmss = ip6_default_advmss, - .update_pmtu = ip6_rt_blackhole_update_pmtu, - .redirect = ip6_rt_blackhole_redirect, - .cow_metrics = dst_cow_metrics_generic, - .neigh_lookup = ip6_dst_neigh_lookup, + .family = AF_INET6, + .default_advmss = ip6_default_advmss, + .neigh_lookup = ip6_dst_neigh_lookup, + .check = ip6_dst_check, + .destroy = ip6_dst_destroy, + .cow_metrics = dst_cow_metrics_generic, + .update_pmtu = dst_blackhole_update_pmtu, + .redirect = dst_blackhole_redirect, + .mtu = dst_blackhole_mtu, };
static const u32 ip6_template_metrics[RTAX_MAX] = { @@@ -2360,7 -2378,7 +2360,7 @@@ u32 rt6_multipath_hash(const struct ne
memset(&hash_keys, 0, sizeof(hash_keys));
- if (!flkeys) { + if (!flkeys) { skb_flow_dissect_flow_keys(skb, &keys, flag); flkeys = &keys; } @@@ -2500,20 -2518,20 +2500,20 @@@ struct dst_entry *ip6_route_output_flag struct flowi6 *fl6, int flags) { - struct dst_entry *dst; - struct rt6_info *rt6; + struct dst_entry *dst; + struct rt6_info *rt6;
- rcu_read_lock(); - dst = ip6_route_output_flags_noref(net, sk, fl6, flags); - rt6 = (struct rt6_info *)dst; - /* For dst cached in uncached_list, refcnt is already taken. */ - if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) { - dst = &net->ipv6.ip6_null_entry->dst; - dst_hold(dst); - } - rcu_read_unlock(); + rcu_read_lock(); + dst = ip6_route_output_flags_noref(net, sk, fl6, flags); + rt6 = (struct rt6_info *)dst; + /* For dst cached in uncached_list, refcnt is already taken. */ + if (list_empty(&rt6->rt6i_uncached) && !dst_hold_safe(dst)) { + dst = &net->ipv6.ip6_null_entry->dst; + dst_hold(dst); + } + rcu_read_unlock();
- return dst; + return dst; } EXPORT_SYMBOL_GPL(ip6_route_output_flags);
diff --combined net/mptcp/options.c index 89a4225ed321,5fabf3e9a38d..2b7eec93c9f5 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@@ -26,6 -26,7 +26,7 @@@ static void mptcp_parse_option(const st int expected_opsize; u8 version; u8 flags; + u8 i;
switch (subtype) { case MPTCPOPT_MP_CAPABLE: @@@ -272,14 -273,17 +273,17 @@@ break;
case MPTCPOPT_RM_ADDR: - if (opsize != TCPOLEN_MPTCP_RM_ADDR_BASE) + if (opsize < TCPOLEN_MPTCP_RM_ADDR_BASE + 1 || + opsize > TCPOLEN_MPTCP_RM_ADDR_BASE + MPTCP_RM_IDS_MAX) break;
ptr++;
mp_opt->rm_addr = 1; - mp_opt->rm_id = *ptr++; - pr_debug("RM_ADDR: id=%d", mp_opt->rm_id); + mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE; + for (i = 0; i < mp_opt->rm_list.nr; i++) + mp_opt->rm_list.ids[i] = *ptr++; + pr_debug("RM_ADDR: rm_list_nr=%d", mp_opt->rm_list.nr); break;
case MPTCPOPT_MP_PRIO: @@@ -567,15 -571,15 +571,15 @@@ static bool mptcp_established_options_d }
static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, - struct in_addr *addr) + struct in_addr *addr, u16 port) { u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[7];
msg[0] = addr_id; memcpy(&msg[1], &addr->s_addr, 4); - msg[5] = 0; - msg[6] = 0; + msg[5] = port >> 8; + msg[6] = port & 0xFF;
mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac);
@@@ -584,15 -588,15 +588,15 @@@
#if IS_ENABLED(CONFIG_MPTCP_IPV6) static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, - struct in6_addr *addr) + struct in6_addr *addr, u16 port) { u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[19];
msg[0] = addr_id; memcpy(&msg[1], &addr->s6_addr, 16); - msg[17] = 0; - msg[18] = 0; + msg[17] = port >> 8; + msg[18] = port & 0xFF;
mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac);
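The ADD_ADDR HMAC now covers the advertised port as two big-endian bytes appended after the address, for both the IPv4 (7-byte) and IPv6 (19-byte) messages. A standalone sketch of the IPv4 message layout (values are arbitrary):

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* 7-byte message hashed for an IPv4 ADD_ADDR: id | addr | port */
	static void addaddr_msg(uint8_t addr_id, struct in_addr addr,
				uint16_t port, uint8_t msg[7])
	{
		msg[0] = addr_id;
		memcpy(&msg[1], &addr.s_addr, 4);
		msg[5] = port >> 8;	/* network byte order: high byte first */
		msg[6] = port & 0xff;
	}

	int main(void)
	{
		struct in_addr a;
		uint8_t msg[7];
		int i;

		inet_pton(AF_INET, "10.0.0.2", &a);
		addaddr_msg(1, a, 8080, msg);
		for (i = 0; i < 7; i++)
			printf("%02x ", msg[i]);
		printf("\n");	/* 01 0a 00 00 02 1f 90 */
		return 0;
	}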
@@@ -646,8 -650,7 +650,8 @@@ static bool mptcp_established_options_a opts->ahmac = add_addr_generate_hmac(msk->local_key, msk->remote_key, opts->addr_id, - &opts->addr); + &opts->addr, + opts->port); } } #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@@ -658,8 -661,7 +662,8 @@@ opts->ahmac = add_addr6_generate_hmac(msk->local_key, msk->remote_key, opts->addr_id, - &opts->addr6); + &opts->addr6, + opts->port); } } #endif @@@ -676,20 -678,25 +680,25 @@@ static bool mptcp_established_options_r { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - u8 rm_id; + struct mptcp_rm_list rm_list; + int i, len;
if (!mptcp_pm_should_rm_signal(msk) || - !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_id))) + !(mptcp_pm_rm_addr_signal(msk, remaining, &rm_list))) return false;
- if (remaining < TCPOLEN_MPTCP_RM_ADDR_BASE) + len = mptcp_rm_addr_len(&rm_list); + if (len < 0) + return false; + if (remaining < len) return false;
- *size = TCPOLEN_MPTCP_RM_ADDR_BASE; + *size = len; opts->suboptions |= OPTION_MPTCP_RM_ADDR; - opts->rm_id = rm_id; + opts->rm_list = rm_list;
- pr_debug("rm_id=%d", opts->rm_id); + for (i = 0; i < opts->rm_list.nr; i++) + pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]);
return true; } @@@ -964,14 -971,12 +973,14 @@@ static bool add_addr_hmac_valid(struct if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) hmac = add_addr_generate_hmac(msk->remote_key, msk->local_key, - mp_opt->addr_id, &mp_opt->addr); + mp_opt->addr_id, &mp_opt->addr, + mp_opt->port); #if IS_ENABLED(CONFIG_MPTCP_IPV6) else hmac = add_addr6_generate_hmac(msk->remote_key, msk->local_key, - mp_opt->addr_id, &mp_opt->addr6); + mp_opt->addr_id, &mp_opt->addr6, + mp_opt->port); #endif
pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n", @@@ -1042,7 -1047,7 +1051,7 @@@ void mptcp_incoming_options(struct soc }
if (mp_opt.rm_addr) { - mptcp_pm_rm_addr_received(msk, mp_opt.rm_id); + mptcp_pm_rm_addr_received(msk, &mp_opt.rm_list); mp_opt.rm_addr = 0; }
@@@ -1221,9 -1226,23 +1230,23 @@@ mp_capable_done }
if (OPTION_MPTCP_RM_ADDR & opts->suboptions) { + u8 i = 1; + *ptr++ = mptcp_option(MPTCPOPT_RM_ADDR, - TCPOLEN_MPTCP_RM_ADDR_BASE, - 0, opts->rm_id); + TCPOLEN_MPTCP_RM_ADDR_BASE + opts->rm_list.nr, + 0, opts->rm_list.ids[0]); + + while (i < opts->rm_list.nr) { + u8 id1, id2, id3, id4; + + id1 = opts->rm_list.ids[i]; + id2 = i + 1 < opts->rm_list.nr ? opts->rm_list.ids[i + 1] : TCPOPT_NOP; + id3 = i + 2 < opts->rm_list.nr ? opts->rm_list.ids[i + 2] : TCPOPT_NOP; + id4 = i + 3 < opts->rm_list.nr ? opts->rm_list.ids[i + 3] : TCPOPT_NOP; + put_unaligned_be32(id1 << 24 | id2 << 16 | id3 << 8 | id4, ptr); + ptr += 1; + i += 4; + } }
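The RM_ADDR option now carries a list of address ids: the first id travels in the base option word and any further ids are packed four per 32-bit word, padded with TCPOPT_NOP. A userspace sketch of the same packing (array contents are arbitrary):

	#include <stdint.h>
	#include <stdio.h>

	#define TCPOPT_NOP 1

	/* pack ids[1..nr-1] four per word, NOP-padded, like the option writer */
	static unsigned int pack_rm_ids(const uint8_t *ids, unsigned int nr,
					uint32_t *out)
	{
		unsigned int i = 1, words = 0;

		while (i < nr) {
			uint8_t id1 = ids[i];
			uint8_t id2 = i + 1 < nr ? ids[i + 1] : TCPOPT_NOP;
			uint8_t id3 = i + 2 < nr ? ids[i + 2] : TCPOPT_NOP;
			uint8_t id4 = i + 3 < nr ? ids[i + 3] : TCPOPT_NOP;

			out[words++] = (uint32_t)id1 << 24 | id2 << 16 | id3 << 8 | id4;
			i += 4;
		}
		return words;	/* extra 32-bit words after the base option */
	}

	int main(void)
	{
		uint8_t ids[] = { 3, 4, 5, 6 };	/* ids[0] rides in the base option */
		uint32_t words[2];
		unsigned int n = pack_rm_ids(ids, 4, words);

		/* prints: 1 extra word(s), first = 0x04050601 (ids 4,5,6 + NOP pad) */
		printf("%u extra word(s), first = 0x%08x\n", n, words[0]);
		return 0;
	}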
if (OPTION_MPTCP_PRIO & opts->suboptions) { diff --combined net/netfilter/nf_flow_table_core.c index c77ba8690ed8,8ffd3f3c288c..d61bbe469761 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@@ -389,29 -389,20 +389,20 @@@ static void nf_flow_offload_work_gc(str queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ); }
- - static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, - __be16 port, __be16 new_port) + static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff, + __be16 port, __be16 new_port) { struct tcphdr *tcph;
- if (skb_try_make_writable(skb, thoff + sizeof(*tcph))) - return -1; - tcph = (void *)(skb_network_header(skb) + thoff); inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false); - - return 0; }
- static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, - __be16 port, __be16 new_port) + static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff, + __be16 port, __be16 new_port) { struct udphdr *udph;
- if (skb_try_make_writable(skb, thoff + sizeof(*udph))) - return -1; - udph = (void *)(skb_network_header(skb) + thoff); if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) { inet_proto_csum_replace2(&udph->check, skb, port, @@@ -419,37 -410,28 +410,28 @@@ if (!udph->check) udph->check = CSUM_MANGLED_0; } - - return 0; }
- static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, - u8 protocol, __be16 port, __be16 new_port) + static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff, + u8 protocol, __be16 port, __be16 new_port) { switch (protocol) { case IPPROTO_TCP: - if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0) - return NF_DROP; + nf_flow_nat_port_tcp(skb, thoff, port, new_port); break; case IPPROTO_UDP: - if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0) - return NF_DROP; + nf_flow_nat_port_udp(skb, thoff, port, new_port); break; } - - return 0; }
- int nf_flow_snat_port(const struct flow_offload *flow, - struct sk_buff *skb, unsigned int thoff, - u8 protocol, enum flow_offload_tuple_dir dir) + void nf_flow_snat_port(const struct flow_offload *flow, + struct sk_buff *skb, unsigned int thoff, + u8 protocol, enum flow_offload_tuple_dir dir) { struct flow_ports *hdr; __be16 port, new_port;
- if (skb_try_make_writable(skb, thoff + sizeof(*hdr))) - return -1; - hdr = (void *)(skb_network_header(skb) + thoff);
switch (dir) { @@@ -463,24 -445,19 +445,19 @@@ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port; hdr->dest = new_port; break; - default: - return -1; }
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port); + nf_flow_nat_port(skb, thoff, protocol, port, new_port); } EXPORT_SYMBOL_GPL(nf_flow_snat_port);
- int nf_flow_dnat_port(const struct flow_offload *flow, - struct sk_buff *skb, unsigned int thoff, - u8 protocol, enum flow_offload_tuple_dir dir) + void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb, + unsigned int thoff, u8 protocol, + enum flow_offload_tuple_dir dir) { struct flow_ports *hdr; __be16 port, new_port;
- if (skb_try_make_writable(skb, thoff + sizeof(*hdr))) - return -1; - hdr = (void *)(skb_network_header(skb) + thoff);
switch (dir) { @@@ -494,11 -471,9 +471,9 @@@ new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port; hdr->source = new_port; break; - default: - return -1; }
- return nf_flow_nat_port(skb, thoff, protocol, port, new_port); + nf_flow_nat_port(skb, thoff, protocol, port, new_port); } EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
@@@ -506,7 -481,7 +481,7 @@@ int nf_flow_table_init(struct nf_flowta { int err;
- INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); + INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); flow_block_init(&flowtable->flow_block); init_rwsem(&flowtable->flow_block_lock);
diff --combined net/netfilter/nf_tables_api.c index f57f1a6ba96f,bd5e8122ea5e..fc2526b8bd55 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@@ -900,6 -900,12 +900,12 @@@ static void nf_tables_table_disable(str nft_table_disable(net, table, 0); }
+ enum { + NFT_TABLE_STATE_UNCHANGED = 0, + NFT_TABLE_STATE_DORMANT, + NFT_TABLE_STATE_WAKEUP + }; + static int nf_tables_updtable(struct nft_ctx *ctx) { struct nft_trans *trans; @@@ -929,19 -935,17 +935,17 @@@
if ((flags & NFT_TABLE_F_DORMANT) && !(ctx->table->flags & NFT_TABLE_F_DORMANT)) { - nft_trans_table_enable(trans) = false; + nft_trans_table_state(trans) = NFT_TABLE_STATE_DORMANT; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { - ctx->table->flags &= ~NFT_TABLE_F_DORMANT; ret = nf_tables_table_enable(ctx->net, ctx->table); if (ret >= 0) - nft_trans_table_enable(trans) = true; - else - ctx->table->flags |= NFT_TABLE_F_DORMANT; + nft_trans_table_state(trans) = NFT_TABLE_STATE_WAKEUP; } if (ret < 0) goto err;
+ nft_trans_table_flags(trans) = flags; nft_trans_table_update(trans) = true; list_add_tail(&trans->list, &ctx->net->nft.commit_list); return 0; @@@ -6783,9 -6787,6 +6787,9 @@@ static int nft_register_flowtable_net_h
list_for_each_entry(hook, hook_list, list) { list_for_each_entry(ft, &table->flowtables, list) { + if (!nft_is_active_next(net, ft)) + continue; + list_for_each_entry(hook2, &ft->hook_list, list) { if (hook->ops.dev == hook2->ops.dev && hook->ops.pf == hook2->ops.pf) { @@@ -6845,7 -6846,6 +6849,7 @@@ static int nft_flowtable_update(struct struct nft_hook *hook, *next; struct nft_trans *trans; bool unregister = false; + u32 flags; int err;
err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK], @@@ -6860,17 -6860,6 +6864,17 @@@ } }
+ if (nla[NFTA_FLOWTABLE_FLAGS]) { + flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); + if (flags & ~NFT_FLOWTABLE_MASK) + return -EOPNOTSUPP; + if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^ + (flags & NFT_FLOWTABLE_HW_OFFLOAD)) + return -EOPNOTSUPP; + } else { + flags = flowtable->data.flags; + } + err = nft_register_flowtable_net_hooks(ctx->net, ctx->table, &flowtable_hook.list, flowtable); if (err < 0) @@@ -6884,7 -6873,6 +6888,7 @@@ goto err_flowtable_update_hook; }
+ nft_trans_flowtable_flags(trans) = flags; nft_trans_flowtable(trans) = flowtable; nft_trans_flowtable_update(trans) = true; INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); @@@ -6979,10 -6967,8 +6983,10 @@@ static int nf_tables_newflowtable(struc if (nla[NFTA_FLOWTABLE_FLAGS]) { flowtable->data.flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); - if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK) + if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK) { + err = -EOPNOTSUPP; goto err3; + } }
write_pnet(&flowtable->data.net, net); @@@ -8086,11 -8072,10 +8090,10 @@@ static int nf_tables_commit(struct net switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!nft_trans_table_enable(trans)) { - nf_tables_table_disable(net, - trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } + if (nft_trans_table_state(trans) == NFT_TABLE_STATE_DORMANT) + nf_tables_table_disable(net, trans->ctx.table); + + trans->ctx.table->flags = nft_trans_table_flags(trans); } else { nft_clear(net, trans->ctx.table); } @@@ -8194,8 -8179,6 +8197,8 @@@ break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) { + nft_trans_flowtable(trans)->data.flags = + nft_trans_flowtable_flags(trans); nf_tables_flowtable_notify(&trans->ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), @@@ -8303,11 -8286,9 +8306,9 @@@ static int __nf_tables_abort(struct ne switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (nft_trans_table_enable(trans)) { - nf_tables_table_disable(net, - trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } + if (nft_trans_table_state(trans) == NFT_TABLE_STATE_WAKEUP) + nf_tables_table_disable(net, trans->ctx.table); + nft_trans_destroy(trans); } else { list_del_rcu(&trans->ctx.table->list); @@@ -8577,6 -8558,7 +8578,7 @@@ static int nf_tables_check_loops(const data->verdict.chain); if (err < 0) return err; + break; default: break; } diff --combined net/sched/cls_api.c index 13341e7fb077,ca8e177bf31b..d3db70865d66 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@@ -1629,7 -1629,6 +1629,7 @@@ int tcf_classify_ingress(struct sk_buf return TC_ACT_SHOT; ext->chain = last_executed_chain; ext->mru = qdisc_skb_cb(skb)->mru; + ext->post_ct = qdisc_skb_cb(skb)->post_ct; }
return ret; @@@ -3662,6 -3661,9 +3662,9 @@@ int tc_setup_flow_action(struct flow_ac entry->police.burst = tcf_police_burst(act); entry->police.rate_bytes_ps = tcf_police_rate_bytes_ps(act); + entry->police.burst_pkt = tcf_police_burst_pkt(act); + entry->police.rate_pkt_ps = + tcf_police_rate_pkt_ps(act); entry->police.mtu = tcf_police_tcfp_mtu(act); entry->police.index = act->tcfa_index; } else if (is_tcf_ct(act)) { diff --combined net/sched/cls_flower.c index c69a4ba9c33f,9736df97e04d..d7869a984881 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@@ -209,16 -209,16 +209,16 @@@ static bool fl_range_port_dst_cmp(struc struct fl_flow_key *key, struct fl_flow_key *mkey) { - __be16 min_mask, max_mask, min_val, max_val; + u16 min_mask, max_mask, min_val, max_val;
- min_mask = htons(filter->mask->key.tp_range.tp_min.dst); - max_mask = htons(filter->mask->key.tp_range.tp_max.dst); - min_val = htons(filter->key.tp_range.tp_min.dst); - max_val = htons(filter->key.tp_range.tp_max.dst); + min_mask = ntohs(filter->mask->key.tp_range.tp_min.dst); + max_mask = ntohs(filter->mask->key.tp_range.tp_max.dst); + min_val = ntohs(filter->key.tp_range.tp_min.dst); + max_val = ntohs(filter->key.tp_range.tp_max.dst);
if (min_mask && max_mask) { - if (htons(key->tp_range.tp.dst) < min_val || - htons(key->tp_range.tp.dst) > max_val) + if (ntohs(key->tp_range.tp.dst) < min_val || + ntohs(key->tp_range.tp.dst) > max_val) return false;
/* skb does not have min and max values */ @@@ -232,16 -232,16 +232,16 @@@ static bool fl_range_port_src_cmp(struc struct fl_flow_key *key, struct fl_flow_key *mkey) { - __be16 min_mask, max_mask, min_val, max_val; + u16 min_mask, max_mask, min_val, max_val;
- min_mask = htons(filter->mask->key.tp_range.tp_min.src); - max_mask = htons(filter->mask->key.tp_range.tp_max.src); - min_val = htons(filter->key.tp_range.tp_min.src); - max_val = htons(filter->key.tp_range.tp_max.src); + min_mask = ntohs(filter->mask->key.tp_range.tp_min.src); + max_mask = ntohs(filter->mask->key.tp_range.tp_max.src); + min_val = ntohs(filter->key.tp_range.tp_min.src); + max_val = ntohs(filter->key.tp_range.tp_max.src);
if (min_mask && max_mask) { - if (htons(key->tp_range.tp.src) < min_val || - htons(key->tp_range.tp.src) > max_val) + if (ntohs(key->tp_range.tp.src) < min_val || + ntohs(key->tp_range.tp.src) > max_val) return false;
/* skb does not have min and max values */ @@@ -783,16 -783,16 +783,16 @@@ static int fl_set_key_port_range(struc TCA_FLOWER_UNSPEC, sizeof(key->tp_range.tp_max.src));
if (mask->tp_range.tp_min.dst && mask->tp_range.tp_max.dst && - htons(key->tp_range.tp_max.dst) <= - htons(key->tp_range.tp_min.dst)) { + ntohs(key->tp_range.tp_max.dst) <= + ntohs(key->tp_range.tp_min.dst)) { NL_SET_ERR_MSG_ATTR(extack, tb[TCA_FLOWER_KEY_PORT_DST_MIN], "Invalid destination port range (min must be strictly smaller than max)"); return -EINVAL; } if (mask->tp_range.tp_min.src && mask->tp_range.tp_max.src && - htons(key->tp_range.tp_max.src) <= - htons(key->tp_range.tp_min.src)) { + ntohs(key->tp_range.tp_max.src) <= + ntohs(key->tp_range.tp_min.src)) { NL_SET_ERR_MSG_ATTR(extack, tb[TCA_FLOWER_KEY_PORT_SRC_MIN], "Invalid source port range (min must be strictly smaller than max)"); @@@ -1044,8 -1044,8 +1044,8 @@@ static int fl_set_key_flags(struct nlat return -EINVAL; }
- key = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS])); - mask = be32_to_cpu(nla_get_u32(tb[TCA_FLOWER_KEY_FLAGS_MASK])); + key = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS])); + mask = be32_to_cpu(nla_get_be32(tb[TCA_FLOWER_KEY_FLAGS_MASK]));
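The htons-to-ntohs switch in the flower port-range code matters because range checks must be done in host byte order; comparing raw __be16 values inverts the ordering on little-endian hosts. A small demonstration:

	#include <arpa/inet.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint16_t min = 80, max = 443, probe = 256;	/* 256 lies inside [80, 443] */
		uint16_t be_min = htons(min), be_max = htons(max), be_probe = htons(probe);

		/* correct: convert to host order before comparing (prints 1) */
		printf("ntohs   : in range = %d\n",
		       ntohs(be_probe) >= ntohs(be_min) && ntohs(be_probe) <= ntohs(be_max));

		/* broken on little-endian: htons(256) = 0x0001 sorts below
		 * htons(80) = 0x5000, so this prints 0
		 */
		printf("raw be16: in range = %d\n",
		       be_probe >= be_min && be_probe <= be_max);
		return 0;
	}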
*flags_key = 0; *flags_mask = 0; @@@ -1451,7 -1451,7 +1451,7 @@@ static int fl_set_key_ct(struct nlattr &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK, sizeof(key->ct_state));
- err = fl_validate_ct_state(mask->ct_state, + err = fl_validate_ct_state(key->ct_state & mask->ct_state, tb[TCA_FLOWER_KEY_CT_STATE_MASK], extack); if (err) diff --combined net/tipc/node.c index 136338b85504,0daf3be11ed1..61c38eaaa298 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@@ -372,42 -372,49 +372,49 @@@ static struct tipc_node *tipc_node_find }
static void tipc_node_read_lock(struct tipc_node *n) + __acquires(n->lock) { read_lock_bh(&n->lock); }
static void tipc_node_read_unlock(struct tipc_node *n) + __releases(n->lock) { read_unlock_bh(&n->lock); }
static void tipc_node_write_lock(struct tipc_node *n) + __acquires(n->lock) { write_lock_bh(&n->lock); }
static void tipc_node_write_unlock_fast(struct tipc_node *n) + __releases(n->lock) { write_unlock_bh(&n->lock); }
static void tipc_node_write_unlock(struct tipc_node *n) + __releases(n->lock) { + struct tipc_socket_addr sk; struct net *net = n->net; - u32 addr = 0; u32 flags = n->action_flags; - u32 link_id = 0; - u32 bearer_id; struct list_head *publ_list; + struct tipc_uaddr ua; + u32 bearer_id;
if (likely(!flags)) { write_unlock_bh(&n->lock); return; }
- addr = n->addr; - link_id = n->link_id; - bearer_id = link_id & 0xffff; + tipc_uaddr(&ua, TIPC_SERVICE_RANGE, TIPC_NODE_SCOPE, + TIPC_LINK_STATE, n->addr, n->addr); + sk.ref = n->link_id; + sk.node = n->addr; + bearer_id = n->link_id & 0xffff; publ_list = &n->publ_list;
n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | @@@ -416,20 -423,18 +423,18 @@@ write_unlock_bh(&n->lock);
if (flags & TIPC_NOTIFY_NODE_DOWN) - tipc_publ_notify(net, publ_list, addr, n->capabilities); + tipc_publ_notify(net, publ_list, n->addr, n->capabilities);
if (flags & TIPC_NOTIFY_NODE_UP) - tipc_named_node_up(net, addr, n->capabilities); + tipc_named_node_up(net, n->addr, n->capabilities);
if (flags & TIPC_NOTIFY_LINK_UP) { - tipc_mon_peer_up(net, addr, bearer_id); - tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, - TIPC_NODE_SCOPE, link_id, link_id); + tipc_mon_peer_up(net, n->addr, bearer_id); + tipc_nametbl_publish(net, &ua, &sk, n->link_id); } if (flags & TIPC_NOTIFY_LINK_DOWN) { - tipc_mon_peer_down(net, addr, bearer_id); - tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, - addr, link_id); + tipc_mon_peer_down(net, n->addr, bearer_id); + tipc_nametbl_withdraw(net, &ua, &sk, n->link_id); } }
@@@ -2895,22 -2900,17 +2900,22 @@@ int tipc_nl_node_dump_monitor_peer(stru
#ifdef CONFIG_TIPC_CRYPTO static int tipc_nl_retrieve_key(struct nlattr **attrs, - struct tipc_aead_key **key) + struct tipc_aead_key **pkey) { struct nlattr *attr = attrs[TIPC_NLA_NODE_KEY]; + struct tipc_aead_key *key;
if (!attr) return -ENODATA;
- *key = (struct tipc_aead_key *)nla_data(attr); - if (nla_len(attr) < tipc_aead_key_size(*key)) + if (nla_len(attr) < sizeof(*key)) + return -EINVAL; + key = (struct tipc_aead_key *)nla_data(attr); + if (key->keylen > TIPC_AEAD_KEYLEN_MAX || + nla_len(attr) < tipc_aead_key_size(key)) return -EINVAL;
+ *pkey = key; return 0; }
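The key attribute is a fixed header followed by keylen bytes of key material, so it has to be validated in two steps: first that the header itself fits, then that the declared keylen is bounded and fully covered by the attribute. The same pattern outside the kernel (toy types, not TIPC's):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	#define KEYLEN_MAX 36	/* stand-in for TIPC_AEAD_KEYLEN_MAX */

	/* toy mirror of a header-plus-payload attribute */
	struct toy_key {
		char alg_name[32];
		uint32_t keylen;
		uint8_t key[];		/* keylen bytes follow */
	};

	static int validate_key(const void *attr, size_t attr_len)
	{
		const struct toy_key *key = attr;

		/* step 1: the fixed header must fit before keylen is read at all */
		if (attr_len < sizeof(*key))
			return -1;
		/* step 2: keylen must be bounded and covered by the attribute */
		if (key->keylen > KEYLEN_MAX ||
		    attr_len < sizeof(*key) + key->keylen)
			return -1;
		return 0;
	}

	int main(void)
	{
		uint8_t buf[sizeof(struct toy_key) + 16] __attribute__((aligned(4))) = { 0 };
		struct toy_key *key = (struct toy_key *)buf;

		strcpy(key->alg_name, "gcm(aes)");
		key->keylen = 16;
		printf("ok=%d, truncated=%d\n",
		       validate_key(buf, sizeof(buf)),		/* 0  */
		       validate_key(buf, sizeof(*key) + 8));	/* -1 */
		return 0;
	}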
diff --combined tools/lib/bpf/Makefile index e9eb6a6e80d2,8170f88e8ea6..87b14b74d62f --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@@ -158,7 -158,7 +158,7 @@@ $(BPF_IN_STATIC): force $(BPF_HELPER_DE $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
$(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h - $(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \ + $(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \ --file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) @@@ -215,7 -215,7 +215,7 @@@ define do_instal if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' + $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' endef
install_lib: all_cmd diff --combined tools/lib/bpf/btf_dump.c index 0911aea4cdbe,5e957fcceee6..7b53a484860f --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@@ -279,6 -279,7 +279,7 @@@ static int btf_dump_mark_referenced(str case BTF_KIND_INT: case BTF_KIND_ENUM: case BTF_KIND_FWD: + case BTF_KIND_FLOAT: break;
case BTF_KIND_VOLATILE: @@@ -453,6 -454,7 +454,7 @@@ static int btf_dump_order_type(struct b
switch (btf_kind(t)) { case BTF_KIND_INT: + case BTF_KIND_FLOAT: tstate->order_state = ORDERED; return 0;
@@@ -462,7 -464,7 +464,7 @@@ return err;
case BTF_KIND_ARRAY: - return btf_dump_order_type(d, btf_array(t)->type, through_ptr); + return btf_dump_order_type(d, btf_array(t)->type, false);
case BTF_KIND_STRUCT: case BTF_KIND_UNION: { @@@ -1133,6 -1135,7 +1135,7 @@@ skip_mod case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_TYPEDEF: + case BTF_KIND_FLOAT: goto done; default: pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", @@@ -1247,6 -1250,7 +1250,7 @@@ static void btf_dump_emit_type_chain(st
switch (kind) { case BTF_KIND_INT: + case BTF_KIND_FLOAT: btf_dump_emit_mods(d, decls); name = btf_name_of(d, t->name_off); btf_dump_printf(d, "%s", name); diff --combined tools/lib/bpf/libbpf.c index 4181d178ee7b,2f351d3ad3e7..8d610259f4be --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@@ -178,6 -178,8 +178,8 @@@ enum kern_feature_id FEAT_PROG_BIND_MAP, /* Kernel support for module BTFs */ FEAT_MODULE_BTF, + /* BTF_KIND_FLOAT support */ + FEAT_BTF_FLOAT, __FEAT_CNT, };
@@@ -188,6 -190,7 +190,7 @@@ enum reloc_type RELO_CALL, RELO_DATA, RELO_EXTERN, + RELO_SUBPROG_ADDR, };
struct reloc_desc { @@@ -574,6 -577,16 +577,16 @@@ static bool insn_is_subprog_call(const insn->off == 0; }
+ static bool is_ldimm64(struct bpf_insn *insn) + { + return insn->code == (BPF_LD | BPF_IMM | BPF_DW); + } + + static bool insn_is_pseudo_func(struct bpf_insn *insn) + { + return is_ldimm64(insn) && insn->src_reg == BPF_PSEUDO_FUNC; + } + static int bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog, const char *name, size_t sec_idx, const char *sec_name, @@@ -1181,8 -1194,7 +1194,8 @@@ static int bpf_object__elf_init(struct if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) { pr_warn("elf: failed to get section names strings from %s: %s\n", obj->path, elf_errmsg(-1)); - return -LIBBPF_ERRNO__FORMAT; + err = -LIBBPF_ERRNO__FORMAT; + goto errout; }
/* Old LLVM set e_machine to EM_NONE */ @@@ -1936,6 -1948,7 +1949,7 @@@ static const char *btf_kind_str(const s case BTF_KIND_FUNC_PROTO: return "func_proto"; case BTF_KIND_VAR: return "var"; case BTF_KIND_DATASEC: return "datasec"; + case BTF_KIND_FLOAT: return "float"; default: return "unknown"; } } @@@ -2385,15 -2398,17 +2399,17 @@@ static bool btf_needs_sanitization(stru { bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); + bool has_float = kernel_supports(FEAT_BTF_FLOAT); bool has_func = kernel_supports(FEAT_BTF_FUNC);
- return !has_func || !has_datasec || !has_func_global; + return !has_func || !has_datasec || !has_func_global || !has_float; }
static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) { bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC); bool has_datasec = kernel_supports(FEAT_BTF_DATASEC); + bool has_float = kernel_supports(FEAT_BTF_FLOAT); bool has_func = kernel_supports(FEAT_BTF_FUNC); struct btf_type *t; int i, j, vlen; @@@ -2446,6 -2461,13 +2462,13 @@@ } else if (!has_func_global && btf_is_func(t)) { /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */ t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0); + } else if (!has_float && btf_is_float(t)) { + /* replace FLOAT with an equally-sized empty STRUCT; + * since C compilers do not accept e.g. "float" as a + * valid struct name, make it anonymous + */ + t->name_off = 0; + t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); } } } @@@ -2975,6 -2997,23 +2998,23 @@@ static bool sym_is_extern(const GElf_Sy GELF_ST_TYPE(sym->st_info) == STT_NOTYPE; }
+ static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx) + { + int bind = GELF_ST_BIND(sym->st_info); + int type = GELF_ST_TYPE(sym->st_info); + + /* in .text section */ + if (sym->st_shndx != text_shndx) + return false; + + /* local function */ + if (bind == STB_LOCAL && type == STT_SECTION) + return true; + + /* global function */ + return bind == STB_GLOBAL && type == STT_FUNC; + } + static int find_extern_btf_id(const struct btf *btf, const char *ext_name) { const struct btf_type *t; @@@ -3396,7 -3435,7 +3436,7 @@@ static int bpf_program__record_reloc(st return 0; }
- if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) { + if (!is_ldimm64(insn)) { pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n", prog->name, sym_name, insn_idx, insn->code); return -LIBBPF_ERRNO__RELOC; @@@ -3431,6 -3470,23 +3471,23 @@@ return -LIBBPF_ERRNO__RELOC; }
+ /* loading subprog addresses */ + if (sym_is_subprog(sym, obj->efile.text_shndx)) { + /* global_func: sym->st_value = offset in the section, insn->imm = 0. + * local_func: sym->st_value = 0, insn->imm = offset in the section. + */ + if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) { + pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n", + prog->name, sym_name, (size_t)sym->st_value, insn->imm); + return -LIBBPF_ERRNO__RELOC; + } + + reloc_desc->type = RELO_SUBPROG_ADDR; + reloc_desc->insn_idx = insn_idx; + reloc_desc->sym_off = sym->st_value; + return 0; + } + type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
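The two symbol flavours accepted by sym_is_subprog() split the byte offset between symbol and instruction: for a global function sym->st_value holds the offset into .text and insn->imm is 0, for a static function it is the other way around. The later code in bpf_object__reloc_code() therefore computes the target instruction as (sym_off + imm) / BPF_INSN_SZ. A worked example (offsets assumed for illustration):

	/* assume the referenced subprog starts 64 bytes into .text */
	size_t sym_off = 64;	/* global func: section offset, imm == 0 */
	int imm = 0;		/* a static func would have sym_off == 0, imm == 64 */
	size_t sub_insn_idx = (sym_off + imm) / BPF_INSN_SZ;	/* 64 / 8 == 8 */
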
@@@ -3883,6 -3939,18 +3940,18 @@@ static int probe_kern_btf_datasec(void strs, sizeof(strs))); }
+ static int probe_kern_btf_float(void) + { + static const char strs[] = "\0float"; + __u32 types[] = { + /* float */ + BTF_TYPE_FLOAT_ENC(1, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs))); + } + static int probe_kern_array_mmap(void) { struct bpf_create_map_attr attr = { @@@ -4062,6 -4130,9 +4131,9 @@@ static struct kern_feature_desc [FEAT_MODULE_BTF] = { "module BTF support", probe_module_btf, }, + [FEAT_BTF_FLOAT] = { + "BTF_KIND_FLOAT support", probe_kern_btf_float, + }, };
static bool kernel_supports(enum kern_feature_id feat_id) @@@ -5567,11 -5638,6 +5639,6 @@@ static void bpf_core_poison_insn(struc insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */ }
- static bool is_ldimm64(struct bpf_insn *insn) - { - return insn->code == (BPF_LD | BPF_IMM | BPF_DW); - } - static int insn_bpf_size_to_bytes(struct bpf_insn *insn) { switch (BPF_SIZE(insn->code)) { @@@ -6173,6 -6239,10 +6240,10 @@@ bpf_object__relocate_data(struct bpf_ob } relo->processed = true; break; + case RELO_SUBPROG_ADDR: + insn[0].src_reg = BPF_PSEUDO_FUNC; + /* will be handled as a follow up pass */ + break; case RELO_CALL: /* will be handled as a follow up pass */ break; @@@ -6359,11 -6429,11 +6430,11 @@@ bpf_object__reloc_code(struct bpf_objec
for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) { insn = &main_prog->insns[prog->sub_insn_off + insn_idx]; - if (!insn_is_subprog_call(insn)) + if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn)) continue;
relo = find_prog_insn_relo(prog, insn_idx); - if (relo && relo->type != RELO_CALL) { + if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) { pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n", prog->name, insn_idx, relo->type); return -LIBBPF_ERRNO__RELOC; @@@ -6375,8 -6445,22 +6446,22 @@@ * call always has imm = -1, but for static functions * relocation is against STT_SECTION and insn->imm * points to a start of a static function + * + * for subprog addr relocation, the relo->sym_off + insn->imm is + * the byte offset in the corresponding section. */ - sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1; + if (relo->type == RELO_CALL) + sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1; + else + sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ; + } else if (insn_is_pseudo_func(insn)) { + /* + * RELO_SUBPROG_ADDR relo is always emitted even if both + * functions are in the same section, so it shouldn't reach here. + */ + pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n", + prog->name, insn_idx); + return -LIBBPF_ERRNO__RELOC; } else { /* if subprogram call is to a static function within * the same ELF section, there won't be any relocation diff --combined tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c index 3ac0c9afc35a,12b40dc81e14..8aaa24a00322 --- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c +++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c @@@ -174,12 -174,6 +174,12 @@@ struct struct_in_struct }; };
+struct struct_in_array {}; + +struct struct_in_array_typed {}; + +typedef struct struct_in_array_typed struct_in_array_t[2]; + struct struct_with_embedded_stuff { int a; struct { @@@ -209,10 -203,14 +209,16 @@@ } r[5]; struct struct_in_struct s[10]; int t[11]; + struct struct_in_array (*u)[2]; + struct_in_array_t *v; };
+ struct float_struct { + float f; + const double *d; + volatile long double *ld; + }; + struct root_struct { enum e1 _1; enum e2 _2; @@@ -227,6 -225,7 +233,7 @@@ union_fwd_t *_12; union_fwd_ptr_t _13; struct struct_with_embedded_stuff _14; + struct float_struct _15; };
/* ------ END-EXPECTED-OUTPUT ------ */ diff --combined tools/testing/selftests/net/mptcp/mptcp_join.sh index ad32240fbfda,191303b652a6..fe990d8696a9 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@@ -11,7 -11,6 +11,7 @@@ ksft_skip= timeout=30 mptcp_connect="" capture=0 +do_all_tests=1
TEST_COUNT=0
@@@ -122,6 -121,12 +122,6 @@@ reset_with_add_addr_timeout( -j DROP }
-for arg in "$@"; do - if [ "$arg" = "-c" ]; then - capture=1 - fi -done - ip -Version > /dev/null 2>&1 if [ $? -ne 0 ];then echo "SKIP: Could not run test without ip tool" @@@ -279,14 -284,19 +279,19 @@@ do_transfer( let rm_nr_ns1=-addr_nr_ns1 if [ $rm_nr_ns1 -lt 8 ]; then counter=1 - sleep 1 - - while [ $counter -le $rm_nr_ns1 ] - do - ip netns exec ${listener_ns} ./pm_nl_ctl del $counter + dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`) + if [ ${#dump[@]} -gt 0 ]; then + id=${dump[1]} sleep 1 - let counter+=1 - done + + while [ $counter -le $rm_nr_ns1 ] + do + ip netns exec ${listener_ns} ./pm_nl_ctl del $id + sleep 1 + let counter+=1 + let id+=1 + done + fi else sleep 1 ip netns exec ${listener_ns} ./pm_nl_ctl flush @@@ -313,14 -323,19 +318,19 @@@ let rm_nr_ns2=-addr_nr_ns2 if [ $rm_nr_ns2 -lt 8 ]; then counter=1 - sleep 1 - - while [ $counter -le $rm_nr_ns2 ] - do - ip netns exec ${connector_ns} ./pm_nl_ctl del $counter + dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`) + if [ ${#dump[@]} -gt 0 ]; then + id=${dump[1]} sleep 1 - let counter+=1 - done + + while [ $counter -le $rm_nr_ns2 ] + do + ip netns exec ${connector_ns} ./pm_nl_ctl del $id + sleep 1 + let counter+=1 + let id+=1 + done + fi else sleep 1 ip netns exec ${connector_ns} ./pm_nl_ctl flush @@@ -605,11 -620,22 +615,22 @@@ chk_rm_nr( { local rm_addr_nr=$1 local rm_subflow_nr=$2 + local invert=${3:-""} local count local dump_stats + local addr_ns + local subflow_ns + + if [ -z $invert ]; then + addr_ns=$ns1 + subflow_ns=$ns2 + elif [ $invert = "invert" ]; then + addr_ns=$ns2 + subflow_ns=$ns1 + fi
printf "%-39s %s" " " "rm " - count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'` + count=`ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'` [ -z "$count" ] && count=0 if [ "$count" != "$rm_addr_nr" ]; then echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" @@@ -620,7 -646,7 +641,7 @@@ fi
echo -n " - sf " - count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'` + count=`ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'` [ -z "$count" ] && count=0 if [ "$count" != "$rm_subflow_nr" ]; then echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr" @@@ -828,7 -854,7 +849,7 @@@ remove_tests( run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow chk_join_nr "remove single address" 1 1 1 chk_add_nr 1 1 - chk_rm_nr 0 0 + chk_rm_nr 1 1 invert
# subflow and signal, remove reset @@@ -864,6 -890,29 +885,29 @@@ chk_join_nr "flush subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 + + # subflows flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow id 150 + ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow + ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + chk_join_nr "flush subflows" 3 3 3 + chk_rm_nr 3 3 + + # addresses flush + reset + ip netns exec $ns1 ./pm_nl_ctl limits 3 3 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250 + ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal + ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal + ip netns exec $ns2 ./pm_nl_ctl limits 3 3 + run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + chk_join_nr "flush addresses" 3 3 3 + chk_add_nr 3 3 + chk_rm_nr 3 3 invert }
add_tests() @@@ -940,7 -989,7 +984,7 @@@ ipv6_tests( run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow chk_join_nr "remove single address IPv6" 1 1 1 chk_add_nr 1 1 - chk_rm_nr 0 0 + chk_rm_nr 1 1 invert
# subflow and signal IPv6, remove reset @@@ -1083,7 -1132,7 +1127,7 @@@ add_addr_ports_tests( run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow chk_join_nr "remove single address with port" 1 1 1 chk_add_nr 1 1 1 - chk_rm_nr 0 0 + chk_rm_nr 1 1 invert
# subflow and signal with port, remove reset @@@ -1216,8 -1265,7 +1260,8 @@@ usage( echo " -4 v4mapped_tests" echo " -b backup_tests" echo " -p add_addr_ports_tests" - echo " -c syncookies_tests" + echo " -k syncookies_tests" + echo " -c capture pcap files" echo " -h help" }
@@@ -1231,24 -1279,12 +1275,24 @@@ make_file "$cin" "client" make_file "$sin" "server" 1 trap cleanup EXIT
-if [ -z $1 ]; then +for arg in "$@"; do + # check for "capture" arg before launching tests + if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then + capture=1 + fi + + # exception for the capture option, the rest means: a part of the tests + if [ "${arg}" != "-c" ]; then + do_all_tests=0 + fi +done + +if [ $do_all_tests -eq 1 ]; then all_tests exit $ret fi
-while getopts 'fsltra64bpch' opt; do +while getopts 'fsltra64bpkch' opt; do case $opt in f) subflows_tests @@@ -1280,11 -1316,9 +1324,11 @@@ p) add_addr_ports_tests ;; - c) + k) syncookies_tests ;; + c) + ;; h | *) usage ;;