The following commit has been merged in the master branch:

commit 6de6e0da658e673691d2f9ec9cc0c2087c550e4a
Merge: 268dcf532ac3279ad194a504e12db4b57abdc8e0 20e79a0a2cfd15b6cfb18119f2e108396be56716
Author: Stephen Rothwell <sfr@canb.auug.org.au>
Date:   Mon Aug 19 12:30:06 2019 +1000
Merge remote-tracking branch 'net-next/master'
# Conflicts:
#	drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
#	scripts/link-vmlinux.sh
diff --combined Documentation/networking/index.rst
index 6739066acadb,37eabc17894c..425a3ca01db4
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@@ -14,6 -14,8 +14,8 @@@ Contents
   device_drivers/index
   dsa/index
   devlink-info-versions
+  devlink-trap
+  devlink-trap-netdevsim
   ieee802154
   kapi
   z8530book
@@@ -31,7 -33,7 +33,7 @@@ tls
   tls-offload
-.. only:: subproject
+.. only:: subproject and html
 Indices
 =======

diff --combined MAINTAINERS
index a6551a7fb72e,96d3e60697f5..18faf2bb0a84
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -183,7 -183,7 +183,7 @@@ M: Realtek linux nic maintainers <nic_s
 M: Heiner Kallweit <hkallweit1@gmail.com>
 L: netdev@vger.kernel.org
 S: Maintained
-F: drivers/net/ethernet/realtek/r8169.c
+F: drivers/net/ethernet/realtek/r8169*
8250/16?50 (AND CLONE UARTS) SERIAL DRIVER M: Greg Kroah-Hartman gregkh@linuxfoundation.org @@@ -517,6 -517,14 +517,6 @@@ W: http://ez.analog.com/community/linux S: Supported F: drivers/video/backlight/adp8860_bl.c
-ADS1015 HARDWARE MONITOR DRIVER -M: Dirk Eibach eibach@gdsys.de -L: linux-hwmon@vger.kernel.org -S: Maintained -F: Documentation/hwmon/ads1015.rst -F: drivers/hwmon/ads1015.c -F: include/linux/platform_data/ads1015.h - ADT746X FAN DRIVER M: Colin Leroy colin@colino.net S: Maintained @@@ -658,7 -666,7 +658,7 @@@ ALI1563 I2C DRIVE M: Rudolf Marek r.marek@assembler.cz L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/i2c/busses/i2c-ali1563 +F: Documentation/i2c/busses/i2c-ali1563.rst F: drivers/i2c/busses/i2c-ali1563.c
ALLEGRO DVT VIDEO IP CORE DRIVER @@@ -668,13 -676,6 +668,13 @@@ L: linux-media@vger.kernel.or S: Maintained F: drivers/staging/media/allegro-dvt/
+ALLWINNER CPUFREQ DRIVER +M: Yangtao Li tiny.windzz@gmail.com +L: linux-pm@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt +F: drivers/cpufreq/sun50i-cpufreq-nvmem.c + ALLWINNER SECURITY SYSTEM M: Corentin Labbe clabbe.montjoie@gmail.com L: linux-crypto@vger.kernel.org @@@ -682,7 -683,7 +682,7 @@@ S: Maintaine F: drivers/crypto/sunxi-ss/
ALLWINNER VPU DRIVER -M: Maxime Ripard maxime.ripard@bootlin.com +M: Maxime Ripard mripard@kernel.org M: Paul Kocialkowski paul.kocialkowski@bootlin.com L: linux-media@vger.kernel.org S: Maintained @@@ -937,6 -938,14 +937,14 @@@ S: Supporte F: drivers/mux/adgs1408.c F: Documentation/devicetree/bindings/mux/adi,adgs1408.txt
+ ANALOG DEVICES INC ADIN DRIVER
+ M: Alexandru Ardelean <alexandru.ardelean@analog.com>
+ L: netdev@vger.kernel.org
+ W: http://ez.analog.com/community/linux-device-drivers
+ S: Supported
+ F: drivers/net/phy/adin.c
+ F: Documentation/devicetree/bindings/net/adi,adin.yaml
+
ANALOG DEVICES INC ADIS DRIVER LIBRARY
M: Alexandru Ardelean <alexandru.ardelean@analog.com>
S: Supported
@@@ -1407,7 -1416,7 +1415,7 @@@ S: Maintained
F: drivers/clk/sunxi/
ARM/Allwinner sunXi SoC support -M: Maxime Ripard maxime.ripard@bootlin.com +M: Maxime Ripard mripard@kernel.org M: Chen-Yu Tsai wens@csie.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@@ -1581,8 -1590,8 +1589,8 @@@ R: Suzuki K Poulose <suzuki.poulose@arm L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: drivers/hwtracing/coresight/* -F: Documentation/trace/coresight.txt -F: Documentation/trace/coresight-cpu-debug.txt +F: Documentation/trace/coresight.rst +F: Documentation/trace/coresight-cpu-debug.rst F: Documentation/devicetree/bindings/arm/coresight.txt F: Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt F: Documentation/ABI/testing/sysfs-bus-coresight-devices-* @@@ -1625,18 -1634,6 +1633,18 @@@ F: drivers/clocksource/timer-atlas7. N: [^a-z]sirf X: drivers/gnss
+ARM/CZ.NIC TURRIS MOX SUPPORT +M: Marek Behun marek.behun@nic.cz +W: http://mox.turris.cz +S: Maintained +F: Documentation/ABI/testing/debugfs-moxtet +F: Documentation/ABI/testing/sysfs-bus-moxtet-devices +F: Documentation/devicetree/bindings/bus/moxtet.txt +F: Documentation/devicetree/bindings/gpio/gpio-moxtet.txt +F: include/linux/moxtet.h +F: drivers/bus/moxtet.c +F: drivers/gpio/gpio-moxtet.c + ARM/EBSA110 MACHINE SUPPORT M: Russell King linux@armlinux.org.uk L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@@ -1760,11 -1757,20 +1768,11 @@@ L: linux-arm-kernel@lists.infradead.or S: Maintained F: arch/arm/mach-pxa/colibri-pxa270-income.c
-ARM/INTEL IOP13XX ARM ARCHITECTURE -M: Lennert Buytenhek kernel@wantstofly.org -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained - ARM/INTEL IOP32X ARM ARCHITECTURE M: Lennert Buytenhek kernel@wantstofly.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained
-ARM/INTEL IOP33X ARM ARCHITECTURE -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Orphan - ARM/INTEL IQ81342EX MACHINE SUPPORT M: Lennert Buytenhek kernel@wantstofly.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@@ -1923,6 -1929,12 +1931,6 @@@ S: Maintaine F: drivers/phy/mediatek/ F: Documentation/devicetree/bindings/phy/phy-mtk-*
-ARM/MICREL KS8695 ARCHITECTURE -M: Greg Ungerer gerg@uclinux.org -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -F: arch/arm/mach-ks8695/ -S: Odd Fixes - ARM/Microchip (AT91) SoC support M: Nicolas Ferre nicolas.ferre@microchip.com M: Alexandre Belloni alexandre.belloni@bootlin.com @@@ -1964,7 -1976,6 +1972,7 @@@ F: Documentation/devicetree/bindings/i2 F: arch/arm/mach-nomadik/ F: arch/arm/mach-u300/ F: arch/arm/mach-ux500/ +F: drivers/soc/ux500/ F: arch/arm/boot/dts/ste-* F: drivers/clk/clk-nomadik.c F: drivers/clk/clk-u300.c @@@ -2008,6 -2019,22 +2016,6 @@@ F: drivers/*/*npcm F: Documentation/devicetree/bindings/*/*npcm* F: Documentation/devicetree/bindings/*/*/*npcm*
-ARM/NUVOTON W90X900 ARM ARCHITECTURE -M: Wan ZongShun mcuos.com@gmail.com -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -W: http://www.mcuos.com -S: Maintained -F: arch/arm/mach-w90x900/ -F: drivers/input/keyboard/w90p910_keypad.c -F: drivers/input/touchscreen/w90p910_ts.c -F: drivers/watchdog/nuc900_wdt.c -F: drivers/net/ethernet/nuvoton/w90p910_ether.c -F: drivers/mtd/nand/raw/nuc900_nand.c -F: drivers/rtc/rtc-nuc900.c -F: drivers/spi/spi-nuc900.c -F: drivers/usb/host/ehci-w90x900.c -F: drivers/video/fbdev/nuc900fb.c - ARM/OPENMOKO NEO FREERUNNER (GTA02) MACHINE SUPPORT L: openmoko-kernel@lists.openmoko.org (subscribers-only) W: http://wiki.openmoko.org/wiki/Neo_FreeRunner @@@ -3558,7 -3585,7 +3566,7 @@@ F: Documentation/filesystems/caching/ca F: fs/cachefiles/
CADENCE MIPI-CSI2 BRIDGES -M: Maxime Ripard maxime.ripard@bootlin.com +M: Maxime Ripard mripard@kernel.org L: linux-media@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/media/cdns,*.txt @@@ -3616,9 -3643,12 +3624,12 @@@ S: Maintaine F: Documentation/devicetree/bindings/net/can/ F: drivers/net/can/ F: include/linux/can/dev.h + F: include/linux/can/led.h + F: include/linux/can/rx-offload.h F: include/linux/can/platform/ F: include/uapi/linux/can/error.h F: include/uapi/linux/can/netlink.h + F: include/uapi/linux/can/vxcan.h
CAN NETWORK LAYER M: Oliver Hartkopp socketcan@hartkopp.net @@@ -3631,6 -3661,8 +3642,8 @@@ S: Maintaine F: Documentation/networking/can.rst F: net/can/ F: include/linux/can/core.h + F: include/linux/can/skb.h + F: include/net/netns/can.h F: include/uapi/linux/can.h F: include/uapi/linux/can/bcm.h F: include/uapi/linux/can/raw.h @@@ -4084,7 -4116,7 +4097,7 @@@ L: samba-technical@lists.samba.org (mod W: http://linux-cifs.samba.org/ T: git git://git.samba.org/sfrench/cifs-2.6.git S: Supported -F: Documentation/filesystems/cifs/ +F: Documentation/admin-guide/cifs/ F: fs/cifs/
COMPACTPCI HOTPLUG CORE @@@ -4271,14 -4303,6 +4284,14 @@@ S: Supporte F: drivers/cpuidle/cpuidle-exynos.c F: arch/arm/mach-exynos/pm.c
+CPUIDLE DRIVER - ARM PSCI +M: Lorenzo Pieralisi lorenzo.pieralisi@arm.com +M: Sudeep Holla sudeep.holla@arm.com +L: linux-pm@vger.kernel.org +L: linux-arm-kernel@lists.infradead.org +S: Supported +F: drivers/cpuidle/cpuidle-psci.c + CPU IDLE TIME MANAGEMENT FRAMEWORK M: "Rafael J. Wysocki" rjw@rjwysocki.net M: Daniel Lezcano daniel.lezcano@linaro.org @@@ -4940,9 -4964,7 +4953,9 @@@ M: Jonathan Corbet <corbet@lwn.net L: linux-doc@vger.kernel.org S: Maintained F: Documentation/ +F: scripts/documentation-file-ref-check F: scripts/kernel-doc +F: scripts/sphinx-pre-install X: Documentation/ABI/ X: Documentation/firmware-guide/acpi/ X: Documentation/devicetree/ @@@ -4958,14 -4980,6 +4971,14 @@@ L: linux-doc@vger.kernel.or S: Maintained F: Documentation/translations/it_IT
+DOCUMENTATION SCRIPTS +M: Mauro Carvalho Chehab mchehab@kernel.org +L: linux-doc@vger.kernel.org +S: Maintained +F: scripts/documentation-file-ref-check +F: scripts/sphinx-pre-install +F: Documentation/sphinx/parse-headers.pl + DONGWOON DW9714 LENS VOICE COIL DRIVER M: Sakari Ailus sakari.ailus@linux.intel.com L: linux-media@vger.kernel.org @@@ -5294,7 -5308,7 +5307,7 @@@ F: include/linux/vga
DRM DRIVERS AND MISC GPU PATCHES M: Maarten Lankhorst maarten.lankhorst@linux.intel.com -M: Maxime Ripard maxime.ripard@bootlin.com +M: Maxime Ripard mripard@kernel.org M: Sean Paul sean@poorly.run W: https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html S: Maintained @@@ -5307,7 -5321,7 +5320,7 @@@ F: include/uapi/drm/drm F: include/linux/vga*
DRM DRIVERS FOR ALLWINNER A10 -M: Maxime Ripard maxime.ripard@bootlin.com +M: Maxime Ripard mripard@kernel.org L: dri-devel@lists.freedesktop.org S: Supported F: drivers/gpu/drm/sun4i/ @@@ -6064,7 -6078,7 +6077,7 @@@ M: Florian Fainelli <f.fainelli@gmail.c M: Heiner Kallweit hkallweit1@gmail.com L: netdev@vger.kernel.org S: Maintained -F: Documentation/ABI/testing/sysfs-bus-mdio +F: Documentation/ABI/testing/sysfs-class-net-phydev F: Documentation/devicetree/bindings/net/ethernet-phy.yaml F: Documentation/devicetree/bindings/net/mdio* F: Documentation/networking/phy.rst @@@ -6320,7 -6334,7 +6333,7 @@@ FLEXTIMER FTM-QUADDEC DRIVE M: Patrick Havelange patrick.havelange@essensium.com L: linux-iio@vger.kernel.org S: Maintained -F: Documentation/ABI/testing/sysfs-bus-counter-ftm-quadddec +F: Documentation/ABI/testing/sysfs-bus-counter-ftm-quaddec F: Documentation/devicetree/bindings/counter/ftm-quaddec.txt F: drivers/counter/ftm-quaddec.c
@@@ -6343,7 -6357,7 +6356,7 @@@ FPGA MANAGER FRAMEWOR M: Moritz Fischer mdf@kernel.org L: linux-fpga@vger.kernel.org S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/atull/linux-fpga.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git Q: http://patchwork.kernel.org/project/linux-fpga/list/ F: Documentation/fpga/ F: Documentation/driver-api/fpga/ @@@ -6376,7 -6390,7 +6389,7 @@@ FRAMEBUFFER LAYE M: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com L: dri-devel@lists.freedesktop.org L: linux-fbdev@vger.kernel.org -T: git git://github.com/bzolnier/linux.git +T: git git://anongit.freedesktop.org/drm/drm-misc Q: http://patchwork.kernel.org/project/linux-fbdev/list/ S: Maintained F: Documentation/fb/ @@@ -6440,14 -6454,6 +6453,14 @@@ S: Maintaine F: drivers/perf/fsl_imx8_ddr_perf.c F: Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt
+FREESCALE IMX I2C DRIVER +M: Oleksij Rempel o.rempel@pengutronix.de +R: Pengutronix Kernel Team kernel@pengutronix.de +L: linux-i2c@vger.kernel.org +S: Maintained +F: drivers/i2c/busses/i2c-imx.c +F: Documentation/devicetree/bindings/i2c/i2c-imx.txt + FREESCALE IMX LPI2C DRIVER M: Dong Aisheng aisheng.dong@nxp.com L: linux-i2c@vger.kernel.org @@@ -6610,7 -6616,6 +6623,7 @@@ T: git git://git.kernel.org/pub/scm/fs/ S: Supported F: fs/crypto/ F: include/linux/fscrypt*.h +F: include/uapi/linux/fscrypt.h F: Documentation/filesystems/fscrypt.rst
FSI SUBSYSTEM @@@ -6642,18 -6647,6 +6655,18 @@@ S: Maintaine F: fs/notify/ F: include/linux/fsnotify*.h
+FSVERITY: READ-ONLY FILE-BASED AUTHENTICITY PROTECTION +M: Eric Biggers ebiggers@kernel.org +M: Theodore Y. Ts'o tytso@mit.edu +L: linux-fscrypt@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-fscrypt/list/ +T: git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git fsverity +S: Supported +F: fs/verity/ +F: include/linux/fsverity.h +F: include/uapi/linux/fsverity.h +F: Documentation/filesystems/fsverity.rst + FUJITSU LAPTOP EXTRAS M: Jonathan Woithe jwoithe@just42.net L: platform-driver-x86@vger.kernel.org @@@ -6744,13 -6737,6 +6757,13 @@@ W: https://linuxtv.or S: Maintained F: drivers/media/radio/radio-gemtek*
+GENERIC ARCHITECTURE TOPOLOGY +M: Sudeep Holla sudeep.holla@arm.com +L: linux-kernel@vger.kernel.org +S: Maintained +F: drivers/base/arch_topology.c +F: include/linux/arch_topology.h + GENERIC GPIO I2C DRIVER M: Wolfram Sang wsa+renesas@sang-engineering.com S: Supported @@@ -6763,7 -6749,7 +6776,7 @@@ L: linux-i2c@vger.kernel.or S: Supported F: drivers/i2c/muxes/i2c-mux-gpio.c F: include/linux/platform_data/i2c-mux-gpio.h -F: Documentation/i2c/muxes/i2c-mux-gpio +F: Documentation/i2c/muxes/i2c-mux-gpio.rst
GENERIC HDLC (WAN) DRIVERS M: Krzysztof Halasa khc@pm.waw.pl @@@ -7479,7 -7465,7 +7492,7 @@@ F: drivers/net/hyperv F: drivers/scsi/storvsc_drv.c F: drivers/uio/uio_hv_generic.c F: drivers/video/fbdev/hyperv_fb.c -F: drivers/iommu/hyperv_iommu.c +F: drivers/iommu/hyperv-iommu.c F: net/vmw_vsock/hyperv_transport.c F: include/clocksource/hyperv_timer.h F: include/linux/hyperv.h @@@ -7512,14 -7498,14 +7525,14 @@@ I2C CONTROLLER DRIVER FOR NVIDIA GP M: Ajay Gupta ajayg@nvidia.com L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/i2c/busses/i2c-nvidia-gpu +F: Documentation/i2c/busses/i2c-nvidia-gpu.rst F: drivers/i2c/busses/i2c-nvidia-gpu.c
I2C MUXES M: Peter Rosin peda@axentia.se L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/i2c/i2c-topology +F: Documentation/i2c/i2c-topology.rst F: Documentation/i2c/muxes/ F: Documentation/devicetree/bindings/i2c/i2c-mux* F: Documentation/devicetree/bindings/i2c/i2c-arb* @@@ -7532,15 -7518,15 +7545,15 @@@ I2C MV64XXX MARVELL AND ALLWINNER DRIVE M: Gregory CLEMENT gregory.clement@bootlin.com L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt +F: Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml F: drivers/i2c/busses/i2c-mv64xxx.c
I2C OVER PARALLEL PORT M: Jean Delvare jdelvare@suse.com L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/i2c/busses/i2c-parport -F: Documentation/i2c/busses/i2c-parport-light +F: Documentation/i2c/busses/i2c-parport.rst +F: Documentation/i2c/busses/i2c-parport-light.rst F: drivers/i2c/busses/i2c-parport.c F: drivers/i2c/busses/i2c-parport-light.c
@@@ -7574,7 -7560,7 +7587,7 @@@ I2C-TAOS-EVM DRIVE M: Jean Delvare jdelvare@suse.com L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/i2c/busses/i2c-taos-evm +F: Documentation/i2c/busses/i2c-taos-evm.rst F: drivers/i2c/busses/i2c-taos-evm.c
I2C-TINY-USB DRIVER @@@ -7588,19 -7574,19 +7601,19 @@@ I2C/SMBUS CONTROLLER DRIVERS FOR P M: Jean Delvare jdelvare@suse.com L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/i2c/busses/i2c-ali1535 -F: Documentation/i2c/busses/i2c-ali1563 -F: Documentation/i2c/busses/i2c-ali15x3 -F: Documentation/i2c/busses/i2c-amd756 -F: Documentation/i2c/busses/i2c-amd8111 -F: Documentation/i2c/busses/i2c-i801 -F: Documentation/i2c/busses/i2c-nforce2 -F: Documentation/i2c/busses/i2c-piix4 -F: Documentation/i2c/busses/i2c-sis5595 -F: Documentation/i2c/busses/i2c-sis630 -F: Documentation/i2c/busses/i2c-sis96x -F: Documentation/i2c/busses/i2c-via -F: Documentation/i2c/busses/i2c-viapro +F: Documentation/i2c/busses/i2c-ali1535.rst +F: Documentation/i2c/busses/i2c-ali1563.rst +F: Documentation/i2c/busses/i2c-ali15x3.rst +F: Documentation/i2c/busses/i2c-amd756.rst +F: Documentation/i2c/busses/i2c-amd8111.rst +F: Documentation/i2c/busses/i2c-i801.rst +F: Documentation/i2c/busses/i2c-nforce2.rst +F: Documentation/i2c/busses/i2c-piix4.rst +F: Documentation/i2c/busses/i2c-sis5595.rst +F: Documentation/i2c/busses/i2c-sis630.rst +F: Documentation/i2c/busses/i2c-sis96x.rst +F: Documentation/i2c/busses/i2c-via.rst +F: Documentation/i2c/busses/i2c-viapro.rst F: drivers/i2c/busses/i2c-ali1535.c F: drivers/i2c/busses/i2c-ali1563.c F: drivers/i2c/busses/i2c-ali15x3.c @@@ -7629,7 -7615,7 +7642,7 @@@ M: Seth Heasley <seth.heasley@intel.com M: Neil Horman nhorman@tuxdriver.com L: linux-i2c@vger.kernel.org F: drivers/i2c/busses/i2c-ismt.c -F: Documentation/i2c/busses/i2c-ismt +F: Documentation/i2c/busses/i2c-ismt.rst
I2C/SMBUS STUB DRIVER M: Jean Delvare jdelvare@suse.com @@@ -8069,7 -8055,6 +8082,7 @@@ S: Maintaine F: drivers/video/fbdev/i810/
INTEL ASoC DRIVERS +M: Cezary Rojewski cezary.rojewski@intel.com M: Pierre-Louis Bossart pierre-louis.bossart@linux.intel.com M: Liam Girdwood liam.r.girdwood@linux.intel.com M: Jie Yang yang.jie@linux.intel.com @@@ -8091,13 -8076,6 +8104,13 @@@ T: git git://git.code.sf.net/p/intel-sa S: Supported F: drivers/scsi/isci/
+INTEL CPU family model numbers +M: Tony Luck tony.luck@intel.com +M: x86@kernel.org +L: linux-kernel@vger.kernel.org +S: Supported +F: arch/x86/include/asm/intel-family.h + INTEL DRM DRIVERS (excluding Poulsbo, Moorestown and derivative chipsets) M: Jani Nikula jani.nikula@linux.intel.com M: Joonas Lahtinen joonas.lahtinen@linux.intel.com @@@ -8379,7 -8357,7 +8392,7 @@@ M: linux-wimax@intel.co L: wimax@linuxwimax.org (subscribers-only) S: Supported W: http://linuxwimax.org -F: Documentation/wimax/README.i2400m +F: Documentation/admin-guide/wimax/i2400m.rst F: drivers/net/wimax/i2400m/ F: include/uapi/linux/wimax/i2400m.h
@@@ -8434,6 -8412,12 +8447,6 @@@ L: linux-mips@vger.kernel.or S: Maintained F: drivers/net/ethernet/sgi/ioc3-eth.c
-IOC3 SERIAL DRIVER -M: Pat Gefre pfg@sgi.com -L: linux-serial@vger.kernel.org -S: Maintained -F: drivers/tty/serial/ioc3_serial.c - IOMAP FILESYSTEM LIBRARY M: Christoph Hellwig hch@infradead.org M: Darrick J. Wong darrick.wong@oracle.com @@@ -8443,6 -8427,7 +8456,6 @@@ L: linux-xfs@vger.kernel.or L: linux-fsdevel@vger.kernel.org T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git S: Supported -F: fs/iomap.c F: fs/iomap/ F: include/linux/iomap.h
@@@ -8675,7 -8660,7 +8688,7 @@@ L: jfs-discussion@lists.sourceforge.ne W: http://jfs.sourceforge.net/ T: git git://github.com/kleikamp/linux-shaggy.git S: Maintained -F: Documentation/filesystems/jfs.txt +F: Documentation/admin-guide/jfs.rst F: fs/jfs/
JME NETWORK DRIVER @@@ -9017,7 -9002,7 +9030,7 @@@ F: kernel/kprobes. KS0108 LCD CONTROLLER DRIVER M: Miguel Ojeda Sandonis miguel.ojeda.sandonis@gmail.com S: Maintained -F: Documentation/auxdisplay/ks0108 +F: Documentation/admin-guide/auxdisplay/ks0108.rst F: drivers/auxdisplay/ks0108.c F: include/linux/ks0108.h
@@@ -9594,7 -9579,7 +9607,7 @@@ F: Documentation/networking/mac80211-in F: include/net/mac80211.h F: net/mac80211/ F: drivers/net/wireless/mac80211_hwsim.[ch] -F: Documentation/networking/mac80211_hwsim/README +F: Documentation/networking/mac80211_hwsim/mac80211_hwsim.rst
MAILBOX API M: Jassi Brar jassisinghbrar@gmail.com @@@ -10043,8 -10028,8 +10056,8 @@@ L: linux-media@vger.kernel.or L: linux-renesas-soc@vger.kernel.org T: git git://linuxtv.org/media_tree.git S: Supported -F: Documentation/devicetree/bindings/media/renesas,rcar-csi2.txt -F: Documentation/devicetree/bindings/media/rcar_vin.txt +F: Documentation/devicetree/bindings/media/renesas,csi2.txt +F: Documentation/devicetree/bindings/media/renesas,vin.txt F: drivers/media/platform/rcar-vin/
MEDIA DRIVERS FOR RENESAS - VSP1 @@@ -10371,7 -10356,7 +10384,7 @@@ L: linux-i2c@vger.kernel.or S: Supported F: drivers/i2c/busses/i2c-mlxcpld.c F: drivers/i2c/muxes/i2c-mux-mlxcpld.c -F: Documentation/i2c/busses/i2c-mlxcpld +F: Documentation/i2c/busses/i2c-mlxcpld.rst
MELLANOX MLXCPLD LED DRIVER M: Vadim Pasternak vadimp@mellanox.com @@@ -11171,6 -11156,7 +11184,7 @@@ S: Maintaine W: https://fedorahosted.org/dropwatch/ F: net/core/drop_monitor.c F: include/uapi/linux/net_dropmon.h + F: include/net/drop_monitor.h
NETWORKING DRIVERS M: "David S. Miller" davem@davemloft.net @@@ -11350,7 -11336,6 +11364,6 @@@ F: include/net/nfc F: include/uapi/linux/nfc.h F: drivers/nfc/ F: include/linux/platform_data/nfcmrvl.h - F: include/linux/platform_data/nxp-nci.h F: Documentation/devicetree/bindings/net/nfc/
NFS, SUNRPC, AND LOCKD CLIENTS @@@ -11791,7 -11776,6 +11804,7 @@@ S: Maintaine F: arch/arm/mach-omap2/ F: arch/arm/plat-omap/ F: arch/arm/configs/omap2plus_defconfig +F: drivers/bus/ti-sysc.c F: drivers/i2c/busses/i2c-omap.c F: drivers/irqchip/irq-omap-intc.c F: drivers/mfd/*omap*.c @@@ -11812,7 -11796,6 +11825,7 @@@ F: drivers/regulator/tps65910-regulator F: drivers/regulator/twl-regulator.c F: drivers/regulator/twl6030-regulator.c F: include/linux/platform_data/i2c-omap.h +F: include/linux/platform_data/ti-sysc.h
ONION OMEGA2+ BOARD M: Harvey Hunt harveyhuntnexus@gmail.com @@@ -11875,21 -11858,6 +11888,21 @@@ T: git git://linuxtv.org/media_tree.gi S: Maintained F: drivers/media/i2c/ov5647.c
+OMNIVISION OV5670 SENSOR DRIVER +M: Chiranjeevi Rapolu chiranjeevi.rapolu@intel.com +M: Hyungwoo Yang hyungwoo.yang@intel.com +L: linux-media@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: drivers/media/i2c/ov5670.c + +OMNIVISION OV5675 SENSOR DRIVER +M: Shawn Tu shawnx.tu@intel.com +L: linux-media@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: drivers/media/i2c/ov5675.c + OMNIVISION OV5695 SENSOR DRIVER M: Shunqian Zheng zhengsq@rock-chips.com L: linux-media@vger.kernel.org @@@ -12011,7 -11979,7 +12024,7 @@@ M: Andrew Lunn <andrew@lunn.ch L: linux-i2c@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/i2c/i2c-ocores.txt -F: Documentation/i2c/busses/i2c-ocores +F: Documentation/i2c/busses/i2c-ocores.rst F: drivers/i2c/busses/i2c-ocores.c F: include/linux/platform_data/i2c-ocores.h
@@@ -12134,7 -12102,7 +12147,7 @@@ L: netdev@vger.kernel.or S: Supported F: lib/packing.c F: include/linux/packing.h -F: Documentation/packing.txt +F: Documentation/core-api/packing.rst
PADATA PARALLEL EXECUTION MECHANISM M: Steffen Klassert steffen.klassert@secunet.com @@@ -13262,7 -13230,7 +13275,7 @@@ M: Manish Chopra <manishc@marvell.com M: GR-Linux-NIC-Dev@marvell.com L: netdev@vger.kernel.org S: Supported - F: drivers/net/ethernet/qlogic/qlge/ + F: drivers/staging/qlge/
QM1D1B0004 MEDIA DRIVER
M: Akihiro Tsukada <tskd08@gmail.com>
@@@ -13330,8 -13298,8 +13343,8 @@@ QUALCOMM CPUFREQ DRIVER MSM8996/APQ8096
M: Ilia Lin <ilia.lin@kernel.org>
L: linux-pm@vger.kernel.org
S: Maintained
-F: Documentation/devicetree/bindings/opp/kryo-cpufreq.txt
-F: drivers/cpufreq/qcom-cpufreq-kryo.c
+F: Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt
+F: drivers/cpufreq/qcom-cpufreq-nvmem.c
QUALCOMM EMAC GIGABIT ETHERNET DRIVER M: Timur Tabi timur@kernel.org @@@ -13673,7 -13641,7 +13686,7 @@@ F: drivers/clk/renesas RENESAS EMEV2 I2C DRIVER M: Wolfram Sang wsa+renesas@sang-engineering.com S: Supported -F: Documentation/devicetree/bindings/i2c/i2c-emev2.txt +F: Documentation/devicetree/bindings/i2c/renesas,iic-emev2.txt F: drivers/i2c/busses/i2c-emev2.c
RENESAS ETHERNET DRIVERS @@@ -13695,15 -13663,15 +13708,15 @@@ F: drivers/iio/adc/rcar-gyroadc. RENESAS R-CAR I2C DRIVERS M: Wolfram Sang wsa+renesas@sang-engineering.com S: Supported -F: Documentation/devicetree/bindings/i2c/i2c-rcar.txt -F: Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt +F: Documentation/devicetree/bindings/i2c/renesas,i2c.txt +F: Documentation/devicetree/bindings/i2c/renesas,iic.txt F: drivers/i2c/busses/i2c-rcar.c F: drivers/i2c/busses/i2c-sh_mobile.c
RENESAS RIIC DRIVER M: Chris Brandt chris.brandt@renesas.com S: Supported -F: Documentation/devicetree/bindings/i2c/i2c-riic.txt +F: Documentation/devicetree/bindings/i2c/renesas,riic.txt F: drivers/i2c/busses/i2c-riic.c
RENESAS USB PHY DRIVER @@@ -13799,7 -13767,7 +13812,7 @@@ HANTRO VPU CODEC DRIVE M: Ezequiel Garcia ezequiel@collabora.com L: linux-media@vger.kernel.org S: Maintained -F: drivers/staging/media/platform/hantro/ +F: drivers/staging/media/hantro/ F: Documentation/devicetree/bindings/media/rockchip-vpu.txt
ROCKER DRIVER @@@ -14321,7 -14289,7 +14334,7 @@@ F: net/sctp SCx200 CPU SUPPORT M: Jim Cromie jim.cromie@gmail.com S: Odd Fixes -F: Documentation/i2c/busses/scx200_acb +F: Documentation/i2c/busses/scx200_acb.rst F: arch/x86/platform/scx200/ F: drivers/watchdog/scx200_wdt.c F: drivers/i2c/busses/scx200* @@@ -15596,7 -15564,6 +15609,7 @@@ F: drivers/clk/clk-sc[mp]i. F: drivers/cpufreq/sc[mp]i-cpufreq.c F: drivers/firmware/arm_scpi.c F: drivers/firmware/arm_scmi/ +F: drivers/reset/reset-scmi.c F: include/linux/sc[mp]i_protocol.h
SYSTEM RESET/SHUTDOWN DRIVERS @@@ -15958,7 -15925,7 +15971,7 @@@ M: Viresh Kumar <viresh.kumar@linaro.or M: Javi Merino javi.merino@kernel.org L: linux-pm@vger.kernel.org S: Supported -F: Documentation/thermal/cpu-cooling-api.rst +F: Documentation/driver-api/thermal/cpu-cooling-api.rst F: drivers/thermal/cpu_cooling.c F: include/linux/cpu_cooling.h
@@@ -16130,7 -16097,7 +16143,7 @@@ S: Maintaine F: drivers/net/ethernet/ti/netcp*
TI PCM3060 ASoC CODEC DRIVER -M: Kirill Marinushkin kmarinushkin@birdec.tech +M: Kirill Marinushkin kmarinushkin@birdec.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/sound/pcm3060.txt @@@ -16481,7 -16448,7 +16494,7 @@@ F: drivers/hid/hid-udraw-ps3. UFS FILESYSTEM M: Evgeniy Dushistov dushistov@mail.ru S: Maintained -F: Documentation/filesystems/ufs.txt +F: Documentation/admin-guide/ufs.rst F: fs/ufs/
UHID USERSPACE HID IO DRIVER: @@@ -17399,7 -17366,7 +17412,7 @@@ M: linux-wimax@intel.co L: wimax@linuxwimax.org (subscribers-only) S: Supported W: http://linuxwimax.org -F: Documentation/wimax/README.wimax +F: Documentation/admin-guide/wimax/wimax.rst F: include/linux/wimax/debug.h F: include/net/wimax.h F: include/uapi/linux/wimax.h @@@ -17851,6 -17818,14 +17864,6 @@@ S: Maintaine F: mm/zpool.c F: include/linux/zpool.h
-ZR36067 VIDEO FOR LINUX DRIVER -L: mjpeg-users@lists.sourceforge.net -L: linux-media@vger.kernel.org -W: http://mjpeg.sourceforge.net/driver-zoran/ -T: hg https://linuxtv.org/hg/v4l-dvb -S: Odd Fixes -F: drivers/staging/media/zoran/ - ZRAM COMPRESSED RAM BLOCK DEVICE DRVIER M: Minchan Kim minchan@kernel.org M: Nitin Gupta ngupta@vflare.org diff --combined arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 20d7e7db5dcb,de71153fda00..d34c867b49ba --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@@ -29,7 -29,6 +29,7 @@@ clocks = <&clockgen 1 0>; next-level-cache = <&l2>; cpu-idle-states = <&CPU_PW20>; + #cooling-cells = <2>; };
cpu1: cpu@1 { @@@ -40,7 -39,6 +40,7 @@@ clocks = <&clockgen 1 0>; next-level-cache = <&l2>; cpu-idle-states = <&CPU_PW20>; + #cooling-cells = <2>; };
l2: l2-cache { @@@ -505,89 -503,6 +505,89 @@@ status = "disabled"; };
+ tmu: tmu@1f00000 { + compatible = "fsl,qoriq-tmu"; + reg = <0x0 0x1f80000 0x0 0x10000>; + interrupts = <0 23 0x4>; + fsl,tmu-range = <0xb0000 0xa0026 0x80048 0x70061>; + fsl,tmu-calibration = <0x00000000 0x00000024 + 0x00000001 0x0000002b + 0x00000002 0x00000031 + 0x00000003 0x00000038 + 0x00000004 0x0000003f + 0x00000005 0x00000045 + 0x00000006 0x0000004c + 0x00000007 0x00000053 + 0x00000008 0x00000059 + 0x00000009 0x00000060 + 0x0000000a 0x00000066 + 0x0000000b 0x0000006d + + 0x00010000 0x0000001c + 0x00010001 0x00000024 + 0x00010002 0x0000002c + 0x00010003 0x00000035 + 0x00010004 0x0000003d + 0x00010005 0x00000045 + 0x00010006 0x0000004d + 0x00010007 0x00000045 + 0x00010008 0x0000005e + 0x00010009 0x00000066 + 0x0001000a 0x0000006e + + 0x00020000 0x00000018 + 0x00020001 0x00000022 + 0x00020002 0x0000002d + 0x00020003 0x00000038 + 0x00020004 0x00000043 + 0x00020005 0x0000004d + 0x00020006 0x00000058 + 0x00020007 0x00000063 + 0x00020008 0x0000006e + + 0x00030000 0x00000010 + 0x00030001 0x0000001c + 0x00030002 0x00000029 + 0x00030003 0x00000036 + 0x00030004 0x00000042 + 0x00030005 0x0000004f + 0x00030006 0x0000005b + 0x00030007 0x00000068>; + little-endian; + #thermal-sensor-cells = <1>; + }; + + thermal-zones { + core-cluster { + polling-delay-passive = <1000>; + polling-delay = <5000>; + thermal-sensors = <&tmu 0>; + + trips { + core_cluster_alert: core-cluster-alert { + temperature = <85000>; + hysteresis = <2000>; + type = "passive"; + }; + + core_cluster_crit: core-cluster-crit { + temperature = <95000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&core_cluster_alert>; + cooling-device = + <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + }; + }; + }; + }; + pcie@1f0000000 { /* Integrated Endpoint Root Complex */ compatible = "pci-host-ecam-generic"; reg = <0x01 0xf0000000 0x0 0x100000>; @@@ -621,6 -536,12 +621,12 @@@ compatible = "fsl,enetc"; reg = <0x000100 0 0 0 0>; }; + enetc_mdio_pf3: mdio@0,3 { + compatible = "fsl,enetc-mdio"; + reg = <0x000300 0 0 0 0>; + #address-cells = <1>; + #size-cells = <0>; + }; ethernet@0,4 { compatible = "fsl,enetc-ptp"; reg = <0x000400 0 0 0 0>; @@@ -639,7 -560,6 +645,7 @@@ clocks = <&dpclk>, <&aclk>, <&aclk>, <&pclk>; clock-names = "pxlclk", "mclk", "aclk", "pclk"; arm,malidp-output-port-lines = /bits/ 8 <8 8 8>; + arm,malidp-arqos-value = <0xd000d000>;
port { dp0_out: endpoint { diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt.c index 8dce4069472b,94be97b7952c..4c790ffa1a73 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@@ -116,6 -116,9 +116,9 @@@ enum board_idx BCM57508, BCM57504, BCM57502, + BCM57508_NPAR, + BCM57504_NPAR, + BCM57502_NPAR, BCM58802, BCM58804, BCM58808, @@@ -161,6 -164,9 +164,9 @@@ static const struct [BCM57508] = { "Broadcom BCM57508 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" }, [BCM57504] = { "Broadcom BCM57504 NetXtreme-E 10Gb/25Gb/50Gb/100Gb/200Gb Ethernet" }, [BCM57502] = { "Broadcom BCM57502 NetXtreme-E 10Gb/25Gb/50Gb Ethernet" }, + [BCM57508_NPAR] = { "Broadcom BCM57508 NetXtreme-E Ethernet Partition" }, + [BCM57504_NPAR] = { "Broadcom BCM57504 NetXtreme-E Ethernet Partition" }, + [BCM57502_NPAR] = { "Broadcom BCM57502 NetXtreme-E Ethernet Partition" }, [BCM58802] = { "Broadcom BCM58802 NetXtreme-S 10Gb/25Gb/40Gb/50Gb Ethernet" }, [BCM58804] = { "Broadcom BCM58804 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, [BCM58808] = { "Broadcom BCM58808 NetXtreme-S 10Gb/25Gb/40Gb/50Gb/100Gb Ethernet" }, @@@ -209,6 -215,12 +215,12 @@@ static const struct pci_device_id bnxt_ { PCI_VDEVICE(BROADCOM, 0x1750), .driver_data = BCM57508 }, { PCI_VDEVICE(BROADCOM, 0x1751), .driver_data = BCM57504 }, { PCI_VDEVICE(BROADCOM, 0x1752), .driver_data = BCM57502 }, + { PCI_VDEVICE(BROADCOM, 0x1800), .driver_data = BCM57508_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1801), .driver_data = BCM57504_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1802), .driver_data = BCM57502_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1803), .driver_data = BCM57508_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1804), .driver_data = BCM57504_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x1805), .driver_data = BCM57502_NPAR }, { PCI_VDEVICE(BROADCOM, 0xd802), .driver_data = BCM58802 }, { PCI_VDEVICE(BROADCOM, 0xd804), .driver_data = BCM58804 }, #ifdef CONFIG_BNXT_SRIOV @@@ -828,16 -840,41 +840,41 @@@ static inline int bnxt_alloc_rx_page(st return 0; }
- static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 cp_cons, - u32 agg_bufs) + static struct rx_agg_cmp *bnxt_get_agg(struct bnxt *bp, + struct bnxt_cp_ring_info *cpr, + u16 cp_cons, u16 curr) + { + struct rx_agg_cmp *agg; + + cp_cons = RING_CMP(ADV_RAW_CMP(cp_cons, curr)); + agg = (struct rx_agg_cmp *) + &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; + return agg; + } + + static struct rx_agg_cmp *bnxt_get_tpa_agg_p5(struct bnxt *bp, + struct bnxt_rx_ring_info *rxr, + u16 agg_id, u16 curr) + { + struct bnxt_tpa_info *tpa_info = &rxr->rx_tpa[agg_id]; + + return &tpa_info->agg_arr[curr]; + } + + static void bnxt_reuse_rx_agg_bufs(struct bnxt_cp_ring_info *cpr, u16 idx, + u16 start, u32 agg_bufs, bool tpa) { struct bnxt_napi *bnapi = cpr->bnapi; struct bnxt *bp = bnapi->bp; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; u16 prod = rxr->rx_agg_prod; u16 sw_prod = rxr->rx_sw_agg_prod; + bool p5_tpa = false; u32 i;
+ if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa) + p5_tpa = true; + for (i = 0; i < agg_bufs; i++) { u16 cons; struct rx_agg_cmp *agg; @@@ -845,8 -882,10 +882,10 @@@ struct rx_bd *prod_bd; struct page *page;
- agg = (struct rx_agg_cmp *) - &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; + if (p5_tpa) + agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, start + i); + else + agg = bnxt_get_agg(bp, cpr, idx, start + i); cons = agg->rx_agg_cmp_opaque; __clear_bit(cons, rxr->rx_agg_bmap);
@@@ -874,7 -913,6 +913,6 @@@
prod = NEXT_RX_AGG(prod); sw_prod = NEXT_RX_AGG(sw_prod); - cp_cons = NEXT_CMP(cp_cons); } rxr->rx_agg_prod = prod; rxr->rx_sw_agg_prod = sw_prod; @@@ -888,7 -926,7 +926,7 @@@ static struct sk_buff *bnxt_rx_page_skb { unsigned int payload = offset_and_len >> 16; unsigned int len = offset_and_len & 0xffff; - struct skb_frag_struct *frag; + skb_frag_t *frag; struct page *page = data; u16 prod = rxr->rx_prod; struct sk_buff *skb; @@@ -919,7 -957,7 +957,7 @@@
frag = &skb_shinfo(skb)->frags[0]; skb_frag_size_sub(frag, payload); - frag->page_offset += payload; + skb_frag_off_add(frag, payload); skb->data_len -= payload; skb->tail += payload;
@@@ -957,15 -995,19 +995,19 @@@ static struct sk_buff *bnxt_rx_skb(stru
static struct sk_buff *bnxt_rx_pages(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, - struct sk_buff *skb, u16 cp_cons, - u32 agg_bufs) + struct sk_buff *skb, u16 idx, + u32 agg_bufs, bool tpa) { struct bnxt_napi *bnapi = cpr->bnapi; struct pci_dev *pdev = bp->pdev; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; u16 prod = rxr->rx_agg_prod; + bool p5_tpa = false; u32 i;
+ if ((bp->flags & BNXT_FLAG_CHIP_P5) && tpa) + p5_tpa = true; + for (i = 0; i < agg_bufs; i++) { u16 cons, frag_len; struct rx_agg_cmp *agg; @@@ -973,8 -1015,10 +1015,10 @@@ struct page *page; dma_addr_t mapping;
- agg = (struct rx_agg_cmp *) - &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; + if (p5_tpa) + agg = bnxt_get_tpa_agg_p5(bp, rxr, idx, i); + else + agg = bnxt_get_agg(bp, cpr, idx, i); cons = agg->rx_agg_cmp_opaque; frag_len = (le32_to_cpu(agg->rx_agg_cmp_len_flags_type) & RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT; @@@ -1008,7 -1052,7 +1052,7 @@@ * allocated already. */ rxr->rx_agg_prod = prod; - bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs - i); + bnxt_reuse_rx_agg_bufs(cpr, idx, i, agg_bufs - i, tpa); return NULL; }
@@@ -1021,7 -1065,6 +1065,6 @@@ skb->truesize += PAGE_SIZE;
prod = NEXT_RX_AGG(prod); - cp_cons = NEXT_CMP(cp_cons); } rxr->rx_agg_prod = prod; return skb; @@@ -1081,9 -1124,10 +1124,10 @@@ static int bnxt_discard_rx(struct bnxt } else if (cmp_type == CMP_TYPE_RX_L2_TPA_END_CMP) { struct rx_tpa_end_cmp *tpa_end = cmp;
- agg_bufs = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) & - RX_TPA_END_CMP_AGG_BUFS) >> - RX_TPA_END_CMP_AGG_BUFS_SHIFT; + if (bp->flags & BNXT_FLAG_CHIP_P5) + return 0; + + agg_bufs = TPA_END_AGG_BUFS(tpa_end); }
if (agg_bufs) { @@@ -1120,26 -1164,60 +1164,60 @@@ static void bnxt_sched_reset(struct bnx rxr->rx_next_cons = 0xffff; }
+ static u16 bnxt_alloc_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id) + { + struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map; + u16 idx = agg_id & MAX_TPA_P5_MASK; + + if (test_bit(idx, map->agg_idx_bmap)) + idx = find_first_zero_bit(map->agg_idx_bmap, + BNXT_AGG_IDX_BMAP_SIZE); + __set_bit(idx, map->agg_idx_bmap); + map->agg_id_tbl[agg_id] = idx; + return idx; + } + + static void bnxt_free_agg_idx(struct bnxt_rx_ring_info *rxr, u16 idx) + { + struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map; + + __clear_bit(idx, map->agg_idx_bmap); + } + + static u16 bnxt_lookup_agg_idx(struct bnxt_rx_ring_info *rxr, u16 agg_id) + { + struct bnxt_tpa_idx_map *map = rxr->rx_tpa_idx_map; + + return map->agg_id_tbl[agg_id]; + } + static void bnxt_tpa_start(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, struct rx_tpa_start_cmp *tpa_start, struct rx_tpa_start_cmp_ext *tpa_start1) { - u8 agg_id = TPA_START_AGG_ID(tpa_start); - u16 cons, prod; - struct bnxt_tpa_info *tpa_info; struct bnxt_sw_rx_bd *cons_rx_buf, *prod_rx_buf; + struct bnxt_tpa_info *tpa_info; + u16 cons, prod, agg_id; struct rx_bd *prod_bd; dma_addr_t mapping;
+ if (bp->flags & BNXT_FLAG_CHIP_P5) { + agg_id = TPA_START_AGG_ID_P5(tpa_start); + agg_id = bnxt_alloc_agg_idx(rxr, agg_id); + } else { + agg_id = TPA_START_AGG_ID(tpa_start); + } cons = tpa_start->rx_tpa_start_cmp_opaque; prod = rxr->rx_prod; cons_rx_buf = &rxr->rx_buf_ring[cons]; prod_rx_buf = &rxr->rx_buf_ring[prod]; tpa_info = &rxr->rx_tpa[agg_id];
- if (unlikely(cons != rxr->rx_next_cons)) { - netdev_warn(bp->dev, "TPA cons %x != expected cons %x\n", - cons, rxr->rx_next_cons); + if (unlikely(cons != rxr->rx_next_cons || + TPA_START_ERROR(tpa_start))) { + netdev_warn(bp->dev, "TPA cons %x, expected cons %x, error code %x\n", + cons, rxr->rx_next_cons, + TPA_START_ERROR_CODE(tpa_start1)); bnxt_sched_reset(bp, rxr); return; } @@@ -1184,6 -1262,7 +1262,7 @@@ tpa_info->flags2 = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_flags2); tpa_info->metadata = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_metadata); tpa_info->hdr_info = le32_to_cpu(tpa_start1->rx_tpa_start_cmp_hdr_info); + tpa_info->agg_count = 0;
rxr->rx_prod = NEXT_RX(prod); cons = NEXT_RX(cons); @@@ -1195,13 -1274,37 +1274,37 @@@ cons_rx_buf->data = NULL; }
- static void bnxt_abort_tpa(struct bnxt_cp_ring_info *cpr, u16 cp_cons, - u32 agg_bufs) + static void bnxt_abort_tpa(struct bnxt_cp_ring_info *cpr, u16 idx, u32 agg_bufs) { if (agg_bufs) - bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs); + bnxt_reuse_rx_agg_bufs(cpr, idx, 0, agg_bufs, true); }
+ #ifdef CONFIG_INET + static void bnxt_gro_tunnel(struct sk_buff *skb, __be16 ip_proto) + { + struct udphdr *uh = NULL; + + if (ip_proto == htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)skb->data; + + if (iph->protocol == IPPROTO_UDP) + uh = (struct udphdr *)(iph + 1); + } else { + struct ipv6hdr *iph = (struct ipv6hdr *)skb->data; + + if (iph->nexthdr == IPPROTO_UDP) + uh = (struct udphdr *)(iph + 1); + } + if (uh) { + if (uh->check) + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL_CSUM; + else + skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; + } + } + #endif + static struct sk_buff *bnxt_gro_func_5731x(struct bnxt_tpa_info *tpa_info, int payload_off, int tcp_ts, struct sk_buff *skb) @@@ -1259,28 -1362,39 +1362,39 @@@ }
if (inner_mac_off) { /* tunnel */ - struct udphdr *uh = NULL; __be16 proto = *((__be16 *)(skb->data + outer_ip_off - ETH_HLEN - 2));
- if (proto == htons(ETH_P_IP)) { - struct iphdr *iph = (struct iphdr *)skb->data; + bnxt_gro_tunnel(skb, proto); + } + #endif + return skb; + }
- if (iph->protocol == IPPROTO_UDP) - uh = (struct udphdr *)(iph + 1); - } else { - struct ipv6hdr *iph = (struct ipv6hdr *)skb->data; + static struct sk_buff *bnxt_gro_func_5750x(struct bnxt_tpa_info *tpa_info, + int payload_off, int tcp_ts, + struct sk_buff *skb) + { + #ifdef CONFIG_INET + u16 outer_ip_off, inner_ip_off, inner_mac_off; + u32 hdr_info = tpa_info->hdr_info; + int iphdr_len, nw_off;
- if (iph->nexthdr == IPPROTO_UDP) - uh = (struct udphdr *)(iph + 1); - } - if (uh) { - if (uh->check) - skb_shinfo(skb)->gso_type |= - SKB_GSO_UDP_TUNNEL_CSUM; - else - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; - } + inner_ip_off = BNXT_TPA_INNER_L3_OFF(hdr_info); + inner_mac_off = BNXT_TPA_INNER_L2_OFF(hdr_info); + outer_ip_off = BNXT_TPA_OUTER_L3_OFF(hdr_info); + + nw_off = inner_ip_off - ETH_HLEN; + skb_set_network_header(skb, nw_off); + iphdr_len = (tpa_info->flags2 & RX_TPA_START_CMP_FLAGS2_IP_TYPE) ? + sizeof(struct ipv6hdr) : sizeof(struct iphdr); + skb_set_transport_header(skb, nw_off + iphdr_len); + + if (inner_mac_off) { /* tunnel */ + __be16 proto = *((__be16 *)(skb->data + outer_ip_off - + ETH_HLEN - 2)); + + bnxt_gro_tunnel(skb, proto); } #endif return skb; @@@ -1327,28 -1441,8 +1441,8 @@@ static struct sk_buff *bnxt_gro_func_57 return NULL; }
- if (nw_off) { /* tunnel */ - struct udphdr *uh = NULL; - - if (skb->protocol == htons(ETH_P_IP)) { - struct iphdr *iph = (struct iphdr *)skb->data; - - if (iph->protocol == IPPROTO_UDP) - uh = (struct udphdr *)(iph + 1); - } else { - struct ipv6hdr *iph = (struct ipv6hdr *)skb->data; - - if (iph->nexthdr == IPPROTO_UDP) - uh = (struct udphdr *)(iph + 1); - } - if (uh) { - if (uh->check) - skb_shinfo(skb)->gso_type |= - SKB_GSO_UDP_TUNNEL_CSUM; - else - skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_TUNNEL; - } - } + if (nw_off) /* tunnel */ + bnxt_gro_tunnel(skb, skb->protocol); #endif return skb; } @@@ -1371,9 -1465,10 +1465,10 @@@ static inline struct sk_buff *bnxt_gro_ skb_shinfo(skb)->gso_size = le32_to_cpu(tpa_end1->rx_tpa_end_cmp_seg_len); skb_shinfo(skb)->gso_type = tpa_info->gso_type; - payload_off = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) & - RX_TPA_END_CMP_PAYLOAD_OFFSET) >> - RX_TPA_END_CMP_PAYLOAD_OFFSET_SHIFT; + if (bp->flags & BNXT_FLAG_CHIP_P5) + payload_off = TPA_END_PAYLOAD_OFF_P5(tpa_end1); + else + payload_off = TPA_END_PAYLOAD_OFF(tpa_end); skb = bp->gro_func(tpa_info, payload_off, TPA_END_GRO_TS(tpa_end), skb); if (likely(skb)) tcp_gro_complete(skb); @@@ -1401,14 -1496,14 +1496,14 @@@ static inline struct sk_buff *bnxt_tpa_ { struct bnxt_napi *bnapi = cpr->bnapi; struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; - u8 agg_id = TPA_END_AGG_ID(tpa_end); u8 *data_ptr, agg_bufs; - u16 cp_cons = RING_CMP(*raw_cons); unsigned int len; struct bnxt_tpa_info *tpa_info; dma_addr_t mapping; struct sk_buff *skb; + u16 idx = 0, agg_id; void *data; + bool gro;
if (unlikely(bnapi->in_reset)) { int rc = bnxt_discard_rx(bp, cpr, raw_cons, tpa_end); @@@ -1418,26 -1513,43 +1513,43 @@@ return NULL; }
- tpa_info = &rxr->rx_tpa[agg_id]; + if (bp->flags & BNXT_FLAG_CHIP_P5) { + agg_id = TPA_END_AGG_ID_P5(tpa_end); + agg_id = bnxt_lookup_agg_idx(rxr, agg_id); + agg_bufs = TPA_END_AGG_BUFS_P5(tpa_end1); + tpa_info = &rxr->rx_tpa[agg_id]; + if (unlikely(agg_bufs != tpa_info->agg_count)) { + netdev_warn(bp->dev, "TPA end agg_buf %d != expected agg_bufs %d\n", + agg_bufs, tpa_info->agg_count); + agg_bufs = tpa_info->agg_count; + } + tpa_info->agg_count = 0; + *event |= BNXT_AGG_EVENT; + bnxt_free_agg_idx(rxr, agg_id); + idx = agg_id; + gro = !!(bp->flags & BNXT_FLAG_GRO); + } else { + agg_id = TPA_END_AGG_ID(tpa_end); + agg_bufs = TPA_END_AGG_BUFS(tpa_end); + tpa_info = &rxr->rx_tpa[agg_id]; + idx = RING_CMP(*raw_cons); + if (agg_bufs) { + if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons)) + return ERR_PTR(-EBUSY); + + *event |= BNXT_AGG_EVENT; + idx = NEXT_CMP(idx); + } + gro = !!TPA_END_GRO(tpa_end); + } data = tpa_info->data; data_ptr = tpa_info->data_ptr; prefetch(data_ptr); len = tpa_info->len; mapping = tpa_info->mapping;
- agg_bufs = (le32_to_cpu(tpa_end->rx_tpa_end_cmp_misc_v1) & - RX_TPA_END_CMP_AGG_BUFS) >> RX_TPA_END_CMP_AGG_BUFS_SHIFT; - - if (agg_bufs) { - if (!bnxt_agg_bufs_valid(bp, cpr, agg_bufs, raw_cons)) - return ERR_PTR(-EBUSY); - - *event |= BNXT_AGG_EVENT; - cp_cons = NEXT_CMP(cp_cons); - } - if (unlikely(agg_bufs > MAX_SKB_FRAGS || TPA_END_ERRORS(tpa_end1))) { - bnxt_abort_tpa(cpr, cp_cons, agg_bufs); + bnxt_abort_tpa(cpr, idx, agg_bufs); if (agg_bufs > MAX_SKB_FRAGS) netdev_warn(bp->dev, "TPA frags %d exceeded MAX_SKB_FRAGS %d\n", agg_bufs, (int)MAX_SKB_FRAGS); @@@ -1447,7 -1559,7 +1559,7 @@@ if (len <= bp->rx_copy_thresh) { skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping); if (!skb) { - bnxt_abort_tpa(cpr, cp_cons, agg_bufs); + bnxt_abort_tpa(cpr, idx, agg_bufs); return NULL; } } else { @@@ -1456,7 -1568,7 +1568,7 @@@
new_data = __bnxt_alloc_rx_data(bp, &new_mapping, GFP_ATOMIC); if (!new_data) { - bnxt_abort_tpa(cpr, cp_cons, agg_bufs); + bnxt_abort_tpa(cpr, idx, agg_bufs); return NULL; }
@@@ -1471,7 -1583,7 +1583,7 @@@
if (!skb) { kfree(data); - bnxt_abort_tpa(cpr, cp_cons, agg_bufs); + bnxt_abort_tpa(cpr, idx, agg_bufs); return NULL; } skb_reserve(skb, bp->rx_offset); @@@ -1479,7 -1591,7 +1591,7 @@@ }
if (agg_bufs) { - skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs); + skb = bnxt_rx_pages(bp, cpr, skb, idx, agg_bufs, true); if (!skb) { /* Page reuse already handled by bnxt_rx_pages(). */ return NULL; @@@ -1508,12 -1620,24 +1620,24 @@@ (tpa_info->flags2 & RX_CMP_FLAGS2_T_L4_CS_CALC) >> 3; }
- if (TPA_END_GRO(tpa_end)) + if (gro) skb = bnxt_gro_skb(bp, tpa_info, tpa_end, tpa_end1, skb);
return skb; }
+ static void bnxt_tpa_agg(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, + struct rx_agg_cmp *rx_agg) + { + u16 agg_id = TPA_AGG_AGG_ID(rx_agg); + struct bnxt_tpa_info *tpa_info; + + agg_id = bnxt_lookup_agg_idx(rxr, agg_id); + tpa_info = &rxr->rx_tpa[agg_id]; + BUG_ON(tpa_info->agg_count >= MAX_SKB_FRAGS); + tpa_info->agg_arr[tpa_info->agg_count++] = *rx_agg; + } + static void bnxt_deliver_skb(struct bnxt *bp, struct bnxt_napi *bnapi, struct sk_buff *skb) { @@@ -1555,6 -1679,13 +1679,13 @@@ static int bnxt_rx_pkt(struct bnxt *bp rxcmp = (struct rx_cmp *) &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)];
+ cmp_type = RX_CMP_TYPE(rxcmp); + + if (cmp_type == CMP_TYPE_RX_TPA_AGG_CMP) { + bnxt_tpa_agg(bp, rxr, (struct rx_agg_cmp *)rxcmp); + goto next_rx_no_prod_no_len; + } + tmp_raw_cons = NEXT_RAW_CMP(tmp_raw_cons); cp_cons = RING_CMP(tmp_raw_cons); rxcmp1 = (struct rx_cmp_ext *) @@@ -1563,8 -1694,6 +1694,6 @@@ if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons)) return -EBUSY;
- cmp_type = RX_CMP_TYPE(rxcmp); - prod = rxr->rx_prod;
if (cmp_type == CMP_TYPE_RX_L2_TPA_START_CMP) { @@@ -1623,7 -1752,8 +1752,8 @@@
bnxt_reuse_rx_data(rxr, cons, data); if (agg_bufs) - bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs); + bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0, agg_bufs, + false);
rc = -EIO; if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) { @@@ -1646,7 -1776,8 +1776,8 @@@ bnxt_reuse_rx_data(rxr, cons, data); if (!skb) { if (agg_bufs) - bnxt_reuse_rx_agg_bufs(cpr, cp_cons, agg_bufs); + bnxt_reuse_rx_agg_bufs(cpr, cp_cons, 0, + agg_bufs, false); rc = -ENOMEM; goto next_rx; } @@@ -1666,7 -1797,7 +1797,7 @@@ }
if (agg_bufs) { - skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs); + skb = bnxt_rx_pages(bp, cpr, skb, cp_cons, agg_bufs, false); if (!skb) { rc = -ENOMEM; goto next_rx; @@@ -2021,9 -2152,9 +2152,9 @@@ static void __bnxt_poll_work_done(struc if (bnapi->events & BNXT_RX_EVENT) { struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
- bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); if (bnapi->events & BNXT_AGG_EVENT) bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); } bnapi->events = 0; } @@@ -2325,10 -2456,11 +2456,11 @@@ static void bnxt_free_rx_skbs(struct bn max_agg_idx = bp->rx_agg_nr_pages * RX_DESC_CNT; for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; + struct bnxt_tpa_idx_map *map; int j;
if (rxr->rx_tpa) { - for (j = 0; j < MAX_TPA; j++) { + for (j = 0; j < bp->max_tpa; j++) { struct bnxt_tpa_info *tpa_info = &rxr->rx_tpa[j]; u8 *data = tpa_info->data; @@@ -2395,6 -2527,9 +2527,9 @@@ __free_page(rxr->rx_page); rxr->rx_page = NULL; } + map = rxr->rx_tpa_idx_map; + if (map) + memset(map->agg_idx_bmap, 0, sizeof(map->agg_idx_bmap)); } }
@@@ -2483,6 -2618,61 +2618,61 @@@ static int bnxt_alloc_ring(struct bnxt return 0; }
+ static void bnxt_free_tpa_info(struct bnxt *bp) + { + int i; + + for (i = 0; i < bp->rx_nr_rings; i++) { + struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; + + kfree(rxr->rx_tpa_idx_map); + rxr->rx_tpa_idx_map = NULL; + if (rxr->rx_tpa) { + kfree(rxr->rx_tpa[0].agg_arr); + rxr->rx_tpa[0].agg_arr = NULL; + } + kfree(rxr->rx_tpa); + rxr->rx_tpa = NULL; + } + } + + static int bnxt_alloc_tpa_info(struct bnxt *bp) + { + int i, j, total_aggs = 0; + + bp->max_tpa = MAX_TPA; + if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (!bp->max_tpa_v2) + return 0; + bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5); + total_aggs = bp->max_tpa * MAX_SKB_FRAGS; + } + + for (i = 0; i < bp->rx_nr_rings; i++) { + struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; + struct rx_agg_cmp *agg; + + rxr->rx_tpa = kcalloc(bp->max_tpa, sizeof(struct bnxt_tpa_info), + GFP_KERNEL); + if (!rxr->rx_tpa) + return -ENOMEM; + + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + continue; + agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL); + rxr->rx_tpa[0].agg_arr = agg; + if (!agg) + return -ENOMEM; + for (j = 1; j < bp->max_tpa; j++) + rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS; + rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map), + GFP_KERNEL); + if (!rxr->rx_tpa_idx_map) + return -ENOMEM; + } + return 0; + } + static void bnxt_free_rx_rings(struct bnxt *bp) { int i; @@@ -2490,6 -2680,7 +2680,7 @@@ if (!bp->rx_ring) return;
+ bnxt_free_tpa_info(bp); for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring; @@@ -2503,9 -2694,6 +2694,6 @@@ page_pool_destroy(rxr->page_pool); rxr->page_pool = NULL;
- kfree(rxr->rx_tpa); - rxr->rx_tpa = NULL; - kfree(rxr->rx_agg_bmap); rxr->rx_agg_bmap = NULL;
@@@ -2539,7 -2727,7 +2727,7 @@@ static int bnxt_alloc_rx_page_pool(stru
static int bnxt_alloc_rx_rings(struct bnxt *bp) { - int i, rc, agg_rings = 0, tpa_rings = 0; + int i, rc = 0, agg_rings = 0;
if (!bp->rx_ring) return -ENOMEM; @@@ -2547,9 -2735,6 +2735,6 @@@ if (bp->flags & BNXT_FLAG_AGG_RINGS) agg_rings = 1;
- if (bp->flags & BNXT_FLAG_TPA) - tpa_rings = 1; - for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring; @@@ -2591,17 -2776,11 +2776,11 @@@ rxr->rx_agg_bmap = kzalloc(mem_size, GFP_KERNEL); if (!rxr->rx_agg_bmap) return -ENOMEM; - - if (tpa_rings) { - rxr->rx_tpa = kcalloc(MAX_TPA, - sizeof(struct bnxt_tpa_info), - GFP_KERNEL); - if (!rxr->rx_tpa) - return -ENOMEM; - } } } - return 0; + if (bp->flags & BNXT_FLAG_TPA) + rc = bnxt_alloc_tpa_info(bp); + return rc; }
static void bnxt_free_tx_rings(struct bnxt *bp) @@@ -2953,7 -3132,7 +3132,7 @@@ static int bnxt_init_one_rx_ring(struc u8 *data; dma_addr_t mapping;
- for (i = 0; i < MAX_TPA; i++) { + for (i = 0; i < bp->max_tpa; i++) { data = __bnxt_alloc_rx_data(bp, &mapping, GFP_KERNEL); if (!data) @@@ -3468,7 -3647,7 +3647,7 @@@ static void bnxt_free_ring_stats(struc if (!bp->bnapi) return;
- size = sizeof(struct ctx_hw_stats); + size = bp->hw_ring_stats_size;
for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; @@@ -3487,7 -3666,7 +3666,7 @@@ static int bnxt_alloc_stats(struct bnx u32 size, i; struct pci_dev *pdev = bp->pdev;
- size = sizeof(struct ctx_hw_stats); + size = bp->hw_ring_stats_size;
for (i = 0; i < bp->cp_nr_rings; i++) { struct bnxt_napi *bnapi = bp->bnapi[i]; @@@ -4414,6 -4593,7 +4593,7 @@@ static int bnxt_hwrm_clear_vnic_filter( static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags) { struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; + u16 max_aggs = VNIC_TPA_CFG_REQ_MAX_AGGS_MAX; struct hwrm_vnic_tpa_cfg_input req = {0};
if (vnic->fw_vnic_id == INVALID_HW_RING_ID) @@@ -4453,9 -4633,14 +4633,14 @@@ nsegs = (MAX_SKB_FRAGS - n) / n; }
- segs = ilog2(nsegs); + if (bp->flags & BNXT_FLAG_CHIP_P5) { + segs = MAX_TPA_SEGS_P5; + max_aggs = bp->max_tpa; + } else { + segs = ilog2(nsegs); + } req.max_agg_segs = cpu_to_le16(segs); - req.max_aggs = cpu_to_le16(VNIC_TPA_CFG_REQ_MAX_AGGS_MAX); + req.max_aggs = cpu_to_le16(max_aggs);
req.min_agg_len = cpu_to_le32(512); } @@@ -4815,6 -5000,12 +5000,12 @@@ static int bnxt_hwrm_vnic_qcaps(struct if (flags & VNIC_QCAPS_RESP_FLAGS_ROCE_MIRRORING_CAPABLE_VNIC_CAP) bp->flags |= BNXT_FLAG_ROCE_MIRROR_CAP; + bp->max_tpa_v2 = le16_to_cpu(resp->max_aggs_supported); + if (bp->max_tpa_v2) + bp->hw_ring_stats_size = + sizeof(struct ctx_hw_stats_ext); + else + bp->hw_ring_stats_size = sizeof(struct ctx_hw_stats); } mutex_unlock(&bp->hwrm_cmd_lock); return rc; @@@ -5064,7 -5255,6 +5255,7 @@@ static void bnxt_set_db(struct bnxt *bp
static int bnxt_hwrm_ring_alloc(struct bnxt *bp) { + bool agg_rings = !!(bp->flags & BNXT_FLAG_AGG_RINGS); int i, rc = 0; u32 type;
@@@ -5140,9 -5330,7 +5331,9 @@@ if (rc) goto err_out; bnxt_set_db(bp, &rxr->rx_db, type, map_idx, ring->fw_ring_id); - bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); + /* If we have agg rings, post agg buffers first. */ + if (!agg_rings) + bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); bp->grp_info[map_idx].rx_fw_ring_id = ring->fw_ring_id; if (bp->flags & BNXT_FLAG_CHIP_P5) { struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; @@@ -5161,7 -5349,7 +5352,7 @@@ } }
- if (bp->flags & BNXT_FLAG_AGG_RINGS) { + if (agg_rings) { type = HWRM_RING_ALLOC_AGG; for (i = 0; i < bp->rx_nr_rings; i++) { struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; @@@ -5177,7 -5365,6 +5368,7 @@@ bnxt_set_db(bp, &rxr->rx_agg_db, type, map_idx, ring->fw_ring_id); bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); bp->grp_info[grp_idx].agg_fw_ring_id = ring->fw_ring_id; } } @@@ -6016,6 -6203,7 +6207,7 @@@ static int bnxt_hwrm_stat_ctx_alloc(str
bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_ALLOC, -1, -1);
+ req.stats_dma_length = cpu_to_le16(bp->hw_ring_stats_size); req.update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000);
mutex_lock(&bp->hwrm_cmd_lock); @@@ -7020,29 -7208,19 +7212,29 @@@ static void bnxt_hwrm_clear_vnic_rss(st bnxt_hwrm_vnic_set_rss(bp, i, false); }
-static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path, - bool irq_re_init) +static void bnxt_clear_vnic(struct bnxt *bp) { - if (bp->vnic_info) { - bnxt_hwrm_clear_vnic_filter(bp); + if (!bp->vnic_info) + return; + + bnxt_hwrm_clear_vnic_filter(bp); + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) { /* clear all RSS setting before free vnic ctx */ bnxt_hwrm_clear_vnic_rss(bp); bnxt_hwrm_vnic_ctx_free(bp); - /* before free the vnic, undo the vnic tpa settings */ - if (bp->flags & BNXT_FLAG_TPA) - bnxt_set_tpa(bp, false); - bnxt_hwrm_vnic_free(bp); } + /* before free the vnic, undo the vnic tpa settings */ + if (bp->flags & BNXT_FLAG_TPA) + bnxt_set_tpa(bp, false); + bnxt_hwrm_vnic_free(bp); + if (bp->flags & BNXT_FLAG_CHIP_P5) + bnxt_hwrm_vnic_ctx_free(bp); +} + +static void bnxt_hwrm_resource_free(struct bnxt *bp, bool close_path, + bool irq_re_init) +{ + bnxt_clear_vnic(bp); bnxt_hwrm_ring_free(bp, close_path); bnxt_hwrm_ring_grp_free(bp); if (irq_re_init) { @@@ -9306,7 -9484,8 +9498,8 @@@ static int bnxt_set_features(struct net if (changes & BNXT_FLAG_TPA) { update_tpa = true; if ((bp->flags & BNXT_FLAG_TPA) == 0 || - (flags & BNXT_FLAG_TPA) == 0) + (flags & BNXT_FLAG_TPA) == 0 || + (bp->flags & BNXT_FLAG_CHIP_P5)) re_init = true; }
@@@ -9316,9 -9495,8 +9509,8 @@@ if (flags != bp->flags) { u32 old_flags = bp->flags;
- bp->flags = flags; - if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { + bp->flags = flags; if (update_tpa) bnxt_set_ring_params(bp); return rc; @@@ -9326,12 -9504,14 +9518,14 @@@
if (re_init) { bnxt_close_nic(bp, false, false); + bp->flags = flags; if (update_tpa) bnxt_set_ring_params(bp);
return bnxt_open_nic(bp, false, false); } if (update_tpa) { + bp->flags = flags; rc = bnxt_set_tpa(bp, (flags & BNXT_FLAG_TPA) ? true : false); @@@ -9728,6 -9908,68 +9922,68 @@@ static void bnxt_init_dflt_coal(struct bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS; }
+ static int bnxt_fw_init_one_p1(struct bnxt *bp) + { + int rc; + + bp->fw_cap = 0; + rc = bnxt_hwrm_ver_get(bp); + if (rc) + return rc; + + if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) { + rc = bnxt_alloc_kong_hwrm_resources(bp); + if (rc) + bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL; + } + + if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) || + bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) { + rc = bnxt_alloc_hwrm_short_cmd_req(bp); + if (rc) + return rc; + } + rc = bnxt_hwrm_func_reset(bp); + if (rc) + return -ENODEV; + + bnxt_hwrm_fw_set_time(bp); + return 0; + } + + static int bnxt_fw_init_one_p2(struct bnxt *bp) + { + int rc; + + /* Get the MAX capabilities for this function */ + rc = bnxt_hwrm_func_qcaps(bp); + if (rc) { + netdev_err(bp->dev, "hwrm query capability failure rc: %x\n", + rc); + return -ENODEV; + } + + rc = bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(bp); + if (rc) + netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n", + rc); + + rc = bnxt_hwrm_func_drv_rgtr(bp); + if (rc) + return -ENODEV; + + rc = bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0); + if (rc) + return -ENODEV; + + bnxt_hwrm_func_qcfg(bp); + bnxt_hwrm_vnic_qcaps(bp); + bnxt_hwrm_port_led_qcaps(bp); + bnxt_ethtool_init(bp); + bnxt_dcb_init(bp); + return 0; + } + static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) { int rc; @@@ -10683,32 -10925,18 +10939,18 @@@ static int bnxt_init_one(struct pci_de goto init_err_pci_clean;
 	mutex_init(&bp->hwrm_cmd_lock);
-	rc = bnxt_hwrm_ver_get(bp);
+
+	rc = bnxt_fw_init_one_p1(bp);
 	if (rc)
 		goto init_err_pci_clean;

-	if (bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL) {
-		rc = bnxt_alloc_kong_hwrm_resources(bp);
-		if (rc)
-			bp->fw_cap &= ~BNXT_FW_CAP_KONG_MB_CHNL;
-	}
-
-	if ((bp->fw_cap & BNXT_FW_CAP_SHORT_CMD) ||
-	    bp->hwrm_max_ext_req_len > BNXT_HWRM_MAX_REQ_LEN) {
-		rc = bnxt_alloc_hwrm_short_cmd_req(bp);
-		if (rc)
-			goto init_err_pci_clean;
-	}
-
 	if (BNXT_CHIP_P5(bp))
 		bp->flags |= BNXT_FLAG_CHIP_P5;

-	rc = bnxt_hwrm_func_reset(bp);
+	rc = bnxt_fw_init_one_p2(bp);
 	if (rc)
 		goto init_err_pci_clean;
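[Editor's aside] This hunk is where the probe path switches to the two-phase firmware setup added above: bnxt_fw_init_one_p1() brings up the HWRM channel, checks the firmware version and resets the function, while bnxt_fw_init_one_p2() queries capabilities and registers the driver. A condensed sketch of the resulting probe-time ordering; bnxt_probe_fw_setup() is a hypothetical wrapper, not a function from the patch:

static int bnxt_probe_fw_setup(struct bnxt *bp)
{
	int rc;

	rc = bnxt_fw_init_one_p1(bp);	/* HWRM channel, version, func reset */
	if (rc)
		return rc;
	if (BNXT_CHIP_P5(bp))
		bp->flags |= BNXT_FLAG_CHIP_P5;
	return bnxt_fw_init_one_p2(bp);	/* caps, driver registration, qcfg */
}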
- bnxt_hwrm_fw_set_time(bp); - dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | @@@ -10746,41 -10974,14 +10988,14 @@@ bp->gro_func = bnxt_gro_func_5730x; if (BNXT_CHIP_P4(bp)) bp->gro_func = bnxt_gro_func_5731x; + else if (BNXT_CHIP_P5(bp)) + bp->gro_func = bnxt_gro_func_5750x; } if (!BNXT_CHIP_P4_PLUS(bp)) bp->flags |= BNXT_FLAG_DOUBLE_DB;
- rc = bnxt_hwrm_func_drv_rgtr(bp); - if (rc) - goto init_err_pci_clean; - - rc = bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0); - if (rc) - goto init_err_pci_clean; - bp->ulp_probe = bnxt_ulp_probe;
- rc = bnxt_hwrm_queue_qportcfg(bp); - if (rc) { - netdev_err(bp->dev, "hwrm query qportcfg failure rc: %x\n", - rc); - rc = -1; - goto init_err_pci_clean; - } - /* Get the MAX capabilities for this function */ - rc = bnxt_hwrm_func_qcaps(bp); - if (rc) { - netdev_err(bp->dev, "hwrm query capability failure rc: %x\n", - rc); - rc = -1; - goto init_err_pci_clean; - } - - rc = bnxt_hwrm_cfa_adv_flow_mgnt_qcaps(bp); - if (rc) - netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n", - rc); - rc = bnxt_init_mac_addr(bp); if (rc) { dev_err(&pdev->dev, "Unable to initialize mac address.\n"); @@@ -10794,11 -10995,6 +11009,6 @@@ if (rc) goto init_err_pci_clean; } - bnxt_hwrm_func_qcfg(bp); - bnxt_hwrm_vnic_qcaps(bp); - bnxt_hwrm_port_led_qcaps(bp); - bnxt_ethtool_init(bp); - bnxt_dcb_init(bp);
 	/* MTU range: 60 - FW defined max */
 	dev->min_mtu = ETH_ZLEN;
@@@ -10934,8 -11130,7 +11144,7 @@@ shutdown_exit
 #ifdef CONFIG_PM_SLEEP
 static int bnxt_suspend(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct bnxt *bp = netdev_priv(dev);
 	int rc = 0;
@@@ -10951,8 -11146,7 +11160,7 @@@
 static int bnxt_resume(struct device *device)
 {
-	struct pci_dev *pdev = to_pci_dev(device);
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct bnxt *bp = netdev_priv(dev);
 	int rc = 0;
diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 8445a0cce849,3a3d8a9be5ed..b624174c8594 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@@ -137,7 -137,44 +137,44 @@@ reset_coalesce return rc; }
- #define BNXT_NUM_STATS 22 + static const char * const bnxt_ring_stats_str[] = { + "rx_ucast_packets", + "rx_mcast_packets", + "rx_bcast_packets", + "rx_discards", + "rx_drops", + "rx_ucast_bytes", + "rx_mcast_bytes", + "rx_bcast_bytes", + "tx_ucast_packets", + "tx_mcast_packets", + "tx_bcast_packets", + "tx_discards", + "tx_drops", + "tx_ucast_bytes", + "tx_mcast_bytes", + "tx_bcast_bytes", + }; + + static const char * const bnxt_ring_tpa_stats_str[] = { + "tpa_packets", + "tpa_bytes", + "tpa_events", + "tpa_aborts", + }; + + static const char * const bnxt_ring_tpa2_stats_str[] = { + "rx_tpa_eligible_pkt", + "rx_tpa_eligible_bytes", + "rx_tpa_pkt", + "rx_tpa_bytes", + "rx_tpa_errors", + }; + + static const char * const bnxt_ring_sw_stats_str[] = { + "rx_l4_csum_errors", + "missed_irqs", + };
#define BNXT_RX_STATS_ENTRY(counter) \ { BNXT_RX_STATS_OFFSET(counter), __stringify(counter) } @@@ -207,6 -244,20 +244,20 @@@ BNXT_TX_STATS_EXT_COS_ENTRY(6), \ BNXT_TX_STATS_EXT_COS_ENTRY(7) \
+ #define BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(n) \ + BNXT_RX_STATS_EXT_ENTRY(rx_discard_bytes_cos##n), \ + BNXT_RX_STATS_EXT_ENTRY(rx_discard_packets_cos##n) + + #define BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(0), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(1), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(2), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(3), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(4), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(5), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(6), \ + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRY(7) + #define BNXT_RX_STATS_PRI_ENTRY(counter, n) \ { BNXT_RX_STATS_EXT_OFFSET(counter##_cos0), \ __stringify(counter##_pri##n) } @@@ -352,6 -403,7 +403,7 @@@ static const struct BNXT_RX_STATS_EXT_ENTRY(rx_buffer_passed_threshold), BNXT_RX_STATS_EXT_ENTRY(rx_pcs_symbol_err), BNXT_RX_STATS_EXT_ENTRY(rx_corrected_bits), + BNXT_RX_STATS_EXT_DISCARD_COS_ENTRIES, };
static const struct { @@@ -417,9 -469,29 +469,29 @@@ static const struct ARRAY_SIZE(bnxt_tx_pkts_pri_arr)) #define BNXT_NUM_PCIE_STATS ARRAY_SIZE(bnxt_pcie_stats_arr)
+ static int bnxt_get_num_tpa_ring_stats(struct bnxt *bp) + { + if (BNXT_SUPPORTS_TPA(bp)) { + if (bp->max_tpa_v2) + return ARRAY_SIZE(bnxt_ring_tpa2_stats_str); + return ARRAY_SIZE(bnxt_ring_tpa_stats_str); + } + return 0; + } + + static int bnxt_get_num_ring_stats(struct bnxt *bp) + { + int num_stats; + + num_stats = ARRAY_SIZE(bnxt_ring_stats_str) + + ARRAY_SIZE(bnxt_ring_sw_stats_str) + + bnxt_get_num_tpa_ring_stats(bp); + return num_stats * bp->cp_nr_rings; + } + static int bnxt_get_num_stats(struct bnxt *bp) { - int num_stats = BNXT_NUM_STATS * bp->cp_nr_rings; + int num_stats = bnxt_get_num_ring_stats(bp);
num_stats += BNXT_NUM_SW_FUNC_STATS;
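[Editor's aside] The fixed BNXT_NUM_STATS constant is gone: counts are now derived from the string tables themselves, so adding a counter means touching a single array. A small standalone C demo of the counting-and-naming scheme; the array contents are abbreviated and supports_tpa/max_tpa_v2 stand in for the driver's checks:

#include <stdio.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

static const char * const ring_stats[] = { "rx_ucast_packets", "tx_drops" };
static const char * const tpa_stats[]  = { "tpa_packets", "tpa_aborts" };
static const char * const tpa2_stats[] = { "rx_tpa_pkt", "rx_tpa_errors" };
static const char * const sw_stats[]   = { "rx_l4_csum_errors", "missed_irqs" };

static int num_tpa(int supports_tpa, int max_tpa_v2)
{
	if (!supports_tpa)
		return 0;
	return max_tpa_v2 ? (int)ARRAY_SIZE(tpa2_stats)
			  : (int)ARRAY_SIZE(tpa_stats);
}

int main(void)
{
	int cp_nr_rings = 4;	/* stand-in for bp->cp_nr_rings */
	int per_ring = (int)ARRAY_SIZE(ring_stats) +
		       (int)ARRAY_SIZE(sw_stats) + num_tpa(1, 1);
	int j;

	printf("%d stats per ring, %d total\n",
	       per_ring, per_ring * cp_nr_rings);
	/* same "[ring]: name" format that bnxt_get_strings now emits */
	for (j = 0; j < (int)ARRAY_SIZE(ring_stats); j++)
		printf("[%d]: %s\n", 0, ring_stats[j]);
	return 0;
}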
@@@ -460,10 -532,11 +532,11 @@@ static void bnxt_get_ethtool_stats(stru { u32 i, j = 0; struct bnxt *bp = netdev_priv(dev); - u32 stat_fields = sizeof(struct ctx_hw_stats) / 8; + u32 stat_fields = ARRAY_SIZE(bnxt_ring_stats_str) + + bnxt_get_num_tpa_ring_stats(bp);
if (!bp->bnapi) { - j += BNXT_NUM_STATS * bp->cp_nr_rings + BNXT_NUM_SW_FUNC_STATS; + j += bnxt_get_num_ring_stats(bp) + BNXT_NUM_SW_FUNC_STATS; goto skip_ring_stats; }
@@@ -551,56 -624,39 +624,39 @@@ skip_ring_stats static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf) { struct bnxt *bp = netdev_priv(dev); - u32 i; + static const char * const *str; + u32 i, j, num_str;
switch (stringset) { - /* The number of strings must match BNXT_NUM_STATS defined above. */ case ETH_SS_STATS: for (i = 0; i < bp->cp_nr_rings; i++) { - sprintf(buf, "[%d]: rx_ucast_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_mcast_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_bcast_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_discards", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_drops", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_ucast_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_mcast_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_bcast_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_ucast_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_mcast_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_bcast_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_discards", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_drops", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_ucast_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_mcast_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tx_bcast_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tpa_packets", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tpa_bytes", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tpa_events", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: tpa_aborts", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: rx_l4_csum_errors", i); - buf += ETH_GSTRING_LEN; - sprintf(buf, "[%d]: missed_irqs", i); - buf += ETH_GSTRING_LEN; + num_str = ARRAY_SIZE(bnxt_ring_stats_str); + for (j = 0; j < num_str; j++) { + sprintf(buf, "[%d]: %s", i, + bnxt_ring_stats_str[j]); + buf += ETH_GSTRING_LEN; + } + if (!BNXT_SUPPORTS_TPA(bp)) + goto skip_tpa_stats; + + if (bp->max_tpa_v2) { + num_str = ARRAY_SIZE(bnxt_ring_tpa2_stats_str); + str = bnxt_ring_tpa2_stats_str; + } else { + num_str = ARRAY_SIZE(bnxt_ring_tpa_stats_str); + str = bnxt_ring_tpa_stats_str; + } + for (j = 0; j < num_str; j++) { + sprintf(buf, "[%d]: %s", i, str[j]); + buf += ETH_GSTRING_LEN; + } + skip_tpa_stats: + num_str = ARRAY_SIZE(bnxt_ring_sw_stats_str); + for (j = 0; j < num_str; j++) { + sprintf(buf, "[%d]: %s", i, + bnxt_ring_sw_stats_str[j]); + buf += ETH_GSTRING_LEN; + } } for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++) { strcpy(buf, bnxt_sw_func_stats[i].string); @@@ -2016,19 -2072,21 +2072,19 @@@ static int bnxt_flash_package_from_file mutex_lock(&bp->hwrm_cmd_lock); hwrm_err = _hwrm_send_message(bp, &install, sizeof(install), INSTALL_PACKAGE_TIMEOUT); - if (hwrm_err) - goto flash_pkg_exit; - - if (resp->error_code) { + if (hwrm_err) { u8 error_code = ((struct hwrm_err_output *)resp)->cmd_err;
-		if (error_code == NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
+		if (resp->error_code && error_code ==
+		    NVM_INSTALL_UPDATE_CMD_ERR_CODE_FRAG_ERR) {
 			install.flags |= cpu_to_le16(
 			       NVM_INSTALL_UPDATE_REQ_FLAGS_ALLOWED_TO_DEFRAG);
 			hwrm_err = _hwrm_send_message(bp, &install,
 						      sizeof(install),
 						      INSTALL_PACKAGE_TIMEOUT);
-			if (hwrm_err)
-				goto flash_pkg_exit;
 		}
+		if (hwrm_err)
+			goto flash_pkg_exit;
 	}
if (resp->result) { diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index d692251ee252,dd99c55d9a88..ae6a47dd7dc9 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@@ -3236,10 -3236,8 +3236,10 @@@ static ssize_t blocked_fl_write(struct return -ENOMEM;
 	err = bitmap_parse_user(ubuf, count, t, adap->sge.egr_sz);
-	if (err)
+	if (err) {
+		kvfree(t);
 		return err;
+	}
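[Editor's aside] The cxgb4 hunk above plugs a leak: the temporary bitmap was not freed when bitmap_parse_user() failed. The shape of the fix as a self-contained userspace sketch, with parse() standing in for bitmap_parse_user() and plain calloc/free for kvzalloc/kvfree:

#include <stdlib.h>
#include <string.h>

static int parse(const char *s, unsigned long *bm) { return s ? 0 : -1; }

int set_blocked_fl(const char *ubuf, unsigned long *dst, size_t words)
{
	unsigned long *t = calloc(words, sizeof(*t));
	int err;

	if (!t)
		return -12;		/* -ENOMEM */
	err = parse(ubuf, t);
	if (err) {
		free(t);		/* the leak the patch plugs */
		return err;
	}
	memcpy(dst, t, words * sizeof(*t));
	free(t);
	return 0;
}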
 	bitmap_copy(adap->sge.blocked_fl, t, adap->sge.egr_sz);
 	kvfree(t);
@@@ -3531,7 -3529,6 +3531,6 @@@ int t4_setup_debugfs(struct adapter *ad
 {
 	int i;
 	u32 size = 0;
-	struct dentry *de;
static struct t4_debugfs_entry t4_debugfs_files[] = { { "cim_la", &cim_la_fops, 0400, 0 }, @@@ -3642,8 -3639,8 +3641,8 @@@ } }
- de = debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap, - &flash_debugfs_fops, adap->params.sf_size); + debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap, + &flash_debugfs_fops, adap->params.sf_size); debugfs_create_bool("use_backdoor", 0600, adap->debugfs_root, &adap->use_bd); debugfs_create_bool("trace_rss", 0600, diff --combined drivers/net/ethernet/ibm/ibmvnic.c index cebd20f3128d,81a05ea38237..07efa2b40003 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@@ -1485,7 -1485,7 +1485,7 @@@ static netdev_tx_t ibmvnic_xmit(struct
memcpy(dst + cur, page_address(skb_frag_page(frag)) + - frag->page_offset, skb_frag_size(frag)); + skb_frag_off(frag), skb_frag_size(frag)); cur += skb_frag_size(frag); } } else { @@@ -1568,8 -1568,6 +1568,8 @@@ lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num], (u64)tx_buff->indir_dma, (u64)num_entries); + dma_unmap_single(dev, tx_buff->indir_dma, + sizeof(tx_buff->indir_arr), DMA_TO_DEVICE); } else { tx_buff->num_entries = num_entries; lpar_rc = send_subcrq(adapter, handle_array[queue_num], @@@ -2790,6 -2788,7 +2790,6 @@@ static int ibmvnic_complete_tx(struct i union sub_crq *next; int index; int i, j; - u8 *first;
restart_loop: while (pending_scrq(adapter, scrq)) { @@@ -2819,6 -2818,14 +2819,6 @@@
 				txbuff->data_dma[j] = 0;
 			}
-			/* if sub_crq was sent indirectly */
-			first = &txbuff->indir_arr[0].generic.first;
-			if (*first == IBMVNIC_CRQ_CMD) {
-				dma_unmap_single(dev, txbuff->indir_dma,
-						 sizeof(txbuff->indir_arr),
-						 DMA_TO_DEVICE);
-				*first = 0;
-			}
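[Editor's aside] Context for the two ibmvnic hunks: the DMA unmap of the indirect descriptor array moves from TX completion into the transmit path, right after the hypercall that consumes it, so completion no longer has to peek at indir_arr[0].first. A condensed sketch of the new transmit-side sequence, with surrounding declarations as in ibmvnic_xmit above:

	lpar_rc = send_subcrq_indirect(adapter, handle_array[queue_num],
				       (u64)tx_buff->indir_dma,
				       (u64)num_entries);
	/* previously deferred to ibmvnic_complete_tx() */
	dma_unmap_single(dev, tx_buff->indir_dma,
			 sizeof(tx_buff->indir_arr), DMA_TO_DEVICE);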
if (txbuff->last_frag) { dev_kfree_skb_any(txbuff->skb); diff --combined drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7882148abb43,dc7b128c780e..17b7ae9f46ec --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@@ -1785,7 -1785,7 +1785,7 @@@ static bool ixgbe_is_non_eop(struct ixg static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, struct sk_buff *skb) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; unsigned char *va; unsigned int pull_len;
@@@ -1807,7 -1807,7 +1807,7 @@@
/* update all of the pointers */ skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; + skb_frag_off_add(frag, pull_len); skb->data_len -= pull_len; skb->tail += pull_len; } @@@ -1840,11 -1840,11 +1840,11 @@@ static void ixgbe_dma_sync_frag(struct skb_headlen(skb), DMA_FROM_DEVICE); } else { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
dma_sync_single_range_for_cpu(rx_ring->dev, IXGBE_CB(skb)->dma, - frag->page_offset, + skb_frag_off(frag), skb_frag_size(frag), DMA_FROM_DEVICE); } @@@ -7897,8 -7897,11 +7897,8 @@@ static void ixgbe_service_task(struct w return; } if (ixgbe_check_fw_error(adapter)) { - if (!test_bit(__IXGBE_DOWN, &adapter->state)) { - rtnl_lock(); + if (!test_bit(__IXGBE_DOWN, &adapter->state)) unregister_netdev(adapter->netdev); - rtnl_unlock(); - } ixgbe_service_event_complete(adapter); return; } @@@ -8183,7 -8186,7 +8183,7 @@@ static int ixgbe_tx_map(struct ixgbe_ri struct sk_buff *skb = first->skb; struct ixgbe_tx_buffer *tx_buffer; union ixgbe_adv_tx_desc *tx_desc; - struct skb_frag_struct *frag; + skb_frag_t *frag; dma_addr_t dma; unsigned int data_len, size; u32 tx_flags = first->tx_flags; @@@ -8602,7 -8605,8 +8602,8 @@@ netdev_tx_t ixgbe_xmit_frame_ring(struc * otherwise try next time */ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); + count += TXD_USE_COUNT(skb_frag_size( + &skb_shinfo(skb)->frags[f]));
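[Editor's aside] The ixgbe changes here are part of a tree-wide conversion: skb_frag_t is becoming opaque, so drivers must go through accessors instead of poking at page_offset/size directly. The mapping, as a short sketch with skb and pull_len as in ixgbe_pull_tail above:

	skb_frag_t *frag = &skb_shinfo(skb)->frags[0];

	unsigned int off = skb_frag_off(frag);	/* was: frag->page_offset */
	unsigned int len = skb_frag_size(frag);	/* was: frag->size */

	skb_frag_size_sub(frag, pull_len);
	skb_frag_off_add(frag, pull_len);	/* was: frag->page_offset += pull_len */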
if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) { tx_ring->tx_stats.tx_busy++; diff --combined drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 53d09620e215,973f90888b1f..ea934cd02448 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@@ -446,8 -446,6 +446,8 @@@ static int mlx5_internal_err_ret_value( case MLX5_CMD_OP_CREATE_UMEM: case MLX5_CMD_OP_DESTROY_UMEM: case MLX5_CMD_OP_ALLOC_MEMIC: + case MLX5_CMD_OP_MODIFY_XRQ: + case MLX5_CMD_OP_RELEASE_XRQ_ERROR: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@@ -639,8 -637,6 +639,8 @@@ const char *mlx5_command_str(int comman MLX5_COMMAND_STR_CASE(DESTROY_UCTX); MLX5_COMMAND_STR_CASE(CREATE_UMEM); MLX5_COMMAND_STR_CASE(DESTROY_UMEM); + MLX5_COMMAND_STR_CASE(RELEASE_XRQ_ERROR); + MLX5_COMMAND_STR_CASE(MODIFY_XRQ); default: return "unknown command opcode"; } } @@@ -1372,49 -1368,19 +1372,19 @@@ static void clean_debug_files(struct ml debugfs_remove_recursive(dbg->dbg_root); }
- static int create_debugfs_files(struct mlx5_core_dev *dev) + static void create_debugfs_files(struct mlx5_core_dev *dev) { struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; - int err = -ENOMEM; - - if (!mlx5_debugfs_root) - return 0;
dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root); - if (!dbg->dbg_root) - return err; - - dbg->dbg_in = debugfs_create_file("in", 0400, dbg->dbg_root, - dev, &dfops); - if (!dbg->dbg_in) - goto err_dbg;
- dbg->dbg_out = debugfs_create_file("out", 0200, dbg->dbg_root, - dev, &dfops); - if (!dbg->dbg_out) - goto err_dbg; - - dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root, - dev, &olfops); - if (!dbg->dbg_outlen) - goto err_dbg; - - dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root, - &dbg->status); - if (!dbg->dbg_status) - goto err_dbg; - - dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops); - if (!dbg->dbg_run) - goto err_dbg; + debugfs_create_file("in", 0400, dbg->dbg_root, dev, &dfops); + debugfs_create_file("out", 0200, dbg->dbg_root, dev, &dfops); + debugfs_create_file("out_len", 0600, dbg->dbg_root, dev, &olfops); + debugfs_create_u8("status", 0600, dbg->dbg_root, &dbg->status); + debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
 	mlx5_cmdif_debugfs_init(dev);
-
-	return 0;
-
-err_dbg:
-	clean_debug_files(dev);
-	return err;
 }
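[Editor's aside] The mlx5 cmd.c conversion follows current debugfs guidance: creation helpers never need their return values checked, because later calls tolerate an errored parent and the kernel must keep working without debugfs anyway. A minimal sketch of the convention; struct my_dev and my_fops are illustrative, not from the patch:

struct my_dev { u8 status; };
static const struct file_operations my_fops;	/* illustrative */
static struct dentry *dbg_root;

static void my_debugfs_init(struct my_dev *dev)
{
	dbg_root = debugfs_create_dir("mydev", NULL);
	/* no IS_ERR()/NULL checks: later calls cope with a bad parent */
	debugfs_create_u8("status", 0600, dbg_root, &dev->status);
	debugfs_create_file("run", 0200, dbg_root, dev, &my_fops);
}

static void my_debugfs_exit(void)
{
	debugfs_remove_recursive(dbg_root);	/* single teardown call */
}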
static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) @@@ -2011,17 -1977,10 +1981,10 @@@ int mlx5_cmd_init(struct mlx5_core_dev goto err_cache; }
- err = create_debugfs_files(dev); - if (err) { - err = -ENOMEM; - goto err_wq; - } + create_debugfs_files(dev);
return 0;
- err_wq: - destroy_workqueue(cmd->wq); - err_cache: destroy_msg_cache(dev);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en.h index 65bec19a438f,0807992090b8..8cf548c7ad9c --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@@ -184,13 -184,8 +184,13 @@@ static inline int mlx5e_get_max_num_cha
struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; - struct mlx5_wqe_eth_seg eth; - struct mlx5_wqe_data_seg data[0]; + union { + struct { + struct mlx5_wqe_eth_seg eth; + struct mlx5_wqe_data_seg data[0]; + }; + u8 tls_progress_params_ctx[0]; + }; };
struct mlx5e_rx_wqe_ll { @@@ -356,6 -351,7 +356,7 @@@ enum MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_AM, MLX5E_SQ_STATE_TLS, + MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, };
struct mlx5e_sq_wqe_info { @@@ -480,8 -476,6 +481,6 @@@ struct mlx5e_xdp_mpwqe struct mlx5e_tx_wqe *wqe; u8 ds_count; u8 pkt_count; - u8 max_ds_count; - u8 complete; u8 inline_on; };
@@@ -1105,8 -1099,6 +1104,8 @@@ u32 mlx5e_ethtool_get_rxfh_key_size(str u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash); void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam); int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, @@@ -1135,7 -1127,6 +1134,6 @@@ void mlx5e_build_rq_params(struct mlx5_ struct mlx5e_params *params); void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, u16 num_channels); - u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index c7f86453c638,6e54fefea410..817c6ea7e349 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@@ -1,7 -1,6 +1,6 @@@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2019 Mellanox Technologies. */
- #include <net/devlink.h> #include "reporter.h" #include "lib/eq.h"
@@@ -76,21 -75,26 +75,21 @@@ static int mlx5e_tx_reporter_err_cqe_re u8 state; int err;
-	if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
-		return 0;
-
 	err = mlx5_core_query_sq_state(mdev, sq->sqn, &state);
 	if (err) {
 		netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n",
 			   sq->sqn, err);
-		return err;
+		goto out;
 	}

-	if (state != MLX5_SQC_STATE_ERR) {
-		netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn);
-		return -EINVAL;
-	}
+	if (state != MLX5_SQC_STATE_ERR)
+		goto out;
mlx5e_tx_disable_queue(sq->txq);
 	err = mlx5e_wait_for_sq_flush(sq);
 	if (err)
-		return err;
+		goto out;

 	/* At this point, no new packets will arrive from the stack as TXQ is
 	 * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all
@@@ -99,24 -103,20 +98,24 @@@

 	err = mlx5e_sq_to_ready(sq, state);
 	if (err)
-		return err;
+		goto out;

 	mlx5e_reset_txqsq_cc_pc(sq);
 	sq->stats->recover++;
+	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
 	mlx5e_activate_txqsq(sq);

 	return 0;
+out:
+	clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state);
+	return err;
 }
static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter, char *err_str, struct mlx5e_tx_err_ctx *err_ctx) { - if (IS_ERR_OR_NULL(tx_reporter)) { + if (!tx_reporter) { netdev_err(err_ctx->sq->channel->netdev, err_str); return err_ctx->recover(err_ctx->sq); } @@@ -288,23 -288,27 +287,27 @@@ static const struct devlink_health_repo
int mlx5e_tx_reporter_create(struct mlx5e_priv *priv) { + struct devlink_health_reporter *reporter; struct mlx5_core_dev *mdev = priv->mdev; struct devlink *devlink = priv_to_devlink(mdev);
- priv->tx_reporter = + reporter = devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, MLX5_REPORTER_TX_GRACEFUL_PERIOD, true, priv); - if (IS_ERR(priv->tx_reporter)) + if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create tx reporter, err = %ld\n", - PTR_ERR(priv->tx_reporter)); - return IS_ERR_OR_NULL(priv->tx_reporter); + PTR_ERR(reporter)); + return PTR_ERR(reporter); + } + priv->tx_reporter = reporter; + return 0; }
void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv) { - if (IS_ERR_OR_NULL(priv->tx_reporter)) + if (!priv->tx_reporter) return;
devlink_health_reporter_destroy(priv->tx_reporter); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 7f78c004d12f,f701e4f3c076..2c4d1f415968 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@@ -60,24 -60,28 +60,28 @@@ int mlx5e_open_xsk(struct mlx5e_priv *p struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, struct mlx5e_channel *c) { - struct mlx5e_channel_param cparam = {}; + struct mlx5e_channel_param *cparam; struct dim_cq_moder icocq_moder = {}; int err;
if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) return -EINVAL;
- mlx5e_build_xsk_cparam(priv, params, xsk, &cparam); + cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); + if (!cparam) + return -ENOMEM;
- err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq); + mlx5e_build_xsk_cparam(priv, params, xsk, cparam); + + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->xskrq.cq); if (unlikely(err)) - return err; + goto err_free_cparam;
- err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq); + err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq); if (unlikely(err)) goto err_close_rx_cq;
- err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq); + err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xsksq.cq); if (unlikely(err)) goto err_close_rq;
@@@ -87,21 -91,23 +91,23 @@@ * is disabled and then reenabled, but the SQ continues receiving CQEs * from the old UMEM. */ - err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true); + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true); if (unlikely(err)) goto err_close_tx_cq;
- err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq); + err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->xskicosq.cq); if (unlikely(err)) goto err_close_sq;
/* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be * triggered and NAPI to be called on the correct CPU. */ - err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->xskicosq); if (unlikely(err)) goto err_close_icocq;
+ kvfree(cparam); + spin_lock_init(&c->xskicosq_lock);
set_bit(MLX5E_CHANNEL_STATE_XSK, c->state); @@@ -123,6 -129,9 +129,9 @@@ err_close_rq err_close_rx_cq: mlx5e_close_cq(&c->xskrq.cq);
+err_free_cparam:
+	kvfree(cparam);
 	return err;
 }
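[Editor's aside] The xsk setup change moves a large mlx5e_channel_param off the kernel stack onto the heap; the cost is that every failure path must now funnel through a label that frees it. A runnable userspace sketch of that unwind pattern; the names are stand-ins, and open_rq() fails on purpose to exercise the error path:

#include <stdlib.h>

struct chan_param { char big[8192]; };	/* too big for a stack frame */

static int open_cq(struct chan_param *p) { (void)p; return 0; }
static int open_rq(struct chan_param *p) { (void)p; return -1; }
static void close_cq(void) { }

int open_xsk(void)
{
	struct chan_param *cparam = calloc(1, sizeof(*cparam));
	int err;

	if (!cparam)
		return -12;		/* -ENOMEM */
	err = open_cq(cparam);
	if (err)
		goto err_free_cparam;
	err = open_rq(cparam);
	if (err)
		goto err_close_cq;

	free(cparam);			/* success: params no longer needed */
	return 0;

err_close_cq:
	close_cq();
err_free_cparam:
	free(cparam);
	return err;
}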
@@@ -143,10 -152,7 +152,10 @@@ void mlx5e_activate_xsk(struct mlx5e_ch { set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); /* TX queue is created active. */ + + spin_lock(&c->xskicosq_lock); mlx5e_trigger_irq(&c->xskicosq); + spin_unlock(&c->xskicosq_lock); }
void mlx5e_deactivate_xsk(struct mlx5e_channel *c) diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 20e628c907e5,02530b50609c..7347d673f448 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@@ -1081,14 -1081,6 +1081,14 @@@ int mlx5e_ethtool_set_link_ksettings(st link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) : mlx5e_port_speed2linkmodes(mdev, speed, !ext);
+ if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) && + autoneg != AUTONEG_ENABLE) { + netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n", + __func__); + err = -EINVAL; + goto out; + } + link_modes = link_modes & eproto.cap; if (!link_modes) { netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", @@@ -1346,9 -1338,6 +1346,9 @@@ int mlx5e_ethtool_set_pauseparam(struc struct mlx5_core_dev *mdev = priv->mdev; int err;
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EOPNOTSUPP; + if (pauseparam->autoneg) return -EINVAL;
@@@ -1690,40 -1679,6 +1690,40 @@@ static int mlx5e_get_module_eeprom(stru return 0; }
+int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *dev = priv->netdev; + const struct firmware *fw; + int err; + + if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, flash->data, &dev->dev); + if (err) + return err; + + dev_hold(dev); + rtnl_unlock(); + + err = mlx5_firmware_flash(mdev, fw, NULL); + release_firmware(fw); + + rtnl_lock(); + dev_put(dev); + return err; +} + +static int mlx5e_flash_device(struct net_device *dev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, bool is_rx_cq) { @@@ -1958,21 -1913,27 +1958,27 @@@ static u32 mlx5e_get_priv_flags(struct return priv->channels.params.pflags; }
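[Editor's aside] mlx5e_ethtool_flash_device() shows a pattern worth calling out: ethtool callbacks run under RTNL, and flashing firmware can take many seconds, so the handler pins the netdev with dev_hold() and drops RTNL for the duration of the write. Condensed from the function above:

	dev_hold(dev);		/* keep the netdev alive while unlocked */
	rtnl_unlock();		/* ethtool entered with RTNL held */

	err = mlx5_firmware_flash(mdev, fw, NULL);	/* long-running */

	rtnl_lock();		/* caller expects RTNL held on return */
	dev_put(dev);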
- #ifndef CONFIG_MLX5_EN_RXNFC - /* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS - * otherwise this function will be defined from en_fs_ethtool.c - */ static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) { struct mlx5e_priv *priv = netdev_priv(dev);
- if (info->cmd != ETHTOOL_GRXRINGS) - return -EOPNOTSUPP; - /* ring_count is needed by ethtool -x */ - info->data = priv->channels.params.num_channels; - return 0; + /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part + * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc, + * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc + * is compiled out via CONFIG_MLX5_EN_RXNFC=n. + */ + if (info->cmd == ETHTOOL_GRXRINGS) { + info->data = priv->channels.params.num_channels; + return 0; + } + + return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs); + } + + static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) + { + return mlx5e_ethtool_set_rxnfc(dev, cmd); } - #endif
const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, @@@ -1993,9 -1954,7 +1999,7 @@@ .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, .get_rxnfc = mlx5e_get_rxnfc, - #ifdef CONFIG_MLX5_EN_RXNFC .set_rxnfc = mlx5e_set_rxnfc, - #endif .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, @@@ -2006,7 -1965,6 +2010,7 @@@ .set_wol = mlx5e_set_wol, .get_module_info = mlx5e_get_module_info, .get_module_eeprom = mlx5e_get_module_eeprom, + .flash_device = mlx5e_flash_device, .get_priv_flags = mlx5e_get_priv_flags, .set_priv_flags = mlx5e_set_priv_flags, .self_test = mlx5e_self_test, diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9d5f6e56188f,9a2fcef6e7f0..0c8e847a9eee --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@@ -1130,6 -1130,8 +1130,8 @@@ static int mlx5e_alloc_txqsq(struct mlx sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; sq->stop_room = MLX5E_SQ_STOP_ROOM; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); + if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (mlx5_accel_is_tls_device(c->priv->mdev)) { @@@ -1321,6 -1323,7 +1323,6 @@@ err_free_txqsq void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) { sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); - clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); netdev_tx_reset_queue(sq->txq); netif_tx_start_queue(sq->txq); @@@ -2321,7 -2324,7 +2323,7 @@@ int mlx5e_open_channels(struct mlx5e_pr goto err_close_channels; }
- if (!IS_ERR_OR_NULL(priv->tx_reporter)) + if (priv->tx_reporter) devlink_health_reporter_state_update(priv->tx_reporter, DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
@@@ -3422,7 -3425,7 +3424,7 @@@ out #ifdef CONFIG_MLX5_ESWITCH static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, struct flow_cls_offload *cls_flower, - int flags) + unsigned long flags) { switch (cls_flower->command) { case FLOW_CLS_REPLACE: @@@ -3442,12 -3445,12 +3444,12 @@@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { + unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD); struct mlx5e_priv *priv = cb_priv;
switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS | - MLX5E_TC_NIC_OFFLOAD); + return mlx5e_setup_tc_cls_flower(priv, type_data, flags); default: return -EOPNOTSUPP; } @@@ -3640,7 -3643,7 +3642,7 @@@ static int set_feature_tc_num_filters(s { struct mlx5e_priv *priv = netdev_priv(netdev);
- if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) { + if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; @@@ -3781,9 -3784,10 +3783,10 @@@ static netdev_features_t mlx5e_fix_feat netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); } if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) { - features &= ~NETIF_F_LRO; - if (params->lro_en) + if (features & NETIF_F_LRO) { netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); + features &= ~NETIF_F_LRO; + } }
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { @@@ -3950,7 -3954,8 +3953,8 @@@ int mlx5e_hwstamp_set(struct mlx5e_pri case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: /* Disable CQE compression */ - netdev_warn(priv->netdev, "Disabling cqe compression"); + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) + netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); err = mlx5e_modify_rx_cqe_compression_locked(priv, false); if (err) { netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); @@@ -4768,7 -4773,7 +4772,7 @@@ void mlx5e_build_nic_params(struct mlx5 mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
/* TX inline */ - params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode);
/* RSS */ mlx5e_build_rss_params(rss_params, params->num_channels); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 00b2d4a86159,5be3da621499..c57f7533a6d0 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@@ -38,6 -38,8 +38,8 @@@ #include <linux/mlx5/fs.h> #include <linux/mlx5/device.h> #include <linux/rhashtable.h> + #include <linux/refcount.h> + #include <linux/completion.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> #include <net/tc_act/tc_tunnel_key.h> @@@ -65,19 -67,20 +67,20 @@@ struct mlx5_nic_flow_attr struct mlx5_fc *counter; };
- #define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1) + #define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
enum { - MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS, - MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS, - MLX5E_TC_FLOW_ESWITCH = MLX5E_TC_ESW_OFFLOAD, - MLX5E_TC_FLOW_NIC = MLX5E_TC_NIC_OFFLOAD, - MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE), - MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 1), - MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2), - MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 3), - MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 4), - MLX5E_TC_FLOW_NOT_READY = BIT(MLX5E_TC_FLOW_BASE + 5), + MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT, + MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT, + MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE, + MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1, + MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2, + MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3, + MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, + MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, + MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, };
#define MLX5E_TC_MAX_SPLITS 1 @@@ -100,6 -103,7 +103,7 @@@ * container_of(helper item, containing struct type, helper field[index]) */ struct encap_flow_item { + struct mlx5e_encap_entry *e; /* attached encap instance */ struct list_head list; int index; }; @@@ -108,7 -112,7 +112,7 @@@ struct mlx5e_tc_flow struct rhash_head node; struct mlx5e_priv *priv; u64 cookie; - u16 flags; + unsigned long flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; /* Flow can be associated with multiple encap IDs. * The number of encaps is bounded by the number of supported @@@ -116,10 -120,14 +120,14 @@@ */ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5e_tc_flow *peer_flow; + struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */ struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ + struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ struct list_head hairpin; /* flows sharing the same hairpin */ struct list_head peer; /* flows with peer flow */ struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */ + refcount_t refcnt; + struct rcu_head rcu_head; union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@@ -157,12 -165,20 +165,20 @@@ struct mlx5e_hairpin_entry /* a node of a hash table which keeps all the hairpin entries */ struct hlist_node hairpin_hlist;
+ /* protects flows list */ + spinlock_t flows_lock; /* flows sharing the same hairpin */ struct list_head flows; + /* hpe's that were not fully initialized when dead peer update event + * function traversed them. + */ + struct list_head dead_peer_wait_list;
u16 peer_vhca_id; u8 prio; struct mlx5e_hairpin *hp; + refcount_t refcnt; + struct completion res_ready; };
struct mod_hdr_key { @@@ -174,16 -190,93 +190,93 @@@ struct mlx5e_mod_hdr_entry /* a node of a hash table which keeps all the mod_hdr entries */ struct hlist_node mod_hdr_hlist;
+ /* protects flows list */ + spinlock_t flows_lock; /* flows sharing the same mod_hdr entry */ struct list_head flows;
struct mod_hdr_key key;
u32 mod_hdr_id; + + refcount_t refcnt; + struct completion res_ready; + int compl_result; };
#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
+ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + + static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow) + { + if (!flow || !refcount_inc_not_zero(&flow->refcnt)) + return ERR_PTR(-EINVAL); + return flow; + } + + static void mlx5e_flow_put(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) + { + if (refcount_dec_and_test(&flow->refcnt)) { + mlx5e_tc_del_flow(priv, flow); + kfree_rcu(flow, rcu_head); + } + } + + static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag) + { + /* Complete all memory stores before setting bit. */ + smp_mb__before_atomic(); + set_bit(flag, &flow->flags); + } + + #define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag) + + static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow, + unsigned long flag) + { + /* test_and_set_bit() provides all necessary barriers */ + return test_and_set_bit(flag, &flow->flags); + } + + #define flow_flag_test_and_set(flow, flag) \ + __flow_flag_test_and_set(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + + static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag) + { + /* Complete all memory stores before clearing bit. */ + smp_mb__before_atomic(); + clear_bit(flag, &flow->flags); + } + + #define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + + static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag) + { + bool ret = test_bit(flag, &flow->flags); + + /* Read fields of flow structure only after checking flags. */ + smp_mb__after_atomic(); + return ret; + } + + #define flow_flag_test(flow, flag) __flow_flag_test(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + + static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) + { + return flow_flag_test(flow, ESWITCH); + } + + static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) + { + return flow_flag_test(flow, OFFLOADED); + } + static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key) { return jhash(key->actions, @@@ -199,15 -292,62 +292,62 @@@ static inline int cmp_mod_hdr_info(stru return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ); }
+ static struct mod_hdr_tbl * + get_mod_hdr_table(struct mlx5e_priv *priv, int namespace) + { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr : + &priv->fs.tc.mod_hdr; + } + + static struct mlx5e_mod_hdr_entry * + mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key) + { + struct mlx5e_mod_hdr_entry *mh, *found = NULL; + + hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) { + if (!cmp_mod_hdr_info(&mh->key, key)) { + refcount_inc(&mh->refcnt); + found = mh; + break; + } + } + + return found; + } + + static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv, + struct mlx5e_mod_hdr_entry *mh, + int namespace) + { + struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace); + + if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock)) + return; + hash_del(&mh->mod_hdr_hlist); + mutex_unlock(&tbl->lock); + + WARN_ON(!list_empty(&mh->flows)); + if (mh->compl_result > 0) + mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id); + + kfree(mh); + } + + static int get_flow_name_space(struct mlx5e_tc_flow *flow) + { + return mlx5e_is_eswitch_flow(flow) ? + MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL; + } static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5e_tc_flow_parse_attr *parse_attr) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int num_actions, actions_size, namespace, err; struct mlx5e_mod_hdr_entry *mh; + struct mod_hdr_tbl *tbl; struct mod_hdr_key key; - bool found = false; u32 hash_key;
num_actions = parse_attr->num_mod_hdr_actions; @@@ -218,80 -358,82 +358,82 @@@
hash_key = hash_mod_hdr_info(&key);
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - namespace = MLX5_FLOW_NAMESPACE_FDB; - hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh, - mod_hdr_hlist, hash_key) { - if (!cmp_mod_hdr_info(&mh->key, &key)) { - found = true; - break; - } - } - } else { - namespace = MLX5_FLOW_NAMESPACE_KERNEL; - hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh, - mod_hdr_hlist, hash_key) { - if (!cmp_mod_hdr_info(&mh->key, &key)) { - found = true; - break; - } - } - } + namespace = get_flow_name_space(flow); + tbl = get_mod_hdr_table(priv, namespace); + + mutex_lock(&tbl->lock); + mh = mlx5e_mod_hdr_get(tbl, &key, hash_key); + if (mh) { + mutex_unlock(&tbl->lock); + wait_for_completion(&mh->res_ready);
- if (found) + if (mh->compl_result < 0) { + err = -EREMOTEIO; + goto attach_header_err; + } goto attach_flow; + }
mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL); - if (!mh) + if (!mh) { + mutex_unlock(&tbl->lock); return -ENOMEM; + }
mh->key.actions = (void *)mh + sizeof(*mh); memcpy(mh->key.actions, key.actions, actions_size); mh->key.num_actions = num_actions; + spin_lock_init(&mh->flows_lock); INIT_LIST_HEAD(&mh->flows); + refcount_set(&mh->refcnt, 1); + init_completion(&mh->res_ready); + + hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key); + mutex_unlock(&tbl->lock);
err = mlx5_modify_header_alloc(priv->mdev, namespace, mh->key.num_actions, mh->key.actions, &mh->mod_hdr_id); - if (err) - goto out_err; - - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) - hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); - else - hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); + if (err) { + mh->compl_result = err; + goto alloc_header_err; + } + mh->compl_result = 1; + complete_all(&mh->res_ready);
attach_flow: + flow->mh = mh; + spin_lock(&mh->flows_lock); list_add(&flow->mod_hdr, &mh->flows); - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + spin_unlock(&mh->flows_lock); + if (mlx5e_is_eswitch_flow(flow)) flow->esw_attr->mod_hdr_id = mh->mod_hdr_id; else flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;
return 0;
-out_err:
-	kfree(mh);
+alloc_header_err:
+	complete_all(&mh->res_ready);
+attach_header_err:
+	mlx5e_mod_hdr_put(priv, mh, namespace);
 	return err;
 }
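[Editor's aside] The reworked mod_hdr attach is one instance of a lifecycle scheme used throughout this series: an entry is published in the hash table with refcount 1 plus a completion, so concurrent users can take a reference, drop the table lock, and wait for the first creator to finish the slow firmware call. A userspace C analogue of the get/put half; note the kernel's refcount_dec_and_mutex_lock() additionally avoids taking the lock on the non-final put:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct entry {
	atomic_int refcnt;
	pthread_mutex_t *tbl_lock;	/* lock of the owning hash table */
	int compl_result;		/* >0: resource ready, <0: creator failed */
};

struct entry *entry_get(struct entry *e)
{
	atomic_fetch_add(&e->refcnt, 1);
	return e;
}

void entry_put(struct entry *e)
{
	pthread_mutex_lock(e->tbl_lock);
	if (atomic_fetch_sub(&e->refcnt, 1) != 1) {
		pthread_mutex_unlock(e->tbl_lock);
		return;
	}
	/* last reference: unhash under the lock, free outside it */
	pthread_mutex_unlock(e->tbl_lock);
	free(e);
}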
 static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
 				 struct mlx5e_tc_flow *flow)
 {
-	struct list_head *next = flow->mod_hdr.next;
+	/* flow wasn't fully initialized */
+	if (!flow->mh)
+		return;

+	spin_lock(&flow->mh->flows_lock);
 	list_del(&flow->mod_hdr);
+	spin_unlock(&flow->mh->flows_lock);

-	if (list_empty(next)) {
-		struct mlx5e_mod_hdr_entry *mh;
-
-		mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows);
-
-		mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id);
-		hash_del(&mh->mod_hdr_hlist);
-		kfree(mh);
-	}
+	mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow));
+	flow->mh = NULL;
 }
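[Editor's aside] flow->flags also switched from a plain u16 to atomic bitops, with explicit barriers so a reader that sees a flag set also sees every store that preceded setting it (see the __flow_flag_set()/__flow_flag_test() helpers earlier in this file's diff). A C11 analogue of that acquire/release pairing; 'flags' stands in for flow->flags:

#include <stdatomic.h>
#include <stdbool.h>

#define FLAG_OFFLOADED (1ul << 0)

static _Atomic unsigned long flags;

void flag_set_release(void)
{
	/* like smp_mb__before_atomic() + set_bit() */
	atomic_fetch_or_explicit(&flags, FLAG_OFFLOADED,
				 memory_order_release);
}

bool flag_test_acquire(void)
{
	/* like test_bit() + smp_mb__after_atomic() */
	return atomic_load_explicit(&flags, memory_order_acquire) &
	       FLAG_OFFLOADED;
}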
static @@@ -555,13 -697,35 +697,35 @@@ static struct mlx5e_hairpin_entry *mlx5
hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe, hairpin_hlist, hash_key) { - if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) + if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) { + refcount_inc(&hpe->refcnt); return hpe; + } }
return NULL; }
+ static void mlx5e_hairpin_put(struct mlx5e_priv *priv, + struct mlx5e_hairpin_entry *hpe) + { + /* no more hairpin flows for us, release the hairpin pair */ + if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock)) + return; + hash_del(&hpe->hairpin_hlist); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + + if (!IS_ERR_OR_NULL(hpe->hp)) { + netdev_dbg(priv->netdev, "del hairpin: peer %s\n", + dev_name(hpe->hp->pair->peer_mdev->device)); + + mlx5e_hairpin_destroy(hpe->hp); + } + + WARN_ON(!list_empty(&hpe->flows)); + kfree(hpe); + } + #define UNKNOWN_MATCH_PRIO 8
static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, @@@ -627,17 -791,37 +791,37 @@@ static int mlx5e_hairpin_flow_add(struc extack); if (err) return err; + + mutex_lock(&priv->fs.tc.hairpin_tbl_lock); hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); - if (hpe) + if (hpe) { + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + wait_for_completion(&hpe->res_ready); + + if (IS_ERR(hpe->hp)) { + err = -EREMOTEIO; + goto out_err; + } goto attach_flow; + }
hpe = kzalloc(sizeof(*hpe), GFP_KERNEL); - if (!hpe) + if (!hpe) { + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); return -ENOMEM; + }
+ spin_lock_init(&hpe->flows_lock); INIT_LIST_HEAD(&hpe->flows); + INIT_LIST_HEAD(&hpe->dead_peer_wait_list); hpe->peer_vhca_id = peer_id; hpe->prio = match_prio; + refcount_set(&hpe->refcnt, 1); + init_completion(&hpe->res_ready); + + hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, + hash_hairpin_info(peer_id, match_prio)); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
params.log_data_size = 15; params.log_data_size = min_t(u8, params.log_data_size, @@@ -659,9 -843,11 +843,11 @@@ params.num_channels = link_speed64;
hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); + hpe->hp = hp; + complete_all(&hpe->res_ready); if (IS_ERR(hp)) { err = PTR_ERR(hp); - goto create_hairpin_err; + goto out_err; }
netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", @@@ -669,46 -855,39 +855,39 @@@ dev_name(hp->pair->peer_mdev->device), hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
- hpe->hp = hp; - hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, - hash_hairpin_info(peer_id, match_prio)); - attach_flow: if (hpe->hp->num_channels > 1) { - flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS; + flow_flag_set(flow, HAIRPIN_RSS); flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; } else { flow->nic_attr->hairpin_tirn = hpe->hp->tirn; } + + flow->hpe = hpe; + spin_lock(&hpe->flows_lock); list_add(&flow->hairpin, &hpe->flows); + spin_unlock(&hpe->flows_lock);
return 0;
- create_hairpin_err: - kfree(hpe); + out_err: + mlx5e_hairpin_put(priv, hpe); return err; }
static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - struct list_head *next = flow->hairpin.next; + /* flow wasn't fully initialized */ + if (!flow->hpe) + return;
+ spin_lock(&flow->hpe->flows_lock); list_del(&flow->hairpin); + spin_unlock(&flow->hpe->flows_lock);
- /* no more hairpin flows for us, release the hairpin pair */ - if (list_empty(next)) { - struct mlx5e_hairpin_entry *hpe; - - hpe = list_entry(next, struct mlx5e_hairpin_entry, flows); - - netdev_dbg(priv->netdev, "del hairpin: peer %s\n", - dev_name(hpe->hp->pair->peer_mdev->device)); - - mlx5e_hairpin_destroy(hpe->hp); - hash_del(&hpe->hairpin_hlist); - kfree(hpe); - } + mlx5e_hairpin_put(priv, flow->hpe); + flow->hpe = NULL; }
static int @@@ -727,18 -906,17 +906,17 @@@ mlx5e_tc_add_nic_flow(struct mlx5e_pri .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; - bool table_created = false; int err, dest_ix = 0;
flow_context->flags |= FLOW_CONTEXT_HAS_TAG; flow_context->flow_tag = attr->flow_tag;
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { + if (flow_flag_test(flow, HAIRPIN)) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); - if (err) { - goto err_add_hairpin_flow; - } - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) { + if (err) + return err; + + if (flow_flag_test(flow, HAIRPIN_RSS)) { dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[dest_ix].ft = attr->hairpin_ft; } else { @@@ -754,10 -932,9 +932,9 @@@
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); - goto err_fc_create; - } + if (IS_ERR(counter)) + return PTR_ERR(counter); + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[dest_ix].counter_id = mlx5_fc_id(counter); dest_ix++; @@@ -769,9 -946,10 +946,10 @@@ flow_act.modify_id = attr->mod_hdr_id; kfree(parse_attr->mod_hdr_actions); if (err) - goto err_create_mod_hdr_id; + return err; }
+ mutex_lock(&priv->fs.tc.t_lock); if (IS_ERR_OR_NULL(priv->fs.tc.t)) { int tc_grp_size, tc_tbl_size; u32 max_flow_counter; @@@ -791,15 -969,13 +969,13 @@@ MLX5E_TC_TABLE_NUM_GROUPS, MLX5E_TC_FT_LEVEL, 0); if (IS_ERR(priv->fs.tc.t)) { + mutex_unlock(&priv->fs.tc.t_lock); NL_SET_ERR_MSG_MOD(extack, "Failed to create tc offload table\n"); netdev_err(priv->netdev, "Failed to create tc offload table\n"); - err = PTR_ERR(priv->fs.tc.t); - goto err_create_ft; + return PTR_ERR(priv->fs.tc.t); } - - table_created = true; }
if (attr->match_level != MLX5_MATCH_NONE) @@@ -807,29 -983,12 +983,12 @@@
flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, &flow_act, dest, dest_ix); + mutex_unlock(&priv->fs.tc.t_lock);
- if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - goto err_add_rule; - } + if (IS_ERR(flow->rule[0])) + return PTR_ERR(flow->rule[0]);
return 0; - - err_add_rule: - if (table_created) { - mlx5_destroy_flow_table(priv->fs.tc.t); - priv->fs.tc.t = NULL; - } - err_create_ft: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5e_detach_mod_hdr(priv, flow); - err_create_mod_hdr_id: - mlx5_fc_destroy(dev, counter); - err_fc_create: - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) - mlx5e_hairpin_flow_del(priv, flow); - err_add_hairpin_flow: - return err; }
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, @@@ -839,18 -998,21 +998,21 @@@ struct mlx5_fc *counter = NULL;
counter = attr->counter; - mlx5_del_flow_rules(flow->rule[0]); + if (!IS_ERR_OR_NULL(flow->rule[0])) + mlx5_del_flow_rules(flow->rule[0]); mlx5_fc_destroy(priv->mdev, counter);
- if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD) && priv->fs.tc.t) { + mutex_lock(&priv->fs.tc.t_lock); + if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } + mutex_unlock(&priv->fs.tc.t_lock);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow);
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) + if (flow_flag_test(flow, HAIRPIN)) mlx5e_hairpin_flow_del(priv, flow); }
@@@ -885,7 -1047,6 +1047,6 @@@ mlx5e_tc_offload_fdb_rules(struct mlx5_ } }
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED; return rule; }
@@@ -894,7 -1055,7 +1055,7 @@@ mlx5e_tc_unoffload_fdb_rules(struct mlx struct mlx5e_tc_flow *flow, struct mlx5_esw_flow_attr *attr) { - flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + flow_flag_clear(flow, OFFLOADED);
if (attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); @@@ -917,7 -1078,7 +1078,7 @@@ mlx5e_tc_offload_to_slow_path(struct ml
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) - flow->flags |= MLX5E_TC_FLOW_SLOW; + flow_flag_set(flow, SLOW);
return rule; } @@@ -932,7 -1093,26 +1093,26 @@@ mlx5e_tc_unoffload_from_slow_path(struc slow_attr->split_count = 0; slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); - flow->flags &= ~MLX5E_TC_FLOW_SLOW; + flow_flag_clear(flow, SLOW); + } + + /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. + */ + static void unready_flow_add(struct mlx5e_tc_flow *flow, + struct list_head *unready_flows) + { + flow_flag_set(flow, NOT_READY); + list_add_tail(&flow->unready, unready_flows); + } + + /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. + */ + static void unready_flow_del(struct mlx5e_tc_flow *flow) + { + list_del(&flow->unready); + flow_flag_clear(flow, NOT_READY); }
static void add_unready_flow(struct mlx5e_tc_flow *flow) @@@ -945,14 -1125,24 +1125,24 @@@ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); uplink_priv = &rpriv->uplink_priv;
- flow->flags |= MLX5E_TC_FLOW_NOT_READY; - list_add_tail(&flow->unready, &uplink_priv->unready_flows); + mutex_lock(&uplink_priv->unready_flows_lock); + unready_flow_add(flow, &uplink_priv->unready_flows); + mutex_unlock(&uplink_priv->unready_flows_lock); }
static void remove_unready_flow(struct mlx5e_tc_flow *flow) { - list_del(&flow->unready); - flow->flags &= ~MLX5E_TC_FLOW_NOT_READY; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &rpriv->uplink_priv; + + mutex_lock(&uplink_priv->unready_flows_lock); + unready_flow_del(flow); + mutex_unlock(&uplink_priv->unready_flows_lock); }
static int @@@ -980,14 -1170,12 +1170,12 @@@ mlx5e_tc_add_fdb_flow(struct mlx5e_pri
if (attr->chain > max_chain) { NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); - err = -EOPNOTSUPP; - goto err_max_prio_chain; + return -EOPNOTSUPP; }
if (attr->prio > max_prio) { NL_SET_ERR_MSG(extack, "Requested priority is out of supported range"); - err = -EOPNOTSUPP; - goto err_max_prio_chain; + return -EOPNOTSUPP; }
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { @@@ -1002,7 -1190,7 +1190,7 @@@ err = mlx5e_attach_encap(priv, flow, out_dev, out_index, extack, &encap_dev, &encap_valid); if (err) - goto err_attach_encap; + return err;
out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; @@@ -1012,21 -1200,19 +1200,19 @@@
err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) - goto err_add_vlan; + return err;
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); kfree(parse_attr->mod_hdr_actions); if (err) - goto err_mod_hdr; + return err; }
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(attr->counter_dev, true); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); - goto err_create_counter; - } + if (IS_ERR(counter)) + return PTR_ERR(counter);
attr->counter = counter; } @@@ -1044,27 -1230,12 +1230,12 @@@ flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); }
- if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - goto err_add_rule; - } + if (IS_ERR(flow->rule[0])) + return PTR_ERR(flow->rule[0]); + else + flow_flag_set(flow, OFFLOADED);
return 0; - - err_add_rule: - mlx5_fc_destroy(attr->counter_dev, counter); - err_create_counter: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5e_detach_mod_hdr(priv, flow); - err_mod_hdr: - mlx5_eswitch_del_vlan_action(esw, attr); - err_add_vlan: - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) - if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) - mlx5e_detach_encap(priv, flow, out_index); - err_attach_encap: - err_max_prio_chain: - return err; }
static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) @@@ -1088,14 -1259,14 +1259,14 @@@ static void mlx5e_tc_del_fdb_flow(struc struct mlx5_esw_flow_attr slow_attr; int out_index;
- if (flow->flags & MLX5E_TC_FLOW_NOT_READY) { + if (flow_flag_test(flow, NOT_READY)) { remove_unready_flow(flow); kvfree(attr->parse_attr); return; }
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - if (flow->flags & MLX5E_TC_FLOW_SLOW) + if (mlx5e_is_offloaded_flow(flow)) { + if (flow_flag_test(flow, SLOW)) mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); else mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); @@@ -1123,9 -1294,9 +1294,9 @@@ void mlx5e_tc_encap_flows_add(struct ml { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr slow_attr, *esw_attr; + struct encap_flow_item *efi, *tmp; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; - struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err;
@@@ -1142,11 -1313,14 +1313,14 @@@ e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(priv);
- list_for_each_entry(efi, &e->flows, list) { + list_for_each_entry_safe(efi, tmp, &e->flows, list) { bool all_flow_encaps_valid = true; int i;
flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + esw_attr = flow->esw_attr; spec = &esw_attr->parse_attr->spec;
@@@ -1166,19 -1340,23 +1340,23 @@@ } /* Do not offload flows with unresolved neighbors */ if (!all_flow_encaps_valid) - continue; + goto loop_cont; /* update from slow path rule to encap rule */ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr); if (IS_ERR(rule)) { err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", err); - continue; + goto loop_cont; }
mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */ flow->rule[0] = rule; + /* was unset when slow path rule removed */ + flow_flag_set(flow, OFFLOADED); + + loop_cont: + mlx5e_flow_put(priv, flow); } }
@@@ -1187,14 -1365,17 +1365,17 @@@ void mlx5e_tc_encap_flows_del(struct ml { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr slow_attr; + struct encap_flow_item *efi, *tmp; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; - struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err;
- list_for_each_entry(efi, &e->flows, list) { + list_for_each_entry_safe(efi, tmp, &e->flows, list) { flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + spec = &flow->esw_attr->parse_attr->spec;
/* update from encap rule to slow path rule */ @@@ -1206,12 -1387,16 +1387,16 @@@ err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", err); - continue; + goto loop_cont; }
mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr); - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */ flow->rule[0] = rule; + /* was unset when fast path rule removed */ + flow_flag_set(flow, OFFLOADED); + + loop_cont: + mlx5e_flow_put(priv, flow); }
/* we know that the encap is valid */ @@@ -1221,7 -1406,7 +1406,7 @@@
static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (mlx5e_is_eswitch_flow(flow)) return flow->esw_attr->counter; else return flow->nic_attr->counter; @@@ -1248,21 -1433,32 +1433,32 @@@ void mlx5e_tc_update_neigh_used_value(s return;
list_for_each_entry(e, &nhe->encap_list, encap_list) { - struct encap_flow_item *efi; - if (!(e->flags & MLX5_ENCAP_ENTRY_VALID)) + struct encap_flow_item *efi, *tmp; + + if (!(e->flags & MLX5_ENCAP_ENTRY_VALID) || + !mlx5e_encap_take(e)) continue; - list_for_each_entry(efi, &e->flows, list) { + + list_for_each_entry_safe(efi, tmp, &e->flows, list) { flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + + if (mlx5e_is_offloaded_flow(flow)) { counter = mlx5e_tc_get_counter(flow); lastuse = mlx5_fc_query_lastuse(counter); if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { + mlx5e_flow_put(netdev_priv(e->out_dev), flow); neigh_used = true; break; } } + + mlx5e_flow_put(netdev_priv(e->out_dev), flow); } + + mlx5e_encap_put(netdev_priv(e->out_dev), e); if (neigh_used) break; } @@@ -1282,40 -1478,66 +1478,66 @@@ } }
- static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index) + static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { - struct list_head *next = flow->encaps[out_index].list.next; + WARN_ON(!list_empty(&e->flows)); + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
- list_del(&flow->encaps[out_index].list); - if (list_empty(next)) { - struct mlx5e_encap_entry *e; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) + mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
- e = list_entry(next, struct mlx5e_encap_entry, flows); - mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); + kfree(e->encap_header); + kfree(e); + }
- if (e->flags & MLX5_ENCAP_ENTRY_VALID) - mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); + void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) + { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- hash_del_rcu(&e->encap_hlist); - kfree(e->encap_header); - kfree(e); + if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock)) + return; + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); + } + + static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, int out_index) + { + struct mlx5e_encap_entry *e = flow->encaps[out_index].e; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + /* flow wasn't fully initialized */ + if (!e) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + list_del(&flow->encaps[out_index].list); + flow->encaps[out_index].e = NULL; + if (!refcount_dec_and_test(&e->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; } + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); }
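mlx5e_encap_put() above is built on refcount_dec_and_mutex_lock(): the table mutex is acquired only when the count actually drops to zero, so a lookup running under the same mutex can never observe an entry whose refcount has already hit zero. A minimal sketch of that release path, with illustrative names:

	struct obj {
		refcount_t refcnt;
		struct hlist_node hnode;
	};

	static void obj_release(struct obj *o, struct mutex *tbl_lock)
	{
		/* Takes tbl_lock only if this was the final reference. */
		if (!refcount_dec_and_mutex_lock(&o->refcnt, tbl_lock))
			return;
		hash_del_rcu(&o->hnode);	/* no lookup can find it now */
		mutex_unlock(tbl_lock);
		kfree(o);			/* heavy teardown outside the lock */
	}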
static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
- if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || - !(flow->flags & MLX5E_TC_FLOW_DUP)) + if (!flow_flag_test(flow, ESWITCH) || + !flow_flag_test(flow, DUP)) return;
mutex_lock(&esw->offloads.peer_mutex); list_del(&flow->peer); mutex_unlock(&esw->offloads.peer_mutex);
- flow->flags &= ~MLX5E_TC_FLOW_DUP; + flow_flag_clear(flow, DUP);
mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); kvfree(flow->peer_flow); @@@ -1339,7 -1561,7 +1561,7 @@@ static void mlx5e_tc_del_fdb_peer_flow( static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + if (mlx5e_is_eswitch_flow(flow)) { mlx5e_tc_del_fdb_peer_flow(flow); mlx5e_tc_del_fdb_flow(priv, flow); } else { @@@ -1480,7 -1702,7 +1702,7 @@@ static int __parse_cls_flower(struct ml struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct net_device *filter_dev, - u8 *match_level, u8 *tunnel_match_level) + u8 *inner_match_level, u8 *outer_match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@@ -1495,9 -1717,8 +1717,9 @@@ struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; u8 ip_proto = 0; + u8 *match_level;
- *match_level = MLX5_MATCH_NONE; + match_level = outer_match_level;
if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_META) | @@@ -1525,14 -1746,12 +1747,14 @@@ }
if (mlx5e_get_tc_tun(filter_dev)) { - if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) + if (parse_tunnel_attr(priv, spec, f, filter_dev, + outer_match_level)) return -EOPNOTSUPP;
- /* In decap flow, header pointers should point to the inner + /* At this point, header pointers should point to the inner * headers, outer header were already set by parse_tunnel_attr */ + match_level = inner_match_level; headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP, spec); headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP, @@@ -1834,41 -2053,37 +2056,43 @@@ static int parse_cls_flower(struct mlx5 struct flow_cls_offload *f, struct net_device *filter_dev) { + u8 inner_match_level, outer_match_level, non_tunnel_match_level; struct netlink_ext_ack *extack = f->common.extack; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; - u8 match_level, tunnel_match_level = MLX5_MATCH_NONE; struct mlx5_eswitch_rep *rep; + bool is_eswitch_flow; int err;
- err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level); + inner_match_level = MLX5_MATCH_NONE; + outer_match_level = MLX5_MATCH_NONE; + + err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level, + &outer_match_level); + non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? + outer_match_level : inner_match_level;
- if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { + is_eswitch_flow = mlx5e_is_eswitch_flow(flow); + if (!err && is_eswitch_flow) { rep = rpriv->rep; if (rep->vport != MLX5_VPORT_UPLINK && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && - esw->offloads.inline_mode < match_level)) { + esw->offloads.inline_mode < non_tunnel_match_level)) { NL_SET_ERR_MSG_MOD(extack, "Flow is not offloaded due to min inline setting"); netdev_warn(priv->netdev, "Flow is not offloaded due to min inline setting, required %d actual %d\n", - match_level, esw->offloads.inline_mode); + non_tunnel_match_level, esw->offloads.inline_mode); return -EOPNOTSUPP; } }
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + if (is_eswitch_flow) { - flow->esw_attr->match_level = match_level; - flow->esw_attr->tunnel_match_level = tunnel_match_level; + flow->esw_attr->inner_match_level = inner_match_level; + flow->esw_attr->outer_match_level = outer_match_level; } else { - flow->nic_attr->match_level = match_level; + flow->nic_attr->match_level = non_tunnel_match_level; }
return err; @@@ -2385,14 -2600,15 +2609,15 @@@ static bool actions_match_supported(str { u32 actions;
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (mlx5e_is_eswitch_flow(flow)) actions = flow->esw_attr->action; else actions = flow->nic_attr->action;
- if (flow->flags & MLX5E_TC_FLOW_EGRESS && + if (flow_flag_test(flow, EGRESS) && !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP))) + (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || + (actions & MLX5_FLOW_CONTEXT_ACTION_DROP))) return false;
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) @@@ -2542,7 -2758,7 +2767,7 @@@ static int parse_tc_nic_actions(struct if (priv->netdev->netdev_ops == peer_dev->netdev_ops && same_hw_devs(priv, netdev_priv(peer_dev))) { parse_attr->mirred_ifindex[0] = peer_dev->ifindex; - flow->flags |= MLX5E_TC_FLOW_HAIRPIN; + flow_flag_set(flow, HAIRPIN); action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; } else { @@@ -2629,6 -2845,31 +2854,31 @@@ static bool is_merged_eswitch_dev(struc
+ bool mlx5e_encap_take(struct mlx5e_encap_entry *e) + { + return refcount_inc_not_zero(&e->refcnt); + } + + static struct mlx5e_encap_entry * + mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, + uintptr_t hash_key) + { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_entry *e; + struct encap_key e_key; + + hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, + encap_hlist, hash_key) { + e_key.ip_tun_key = &e->tun_info->key; + e_key.tc_tunnel = e->tunnel; + if (!cmp_encap_info(&e_key, key) && + mlx5e_encap_take(e)) + return e; + } + + return NULL; + } + static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct net_device *mirred_dev, @@@ -2641,11 -2882,10 +2891,10 @@@ struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; const struct ip_tunnel_info *tun_info; - struct encap_key key, e_key; + struct encap_key key; struct mlx5e_encap_entry *e; unsigned short family; uintptr_t hash_key; - bool found = false; int err = 0;
parse_attr = attr->parse_attr; @@@ -2660,42 -2900,59 +2909,59 @@@
hash_key = hash_encap_info(&key);
- hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, - encap_hlist, hash_key) { - e_key.ip_tun_key = &e->tun_info->key; - e_key.tc_tunnel = e->tunnel; - if (!cmp_encap_info(&e_key, &key)) { - found = true; - break; - } - } + mutex_lock(&esw->offloads.encap_tbl_lock); + e = mlx5e_encap_get(priv, &key, hash_key);
/* must verify if encap is valid or not */ - if (found) + if (e) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + wait_for_completion(&e->res_ready); + + /* Protect against concurrent neigh update. */ + mutex_lock(&esw->offloads.encap_tbl_lock); + if (e->compl_result) { + err = -EREMOTEIO; + goto out_err; + } goto attach_flow; + }
e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) - return -ENOMEM; + if (!e) { + err = -ENOMEM; + goto out_err; + } + + refcount_set(&e->refcnt, 1); + init_completion(&e->res_ready);
e->tun_info = tun_info; err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); - if (err) + if (err) { + kfree(e); + e = NULL; goto out_err; + }
INIT_LIST_HEAD(&e->flows); + hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + mutex_unlock(&esw->offloads.encap_tbl_lock);
if (family == AF_INET) err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
- if (err) + /* Protect against concurrent neigh update. */ + mutex_lock(&esw->offloads.encap_tbl_lock); + complete_all(&e->res_ready); + if (err) { + e->compl_result = err; goto out_err; - - hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + }
attach_flow: + flow->encaps[out_index].e = e; list_add(&flow->encaps[out_index].list, &e->flows); flow->encaps[out_index].index = out_index; *encap_dev = e->out_dev; @@@ -2706,11 -2963,14 +2972,14 @@@ } else { *encap_valid = false; } + mutex_unlock(&esw->offloads.encap_tbl_lock);
return err;
out_err: - kfree(e); + mutex_unlock(&esw->offloads.encap_tbl_lock); + if (e) + mlx5e_encap_put(priv, e); return err; }
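mlx5e_attach_encap() now publishes a half-initialized entry under encap_tbl_lock, drops the lock for the sleeping header/neighbour resolution, and signals res_ready when done; concurrent attachers wait on the completion and then inspect compl_result. A compressed sketch of this two-phase initialization; lookup_and_take_ref(), alloc_entry() and slow_init() are hypothetical helpers standing in for the code above:

	mutex_lock(&tbl_lock);
	e = lookup_and_take_ref(key);		/* hypothetical helper */
	if (e) {
		mutex_unlock(&tbl_lock);
		wait_for_completion(&e->res_ready);	/* creator finished? */
		if (e->compl_result)
			return -EREMOTEIO;		/* creator failed */
		/* ... attach to e ... */
		return 0;
	}
	e = alloc_entry(key);			/* refcount starts at 1 */
	init_completion(&e->res_ready);
	hash_add_rcu(tbl, &e->hnode, hash);
	mutex_unlock(&tbl_lock);		/* entry visible but not ready */

	e->compl_result = slow_init(e);		/* may sleep, lock not held */
	complete_all(&e->res_ready);		/* wake waiters, even on error */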
@@@ -2890,12 -3150,16 +3159,16 @@@ static int parse_tc_fdb_actions(struct if (netdev_port_same_parent_id(priv->netdev, out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); + struct net_device *uplink_upper;
+ rcu_read_lock(); + uplink_upper = + netdev_master_upper_dev_get_rcu(uplink_dev); if (uplink_upper && netif_is_lag_master(uplink_upper) && uplink_upper == out_dev) out_dev = uplink_dev; + rcu_read_unlock();
if (is_vlan_dev(out_dev)) { err = add_vlan_push_action(priv, attr, @@@ -3066,19 -3330,19 +3339,19 @@@ return 0; }
- static void get_flags(int flags, u16 *flow_flags) + static void get_flags(int flags, unsigned long *flow_flags) { - u16 __flow_flags = 0; + unsigned long __flow_flags = 0;
- if (flags & MLX5E_TC_INGRESS) - __flow_flags |= MLX5E_TC_FLOW_INGRESS; - if (flags & MLX5E_TC_EGRESS) - __flow_flags |= MLX5E_TC_FLOW_EGRESS; + if (flags & MLX5_TC_FLAG(INGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS); + if (flags & MLX5_TC_FLAG(EGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
- if (flags & MLX5E_TC_ESW_OFFLOAD) - __flow_flags |= MLX5E_TC_FLOW_ESWITCH; - if (flags & MLX5E_TC_NIC_OFFLOAD) - __flow_flags |= MLX5E_TC_FLOW_NIC; + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); + if (flags & MLX5_TC_FLAG(NIC_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
*flow_flags = __flow_flags; } @@@ -3090,12 -3354,13 +3363,13 @@@ static const struct rhashtable_params t .automatic_shrinking = true, };
- static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags) + static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, + unsigned long flags) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *uplink_rpriv;
- if (flags & MLX5E_TC_ESW_OFFLOAD) { + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); return &uplink_rpriv->uplink_priv.tc_ht; } else /* NIC offload */ @@@ -3106,7 -3371,7 +3380,7 @@@ static bool is_peer_flow_needed(struct { struct mlx5_esw_flow_attr *attr = flow->esw_attr; bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && - flow->flags & MLX5E_TC_FLOW_INGRESS; + flow_flag_test(flow, INGRESS); bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, @@@ -3125,13 -3390,13 +3399,13 @@@
static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, - struct flow_cls_offload *f, u16 flow_flags, + struct flow_cls_offload *f, unsigned long flow_flags, struct mlx5e_tc_flow_parse_attr **__parse_attr, struct mlx5e_tc_flow **__flow) { struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; - int err; + int out_index, err;
flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); @@@ -3143,6 -3408,11 +3417,11 @@@ flow->cookie = f->cookie; flow->flags = flow_flags; flow->priv = priv; + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + INIT_LIST_HEAD(&flow->encaps[out_index].list); + INIT_LIST_HEAD(&flow->mod_hdr); + INIT_LIST_HEAD(&flow->hairpin); + refcount_set(&flow->refcnt, 1);
*__flow = flow; *__parse_attr = parse_attr; @@@ -3167,7 -3437,7 +3446,7 @@@ mlx5e_flow_esw_attr_init(struct mlx5_es
esw_attr->parse_attr = parse_attr; esw_attr->chain = f->common.chain_index; - esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; + esw_attr->prio = f->common.prio;
esw_attr->in_rep = in_rep; esw_attr->in_mdev = in_mdev; @@@ -3182,7 -3452,7 +3461,7 @@@ static struct mlx5e_tc_flow * __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5_eswitch_rep *in_rep, struct mlx5_core_dev *in_mdev) @@@ -3193,7 -3463,7 +3472,7 @@@ struct mlx5e_tc_flow *flow; int attr_size, err;
- flow_flags |= MLX5E_TC_FLOW_ESWITCH; + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); attr_size = sizeof(struct mlx5_esw_flow_attr); err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, &parse_attr, &flow); @@@ -3225,15 -3495,14 +3504,14 @@@ return flow;
err_free: - kfree(flow); - kvfree(parse_attr); + mlx5e_flow_put(priv, flow); out: return ERR_PTR(err); }
static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, struct mlx5e_tc_flow *flow, - u16 flow_flags) + unsigned long flow_flags) { struct mlx5e_priv *priv = flow->priv, *peer_priv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; @@@ -3271,7 -3540,7 +3549,7 @@@ }
flow->peer_flow = peer_flow; - flow->flags |= MLX5E_TC_FLOW_DUP; + flow_flag_set(flow, DUP); mutex_lock(&esw->offloads.peer_mutex); list_add_tail(&flow->peer, &esw->offloads.peer_flows); mutex_unlock(&esw->offloads.peer_mutex); @@@ -3284,7 -3553,7 +3562,7 @@@ out static int mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { @@@ -3318,7 -3587,7 +3596,7 @@@ out static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { @@@ -3332,7 -3601,7 +3610,7 @@@ if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) return -EOPNOTSUPP;
- flow_flags |= MLX5E_TC_FLOW_NIC; + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); attr_size = sizeof(struct mlx5_nic_flow_attr); err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, &parse_attr, &flow); @@@ -3353,14 -3622,14 +3631,14 @@@ if (err) goto err_free;
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + flow_flag_set(flow, OFFLOADED); kvfree(parse_attr); *__flow = flow;
return 0;
err_free: - kfree(flow); + mlx5e_flow_put(priv, flow); kvfree(parse_attr); out: return err; @@@ -3369,12 -3638,12 +3647,12 @@@ static int mlx5e_tc_add_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - int flags, + unsigned long flags, struct net_device *filter_dev, struct mlx5e_tc_flow **flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - u16 flow_flags; + unsigned long flow_flags; int err;
get_flags(flags, &flow_flags); @@@ -3393,14 -3662,16 +3671,16 @@@ }
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct netlink_ext_ack *extack = f->common.extack; struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; int err = 0;
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + rcu_read_lock(); + flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); + rcu_read_unlock(); if (flow) { NL_SET_ERR_MSG_MOD(extack, "flow cookie already exists, ignoring"); @@@ -3415,51 -3686,62 +3695,62 @@@ if (err) goto out;
- err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); + err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params); if (err) goto err_free;
return 0;
err_free: - mlx5e_tc_del_flow(priv, flow); - kfree(flow); + mlx5e_flow_put(priv, flow); out: return err; }
- #define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS) - #define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS) - static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) { - if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK)) - return true; + bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS)); + bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
- return false; + return flow_flag_test(flow, INGRESS) == dir_ingress && + flow_flag_test(flow, EGRESS) == dir_egress; }
int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; + int err;
+ rcu_read_lock(); flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (!flow || !same_flow_direction(flow, flags)) - return -EINVAL; + if (!flow || !same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + }
+ /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag + * set. + */ + if (flow_flag_test_and_set(flow, DELETED)) { + err = -EINVAL; + goto errout; + } rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params); + rcu_read_unlock();
- mlx5e_tc_del_flow(priv, flow); - - kfree(flow); + mlx5e_flow_put(priv, flow);
return 0; + + errout: + rcu_read_unlock(); + return err; }
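The DELETED bit is manipulated with an atomic test-and-set, so when two paths race to delete the same flow, exactly one caller sees the bit clear and performs the teardown while the loser simply backs off. The same idiom expressed with the plain bitops API, names illustrative:

	/* test_and_set_bit() returns the previous bit value atomically */
	if (test_and_set_bit(FLOW_DELETED, &flow->flags))
		return -EINVAL;	/* someone else is already deleting it */

	rhashtable_remove_fast(tc_ht, &flow->node, params);
	flow_put(flow);		/* drop the table's reference */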
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct rhashtable *tc_ht = get_tc_ht(priv, flags); @@@ -3469,15 -3751,24 +3760,24 @@@ u64 lastuse = 0; u64 packets = 0; u64 bytes = 0; + int err = 0;
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (!flow || !same_flow_direction(flow, flags)) - return -EINVAL; + rcu_read_lock(); + flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie, + tc_ht_params)); + rcu_read_unlock(); + if (IS_ERR(flow)) + return PTR_ERR(flow);
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + if (!same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + } + + if (mlx5e_is_offloaded_flow(flow)) { counter = mlx5e_tc_get_counter(flow); if (!counter) - return 0; + goto errout;
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); } @@@ -3489,8 -3780,8 +3789,8 @@@ if (!peer_esw) goto out;
- if ((flow->flags & MLX5E_TC_FLOW_DUP) && - (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) { + if (flow_flag_test(flow, DUP) && + flow_flag_test(flow->peer_flow, OFFLOADED)) { u64 bytes2; u64 packets2; u64 lastuse2; @@@ -3509,15 -3800,117 +3809,117 @@@ no_peer_counter mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); out: flow_stats_update(&f->stats, bytes, packets, lastuse); + errout: + mlx5e_flow_put(priv, flow); + return err; + } + + static int apply_police_params(struct mlx5e_priv *priv, u32 rate, + struct netlink_ext_ack *extack) + { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch *esw; + u16 vport_num; + u32 rate_mbps; + int err; + + esw = priv->mdev->priv.eswitch; + /* rate is given in bytes/sec. + * First convert to bits/sec and then round to the nearest mbit/secs. + * mbit means million bits. + * Moreover, if rate is non zero we choose to configure to a minimum of + * 1 mbit/sec. + */ + rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0; + vport_num = rpriv->rep->vport; + + err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); + if (err) + NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); + + return err; + } + + static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct netlink_ext_ack *extack) + { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + const struct flow_action_entry *act; + int err; + int i; + + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall called with no action"); + return -EINVAL; + } + + if (!flow_offload_has_one_action(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action"); + return -EOPNOTSUPP; + } + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_POLICE: + err = apply_police_params(priv, act->police.rate_bytes_ps, extack); + if (err) + return err; + + rpriv->prev_vf_vport_stats = priv->stats.vf_vport; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); + return -EOPNOTSUPP; + } + }
return 0; }
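apply_police_params() converts the policer rate from bytes/sec to whole Mbit/sec because the e-switch scheduling element is programmed in megabits, rounding to the nearest megabit and clamping any non-zero rate up to 1 Mbit/sec. Two worked cases of the formula quoted above:

	/* rate = 1250000 B/s:  1250000 * 8 = 10000000 b/s
	 * (10000000 + 500000) / 1000000 = 10	-> 10 Mbit/s
	 *
	 * rate = 100000 B/s:  100000 * 8 = 800000 b/s
	 * (800000 + 500000) / 1000000 = 1	-> 1 Mbit/s
	 * (any non-zero rate below that still clamps to 1)
	 */
	rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0;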
+ int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) + { + struct netlink_ext_ack *extack = ma->common.extack; + int prio = TC_H_MAJ(ma->common.prio) >> 16; + + if (prio != 1) { + NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); + return -EINVAL; + } + + return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack); + } + + int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) + { + struct netlink_ext_ack *extack = ma->common.extack; + + return apply_police_params(priv, 0, extack); + } + + void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) + { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct rtnl_link_stats64 cur_stats; + u64 dbytes; + u64 dpkts; + + cur_stats = priv->stats.vf_vport; + dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; + dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; + rpriv->prev_vf_vport_stats = cur_stats; + flow_stats_update(&ma->stats, dpkts, dbytes, jiffies); + } + static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) { struct mlx5_core_dev *peer_mdev = peer_priv->mdev; - struct mlx5e_hairpin_entry *hpe; + struct mlx5e_hairpin_entry *hpe, *tmp; + LIST_HEAD(init_wait_list); u16 peer_vhca_id; int bkt;
@@@ -3526,9 -3919,18 +3928,18 @@@
peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
- hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) { - if (hpe->peer_vhca_id == peer_vhca_id) + mutex_lock(&priv->fs.tc.hairpin_tbl_lock); + hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) + if (refcount_inc_not_zero(&hpe->refcnt)) + list_add(&hpe->dead_peer_wait_list, &init_wait_list); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + + list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { + wait_for_completion(&hpe->res_ready); + if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) hpe->hp->pair->peer_gone = true; + + mlx5e_hairpin_put(priv, hpe); } }
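Waiting on hpe->res_ready may sleep, so it cannot happen under hairpin_tbl_lock; the function instead snapshots all live entries onto a private wait list while holding the lock and sleeps afterwards. The snapshot-then-wait shape, with illustrative names:

	LIST_HEAD(wait_list);

	mutex_lock(&tbl_lock);
	hash_for_each(tbl, bkt, e, hnode)
		if (refcount_inc_not_zero(&e->refcnt))
			list_add(&e->wait_node, &wait_list);
	mutex_unlock(&tbl_lock);

	list_for_each_entry_safe(e, tmp, &wait_list, wait_node) {
		wait_for_completion(&e->res_ready);	/* may sleep now */
		/* ... inspect e ... */
		obj_put(e);				/* may free e */
	}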
@@@ -3564,7 -3966,10 +3975,10 @@@ int mlx5e_tc_nic_init(struct mlx5e_pri struct mlx5e_tc_table *tc = &priv->fs.tc; int err;
- hash_init(tc->mod_hdr_tbl); + mutex_init(&tc->t_lock); + mutex_init(&tc->mod_hdr.lock); + hash_init(tc->mod_hdr.hlist); + mutex_init(&tc->hairpin_tbl_lock); hash_init(tc->hairpin_tbl);
err = rhashtable_init(&tc->ht, &tc_ht_params); @@@ -3596,12 -4001,16 +4010,16 @@@ void mlx5e_tc_nic_cleanup(struct mlx5e_ if (tc->netdevice_nb.notifier_call) unregister_netdevice_notifier(&tc->netdevice_nb);
+ mutex_destroy(&tc->mod_hdr.lock); + mutex_destroy(&tc->hairpin_tbl_lock); + rhashtable_destroy(&tc->ht);
if (!IS_ERR_OR_NULL(tc->t)) { mlx5_destroy_flow_table(tc->t); tc->t = NULL; } + mutex_destroy(&tc->t_lock); }
int mlx5e_tc_esw_init(struct rhashtable *tc_ht) @@@ -3614,7 -4023,7 +4032,7 @@@ void mlx5e_tc_esw_cleanup(struct rhasht rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); }
- int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) + int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) { struct rhashtable *tc_ht = get_tc_ht(priv, flags);
@@@ -3636,10 -4045,10 +4054,10 @@@ void mlx5e_tc_reoffload_flows_work(stru reoffload_flows_work); struct mlx5e_tc_flow *flow, *tmp;
- rtnl_lock(); + mutex_lock(&rpriv->unready_flows_lock); list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) { if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL)) - remove_unready_flow(flow); + unready_flow_del(flow); } - rtnl_unlock(); + mutex_unlock(&rpriv->unready_flows_lock); } diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 81e03e493a01,f0692407f617..30aae76b6a1d --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@@ -1413,7 -1413,7 +1413,7 @@@ out
static bool element_type_supported(struct mlx5_eswitch *esw, int type) { - struct mlx5_core_dev *dev = esw->dev = esw->dev; + const struct mlx5_core_dev *dev = esw->dev;
switch (type) { case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: @@@ -1585,6 -1585,22 +1585,22 @@@ static int esw_vport_qos_config(struct return 0; }
+ int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, + u32 rate_mbps) + { + u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); + + return mlx5_modify_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + ctx, + vport->qos.esw_tsar_ix, + MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW); + } + static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) { ((u8 *)node_guid)[7] = mac[0]; @@@ -1983,8 -1999,11 +1999,11 @@@ int mlx5_eswitch_init(struct mlx5_core_ if (err) goto abort;
+ mutex_init(&esw->offloads.encap_tbl_lock); hash_init(esw->offloads.encap_tbl); - hash_init(esw->offloads.mod_hdr_tbl); + mutex_init(&esw->offloads.mod_hdr.lock); + hash_init(esw->offloads.mod_hdr.hlist); + atomic64_set(&esw->offloads.num_flows, 0); mutex_init(&esw->state_lock);
mlx5_esw_for_all_vports(esw, i, vport) { @@@ -2020,6 -2039,8 +2039,8 @@@ void mlx5_eswitch_cleanup(struct mlx5_e esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); esw_offloads_cleanup_reps(esw); + mutex_destroy(&esw->offloads.mod_hdr.lock); + mutex_destroy(&esw->offloads.encap_tbl_lock); kfree(esw->vports); kfree(esw); } @@@ -2137,23 -2158,19 +2158,19 @@@ int __mlx5_eswitch_set_vport_vlan(struc if (vlan > 4095 || qos > 7) return -EINVAL;
- mutex_lock(&esw->state_lock); - err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); if (err) - goto unlock; + return err;
evport->info.vlan = vlan; evport->info.qos = qos; if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) - goto unlock; + return err; err = esw_vport_egress_config(esw, evport); }
- unlock: - mutex_unlock(&esw->state_lock); return err; }
@@@ -2161,11 -2178,16 +2178,16 @@@ int mlx5_eswitch_set_vport_vlan(struct u16 vport, u16 vlan, u8 qos) { u8 set_flags = 0; + int err;
if (vlan || qos) set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
- return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); + mutex_lock(&esw->state_lock); + err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); + mutex_unlock(&esw->state_lock); + + return err; }
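The state_lock acquisition moves from __mlx5_eswitch_set_vport_vlan() into its callers, following the usual kernel convention: the double-underscore variant assumes esw->state_lock is already held, while the plain-named wrapper takes and releases it, so callers that already own the lock can use the inner function directly. A minimal sketch of the pairing, names illustrative:

	/* caller must hold f->lock */
	static int __foo_set(struct foo *f, int arg)
	{
		lockdep_assert_held(&f->lock);
		/* ... mutate protected state ... */
		return 0;
	}

	int foo_set(struct foo *f, int arg)
	{
		int err;

		mutex_lock(&f->lock);
		err = __foo_set(f, arg);
		mutex_unlock(&f->lock);
		return err;
	}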
int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 5ec7c6dfb88f,86db0e9776da..aba9e7a6ad3c --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@@ -35,6 -35,7 +35,7 @@@
#include <linux/if_ether.h> #include <linux/if_link.h> + #include <linux/atomic.h> #include <net/devlink.h> #include <linux/mlx5/device.h> #include <linux/mlx5/eswitch.h> @@@ -180,13 -181,14 +181,14 @@@ struct mlx5_esw_offload struct mlx5_eswitch_rep *vport_reps; struct list_head peer_flows; struct mutex peer_mutex; + struct mutex encap_tbl_lock; /* protects encap_tbl */ DECLARE_HASHTABLE(encap_tbl, 8); - DECLARE_HASHTABLE(mod_hdr_tbl, 8); + struct mod_hdr_tbl mod_hdr; DECLARE_HASHTABLE(termtbl_tbl, 8); struct mutex termtbl_mutex; /* protects termtbl hash */ const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; - u64 num_flows; + atomic64_t num_flows; enum devlink_eswitch_encap_mode encap; };
@@@ -260,6 -262,8 +262,8 @@@ void esw_vport_disable_ingress_acl(stru struct mlx5_vport *vport); void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, + u32 rate_mbps);
/* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); @@@ -386,8 -390,8 +390,8 @@@ struct mlx5_esw_flow_attr struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; - u8 match_level; - u8 tunnel_match_level; + u8 inner_match_level; + u8 outer_match_level; struct mlx5_fc *counter; u32 chain; u16 prio; diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 2adee1b867f6,42cc5001255b..7d3582ee66b7 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@@ -207,10 -207,14 +207,10 @@@ mlx5_eswitch_add_offloaded_rule(struct
mlx5_eswitch_set_rule_source_port(esw, spec, attr);
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { - if (attr->tunnel_match_level != MLX5_MATCH_NONE) - spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; - if (attr->match_level != MLX5_MATCH_NONE) - spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; - } else if (attr->match_level != MLX5_MATCH_NONE) { + if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; - } + if (attr->inner_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; @@@ -229,7 -233,7 +229,7 @@@ if (IS_ERR(rule)) goto err_add_rule; else - esw->offloads.num_flows++; + atomic64_inc(&esw->offloads.num_flows);
return rule;
@@@ -286,7 -290,7 +286,7 @@@ mlx5_eswitch_add_fwd_rule(struct mlx5_e mlx5_eswitch_set_rule_source_port(esw, spec, attr);
spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; - if (attr->match_level != MLX5_MATCH_NONE) + if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); @@@ -294,7 -298,7 +294,7 @@@ if (IS_ERR(rule)) goto add_err;
- esw->offloads.num_flows++; + atomic64_inc(&esw->offloads.num_flows);
return rule; add_err: @@@ -322,7 -326,7 +322,7 @@@ __mlx5_eswitch_del_rule(struct mlx5_esw mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); }
- esw->offloads.num_flows--; + atomic64_dec(&esw->offloads.num_flows);
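With rule insertion and removal no longer serialized by one big lock, the flow counter becomes an atomic64_t: writers use atomic64_inc()/atomic64_dec() as in these hunks, and the devlink mode-change checks further down read it locklessly with atomic64_read(). The accessor trio in one place:

	atomic64_t num_flows = ATOMIC64_INIT(0);

	atomic64_inc(&num_flows);		/* rule offloaded */
	atomic64_dec(&num_flows);		/* rule deleted */

	if (atomic64_read(&num_flows) > 0)	/* e.g. inline-mode change */
		return -EOPNOTSUPP;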
if (fwd_rule) { esw_put_prio_table(esw, attr->chain, attr->prio, 1); @@@ -438,9 -442,11 +438,11 @@@ int mlx5_eswitch_add_vlan_action(struc fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !attr->dest_chain);
+ mutex_lock(&esw->state_lock); + err = esw_add_vlan_action_check(attr, push, pop, fwd); if (err) - return err; + goto unlock;
attr->vlan_handled = false;
@@@ -453,11 -459,11 +455,11 @@@ attr->vlan_handled = true; }
- return 0; + goto unlock; }
if (!push && !pop) - return 0; + goto unlock;
if (!(offloads->vlan_push_pop_refcount)) { /* it's the 1st vlan rule, apply global vlan pop policy */ @@@ -482,6 -488,8 +484,8 @@@ skip_set_push out: if (!err) attr->vlan_handled = true; + unlock: + mutex_unlock(&esw->state_lock); return err; }
@@@ -504,6 -512,8 +508,8 @@@ int mlx5_eswitch_del_vlan_action(struc pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+ mutex_lock(&esw->state_lock); + vport = esw_vlan_action_get_vport(attr, push, pop);
if (!push && !pop && fwd) { @@@ -511,7 -521,7 +517,7 @@@ if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) vport->vlan_refcount--;
- return 0; + goto out; }
if (push) { @@@ -529,12 -539,13 +535,13 @@@ skip_unset_push: offloads->vlan_push_pop_refcount--; if (offloads->vlan_push_pop_refcount) - return 0; + goto out;
/* no more vlan rules, stop global vlan pop policy */ err = esw_set_global_vlan_pop(esw, 0);
out: + mutex_unlock(&esw->state_lock); return err; }
@@@ -1382,10 -1393,9 +1389,9 @@@ void esw_offloads_cleanup_reps(struct m int esw_offloads_init_reps(struct mlx5_eswitch *esw) { int total_vports = esw->total_vports; - struct mlx5_core_dev *dev = esw->dev; struct mlx5_eswitch_rep *rep; - u8 hw_id[ETH_ALEN], rep_type; int vport_index; + u8 rep_type;
esw->offloads.vport_reps = kcalloc(total_vports, sizeof(struct mlx5_eswitch_rep), @@@ -1393,12 -1403,9 +1399,9 @@@ if (!esw->offloads.vport_reps) return -ENOMEM;
- mlx5_query_mac_address(dev, hw_id); - mlx5_esw_for_all_reps(esw, vport_index, rep) { rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index); rep->vport_index = vport_index; - ether_addr_copy(rep->hw_id, hw_id);
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) atomic_set(&rep->rep_data[rep_type].state, @@@ -2325,7 -2332,7 +2328,7 @@@ int mlx5_devlink_eswitch_inline_mode_se break; }
- if (esw->offloads.num_flows > 0) { + if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set inline mode when flows are configured"); return -EOPNOTSUPP; @@@ -2435,7 -2442,7 +2438,7 @@@ int mlx5_devlink_eswitch_encap_mode_set if (esw->offloads.encap == encap) return 0;
- if (esw->offloads.num_flows > 0) { + if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set encapsulation when flows are configured"); return -EOPNOTSUPP; diff --combined drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c index 84a87d059333,1aaab8446270..150b3a144b83 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c @@@ -239,7 -239,8 +239,8 @@@ mlxsw_sp_acl_block_lookup(struct mlxsw_ int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_acl_block *block, struct mlxsw_sp_port *mlxsw_sp_port, - bool ingress) + bool ingress, + struct netlink_ext_ack *extack) { struct mlxsw_sp_acl_block_binding *binding; int err; @@@ -247,6 -248,11 +248,11 @@@ if (WARN_ON(mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress))) return -EEXIST;
+ if (!ingress && block->egress_blocker_rule_count) { + NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules"); + return -EOPNOTSUPP; + } + binding = kzalloc(sizeof(*binding), GFP_KERNEL); if (!binding) return -ENOMEM; @@@ -471,7 -477,7 +477,7 @@@ int mlxsw_sp_acl_rulei_commit(struct ml void mlxsw_sp_acl_rulei_priority(struct mlxsw_sp_acl_rule_info *rulei, unsigned int priority) { - rulei->priority = priority >> 16; + rulei->priority = priority; }
void mlxsw_sp_acl_rulei_keymask_u32(struct mlxsw_sp_acl_rule_info *rulei, @@@ -672,6 -678,7 +678,7 @@@ int mlxsw_sp_acl_rule_add(struct mlxsw_ { struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl_block *block = ruleset->ht_key.block; int err;
err = ops->rule_add(mlxsw_sp, ruleset->priv, rule->priv, rule->rulei); @@@ -689,14 -696,14 +696,14 @@@ * one, to be directly bound to device. The rest of the * rulesets are bound by "Goto action set". */ - err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, - ruleset->ht_key.block); + err = mlxsw_sp_acl_ruleset_block_bind(mlxsw_sp, ruleset, block); if (err) goto err_ruleset_block_bind; }
list_add_tail(&rule->list, &mlxsw_sp->acl->rules); - ruleset->ht_key.block->rule_count++; + block->rule_count++; + block->egress_blocker_rule_count += rule->rulei->egress_bind_blocker; return 0;
err_ruleset_block_bind: @@@ -712,7 -719,9 +719,9 @@@ void mlxsw_sp_acl_rule_del(struct mlxsw { struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset; const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops; + struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
+ block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker; ruleset->ht_key.block->rule_count--; list_del(&rule->list); if (!ruleset->ht_key.chain_index && diff --combined drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 337b0cbfd153,61fe92719982..c979f38a2e0c --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@@ -1286,7 -1286,7 +1286,7 @@@ myri10ge_vlan_rx(struct net_device *dev { u8 *va; struct vlan_ethhdr *veh; - struct skb_frag_struct *frag; + skb_frag_t *frag; __wsum vsum;
va = addr; @@@ -1306,8 -1306,8 +1306,8 @@@ skb->len -= VLAN_HLEN; skb->data_len -= VLAN_HLEN; frag = skb_shinfo(skb)->frags; - frag->page_offset += VLAN_HLEN; - skb_frag_size_set(frag, skb_frag_size(frag) - VLAN_HLEN); + skb_frag_off_add(frag, VLAN_HLEN); + skb_frag_size_sub(frag, VLAN_HLEN); } }
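The myri10ge changes are part of a tree-wide conversion from poking skb fragment fields directly to the skb_frag_off()/skb_frag_size() accessor family, which frees the core to change the layout of skb_frag_t later. The mapping used in these hunks, shown side by side:

	/* open-coded form			accessor form */
	off = frag->page_offset;	     /* off = skb_frag_off(frag); */
	frag->page_offset += VLAN_HLEN;	     /* skb_frag_off_add(frag, VLAN_HLEN); */
	skb_frag_size_set(frag,
	  skb_frag_size(frag) - VLAN_HLEN);  /* skb_frag_size_sub(frag, VLAN_HLEN); */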
@@@ -1318,7 -1318,7 +1318,7 @@@ myri10ge_rx_done(struct myri10ge_slice_ { struct myri10ge_priv *mgp = ss->mgp; struct sk_buff *skb; - struct skb_frag_struct *rx_frags; + skb_frag_t *rx_frags; struct myri10ge_rx_buf *rx; int i, idx, remainder, bytes; struct pci_dev *pdev = mgp->pdev; @@@ -1351,7 -1351,7 +1351,7 @@@ return 0; } rx_frags = skb_shinfo(skb)->frags; - /* Fill skb_frag_struct(s) with data from our receive */ + /* Fill skb_frag_t(s) with data from our receive */ for (i = 0, remainder = len; remainder > 0; i++) { myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes); skb_fill_page_desc(skb, i, rx->info[idx].page, @@@ -1364,8 -1364,8 +1364,8 @@@ }
/* remove padding */ - rx_frags[0].page_offset += MXGEFW_PAD; - rx_frags[0].size -= MXGEFW_PAD; + skb_frag_off_add(&rx_frags[0], MXGEFW_PAD); + skb_frag_size_sub(&rx_frags[0], MXGEFW_PAD); len -= MXGEFW_PAD;
skb->len = len; @@@ -2628,7 -2628,7 +2628,7 @@@ static netdev_tx_t myri10ge_xmit(struc struct myri10ge_slice_state *ss; struct mcp_kreq_ether_send *req; struct myri10ge_tx_buf *tx; - struct skb_frag_struct *frag; + skb_frag_t *frag; struct netdev_queue *netdev_queue; dma_addr_t bus; u32 low; @@@ -3037,7 -3037,6 +3037,6 @@@ static int myri10ge_set_mac_address(str static int myri10ge_change_mtu(struct net_device *dev, int new_mtu) { struct myri10ge_priv *mgp = netdev_priv(dev); - int error = 0;
netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu); if (mgp->running) { @@@ -3049,7 -3048,7 +3048,7 @@@ } else dev->mtu = new_mtu;
- return error; + return 0; }
/* @@@ -3919,7 -3918,7 +3918,7 @@@ static int myri10ge_probe(struct pci_de * setup (if available). */ status = myri10ge_request_irq(mgp); if (status != 0) - goto abort_with_firmware; + goto abort_with_slices; myri10ge_free_irq(mgp);
/* Save configuration space to be restored if the diff --combined drivers/net/hyperv/netvsc_drv.c index e8fce6d715ef,86884c863013..0a6cd2f1111f --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@@ -435,7 -435,7 +435,7 @@@ static u32 init_page_array(void *hdr, u skb_frag_t *frag = skb_shinfo(skb)->frags + i;
slots_used += fill_pg_buf(skb_frag_page(frag), - frag->page_offset, + skb_frag_off(frag), skb_frag_size(frag), &pb[slots_used]); } return slots_used; @@@ -449,7 -449,7 +449,7 @@@ static int count_skb_frag_slots(struct for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; unsigned long size = skb_frag_size(frag); - unsigned long offset = frag->page_offset; + unsigned long offset = skb_frag_off(frag);
/* Skip unused frames from start of page */ offset &= ~PAGE_MASK; @@@ -1239,15 -1239,12 +1239,15 @@@ static void netvsc_get_stats64(struct n struct rtnl_link_stats64 *t) { struct net_device_context *ndev_ctx = netdev_priv(net); - struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev); + struct netvsc_device *nvdev; struct netvsc_vf_pcpu_stats vf_tot; int i;
+ rcu_read_lock(); + + nvdev = rcu_dereference(ndev_ctx->nvdev); if (!nvdev) - return; + goto out;
netdev_stats_to_stats64(t, &net->stats);
@@@ -1286,8 -1283,6 +1286,8 @@@ t->rx_packets += packets; t->multicast += multicast; } +out: + rcu_read_unlock(); }
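rcu_dereference_rtnl() is satisfied by holding either RTNL or the RCU read lock, but this stats path can now run with neither, so the fix takes an explicit read-side section and funnels both exits through the out: label. The resulting shape:

	rcu_read_lock();
	nvdev = rcu_dereference(ndev_ctx->nvdev);
	if (!nvdev)
		goto out;

	/* ... sum per-channel stats; nvdev must not be used after unlock ... */
out:
	rcu_read_unlock();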
static int netvsc_set_mac_addr(struct net_device *ndev, void *p) diff --combined drivers/net/netdevsim/dev.c index bcc40a236624,c217049552f7..c5b026150bf5 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@@ -17,16 -17,60 +17,60 @@@
#include <linux/debugfs.h> #include <linux/device.h> + #include <linux/etherdevice.h> + #include <linux/inet.h> + #include <linux/jiffies.h> + #include <linux/kernel.h> #include <linux/list.h> #include <linux/mutex.h> #include <linux/random.h> #include <linux/rtnetlink.h> + #include <linux/workqueue.h> #include <net/devlink.h> + #include <net/ip.h> + #include <uapi/linux/devlink.h> + #include <uapi/linux/ip.h> + #include <uapi/linux/udp.h>
#include "netdevsim.h"
static struct dentry *nsim_dev_ddir;
+ #define NSIM_DEV_DUMMY_REGION_SIZE (1024 * 32) + + static ssize_t nsim_dev_take_snapshot_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) + { + struct nsim_dev *nsim_dev = file->private_data; + void *dummy_data; + int err; + u32 id; + + dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL); + if (!dummy_data) + return -ENOMEM; + + get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE); + + id = devlink_region_shapshot_id_get(priv_to_devlink(nsim_dev)); + err = devlink_region_snapshot_create(nsim_dev->dummy_region, + dummy_data, id, kfree); + if (err) { + pr_err("Failed to create region snapshot\n"); + kfree(dummy_data); + return err; + } + + return count; + } + + static const struct file_operations nsim_dev_take_snapshot_fops = { + .open = simple_open, + .write = nsim_dev_take_snapshot_write, + .llseek = generic_file_llseek, + }; + static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) { char dev_ddir_name[16]; @@@ -40,6 -84,12 +84,12 @@@ return PTR_ERR_OR_ZERO(nsim_dev->ports_ddir) ?: -EINVAL; debugfs_create_bool("fw_update_status", 0600, nsim_dev->ddir, &nsim_dev->fw_update_status); + debugfs_create_u32("max_macs", 0600, nsim_dev->ddir, + &nsim_dev->max_macs); + debugfs_create_bool("test1", 0600, nsim_dev->ddir, + &nsim_dev->test1); + debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, nsim_dev, + &nsim_dev_take_snapshot_fops); return 0; }
@@@ -73,47 -123,46 +123,47 @@@ static void nsim_dev_port_debugfs_exit( debugfs_remove_recursive(nsim_dev_port->ddir); }
+static struct net *nsim_devlink_net(struct devlink *devlink) +{ + return &init_net; +} + static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv;
- return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false); }
static u64 nsim_dev_ipv4_fib_rules_res_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv;
- return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB_RULES, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false); }
static u64 nsim_dev_ipv6_fib_resource_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv;
- return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false); }
static u64 nsim_dev_ipv6_fib_rules_res_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv;
- return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB_RULES, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false); }
static int nsim_dev_resources_register(struct devlink *devlink) { - struct nsim_dev *nsim_dev = devlink_priv(devlink); struct devlink_resource_size_params params = { .size_max = (u64)-1, .size_granularity = 1, .unit = DEVLINK_RESOURCE_UNIT_ENTRY }; + struct net *net = nsim_devlink_net(devlink); int err; u64 n;
@@@ -127,7 -176,8 +177,7 @@@ goto out; }
- n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true); err = devlink_resource_register(devlink, "fib", n, NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4, ¶ms); @@@ -136,7 -186,8 +186,7 @@@ return err; }
- n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB_RULES, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true); err = devlink_resource_register(devlink, "fib-rules", n, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV4, ¶ms); @@@ -155,7 -206,8 +205,7 @@@ goto out; }
- n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true); err = devlink_resource_register(devlink, "fib", n, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6, ¶ms); @@@ -164,7 -216,8 +214,7 @@@ return err; }
- n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB_RULES, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true); err = devlink_resource_register(devlink, "fib-rules", n, NSIM_RESOURCE_IPV6_FIB_RULES, NSIM_RESOURCE_IPV6, ¶ms); @@@ -176,31 -229,308 +226,308 @@@ devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV4_FIB, nsim_dev_ipv4_fib_resource_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV4_FIB_RULES, nsim_dev_ipv4_fib_rules_res_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV6_FIB, nsim_dev_ipv6_fib_resource_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV6_FIB_RULES, nsim_dev_ipv6_fib_rules_res_occ_get, - nsim_dev); + net); out: return err; }
+ enum nsim_devlink_param_id { + NSIM_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + NSIM_DEVLINK_PARAM_ID_TEST1, + }; + + static const struct devlink_param nsim_devlink_params[] = { + DEVLINK_PARAM_GENERIC(MAX_MACS, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL), + DEVLINK_PARAM_DRIVER(NSIM_DEVLINK_PARAM_ID_TEST1, + "test1", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL), + }; + + static void nsim_devlink_set_params_init_values(struct nsim_dev *nsim_dev, + struct devlink *devlink) + { + union devlink_param_value value; + + value.vu32 = nsim_dev->max_macs; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); + value.vbool = nsim_dev->test1; + devlink_param_driverinit_value_set(devlink, + NSIM_DEVLINK_PARAM_ID_TEST1, + value); + } + + static void nsim_devlink_param_load_driverinit_values(struct devlink *devlink) + { + struct nsim_dev *nsim_dev = devlink_priv(devlink); + union devlink_param_value saved_value; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + &saved_value); + if (!err) + nsim_dev->max_macs = saved_value.vu32; + err = devlink_param_driverinit_value_get(devlink, + NSIM_DEVLINK_PARAM_ID_TEST1, + &saved_value); + if (!err) + nsim_dev->test1 = saved_value.vbool; + } + + #define NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX 16 + + static int nsim_dev_dummy_region_init(struct nsim_dev *nsim_dev, + struct devlink *devlink) + { + nsim_dev->dummy_region = + devlink_region_create(devlink, "dummy", + NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX, + NSIM_DEV_DUMMY_REGION_SIZE); + return PTR_ERR_OR_ZERO(nsim_dev->dummy_region); + } + + static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev) + { + devlink_region_destroy(nsim_dev->dummy_region); + } + + struct nsim_trap_item { + void *trap_ctx; + enum devlink_trap_action action; + }; + + struct nsim_trap_data { + struct delayed_work trap_report_dw; + struct nsim_trap_item *trap_items_arr; + struct nsim_dev *nsim_dev; + spinlock_t trap_lock; /* Protects trap_items_arr */ + }; + + /* All driver-specific traps must be documented in + * Documentation/networking/devlink-trap-netdevsim.rst + */ + enum { + NSIM_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX, + NSIM_TRAP_ID_FID_MISS, + }; + + #define NSIM_TRAP_NAME_FID_MISS "fid_miss" + + #define NSIM_TRAP_METADATA DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT + + #define NSIM_TRAP_DROP(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(DROP, DROP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + NSIM_TRAP_METADATA) + #define NSIM_TRAP_EXCEPTION(_id, _group_id) \ + DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id, \ + DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + NSIM_TRAP_METADATA) + #define NSIM_TRAP_DRIVER_EXCEPTION(_id, _group_id) \ + DEVLINK_TRAP_DRIVER(EXCEPTION, TRAP, NSIM_TRAP_ID_##_id, \ + NSIM_TRAP_NAME_##_id, \ + DEVLINK_TRAP_GROUP_GENERIC(_group_id), \ + NSIM_TRAP_METADATA) + + static const struct devlink_trap nsim_traps_arr[] = { + NSIM_TRAP_DROP(SMAC_MC, L2_DROPS), + NSIM_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS), + NSIM_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS), + NSIM_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS), + NSIM_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS), + NSIM_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS), + NSIM_TRAP_DRIVER_EXCEPTION(FID_MISS, L2_DROPS), + NSIM_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS), + NSIM_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS), + NSIM_TRAP_DROP(TAIL_DROP, BUFFER_DROPS), + }; + + #define NSIM_TRAP_L4_DATA_LEN 100 + + static struct sk_buff *nsim_dev_trap_skb_build(void) + { 
+ int tot_len, data_len = NSIM_TRAP_L4_DATA_LEN;
+ struct sk_buff *skb;
+ struct udphdr *udph;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+ if (!skb)
+ return NULL;
+ tot_len = sizeof(struct iphdr) + sizeof(struct udphdr) + data_len;
+
+ eth = skb_put(skb, sizeof(struct ethhdr));
+ eth_random_addr(eth->h_dest);
+ eth_random_addr(eth->h_source);
+ eth->h_proto = htons(ETH_P_IP);
+ skb->protocol = htons(ETH_P_IP);
+
+ iph = skb_put(skb, sizeof(struct iphdr));
+ iph->protocol = IPPROTO_UDP;
+ iph->saddr = in_aton("192.0.2.1");
+ iph->daddr = in_aton("198.51.100.1");
+ iph->version = 0x4;
+ iph->frag_off = 0;
+ iph->ihl = 0x5;
+ iph->tot_len = htons(tot_len);
+ iph->ttl = 100;
+ ip_send_check(iph);
+
+ udph = skb_put_zero(skb, sizeof(struct udphdr) + data_len);
+ get_random_bytes(&udph->source, sizeof(u16));
+ get_random_bytes(&udph->dest, sizeof(u16));
+ udph->len = htons(sizeof(struct udphdr) + data_len);
+
+ return skb;
+ }
+
+ static void nsim_dev_trap_report(struct nsim_dev_port *nsim_dev_port)
+ {
+ struct nsim_dev *nsim_dev = nsim_dev_port->ns->nsim_dev;
+ struct devlink *devlink = priv_to_devlink(nsim_dev);
+ struct nsim_trap_data *nsim_trap_data;
+ int i;
+
+ nsim_trap_data = nsim_dev->trap_data;
+
+ spin_lock(&nsim_trap_data->trap_lock);
+ for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) {
+ struct nsim_trap_item *nsim_trap_item;
+ struct sk_buff *skb;
+
+ nsim_trap_item = &nsim_trap_data->trap_items_arr[i];
+ if (nsim_trap_item->action == DEVLINK_TRAP_ACTION_DROP)
+ continue;
+
+ skb = nsim_dev_trap_skb_build();
+ if (!skb)
+ continue;
+ skb->dev = nsim_dev_port->ns->netdev;
+
+ /* Trapped packets are usually passed to devlink in softIRQ,
+ * but in this case they are generated in a workqueue. Disable
+ * softIRQs to prevent lockdep from complaining about
+ * "inconsistent lock state".
+ */
+ local_bh_disable();
+ devlink_trap_report(devlink, skb, nsim_trap_item->trap_ctx,
+ &nsim_dev_port->devlink_port);
+ local_bh_enable();
+ consume_skb(skb);
+ }
+ spin_unlock(&nsim_trap_data->trap_lock);
+ }
+
+ #define NSIM_TRAP_REPORT_INTERVAL_MS 100
+
+ static void nsim_dev_trap_report_work(struct work_struct *work)
+ {
+ struct nsim_trap_data *nsim_trap_data;
+ struct nsim_dev_port *nsim_dev_port;
+ struct nsim_dev *nsim_dev;
+
+ nsim_trap_data = container_of(work, struct nsim_trap_data,
+ trap_report_dw.work);
+ nsim_dev = nsim_trap_data->nsim_dev;
+
+ /* For each running port and enabled packet trap, generate a UDP
+ * packet with a random 5-tuple and report it.
+ */
+ mutex_lock(&nsim_dev->port_list_lock);
+ list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) {
+ if (!netif_running(nsim_dev_port->ns->netdev))
+ continue;
+
+ nsim_dev_trap_report(nsim_dev_port);
+ }
+ mutex_unlock(&nsim_dev->port_list_lock);
+
+ schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw,
+ msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS));
+ }
+
+ static int nsim_dev_traps_init(struct devlink *devlink)
+ {
+ struct nsim_dev *nsim_dev = devlink_priv(devlink);
+ struct nsim_trap_data *nsim_trap_data;
+ int err;
+
+ nsim_trap_data = kzalloc(sizeof(*nsim_trap_data), GFP_KERNEL);
+ if (!nsim_trap_data)
+ return -ENOMEM;
+
+ nsim_trap_data->trap_items_arr = kcalloc(ARRAY_SIZE(nsim_traps_arr),
+ sizeof(struct nsim_trap_item),
+ GFP_KERNEL);
+ if (!nsim_trap_data->trap_items_arr) {
+ err = -ENOMEM;
+ goto err_trap_data_free;
+ }
+
+ /* The lock is used to protect the action state of the registered
+ * traps.
The value is written by user and read in delayed work when + * iterating over all the traps. + */ + spin_lock_init(&nsim_trap_data->trap_lock); + nsim_trap_data->nsim_dev = nsim_dev; + nsim_dev->trap_data = nsim_trap_data; + + err = devlink_traps_register(devlink, nsim_traps_arr, + ARRAY_SIZE(nsim_traps_arr), NULL); + if (err) + goto err_trap_items_free; + + INIT_DELAYED_WORK(&nsim_dev->trap_data->trap_report_dw, + nsim_dev_trap_report_work); + schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, + msecs_to_jiffies(NSIM_TRAP_REPORT_INTERVAL_MS)); + + return 0; + + err_trap_items_free: + kfree(nsim_trap_data->trap_items_arr); + err_trap_data_free: + kfree(nsim_trap_data); + return err; + } + + static void nsim_dev_traps_exit(struct devlink *devlink) + { + struct nsim_dev *nsim_dev = devlink_priv(devlink); + + cancel_delayed_work_sync(&nsim_dev->trap_data->trap_report_dw); + devlink_traps_unregister(devlink, nsim_traps_arr, + ARRAY_SIZE(nsim_traps_arr)); + kfree(nsim_dev->trap_data->trap_items_arr); + kfree(nsim_dev->trap_data); + } + static int nsim_dev_reload(struct devlink *devlink, struct netlink_ext_ack *extack) { - struct nsim_dev *nsim_dev = devlink_priv(devlink); enum nsim_resource_id res_ids[] = { NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES }; + struct net *net = nsim_devlink_net(devlink); int i;
for (i = 0; i < ARRAY_SIZE(res_ids); ++i) { @@@ -209,11 -539,13 +536,12 @@@
err = devlink_resource_size_get(devlink, res_ids[i], &val); if (!err) { - err = nsim_fib_set_max(nsim_dev->fib_data, - res_ids[i], val, extack); + err = nsim_fib_set_max(net, res_ids[i], val, extack); if (err) return err; } } + nsim_devlink_param_load_driverinit_values(devlink);
return 0; } @@@ -258,11 -590,66 +586,66 @@@ static int nsim_dev_flash_update(struc return 0; }
+ static struct nsim_trap_item * + nsim_dev_trap_item_lookup(struct nsim_dev *nsim_dev, u16 trap_id) + { + struct nsim_trap_data *nsim_trap_data = nsim_dev->trap_data; + int i; + + for (i = 0; i < ARRAY_SIZE(nsim_traps_arr); i++) { + if (nsim_traps_arr[i].id == trap_id) + return &nsim_trap_data->trap_items_arr[i]; + } + + return NULL; + } + + static int nsim_dev_devlink_trap_init(struct devlink *devlink, + const struct devlink_trap *trap, + void *trap_ctx) + { + struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_trap_item *nsim_trap_item; + + nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id); + if (WARN_ON(!nsim_trap_item)) + return -ENOENT; + + nsim_trap_item->trap_ctx = trap_ctx; + nsim_trap_item->action = trap->init_action; + + return 0; + } + + static int + nsim_dev_devlink_trap_action_set(struct devlink *devlink, + const struct devlink_trap *trap, + enum devlink_trap_action action) + { + struct nsim_dev *nsim_dev = devlink_priv(devlink); + struct nsim_trap_item *nsim_trap_item; + + nsim_trap_item = nsim_dev_trap_item_lookup(nsim_dev, trap->id); + if (WARN_ON(!nsim_trap_item)) + return -ENOENT; + + spin_lock(&nsim_dev->trap_data->trap_lock); + nsim_trap_item->action = action; + spin_unlock(&nsim_dev->trap_data->trap_lock); + + return 0; + } + static const struct devlink_ops nsim_dev_devlink_ops = { .reload = nsim_dev_reload, .flash_update = nsim_dev_flash_update, + .trap_init = nsim_dev_devlink_trap_init, + .trap_action_set = nsim_dev_devlink_trap_action_set, };
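The nsim_traps_arr table that these callbacks index is defined outside this hunk. As a rough sketch only — the macro argument list here is an assumption, not taken from this diff — entries for such a table are typically built from devlink's generic trap macros:

/* Hypothetical sketch: nsim_traps_arr itself is not part of this hunk,
 * and the exact DEVLINK_TRAP_GENERIC() signature used here is an
 * assumption.
 */
static const struct devlink_trap nsim_traps_arr[] = {
	DEVLINK_TRAP_GENERIC(DROP, DROP, SMAC_MC,
			     DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS), 0),
	DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, TTL_ERROR,
			     DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS), 0),
};

Note that the spin_lock taken in nsim_dev_devlink_trap_action_set() pairs with the one held by the report workqueue above, so an action change is never observed half-written by the reporting loop.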
+ #define NSIM_DEV_MAX_MACS_DEFAULT 32 + #define NSIM_DEV_TEST1_DEFAULT true + static struct nsim_dev * nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) { @@@ -280,31 -667,63 +663,55 @@@ INIT_LIST_HEAD(&nsim_dev->port_list); mutex_init(&nsim_dev->port_list_lock); nsim_dev->fw_update_status = true; + nsim_dev->max_macs = NSIM_DEV_MAX_MACS_DEFAULT; + nsim_dev->test1 = NSIM_DEV_TEST1_DEFAULT;
- nsim_dev->fib_data = nsim_fib_create(); - if (IS_ERR(nsim_dev->fib_data)) { - err = PTR_ERR(nsim_dev->fib_data); - goto err_devlink_free; - } - err = nsim_dev_resources_register(devlink); if (err) - goto err_fib_destroy; + goto err_devlink_free;
err = devlink_register(devlink, &nsim_bus_dev->dev); if (err) goto err_resources_unregister;
- err = nsim_dev_debugfs_init(nsim_dev); + err = devlink_params_register(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); if (err) goto err_dl_unregister; + nsim_devlink_set_params_init_values(nsim_dev, devlink); + + err = nsim_dev_dummy_region_init(nsim_dev, devlink); + if (err) + goto err_params_unregister; + + err = nsim_dev_traps_init(devlink); + if (err) + goto err_dummy_region_exit; + + err = nsim_dev_debugfs_init(nsim_dev); + if (err) + goto err_traps_exit;
err = nsim_bpf_dev_init(nsim_dev); if (err) goto err_debugfs_exit;
+ devlink_params_publish(devlink); return nsim_dev;
err_debugfs_exit: nsim_dev_debugfs_exit(nsim_dev); + err_traps_exit: + nsim_dev_traps_exit(devlink); + err_dummy_region_exit: + nsim_dev_dummy_region_exit(nsim_dev); + err_params_unregister: + devlink_params_unregister(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); err_dl_unregister: devlink_unregister(devlink); err_resources_unregister: devlink_resources_unregister(devlink, NULL); -err_fib_destroy: - nsim_fib_destroy(nsim_dev->fib_data); err_devlink_free: devlink_free(devlink); return ERR_PTR(err); @@@ -316,8 -735,13 +723,12 @@@ static void nsim_dev_destroy(struct nsi
nsim_bpf_dev_exit(nsim_dev); nsim_dev_debugfs_exit(nsim_dev); + nsim_dev_traps_exit(devlink); + nsim_dev_dummy_region_exit(nsim_dev); + devlink_params_unregister(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); devlink_unregister(devlink); devlink_resources_unregister(devlink, NULL); - nsim_fib_destroy(nsim_dev->fib_data); mutex_destroy(&nsim_dev->port_list_lock); devlink_free(devlink); } diff --combined drivers/net/netdevsim/netdevsim.h index 9404637d34b7,262a6978bbca..66bf13765ad0 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@@ -145,6 -145,7 +145,7 @@@ struct nsim_dev_port struct nsim_dev { struct nsim_bus_dev *nsim_bus_dev; struct nsim_fib_data *fib_data; + struct nsim_trap_data *trap_data; struct dentry *ddir; struct dentry *ports_ddir; struct bpf_offload_dev *bpf_dev; @@@ -158,6 -159,9 +159,9 @@@ struct list_head port_list; struct mutex port_list_lock; /* protects port list */ bool fw_update_status; + u32 max_macs; + bool test1; + struct devlink_region *dummy_region; };
int nsim_dev_init(void); @@@ -169,10 -173,12 +173,10 @@@ int nsim_dev_port_add(struct nsim_bus_d int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_index);
-struct nsim_fib_data *nsim_fib_create(void); -void nsim_fib_destroy(struct nsim_fib_data *fib_data); -u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, bool max); -int nsim_fib_set_max(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, u64 val, +int nsim_fib_init(void); +void nsim_fib_exit(void); +u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max); +int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, struct netlink_ext_ack *extack);
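The reworked prototypes replace the fib_data handle with a struct net, implying the FIB state is now kept per network namespace. A minimal sketch of the lookup this suggests, assuming a pernet key registered elsewhere in the fib code:

#include <net/netns/generic.h>

/* Sketch under assumptions: nsim_fib_net_id is a pernet key the fib
 * code would register; lookups then go through net_generic().
 */
static struct nsim_fib_data *nsim_fib_net(struct net *net)
{
	return net_generic(net, nsim_fib_net_id);
}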
#if IS_ENABLED(CONFIG_XFRM_OFFLOAD) diff --combined drivers/net/phy/at803x.c index 6ad8b1c63c34,d98aa56710a9..2aa7b2e60046 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@@ -249,28 -249,40 +249,24 @@@ static int at803x_config_init(struct ph { int ret;
- ret = genphy_config_init(phydev); - if (ret < 0) - return ret; - /* The RX and TX delay default is: * after HW reset: RX delay enabled and TX delay disabled * after SW reset: RX delay enabled, while TX delay retains the * value before reset. - * - * So let's first disable the RX and TX delays in PHY and enable - * them based on the mode selected (this also takes care of RGMII - * mode where we expect delays to be disabled) */ - - ret = at803x_disable_rx_delay(phydev); - if (ret < 0) - return ret; - ret = at803x_disable_tx_delay(phydev); - if (ret < 0) - return ret; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { - /* If RGMII_ID or RGMII_RXID are specified enable RX delay, - * otherwise keep it disabled - */ + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) ret = at803x_enable_rx_delay(phydev); - if (ret < 0) - return ret; - } + else + ret = at803x_disable_rx_delay(phydev); + if (ret < 0) + return ret;
if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { - /* If RGMII_ID or RGMII_TXID are specified enable TX delay, - * otherwise keep it disabled - */ + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) ret = at803x_enable_tx_delay(phydev); - } + else + ret = at803x_disable_tx_delay(phydev);
return ret; } diff --combined drivers/net/phy/phy_device.c index 27ebc2c6c2d0,d5db7604d7c4..d347ddcac45b --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@@ -1564,24 -1564,20 +1564,20 @@@ EXPORT_SYMBOL(phy_reset_after_clk_enabl */ static int genphy_config_advert(struct phy_device *phydev) { - u32 advertise; - int bmsr, adv; - int err, changed = 0; + int err, bmsr, changed = 0; + u32 adv;
/* Only allow advertising what this PHY supports */ linkmode_and(phydev->advertising, phydev->advertising, phydev->supported); - if (!ethtool_convert_link_mode_to_legacy_u32(&advertise, - phydev->advertising)) - phydev_warn(phydev, "PHY advertising (%*pb) more modes than genphy supports, some modes not advertised.\n", - __ETHTOOL_LINK_MODE_MASK_NBITS, - phydev->advertising); + + adv = linkmode_adv_to_mii_adv_t(phydev->advertising);
/* Setup standard advertisement */ err = phy_modify_changed(phydev, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, - ethtool_adv_to_mii_adv_t(advertise)); + adv); if (err < 0) return err; if (err > 0) @@@ -1598,13 -1594,7 +1594,7 @@@ if (!(bmsr & BMSR_ESTATEN)) return changed;
- /* Configure gigabit if it's supported */ - adv = 0; - if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, - phydev->supported) || - linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - phydev->supported)) - adv = ethtool_adv_to_mii_ctrl1000_t(advertise); + adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising);
err = phy_modify_changed(phydev, MII_CTRL1000, ADVERTISE_1000FULL | ADVERTISE_1000HALF, @@@ -1681,18 -1671,20 +1671,20 @@@ int genphy_restart_aneg(struct phy_devi EXPORT_SYMBOL(genphy_restart_aneg);
/** - * genphy_config_aneg - restart auto-negotiation or write BMCR + * __genphy_config_aneg - restart auto-negotiation or write BMCR * @phydev: target phy_device struct + * @changed: whether autoneg is requested * * Description: If auto-negotiation is enabled, we configure the * advertising, and then restart auto-negotiation. If it is not * enabled, then we write the BMCR. */ - int genphy_config_aneg(struct phy_device *phydev) + int __genphy_config_aneg(struct phy_device *phydev, bool changed) { - int err, changed; + int err;
- changed = genphy_config_eee_advert(phydev); + if (genphy_config_eee_advert(phydev)) + changed = true;
if (AUTONEG_ENABLE != phydev->autoneg) return genphy_setup_forced(phydev); @@@ -1700,10 -1692,10 +1692,10 @@@ err = genphy_config_advert(phydev); if (err < 0) /* error */ return err; + else if (err) + changed = true;
- changed |= err; - - if (changed == 0) { + if (!changed) { /* Advertisement hasn't changed, but maybe aneg was never on to * begin with? Or maybe phy was isolated? */ @@@ -1713,18 -1705,15 +1705,15 @@@ return ctl;
if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE)) - changed = 1; /* do restart aneg */ + changed = true; /* do restart aneg */ }
/* Only restart aneg if we are advertising something different * than we were before. */ - if (changed > 0) - return genphy_restart_aneg(phydev); - - return 0; + return changed ? genphy_restart_aneg(phydev) : 0; } - EXPORT_SYMBOL(genphy_config_aneg); + EXPORT_SYMBOL(__genphy_config_aneg);
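Exporting __genphy_config_aneg() with an explicit changed argument lets PHY drivers that have already rewritten the advertisement force the restart decision. The old entry point presumably survives as a thin wrapper along these lines (a sketch, not part of this hunk):

static inline int genphy_config_aneg(struct phy_device *phydev)
{
	/* Passing changed = false preserves the old behaviour: aneg is
	 * only restarted if the advertisement actually changed.
	 */
	return __genphy_config_aneg(phydev, false);
}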
/** * genphy_aneg_done - return auto-negotiation status @@@ -1752,17 -1741,7 +1741,17 @@@ EXPORT_SYMBOL(genphy_aneg_done) */ int genphy_update_link(struct phy_device *phydev) { - int status; + int status = 0, bmcr; + + bmcr = phy_read(phydev, MII_BMCR); + if (bmcr < 0) + return bmcr; + + /* Autoneg is being started, therefore disregard BMSR value and + * report link as down. + */ + if (bmcr & BMCR_ANRESTART) + goto done;
/* The link state is latched low so that momentary link * drops can be detected. Do not double-read the status @@@ -1805,7 -1784,7 +1794,7 @@@ EXPORT_SYMBOL(genphy_update_link) */ int genphy_read_status(struct phy_device *phydev) { - int adv, lpa, lpagb, err, old_link = phydev->link; + int lpa, lpagb, err, old_link = phydev->link;
/* Update the link, but return if there was an error */ err = genphy_update_link(phydev); @@@ -1821,19 -1800,18 +1810,18 @@@ phydev->pause = 0; phydev->asym_pause = 0;
- linkmode_zero(phydev->lp_advertising); - if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { if (phydev->is_gigabit_capable) { lpagb = phy_read(phydev, MII_STAT1000); if (lpagb < 0) return lpagb;
- adv = phy_read(phydev, MII_CTRL1000); - if (adv < 0) - return adv; - if (lpagb & LPA_1000MSFAIL) { + int adv = phy_read(phydev, MII_CTRL1000); + + if (adv < 0) + return adv; + if (adv & CTL1000_ENABLE_MASTER) phydev_err(phydev, "Master/Slave resolution failed, maybe conflicting manual settings?\n"); else @@@ -1907,57 -1885,6 +1895,6 @@@ int genphy_soft_reset(struct phy_devic } EXPORT_SYMBOL(genphy_soft_reset);
- int genphy_config_init(struct phy_device *phydev) - { - int val; - __ETHTOOL_DECLARE_LINK_MODE_MASK(features) = { 0, }; - - linkmode_set_bit_array(phy_basic_ports_array, - ARRAY_SIZE(phy_basic_ports_array), - features); - linkmode_set_bit(ETHTOOL_LINK_MODE_Pause_BIT, features); - linkmode_set_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, features); - - /* Do we support autonegotiation? */ - val = phy_read(phydev, MII_BMSR); - if (val < 0) - return val; - - if (val & BMSR_ANEGCAPABLE) - linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, features); - - if (val & BMSR_100FULL) - linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, features); - if (val & BMSR_100HALF) - linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, features); - if (val & BMSR_10FULL) - linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, features); - if (val & BMSR_10HALF) - linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, features); - - if (val & BMSR_ESTATEN) { - val = phy_read(phydev, MII_ESTATUS); - if (val < 0) - return val; - - if (val & ESTATUS_1000_TFULL) - linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - features); - if (val & ESTATUS_1000_THALF) - linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, - features); - if (val & ESTATUS_1000_XFULL) - linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseX_Full_BIT, - features); - } - - linkmode_and(phydev->supported, phydev->supported, features); - linkmode_and(phydev->advertising, phydev->advertising, features); - - return 0; - } - EXPORT_SYMBOL(genphy_config_init); - /** * genphy_read_abilities - read PHY abilities from Clause 22 registers * @phydev: target phy_device struct diff --combined drivers/net/usb/lan78xx.c index f033fee225a1,769bb262fbec..58f5a219fb65 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@@ -1258,8 -1258,7 +1258,7 @@@ static void lan78xx_status(struct lan78 return; }
- memcpy(&intdata, urb->transfer_buffer, 4); - le32_to_cpus(&intdata); + intdata = get_unaligned_le32(urb->transfer_buffer);
if (intdata & INT_ENP_PHY_INT) { netif_dbg(dev, link, dev->net, "PHY INTR: 0x%08x\n", intdata); @@@ -2730,6 -2729,7 +2729,7 @@@ static struct sk_buff *lan78xx_tx_prep( struct sk_buff *skb, gfp_t flags) { u32 tx_cmd_a, tx_cmd_b; + void *ptr;
if (skb_cow_head(skb, TX_OVERHEAD)) { dev_kfree_skb_any(skb); @@@ -2758,13 -2758,9 +2758,9 @@@ tx_cmd_b |= skb_vlan_tag_get(skb) & TX_CMD_B_VTAG_MASK_; }
- skb_push(skb, 4); - cpu_to_le32s(&tx_cmd_b); - memcpy(skb->data, &tx_cmd_b, 4); - - skb_push(skb, 4); - cpu_to_le32s(&tx_cmd_a); - memcpy(skb->data, &tx_cmd_a, 4); + ptr = skb_push(skb, 8); + put_unaligned_le32(tx_cmd_a, ptr); + put_unaligned_le32(tx_cmd_b, ptr + 4);
return skb; } @@@ -3105,16 -3101,13 +3101,13 @@@ static int lan78xx_rx(struct lan78xx_ne struct sk_buff *skb2; unsigned char *packet;
- memcpy(&rx_cmd_a, skb->data, sizeof(rx_cmd_a)); - le32_to_cpus(&rx_cmd_a); + rx_cmd_a = get_unaligned_le32(skb->data); skb_pull(skb, sizeof(rx_cmd_a));
- memcpy(&rx_cmd_b, skb->data, sizeof(rx_cmd_b)); - le32_to_cpus(&rx_cmd_b); + rx_cmd_b = get_unaligned_le32(skb->data); skb_pull(skb, sizeof(rx_cmd_b));
- memcpy(&rx_cmd_c, skb->data, sizeof(rx_cmd_c)); - le16_to_cpus(&rx_cmd_c); + rx_cmd_c = get_unaligned_le16(skb->data); skb_pull(skb, sizeof(rx_cmd_c));
packet = skb->data; @@@ -3792,7 -3785,7 +3785,7 @@@ static int lan78xx_probe(struct usb_int ret = register_netdev(netdev); if (ret != 0) { netif_err(dev, probe, netdev, "couldn't register the device\n"); - goto out3; + goto out4; }
usb_set_intfdata(intf, dev); @@@ -3807,14 -3800,12 +3800,14 @@@
ret = lan78xx_phy_init(dev); if (ret < 0) - goto out4; + goto out5;
return 0;
-out4: +out5: unregister_netdev(netdev); +out4: + usb_free_urb(dev->urb_intr); out3: lan78xx_unbind(dev, intf); out2: diff --combined drivers/net/xen-netback/netback.c index c9262ffeefe4,4679fcf1a1c4..0020b2e8c279 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@@ -136,12 -136,12 +136,12 @@@ static inline struct xenvif_queue *ubuf
static u16 frag_get_pending_idx(skb_frag_t *frag) { - return (u16)frag->page_offset; + return (u16)skb_frag_off(frag); }
static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx) { - frag->page_offset = pending_idx; + skb_frag_off_set(frag, pending_idx); }
static inline pending_ring_idx_t pending_index(unsigned i) @@@ -925,7 -925,6 +925,7 @@@ static void xenvif_tx_build_gops(struc skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS; nskb = xenvif_alloc_skb(0); if (unlikely(nskb == NULL)) { + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); xenvif_tx_err(queue, &txreq, extra_count, idx); if (net_ratelimit()) @@@ -941,7 -940,6 +941,7 @@@
if (xenvif_set_skb_gso(queue->vif, skb, gso)) { /* Failure in xenvif_set_skb_gso is fatal. */ + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); kfree_skb(nskb); break; @@@ -1057,7 -1055,7 +1057,7 @@@ static int xenvif_handle_frag_list(stru int j; skb->truesize += skb->data_len; for (j = 0; j < i; j++) - put_page(frags[j].page.p); + put_page(skb_frag_page(&frags[j])); return -ENOMEM; }
@@@ -1069,8 -1067,8 +1069,8 @@@ BUG();
offset += len; - frags[i].page.p = page; - frags[i].page_offset = 0; + __skb_frag_set_page(&frags[i], page); + skb_frag_off_set(&frags[i], 0); skb_frag_size_set(&frags[i], len); }
@@@ -1655,9 -1653,6 +1655,6 @@@ static int __init netback_init(void
#ifdef CONFIG_DEBUG_FS xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL); - if (IS_ERR_OR_NULL(xen_netback_dbg_root)) - pr_warn("Init of debugfs returned %ld!\n", - PTR_ERR(xen_netback_dbg_root)); #endif /* CONFIG_DEBUG_FS */
return 0; diff --combined drivers/s390/net/qeth_core_main.c index 9c3310c4d61d,5aa0f1268bca..0803070246aa --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@@ -544,7 -544,6 +544,7 @@@ static struct qeth_reply *qeth_alloc_re if (reply) { refcount_set(&reply->refcnt, 1); init_completion(&reply->received); + spin_lock_init(&reply->lock); } return reply; } @@@ -800,13 -799,6 +800,13 @@@ static void qeth_issue_next_read_cb(str
if (!reply->callback) { rc = 0; + goto no_callback; + } + + spin_lock_irqsave(&reply->lock, flags); + if (reply->rc) { + /* Bail out when the requestor has already left: */ + rc = reply->rc; } else { if (cmd) { reply->offset = (u16)((char *)cmd - (char *)iob->data); @@@ -815,9 -807,7 +815,9 @@@ rc = reply->callback(card, reply, (unsigned long)iob); } } + spin_unlock_irqrestore(&reply->lock, flags);
+no_callback: if (rc <= 0) qeth_notify_reply(reply, rc); qeth_put_reply(reply); @@@ -1759,16 -1749,6 +1759,16 @@@ static int qeth_send_control_data(struc rc = (timeout == -ERESTARTSYS) ? -EINTR : -ETIME;
qeth_dequeue_reply(card, reply); + + if (reply_cb) { + /* Wait until the callback for a late reply has completed: */ + spin_lock_irq(&reply->lock); + if (rc) + /* Zap any callback that's still pending: */ + reply->rc = rc; + spin_unlock_irq(&reply->lock); + } + if (!rc) rc = reply->rc; qeth_put_reply(reply); @@@ -3535,7 -3515,7 +3535,7 @@@ static int qeth_get_elements_for_frags( int cnt, elements = 0;
for (cnt = 0; cnt < skb_shinfo(skb)->nr_frags; cnt++) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[cnt]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[cnt];
elements += qeth_get_elements_for_range( (addr_t)skb_frag_address(frag), diff --combined drivers/staging/unisys/visornic/visornic_main.c index 40dd573e73c3,6fa7726185de..1d1440d43002 --- a/drivers/staging/unisys/visornic/visornic_main.c +++ b/drivers/staging/unisys/visornic/visornic_main.c @@@ -284,9 -284,9 +284,9 @@@ static int visor_copy_fragsinfo_from_sk for (frag = 0; frag < numfrags; frag++) { count = add_physinfo_entries(page_to_pfn( skb_frag_page(&skb_shinfo(skb)->frags[frag])), - skb_shinfo(skb)->frags[frag].page_offset, - skb_shinfo(skb)->frags[frag].size, count, - frags_max, frags); + skb_frag_off(&skb_shinfo(skb)->frags[frag]), + skb_frag_size(&skb_shinfo(skb)->frags[frag]), + count, frags_max, frags); /* add_physinfo_entries only returns * zero if the frags array is out of room * That should never happen because we @@@ -1750,8 -1750,7 +1750,8 @@@ static int visornic_poll(struct napi_st }
/* poll_for_irq - checks the status of the response queue
- * @v: Void pointer to the visronic devdata struct.
+ * @t: pointer to the 'struct timer_list' from which we can retrieve
+ *	the visornic devdata struct.
 *
 * Main function of the vnic_incoming thread. Periodically check the response
 * queue and drain it if needed.
diff --combined include/Kbuild
index fc2aa4e20658,af498acb7cd2..ac284c7aa05b
--- a/include/Kbuild
+++ b/include/Kbuild
@@@ -386,31 -386,6 +386,6 @@@ header-test- += linux/mvebu-pmsu.
header-test- += linux/mxm-wmi.h
header-test- += linux/n_r3964.h
header-test- += linux/ndctl.h
- header-test- += linux/netfilter/ipset/ip_set.h
- header-test- += linux/netfilter/ipset/ip_set_bitmap.h
- header-test- += linux/netfilter/ipset/ip_set_comment.h
- header-test- += linux/netfilter/ipset/ip_set_counter.h
- header-test- += linux/netfilter/ipset/ip_set_getport.h
- header-test- += linux/netfilter/ipset/ip_set_hash.h
- header-test- += linux/netfilter/ipset/ip_set_list.h
- header-test- += linux/netfilter/ipset/ip_set_skbinfo.h
- header-test- += linux/netfilter/ipset/ip_set_timeout.h
- header-test- += linux/netfilter/nf_conntrack_amanda.h
- header-test- += linux/netfilter/nf_conntrack_ftp.h
- header-test- += linux/netfilter/nf_conntrack_h323.h
- header-test- += linux/netfilter/nf_conntrack_h323_asn1.h
- header-test- += linux/netfilter/nf_conntrack_irc.h
- header-test- += linux/netfilter/nf_conntrack_pptp.h
- header-test- += linux/netfilter/nf_conntrack_proto_gre.h
- header-test- += linux/netfilter/nf_conntrack_sip.h
- header-test- += linux/netfilter/nf_conntrack_snmp.h
- header-test- += linux/netfilter/nf_conntrack_tftp.h
- header-test- += linux/netfilter/x_tables.h
- header-test- += linux/netfilter_arp/arp_tables.h
- header-test- += linux/netfilter_bridge/ebtables.h
- header-test- += linux/netfilter_ipv4/ip4_tables.h
- header-test- += linux/netfilter_ipv4/ip_tables.h
- header-test- += linux/netfilter_ipv6/ip6_tables.h
header-test- += linux/nfs.h
header-test- += linux/nfs_fs_i.h
header-test- += linux/nfs_fs_sb.h
@@@ -874,43 -849,6 +849,6 @@@ header-test- += net/mpls_iptunnel.
header-test- += net/mrp.h header-test- += net/ncsi.h header-test- += net/netevent.h - header-test- += net/netfilter/br_netfilter.h - header-test- += net/netfilter/ipv4/nf_dup_ipv4.h - header-test- += net/netfilter/ipv6/nf_defrag_ipv6.h - header-test- += net/netfilter/ipv6/nf_dup_ipv6.h - header-test- += net/netfilter/nf_conntrack.h - header-test- += net/netfilter/nf_conntrack_acct.h - header-test- += net/netfilter/nf_conntrack_bridge.h - header-test- += net/netfilter/nf_conntrack_core.h - header-test- += net/netfilter/nf_conntrack_count.h - header-test- += net/netfilter/nf_conntrack_ecache.h - header-test- += net/netfilter/nf_conntrack_expect.h - header-test- += net/netfilter/nf_conntrack_extend.h - header-test- += net/netfilter/nf_conntrack_helper.h - header-test- += net/netfilter/nf_conntrack_l4proto.h - header-test- += net/netfilter/nf_conntrack_labels.h - header-test- += net/netfilter/nf_conntrack_seqadj.h - header-test- += net/netfilter/nf_conntrack_synproxy.h - header-test- += net/netfilter/nf_conntrack_timeout.h - header-test- += net/netfilter/nf_conntrack_timestamp.h - header-test- += net/netfilter/nf_conntrack_tuple.h - header-test- += net/netfilter/nf_dup_netdev.h - header-test- += net/netfilter/nf_flow_table.h - header-test- += net/netfilter/nf_nat.h - header-test- += net/netfilter/nf_nat_helper.h - header-test- += net/netfilter/nf_nat_masquerade.h - header-test- += net/netfilter/nf_nat_redirect.h - header-test- += net/netfilter/nf_queue.h - header-test- += net/netfilter/nf_reject.h - header-test- += net/netfilter/nf_synproxy.h - header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables.h - header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables_core.h - header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables_ipv4.h - header-test- += net/netfilter/nf_tables_ipv6.h - header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables_offload.h - header-test- += net/netfilter/nft_fib.h - header-test- += net/netfilter/nft_meta.h - header-test- += net/netfilter/nft_reject.h header-test- += net/netns/can.h header-test- += net/netns/generic.h header-test- += net/netns/ieee802154_6lowpan.h @@@ -945,6 -883,12 +883,6 @@@ header-test- += net/xdp. header-test- += net/xdp_priv.h header-test- += pcmcia/cistpl.h header-test- += pcmcia/ds.h -header-test- += rdma/ib.h -header-test- += rdma/iw_portmap.h -header-test- += rdma/opa_port_info.h -header-test- += rdma/rdmavt_cq.h -header-test- += rdma/restrack.h -header-test- += rdma/signature.h header-test- += rdma/tid_rdma_defs.h header-test- += scsi/fc/fc_encaps.h header-test- += scsi/fc/fc_fc2.h @@@ -1134,18 -1078,6 +1072,6 @@@ header-test- += uapi/linux/kvm_para. 
header-test- += uapi/linux/lightnvm.h header-test- += uapi/linux/mic_common.h header-test- += uapi/linux/mman.h - header-test- += uapi/linux/netfilter/ipset/ip_set_bitmap.h - header-test- += uapi/linux/netfilter/ipset/ip_set_hash.h - header-test- += uapi/linux/netfilter/ipset/ip_set_list.h - header-test- += uapi/linux/netfilter/nf_synproxy.h - header-test- += uapi/linux/netfilter/xt_policy.h - header-test- += uapi/linux/netfilter/xt_set.h - header-test- += uapi/linux/netfilter_arp/arp_tables.h - header-test- += uapi/linux/netfilter_arp/arpt_mangle.h - header-test- += uapi/linux/netfilter_ipv4/ip_tables.h - header-test- += uapi/linux/netfilter_ipv4/ipt_LOG.h - header-test- += uapi/linux/netfilter_ipv6/ip6_tables.h - header-test- += uapi/linux/netfilter_ipv6/ip6t_LOG.h header-test- += uapi/linux/nilfs2_ondisk.h header-test- += uapi/linux/patchkey.h header-test- += uapi/linux/ptrace.h diff --combined include/linux/mlx5/driver.h index 0acd28f2e62c,df23f17eed64..ec668f63e8d3 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@@ -47,7 -47,6 +47,7 @@@ #include <linux/interrupt.h> #include <linux/idr.h> #include <linux/notifier.h> +#include <linux/refcount.h>
#include <linux/mlx5/device.h> #include <linux/mlx5/doorbell.h> @@@ -190,7 -189,6 +190,6 @@@ enum mlx5_coredev_type };
struct mlx5_field_desc { - struct dentry *dent; int i; };
@@@ -243,11 -241,6 +242,6 @@@ struct mlx5_cmd_msg
struct mlx5_cmd_debug { struct dentry *dbg_root; - struct dentry *dbg_in; - struct dentry *dbg_out; - struct dentry *dbg_outlen; - struct dentry *dbg_status; - struct dentry *dbg_run; void *in_msg; void *out_msg; u8 status; @@@ -272,8 -265,6 +266,6 @@@ struct mlx5_cmd_stats u64 sum; u64 n; struct dentry *root; - struct dentry *avg; - struct dentry *count; /* protect command average calculations */ spinlock_t lock; }; @@@ -399,7 -390,7 +391,7 @@@ enum mlx5_res_type
struct mlx5_core_rsc_common { enum mlx5_res_type res; - atomic_t refcount; + refcount_t refcount; struct completion free; };
@@@ -478,6 -469,17 +470,17 @@@ struct mlx5_core_sriov u16 max_vfs; };
+ struct mlx5_fc_pool { + struct mlx5_core_dev *dev; + struct mutex pool_lock; /* protects pool lists */ + struct list_head fully_used; + struct list_head partially_used; + struct list_head unused; + int available_fcs; + int used_fcs; + int threshold; + }; + struct mlx5_fc_stats { spinlock_t counters_idr_lock; /* protects counters_idr */ struct idr counters_idr; @@@ -490,6 -492,7 +493,7 @@@ unsigned long next_query; unsigned long sampling_interval; /* jiffies */ u32 *bulk_query_out; + struct mlx5_fc_pool fc_pool; };
struct mlx5_events; @@@ -961,7 -964,7 +965,7 @@@ int mlx5_vector2eqn(struct mlx5_core_de int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
- int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); + void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, @@@ -973,7 -976,7 +977,7 @@@ int mlx5_db_alloc_node(struct mlx5_core void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db);
const char *mlx5_command_str(int command); - int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev); + void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int npsvs, u32 *sig_index); diff --combined include/linux/skbuff.h index ba5583522d24,7eb28b72d9ba..77c6dc88e95d --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@@ -14,6 -14,7 +14,7 @@@ #include <linux/compiler.h> #include <linux/time.h> #include <linux/bug.h> + #include <linux/bvec.h> #include <linux/cache.h> #include <linux/rbtree.h> #include <linux/socket.h> @@@ -308,58 -309,45 +309,45 @@@ extern int sysctl_max_skb_frags */ #define GSO_BY_FRAGS 0xFFFF
- typedef struct skb_frag_struct skb_frag_t; - - struct skb_frag_struct { - struct { - struct page *p; - } page; - #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 page_offset; - __u32 size; - #else - __u16 page_offset; - __u16 size; - #endif - }; + typedef struct bio_vec skb_frag_t;
/** - * skb_frag_size - Returns the size of a skb fragment + * skb_frag_size() - Returns the size of a skb fragment * @frag: skb fragment */ static inline unsigned int skb_frag_size(const skb_frag_t *frag) { - return frag->size; + return frag->bv_len; }
/** - * skb_frag_size_set - Sets the size of a skb fragment + * skb_frag_size_set() - Sets the size of a skb fragment * @frag: skb fragment * @size: size of fragment */ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) { - frag->size = size; + frag->bv_len = size; }
/** - * skb_frag_size_add - Incrementes the size of a skb fragment by %delta + * skb_frag_size_add() - Increments the size of a skb fragment by @delta * @frag: skb fragment * @delta: value to add */ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) { - frag->size += delta; + frag->bv_len += delta; }
/** - * skb_frag_size_sub - Decrements the size of a skb fragment by %delta + * skb_frag_size_sub() - Decrements the size of a skb fragment by @delta * @frag: skb fragment * @delta: value to subtract */ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) { - frag->size -= delta; + frag->bv_len -= delta; }
/** @@@ -379,7 -367,7 +367,7 @@@ static inline bool skb_frag_must_loop(s * skb_frag_foreach_page - loop over pages in a fragment * * @f: skb frag to operate on - * @f_off: offset from start of f->page.p + * @f_off: offset from start of f->bv_page * @f_len: length from f_off to loop over * @p: (temp var) current page * @p_off: (temp var) offset from start of current page, @@@ -1283,7 -1271,7 +1271,7 @@@ static inline int skb_flow_dissector_bp
struct bpf_flow_dissector; bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, - __be16 proto, int nhoff, int hlen); + __be16 proto, int nhoff, int hlen, unsigned int flags);
bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, @@@ -1374,14 -1362,6 +1362,14 @@@ static inline void skb_copy_hash(struc to->l4_hash = from->l4_hash; };
+static inline void skb_copy_decrypted(struct sk_buff *to, + const struct sk_buff *from) +{ +#ifdef CONFIG_TLS_DEVICE + to->decrypted = from->decrypted; +#endif +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { @@@ -2097,8 -2077,8 +2085,8 @@@ static inline void __skb_fill_page_desc * that not all callers have unique ownership of the page but rely * on page_is_pfmemalloc doing the right thing(tm). */ - frag->page.p = page; - frag->page_offset = off; + frag->bv_page = page; + frag->bv_offset = off; skb_frag_size_set(frag, size);
page = compound_head(page); @@@ -2878,6 -2858,46 +2866,46 @@@ static inline void skb_propagate_pfmema }
/** + * skb_frag_off() - Returns the offset of a skb fragment + * @frag: the paged fragment + */ + static inline unsigned int skb_frag_off(const skb_frag_t *frag) + { + return frag->bv_offset; + } + + /** + * skb_frag_off_add() - Increments the offset of a skb fragment by @delta + * @frag: skb fragment + * @delta: value to add + */ + static inline void skb_frag_off_add(skb_frag_t *frag, int delta) + { + frag->bv_offset += delta; + } + + /** + * skb_frag_off_set() - Sets the offset of a skb fragment + * @frag: skb fragment + * @offset: offset of fragment + */ + static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) + { + frag->bv_offset = offset; + } + + /** + * skb_frag_off_copy() - Sets the offset of a skb fragment from another fragment + * @fragto: skb fragment where offset is set + * @fragfrom: skb fragment offset is copied from + */ + static inline void skb_frag_off_copy(skb_frag_t *fragto, + const skb_frag_t *fragfrom) + { + fragto->bv_offset = fragfrom->bv_offset; + } + + /** * skb_frag_page - retrieve the page referred to by a paged fragment * @frag: the paged fragment * @@@ -2885,7 -2905,7 +2913,7 @@@ */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { - return frag->page.p; + return frag->bv_page; }
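With skb_frag_t now an alias for struct bio_vec, drivers must reach fragment state through these accessors instead of the removed page_offset/size/page.p fields. A representative conversion, as a sketch:

#include <linux/skbuff.h>

/* Sketch: sum the payload bytes in an skb's paged fragments via the
 * new accessors; the comments note the fields this series removes.
 */
static unsigned int skb_frags_total(const struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	unsigned int i, total = 0;

	for (i = 0; i < shinfo->nr_frags; i++) {
		const skb_frag_t *frag = &shinfo->frags[i];

		total += skb_frag_size(frag);	/* was frag->size */
		/* offsets: skb_frag_off(frag), was frag->page_offset;
		 * pages: skb_frag_page(frag), was frag->page.p
		 */
	}
	return total;
}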
/** @@@ -2943,7 -2963,7 +2971,7 @@@ static inline void skb_frag_unref(struc */ static inline void *skb_frag_address(const skb_frag_t *frag) { - return page_address(skb_frag_page(frag)) + frag->page_offset; + return page_address(skb_frag_page(frag)) + skb_frag_off(frag); }
/** @@@ -2959,7 -2979,18 +2987,18 @@@ static inline void *skb_frag_address_sa if (unlikely(!ptr)) return NULL;
- return ptr + frag->page_offset; + return ptr + skb_frag_off(frag); + } + + /** + * skb_frag_page_copy() - sets the page in a fragment from another fragment + * @fragto: skb fragment where page is set + * @fragfrom: skb fragment page is copied from + */ + static inline void skb_frag_page_copy(skb_frag_t *fragto, + const skb_frag_t *fragfrom) + { + fragto->bv_page = fragfrom->bv_page; }
/** @@@ -2971,7 -3002,7 +3010,7 @@@ */ static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page) { - frag->page.p = page; + frag->bv_page = page; }
/** @@@ -3007,7 -3038,7 +3046,7 @@@ static inline dma_addr_t skb_frag_dma_m enum dma_data_direction dir) { return dma_map_page(dev, skb_frag_page(frag), - frag->page_offset + offset, size, dir); + skb_frag_off(frag) + offset, size, dir); }
static inline struct sk_buff *pskb_copy(struct sk_buff *skb, @@@ -3174,10 -3205,10 +3213,10 @@@ static inline bool skb_can_coalesce(str if (skb_zcopy(skb)) return false; if (i) { - const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
return page == skb_frag_page(frag) && - off == frag->page_offset + skb_frag_size(frag); + off == skb_frag_off(frag) + skb_frag_size(frag); } return false; } diff --combined include/net/netfilter/nf_tables.h index 475d6f28ca67,dc301e3d6739..e73d16f8b870 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@@ -25,6 -25,7 +25,7 @@@ struct nft_pktinfo struct xt_action_param xt; };
+ #if IS_ENABLED(CONFIG_NETFILTER) static inline struct net *nft_net(const struct nft_pktinfo *pkt) { return pkt->xt.state->net; @@@ -57,6 -58,7 +58,7 @@@ static inline void nft_set_pktinfo(stru pkt->skb = skb; pkt->xt.state = state; } + #endif
static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt, struct sk_buff *skb) @@@ -421,7 -423,8 +423,7 @@@ struct nft_set unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - u16 flags:13, - bound:1, + u16 flags:14, genmask:2; u8 klen; u8 dlen; @@@ -926,9 -929,11 +928,11 @@@ struct nft_chain_type int family; struct module *owner; unsigned int hook_mask; + #if IS_ENABLED(CONFIG_NETFILTER) nf_hookfn *hooks[NF_MAX_HOOKS]; int (*ops_register)(struct net *net, const struct nf_hook_ops *ops); void (*ops_unregister)(struct net *net, const struct nf_hook_ops *ops); + #endif };
int nft_chain_validate_dependency(const struct nft_chain *chain, @@@ -954,7 -959,9 +958,9 @@@ struct nft_stats * @flow_block: flow block (for hardware offload) */ struct nft_base_chain { + #if IS_ENABLED(CONFIG_NETFILTER) struct nf_hook_ops ops; + #endif const struct nft_chain_type *type; u8 policy; u8 flags; @@@ -1151,7 -1158,9 +1157,9 @@@ struct nft_flowtable use:30; u64 handle; /* runtime data below here */ + #if IS_ENABLED(CONFIG_NETFILTER) struct nf_hook_ops *ops ____cacheline_aligned; + #endif struct nf_flowtable data; };
@@@ -1206,6 -1215,8 +1214,8 @@@ void nft_trace_notify(struct nft_tracei #define MODULE_ALIAS_NFT_OBJ(type) \ MODULE_ALIAS("nft-obj-" __stringify(type))
+ #if IS_ENABLED(CONFIG_NF_TABLES) + /* * The gencursor defines two generations, the currently active and the * next one. Objects contain a bitmask of 2 bits specifying the generations @@@ -1279,6 -1290,8 +1289,8 @@@ static inline void nft_set_elem_change_ ext->genmask ^= nft_genmask_next(net); }
+ #endif /* IS_ENABLED(CONFIG_NF_TABLES) */ + /* * We use a free bit in the genmask field to indicate the element * is busy, meaning it is currently being processed either by @@@ -1347,15 -1360,12 +1359,15 @@@ struct nft_trans_rule struct nft_trans_set { struct nft_set *set; u32 set_id; + bool bound; };
#define nft_trans_set(trans) \ (((struct nft_trans_set *)trans->data)->set) #define nft_trans_set_id(trans) \ (((struct nft_trans_set *)trans->data)->set_id) +#define nft_trans_set_bound(trans) \ + (((struct nft_trans_set *)trans->data)->bound)
struct nft_trans_chain { bool update; @@@ -1386,15 -1396,12 +1398,15 @@@ struct nft_trans_table struct nft_trans_elem { struct nft_set *set; struct nft_set_elem elem; + bool bound; };
#define nft_trans_elem_set(trans) \ (((struct nft_trans_elem *)trans->data)->set) #define nft_trans_elem(trans) \ (((struct nft_trans_elem *)trans->data)->elem) +#define nft_trans_elem_set_bound(trans) \ + (((struct nft_trans_elem *)trans->data)->bound)
struct nft_trans_obj { struct nft_object *obj; diff --combined include/net/netfilter/nf_tables_offload.h index c8b9dec376f5,8a5969d9b80b..db104665a9e4 --- a/include/net/netfilter/nf_tables_offload.h +++ b/include/net/netfilter/nf_tables_offload.h @@@ -9,6 -9,7 +9,7 @@@ struct nft_offload_reg u32 len; u32 base_offset; u32 offset; + struct nft_data data; struct nft_data mask; };
@@@ -63,6 -64,10 +64,10 @@@ struct nft_rule struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule); void nft_flow_rule_destroy(struct nft_flow_rule *flow); int nft_flow_rule_offload_commit(struct net *net); + void nft_indr_block_get_and_ing_cmd(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command command);
#define NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \ (__reg)->base_offset = \ @@@ -73,6 -78,4 +78,6 @@@ (__reg)->key = __key; \ memset(&(__reg)->mask, 0xff, (__reg)->len);
+int nft_chain_offload_priority(struct nft_base_chain *basechain); + #endif diff --combined include/net/pkt_cls.h index 98be18ef1ed3,0790a4ed909c..64999ffcb486 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@@ -70,15 -70,6 +70,6 @@@ static inline struct Qdisc *tcf_block_q return block->q; }
- int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident); - int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident); - void __tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident); - void tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident); - int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode);
@@@ -137,32 -128,6 +128,6 @@@ void tc_setup_cb_block_unregister(struc { }
- static inline - int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) - { - return 0; - } - - static inline - int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) - { - return 0; - } - - static inline - void __tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) - { - } - - static inline - void tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) - { - } - static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { @@@ -646,7 -611,7 +611,7 @@@ tc_cls_common_offload_init(struct flow_ { cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; - cls_common->prio = tp->prio; + cls_common->prio = tp->prio >> 16; if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE) cls_common->extack = extack; } diff --combined include/uapi/linux/bpf.h index a5aa7d3ac6a1,4393bd4b2419..0e66371bea13 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@@ -134,6 -134,7 +134,7 @@@ enum bpf_map_type BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, + BPF_MAP_TYPE_DEVMAP_HASH, };
/* Note that tracing related programs such as
@@@ -1466,8 -1467,8 +1467,8 @@@ union bpf_attr
 *		If no cookie has been set yet, generate a new cookie. Once
 *		generated, the socket cookie remains stable for the life of the
 *		socket. This helper can be useful for monitoring per socket
- *		networking traffic statistics as it provides a unique socket
- *		identifier per namespace.
+ *		networking traffic statistics as it provides a global socket
+ *		identifier that can be assumed unique.
 *	Return
 *		An 8-byte long non-decreasing number on success, or 0 if the
 *		socket field is missing inside *skb*.
@@@ -2713,6 -2714,33 +2714,33 @@@
 *		**-EPERM** if no permission to send the *sig*.
 *
 *		**-EAGAIN** if bpf program can try again.
+ *
+ * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ *	Description
+ *		Try to issue a SYN cookie for the packet with corresponding
+ *		IP/TCP headers, *iph* and *th*, on the listening socket in *sk*.
+ *
+ *		*iph* points to the start of the IPv4 or IPv6 header, while
+ *		*iph_len* contains **sizeof**\ (**struct iphdr**) or
+ *		**sizeof**\ (**struct ip6hdr**).
+ *
+ *		*th* points to the start of the TCP header, while *th_len*
+ *		contains the length of the TCP header.
+ *
+ *	Return
+ *		On success, lower 32 bits hold the generated SYN cookie,
+ *		followed by 16 bits which hold the MSS value for that cookie,
+ *		and the top 16 bits are unused.
+ *
+ *		On failure, the returned value is one of the following:
+ *
+ *		**-EINVAL** SYN cookie cannot be issued due to error
+ *
+ *		**-ENOENT** SYN cookie should not be issued (no SYN flood)
+ *
+ *		**-EOPNOTSUPP** kernel configuration does not enable SYN cookies
+ *
+ *		**-EPROTONOSUPPORT** IP packet version is not 4 or 6
+ */
 #define __BPF_FUNC_MAPPER(FN)	\
	FN(unspec),		\
@@@ -2824,7 -2852,8 +2852,8 @@@
	FN(strtoul),		\
	FN(sk_storage_get),	\
	FN(sk_storage_delete),	\
-	FN(send_signal),
+	FN(send_signal),	\
+	FN(tcp_gen_syncookie),
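Relying only on the signature and return layout documented above, a program would unpack the helper's result roughly like this (sketch; obtaining and bounds-checking sk, iph and th is elided):

	s64 ret = bpf_tcp_gen_syncookie(sk, iph, sizeof(struct iphdr),
					th, th_len);
	if (ret >= 0) {
		__u32 cookie = (__u32)ret;	/* lower 32 bits: SYN cookie */
		__u16 mss = ret >> 32;		/* next 16 bits: MSS for it */

		/* ... place cookie and mss into the crafted SYN-ACK ... */
	}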
/* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@@ -3507,6 -3536,10 +3536,10 @@@ enum bpf_task_fd_type BPF_FD_TYPE_URETPROBE, /* filename + offset */ };
+ #define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) + #define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) + #define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) + struct bpf_flow_keys { __u16 nhoff; __u16 thoff; @@@ -3528,6 -3561,8 +3561,8 @@@ __u32 ipv6_dst[4]; /* in6_addr; network order */ }; }; + __u32 flags; + __be32 flow_label; };
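A flow-dissector program observes these bits through the new flags member of struct bpf_flow_keys. A sketch of honouring one of them, assuming the usual libbpf program skeleton:

/* Sketch: SEC() comes from libbpf's bpf_helpers.h; the skeleton
 * around this program is assumed, not part of this diff.
 */
SEC("flow_dissector")
int dissect(struct __sk_buff *skb)
{
	struct bpf_flow_keys *keys = skb->flow_keys;

	/* The caller sets this flag to stop dissection at the first
	 * encapsulation header.
	 */
	if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
		return BPF_OK;

	/* ... otherwise continue into the inner headers ... */
	return BPF_OK;
}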
struct bpf_func_info { diff --combined net/ipv4/tcp.c index 77b485d60b9d,f8fa1686f7f3..051ef10374f6 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@@ -984,9 -984,6 +984,9 @@@ new_segment if (!skb) goto wait_for_memory;
+#ifdef CONFIG_TLS_DEVICE + skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); +#endif skb_entail(sk, skb); copy = size_goal; } @@@ -1165,7 -1162,7 +1165,7 @@@ int tcp_sendmsg_locked(struct sock *sk struct sockcm_cookie sockc; int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; - bool process_backlog = false; + int process_backlog = 0; bool zc = false; long timeo;
@@@ -1257,9 -1254,10 +1257,10 @@@ new_segment if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf;
- if (process_backlog && sk_flush_backlog(sk)) { - process_backlog = false; - goto restart; + if (unlikely(process_backlog >= 16)) { + process_backlog = 0; + if (sk_flush_backlog(sk)) + goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, @@@ -1267,7 -1265,7 +1268,7 @@@ if (!skb) goto wait_for_memory;
- process_backlog = true; + process_backlog++; skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb); @@@ -1779,19 -1777,21 +1780,21 @@@ static int tcp_zerocopy_receive(struct break; frags = skb_shinfo(skb)->frags; while (offset) { - if (frags->size > offset) + if (skb_frag_size(frags) > offset) goto out; - offset -= frags->size; + offset -= skb_frag_size(frags); frags++; } } - if (frags->size != PAGE_SIZE || frags->page_offset) { + if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) { int remaining = zc->recv_skip_hint; + int size = skb_frag_size(frags);
- while (remaining && (frags->size != PAGE_SIZE || - frags->page_offset)) { - remaining -= frags->size; + while (remaining && (size != PAGE_SIZE || + skb_frag_off(frags))) { + remaining -= size; frags++; + size = skb_frag_size(frags); } zc->recv_skip_hint -= remaining; break; @@@ -3784,8 -3784,8 +3787,8 @@@ int tcp_md5_hash_skb_data(struct tcp_md return 1;
for (i = 0; i < shi->nr_frags; ++i) { - const struct skb_frag_struct *f = &shi->frags[i]; - unsigned int offset = f->page_offset; + const skb_frag_t *f = &shi->frags[i]; + unsigned int offset = skb_frag_off(f); struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
sg_set_page(&sg, page, skb_frag_size(f), diff --combined net/ipv4/tcp_output.c index 979520e46e33,e6d02e05bb1c..5c46bc4c7e8d --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@@ -1320,7 -1320,6 +1320,7 @@@ int tcp_fragment(struct sock *sk, enum buff = sk_stream_alloc_skb(sk, nsize, gfp, true); if (!buff) return -ENOMEM; /* We'll just try again later. */ + skb_copy_decrypted(buff, skb);
sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); @@@ -1403,7 -1402,7 +1403,7 @@@ static int __pskb_trim_head(struct sk_b } else { shinfo->frags[k] = shinfo->frags[i]; if (eat) { - shinfo->frags[k].page_offset += eat; + skb_frag_off_add(&shinfo->frags[k], eat); skb_frag_size_sub(&shinfo->frags[k], eat); eat = 0; } @@@ -1875,7 -1874,6 +1875,7 @@@ static int tso_fragment(struct sock *sk buff = sk_stream_alloc_skb(sk, 0, gfp, true); if (unlikely(!buff)) return -ENOMEM; + skb_copy_decrypted(buff, skb);
sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); @@@ -2145,7 -2143,6 +2145,7 @@@ static int tcp_mtu_probe(struct sock *s sk_mem_charge(sk, nskb->truesize);
skb = tcp_send_head(sk); + skb_copy_decrypted(nskb, skb);
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; diff --combined net/netfilter/nf_tables_api.c index d47469f824a1,fe3b7b0c6c66..6d00bef023c4 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@@ -138,14 -138,9 +138,14 @@@ static void nft_set_trans_bind(const st return;
list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { - if (trans->msg_type == NFT_MSG_NEWSET && - nft_trans_set(trans) == set) { - set->bound = true; + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (nft_trans_set(trans) == set) + nft_trans_set_bound(trans) = true; + break; + case NFT_MSG_NEWSETELEM: + if (nft_trans_elem_set(trans) == set) + nft_trans_elem_set_bound(trans) = true; break; } } @@@ -1667,10 -1662,6 +1667,10 @@@ static int nf_tables_addchain(struct nf
chain->flags |= NFT_BASE_CHAIN | flags; basechain->policy = NF_ACCEPT; + if (chain->flags & NFT_CHAIN_HW_OFFLOAD && + nft_chain_offload_priority(basechain) < 0) + return -EOPNOTSUPP; + flow_block_init(&basechain->flow_block); } else { chain = kzalloc(sizeof(*chain), GFP_KERNEL); @@@ -6915,7 -6906,7 +6915,7 @@@ static int __nf_tables_abort(struct ne break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - if (nft_trans_set(trans)->bound) { + if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; } @@@ -6927,7 -6918,7 +6927,7 @@@ nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: - if (nft_trans_elem_set(trans)->bound) { + if (nft_trans_elem_set_bound(trans)) { nft_trans_destroy(trans); break; } @@@ -7602,6 -7593,11 +7602,11 @@@ static struct pernet_operations nf_tabl .exit = nf_tables_exit_net, };
+ static struct flow_indr_block_ing_entry block_ing_entry = { + .cb = nft_indr_block_get_and_ing_cmd, + .list = LIST_HEAD_INIT(block_ing_entry.list), + }; + static int __init nf_tables_module_init(void) { int err; @@@ -7633,6 -7629,7 +7638,7 @@@ goto err5;
nft_chain_route_init(); + flow_indr_add_block_ing_cb(&block_ing_entry); return err; err5: rhltable_destroy(&nft_objname_ht); @@@ -7649,6 -7646,7 +7655,7 @@@ err1
static void __exit nf_tables_module_exit(void) { + flow_indr_del_block_ing_cb(&block_ing_entry); nfnetlink_subsys_unregister(&nf_tables_subsys); unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); diff --combined net/netfilter/nf_tables_offload.c index c0d18c1d77ac,d3c4c9c88bc8..3c2725ade61b --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@@ -103,11 -103,10 +103,11 @@@ void nft_offload_update_dependency(stru }
static void nft_flow_offload_common_init(struct flow_cls_common_offload *common, - __be16 proto, - struct netlink_ext_ack *extack) + __be16 proto, int priority, + struct netlink_ext_ack *extack) { common->protocol = proto; + common->prio = priority; common->extack = extack; }
@@@ -125,15 -124,6 +125,15 @@@ static int nft_setup_cb_call(struct nft return 0; }
+int nft_chain_offload_priority(struct nft_base_chain *basechain) +{ + if (basechain->ops.priority <= 0 || + basechain->ops.priority > USHRT_MAX) + return -1; + + return 0; +} + static int nft_flow_offload_rule(struct nft_trans *trans, enum flow_cls_command command) { @@@ -152,8 -142,7 +152,8 @@@ if (flow) proto = flow->proto;
- nft_flow_offload_common_init(&cls_flow.common, proto, &extack); + nft_flow_offload_common_init(&cls_flow.common, proto, + basechain->ops.priority, &extack); cls_flow.command = command; cls_flow.cookie = (unsigned long) rule; if (flow) @@@ -182,24 -171,110 +182,110 @@@ static int nft_flow_offload_unbind(stru return 0; }
+ static int nft_block_setup(struct nft_base_chain *basechain, + struct flow_block_offload *bo, + enum flow_block_command cmd) + { + int err; + + switch (cmd) { + case FLOW_BLOCK_BIND: + err = nft_flow_offload_bind(bo, basechain); + break; + case FLOW_BLOCK_UNBIND: + err = nft_flow_offload_unbind(bo, basechain); + break; + default: + WARN_ON_ONCE(1); + err = -EOPNOTSUPP; + } + + return err; + } + + static int nft_block_offload_cmd(struct nft_base_chain *chain, + struct net_device *dev, + enum flow_block_command cmd) + { + struct netlink_ext_ack extack = {}; + struct flow_block_offload bo = {}; + int err; + + bo.net = dev_net(dev); + bo.block = &chain->flow_block; + bo.command = cmd; + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + bo.extack = &extack; + INIT_LIST_HEAD(&bo.cb_list); + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); + if (err < 0) + return err; + + return nft_block_setup(chain, &bo, cmd); + } + + static void nft_indr_block_ing_cmd(struct net_device *dev, + struct nft_base_chain *chain, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command cmd) + { + struct netlink_ext_ack extack = {}; + struct flow_block_offload bo = {}; + + if (!chain) + return; + + bo.net = dev_net(dev); + bo.block = &chain->flow_block; + bo.command = cmd; + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + bo.extack = &extack; + INIT_LIST_HEAD(&bo.cb_list); + + cb(dev, cb_priv, TC_SETUP_BLOCK, &bo); + + nft_block_setup(chain, &bo, cmd); + } + + static int nft_indr_block_offload_cmd(struct nft_base_chain *chain, + struct net_device *dev, + enum flow_block_command cmd) + { + struct flow_block_offload bo = {}; + struct netlink_ext_ack extack = {}; + + bo.net = dev_net(dev); + bo.block = &chain->flow_block; + bo.command = cmd; + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + bo.extack = &extack; + INIT_LIST_HEAD(&bo.cb_list); + + flow_indr_block_call(dev, &bo, cmd); + + if (list_empty(&bo.cb_list)) + return -EOPNOTSUPP; + + return nft_block_setup(chain, &bo, cmd); + } + #define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
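The indirect path exists for devices, such as tunnel netdevs, that have no ndo_setup_tc of their own; a driver opts in by registering an indirect block callback that flow_indr_block_call() can then reach. Roughly, with the helper name assumed from the flow_indr_block infrastructure rather than taken from this diff:

/* Hypothetical sketch: drv_setup_tc_cb is a driver callback of type
 * flow_indr_block_bind_cb_t, and the registration helper name is an
 * assumption.
 */
static int drv_watch_tunnel_dev(struct net_device *netdev, void *priv)
{
	return flow_indr_block_cb_register(netdev, priv,
					   drv_setup_tc_cb, priv);
}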
static int nft_flow_offload_chain(struct nft_trans *trans, enum flow_block_command cmd) { struct nft_chain *chain = trans->ctx.chain; - struct netlink_ext_ack extack = {}; - struct flow_block_offload bo = {}; struct nft_base_chain *basechain; struct net_device *dev; - int err;
if (!nft_is_base_chain(chain)) return -EOPNOTSUPP;
basechain = nft_base_chain(chain); dev = basechain->ops.dev; - if (!dev || !dev->netdev_ops->ndo_setup_tc) + if (!dev) return -EOPNOTSUPP;
/* Only default policy to accept is supported for now. */ @@@ -208,26 -283,10 +294,10 @@@ nft_trans_chain_policy(trans) != NF_ACCEPT) return -EOPNOTSUPP;
- bo.command = cmd; - bo.block = &basechain->flow_block; - bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; - bo.extack = &extack; - INIT_LIST_HEAD(&bo.cb_list); - - err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo); - if (err < 0) - return err; - - switch (cmd) { - case FLOW_BLOCK_BIND: - err = nft_flow_offload_bind(&bo, basechain); - break; - case FLOW_BLOCK_UNBIND: - err = nft_flow_offload_unbind(&bo, basechain); - break; - } - - return err; + if (dev->netdev_ops->ndo_setup_tc) + return nft_block_offload_cmd(basechain, dev, cmd); + else + return nft_indr_block_offload_cmd(basechain, dev, cmd); }
int nft_flow_rule_offload_commit(struct net *net) @@@ -277,3 -336,33 +347,33 @@@
return err; } + + void nft_indr_block_get_and_ing_cmd(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command command) + { + struct net *net = dev_net(dev); + const struct nft_table *table; + const struct nft_chain *chain; + + list_for_each_entry_rcu(table, &net->nft.tables, list) { + if (table->family != NFPROTO_NETDEV) + continue; + + list_for_each_entry_rcu(chain, &table->chains, list) { + if (nft_is_base_chain(chain)) { + struct nft_base_chain *basechain; + + basechain = nft_base_chain(chain); + if (!strncmp(basechain->dev_name, dev->name, + IFNAMSIZ)) { + nft_indr_block_ing_cmd(dev, basechain, + cb, cb_priv, + command); + return; + } + } + } + } + } diff --combined net/rxrpc/ar-internal.h index a42d6b833675,63b26baa108a..b99823f461d7 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@@ -185,18 -185,11 +185,18 @@@ struct rxrpc_host_header * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { - union { - u8 nr_jumbo; /* Number of jumbo subpackets */ - }; + atomic_t nr_ring_pins; /* Number of rxtx ring pins */ + u8 nr_subpackets; /* Number of subpackets */ + u8 rx_flags; /* Received packet flags */ +#define RXRPC_SKB_INCL_LAST 0x01 /* - Includes last packet */ +#define RXRPC_SKB_TX_BUFFER 0x02 /* - Is transmit buffer */ +#define RXRPC_SKB_NEEDS_COW 0x04 /* - Needs skb_cow_data() calling */ union { int remain; /* amount of space remaining for next write */ + + /* List of requested ACKs on subpackets */ + unsigned long rx_req_ack[(RXRPC_MAX_NR_JUMBO + BITS_PER_LONG - 1) / + BITS_PER_LONG]; };
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ @@@ -233,6 -226,9 +233,9 @@@ struct rxrpc_security int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, unsigned int, unsigned int, rxrpc_seq_t, u16);
+ /* Free crypto request on a call */ + void (*free_call_crypto)(struct rxrpc_call *); + /* Locate the data in a received packet that has been verified. */ void (*locate_data)(struct rxrpc_call *, struct sk_buff *, unsigned int *, unsigned int *); @@@ -261,8 -257,7 +264,8 @@@ */ struct rxrpc_local { struct rcu_head rcu; - atomic_t usage; + atomic_t active_users; /* Number of users of the local endpoint */ + atomic_t usage; /* Number of references to the structure */ struct rxrpc_net *rxnet; /* The network ns in which this resides */ struct list_head link; struct socket *socket; /* my UDP socket */ @@@ -565,6 -560,7 +568,7 @@@ struct rxrpc_call unsigned long expect_term_by; /* When we expect call termination by */ u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ + struct skcipher_request *cipher_req; /* Packet cipher request buffer */ struct timer_list timer; /* Combined event timer */ struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ @@@ -620,7 -616,8 +624,7 @@@ #define RXRPC_TX_ANNO_LAST 0x04 #define RXRPC_TX_ANNO_RESENT 0x08
-#define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ -#define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ +#define RXRPC_RX_ANNO_SUBPACKET 0x3f /* Subpacket number in jumbogram */ #define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ rxrpc_seq_t tx_hard_ack; /* Dead slot in buffer; the first transmitted but * not hard-ACK'd packet follows this. @@@ -656,6 -653,7 +660,6 @@@
/* receive-phase ACK management */ u8 ackr_reason; /* reason to ACK */ - u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_serial_t ackr_first_seq; /* first sequence number received */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ @@@ -749,7 -747,7 +753,7 @@@ int rxrpc_reject_call(struct rxrpc_soc /* * call_event.c */ -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *);
@@@ -1008,8 -1006,6 +1012,8 @@@ struct rxrpc_local *rxrpc_lookup_local( struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *); struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *); void rxrpc_put_local(struct rxrpc_local *); +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *); +void rxrpc_unuse_local(struct rxrpc_local *); void rxrpc_queue_local(struct rxrpc_local *); void rxrpc_destroy_all_locals(struct rxrpc_net *);
@@@ -1114,8 -1110,6 +1118,8 @@@ void rxrpc_see_skb(struct sk_buff *, en void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); +void rxrpc_pin_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_unpin_skb(struct sk_buff *, enum rxrpc_skb_trace);
/* * sysctl.c diff --combined net/rxrpc/call_object.c index 830b6152dfa3,60cbc81dc461..7997075f134a --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@@ -422,19 -422,6 +422,19 @@@ void rxrpc_get_call(struct rxrpc_call * }
/* + * Clean up the RxTx skb ring. + */ +static void rxrpc_cleanup_ring(struct rxrpc_call *call) +{ + int i; + + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { + rxrpc_unpin_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned); + call->rxtx_buffer[i] = NULL; + } +} + +/* * Detach a call from its owning socket. */ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) @@@ -442,6 -429,7 +442,6 @@@ const void *here = __builtin_return_address(0); struct rxrpc_connection *conn = call->conn; bool put = false; - int i;
_enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
@@@ -488,10 -476,18 +488,12 @@@
_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
- if (conn) + if (conn) { rxrpc_disconnect_call(call); + conn->security->free_call_crypto(call); + }
- for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { - rxrpc_free_skb(call->rxtx_buffer[i], - (call->tx_phase ? rxrpc_skb_tx_cleaned : - rxrpc_skb_rx_cleaned)); - call->rxtx_buffer[i] = NULL; - } - + rxrpc_cleanup_ring(call); _leave(""); }
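Since rxrpc_release_call() now calls conn->security->free_call_crypto() unconditionally, every rxrpc_security implementation needs the new op; for the no-security table that is presumably an empty stub along these lines (an assumption, this diff does not show it):

	static void none_free_call_crypto(struct rxrpc_call *call)
	{
		/* no per-call cipher request to release */
	}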
@@@ -574,6 -570,8 +576,6 @@@ static void rxrpc_rcu_destroy_call(stru */ void rxrpc_cleanup_call(struct rxrpc_call *call) { - int i; - _net("DESTROY CALL %d", call->debug_id);
memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); @@@ -584,8 -582,13 +586,8 @@@ ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); ASSERTCMP(call->conn, ==, NULL);
- /* Clean up the Rx/Tx buffer */ - for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) - rxrpc_free_skb(call->rxtx_buffer[i], - (call->tx_phase ? rxrpc_skb_tx_cleaned : - rxrpc_skb_rx_cleaned)); - - rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned); + rxrpc_cleanup_ring(call); + rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned);
call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } diff --combined net/rxrpc/rxkad.c index c60c520fde7c,dbb109da1835..8d8aa3c230b5 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@@ -43,6 -43,7 +43,7 @@@ struct rxkad_level2_hdr * packets */ static struct crypto_sync_skcipher *rxkad_ci; + static struct skcipher_request *rxkad_ci_req; static DEFINE_MUTEX(rxkad_ci_mutex);
/* @@@ -99,8 -100,8 +100,8 @@@ error */ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) { + struct skcipher_request *req; struct rxrpc_key_token *token; - SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher); struct scatterlist sg; struct rxrpc_crypt iv; __be32 *tmpbuf; @@@ -115,6 -116,12 +116,12 @@@ if (!tmpbuf) return -ENOMEM;
+ req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + if (!req) { + kfree(tmpbuf); + return -ENOMEM; + } + token = conn->params.key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv));
@@@ -128,7 -135,7 +135,7 @@@ skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x); crypto_skcipher_encrypt(req); - skcipher_request_zero(req); + skcipher_request_free(req);
memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv)); kfree(tmpbuf); @@@ -137,6 -144,35 +144,35 @@@ }
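The same on-stack-to-heap conversion recurs through the rest of rxkad. The pattern in isolation, assembled from the calls visible in this hunk (a sketch, not a verbatim excerpt):

	struct skcipher_request *req;

	req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS);
	if (!req)
		return -ENOMEM;

	skcipher_request_set_sync_tfm(req, conn->cipher);
	skcipher_request_set_callback(req, 0, NULL, NULL);
	skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x);
	crypto_skcipher_encrypt(req);

	skcipher_request_free(req);	/* replaces skcipher_request_zero() */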
/* + * Allocate and prepare the crypto request on a call. For any particular call, + * this is called serially for the packets, so no lock should be necessary. + */ + static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call) + { + struct crypto_skcipher *tfm = &call->conn->cipher->base; + struct skcipher_request *cipher_req = call->cipher_req; + + if (!cipher_req) { + cipher_req = skcipher_request_alloc(tfm, GFP_NOFS); + if (!cipher_req) + return NULL; + call->cipher_req = cipher_req; + } + + return cipher_req; + } + + /* + * Clean up the crypto on a call. + */ + static void rxkad_free_call_crypto(struct rxrpc_call *call) + { + if (call->cipher_req) + skcipher_request_free(call->cipher_req); + call->cipher_req = NULL; + } + + /* * partially encrypt a packet (level 1 security) */ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, @@@ -187,8 -223,10 +223,8 @@@ static int rxkad_secure_packet_encrypt( struct rxrpc_skb_priv *sp; struct rxrpc_crypt iv; struct scatterlist sg[16]; - struct sk_buff *trailer; unsigned int len; u16 check; - int nsg; int err;
sp = rxrpc_skb(skb); @@@ -212,14 -250,15 +248,14 @@@ crypto_skcipher_encrypt(req);
/* we want to encrypt the skbuff in-place */ - nsg = skb_cow_data(skb, 0, &trailer); - err = -ENOMEM; - if (nsg < 0 || nsg > 16) + err = -EMSGSIZE; + if (skb_shinfo(skb)->nr_frags > 16) goto out;
len = data_size + call->conn->size_align - 1; len &= ~(call->conn->size_align - 1);
- sg_init_table(sg, nsg); + sg_init_table(sg, ARRAY_SIZE(sg)); err = skb_to_sgvec(skb, sg, 0, len); if (unlikely(err < 0)) goto out; @@@ -243,7 -282,7 +279,7 @@@ static int rxkad_secure_packet(struct r void *sechdr) { struct rxrpc_skb_priv *sp; - SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); + struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg; u32 x, y; @@@ -262,6 -301,10 +298,10 @@@ if (ret < 0) return ret;
+ req = rxkad_get_call_crypto(call); + if (!req) + return -ENOMEM; + /* continue encrypting from where we left off */ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
@@@ -316,10 -359,11 +356,10 @@@ static int rxkad_verify_packet_1(struc struct rxkad_level1_hdr sechdr; struct rxrpc_crypt iv; struct scatterlist sg[16]; - struct sk_buff *trailer; bool aborted; u32 data_size, buf; u16 check; - int nsg, ret; + int ret;
_enter("");
@@@ -332,7 -376,11 +372,7 @@@ /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. */ - nsg = skb_cow_data(skb, 0, &trailer); - if (nsg < 0 || nsg > 16) - goto nomem; - - sg_init_table(sg, nsg); + sg_init_table(sg, ARRAY_SIZE(sg)); ret = skb_to_sgvec(skb, sg, offset, 8); if (unlikely(ret < 0)) return ret; @@@ -380,6 -428,10 +420,6 @@@ protocol_error if (aborted) rxrpc_send_abort_packet(call); return -EPROTO; - -nomem: - _leave(" = -ENOMEM"); - return -ENOMEM; }
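The skb_cow_data() call that used to live here (and in the level-2 path below) is gone from the verify paths; together with the RXRPC_SKB_NEEDS_COW flag added to rxrpc_skb_priv earlier in this diff, the implied arrangement is a single cow point ahead of decryption, roughly like this (a guess at the shape, not code from this patch):

	struct sk_buff *trailer;

	if (sp->rx_flags & RXRPC_SKB_NEEDS_COW) {
		if (skb_cow_data(skb, 0, &trailer) < 0)
			return -ENOMEM;
		sp->rx_flags &= ~RXRPC_SKB_NEEDS_COW;
	}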
/* @@@ -394,6 -446,7 +434,6 @@@ static int rxkad_verify_packet_2(struc struct rxkad_level2_hdr sechdr; struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; - struct sk_buff *trailer; bool aborted; u32 data_size, buf; u16 check; @@@ -410,11 -463,12 +450,11 @@@ /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. */ - nsg = skb_cow_data(skb, 0, &trailer); - if (nsg < 0) - goto nomem; - sg = _sg; - if (unlikely(nsg > 4)) { + nsg = skb_shinfo(skb)->nr_frags; + if (nsg <= 4) { + nsg = 4; + } else { sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO); if (!sg) goto nomem; @@@ -488,7 -542,7 +528,7 @@@ static int rxkad_verify_packet(struct r unsigned int offset, unsigned int len, rxrpc_seq_t seq, u16 expected_cksum) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); + struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg; bool aborted; @@@ -501,6 -555,10 +541,10 @@@ if (!call->conn->cipher) return 0;
+ req = rxkad_get_call_crypto(call); + if (!req) + return -ENOMEM; + /* continue encrypting from where we left off */ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
@@@ -733,14 -791,18 +777,18 @@@ static void rxkad_calc_response_checksu /* * encrypt the response packet */ - static void rxkad_encrypt_response(struct rxrpc_connection *conn, - struct rxkad_response *resp, - const struct rxkad_key *s2) + static int rxkad_encrypt_response(struct rxrpc_connection *conn, + struct rxkad_response *resp, + const struct rxkad_key *s2) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher); + struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg[1];
+ req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + if (!req) + return -ENOMEM; + /* continue encrypting from where we left off */ memcpy(&iv, s2->session_key, sizeof(iv));
@@@ -750,7 -812,8 +798,8 @@@ skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x); crypto_skcipher_encrypt(req); - skcipher_request_zero(req); + skcipher_request_free(req); + return 0; }
/* @@@ -825,8 -888,9 +874,9 @@@ static int rxkad_respond_to_challenge(s
/* calculate the response checksum and then do the encryption */ rxkad_calc_response_checksum(resp); - rxkad_encrypt_response(conn, resp, token->kad); - ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); + ret = rxkad_encrypt_response(conn, resp, token->kad); + if (ret == 0) + ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); kfree(resp); return ret;
@@@ -1003,18 -1067,16 +1053,16 @@@ static void rxkad_decrypt_response(stru struct rxkad_response *resp, const struct rxrpc_crypt *session_key) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci); + struct skcipher_request *req = rxkad_ci_req; struct scatterlist sg[1]; struct rxrpc_crypt iv;
_enter(",,%08x%08x", ntohl(session_key->n[0]), ntohl(session_key->n[1]));
- ASSERT(rxkad_ci != NULL); - mutex_lock(&rxkad_ci_mutex); if (crypto_sync_skcipher_setkey(rxkad_ci, session_key->x, - sizeof(*session_key)) < 0) + sizeof(*session_key)) < 0) BUG();
memcpy(&iv, session_key, sizeof(iv)); @@@ -1208,10 -1270,26 +1256,26 @@@ static void rxkad_clear(struct rxrpc_co */ static int rxkad_init(void) { + struct crypto_sync_skcipher *tfm; + struct skcipher_request *req; + /* pin the cipher we need so that the crypto layer doesn't invoke * keventd to go get it */ - rxkad_ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); - return PTR_ERR_OR_ZERO(rxkad_ci); + tfm = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + req = skcipher_request_alloc(&tfm->base, GFP_KERNEL); + if (!req) + goto nomem_tfm; + + rxkad_ci_req = req; + rxkad_ci = tfm; + return 0; + + nomem_tfm: + crypto_free_sync_skcipher(tfm); + return -ENOMEM; }
/* @@@ -1219,8 -1297,8 +1283,8 @@@ */ static void rxkad_exit(void) { - if (rxkad_ci) - crypto_free_sync_skcipher(rxkad_ci); + crypto_free_sync_skcipher(rxkad_ci); + skcipher_request_free(rxkad_ci_req); }
/* @@@ -1235,6 -1313,7 +1299,7 @@@ const struct rxrpc_security rxkad = .prime_packet_security = rxkad_prime_packet_security, .secure_packet = rxkad_secure_packet, .verify_packet = rxkad_verify_packet, + .free_call_crypto = rxkad_free_call_crypto, .locate_data = rxkad_locate_data, .issue_challenge = rxkad_issue_challenge, .respond_to_challenge = rxkad_respond_to_challenge, diff --combined net/sched/sch_taprio.c index e25d414ae12f,046fd2c102b4..540bde009ea5 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@@ -677,10 -677,6 +677,6 @@@ static const struct nla_policy entry_po [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, };
- static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = { - [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED }, - }; - static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) @@@ -1195,8 -1191,7 +1191,8 @@@ unlock spin_unlock_bh(qdisc_lock(sch));
free_sched: - kfree(new_admin); + if (new_admin) + call_rcu(&new_admin->rcu, taprio_free_sched_cb);
return err; } diff --combined net/tipc/link.c index c2c5c53cad22,289e848084ac..6cc75ffd9e2c --- a/net/tipc/link.c +++ b/net/tipc/link.c @@@ -106,6 -106,8 +106,6 @@@ struct tipc_stats * @transmitq: queue for sent, non-acked messages * @backlogq: queue for messages waiting to be sent * @snt_nxt: next sequence number to use for outbound messages - * @prev_from: sequence number of most previous retransmission request - * @stale_limit: time when repeated identical retransmits must force link reset * @ackers: # of peers that needs to ack each packet before it can be released * @acked: # last packet acked by a certain peer. Used for broadcast. * @rcv_nxt: next sequence number to expect for inbound messages @@@ -162,7 -164,9 +162,7 @@@ struct tipc_link u16 limit; } backlog[5]; u16 snd_nxt; - u16 prev_from; u16 window; - unsigned long stale_limit;
/* Reception */ u16 rcv_nxt; @@@ -176,6 -180,7 +176,7 @@@
/* Fragmentation/reassembly */ struct sk_buff *reasm_buf; + struct sk_buff *reasm_tnlmsg;
/* Broadcast */ u16 ackers; @@@ -849,18 -854,31 +850,31 @@@ static int link_schedule_user(struct ti */ static void link_prepare_wakeup(struct tipc_link *l) { + struct sk_buff_head *wakeupq = &l->wakeupq; + struct sk_buff_head *inputq = l->inputq; struct sk_buff *skb, *tmp; - int imp, i = 0; + struct sk_buff_head tmpq; + int avail[5] = {0,}; + int imp = 0; + + __skb_queue_head_init(&tmpq); + + for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) + avail[imp] = l->backlog[imp].limit - l->backlog[imp].len;
- skb_queue_walk_safe(&l->wakeupq, skb, tmp) { + skb_queue_walk_safe(wakeupq, skb, tmp) { imp = TIPC_SKB_CB(skb)->chain_imp; - if (l->backlog[imp].len < l->backlog[imp].limit) { - skb_unlink(skb, &l->wakeupq); - skb_queue_tail(l->inputq, skb); - } else if (i++ > 10) { - break; - } + if (avail[imp] <= 0) + continue; + avail[imp]--; + __skb_unlink(skb, wakeupq); + __skb_queue_tail(&tmpq, skb); } + + spin_lock_bh(&inputq->lock); + skb_queue_splice_tail(&tmpq, inputq); + spin_unlock_bh(&inputq->lock); + }
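The rewrite batches the wakeup hand-over: per-importance budgets are computed once, eligible skbs are collected lock-free, and the input queue lock is taken a single time instead of once per skb_queue_tail(). The underlying pattern, reduced to its essentials:

	struct sk_buff_head tmpq;

	__skb_queue_head_init(&tmpq);

	/* __skb_unlink() each eligible skb from the wakeup queue and
	 * __skb_queue_tail() it onto the private list, without locks
	 */

	spin_lock_bh(&inputq->lock);
	skb_queue_splice_tail(&tmpq, inputq);
	spin_unlock_bh(&inputq->lock);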
void tipc_link_reset(struct tipc_link *l) @@@ -893,8 -911,10 +907,10 @@@ l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; kfree_skb(l->reasm_buf); + kfree_skb(l->reasm_tnlmsg); kfree_skb(l->failover_reasm_skb); l->reasm_buf = NULL; + l->reasm_tnlmsg = NULL; l->failover_reasm_skb = NULL; l->rcv_unacked = 0; l->snd_nxt = 1; @@@ -936,7 -956,10 +952,10 @@@ int tipc_link_xmit(struct tipc_link *l int rc = 0;
if (unlikely(msg_size(hdr) > mtu)) { - skb_queue_purge(list); + pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n", + skb_queue_len(list), msg_user(hdr), + msg_type(hdr), msg_size(hdr), mtu); + __skb_queue_purge(list); return -EMSGSIZE; }
@@@ -965,7 -988,7 +984,7 @@@ if (likely(skb_queue_len(transmq) < maxwin)) { _skb = skb_clone(skb, GFP_ATOMIC); if (!_skb) { - skb_queue_purge(list); + __skb_queue_purge(list); return -ENOBUFS; } __skb_dequeue(list); @@@ -1040,53 -1063,47 +1059,53 @@@ static void tipc_link_advance_backlog(s * link_retransmit_failure() - Detect repeated retransmit failures * @l: tipc link sender * @r: tipc link receiver (= l in case of unicast) - * @from: seqno of the 1st packet in retransmit request * @rc: returned code * * Return: true if the repeated retransmit failures happens, otherwise * false */ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r, - u16 from, int *rc) + int *rc) { struct sk_buff *skb = skb_peek(&l->transmq); struct tipc_msg *hdr;
if (!skb) return false; - hdr = buf_msg(skb);
- /* Detect repeated retransmit failures on same packet */ - if (r->prev_from != from) { - r->prev_from = from; - r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance); - } else if (time_after(jiffies, r->stale_limit)) { - pr_warn("Retransmission failure on link <%s>\n", l->name); - link_print(l, "State of link "); - pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", - msg_user(hdr), msg_type(hdr), msg_size(hdr), - msg_errcode(hdr)); - pr_info("sqno %u, prev: %x, src: %x\n", - msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr)); - - trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); - trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); - trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); + if (!TIPC_SKB_CB(skb)->retr_cnt) + return false;
- if (link_is_bc_sndlink(l)) - *rc = TIPC_LINK_DOWN_EVT; + if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp + + msecs_to_jiffies(r->tolerance))) + return false; + + hdr = buf_msg(skb); + if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr))) + return false;
+ pr_warn("Retransmission failure on link <%s>\n", l->name); + link_print(l, "State of link "); + pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", + msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); + pr_info("sqno %u, prev: %x, dest: %x\n", + msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr)); + pr_info("retr_stamp %d, retr_cnt %d\n", + jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp), + TIPC_SKB_CB(skb)->retr_cnt); + + trace_tipc_list_dump(&l->transmq, true, "retrans failure!"); + trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!"); + trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!"); + + if (link_is_bc_sndlink(l)) { + r->state = LINK_RESET; + *rc = TIPC_LINK_DOWN_EVT; + } else { *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); - return true; }
- return false; + return true; }
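The per-skb bookkeeping this check relies on is stamped in the retransmit paths further down in this file; the two lines that make the timestamp comparison above meaningful are:

	/* Increase actual retrans counter & mark first time */
	if (!TIPC_SKB_CB(skb)->retr_cnt++)
		TIPC_SKB_CB(skb)->retr_stamp = jiffies;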
/* tipc_link_bc_retrans() - retransmit zero or more packets @@@ -1112,7 -1129,7 +1131,7 @@@ static int tipc_link_bc_retrans(struct
trace_tipc_link_retrans(r, from, to, &l->transmq);
- if (link_retransmit_failure(l, r, from, &rc)) + if (link_retransmit_failure(l, r, &rc)) return rc;
skb_queue_walk(&l->transmq, skb) { @@@ -1121,10 -1138,11 +1140,10 @@@ continue; if (more(msg_seqno(hdr), to)) break; - if (link_is_bc_sndlink(l)) { - if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) - continue; - TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; - } + + if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) + continue; + TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM; _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, GFP_ATOMIC); if (!_skb) return 0; @@@ -1134,10 -1152,6 +1153,10 @@@ _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; + + /* Increase actual retrans counter & mark first time */ + if (!TIPC_SKB_CB(skb)->retr_cnt++) + TIPC_SKB_CB(skb)->retr_stamp = jiffies; } return 0; } @@@ -1238,6 -1252,7 +1257,7 @@@ static int tipc_link_tnl_rcv(struct tip struct sk_buff_head *inputq) { struct sk_buff **reasm_skb = &l->failover_reasm_skb; + struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg; struct sk_buff_head *fdefq = &l->failover_deferdq; struct tipc_msg *hdr = buf_msg(skb); struct sk_buff *iskb; @@@ -1245,40 -1260,56 +1265,56 @@@ int rc = 0; u16 seqno;
- /* SYNCH_MSG */ - if (msg_type(hdr) == SYNCH_MSG) - goto drop; + if (msg_type(hdr) == SYNCH_MSG) { + kfree_skb(skb); + return 0; + }
- /* FAILOVER_MSG */ - if (!tipc_msg_extract(skb, &iskb, &ipos)) { - pr_warn_ratelimited("Cannot extract FAILOVER_MSG, defq: %d\n", - skb_queue_len(fdefq)); - return rc; + /* Not a fragment? */ + if (likely(!msg_nof_fragms(hdr))) { + if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) { + pr_warn_ratelimited("Unable to extract msg, defq: %d\n", + skb_queue_len(fdefq)); + return 0; + } + kfree_skb(skb); + } else { + /* Set fragment type for buf_append */ + if (msg_fragm_no(hdr) == 1) + msg_set_type(hdr, FIRST_FRAGMENT); + else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr)) + msg_set_type(hdr, FRAGMENT); + else + msg_set_type(hdr, LAST_FRAGMENT); + + if (!tipc_buf_append(reasm_tnlmsg, &skb)) { + /* Successful but non-complete reassembly? */ + if (*reasm_tnlmsg || link_is_bc_rcvlink(l)) + return 0; + pr_warn_ratelimited("Unable to reassemble tunnel msg\n"); + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + } + iskb = skb; }
do { seqno = buf_seqno(iskb); - if (unlikely(less(seqno, l->drop_point))) { kfree_skb(iskb); continue; } - if (unlikely(seqno != l->drop_point)) { __tipc_skb_queue_sorted(fdefq, seqno, iskb); continue; }
l->drop_point++; - if (!tipc_data_input(l, iskb, inputq)) rc |= tipc_link_input(l, iskb, inputq, reasm_skb); if (unlikely(rc)) break; } while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point)));
- drop: - kfree_skb(skb); return rc; }
@@@ -1362,10 -1393,12 +1398,10 @@@ static int tipc_link_advance_transmq(st struct tipc_msg *hdr; u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1; u16 ack = l->rcv_nxt - 1; + bool passed = false; u16 seqno, n = 0; int rc = 0;
- if (gap && link_retransmit_failure(l, l, acked + 1, &rc)) - return rc; - skb_queue_walk_safe(&l->transmq, skb, tmp) { seqno = buf_seqno(skb);
@@@ -1375,17 -1408,12 +1411,17 @@@ next_gap_ack __skb_unlink(skb, &l->transmq); kfree_skb(skb); } else if (less_eq(seqno, acked + gap)) { - /* retransmit skb */ + /* First, check if repeated retrans failures occurs? */ + if (!passed && link_retransmit_failure(l, l, &rc)) + return rc; + passed = true; + + /* retransmit skb if unrestricted*/ if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr)) continue; TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME; - - _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC); + _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE, + GFP_ATOMIC); if (!_skb) continue; hdr = buf_msg(_skb); @@@ -1394,10 -1422,6 +1430,10 @@@ _skb->priority = TC_PRIO_CONTROL; __skb_queue_tail(xmitq, _skb); l->stats.retransmitted++; + + /* Increase actual retrans counter & mark first time */ + if (!TIPC_SKB_CB(skb)->retr_cnt++) + TIPC_SKB_CB(skb)->retr_stamp = jiffies; } else { /* retry with Gap ACK blocks if any */ if (!ga || n >= ga->gack_cnt) @@@ -1644,7 -1668,7 +1680,7 @@@ void tipc_link_create_dummy_tnl_msg(str struct sk_buff *skb; u32 dnode = l->addr;
- skb_queue_head_init(&tnlq); + __skb_queue_head_init(&tnlq); skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, INT_H_SIZE, BASIC_H_SIZE, dnode, onode, 0, 0, 0); @@@ -1675,14 -1699,18 +1711,18 @@@ void tipc_link_tnl_prepare(struct tipc_ struct sk_buff *skb, *tnlskb; struct tipc_msg *hdr, tnlhdr; struct sk_buff_head *queue = &l->transmq; - struct sk_buff_head tmpxq, tnlq; + struct sk_buff_head tmpxq, tnlq, frags; u16 pktlen, pktcnt, seqno = l->snd_nxt; + bool pktcnt_need_update = false; + u16 syncpt; + int rc;
if (!tnl) return;
- skb_queue_head_init(&tnlq); - skb_queue_head_init(&tmpxq); + __skb_queue_head_init(&tnlq); + __skb_queue_head_init(&tmpxq); + __skb_queue_head_init(&frags);
/* At least one packet required for safe algorithm => add dummy */ skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, @@@ -1692,10 -1720,35 +1732,35 @@@ pr_warn("%sunable to create tunnel packet\n", link_co_err); return; } - skb_queue_tail(&tnlq, skb); + __skb_queue_tail(&tnlq, skb); tipc_link_xmit(l, &tnlq, &tmpxq); __skb_queue_purge(&tmpxq);
+ /* Link Synching: + * From now on, send only one single ("dummy") SYNCH message + * to peer. The SYNCH message does not contain any data, just + * a header conveying the synch point to the peer. + */ + if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { + tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG, + INT_H_SIZE, 0, l->addr, + tipc_own_addr(l->net), + 0, 0, 0); + if (!tnlskb) { + pr_warn("%sunable to create dummy SYNCH_MSG\n", + link_co_err); + return; + } + + hdr = buf_msg(tnlskb); + syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1; + msg_set_syncpt(hdr, syncpt); + msg_set_bearer_id(hdr, l->peer_bearer_id); + __skb_queue_tail(&tnlq, tnlskb); + tipc_link_xmit(tnl, &tnlq, xmitq); + return; + } + /* Initialize reusable tunnel packet header */ tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL, mtyp, INT_H_SIZE, l->addr); @@@ -1713,6 -1766,39 +1778,39 @@@ tnl if (queue == &l->backlogq) msg_set_seqno(hdr, seqno++); pktlen = msg_size(hdr); + + /* Tunnel link MTU is not large enough? This could be + * due to: + * 1) Link MTU has just changed or set differently; + * 2) Or FAILOVER on the top of a SYNCH message + * + * The 2nd case should not happen if peer supports + * TIPC_TUNNEL_ENHANCED + */ + if (pktlen > tnl->mtu - INT_H_SIZE) { + if (mtyp == FAILOVER_MSG && + (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { + rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu, + &frags); + if (rc) { + pr_warn("%sunable to frag msg: rc %d\n", + link_co_err, rc); + return; + } + pktcnt += skb_queue_len(&frags) - 1; + pktcnt_need_update = true; + skb_queue_splice_tail_init(&frags, &tnlq); + continue; + } + /* Unluckily, peer doesn't have TIPC_TUNNEL_ENHANCED + * => Just warn it and return! + */ + pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n", + link_co_err, msg_user(hdr), + msg_type(hdr), msg_size(hdr)); + return; + } + msg_set_size(&tnlhdr, pktlen + INT_H_SIZE); tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); if (!tnlskb) { @@@ -1728,6 -1814,12 +1826,12 @@@ goto tnl; }
+ if (pktcnt_need_update) + skb_queue_walk(&tnlq, skb) { + hdr = buf_msg(skb); + msg_set_msgcnt(hdr, pktcnt); + } + tipc_link_xmit(tnl, &tnlq, xmitq);
if (mtyp == FAILOVER_MSG) { @@@ -2589,7 -2681,7 +2693,7 @@@ int tipc_link_dump(struct tipc_link *l i += scnprintf(buf + i, sz - i, " %x", l->peer_caps); i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt); i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt); - i += scnprintf(buf + i, sz - i, " %u", l->prev_from); + i += scnprintf(buf + i, sz - i, " %u", 0); i += scnprintf(buf + i, sz - i, " %u", 0); i += scnprintf(buf + i, sz - i, " %u", l->acked);
diff --combined net/tipc/msg.h index d7ebc9e955f6,1c8c8dd32a4e..0daa6f04ca81 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@@ -102,15 -102,13 +102,15 @@@ struct plist #define TIPC_MEDIA_INFO_OFFSET 5
struct tipc_skb_cb { - u32 bytes_read; - u32 orig_member; struct sk_buff *tail; unsigned long nxt_retr; - bool validated; + unsigned long retr_stamp; + u32 bytes_read; + u32 orig_member; u16 chain_imp; u16 ackers; + u16 retr_cnt; + bool validated; };
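The reshuffle keeps the control block, now two fields larger, within the 48-byte skb->cb area it is overlaid on via TIPC_SKB_CB() below. A compile-time check of that invariant (not part of the patch) would be:

	BUILD_BUG_ON(sizeof(struct tipc_skb_cb) >
		     sizeof(((struct sk_buff *)0)->cb));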
#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0])) @@@ -723,12 -721,26 +723,26 @@@ static inline void msg_set_last_bcast(s msg_set_bits(m, 4, 16, 0xffff, n); }
+ static inline u32 msg_nof_fragms(struct tipc_msg *m) + { + return msg_bits(m, 4, 0, 0xffff); + } + + static inline void msg_set_nof_fragms(struct tipc_msg *m, u32 n) + { + msg_set_bits(m, 4, 0, 0xffff, n); + } + + static inline u32 msg_fragm_no(struct tipc_msg *m) + { + return msg_bits(m, 4, 16, 0xffff); + } + static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n) { msg_set_bits(m, 4, 16, 0xffff, n); }
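Per the msg_bits()/msg_set_bits() calls above, both tunnel-fragment counters live in header word 4: the total in bits 0-15, the 1-based fragment number in bits 16-31. A sender, presumably tipc_msg_fragment() declared further down, would stamp each fragment roughly as:

	msg_set_nof_fragms(hdr, nof_fragms);	/* total, word 4 bits 0-15 */
	msg_set_fragm_no(hdr, fragm_no);	/* 1-based, word 4 bits 16-31 */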
- static inline u16 msg_next_sent(struct tipc_msg *m) { return msg_bits(m, 4, 0, 0xffff); @@@ -879,6 -891,16 +893,16 @@@ static inline void msg_set_msgcnt(struc msg_set_bits(m, 9, 16, 0xffff, n); }
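The syncpt accessors just below address the same bits as msg_msgcnt()/msg_set_msgcnt() above (word 9, bits 16-31): a dummy SYNCH tunnel message carries a synchronization point where an ordinary tunnel message carries a packet count. The sender side, per tipc_link_tnl_prepare() earlier in this diff, computes it as:

	syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1;
	msg_set_syncpt(hdr, syncpt);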
+ static inline u16 msg_syncpt(struct tipc_msg *m) + { + return msg_bits(m, 9, 16, 0xffff); + } + + static inline void msg_set_syncpt(struct tipc_msg *m, u16 n) + { + msg_set_bits(m, 9, 16, 0xffff, n); + } + static inline u32 msg_conn_ack(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); @@@ -1037,6 -1059,8 +1061,8 @@@ bool tipc_msg_bundle(struct sk_buff *sk bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, u32 mtu, u32 dnode); bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); + int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr, + int pktmax, struct sk_buff_head *frags); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); diff --combined net/tls/tls_device.c index 43922d86e510,d184230665eb..a470df7ffcf9 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@@ -243,14 -243,14 +243,14 @@@ static void tls_append_frag(struct tls_ skb_frag_t *frag;
frag = &record->frags[record->num_frags - 1]; - if (frag->page.p == pfrag->page && - frag->page_offset + frag->size == pfrag->offset) { - frag->size += size; + if (skb_frag_page(frag) == pfrag->page && + skb_frag_off(frag) + skb_frag_size(frag) == pfrag->offset) { + skb_frag_size_add(frag, size); } else { ++frag; - frag->page.p = pfrag->page; - frag->page_offset = pfrag->offset; - frag->size = size; + __skb_frag_set_page(frag, pfrag->page); + skb_frag_off_set(frag, pfrag->offset); + skb_frag_size_set(frag, size); ++record->num_frags; get_page(pfrag->page); } @@@ -301,8 -301,8 +301,8 @@@ static int tls_push_record(struct sock frag = &record->frags[i]; sg_unmark_end(&offload_ctx->sg_tx_data[i]); sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag), - frag->size, frag->page_offset); - sk_mem_charge(sk, frag->size); + skb_frag_size(frag), skb_frag_off(frag)); + sk_mem_charge(sk, skb_frag_size(frag)); get_page(skb_frag_page(frag)); } sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]); @@@ -324,7 -324,7 +324,7 @@@ static int tls_create_new_record(struc
frag = &record->frags[0]; __skb_frag_set_page(frag, pfrag->page); - frag->page_offset = pfrag->offset; + skb_frag_off_set(frag, pfrag->offset); skb_frag_size_set(frag, prepend_size);
get_page(pfrag->page); @@@ -373,9 -373,9 +373,9 @@@ static int tls_push_data(struct sock *s struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); - int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE); struct tls_record_info *record = ctx->open_record; + int tls_push_record_flags; struct page_frag *pfrag; size_t orig_size = size; u32 max_open_record_len; @@@ -390,9 -390,6 +390,9 @@@ if (sk->sk_err) return -sk->sk_err;
+ flags |= MSG_SENDPAGE_DECRYPTED; + tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if (tls_is_partially_sent_record(tls_ctx)) { rc = tls_push_partial_record(sk, tls_ctx, flags); @@@ -579,9 -576,7 +579,9 @@@ void tls_device_write_space(struct soc gfp_t sk_allocation = sk->sk_allocation;
sk->sk_allocation = GFP_ATOMIC; - tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL); + tls_push_partial_record(sk, ctx, + MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_SENDPAGE_DECRYPTED); sk->sk_allocation = sk_allocation; } } diff --combined scripts/link-vmlinux.sh index 2438a9faf3f1,c31193340108..f7edb75f9806 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@@ -56,11 -56,10 +56,11 @@@ modpost_link( }
# Link of vmlinux - # ${1} - optional extra .o files - # ${2} - output file + # ${1} - output file + # ${@:2} - optional extra .o files vmlinux_link() { - info LD ${2} ++ info LD ${1} local lds="${objtree}/${KBUILD_LDS}" local objects
@@@ -71,9 -70,9 +71,9 @@@ --start-group \ ${KBUILD_VMLINUX_LIBS} \ --end-group \ - ${1}" + ${@:2}"
- ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \ + ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${1} \ -T ${lds} ${objects} else objects="-Wl,--whole-archive \ @@@ -82,9 -81,9 +82,9 @@@ -Wl,--start-group \ ${KBUILD_VMLINUX_LIBS} \ -Wl,--end-group \ - ${1}" + ${@:2}"
- ${CC} ${CFLAGS_vmlinux} -o ${2} \ + ${CC} ${CFLAGS_vmlinux} -o ${1} \ -Wl,-T,${lds} \ ${objects} \ -lutil -lrt -lpthread @@@ -93,23 -92,34 +93,34 @@@ }
# generate .BTF typeinfo from DWARF debuginfo + # ${1} - vmlinux image + # ${2} - file to dump raw BTF data into gen_btf() { - local pahole_ver; + local pahole_ver + local bin_arch
if ! [ -x "$(command -v ${PAHOLE})" ]; then info "BTF" "${1}: pahole (${PAHOLE}) is not available" - return 0 + return 1 fi
pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+).([0-9]+)/\1\2/') if [ "${pahole_ver}" -lt "113" ]; then info "BTF" "${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13" - return 0 + return 1 fi
- info "BTF" ${1} + info "BTF" ${2} + vmlinux_link ${1} LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1} + + # dump .BTF section into raw binary file to link with final vmlinux + bin_arch=$(${OBJDUMP} -f ${1} | grep architecture | \ + cut -d, -f1 | cut -d' ' -f2) + ${OBJCOPY} --dump-section .BTF=.btf.vmlinux.bin ${1} 2>/dev/null + ${OBJCOPY} -I binary -O ${CONFIG_OUTPUT_FORMAT} -B ${bin_arch} \ + --rename-section .data=.BTF .btf.vmlinux.bin ${2} }
# Create ${2} .o file with all symbols from the ${1} object file @@@ -139,18 -149,6 +150,18 @@@ kallsyms( ${CC} ${aflags} -c -o ${2} ${afile} }
+# Perform one step in kallsyms generation, including temporary linking of +# vmlinux. +kallsyms_step() +{ + kallsymso_prev=${kallsymso} + kallsymso=.tmp_kallsyms${1}.o + kallsyms_vmlinux=.tmp_vmlinux${1} + - vmlinux_link "${kallsymso_prev}" ${kallsyms_vmlinux} ++ vmlinux_link ${kallsyms_vmlinux} "${kallsymso_prev}" ${btf_vmlinux_bin_o} + kallsyms ${kallsyms_vmlinux} ${kallsymso} +} + # Create map file with all symbols from ${1} # See mksymap for additional details mksysmap() @@@ -166,6 -164,7 +177,7 @@@ sortextable( # Delete output files in case of error cleanup() { + rm -f .btf.* rm -f .tmp_System.map rm -f .tmp_kallsyms* rm -f .tmp_vmlinux* @@@ -228,8 -227,14 +240,15 @@@ ${MAKE} -f "${srctree}/scripts/Makefile info MODINFO modules.builtin.modinfo ${OBJCOPY} -j .modinfo -O binary vmlinux.o modules.builtin.modinfo
+ btf_vmlinux_bin_o="" + if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then + if gen_btf .tmp_vmlinux.btf .btf.vmlinux.bin.o ; then + btf_vmlinux_bin_o=.btf.vmlinux.bin.o + fi + fi + kallsymso="" +kallsymso_prev="" kallsyms_vmlinux="" if [ -n "${CONFIG_KALLSYMS}" ]; then
@@@ -256,23 -261,32 +275,19 @@@ # a) Verify that the System.map from vmlinux matches the map from # ${kallsymso}.
- kallsymso=.tmp_kallsyms2.o - kallsyms_vmlinux=.tmp_vmlinux2 - - # step 1 - vmlinux_link .tmp_vmlinux1 ${btf_vmlinux_bin_o} - kallsyms .tmp_vmlinux1 .tmp_kallsyms1.o - - # step 2 - vmlinux_link .tmp_vmlinux2 .tmp_kallsyms1.o ${btf_vmlinux_bin_o} - kallsyms .tmp_vmlinux2 .tmp_kallsyms2.o + kallsyms_step 1 + kallsyms_step 2
# step 3 - size1=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" .tmp_kallsyms1.o) - size2=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" .tmp_kallsyms2.o) + size1=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" ${kallsymso_prev}) + size2=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" ${kallsymso})
if [ $size1 -ne $size2 ] || [ -n "${KALLSYMS_EXTRA_PASS}" ]; then - kallsymso=.tmp_kallsyms3.o - kallsyms_vmlinux=.tmp_vmlinux3 - - vmlinux_link .tmp_vmlinux3 .tmp_kallsyms2.o ${btf_vmlinux_bin_o} - kallsyms .tmp_vmlinux3 .tmp_kallsyms3.o + kallsyms_step 3 fi fi
- vmlinux_link "${kallsymso}" vmlinux - - if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then - gen_btf vmlinux - fi -info LD vmlinux + vmlinux_link vmlinux "${kallsymso}" "${btf_vmlinux_bin_o}"
if [ -n "${CONFIG_BUILDTIME_EXTABLE_SORT}" ]; then info SORTEX vmlinux diff --combined tools/include/uapi/linux/bpf.h index a5aa7d3ac6a1,4393bd4b2419..0e66371bea13 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@@ -134,6 -134,7 +134,7 @@@ enum bpf_map_type BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, + BPF_MAP_TYPE_DEVMAP_HASH, };
/* Note that tracing related programs such as @@@ -1466,8 -1467,8 +1467,8 @@@ union bpf_attr * If no cookie has been set yet, generate a new cookie. Once * generated, the socket cookie remains stable for the life of the * socket. This helper can be useful for monitoring per socket - * networking traffic statistics as it provides a unique socket - * identifier per namespace. + * networking traffic statistics as it provides a global socket + * identifier that can be assumed unique. * Return * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. @@@ -2713,6 -2714,33 +2714,33 @@@ * **-EPERM** if no permission to send the *sig*. * * **-EAGAIN** if bpf program can try again. + * + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. + * + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header. + * + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** SYN cookie cannot be issued due to error + * + * **-ENOENT** SYN cookie should not be issued (no SYN flood) + * + * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies + * + * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@@ -2824,7 -2852,8 +2852,8 @@@ FN(strtoul), \ FN(sk_storage_get), \ FN(sk_storage_delete), \ - FN(send_signal), + FN(send_signal), \ + FN(tcp_gen_syncookie),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@@ -3507,6 -3536,10 +3536,10 @@@ enum bpf_task_fd_type BPF_FD_TYPE_URETPROBE, /* filename + offset */ };
+ #define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) + #define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) + #define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) + struct bpf_flow_keys { __u16 nhoff; __u16 thoff; @@@ -3528,6 -3561,8 +3561,8 @@@ __u32 ipv6_dst[4]; /* in6_addr; network order */ }; }; + __u32 flags; + __be32 flow_label; };
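A rough sketch of how a flow-dissector BPF program might honour the new fields; the surrounding conventions (skb->flow_keys, BPF_OK) come from the existing flow_dissector program type, not from this hunk:

	/* body of a BPF_PROG_TYPE_FLOW_DISSECTOR program */
	int dissect(struct __sk_buff *skb)
	{
		struct bpf_flow_keys *keys = skb->flow_keys;

		/* the caller bounds dissection through keys->flags */
		if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP)
			return BPF_OK;	/* skip inner headers */

		if ((keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) &&
		    keys->flow_label)
			return BPF_OK;	/* IPv6 flow label suffices */

		/* ... normal header walk ... */
		return BPF_OK;
	}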
struct bpf_func_info { diff --combined tools/lib/bpf/libbpf.c index 2b57d7ea7836,2233f919dd88..e0276520171b --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@@ -39,6 -39,7 +39,7 @@@ #include <sys/stat.h> #include <sys/types.h> #include <sys/vfs.h> + #include <sys/utsname.h> #include <tools/libc_compat.h> #include <libelf.h> #include <gelf.h> @@@ -48,6 -49,7 +49,7 @@@ #include "btf.h" #include "str_error.h" #include "libbpf_internal.h" + #include "hashmap.h"
#ifndef EM_BPF #define EM_BPF 247 @@@ -75,9 -77,12 +77,12 @@@ static int __base_pr(enum libbpf_print_
static libbpf_print_fn_t __libbpf_pr = __base_pr;
- void libbpf_set_print(libbpf_print_fn_t fn) + libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn) { + libbpf_print_fn_t old_print_fn = __libbpf_pr; + __libbpf_pr = fn; + return old_print_fn; }
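Returning the previous callback makes save-and-restore possible; a minimal usage sketch (the callback signature is the one libbpf.h declares for libbpf_print_fn_t):

	static int quiet_print(enum libbpf_print_level level,
			       const char *fmt, va_list args)
	{
		return 0;	/* swallow all libbpf output */
	}

	libbpf_print_fn_t old_fn = libbpf_set_print(quiet_print);
	/* ... noisy operation ... */
	libbpf_set_print(old_fn);	/* restore the previous printer */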
__printf(2, 3) @@@ -182,6 -187,7 +187,6 @@@ struct bpf_program bpf_program_clear_priv_t clear_priv;
enum bpf_attach_type expected_attach_type; - int btf_fd; void *func_info; __u32 func_info_rec_size; __u32 func_info_cnt; @@@ -312,6 -318,7 +317,6 @@@ void bpf_program__unload(struct bpf_pro prog->instances.nr = -1; zfree(&prog->instances.fds);
- zclose(prog->btf_fd); zfree(&prog->func_info); zfree(&prog->line_info); } @@@ -390,6 -397,7 +395,6 @@@ bpf_program__init(void *data, size_t si prog->instances.fds = NULL; prog->instances.nr = -1; prog->type = BPF_PROG_TYPE_UNSPEC; - prog->btf_fd = -1;
return 0; errout: @@@ -1010,23 -1018,21 +1015,21 @@@ static int bpf_object__init_user_maps(s return 0; }
- static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, - __u32 id) + static const struct btf_type * + skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) { const struct btf_type *t = btf__type_by_id(btf, id);
- while (true) { - switch (BTF_INFO_KIND(t->info)) { - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - case BTF_KIND_TYPEDEF: - t = btf__type_by_id(btf, t->type); - break; - default: - return t; - } + if (res_id) + *res_id = id; + + while (btf_is_mod(t) || btf_is_typedef(t)) { + if (res_id) + *res_id = t->type; + t = btf__type_by_id(btf, t->type); } + + return t; }
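btf_is_mod() stands in for the switch deleted above; going by the removed cases, it is expected to be true exactly for the type-modifier kinds (a sketch of the helper's behaviour, not quoted from btf.h):

	static bool btf_is_mod(const struct btf_type *t)
	{
		__u16 kind = btf_kind(t);

		return kind == BTF_KIND_VOLATILE ||
		       kind == BTF_KIND_CONST ||
		       kind == BTF_KIND_RESTRICT;
	}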
/* @@@ -1039,14 -1045,14 +1042,14 @@@ static bool get_map_field_int(const char *map_name, const struct btf *btf, const struct btf_type *def, const struct btf_member *m, __u32 *res) { - const struct btf_type *t = skip_mods_and_typedefs(btf, m->type); + const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); const char *name = btf__name_by_offset(btf, m->name_off); const struct btf_array *arr_info; const struct btf_type *arr_t;
- if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { + if (!btf_is_ptr(t)) { pr_warning("map '%s': attr '%s': expected PTR, got %u.\n", - map_name, name, BTF_INFO_KIND(t->info)); + map_name, name, btf_kind(t)); return false; }
@@@ -1056,12 -1062,12 +1059,12 @@@ map_name, name, t->type); return false; } - if (BTF_INFO_KIND(arr_t->info) != BTF_KIND_ARRAY) { + if (!btf_is_array(arr_t)) { pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n", - map_name, name, BTF_INFO_KIND(arr_t->info)); + map_name, name, btf_kind(arr_t)); return false; } - arr_info = (const void *)(arr_t + 1); + arr_info = btf_array(arr_t); *res = arr_info->nelems; return true; } @@@ -1079,11 -1085,11 +1082,11 @@@ static int bpf_object__init_user_btf_ma struct bpf_map *map; int vlen, i;
- vi = (const struct btf_var_secinfo *)(const void *)(sec + 1) + var_idx; + vi = btf_var_secinfos(sec) + var_idx; var = btf__type_by_id(obj->btf, vi->type); - var_extra = (const void *)(var + 1); + var_extra = btf_var(var); map_name = btf__name_by_offset(obj->btf, var->name_off); - vlen = BTF_INFO_VLEN(var->info); + vlen = btf_vlen(var);
if (map_name == NULL || map_name[0] == '\0') { pr_warning("map #%d: empty name.\n", var_idx); @@@ -1093,9 -1099,9 +1096,9 @@@ pr_warning("map '%s' BTF data is corrupted.\n", map_name); return -EINVAL; } - if (BTF_INFO_KIND(var->info) != BTF_KIND_VAR) { + if (!btf_is_var(var)) { pr_warning("map '%s': unexpected var kind %u.\n", - map_name, BTF_INFO_KIND(var->info)); + map_name, btf_kind(var)); return -EINVAL; } if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && @@@ -1105,10 -1111,10 +1108,10 @@@ return -EOPNOTSUPP; }
- def = skip_mods_and_typedefs(obj->btf, var->type); - if (BTF_INFO_KIND(def->info) != BTF_KIND_STRUCT) { + def = skip_mods_and_typedefs(obj->btf, var->type, NULL); + if (!btf_is_struct(def)) { pr_warning("map '%s': unexpected def kind %u.\n", - map_name, BTF_INFO_KIND(var->info)); + map_name, btf_kind(var)); return -EINVAL; } if (def->size > vi->size) { @@@ -1131,8 -1137,8 +1134,8 @@@ pr_debug("map '%s': at sec_idx %d, offset %zu.\n", map_name, map->sec_idx, map->sec_offset);
- vlen = BTF_INFO_VLEN(def->info); - m = (const void *)(def + 1); + vlen = btf_vlen(def); + m = btf_members(def); for (i = 0; i < vlen; i++, m++) { const char *name = btf__name_by_offset(obj->btf, m->name_off);
@@@ -1182,9 -1188,9 +1185,9 @@@ map_name, m->type); return -EINVAL; } - if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { + if (!btf_is_ptr(t)) { pr_warning("map '%s': key spec is not PTR: %u.\n", - map_name, BTF_INFO_KIND(t->info)); + map_name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); @@@ -1225,9 -1231,9 +1228,9 @@@ map_name, m->type); return -EINVAL; } - if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { + if (!btf_is_ptr(t)) { pr_warning("map '%s': value spec is not PTR: %u.\n", - map_name, BTF_INFO_KIND(t->info)); + map_name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); @@@ -1288,7 -1294,7 +1291,7 @@@ static int bpf_object__init_user_btf_ma nr_types = btf__get_nr_types(obj->btf); for (i = 1; i <= nr_types; i++) { t = btf__type_by_id(obj->btf, i); - if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC) + if (!btf_is_datasec(t)) continue; name = btf__name_by_offset(obj->btf, t->name_off); if (strcmp(name, MAPS_ELF_SEC) == 0) { @@@ -1302,7 -1308,7 +1305,7 @@@ return -ENOENT; }
- vlen = BTF_INFO_VLEN(sec->info); + vlen = btf_vlen(sec); for (i = 0; i < vlen; i++) { err = bpf_object__init_user_btf_map(obj, sec, i, obj->efile.btf_maps_shndx, @@@ -1363,16 -1369,14 +1366,14 @@@ static void bpf_object__sanitize_btf(st struct btf *btf = obj->btf; struct btf_type *t; int i, j, vlen; - __u16 kind;
if (!obj->btf || (has_func && has_datasec)) return;
for (i = 1; i <= btf__get_nr_types(btf); i++) { t = (struct btf_type *)btf__type_by_id(btf, i); - kind = BTF_INFO_KIND(t->info);
- if (!has_datasec && kind == BTF_KIND_VAR) { + if (!has_datasec && btf_is_var(t)) { /* replace VAR with INT */ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); /* @@@ -1381,11 -1385,11 +1382,11 @@@ * original variable took less than 4 bytes */ t->size = 1; - *(int *)(t+1) = BTF_INT_ENC(0, 0, 8); - } else if (!has_datasec && kind == BTF_KIND_DATASEC) { + *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8); + } else if (!has_datasec && btf_is_datasec(t)) { /* replace DATASEC with STRUCT */ - struct btf_var_secinfo *v = (void *)(t + 1); - struct btf_member *m = (void *)(t + 1); + const struct btf_var_secinfo *v = btf_var_secinfos(t); + struct btf_member *m = btf_members(t); struct btf_type *vt; char *name;
@@@ -1396,7 -1400,7 +1397,7 @@@ name++; }
- vlen = BTF_INFO_VLEN(t->info); + vlen = btf_vlen(t); t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); for (j = 0; j < vlen; j++, v++, m++) { /* order of field assignments is important */ @@@ -1406,12 -1410,12 +1407,12 @@@ vt = (void *)btf__type_by_id(btf, v->type); m->name_off = vt->name_off; } - } else if (!has_func && kind == BTF_KIND_FUNC_PROTO) { + } else if (!has_func && btf_is_func_proto(t)) { /* replace FUNC_PROTO with ENUM */ - vlen = BTF_INFO_VLEN(t->info); + vlen = btf_vlen(t); t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); t->size = sizeof(__u32); /* kernel enforced */ - } else if (!has_func && kind == BTF_KIND_FUNC) { + } else if (!has_func && btf_is_func(t)) { /* replace FUNC with TYPEDEF */ t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); } @@@ -1769,15 -1773,22 +1770,22 @@@ bpf_program__collect_reloc(struct bpf_p (long long) sym.st_value, sym.st_name, name);
shdr_idx = sym.st_shndx; + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + pr_debug("relocation: insn_idx=%u, shdr_idx=%u\n", + insn_idx, shdr_idx); + + if (shdr_idx >= SHN_LORESERVE) { + pr_warning("relocation: not yet supported relo for non-static global '%s' variable in special section (0x%x) found in insns[%d].code 0x%x\n", + name, shdr_idx, insn_idx, + insns[insn_idx].code); + return -LIBBPF_ERRNO__RELOC; + } if (!bpf_object__relo_in_known_section(obj, shdr_idx)) { pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n", prog->section_name, shdr_idx); return -LIBBPF_ERRNO__RELOC; }
- insn_idx = rel.r_offset / sizeof(struct bpf_insn); - pr_debug("relocation: insn_idx=%u\n", insn_idx); - if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) { if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) { pr_warning("incorrect bpf_call opcode\n"); @@@ -2285,9 -2296,900 +2293,897 @@@ bpf_program_reloc_btf_ext(struct bpf_pr prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); }
- if (!insn_offset) - prog->btf_fd = btf__fd(obj->btf); - return 0; }
+ #define BPF_CORE_SPEC_MAX_LEN 64
+
+ /* represents BPF CO-RE field or array element accessor */
+ struct bpf_core_accessor {
+ 	__u32 type_id;		/* struct/union type or array element type */
+ 	__u32 idx;		/* field index or array index */
+ 	const char *name;	/* field name or NULL for array accessor */
+ };
+
+ struct bpf_core_spec {
+ 	const struct btf *btf;
+ 	/* high-level spec: named fields and array indices only */
+ 	struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
+ 	/* high-level spec length */
+ 	int len;
+ 	/* raw, low-level spec: 1-to-1 with accessor spec string */
+ 	int raw_spec[BPF_CORE_SPEC_MAX_LEN];
+ 	/* raw spec length */
+ 	int raw_len;
+ 	/* field byte offset represented by spec */
+ 	__u32 offset;
+ };
+
+ static bool str_is_empty(const char *s)
+ {
+ 	return !s || !s[0];
+ }
+
+ /*
+ * Turn bpf_offset_reloc into a low- and high-level spec representation,
+ * validating correctness along the way, as well as calculating resulting
+ * field offset (in bytes), specified by accessor string. Low-level spec
+ * captures every single level of nestedness, including traversing anonymous
+ * struct/union members. High-level one only captures semantically meaningful
+ * "turning points": named fields and array indices.
+ * E.g., for this case:
+ *
+ *   struct sample {
+ *       int __unimportant;
+ *       struct {
+ *           int __1;
+ *           int __2;
+ *           int a[7];
+ *       };
+ *   };
+ *
+ *   struct sample *s = ...;
+ *
+ *   int *x = &s->a[3]; // access string = '0:1:2:3'
+ *
+ * Low-level spec has 1:1 mapping with each element of access string (it's
+ * just a parsed access string representation): [0, 1, 2, 3].
+ *
+ * High-level spec will capture only 3 points:
+ *   - initial zero-index access by pointer (&s->... is the same as &s[0]...);
+ *   - field 'a' access (corresponds to '2' in low-level spec);
+ *   - array element #3 access (corresponds to '3' in low-level spec).
+ * + */ + static int bpf_core_spec_parse(const struct btf *btf, + __u32 type_id, + const char *spec_str, + struct bpf_core_spec *spec) + { + int access_idx, parsed_len, i; + const struct btf_type *t; + const char *name; + __u32 id; + __s64 sz; + + if (str_is_empty(spec_str) || *spec_str == ':') + return -EINVAL; + + memset(spec, 0, sizeof(*spec)); + spec->btf = btf; + + /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ + while (*spec_str) { + if (*spec_str == ':') + ++spec_str; + if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) + return -EINVAL; + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + spec_str += parsed_len; + spec->raw_spec[spec->raw_len++] = access_idx; + } + + if (spec->raw_len == 0) + return -EINVAL; + + /* first spec value is always reloc type array index */ + t = skip_mods_and_typedefs(btf, type_id, &id); + if (!t) + return -EINVAL; + + access_idx = spec->raw_spec[0]; + spec->spec[0].type_id = id; + spec->spec[0].idx = access_idx; + spec->len++; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->offset = access_idx * sz; + + for (i = 1; i < spec->raw_len; i++) { + t = skip_mods_and_typedefs(btf, id, &id); + if (!t) + return -EINVAL; + + access_idx = spec->raw_spec[i]; + + if (btf_is_composite(t)) { + const struct btf_member *m; + __u32 offset; + + if (access_idx >= btf_vlen(t)) + return -EINVAL; + if (btf_member_bitfield_size(t, access_idx)) + return -EINVAL; + + offset = btf_member_bit_offset(t, access_idx); + if (offset % 8) + return -EINVAL; + spec->offset += offset / 8; + + m = btf_members(t) + access_idx; + if (m->name_off) { + name = btf__name_by_offset(btf, m->name_off); + if (str_is_empty(name)) + return -EINVAL; + + spec->spec[spec->len].type_id = id; + spec->spec[spec->len].idx = access_idx; + spec->spec[spec->len].name = name; + spec->len++; + } + + id = m->type; + } else if (btf_is_array(t)) { + const struct btf_array *a = btf_array(t); + + t = skip_mods_and_typedefs(btf, a->type, &id); + if (!t || access_idx >= a->nelems) + return -EINVAL; + + spec->spec[spec->len].type_id = id; + spec->spec[spec->len].idx = access_idx; + spec->len++; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->offset += access_idx * sz; + } else { + pr_warning("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", + type_id, spec_str, i, id, btf_kind(t)); + return -EINVAL; + } + } + + return 0; + } + + static bool bpf_core_is_flavor_sep(const char *s) + { + /* check X___Y name pattern, where X and Y are not underscores */ + return s[0] != '_' && /* X */ + s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ + s[4] != '_'; /* Y */ + } + + /* Given 'some_struct_name___with_flavor' return the length of a name prefix + * before last triple underscore. Struct name part after last triple + * underscore is ignored by BPF CO-RE relocation during relocation matching. 
+ */ + static size_t bpf_core_essential_name_len(const char *name) + { + size_t n = strlen(name); + int i; + + for (i = n - 5; i >= 0; i--) { + if (bpf_core_is_flavor_sep(name + i)) + return i + 1; + } + return n; + } + + /* dynamically sized list of type IDs */ + struct ids_vec { + __u32 *data; + int len; + }; + + static void bpf_core_free_cands(struct ids_vec *cand_ids) + { + free(cand_ids->data); + free(cand_ids); + } + + static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, + __u32 local_type_id, + const struct btf *targ_btf) + { + size_t local_essent_len, targ_essent_len; + const char *local_name, *targ_name; + const struct btf_type *t; + struct ids_vec *cand_ids; + __u32 *new_ids; + int i, err, n; + + t = btf__type_by_id(local_btf, local_type_id); + if (!t) + return ERR_PTR(-EINVAL); + + local_name = btf__name_by_offset(local_btf, t->name_off); + if (str_is_empty(local_name)) + return ERR_PTR(-EINVAL); + local_essent_len = bpf_core_essential_name_len(local_name); + + cand_ids = calloc(1, sizeof(*cand_ids)); + if (!cand_ids) + return ERR_PTR(-ENOMEM); + + n = btf__get_nr_types(targ_btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(targ_btf, i); + targ_name = btf__name_by_offset(targ_btf, t->name_off); + if (str_is_empty(targ_name)) + continue; + + targ_essent_len = bpf_core_essential_name_len(targ_name); + if (targ_essent_len != local_essent_len) + continue; + + if (strncmp(local_name, targ_name, local_essent_len) == 0) { + pr_debug("[%d] %s: found candidate [%d] %s\n", + local_type_id, local_name, i, targ_name); + new_ids = realloc(cand_ids->data, cand_ids->len + 1); + if (!new_ids) { + err = -ENOMEM; + goto err_out; + } + cand_ids->data = new_ids; + cand_ids->data[cand_ids->len++] = i; + } + } + return cand_ids; + err_out: + bpf_core_free_cands(cand_ids); + return ERR_PTR(err); + } + + /* Check two types for compatibility, skipping const/volatile/restrict and + * typedefs, to ensure we are relocating offset to the compatible entities: + * - any two STRUCTs/UNIONs are compatible and can be mixed; + * - any two FWDs are compatible; + * - any two PTRs are always compatible; + * - for ENUMs, check sizes, names are ignored; + * - for INT, size and bitness should match, signedness is ignored; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * - everything else shouldn't be ever a target of relocation. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations. 
+ */ + static int bpf_core_fields_are_compat(const struct btf *local_btf, + __u32 local_id, + const struct btf *targ_btf, + __u32 targ_id) + { + const struct btf_type *local_type, *targ_type; + + recur: + local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (btf_is_composite(local_type) && btf_is_composite(targ_type)) + return 1; + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_FWD: + case BTF_KIND_PTR: + return 1; + case BTF_KIND_ENUM: + return local_type->size == targ_type->size; + case BTF_KIND_INT: + return btf_int_offset(local_type) == 0 && + btf_int_offset(targ_type) == 0 && + local_type->size == targ_type->size && + btf_int_bits(local_type) == btf_int_bits(targ_type); + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + default: + pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n", + btf_kind(local_type), local_id, targ_id); + return 0; + } + } + + /* + * Given single high-level named field accessor in local type, find + * corresponding high-level accessor for a target type. Along the way, + * maintain low-level spec for target as well. Also keep updating target + * offset. + * + * Searching is performed through recursive exhaustive enumeration of all + * fields of a struct/union. If there are any anonymous (embedded) + * structs/unions, they are recursively searched as well. If field with + * desired name is found, check compatibility between local and target types, + * before returning result. + * + * 1 is returned, if field is found. + * 0 is returned if no compatible field is found. + * <0 is returned on error. 
+ */ + static int bpf_core_match_member(const struct btf *local_btf, + const struct bpf_core_accessor *local_acc, + const struct btf *targ_btf, + __u32 targ_id, + struct bpf_core_spec *spec, + __u32 *next_targ_id) + { + const struct btf_type *local_type, *targ_type; + const struct btf_member *local_member, *m; + const char *local_name, *targ_name; + __u32 local_id; + int i, n, found; + + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!targ_type) + return -EINVAL; + if (!btf_is_composite(targ_type)) + return 0; + + local_id = local_acc->type_id; + local_type = btf__type_by_id(local_btf, local_id); + local_member = btf_members(local_type) + local_acc->idx; + local_name = btf__name_by_offset(local_btf, local_member->name_off); + + n = btf_vlen(targ_type); + m = btf_members(targ_type); + for (i = 0; i < n; i++, m++) { + __u32 offset; + + /* bitfield relocations not supported */ + if (btf_member_bitfield_size(targ_type, i)) + continue; + offset = btf_member_bit_offset(targ_type, i); + if (offset % 8) + continue; + + /* too deep struct/union/array nesting */ + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + /* speculate this member will be the good one */ + spec->offset += offset / 8; + spec->raw_spec[spec->raw_len++] = i; + + targ_name = btf__name_by_offset(targ_btf, m->name_off); + if (str_is_empty(targ_name)) { + /* embedded struct/union, we need to go deeper */ + found = bpf_core_match_member(local_btf, local_acc, + targ_btf, m->type, + spec, next_targ_id); + if (found) /* either found or error */ + return found; + } else if (strcmp(local_name, targ_name) == 0) { + /* matching named field */ + struct bpf_core_accessor *targ_acc; + + targ_acc = &spec->spec[spec->len++]; + targ_acc->type_id = targ_id; + targ_acc->idx = i; + targ_acc->name = targ_name; + + *next_targ_id = m->type; + found = bpf_core_fields_are_compat(local_btf, + local_member->type, + targ_btf, m->type); + if (!found) + spec->len--; /* pop accessor */ + return found; + } + /* member turned out not to be what we looked for */ + spec->offset -= offset / 8; + spec->raw_len--; + } + + return 0; + } + + /* + * Try to match local spec to a target type and, if successful, produce full + * target spec (high-level, low-level + offset). 
+ */ + static int bpf_core_spec_match(struct bpf_core_spec *local_spec, + const struct btf *targ_btf, __u32 targ_id, + struct bpf_core_spec *targ_spec) + { + const struct btf_type *targ_type; + const struct bpf_core_accessor *local_acc; + struct bpf_core_accessor *targ_acc; + int i, sz, matched; + + memset(targ_spec, 0, sizeof(*targ_spec)); + targ_spec->btf = targ_btf; + + local_acc = &local_spec->spec[0]; + targ_acc = &targ_spec->spec[0]; + + for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, + &targ_id); + if (!targ_type) + return -EINVAL; + + if (local_acc->name) { + matched = bpf_core_match_member(local_spec->btf, + local_acc, + targ_btf, targ_id, + targ_spec, &targ_id); + if (matched <= 0) + return matched; + } else { + /* for i=0, targ_id is already treated as array element + * type (because it's the original struct), for others + * we should find array element type first + */ + if (i > 0) { + const struct btf_array *a; + + if (!btf_is_array(targ_type)) + return 0; + + a = btf_array(targ_type); + if (local_acc->idx >= a->nelems) + return 0; + if (!skip_mods_and_typedefs(targ_btf, a->type, + &targ_id)) + return -EINVAL; + } + + /* too deep struct/union/array nesting */ + if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + targ_acc->type_id = targ_id; + targ_acc->idx = local_acc->idx; + targ_acc->name = NULL; + targ_spec->len++; + targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; + targ_spec->raw_len++; + + sz = btf__resolve_size(targ_btf, targ_id); + if (sz < 0) + return sz; + targ_spec->offset += local_acc->idx * sz; + } + } + + return 1; + } + + /* + * Patch relocatable BPF instruction. + * Expected insn->imm value is provided for validation, as well as the new + * relocated value. + * + * Currently three kinds of BPF instructions are supported: + * 1. rX = <imm> (assignment with immediate operand); + * 2. rX += <imm> (arithmetic operations with immediate operand); + * 3. *(rX) = <imm> (indirect memory assignment with immediate operand). + * + * If actual insn->imm value is wrong, bail out. 
+ */ + static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off, + __u32 orig_off, __u32 new_off) + { + struct bpf_insn *insn; + int insn_idx; + __u8 class; + + if (insn_off % sizeof(struct bpf_insn)) + return -EINVAL; + insn_idx = insn_off / sizeof(struct bpf_insn); + + insn = &prog->insns[insn_idx]; + class = BPF_CLASS(insn->code); + + if (class == BPF_ALU || class == BPF_ALU64) { + if (BPF_SRC(insn->code) != BPF_K) + return -EINVAL; + if (insn->imm != orig_off) + return -EINVAL; + insn->imm = new_off; + pr_debug("prog '%s': patched insn #%d (ALU/ALU64) imm %d -> %d\n", + bpf_program__title(prog, false), + insn_idx, orig_off, new_off); + } else { + pr_warning("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", + bpf_program__title(prog, false), + insn_idx, insn->code, insn->src_reg, insn->dst_reg, + insn->off, insn->imm); + return -EINVAL; + } + return 0; + } + + static struct btf *btf_load_raw(const char *path) + { + struct btf *btf; + size_t read_cnt; + struct stat st; + void *data; + FILE *f; + + if (stat(path, &st)) + return ERR_PTR(-errno); + + data = malloc(st.st_size); + if (!data) + return ERR_PTR(-ENOMEM); + + f = fopen(path, "rb"); + if (!f) { + btf = ERR_PTR(-errno); + goto cleanup; + } + + read_cnt = fread(data, 1, st.st_size, f); + fclose(f); + if (read_cnt < st.st_size) { + btf = ERR_PTR(-EBADF); + goto cleanup; + } + + btf = btf__new(data, read_cnt); + + cleanup: + free(data); + return btf; + } + + /* + * Probe few well-known locations for vmlinux kernel image and try to load BTF + * data out of it to use for target BTF. + */ + static struct btf *bpf_core_find_kernel_btf(void) + { + struct { + const char *path_fmt; + bool raw_btf; + } locations[] = { + /* try canonical vmlinux BTF through sysfs first */ + { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, + /* fall back to trying to find vmlinux ELF on disk otherwise */ + { "/boot/vmlinux-%1$s" }, + { "/lib/modules/%1$s/vmlinux-%1$s" }, + { "/lib/modules/%1$s/build/vmlinux" }, + { "/usr/lib/modules/%1$s/kernel/vmlinux" }, + { "/usr/lib/debug/boot/vmlinux-%1$s" }, + { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, + { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, + }; + char path[PATH_MAX + 1]; + struct utsname buf; + struct btf *btf; + int i; + + uname(&buf); + + for (i = 0; i < ARRAY_SIZE(locations); i++) { + snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); + + if (access(path, R_OK)) + continue; + + if (locations[i].raw_btf) + btf = btf_load_raw(path); + else + btf = btf__parse_elf(path, NULL); + + pr_debug("loading kernel BTF '%s': %ld\n", + path, IS_ERR(btf) ? PTR_ERR(btf) : 0); + if (IS_ERR(btf)) + continue; + + return btf; + } + + pr_warning("failed to find valid kernel BTF\n"); + return ERR_PTR(-ESRCH); + } + + /* Output spec definition in the format: + * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, + * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b + */ + static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) + { + const struct btf_type *t; + const char *s; + __u32 type_id; + int i; + + type_id = spec->spec[0].type_id; + t = btf__type_by_id(spec->btf, type_id); + s = btf__name_by_offset(spec->btf, t->name_off); + libbpf_print(level, "[%u] %s + ", type_id, s); + + for (i = 0; i < spec->raw_len; i++) + libbpf_print(level, "%d%s", spec->raw_spec[i], + i == spec->raw_len - 1 ? 
" => " : ":"); + + libbpf_print(level, "%u @ &x", spec->offset); + + for (i = 0; i < spec->len; i++) { + if (spec->spec[i].name) + libbpf_print(level, ".%s", spec->spec[i].name); + else + libbpf_print(level, "[%u]", spec->spec[i].idx); + } + + } + + static size_t bpf_core_hash_fn(const void *key, void *ctx) + { + return (size_t)key; + } + + static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx) + { + return k1 == k2; + } + + static void *u32_as_hash_key(__u32 x) + { + return (void *)(uintptr_t)x; + } + + /* + * CO-RE relocate single instruction. + * + * The outline and important points of the algorithm: + * 1. For given local type, find corresponding candidate target types. + * Candidate type is a type with the same "essential" name, ignoring + * everything after last triple underscore (___). E.g., `sample`, + * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates + * for each other. Names with triple underscore are referred to as + * "flavors" and are useful, among other things, to allow to + * specify/support incompatible variations of the same kernel struct, which + * might differ between different kernel versions and/or build + * configurations. + * + * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C + * converter, when deduplicated BTF of a kernel still contains more than + * one different types with the same name. In that case, ___2, ___3, etc + * are appended starting from second name conflict. But start flavors are + * also useful to be defined "locally", in BPF program, to extract same + * data from incompatible changes between different kernel + * versions/configurations. For instance, to handle field renames between + * kernel versions, one can use two flavors of the struct name with the + * same common name and use conditional relocations to extract that field, + * depending on target kernel version. + * 2. For each candidate type, try to match local specification to this + * candidate target type. Matching involves finding corresponding + * high-level spec accessors, meaning that all named fields should match, + * as well as all array accesses should be within the actual bounds. Also, + * types should be compatible (see bpf_core_fields_are_compat for details). + * 3. It is supported and expected that there might be multiple flavors + * matching the spec. As long as all the specs resolve to the same set of + * offsets across all candidates, there is not error. If there is any + * ambiguity, CO-RE relocation will fail. This is necessary to accomodate + * imprefection of BTF deduplication, which can cause slight duplication of + * the same BTF type, if some directly or indirectly referenced (by + * pointer) type gets resolved to different actual types in different + * object files. If such situation occurs, deduplicated BTF will end up + * with two (or more) structurally identical types, which differ only in + * types they refer to through pointer. This should be OK in most cases and + * is not an error. + * 4. Candidate types search is performed by linearly scanning through all + * types in target BTF. It is anticipated that this is overall more + * efficient memory-wise and not significantly worse (if not better) + * CPU-wise compared to prebuilding a map from all local type names to + * a list of candidate type names. It's also sped up by caching resolved + * list of matching candidates per each local "root" type ID, that has at + * least one bpf_offset_reloc associated with it. 
This list is shared + * between multiple relocations for the same type ID and is updated as some + * of the candidates are pruned due to structural incompatibility. + */ + static int bpf_core_reloc_offset(struct bpf_program *prog, + const struct bpf_offset_reloc *relo, + int relo_idx, + const struct btf *local_btf, + const struct btf *targ_btf, + struct hashmap *cand_cache) + { + const char *prog_name = bpf_program__title(prog, false); + struct bpf_core_spec local_spec, cand_spec, targ_spec; + const void *type_key = u32_as_hash_key(relo->type_id); + const struct btf_type *local_type, *cand_type; + const char *local_name, *cand_name; + struct ids_vec *cand_ids; + __u32 local_id, cand_id; + const char *spec_str; + int i, j, err; + + local_id = relo->type_id; + local_type = btf__type_by_id(local_btf, local_id); + if (!local_type) + return -EINVAL; + + local_name = btf__name_by_offset(local_btf, local_type->name_off); + if (str_is_empty(local_name)) + return -EINVAL; + + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); + if (str_is_empty(spec_str)) + return -EINVAL; + + err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec); + if (err) { + pr_warning("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", + prog_name, relo_idx, local_id, local_name, spec_str, + err); + return -EINVAL; + } + + pr_debug("prog '%s': relo #%d: spec is ", prog_name, relo_idx); + bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + + if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) { + cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); + if (IS_ERR(cand_ids)) { + pr_warning("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", + prog_name, relo_idx, local_id, local_name, + PTR_ERR(cand_ids)); + return PTR_ERR(cand_ids); + } + err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL); + if (err) { + bpf_core_free_cands(cand_ids); + return err; + } + } + + for (i = 0, j = 0; i < cand_ids->len; i++) { + cand_id = cand_ids->data[i]; + cand_type = btf__type_by_id(targ_btf, cand_id); + cand_name = btf__name_by_offset(targ_btf, cand_type->name_off); + + err = bpf_core_spec_match(&local_spec, targ_btf, + cand_id, &cand_spec); + pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ", + prog_name, relo_idx, i, cand_name); + bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); + libbpf_print(LIBBPF_DEBUG, ": %d\n", err); + if (err < 0) { + pr_warning("prog '%s': relo #%d: matching error: %d\n", + prog_name, relo_idx, err); + return err; + } + if (err == 0) + continue; + + if (j == 0) { + targ_spec = cand_spec; + } else if (cand_spec.offset != targ_spec.offset) { + /* if there are many candidates, they should all + * resolve to the same offset + */ + pr_warning("prog '%s': relo #%d: offset ambiguity: %u != %u\n", + prog_name, relo_idx, cand_spec.offset, + targ_spec.offset); + return -EINVAL; + } + + cand_ids->data[j++] = cand_spec.spec[0].type_id; + } + + cand_ids->len = j; + if (cand_ids->len == 0) { + pr_warning("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", + prog_name, relo_idx, local_id, local_name, spec_str); + return -ESRCH; + } + + err = bpf_core_reloc_insn(prog, relo->insn_off, + local_spec.offset, targ_spec.offset); + if (err) { + pr_warning("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", + prog_name, relo_idx, relo->insn_off, err); + return -EINVAL; + } + + return 0; + } + + static int + bpf_core_reloc_offsets(struct bpf_object *obj, const char 
*targ_btf_path) + { + const struct btf_ext_info_sec *sec; + const struct bpf_offset_reloc *rec; + const struct btf_ext_info *seg; + struct hashmap_entry *entry; + struct hashmap *cand_cache = NULL; + struct bpf_program *prog; + struct btf *targ_btf; + const char *sec_name; + int i, err = 0; + + if (targ_btf_path) + targ_btf = btf__parse_elf(targ_btf_path, NULL); + else + targ_btf = bpf_core_find_kernel_btf(); + if (IS_ERR(targ_btf)) { + pr_warning("failed to get target BTF: %ld\n", + PTR_ERR(targ_btf)); + return PTR_ERR(targ_btf); + } + + cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); + if (IS_ERR(cand_cache)) { + err = PTR_ERR(cand_cache); + goto out; + } + + seg = &obj->btf_ext->offset_reloc_info; + for_each_btf_ext_sec(seg, sec) { + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); + if (str_is_empty(sec_name)) { + err = -EINVAL; + goto out; + } + prog = bpf_object__find_program_by_title(obj, sec_name); + if (!prog) { + pr_warning("failed to find program '%s' for CO-RE offset relocation\n", + sec_name); + err = -EINVAL; + goto out; + } + + pr_debug("prog '%s': performing %d CO-RE offset relocs\n", + sec_name, sec->num_info); + + for_each_btf_ext_rec(seg, sec, i, rec) { + err = bpf_core_reloc_offset(prog, rec, i, obj->btf, + targ_btf, cand_cache); + if (err) { + pr_warning("prog '%s': relo #%d: failed to relocate: %d\n", + sec_name, i, err); + goto out; + } + } + } + + out: + btf__free(targ_btf); + if (!IS_ERR_OR_NULL(cand_cache)) { + hashmap__for_each_entry(cand_cache, entry, i) { + bpf_core_free_cands(entry->value); + } + hashmap__free(cand_cache); + } + return err; + } + + static int + bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) + { + int err = 0; + + if (obj->btf_ext->offset_reloc_info.len) + err = bpf_core_reloc_offsets(obj, targ_btf_path); + + return err; + } + static int bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, struct reloc_desc *relo) @@@ -2395,14 -3297,21 +3291,21 @@@ bpf_program__relocate(struct bpf_progra return 0; }
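The struct "flavor" convention described in the algorithm comment above is driven entirely from the BPF program side. Below is a minimal sketch of how a program might define two local flavors to cope with a field rename between kernel versions; all type and field names here are hypothetical, it assumes a Clang with __builtin_preserve_access_index and the usual bpf_helpers.h, and it is an illustration rather than part of the commit:

	/* Both local flavors resolve against the kernel's `struct sample`:
	 * the candidate search ignores everything after the last ___.
	 */
	struct sample___old {
		int field_a;		/* field name before the rename */
	};

	struct sample___new {
		int field_b;		/* field name after the rename */
	};

	static int read_sample_field(void *kptr, int *out)
	{
		struct sample___new *s = kptr;

		/* emits a CO-RE offset relocation against `sample`, which
		 * libbpf patches to the offset found in the target BTF
		 */
		return bpf_probe_read(out, sizeof(*out),
				      __builtin_preserve_access_index(&s->field_b));
	}

Which flavor actually relocates is decided at load time against the target kernel's BTF; how a program selects between the two variants at runtime is beyond this sketch.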
- static int - bpf_object__relocate(struct bpf_object *obj) + bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { struct bpf_program *prog; size_t i; int err;
+ if (obj->btf_ext) { + err = bpf_object__relocate_core(obj, targ_btf_path); + if (err) { + pr_warning("failed to perform CO-RE relocations: %d\n", + err); + return err; + } + } for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i];
@@@ -2457,7 -3366,7 +3360,7 @@@ load_program(struct bpf_program *prog, char *cp, errmsg[STRERR_BUFSIZE]; int log_buf_size = BPF_LOG_BUF_SIZE; char *log_buf; - int ret; + int btf_fd, ret;
if (!insns || !insns_cnt) return -EINVAL; @@@ -2472,12 -3381,7 +3375,12 @@@ load_attr.license = license; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; - load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0; + /* if .BTF.ext was loaded, kernel supports associated BTF for prog */ + if (prog->obj->btf_ext) + btf_fd = bpf_object__btf_fd(prog->obj); + else + btf_fd = -1; + load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; load_attr.func_info_cnt = prog->func_info_cnt; @@@ -2808,7 -3712,7 +3711,7 @@@ int bpf_object__load_xattr(struct bpf_o obj->loaded = true;
CHECK_ERR(bpf_object__create_maps(obj), err, out); - CHECK_ERR(bpf_object__relocate(obj), err, out); + CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out); CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
return 0; @@@ -4999,15 -5903,13 +5902,15 @@@ int libbpf_num_possible_cpus(void static const char *fcpu = "/sys/devices/system/cpu/possible"; int len = 0, n = 0, il = 0, ir = 0; unsigned int start = 0, end = 0; + int tmp_cpus = 0; static int cpus; char buf[128]; int error = 0; int fd = -1;
- if (cpus > 0) - return cpus; + tmp_cpus = READ_ONCE(cpus); + if (tmp_cpus > 0) + return tmp_cpus;
fd = open(fcpu, O_RDONLY); if (fd < 0) { @@@ -5030,7 -5932,7 +5933,7 @@@ } buf[len] = '\0';
- for (ir = 0, cpus = 0; ir <= len; ir++) { + for (ir = 0, tmp_cpus = 0; ir <= len; ir++) { /* Each substring, separated by ',', has the format \d+-\d+ or \d+ */ if (buf[ir] == ',' || buf[ir] == '\0') { buf[ir] = '\0'; @@@ -5042,15 -5944,13 +5945,15 @@@ } else if (n == 1) { end = start; } - cpus += end - start + 1; + tmp_cpus += end - start + 1; il = ir + 1; } } - if (cpus <= 0) { - pr_warning("Invalid #CPUs %d from %s\n", cpus, fcpu); + if (tmp_cpus <= 0) { + pr_warning("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); return -EINVAL; } - return cpus; + + WRITE_ONCE(cpus, tmp_cpus); + return tmp_cpus; }
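The tmp_cpus rework above also tightens the caching contract: the static cpus value is now published with WRITE_ONCE only after it is fully computed, and read with READ_ONCE, so concurrent callers either see 0 (and recompute) or the final count, never an intermediate value. Callers typically use the result to size per-CPU map lookup buffers. A minimal usage sketch (map_fd and key are placeholders for a BPF_MAP_TYPE_PERCPU_ARRAY map set up elsewhere):

	#include <stdlib.h>
	#include <bpf/bpf.h>
	#include <bpf/libbpf.h>

	static __u64 sum_percpu_counter(int map_fd, __u32 key)
	{
		int i, ncpus = libbpf_num_possible_cpus();
		__u64 sum = 0, *vals;

		if (ncpus < 0)
			return 0;	/* failed to read CPU count */
		vals = calloc(ncpus, sizeof(*vals));
		if (!vals)
			return 0;
		/* per-CPU lookups fill one value per possible CPU */
		if (!bpf_map_lookup_elem(map_fd, &key, vals))
			for (i = 0; i < ncpus; i++)
				sum += vals[i];
		free(vals);
		return sum;
	}

Sizing by possible (rather than online) CPUs matters because the kernel allocates per-CPU map storage for every possible CPU.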