The following commit has been merged in the master branch: commit e945bfb6aacab25ae884985bdff451ef726f401d Merge: 6a28e78da36db5c81df425524a2ec220795cc7ea 459c5fb44379335c966d98c7fdc4e8ebe2d2b93f Author: Stephen Rothwell sfr@canb.auug.org.au Date: Fri Aug 16 12:40:36 2019 +1000
Merge remote-tracking branch 'net-next/master'
# Conflicts: # drivers/net/ethernet/mellanox/mlx5/core/en_tc.c # scripts/link-vmlinux.sh
diff --combined MAINTAINERS index 985b1b41ee0e,e352550a6895..466ac1920ac5 --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -517,6 -517,14 +517,6 @@@ W: http://ez.analog.com/community/linux S: Supported F: drivers/video/backlight/adp8860_bl.c
-ADS1015 HARDWARE MONITOR DRIVER -M: Dirk Eibach eibach@gdsys.de -L: linux-hwmon@vger.kernel.org -S: Maintained -F: Documentation/hwmon/ads1015.rst -F: drivers/hwmon/ads1015.c -F: include/linux/platform_data/ads1015.h - ADT746X FAN DRIVER M: Colin Leroy colin@colino.net S: Maintained @@@ -658,7 -666,7 +658,7 @@@ ALI1563 I2C DRIVE M: Rudolf Marek r.marek@assembler.cz L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/i2c/busses/i2c-ali1563 +F: Documentation/i2c/busses/i2c-ali1563.rst F: drivers/i2c/busses/i2c-ali1563.c
ALLEGRO DVT VIDEO IP CORE DRIVER @@@ -668,13 -676,6 +668,13 @@@ L: linux-media@vger.kernel.or S: Maintained F: drivers/staging/media/allegro-dvt/
+ALLWINNER CPUFREQ DRIVER +M: Yangtao Li tiny.windzz@gmail.com +L: linux-pm@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/opp/sun50i-nvmem-cpufreq.txt +F: drivers/cpufreq/sun50i-cpufreq-nvmem.c + ALLWINNER SECURITY SYSTEM M: Corentin Labbe clabbe.montjoie@gmail.com L: linux-crypto@vger.kernel.org @@@ -682,7 -683,7 +682,7 @@@ S: Maintaine F: drivers/crypto/sunxi-ss/
ALLWINNER VPU DRIVER -M: Maxime Ripard maxime.ripard@bootlin.com +M: Maxime Ripard mripard@kernel.org M: Paul Kocialkowski paul.kocialkowski@bootlin.com L: linux-media@vger.kernel.org S: Maintained @@@ -1407,7 -1408,7 +1407,7 @@@ S: Maintaine F: drivers/clk/sunxi/
ARM/Allwinner sunXi SoC support -M: Maxime Ripard maxime.ripard@bootlin.com +M: Maxime Ripard mripard@kernel.org M: Chen-Yu Tsai wens@csie.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained @@@ -1581,8 -1582,8 +1581,8 @@@ R: Suzuki K Poulose <suzuki.poulose@arm L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: drivers/hwtracing/coresight/* -F: Documentation/trace/coresight.txt -F: Documentation/trace/coresight-cpu-debug.txt +F: Documentation/trace/coresight.rst +F: Documentation/trace/coresight-cpu-debug.rst F: Documentation/devicetree/bindings/arm/coresight.txt F: Documentation/devicetree/bindings/arm/coresight-cpu-debug.txt F: Documentation/ABI/testing/sysfs-bus-coresight-devices-* @@@ -1625,18 -1626,6 +1625,18 @@@ F: drivers/clocksource/timer-atlas7. N: [^a-z]sirf X: drivers/gnss
+ARM/CZ.NIC TURRIS MOX SUPPORT +M: Marek Behun marek.behun@nic.cz +W: http://mox.turris.cz +S: Maintained +F: Documentation/ABI/testing/debugfs-moxtet +F: Documentation/ABI/testing/sysfs-bus-moxtet-devices +F: Documentation/devicetree/bindings/bus/moxtet.txt +F: Documentation/devicetree/bindings/gpio/gpio-moxtet.txt +F: include/linux/moxtet.h +F: drivers/bus/moxtet.c +F: drivers/gpio/gpio-moxtet.c + ARM/EBSA110 MACHINE SUPPORT M: Russell King linux@armlinux.org.uk L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@@ -1760,11 -1749,20 +1760,11 @@@ L: linux-arm-kernel@lists.infradead.or S: Maintained F: arch/arm/mach-pxa/colibri-pxa270-income.c
-ARM/INTEL IOP13XX ARM ARCHITECTURE -M: Lennert Buytenhek kernel@wantstofly.org -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained - ARM/INTEL IOP32X ARM ARCHITECTURE M: Lennert Buytenhek kernel@wantstofly.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained
-ARM/INTEL IOP33X ARM ARCHITECTURE -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Orphan - ARM/INTEL IQ81342EX MACHINE SUPPORT M: Lennert Buytenhek kernel@wantstofly.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@@ -1923,6 -1921,12 +1923,6 @@@ S: Maintaine F: drivers/phy/mediatek/ F: Documentation/devicetree/bindings/phy/phy-mtk-*
-ARM/MICREL KS8695 ARCHITECTURE -M: Greg Ungerer gerg@uclinux.org -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -F: arch/arm/mach-ks8695/ -S: Odd Fixes - ARM/Microchip (AT91) SoC support M: Nicolas Ferre nicolas.ferre@microchip.com M: Alexandre Belloni alexandre.belloni@bootlin.com @@@ -1964,7 -1968,6 +1964,7 @@@ F: Documentation/devicetree/bindings/i2 F: arch/arm/mach-nomadik/ F: arch/arm/mach-u300/ F: arch/arm/mach-ux500/ +F: drivers/soc/ux500/ F: arch/arm/boot/dts/ste-* F: drivers/clk/clk-nomadik.c F: drivers/clk/clk-u300.c @@@ -2008,6 -2011,22 +2008,6 @@@ F: drivers/*/*npcm F: Documentation/devicetree/bindings/*/*npcm* F: Documentation/devicetree/bindings/*/*/*npcm*
-ARM/NUVOTON W90X900 ARM ARCHITECTURE -M: Wan ZongShun mcuos.com@gmail.com -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -W: http://www.mcuos.com -S: Maintained -F: arch/arm/mach-w90x900/ -F: drivers/input/keyboard/w90p910_keypad.c -F: drivers/input/touchscreen/w90p910_ts.c -F: drivers/watchdog/nuc900_wdt.c -F: drivers/net/ethernet/nuvoton/w90p910_ether.c -F: drivers/mtd/nand/raw/nuc900_nand.c -F: drivers/rtc/rtc-nuc900.c -F: drivers/spi/spi-nuc900.c -F: drivers/usb/host/ehci-w90x900.c -F: drivers/video/fbdev/nuc900fb.c - ARM/OPENMOKO NEO FREERUNNER (GTA02) MACHINE SUPPORT L: openmoko-kernel@lists.openmoko.org (subscribers-only) W: http://wiki.openmoko.org/wiki/Neo_FreeRunner @@@ -3558,7 -3577,7 +3558,7 @@@ F: Documentation/filesystems/caching/ca F: fs/cachefiles/
CADENCE MIPI-CSI2 BRIDGES -M: Maxime Ripard maxime.ripard@bootlin.com +M: Maxime Ripard mripard@kernel.org L: linux-media@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/media/cdns,*.txt @@@ -3616,9 -3635,12 +3616,12 @@@ S: Maintaine F: Documentation/devicetree/bindings/net/can/ F: drivers/net/can/ F: include/linux/can/dev.h + F: include/linux/can/led.h + F: include/linux/can/rx-offload.h F: include/linux/can/platform/ F: include/uapi/linux/can/error.h F: include/uapi/linux/can/netlink.h + F: include/uapi/linux/can/vxcan.h
CAN NETWORK LAYER M: Oliver Hartkopp socketcan@hartkopp.net @@@ -3631,6 -3653,8 +3634,8 @@@ S: Maintaine F: Documentation/networking/can.rst F: net/can/ F: include/linux/can/core.h + F: include/linux/can/skb.h + F: include/net/netns/can.h F: include/uapi/linux/can.h F: include/uapi/linux/can/bcm.h F: include/uapi/linux/can/raw.h @@@ -4084,7 -4108,7 +4089,7 @@@ L: samba-technical@lists.samba.org (mod W: http://linux-cifs.samba.org/ T: git git://git.samba.org/sfrench/cifs-2.6.git S: Supported -F: Documentation/filesystems/cifs/ +F: Documentation/admin-guide/cifs/ F: fs/cifs/
COMPACTPCI HOTPLUG CORE @@@ -4271,14 -4295,6 +4276,14 @@@ S: Supporte F: drivers/cpuidle/cpuidle-exynos.c F: arch/arm/mach-exynos/pm.c
+CPUIDLE DRIVER - ARM PSCI +M: Lorenzo Pieralisi lorenzo.pieralisi@arm.com +M: Sudeep Holla sudeep.holla@arm.com +L: linux-pm@vger.kernel.org +L: linux-arm-kernel@lists.infradead.org +S: Supported +F: drivers/cpuidle/cpuidle-psci.c + CPU IDLE TIME MANAGEMENT FRAMEWORK M: "Rafael J. Wysocki" rjw@rjwysocki.net M: Daniel Lezcano daniel.lezcano@linaro.org @@@ -4940,9 -4956,7 +4945,9 @@@ M: Jonathan Corbet <corbet@lwn.net L: linux-doc@vger.kernel.org S: Maintained F: Documentation/ +F: scripts/documentation-file-ref-check F: scripts/kernel-doc +F: scripts/sphinx-pre-install X: Documentation/ABI/ X: Documentation/firmware-guide/acpi/ X: Documentation/devicetree/ @@@ -4958,14 -4972,6 +4963,14 @@@ L: linux-doc@vger.kernel.or S: Maintained F: Documentation/translations/it_IT
+DOCUMENTATION SCRIPTS +M: Mauro Carvalho Chehab mchehab@kernel.org +L: linux-doc@vger.kernel.org +S: Maintained +F: scripts/documentation-file-ref-check +F: scripts/sphinx-pre-install +F: Documentation/sphinx/parse-headers.pl + DONGWOON DW9714 LENS VOICE COIL DRIVER M: Sakari Ailus sakari.ailus@linux.intel.com L: linux-media@vger.kernel.org @@@ -5294,7 -5300,7 +5299,7 @@@ F: include/linux/vga
DRM DRIVERS AND MISC GPU PATCHES M: Maarten Lankhorst maarten.lankhorst@linux.intel.com -M: Maxime Ripard maxime.ripard@bootlin.com +M: Maxime Ripard mripard@kernel.org M: Sean Paul sean@poorly.run W: https://01.org/linuxgraphics/gfx-docs/maintainer-tools/drm-misc.html S: Maintained @@@ -5307,7 -5313,7 +5312,7 @@@ F: include/uapi/drm/drm F: include/linux/vga*
DRM DRIVERS FOR ALLWINNER A10 -M: Maxime Ripard maxime.ripard@bootlin.com +M: Maxime Ripard mripard@kernel.org L: dri-devel@lists.freedesktop.org S: Supported F: drivers/gpu/drm/sun4i/ @@@ -6064,7 -6070,7 +6069,7 @@@ M: Florian Fainelli <f.fainelli@gmail.c M: Heiner Kallweit hkallweit1@gmail.com L: netdev@vger.kernel.org S: Maintained -F: Documentation/ABI/testing/sysfs-bus-mdio +F: Documentation/ABI/testing/sysfs-class-net-phydev F: Documentation/devicetree/bindings/net/ethernet-phy.yaml F: Documentation/devicetree/bindings/net/mdio* F: Documentation/networking/phy.rst @@@ -6320,7 -6326,7 +6325,7 @@@ FLEXTIMER FTM-QUADDEC DRIVE M: Patrick Havelange patrick.havelange@essensium.com L: linux-iio@vger.kernel.org S: Maintained -F: Documentation/ABI/testing/sysfs-bus-counter-ftm-quadddec +F: Documentation/ABI/testing/sysfs-bus-counter-ftm-quaddec F: Documentation/devicetree/bindings/counter/ftm-quaddec.txt F: drivers/counter/ftm-quaddec.c
@@@ -6343,7 -6349,7 +6348,7 @@@ FPGA MANAGER FRAMEWOR M: Moritz Fischer mdf@kernel.org L: linux-fpga@vger.kernel.org S: Maintained -T: git git://git.kernel.org/pub/scm/linux/kernel/git/atull/linux-fpga.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/mdf/linux-fpga.git Q: http://patchwork.kernel.org/project/linux-fpga/list/ F: Documentation/fpga/ F: Documentation/driver-api/fpga/ @@@ -6376,7 -6382,7 +6381,7 @@@ FRAMEBUFFER LAYE M: Bartlomiej Zolnierkiewicz b.zolnierkie@samsung.com L: dri-devel@lists.freedesktop.org L: linux-fbdev@vger.kernel.org -T: git git://github.com/bzolnier/linux.git +T: git git://anongit.freedesktop.org/drm/drm-misc Q: http://patchwork.kernel.org/project/linux-fbdev/list/ S: Maintained F: Documentation/fb/ @@@ -6602,7 -6608,6 +6607,7 @@@ T: git git://git.kernel.org/pub/scm/fs/ S: Supported F: fs/crypto/ F: include/linux/fscrypt*.h +F: include/uapi/linux/fscrypt.h F: Documentation/filesystems/fscrypt.rst
FSI SUBSYSTEM @@@ -6634,18 -6639,6 +6639,18 @@@ S: Maintaine F: fs/notify/ F: include/linux/fsnotify*.h
+FSVERITY: READ-ONLY FILE-BASED AUTHENTICITY PROTECTION +M: Eric Biggers ebiggers@kernel.org +M: Theodore Y. Ts'o tytso@mit.edu +L: linux-fscrypt@vger.kernel.org +Q: https://patchwork.kernel.org/project/linux-fscrypt/list/ +T: git git://git.kernel.org/pub/scm/fs/fscrypt/fscrypt.git fsverity +S: Supported +F: fs/verity/ +F: include/linux/fsverity.h +F: include/uapi/linux/fsverity.h +F: Documentation/filesystems/fsverity.rst + FUJITSU LAPTOP EXTRAS M: Jonathan Woithe jwoithe@just42.net L: platform-driver-x86@vger.kernel.org @@@ -6736,13 -6729,6 +6741,13 @@@ W: https://linuxtv.or S: Maintained F: drivers/media/radio/radio-gemtek*
+GENERIC ARCHITECTURE TOPOLOGY +M: Sudeep Holla sudeep.holla@arm.com +L: linux-kernel@vger.kernel.org +S: Maintained +F: drivers/base/arch_topology.c +F: include/linux/arch_topology.h + GENERIC GPIO I2C DRIVER M: Wolfram Sang wsa+renesas@sang-engineering.com S: Supported @@@ -6755,7 -6741,7 +6760,7 @@@ L: linux-i2c@vger.kernel.or S: Supported F: drivers/i2c/muxes/i2c-mux-gpio.c F: include/linux/platform_data/i2c-mux-gpio.h -F: Documentation/i2c/muxes/i2c-mux-gpio +F: Documentation/i2c/muxes/i2c-mux-gpio.rst
GENERIC HDLC (WAN) DRIVERS M: Krzysztof Halasa khc@pm.waw.pl @@@ -7504,14 -7490,14 +7509,14 @@@ I2C CONTROLLER DRIVER FOR NVIDIA GP M: Ajay Gupta ajayg@nvidia.com L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/i2c/busses/i2c-nvidia-gpu +F: Documentation/i2c/busses/i2c-nvidia-gpu.rst F: drivers/i2c/busses/i2c-nvidia-gpu.c
I2C MUXES M: Peter Rosin peda@axentia.se L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/i2c/i2c-topology +F: Documentation/i2c/i2c-topology.rst F: Documentation/i2c/muxes/ F: Documentation/devicetree/bindings/i2c/i2c-mux* F: Documentation/devicetree/bindings/i2c/i2c-arb* @@@ -7524,15 -7510,15 +7529,15 @@@ I2C MV64XXX MARVELL AND ALLWINNER DRIVE M: Gregory CLEMENT gregory.clement@bootlin.com L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/i2c/i2c-mv64xxx.txt +F: Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml F: drivers/i2c/busses/i2c-mv64xxx.c
I2C OVER PARALLEL PORT M: Jean Delvare jdelvare@suse.com L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/i2c/busses/i2c-parport -F: Documentation/i2c/busses/i2c-parport-light +F: Documentation/i2c/busses/i2c-parport.rst +F: Documentation/i2c/busses/i2c-parport-light.rst F: drivers/i2c/busses/i2c-parport.c F: drivers/i2c/busses/i2c-parport-light.c
@@@ -7566,7 -7552,7 +7571,7 @@@ I2C-TAOS-EVM DRIVE M: Jean Delvare jdelvare@suse.com L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/i2c/busses/i2c-taos-evm +F: Documentation/i2c/busses/i2c-taos-evm.rst F: drivers/i2c/busses/i2c-taos-evm.c
I2C-TINY-USB DRIVER @@@ -7580,19 -7566,19 +7585,19 @@@ I2C/SMBUS CONTROLLER DRIVERS FOR P M: Jean Delvare jdelvare@suse.com L: linux-i2c@vger.kernel.org S: Maintained -F: Documentation/i2c/busses/i2c-ali1535 -F: Documentation/i2c/busses/i2c-ali1563 -F: Documentation/i2c/busses/i2c-ali15x3 -F: Documentation/i2c/busses/i2c-amd756 -F: Documentation/i2c/busses/i2c-amd8111 -F: Documentation/i2c/busses/i2c-i801 -F: Documentation/i2c/busses/i2c-nforce2 -F: Documentation/i2c/busses/i2c-piix4 -F: Documentation/i2c/busses/i2c-sis5595 -F: Documentation/i2c/busses/i2c-sis630 -F: Documentation/i2c/busses/i2c-sis96x -F: Documentation/i2c/busses/i2c-via -F: Documentation/i2c/busses/i2c-viapro +F: Documentation/i2c/busses/i2c-ali1535.rst +F: Documentation/i2c/busses/i2c-ali1563.rst +F: Documentation/i2c/busses/i2c-ali15x3.rst +F: Documentation/i2c/busses/i2c-amd756.rst +F: Documentation/i2c/busses/i2c-amd8111.rst +F: Documentation/i2c/busses/i2c-i801.rst +F: Documentation/i2c/busses/i2c-nforce2.rst +F: Documentation/i2c/busses/i2c-piix4.rst +F: Documentation/i2c/busses/i2c-sis5595.rst +F: Documentation/i2c/busses/i2c-sis630.rst +F: Documentation/i2c/busses/i2c-sis96x.rst +F: Documentation/i2c/busses/i2c-via.rst +F: Documentation/i2c/busses/i2c-viapro.rst F: drivers/i2c/busses/i2c-ali1535.c F: drivers/i2c/busses/i2c-ali1563.c F: drivers/i2c/busses/i2c-ali15x3.c @@@ -7621,7 -7607,7 +7626,7 @@@ M: Seth Heasley <seth.heasley@intel.com M: Neil Horman nhorman@tuxdriver.com L: linux-i2c@vger.kernel.org F: drivers/i2c/busses/i2c-ismt.c -F: Documentation/i2c/busses/i2c-ismt +F: Documentation/i2c/busses/i2c-ismt.rst
I2C/SMBUS STUB DRIVER M: Jean Delvare jdelvare@suse.com @@@ -8061,7 -8047,6 +8066,7 @@@ S: Maintaine F: drivers/video/fbdev/i810/
INTEL ASoC DRIVERS +M: Cezary Rojewski cezary.rojewski@intel.com M: Pierre-Louis Bossart pierre-louis.bossart@linux.intel.com M: Liam Girdwood liam.r.girdwood@linux.intel.com M: Jie Yang yang.jie@linux.intel.com @@@ -8364,7 -8349,7 +8369,7 @@@ M: linux-wimax@intel.co L: wimax@linuxwimax.org (subscribers-only) S: Supported W: http://linuxwimax.org -F: Documentation/wimax/README.i2400m +F: Documentation/admin-guide/wimax/i2400m.rst F: drivers/net/wimax/i2400m/ F: include/uapi/linux/wimax/i2400m.h
@@@ -8434,6 -8419,7 +8439,6 @@@ L: linux-xfs@vger.kernel.or L: linux-fsdevel@vger.kernel.org T: git git://git.kernel.org/pub/scm/fs/xfs/xfs-linux.git S: Supported -F: fs/iomap.c F: fs/iomap/ F: include/linux/iomap.h
@@@ -8666,7 -8652,7 +8671,7 @@@ L: jfs-discussion@lists.sourceforge.ne W: http://jfs.sourceforge.net/ T: git git://github.com/kleikamp/linux-shaggy.git S: Maintained -F: Documentation/filesystems/jfs.txt +F: Documentation/admin-guide/jfs.rst F: fs/jfs/
JME NETWORK DRIVER @@@ -9008,7 -8994,7 +9013,7 @@@ F: kernel/kprobes. KS0108 LCD CONTROLLER DRIVER M: Miguel Ojeda Sandonis miguel.ojeda.sandonis@gmail.com S: Maintained -F: Documentation/auxdisplay/ks0108 +F: Documentation/admin-guide/auxdisplay/ks0108.rst F: drivers/auxdisplay/ks0108.c F: include/linux/ks0108.h
@@@ -9585,7 -9571,7 +9590,7 @@@ F: Documentation/networking/mac80211-in F: include/net/mac80211.h F: net/mac80211/ F: drivers/net/wireless/mac80211_hwsim.[ch] -F: Documentation/networking/mac80211_hwsim/README +F: Documentation/networking/mac80211_hwsim/mac80211_hwsim.rst
MAILBOX API M: Jassi Brar jassisinghbrar@gmail.com @@@ -10034,8 -10020,8 +10039,8 @@@ L: linux-media@vger.kernel.or L: linux-renesas-soc@vger.kernel.org T: git git://linuxtv.org/media_tree.git S: Supported -F: Documentation/devicetree/bindings/media/renesas,rcar-csi2.txt -F: Documentation/devicetree/bindings/media/rcar_vin.txt +F: Documentation/devicetree/bindings/media/renesas,csi2.txt +F: Documentation/devicetree/bindings/media/renesas,vin.txt F: drivers/media/platform/rcar-vin/
MEDIA DRIVERS FOR RENESAS - VSP1 @@@ -10362,7 -10348,7 +10367,7 @@@ L: linux-i2c@vger.kernel.or S: Supported F: drivers/i2c/busses/i2c-mlxcpld.c F: drivers/i2c/muxes/i2c-mux-mlxcpld.c -F: Documentation/i2c/busses/i2c-mlxcpld +F: Documentation/i2c/busses/i2c-mlxcpld.rst
MELLANOX MLXCPLD LED DRIVER M: Vadim Pasternak vadimp@mellanox.com @@@ -11341,7 -11327,6 +11346,6 @@@ F: include/net/nfc F: include/uapi/linux/nfc.h F: drivers/nfc/ F: include/linux/platform_data/nfcmrvl.h - F: include/linux/platform_data/nxp-nci.h F: Documentation/devicetree/bindings/net/nfc/
NFS, SUNRPC, AND LOCKD CLIENTS @@@ -11782,7 -11767,6 +11786,7 @@@ S: Maintaine F: arch/arm/mach-omap2/ F: arch/arm/plat-omap/ F: arch/arm/configs/omap2plus_defconfig +F: drivers/bus/ti-sysc.c F: drivers/i2c/busses/i2c-omap.c F: drivers/irqchip/irq-omap-intc.c F: drivers/mfd/*omap*.c @@@ -11803,7 -11787,6 +11807,7 @@@ F: drivers/regulator/tps65910-regulator F: drivers/regulator/twl-regulator.c F: drivers/regulator/twl6030-regulator.c F: include/linux/platform_data/i2c-omap.h +F: include/linux/platform_data/ti-sysc.h
ONION OMEGA2+ BOARD M: Harvey Hunt harveyhuntnexus@gmail.com @@@ -11866,21 -11849,6 +11870,21 @@@ T: git git://linuxtv.org/media_tree.gi S: Maintained F: drivers/media/i2c/ov5647.c
+OMNIVISION OV5670 SENSOR DRIVER +M: Chiranjeevi Rapolu chiranjeevi.rapolu@intel.com +M: Hyungwoo Yang hyungwoo.yang@intel.com +L: linux-media@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: drivers/media/i2c/ov5670.c + +OMNIVISION OV5675 SENSOR DRIVER +M: Shawn Tu shawnx.tu@intel.com +L: linux-media@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: drivers/media/i2c/ov5675.c + OMNIVISION OV5695 SENSOR DRIVER M: Shunqian Zheng zhengsq@rock-chips.com L: linux-media@vger.kernel.org @@@ -12002,7 -11970,7 +12006,7 @@@ M: Andrew Lunn <andrew@lunn.ch L: linux-i2c@vger.kernel.org S: Maintained F: Documentation/devicetree/bindings/i2c/i2c-ocores.txt -F: Documentation/i2c/busses/i2c-ocores +F: Documentation/i2c/busses/i2c-ocores.rst F: drivers/i2c/busses/i2c-ocores.c F: include/linux/platform_data/i2c-ocores.h
@@@ -12125,7 -12093,7 +12129,7 @@@ L: netdev@vger.kernel.or S: Supported F: lib/packing.c F: include/linux/packing.h -F: Documentation/packing.txt +F: Documentation/core-api/packing.rst
PADATA PARALLEL EXECUTION MECHANISM M: Steffen Klassert steffen.klassert@secunet.com @@@ -13253,7 -13221,7 +13257,7 @@@ M: Manish Chopra <manishc@marvell.com M: GR-Linux-NIC-Dev@marvell.com L: netdev@vger.kernel.org S: Supported - F: drivers/net/ethernet/qlogic/qlge/ + F: drivers/staging/qlge/
QM1D1B0004 MEDIA DRIVER M: Akihiro Tsukada tskd08@gmail.com @@@ -13321,8 -13289,8 +13325,8 @@@ QUALCOMM CPUFREQ DRIVER MSM8996/APQ809 M: Ilia Lin ilia.lin@kernel.org L: linux-pm@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/opp/kryo-cpufreq.txt -F: drivers/cpufreq/qcom-cpufreq-kryo.c +F: Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt +F: drivers/cpufreq/qcom-cpufreq-nvmem.c
QUALCOMM EMAC GIGABIT ETHERNET DRIVER M: Timur Tabi timur@kernel.org @@@ -13687,14 -13655,14 +13691,14 @@@ RENESAS R-CAR I2C DRIVER M: Wolfram Sang wsa+renesas@sang-engineering.com S: Supported F: Documentation/devicetree/bindings/i2c/i2c-rcar.txt -F: Documentation/devicetree/bindings/i2c/i2c-sh_mobile.txt +F: Documentation/devicetree/bindings/i2c/renesas,iic.txt F: drivers/i2c/busses/i2c-rcar.c F: drivers/i2c/busses/i2c-sh_mobile.c
RENESAS RIIC DRIVER M: Chris Brandt chris.brandt@renesas.com S: Supported -F: Documentation/devicetree/bindings/i2c/i2c-riic.txt +F: Documentation/devicetree/bindings/i2c/renesas,riic.txt F: drivers/i2c/busses/i2c-riic.c
RENESAS USB PHY DRIVER @@@ -13790,7 -13758,7 +13794,7 @@@ HANTRO VPU CODEC DRIVE M: Ezequiel Garcia ezequiel@collabora.com L: linux-media@vger.kernel.org S: Maintained -F: drivers/staging/media/platform/hantro/ +F: drivers/staging/media/hantro/ F: Documentation/devicetree/bindings/media/rockchip-vpu.txt
ROCKER DRIVER @@@ -14312,7 -14280,7 +14316,7 @@@ F: net/sctp SCx200 CPU SUPPORT M: Jim Cromie jim.cromie@gmail.com S: Odd Fixes -F: Documentation/i2c/busses/scx200_acb +F: Documentation/i2c/busses/scx200_acb.rst F: arch/x86/platform/scx200/ F: drivers/watchdog/scx200_wdt.c F: drivers/i2c/busses/scx200* @@@ -15587,7 -15555,6 +15591,7 @@@ F: drivers/clk/clk-sc[mp]i. F: drivers/cpufreq/sc[mp]i-cpufreq.c F: drivers/firmware/arm_scpi.c F: drivers/firmware/arm_scmi/ +F: drivers/reset/reset-scmi.c F: include/linux/sc[mp]i_protocol.h
SYSTEM RESET/SHUTDOWN DRIVERS @@@ -15949,7 -15916,7 +15953,7 @@@ M: Viresh Kumar <viresh.kumar@linaro.or M: Javi Merino javi.merino@kernel.org L: linux-pm@vger.kernel.org S: Supported -F: Documentation/thermal/cpu-cooling-api.rst +F: Documentation/driver-api/thermal/cpu-cooling-api.rst F: drivers/thermal/cpu_cooling.c F: include/linux/cpu_cooling.h
@@@ -16121,7 -16088,7 +16125,7 @@@ S: Maintaine F: drivers/net/ethernet/ti/netcp*
TI PCM3060 ASoC CODEC DRIVER -M: Kirill Marinushkin kmarinushkin@birdec.tech +M: Kirill Marinushkin kmarinushkin@birdec.com L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: Documentation/devicetree/bindings/sound/pcm3060.txt @@@ -16472,7 -16439,7 +16476,7 @@@ F: drivers/hid/hid-udraw-ps3. UFS FILESYSTEM M: Evgeniy Dushistov dushistov@mail.ru S: Maintained -F: Documentation/filesystems/ufs.txt +F: Documentation/admin-guide/ufs.rst F: fs/ufs/
UHID USERSPACE HID IO DRIVER: @@@ -17390,7 -17357,7 +17394,7 @@@ M: linux-wimax@intel.co L: wimax@linuxwimax.org (subscribers-only) S: Supported W: http://linuxwimax.org -F: Documentation/wimax/README.wimax +F: Documentation/admin-guide/wimax/wimax.rst F: include/linux/wimax/debug.h F: include/net/wimax.h F: include/uapi/linux/wimax.h @@@ -17842,6 -17809,14 +17846,6 @@@ S: Maintaine F: mm/zpool.c F: include/linux/zpool.h
-ZR36067 VIDEO FOR LINUX DRIVER -L: mjpeg-users@lists.sourceforge.net -L: linux-media@vger.kernel.org -W: http://mjpeg.sourceforge.net/driver-zoran/ -T: hg https://linuxtv.org/hg/v4l-dvb -S: Odd Fixes -F: drivers/staging/media/zoran/ - ZRAM COMPRESSED RAM BLOCK DEVICE DRVIER M: Minchan Kim minchan@kernel.org M: Nitin Gupta ngupta@vflare.org diff --combined arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi index 20d7e7db5dcb,de71153fda00..d34c867b49ba --- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi @@@ -29,7 -29,6 +29,7 @@@ clocks = <&clockgen 1 0>; next-level-cache = <&l2>; cpu-idle-states = <&CPU_PW20>; + #cooling-cells = <2>; };
cpu1: cpu@1 { @@@ -40,7 -39,6 +40,7 @@@ clocks = <&clockgen 1 0>; next-level-cache = <&l2>; cpu-idle-states = <&CPU_PW20>; + #cooling-cells = <2>; };
l2: l2-cache { @@@ -505,89 -503,6 +505,89 @@@ status = "disabled"; };
+ tmu: tmu@1f00000 { + compatible = "fsl,qoriq-tmu"; + reg = <0x0 0x1f80000 0x0 0x10000>; + interrupts = <0 23 0x4>; + fsl,tmu-range = <0xb0000 0xa0026 0x80048 0x70061>; + fsl,tmu-calibration = <0x00000000 0x00000024 + 0x00000001 0x0000002b + 0x00000002 0x00000031 + 0x00000003 0x00000038 + 0x00000004 0x0000003f + 0x00000005 0x00000045 + 0x00000006 0x0000004c + 0x00000007 0x00000053 + 0x00000008 0x00000059 + 0x00000009 0x00000060 + 0x0000000a 0x00000066 + 0x0000000b 0x0000006d + + 0x00010000 0x0000001c + 0x00010001 0x00000024 + 0x00010002 0x0000002c + 0x00010003 0x00000035 + 0x00010004 0x0000003d + 0x00010005 0x00000045 + 0x00010006 0x0000004d + 0x00010007 0x00000045 + 0x00010008 0x0000005e + 0x00010009 0x00000066 + 0x0001000a 0x0000006e + + 0x00020000 0x00000018 + 0x00020001 0x00000022 + 0x00020002 0x0000002d + 0x00020003 0x00000038 + 0x00020004 0x00000043 + 0x00020005 0x0000004d + 0x00020006 0x00000058 + 0x00020007 0x00000063 + 0x00020008 0x0000006e + + 0x00030000 0x00000010 + 0x00030001 0x0000001c + 0x00030002 0x00000029 + 0x00030003 0x00000036 + 0x00030004 0x00000042 + 0x00030005 0x0000004f + 0x00030006 0x0000005b + 0x00030007 0x00000068>; + little-endian; + #thermal-sensor-cells = <1>; + }; + + thermal-zones { + core-cluster { + polling-delay-passive = <1000>; + polling-delay = <5000>; + thermal-sensors = <&tmu 0>; + + trips { + core_cluster_alert: core-cluster-alert { + temperature = <85000>; + hysteresis = <2000>; + type = "passive"; + }; + + core_cluster_crit: core-cluster-crit { + temperature = <95000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&core_cluster_alert>; + cooling-device = + <&cpu0 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>, + <&cpu1 THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; + }; + }; + }; + }; + pcie@1f0000000 { /* Integrated Endpoint Root Complex */ compatible = "pci-host-ecam-generic"; reg = <0x01 0xf0000000 0x0 0x100000>; @@@ -621,6 -536,12 +621,12 @@@ compatible = "fsl,enetc"; reg = <0x000100 0 0 0 0>; }; + enetc_mdio_pf3: mdio@0,3 { + compatible = "fsl,enetc-mdio"; + reg = <0x000300 0 0 0 0>; + #address-cells = <1>; + #size-cells = <0>; + }; ethernet@0,4 { compatible = "fsl,enetc-ptp"; reg = <0x000400 0 0 0 0>; @@@ -639,7 -560,6 +645,7 @@@ clocks = <&dpclk>, <&aclk>, <&aclk>, <&pclk>; clock-names = "pxlclk", "mclk", "aclk", "pclk"; arm,malidp-output-port-lines = /bits/ 8 <8 8 8>; + arm,malidp-arqos-value = <0xd000d000>;
port { dp0_out: endpoint { diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index d692251ee252,dd99c55d9a88..ae6a47dd7dc9 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@@ -3236,10 -3236,8 +3236,10 @@@ static ssize_t blocked_fl_write(struct return -ENOMEM;
err = bitmap_parse_user(ubuf, count, t, adap->sge.egr_sz); - if (err) + if (err) { + kvfree(t); return err; + }
bitmap_copy(adap->sge.blocked_fl, t, adap->sge.egr_sz); kvfree(t); @@@ -3531,7 -3529,6 +3531,6 @@@ int t4_setup_debugfs(struct adapter *ad { int i; u32 size = 0; - struct dentry *de;
static struct t4_debugfs_entry t4_debugfs_files[] = { { "cim_la", &cim_la_fops, 0400, 0 }, @@@ -3642,8 -3639,8 +3641,8 @@@ } }
- de = debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap, - &flash_debugfs_fops, adap->params.sf_size); + debugfs_create_file_size("flash", 0400, adap->debugfs_root, adap, + &flash_debugfs_fops, adap->params.sf_size); debugfs_create_bool("use_backdoor", 0600, adap->debugfs_root, &adap->use_bd); debugfs_create_bool("trace_rss", 0600, diff --combined drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7882148abb43,dc7b128c780e..17b7ae9f46ec --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@@ -1785,7 -1785,7 +1785,7 @@@ static bool ixgbe_is_non_eop(struct ixg static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, struct sk_buff *skb) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[0]; unsigned char *va; unsigned int pull_len;
@@@ -1807,7 -1807,7 +1807,7 @@@
/* update all of the pointers */ skb_frag_size_sub(frag, pull_len); - frag->page_offset += pull_len; + skb_frag_off_add(frag, pull_len); skb->data_len -= pull_len; skb->tail += pull_len; } @@@ -1840,11 -1840,11 +1840,11 @@@ static void ixgbe_dma_sync_frag(struct skb_headlen(skb), DMA_FROM_DEVICE); } else { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
dma_sync_single_range_for_cpu(rx_ring->dev, IXGBE_CB(skb)->dma, - frag->page_offset, + skb_frag_off(frag), skb_frag_size(frag), DMA_FROM_DEVICE); } @@@ -7897,8 -7897,11 +7897,8 @@@ static void ixgbe_service_task(struct w return; } if (ixgbe_check_fw_error(adapter)) { - if (!test_bit(__IXGBE_DOWN, &adapter->state)) { - rtnl_lock(); + if (!test_bit(__IXGBE_DOWN, &adapter->state)) unregister_netdev(adapter->netdev); - rtnl_unlock(); - } ixgbe_service_event_complete(adapter); return; } @@@ -8183,7 -8186,7 +8183,7 @@@ static int ixgbe_tx_map(struct ixgbe_ri struct sk_buff *skb = first->skb; struct ixgbe_tx_buffer *tx_buffer; union ixgbe_adv_tx_desc *tx_desc; - struct skb_frag_struct *frag; + skb_frag_t *frag; dma_addr_t dma; unsigned int data_len, size; u32 tx_flags = first->tx_flags; @@@ -8602,7 -8605,8 +8602,8 @@@ netdev_tx_t ixgbe_xmit_frame_ring(struc * otherwise try next time */ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); + count += TXD_USE_COUNT(skb_frag_size( + &skb_shinfo(skb)->frags[f]));
if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) { tx_ring->tx_stats.tx_busy++; diff --combined drivers/net/ethernet/mellanox/mlx5/core/cmd.c index 53d09620e215,973f90888b1f..ea934cd02448 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@@ -446,8 -446,6 +446,8 @@@ static int mlx5_internal_err_ret_value( case MLX5_CMD_OP_CREATE_UMEM: case MLX5_CMD_OP_DESTROY_UMEM: case MLX5_CMD_OP_ALLOC_MEMIC: + case MLX5_CMD_OP_MODIFY_XRQ: + case MLX5_CMD_OP_RELEASE_XRQ_ERROR: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@@ -639,8 -637,6 +639,8 @@@ const char *mlx5_command_str(int comman MLX5_COMMAND_STR_CASE(DESTROY_UCTX); MLX5_COMMAND_STR_CASE(CREATE_UMEM); MLX5_COMMAND_STR_CASE(DESTROY_UMEM); + MLX5_COMMAND_STR_CASE(RELEASE_XRQ_ERROR); + MLX5_COMMAND_STR_CASE(MODIFY_XRQ); default: return "unknown command opcode"; } } @@@ -1372,49 -1368,19 +1372,19 @@@ static void clean_debug_files(struct ml debugfs_remove_recursive(dbg->dbg_root); }
- static int create_debugfs_files(struct mlx5_core_dev *dev) + static void create_debugfs_files(struct mlx5_core_dev *dev) { struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; - int err = -ENOMEM; - - if (!mlx5_debugfs_root) - return 0;
dbg->dbg_root = debugfs_create_dir("cmd", dev->priv.dbg_root); - if (!dbg->dbg_root) - return err; - - dbg->dbg_in = debugfs_create_file("in", 0400, dbg->dbg_root, - dev, &dfops); - if (!dbg->dbg_in) - goto err_dbg;
- dbg->dbg_out = debugfs_create_file("out", 0200, dbg->dbg_root, - dev, &dfops); - if (!dbg->dbg_out) - goto err_dbg; - - dbg->dbg_outlen = debugfs_create_file("out_len", 0600, dbg->dbg_root, - dev, &olfops); - if (!dbg->dbg_outlen) - goto err_dbg; - - dbg->dbg_status = debugfs_create_u8("status", 0600, dbg->dbg_root, - &dbg->status); - if (!dbg->dbg_status) - goto err_dbg; - - dbg->dbg_run = debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops); - if (!dbg->dbg_run) - goto err_dbg; + debugfs_create_file("in", 0400, dbg->dbg_root, dev, &dfops); + debugfs_create_file("out", 0200, dbg->dbg_root, dev, &dfops); + debugfs_create_file("out_len", 0600, dbg->dbg_root, dev, &olfops); + debugfs_create_u8("status", 0600, dbg->dbg_root, &dbg->status); + debugfs_create_file("run", 0200, dbg->dbg_root, dev, &fops);
mlx5_cmdif_debugfs_init(dev); - - return 0; - - err_dbg: - clean_debug_files(dev); - return err; }
static void mlx5_cmd_change_mod(struct mlx5_core_dev *dev, int mode) @@@ -2011,17 -1977,10 +1981,10 @@@ int mlx5_cmd_init(struct mlx5_core_dev goto err_cache; }
- err = create_debugfs_files(dev); - if (err) { - err = -ENOMEM; - goto err_wq; - } + create_debugfs_files(dev);
return 0;
- err_wq: - destroy_workqueue(cmd->wq); - err_cache: destroy_msg_cache(dev);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en.h index 65bec19a438f,0807992090b8..8cf548c7ad9c --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@@ -184,13 -184,8 +184,13 @@@ static inline int mlx5e_get_max_num_cha
struct mlx5e_tx_wqe { struct mlx5_wqe_ctrl_seg ctrl; - struct mlx5_wqe_eth_seg eth; - struct mlx5_wqe_data_seg data[0]; + union { + struct { + struct mlx5_wqe_eth_seg eth; + struct mlx5_wqe_data_seg data[0]; + }; + u8 tls_progress_params_ctx[0]; + }; };
struct mlx5e_rx_wqe_ll { @@@ -356,6 -351,7 +356,7 @@@ enum MLX5E_SQ_STATE_IPSEC, MLX5E_SQ_STATE_AM, MLX5E_SQ_STATE_TLS, + MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, };
struct mlx5e_sq_wqe_info { @@@ -480,8 -476,6 +481,6 @@@ struct mlx5e_xdp_mpwqe struct mlx5e_tx_wqe *wqe; u8 ds_count; u8 pkt_count; - u8 max_ds_count; - u8 complete; u8 inline_on; };
@@@ -1105,8 -1099,6 +1104,8 @@@ u32 mlx5e_ethtool_get_rxfh_key_size(str u32 mlx5e_ethtool_get_rxfh_indir_size(struct mlx5e_priv *priv); int mlx5e_ethtool_get_ts_info(struct mlx5e_priv *priv, struct ethtool_ts_info *info); +int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash); void mlx5e_ethtool_get_pauseparam(struct mlx5e_priv *priv, struct ethtool_pauseparam *pauseparam); int mlx5e_ethtool_set_pauseparam(struct mlx5e_priv *priv, @@@ -1135,7 -1127,6 +1134,6 @@@ void mlx5e_build_rq_params(struct mlx5_ struct mlx5e_params *params); void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params, u16 num_channels); - u8 mlx5e_params_calculate_tx_min_inline(struct mlx5_core_dev *mdev); void mlx5e_rx_dim_work(struct work_struct *work); void mlx5e_tx_dim_work(struct work_struct *work);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index c7f86453c638,6e54fefea410..817c6ea7e349 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@@ -1,7 -1,6 +1,6 @@@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2019 Mellanox Technologies. */
- #include <net/devlink.h> #include "reporter.h" #include "lib/eq.h"
@@@ -76,21 -75,26 +75,21 @@@ static int mlx5e_tx_reporter_err_cqe_re u8 state; int err;
- if (!test_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) - return 0; - err = mlx5_core_query_sq_state(mdev, sq->sqn, &state); if (err) { netdev_err(dev, "Failed to query SQ 0x%x state. err = %d\n", sq->sqn, err); - return err; + goto out; }
- if (state != MLX5_SQC_STATE_ERR) { - netdev_err(dev, "SQ 0x%x not in ERROR state\n", sq->sqn); - return -EINVAL; - } + if (state != MLX5_SQC_STATE_ERR) + goto out;
mlx5e_tx_disable_queue(sq->txq);
err = mlx5e_wait_for_sq_flush(sq); if (err) - return err; + goto out;
/* At this point, no new packets will arrive from the stack as TXQ is * marked with QUEUE_STATE_DRV_XOFF. In addition, NAPI cleared all @@@ -99,24 -103,20 +98,24 @@@
err = mlx5e_sq_to_ready(sq, state); if (err) - return err; + goto out;
mlx5e_reset_txqsq_cc_pc(sq); sq->stats->recover++; + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); mlx5e_activate_txqsq(sq);
return 0; +out: + clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); + return err; }
static int mlx5_tx_health_report(struct devlink_health_reporter *tx_reporter, char *err_str, struct mlx5e_tx_err_ctx *err_ctx) { - if (IS_ERR_OR_NULL(tx_reporter)) { + if (!tx_reporter) { netdev_err(err_ctx->sq->channel->netdev, err_str); return err_ctx->recover(err_ctx->sq); } @@@ -288,23 -288,27 +287,27 @@@ static const struct devlink_health_repo
int mlx5e_tx_reporter_create(struct mlx5e_priv *priv) { + struct devlink_health_reporter *reporter; struct mlx5_core_dev *mdev = priv->mdev; struct devlink *devlink = priv_to_devlink(mdev);
- priv->tx_reporter = + reporter = devlink_health_reporter_create(devlink, &mlx5_tx_reporter_ops, MLX5_REPORTER_TX_GRACEFUL_PERIOD, true, priv); - if (IS_ERR(priv->tx_reporter)) + if (IS_ERR(reporter)) { netdev_warn(priv->netdev, "Failed to create tx reporter, err = %ld\n", - PTR_ERR(priv->tx_reporter)); - return IS_ERR_OR_NULL(priv->tx_reporter); + PTR_ERR(reporter)); + return PTR_ERR(reporter); + } + priv->tx_reporter = reporter; + return 0; }
void mlx5e_tx_reporter_destroy(struct mlx5e_priv *priv) { - if (IS_ERR_OR_NULL(priv->tx_reporter)) + if (!priv->tx_reporter) return;
devlink_health_reporter_destroy(priv->tx_reporter); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 7f78c004d12f,f701e4f3c076..2c4d1f415968 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@@ -60,24 -60,28 +60,28 @@@ int mlx5e_open_xsk(struct mlx5e_priv *p struct mlx5e_xsk_param *xsk, struct xdp_umem *umem, struct mlx5e_channel *c) { - struct mlx5e_channel_param cparam = {}; + struct mlx5e_channel_param *cparam; struct dim_cq_moder icocq_moder = {}; int err;
if (!mlx5e_validate_xsk_param(params, xsk, priv->mdev)) return -EINVAL;
- mlx5e_build_xsk_cparam(priv, params, xsk, &cparam); + cparam = kvzalloc(sizeof(*cparam), GFP_KERNEL); + if (!cparam) + return -ENOMEM;
- err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam.rx_cq, &c->xskrq.cq); + mlx5e_build_xsk_cparam(priv, params, xsk, cparam); + + err = mlx5e_open_cq(c, params->rx_cq_moderation, &cparam->rx_cq, &c->xskrq.cq); if (unlikely(err)) - return err; + goto err_free_cparam;
- err = mlx5e_open_rq(c, params, &cparam.rq, xsk, umem, &c->xskrq); + err = mlx5e_open_rq(c, params, &cparam->rq, xsk, umem, &c->xskrq); if (unlikely(err)) goto err_close_rx_cq;
- err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam.tx_cq, &c->xsksq.cq); + err = mlx5e_open_cq(c, params->tx_cq_moderation, &cparam->tx_cq, &c->xsksq.cq); if (unlikely(err)) goto err_close_rq;
@@@ -87,21 -91,23 +91,23 @@@ * is disabled and then reenabled, but the SQ continues receiving CQEs * from the old UMEM. */ - err = mlx5e_open_xdpsq(c, params, &cparam.xdp_sq, umem, &c->xsksq, true); + err = mlx5e_open_xdpsq(c, params, &cparam->xdp_sq, umem, &c->xsksq, true); if (unlikely(err)) goto err_close_tx_cq;
- err = mlx5e_open_cq(c, icocq_moder, &cparam.icosq_cq, &c->xskicosq.cq); + err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->xskicosq.cq); if (unlikely(err)) goto err_close_sq;
/* Create a dedicated SQ for posting NOPs whenever we need an IRQ to be * triggered and NAPI to be called on the correct CPU. */ - err = mlx5e_open_icosq(c, params, &cparam.icosq, &c->xskicosq); + err = mlx5e_open_icosq(c, params, &cparam->icosq, &c->xskicosq); if (unlikely(err)) goto err_close_icocq;
+ kvfree(cparam); + spin_lock_init(&c->xskicosq_lock);
set_bit(MLX5E_CHANNEL_STATE_XSK, c->state); @@@ -123,6 -129,9 +129,9 @@@ err_close_rq err_close_rx_cq: mlx5e_close_cq(&c->xskrq.cq);
+ err_free_cparam: + kvfree(cparam); + return err; }
@@@ -143,10 -152,7 +152,10 @@@ void mlx5e_activate_xsk(struct mlx5e_ch { set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state); /* TX queue is created active. */ + + spin_lock(&c->xskicosq_lock); mlx5e_trigger_irq(&c->xskicosq); + spin_unlock(&c->xskicosq_lock); }
void mlx5e_deactivate_xsk(struct mlx5e_channel *c) diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 20e628c907e5,02530b50609c..7347d673f448 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@@ -1081,14 -1081,6 +1081,14 @@@ int mlx5e_ethtool_set_link_ksettings(st link_modes = autoneg == AUTONEG_ENABLE ? ethtool2ptys_adver_func(adver) : mlx5e_port_speed2linkmodes(mdev, speed, !ext);
+ if ((link_modes & MLX5E_PROT_MASK(MLX5E_56GBASE_R4)) && + autoneg != AUTONEG_ENABLE) { + netdev_err(priv->netdev, "%s: 56G link speed requires autoneg enabled\n", + __func__); + err = -EINVAL; + goto out; + } + link_modes = link_modes & eproto.cap; if (!link_modes) { netdev_err(priv->netdev, "%s: Not supported link mode(s) requested", @@@ -1346,9 -1338,6 +1346,9 @@@ int mlx5e_ethtool_set_pauseparam(struc struct mlx5_core_dev *mdev = priv->mdev; int err;
+ if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return -EOPNOTSUPP; + if (pauseparam->autoneg) return -EINVAL;
@@@ -1690,40 -1679,6 +1690,40 @@@ static int mlx5e_get_module_eeprom(stru return 0; }
+int mlx5e_ethtool_flash_device(struct mlx5e_priv *priv, + struct ethtool_flash *flash) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct net_device *dev = priv->netdev; + const struct firmware *fw; + int err; + + if (flash->region != ETHTOOL_FLASH_ALL_REGIONS) + return -EOPNOTSUPP; + + err = request_firmware_direct(&fw, flash->data, &dev->dev); + if (err) + return err; + + dev_hold(dev); + rtnl_unlock(); + + err = mlx5_firmware_flash(mdev, fw, NULL); + release_firmware(fw); + + rtnl_lock(); + dev_put(dev); + return err; +} + +static int mlx5e_flash_device(struct net_device *dev, + struct ethtool_flash *flash) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return mlx5e_ethtool_flash_device(priv, flash); +} + static int set_pflag_cqe_based_moder(struct net_device *netdev, bool enable, bool is_rx_cq) { @@@ -1958,21 -1913,27 +1958,27 @@@ static u32 mlx5e_get_priv_flags(struct return priv->channels.params.pflags; }
- #ifndef CONFIG_MLX5_EN_RXNFC - /* When CONFIG_MLX5_EN_RXNFC=n we only support ETHTOOL_GRXRINGS - * otherwise this function will be defined from en_fs_ethtool.c - */ static int mlx5e_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) { struct mlx5e_priv *priv = netdev_priv(dev);
- if (info->cmd != ETHTOOL_GRXRINGS) - return -EOPNOTSUPP; - /* ring_count is needed by ethtool -x */ - info->data = priv->channels.params.num_channels; - return 0; + /* ETHTOOL_GRXRINGS is needed by ethtool -x which is not part + * of rxnfc. We keep this logic out of mlx5e_ethtool_get_rxnfc, + * to avoid breaking "ethtool -x" when mlx5e_ethtool_get_rxnfc + * is compiled out via CONFIG_MLX5_EN_RXNFC=n. + */ + if (info->cmd == ETHTOOL_GRXRINGS) { + info->data = priv->channels.params.num_channels; + return 0; + } + + return mlx5e_ethtool_get_rxnfc(dev, info, rule_locs); + } + + static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) + { + return mlx5e_ethtool_set_rxnfc(dev, cmd); } - #endif
const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, @@@ -1993,9 -1954,7 +1999,7 @@@ .get_rxfh = mlx5e_get_rxfh, .set_rxfh = mlx5e_set_rxfh, .get_rxnfc = mlx5e_get_rxnfc, - #ifdef CONFIG_MLX5_EN_RXNFC .set_rxnfc = mlx5e_set_rxnfc, - #endif .get_tunable = mlx5e_get_tunable, .set_tunable = mlx5e_set_tunable, .get_pauseparam = mlx5e_get_pauseparam, @@@ -2006,7 -1965,6 +2010,7 @@@ .set_wol = mlx5e_set_wol, .get_module_info = mlx5e_get_module_info, .get_module_eeprom = mlx5e_get_module_eeprom, + .flash_device = mlx5e_flash_device, .get_priv_flags = mlx5e_get_priv_flags, .set_priv_flags = mlx5e_set_priv_flags, .self_test = mlx5e_self_test, diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9d5f6e56188f,9a2fcef6e7f0..0c8e847a9eee --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@@ -1130,6 -1130,8 +1130,8 @@@ static int mlx5e_alloc_txqsq(struct mlx sq->stats = &c->priv->channel_stats[c->ix].sq[tc]; sq->stop_room = MLX5E_SQ_STOP_ROOM; INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work); + if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert)) + set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state); if (MLX5_IPSEC_DEV(c->priv->mdev)) set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state); if (mlx5_accel_is_tls_device(c->priv->mdev)) { @@@ -1321,6 -1323,7 +1323,6 @@@ err_free_txqsq void mlx5e_activate_txqsq(struct mlx5e_txqsq *sq) { sq->txq = netdev_get_tx_queue(sq->channel->netdev, sq->txq_ix); - clear_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state); set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); netdev_tx_reset_queue(sq->txq); netif_tx_start_queue(sq->txq); @@@ -2321,7 -2324,7 +2323,7 @@@ int mlx5e_open_channels(struct mlx5e_pr goto err_close_channels; }
- if (!IS_ERR_OR_NULL(priv->tx_reporter)) + if (priv->tx_reporter) devlink_health_reporter_state_update(priv->tx_reporter, DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
@@@ -3422,7 -3425,7 +3424,7 @@@ out #ifdef CONFIG_MLX5_ESWITCH static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv, struct flow_cls_offload *cls_flower, - int flags) + unsigned long flags) { switch (cls_flower->command) { case FLOW_CLS_REPLACE: @@@ -3442,12 -3445,12 +3444,12 @@@ static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { + unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD); struct mlx5e_priv *priv = cb_priv;
switch (type) { case TC_SETUP_CLSFLOWER: - return mlx5e_setup_tc_cls_flower(priv, type_data, MLX5E_TC_INGRESS | - MLX5E_TC_NIC_OFFLOAD); + return mlx5e_setup_tc_cls_flower(priv, type_data, flags); default: return -EOPNOTSUPP; } @@@ -3640,7 -3643,7 +3642,7 @@@ static int set_feature_tc_num_filters(s { struct mlx5e_priv *priv = netdev_priv(netdev);
- if (!enable && mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD)) { + if (!enable && mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD))) { netdev_err(netdev, "Active offloaded tc filters, can't turn hw_tc_offload off\n"); return -EINVAL; @@@ -3781,9 -3784,10 +3783,10 @@@ static netdev_features_t mlx5e_fix_feat netdev_warn(netdev, "Dropping C-tag vlan stripping offload due to S-tag vlan\n"); } if (!MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_STRIDING_RQ)) { - features &= ~NETIF_F_LRO; - if (params->lro_en) + if (features & NETIF_F_LRO) { netdev_warn(netdev, "Disabling LRO, not supported in legacy RQ\n"); + features &= ~NETIF_F_LRO; + } }
if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS)) { @@@ -3950,7 -3954,8 +3953,8 @@@ int mlx5e_hwstamp_set(struct mlx5e_pri case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: case HWTSTAMP_FILTER_NTP_ALL: /* Disable CQE compression */ - netdev_warn(priv->netdev, "Disabling cqe compression"); + if (MLX5E_GET_PFLAG(&priv->channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS)) + netdev_warn(priv->netdev, "Disabling RX cqe compression\n"); err = mlx5e_modify_rx_cqe_compression_locked(priv, false); if (err) { netdev_err(priv->netdev, "Failed disabling cqe compression err=%d\n", err); @@@ -4768,7 -4773,7 +4772,7 @@@ void mlx5e_build_nic_params(struct mlx5 mlx5e_set_tx_cq_mode_params(params, MLX5_CQ_PERIOD_MODE_START_FROM_EQE);
/* TX inline */ - params->tx_min_inline_mode = mlx5e_params_calculate_tx_min_inline(mdev); + mlx5_query_min_inline(mdev, ¶ms->tx_min_inline_mode);
/* RSS */ mlx5e_build_rss_params(rss_params, params->num_channels); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index deeb65da99f3,5be3da621499..00473d09a5c5 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@@ -38,6 -38,8 +38,8 @@@ #include <linux/mlx5/fs.h> #include <linux/mlx5/device.h> #include <linux/rhashtable.h> + #include <linux/refcount.h> + #include <linux/completion.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> #include <net/tc_act/tc_tunnel_key.h> @@@ -65,19 -67,20 +67,20 @@@ struct mlx5_nic_flow_attr struct mlx5_fc *counter; };
- #define MLX5E_TC_FLOW_BASE (MLX5E_TC_LAST_EXPORTED_BIT + 1) + #define MLX5E_TC_FLOW_BASE (MLX5E_TC_FLAG_LAST_EXPORTED_BIT + 1)
enum { - MLX5E_TC_FLOW_INGRESS = MLX5E_TC_INGRESS, - MLX5E_TC_FLOW_EGRESS = MLX5E_TC_EGRESS, - MLX5E_TC_FLOW_ESWITCH = MLX5E_TC_ESW_OFFLOAD, - MLX5E_TC_FLOW_NIC = MLX5E_TC_NIC_OFFLOAD, - MLX5E_TC_FLOW_OFFLOADED = BIT(MLX5E_TC_FLOW_BASE), - MLX5E_TC_FLOW_HAIRPIN = BIT(MLX5E_TC_FLOW_BASE + 1), - MLX5E_TC_FLOW_HAIRPIN_RSS = BIT(MLX5E_TC_FLOW_BASE + 2), - MLX5E_TC_FLOW_SLOW = BIT(MLX5E_TC_FLOW_BASE + 3), - MLX5E_TC_FLOW_DUP = BIT(MLX5E_TC_FLOW_BASE + 4), - MLX5E_TC_FLOW_NOT_READY = BIT(MLX5E_TC_FLOW_BASE + 5), + MLX5E_TC_FLOW_FLAG_INGRESS = MLX5E_TC_FLAG_INGRESS_BIT, + MLX5E_TC_FLOW_FLAG_EGRESS = MLX5E_TC_FLAG_EGRESS_BIT, + MLX5E_TC_FLOW_FLAG_ESWITCH = MLX5E_TC_FLAG_ESW_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_NIC = MLX5E_TC_FLAG_NIC_OFFLOAD_BIT, + MLX5E_TC_FLOW_FLAG_OFFLOADED = MLX5E_TC_FLOW_BASE, + MLX5E_TC_FLOW_FLAG_HAIRPIN = MLX5E_TC_FLOW_BASE + 1, + MLX5E_TC_FLOW_FLAG_HAIRPIN_RSS = MLX5E_TC_FLOW_BASE + 2, + MLX5E_TC_FLOW_FLAG_SLOW = MLX5E_TC_FLOW_BASE + 3, + MLX5E_TC_FLOW_FLAG_DUP = MLX5E_TC_FLOW_BASE + 4, + MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, + MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, };
#define MLX5E_TC_MAX_SPLITS 1 @@@ -100,6 -103,7 +103,7 @@@ * container_of(helper item, containing struct type, helper field[index]) */ struct encap_flow_item { + struct mlx5e_encap_entry *e; /* attached encap instance */ struct list_head list; int index; }; @@@ -108,7 -112,7 +112,7 @@@ struct mlx5e_tc_flow struct rhash_head node; struct mlx5e_priv *priv; u64 cookie; - u16 flags; + unsigned long flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; /* Flow can be associated with multiple encap IDs. * The number of encaps is bounded by the number of supported @@@ -116,10 -120,14 +120,14 @@@ */ struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS]; struct mlx5e_tc_flow *peer_flow; + struct mlx5e_mod_hdr_entry *mh; /* attached mod header instance */ struct list_head mod_hdr; /* flows sharing the same mod hdr ID */ + struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */ struct list_head hairpin; /* flows sharing the same hairpin */ struct list_head peer; /* flows with peer flow */ struct list_head unready; /* flows not ready to be offloaded (e.g due to missing route) */ + refcount_t refcnt; + struct rcu_head rcu_head; union { struct mlx5_esw_flow_attr esw_attr[0]; struct mlx5_nic_flow_attr nic_attr[0]; @@@ -157,12 -165,20 +165,20 @@@ struct mlx5e_hairpin_entry /* a node of a hash table which keeps all the hairpin entries */ struct hlist_node hairpin_hlist;
+ /* protects flows list */ + spinlock_t flows_lock; /* flows sharing the same hairpin */ struct list_head flows; + /* hpe's that were not fully initialized when dead peer update event + * function traversed them. + */ + struct list_head dead_peer_wait_list;
u16 peer_vhca_id; u8 prio; struct mlx5e_hairpin *hp; + refcount_t refcnt; + struct completion res_ready; };
struct mod_hdr_key { @@@ -174,16 -190,93 +190,93 @@@ struct mlx5e_mod_hdr_entry /* a node of a hash table which keeps all the mod_hdr entries */ struct hlist_node mod_hdr_hlist;
+ /* protects flows list */ + spinlock_t flows_lock; /* flows sharing the same mod_hdr entry */ struct list_head flows;
struct mod_hdr_key key;
u32 mod_hdr_id; + + refcount_t refcnt; + struct completion res_ready; + int compl_result; };
#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
+ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); + + static struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow) + { + if (!flow || !refcount_inc_not_zero(&flow->refcnt)) + return ERR_PTR(-EINVAL); + return flow; + } + + static void mlx5e_flow_put(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) + { + if (refcount_dec_and_test(&flow->refcnt)) { + mlx5e_tc_del_flow(priv, flow); + kfree_rcu(flow, rcu_head); + } + } + + static void __flow_flag_set(struct mlx5e_tc_flow *flow, unsigned long flag) + { + /* Complete all memory stores before setting bit. */ + smp_mb__before_atomic(); + set_bit(flag, &flow->flags); + } + + #define flow_flag_set(flow, flag) __flow_flag_set(flow, MLX5E_TC_FLOW_FLAG_##flag) + + static bool __flow_flag_test_and_set(struct mlx5e_tc_flow *flow, + unsigned long flag) + { + /* test_and_set_bit() provides all necessary barriers */ + return test_and_set_bit(flag, &flow->flags); + } + + #define flow_flag_test_and_set(flow, flag) \ + __flow_flag_test_and_set(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + + static void __flow_flag_clear(struct mlx5e_tc_flow *flow, unsigned long flag) + { + /* Complete all memory stores before clearing bit. */ + smp_mb__before_atomic(); + clear_bit(flag, &flow->flags); + } + + #define flow_flag_clear(flow, flag) __flow_flag_clear(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + + static bool __flow_flag_test(struct mlx5e_tc_flow *flow, unsigned long flag) + { + bool ret = test_bit(flag, &flow->flags); + + /* Read fields of flow structure only after checking flags. */ + smp_mb__after_atomic(); + return ret; + } + + #define flow_flag_test(flow, flag) __flow_flag_test(flow, \ + MLX5E_TC_FLOW_FLAG_##flag) + + static bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow) + { + return flow_flag_test(flow, ESWITCH); + } + + static bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow) + { + return flow_flag_test(flow, OFFLOADED); + } + static inline u32 hash_mod_hdr_info(struct mod_hdr_key *key) { return jhash(key->actions, @@@ -199,15 -292,62 +292,62 @@@ static inline int cmp_mod_hdr_info(stru return memcmp(a->actions, b->actions, a->num_actions * MLX5_MH_ACT_SZ); }
+ static struct mod_hdr_tbl * + get_mod_hdr_table(struct mlx5e_priv *priv, int namespace) + { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + return namespace == MLX5_FLOW_NAMESPACE_FDB ? &esw->offloads.mod_hdr : + &priv->fs.tc.mod_hdr; + } + + static struct mlx5e_mod_hdr_entry * + mlx5e_mod_hdr_get(struct mod_hdr_tbl *tbl, struct mod_hdr_key *key, u32 hash_key) + { + struct mlx5e_mod_hdr_entry *mh, *found = NULL; + + hash_for_each_possible(tbl->hlist, mh, mod_hdr_hlist, hash_key) { + if (!cmp_mod_hdr_info(&mh->key, key)) { + refcount_inc(&mh->refcnt); + found = mh; + break; + } + } + + return found; + } + + static void mlx5e_mod_hdr_put(struct mlx5e_priv *priv, + struct mlx5e_mod_hdr_entry *mh, + int namespace) + { + struct mod_hdr_tbl *tbl = get_mod_hdr_table(priv, namespace); + + if (!refcount_dec_and_mutex_lock(&mh->refcnt, &tbl->lock)) + return; + hash_del(&mh->mod_hdr_hlist); + mutex_unlock(&tbl->lock); + + WARN_ON(!list_empty(&mh->flows)); + if (mh->compl_result > 0) + mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id); + + kfree(mh); + } + + static int get_flow_name_space(struct mlx5e_tc_flow *flow) + { + return mlx5e_is_eswitch_flow(flow) ? + MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL; + } static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct mlx5e_tc_flow_parse_attr *parse_attr) { - struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; int num_actions, actions_size, namespace, err; struct mlx5e_mod_hdr_entry *mh; + struct mod_hdr_tbl *tbl; struct mod_hdr_key key; - bool found = false; u32 hash_key;
num_actions = parse_attr->num_mod_hdr_actions; @@@ -218,80 -358,82 +358,82 @@@
hash_key = hash_mod_hdr_info(&key);
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { - namespace = MLX5_FLOW_NAMESPACE_FDB; - hash_for_each_possible(esw->offloads.mod_hdr_tbl, mh, - mod_hdr_hlist, hash_key) { - if (!cmp_mod_hdr_info(&mh->key, &key)) { - found = true; - break; - } - } - } else { - namespace = MLX5_FLOW_NAMESPACE_KERNEL; - hash_for_each_possible(priv->fs.tc.mod_hdr_tbl, mh, - mod_hdr_hlist, hash_key) { - if (!cmp_mod_hdr_info(&mh->key, &key)) { - found = true; - break; - } - } - } + namespace = get_flow_name_space(flow); + tbl = get_mod_hdr_table(priv, namespace); + + mutex_lock(&tbl->lock); + mh = mlx5e_mod_hdr_get(tbl, &key, hash_key); + if (mh) { + mutex_unlock(&tbl->lock); + wait_for_completion(&mh->res_ready);
- if (found) + if (mh->compl_result < 0) { + err = -EREMOTEIO; + goto attach_header_err; + } goto attach_flow; + }
mh = kzalloc(sizeof(*mh) + actions_size, GFP_KERNEL); - if (!mh) + if (!mh) { + mutex_unlock(&tbl->lock); return -ENOMEM; + }
mh->key.actions = (void *)mh + sizeof(*mh); memcpy(mh->key.actions, key.actions, actions_size); mh->key.num_actions = num_actions; + spin_lock_init(&mh->flows_lock); INIT_LIST_HEAD(&mh->flows); + refcount_set(&mh->refcnt, 1); + init_completion(&mh->res_ready); + + hash_add(tbl->hlist, &mh->mod_hdr_hlist, hash_key); + mutex_unlock(&tbl->lock);
err = mlx5_modify_header_alloc(priv->mdev, namespace, mh->key.num_actions, mh->key.actions, &mh->mod_hdr_id); - if (err) - goto out_err; - - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) - hash_add(esw->offloads.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); - else - hash_add(priv->fs.tc.mod_hdr_tbl, &mh->mod_hdr_hlist, hash_key); + if (err) { + mh->compl_result = err; + goto alloc_header_err; + } + mh->compl_result = 1; + complete_all(&mh->res_ready);
attach_flow: + flow->mh = mh; + spin_lock(&mh->flows_lock); list_add(&flow->mod_hdr, &mh->flows); - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + spin_unlock(&mh->flows_lock); + if (mlx5e_is_eswitch_flow(flow)) flow->esw_attr->mod_hdr_id = mh->mod_hdr_id; else flow->nic_attr->mod_hdr_id = mh->mod_hdr_id;
return 0;
- out_err: - kfree(mh); + alloc_header_err: + complete_all(&mh->res_ready); + attach_header_err: + mlx5e_mod_hdr_put(priv, mh, namespace); return err; }
static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - struct list_head *next = flow->mod_hdr.next; + /* flow wasn't fully initialized */ + if (!flow->mh) + return;
+ spin_lock(&flow->mh->flows_lock); list_del(&flow->mod_hdr); + spin_unlock(&flow->mh->flows_lock);
- if (list_empty(next)) { - struct mlx5e_mod_hdr_entry *mh; - - mh = list_entry(next, struct mlx5e_mod_hdr_entry, flows); - - mlx5_modify_header_dealloc(priv->mdev, mh->mod_hdr_id); - hash_del(&mh->mod_hdr_hlist); - kfree(mh); - } + mlx5e_mod_hdr_put(priv, flow->mh, get_flow_name_space(flow)); + flow->mh = NULL; }
static @@@ -555,13 -697,35 +697,35 @@@ static struct mlx5e_hairpin_entry *mlx5
hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe, hairpin_hlist, hash_key) { - if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) + if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) { + refcount_inc(&hpe->refcnt); return hpe; + } }
return NULL; }
+ static void mlx5e_hairpin_put(struct mlx5e_priv *priv, + struct mlx5e_hairpin_entry *hpe) + { + /* no more hairpin flows for us, release the hairpin pair */ + if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock)) + return; + hash_del(&hpe->hairpin_hlist); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + + if (!IS_ERR_OR_NULL(hpe->hp)) { + netdev_dbg(priv->netdev, "del hairpin: peer %s\n", + dev_name(hpe->hp->pair->peer_mdev->device)); + + mlx5e_hairpin_destroy(hpe->hp); + } + + WARN_ON(!list_empty(&hpe->flows)); + kfree(hpe); + } + #define UNKNOWN_MATCH_PRIO 8
static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv, @@@ -627,17 -791,37 +791,37 @@@ static int mlx5e_hairpin_flow_add(struc extack); if (err) return err; + + mutex_lock(&priv->fs.tc.hairpin_tbl_lock); hpe = mlx5e_hairpin_get(priv, peer_id, match_prio); - if (hpe) + if (hpe) { + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + wait_for_completion(&hpe->res_ready); + + if (IS_ERR(hpe->hp)) { + err = -EREMOTEIO; + goto out_err; + } goto attach_flow; + }
hpe = kzalloc(sizeof(*hpe), GFP_KERNEL); - if (!hpe) + if (!hpe) { + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); return -ENOMEM; + }
+ spin_lock_init(&hpe->flows_lock); INIT_LIST_HEAD(&hpe->flows); + INIT_LIST_HEAD(&hpe->dead_peer_wait_list); hpe->peer_vhca_id = peer_id; hpe->prio = match_prio; + refcount_set(&hpe->refcnt, 1); + init_completion(&hpe->res_ready); + + hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, + hash_hairpin_info(peer_id, match_prio)); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
params.log_data_size = 15; params.log_data_size = min_t(u8, params.log_data_size, @@@ -659,9 -843,11 +843,11 @@@ params.num_channels = link_speed64;
hp = mlx5e_hairpin_create(priv, ¶ms, peer_ifindex); + hpe->hp = hp; + complete_all(&hpe->res_ready); if (IS_ERR(hp)) { err = PTR_ERR(hp); - goto create_hairpin_err; + goto out_err; }
netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n", @@@ -669,46 -855,39 +855,39 @@@ dev_name(hp->pair->peer_mdev->device), hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
- hpe->hp = hp; - hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist, - hash_hairpin_info(peer_id, match_prio)); - attach_flow: if (hpe->hp->num_channels > 1) { - flow->flags |= MLX5E_TC_FLOW_HAIRPIN_RSS; + flow_flag_set(flow, HAIRPIN_RSS); flow->nic_attr->hairpin_ft = hpe->hp->ttc.ft.t; } else { flow->nic_attr->hairpin_tirn = hpe->hp->tirn; } + + flow->hpe = hpe; + spin_lock(&hpe->flows_lock); list_add(&flow->hairpin, &hpe->flows); + spin_unlock(&hpe->flows_lock);
return 0;
- create_hairpin_err: - kfree(hpe); + out_err: + mlx5e_hairpin_put(priv, hpe); return err; }
static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - struct list_head *next = flow->hairpin.next; + /* flow wasn't fully initialized */ + if (!flow->hpe) + return;
+ spin_lock(&flow->hpe->flows_lock); list_del(&flow->hairpin); + spin_unlock(&flow->hpe->flows_lock);
- /* no more hairpin flows for us, release the hairpin pair */ - if (list_empty(next)) { - struct mlx5e_hairpin_entry *hpe; - - hpe = list_entry(next, struct mlx5e_hairpin_entry, flows); - - netdev_dbg(priv->netdev, "del hairpin: peer %s\n", - dev_name(hpe->hp->pair->peer_mdev->device)); - - mlx5e_hairpin_destroy(hpe->hp); - hash_del(&hpe->hairpin_hlist); - kfree(hpe); - } + mlx5e_hairpin_put(priv, flow->hpe); + flow->hpe = NULL; }
static int @@@ -727,18 -906,17 +906,17 @@@ mlx5e_tc_add_nic_flow(struct mlx5e_pri .flags = FLOW_ACT_NO_APPEND, }; struct mlx5_fc *counter = NULL; - bool table_created = false; int err, dest_ix = 0;
flow_context->flags |= FLOW_CONTEXT_HAS_TAG; flow_context->flow_tag = attr->flow_tag;
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) { + if (flow_flag_test(flow, HAIRPIN)) { err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack); - if (err) { - goto err_add_hairpin_flow; - } - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN_RSS) { + if (err) + return err; + + if (flow_flag_test(flow, HAIRPIN_RSS)) { dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[dest_ix].ft = attr->hairpin_ft; } else { @@@ -754,10 -932,9 +932,9 @@@
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(dev, true); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); - goto err_fc_create; - } + if (IS_ERR(counter)) + return PTR_ERR(counter); + dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[dest_ix].counter_id = mlx5_fc_id(counter); dest_ix++; @@@ -769,9 -946,10 +946,10 @@@ flow_act.modify_id = attr->mod_hdr_id; kfree(parse_attr->mod_hdr_actions); if (err) - goto err_create_mod_hdr_id; + return err; }
+ mutex_lock(&priv->fs.tc.t_lock); if (IS_ERR_OR_NULL(priv->fs.tc.t)) { int tc_grp_size, tc_tbl_size; u32 max_flow_counter; @@@ -791,15 -969,13 +969,13 @@@ MLX5E_TC_TABLE_NUM_GROUPS, MLX5E_TC_FT_LEVEL, 0); if (IS_ERR(priv->fs.tc.t)) { + mutex_unlock(&priv->fs.tc.t_lock); NL_SET_ERR_MSG_MOD(extack, "Failed to create tc offload table\n"); netdev_err(priv->netdev, "Failed to create tc offload table\n"); - err = PTR_ERR(priv->fs.tc.t); - goto err_create_ft; + return PTR_ERR(priv->fs.tc.t); } - - table_created = true; }
if (attr->match_level != MLX5_MATCH_NONE) @@@ -807,29 -983,12 +983,12 @@@
flow->rule[0] = mlx5_add_flow_rules(priv->fs.tc.t, &parse_attr->spec, &flow_act, dest, dest_ix); + mutex_unlock(&priv->fs.tc.t_lock);
- if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - goto err_add_rule; - } + if (IS_ERR(flow->rule[0])) + return PTR_ERR(flow->rule[0]);
return 0; - - err_add_rule: - if (table_created) { - mlx5_destroy_flow_table(priv->fs.tc.t); - priv->fs.tc.t = NULL; - } - err_create_ft: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5e_detach_mod_hdr(priv, flow); - err_create_mod_hdr_id: - mlx5_fc_destroy(dev, counter); - err_fc_create: - if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) - mlx5e_hairpin_flow_del(priv, flow); - err_add_hairpin_flow: - return err; }
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv, @@@ -839,18 -998,21 +998,21 @@@ struct mlx5_fc *counter = NULL;
counter = attr->counter; - mlx5_del_flow_rules(flow->rule[0]); + if (!IS_ERR_OR_NULL(flow->rule[0])) + mlx5_del_flow_rules(flow->rule[0]); mlx5_fc_destroy(priv->mdev, counter);
- if (!mlx5e_tc_num_filters(priv, MLX5E_TC_NIC_OFFLOAD) && priv->fs.tc.t) { + mutex_lock(&priv->fs.tc.t_lock); + if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) && priv->fs.tc.t) { mlx5_destroy_flow_table(priv->fs.tc.t); priv->fs.tc.t = NULL; } + mutex_unlock(&priv->fs.tc.t_lock);
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) mlx5e_detach_mod_hdr(priv, flow);
- if (flow->flags & MLX5E_TC_FLOW_HAIRPIN) + if (flow_flag_test(flow, HAIRPIN)) mlx5e_hairpin_flow_del(priv, flow); }
@@@ -885,7 -1047,6 +1047,6 @@@ mlx5e_tc_offload_fdb_rules(struct mlx5_ } }
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED; return rule; }
@@@ -894,7 -1055,7 +1055,7 @@@ mlx5e_tc_unoffload_fdb_rules(struct mlx struct mlx5e_tc_flow *flow, struct mlx5_esw_flow_attr *attr) { - flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED; + flow_flag_clear(flow, OFFLOADED);
if (attr->split_count) mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr); @@@ -917,7 -1078,7 +1078,7 @@@ mlx5e_tc_offload_to_slow_path(struct ml
rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr); if (!IS_ERR(rule)) - flow->flags |= MLX5E_TC_FLOW_SLOW; + flow_flag_set(flow, SLOW);
return rule; } @@@ -932,7 -1093,26 +1093,26 @@@ mlx5e_tc_unoffload_from_slow_path(struc slow_attr->split_count = 0; slow_attr->dest_chain = FDB_SLOW_PATH_CHAIN; mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr); - flow->flags &= ~MLX5E_TC_FLOW_SLOW; + flow_flag_clear(flow, SLOW); + } + + /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. + */ + static void unready_flow_add(struct mlx5e_tc_flow *flow, + struct list_head *unready_flows) + { + flow_flag_set(flow, NOT_READY); + list_add_tail(&flow->unready, unready_flows); + } + + /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this + * function. + */ + static void unready_flow_del(struct mlx5e_tc_flow *flow) + { + list_del(&flow->unready); + flow_flag_clear(flow, NOT_READY); }
static void add_unready_flow(struct mlx5e_tc_flow *flow) @@@ -945,14 -1125,24 +1125,24 @@@ rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); uplink_priv = &rpriv->uplink_priv;
- flow->flags |= MLX5E_TC_FLOW_NOT_READY; - list_add_tail(&flow->unready, &uplink_priv->unready_flows); + mutex_lock(&uplink_priv->unready_flows_lock); + unready_flow_add(flow, &uplink_priv->unready_flows); + mutex_unlock(&uplink_priv->unready_flows_lock); }
static void remove_unready_flow(struct mlx5e_tc_flow *flow) { - list_del(&flow->unready); - flow->flags &= ~MLX5E_TC_FLOW_NOT_READY; + struct mlx5_rep_uplink_priv *uplink_priv; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + + esw = flow->priv->mdev->priv.eswitch; + rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); + uplink_priv = &rpriv->uplink_priv; + + mutex_lock(&uplink_priv->unready_flows_lock); + unready_flow_del(flow); + mutex_unlock(&uplink_priv->unready_flows_lock); }
static int @@@ -980,14 -1170,12 +1170,12 @@@ mlx5e_tc_add_fdb_flow(struct mlx5e_pri
if (attr->chain > max_chain) { NL_SET_ERR_MSG(extack, "Requested chain is out of supported range"); - err = -EOPNOTSUPP; - goto err_max_prio_chain; + return -EOPNOTSUPP; }
if (attr->prio > max_prio) { NL_SET_ERR_MSG(extack, "Requested priority is out of supported range"); - err = -EOPNOTSUPP; - goto err_max_prio_chain; + return -EOPNOTSUPP; }
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { @@@ -1002,7 -1190,7 +1190,7 @@@ err = mlx5e_attach_encap(priv, flow, out_dev, out_index, extack, &encap_dev, &encap_valid); if (err) - goto err_attach_encap; + return err;
out_priv = netdev_priv(encap_dev); rpriv = out_priv->ppriv; @@@ -1012,21 -1200,19 +1200,19 @@@
err = mlx5_eswitch_add_vlan_action(esw, attr); if (err) - goto err_add_vlan; + return err;
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) { err = mlx5e_attach_mod_hdr(priv, flow, parse_attr); kfree(parse_attr->mod_hdr_actions); if (err) - goto err_mod_hdr; + return err; }
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(attr->counter_dev, true); - if (IS_ERR(counter)) { - err = PTR_ERR(counter); - goto err_create_counter; - } + if (IS_ERR(counter)) + return PTR_ERR(counter);
attr->counter = counter; } @@@ -1044,27 -1230,12 +1230,12 @@@ flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr); }
- if (IS_ERR(flow->rule[0])) { - err = PTR_ERR(flow->rule[0]); - goto err_add_rule; - } + if (IS_ERR(flow->rule[0])) + return PTR_ERR(flow->rule[0]); + else + flow_flag_set(flow, OFFLOADED);
return 0; - - err_add_rule: - mlx5_fc_destroy(attr->counter_dev, counter); - err_create_counter: - if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - mlx5e_detach_mod_hdr(priv, flow); - err_mod_hdr: - mlx5_eswitch_del_vlan_action(esw, attr); - err_add_vlan: - for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) - if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) - mlx5e_detach_encap(priv, flow, out_index); - err_attach_encap: - err_max_prio_chain: - return err; }
static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow) @@@ -1088,14 -1259,14 +1259,14 @@@ static void mlx5e_tc_del_fdb_flow(struc struct mlx5_esw_flow_attr slow_attr; int out_index;
- if (flow->flags & MLX5E_TC_FLOW_NOT_READY) { + if (flow_flag_test(flow, NOT_READY)) { remove_unready_flow(flow); kvfree(attr->parse_attr); return; }
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { - if (flow->flags & MLX5E_TC_FLOW_SLOW) + if (mlx5e_is_offloaded_flow(flow)) { + if (flow_flag_test(flow, SLOW)) mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); else mlx5e_tc_unoffload_fdb_rules(esw, flow, attr); @@@ -1123,9 -1294,9 +1294,9 @@@ void mlx5e_tc_encap_flows_add(struct ml { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr slow_attr, *esw_attr; + struct encap_flow_item *efi, *tmp; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; - struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err;
@@@ -1142,11 -1313,14 +1313,14 @@@ e->flags |= MLX5_ENCAP_ENTRY_VALID; mlx5e_rep_queue_neigh_stats_work(priv);
- list_for_each_entry(efi, &e->flows, list) { + list_for_each_entry_safe(efi, tmp, &e->flows, list) { bool all_flow_encaps_valid = true; int i;
flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + esw_attr = flow->esw_attr; spec = &esw_attr->parse_attr->spec;
@@@ -1166,19 -1340,23 +1340,23 @@@ } /* Do not offload flows with unresolved neighbors */ if (!all_flow_encaps_valid) - continue; + goto loop_cont; /* update from slow path rule to encap rule */ rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, esw_attr); if (IS_ERR(rule)) { err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n", err); - continue; + goto loop_cont; }
mlx5e_tc_unoffload_from_slow_path(esw, flow, &slow_attr); - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when slow path rule removed */ flow->rule[0] = rule; + /* was unset when slow path rule removed */ + flow_flag_set(flow, OFFLOADED); + + loop_cont: + mlx5e_flow_put(priv, flow); } }
@@@ -1187,14 -1365,17 +1365,17 @@@ void mlx5e_tc_encap_flows_del(struct ml { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr slow_attr; + struct encap_flow_item *efi, *tmp; struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; - struct encap_flow_item *efi; struct mlx5e_tc_flow *flow; int err;
- list_for_each_entry(efi, &e->flows, list) { + list_for_each_entry_safe(efi, tmp, &e->flows, list) { flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + spec = &flow->esw_attr->parse_attr->spec;
/* update from encap rule to slow path rule */ @@@ -1206,12 -1387,16 +1387,16 @@@ err = PTR_ERR(rule); mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n", err); - continue; + goto loop_cont; }
mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->esw_attr); - flow->flags |= MLX5E_TC_FLOW_OFFLOADED; /* was unset when fast path rule removed */ flow->rule[0] = rule; + /* was unset when fast path rule removed */ + flow_flag_set(flow, OFFLOADED); + + loop_cont: + mlx5e_flow_put(priv, flow); }
/* we know that the encap is valid */ @@@ -1221,7 -1406,7 +1406,7 @@@
static struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (mlx5e_is_eswitch_flow(flow)) return flow->esw_attr->counter; else return flow->nic_attr->counter; @@@ -1248,21 -1433,32 +1433,32 @@@ void mlx5e_tc_update_neigh_used_value(s return;
list_for_each_entry(e, &nhe->encap_list, encap_list) { - struct encap_flow_item *efi; - if (!(e->flags & MLX5_ENCAP_ENTRY_VALID)) + struct encap_flow_item *efi, *tmp; + + if (!(e->flags & MLX5_ENCAP_ENTRY_VALID) || + !mlx5e_encap_take(e)) continue; - list_for_each_entry(efi, &e->flows, list) { + + list_for_each_entry_safe(efi, tmp, &e->flows, list) { flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]); - if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + if (IS_ERR(mlx5e_flow_get(flow))) + continue; + + if (mlx5e_is_offloaded_flow(flow)) { counter = mlx5e_tc_get_counter(flow); lastuse = mlx5_fc_query_lastuse(counter); if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) { + mlx5e_flow_put(netdev_priv(e->out_dev), flow); neigh_used = true; break; } } + + mlx5e_flow_put(netdev_priv(e->out_dev), flow); } + + mlx5e_encap_put(netdev_priv(e->out_dev), e); if (neigh_used) break; } @@@ -1282,40 -1478,66 +1478,66 @@@ } }
- static void mlx5e_detach_encap(struct mlx5e_priv *priv, - struct mlx5e_tc_flow *flow, int out_index) + static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { - struct list_head *next = flow->encaps[out_index].list.next; + WARN_ON(!list_empty(&e->flows)); + mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
- list_del(&flow->encaps[out_index].list); - if (list_empty(next)) { - struct mlx5e_encap_entry *e; + if (e->flags & MLX5_ENCAP_ENTRY_VALID) + mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id);
- e = list_entry(next, struct mlx5e_encap_entry, flows); - mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e); + kfree(e->encap_header); + kfree(e); + }
- if (e->flags & MLX5_ENCAP_ENTRY_VALID) - mlx5_packet_reformat_dealloc(priv->mdev, e->encap_id); + void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) + { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
- hash_del_rcu(&e->encap_hlist); - kfree(e->encap_header); - kfree(e); + if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock)) + return; + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); + } + + static void mlx5e_detach_encap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, int out_index) + { + struct mlx5e_encap_entry *e = flow->encaps[out_index].e; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + /* flow wasn't fully initialized */ + if (!e) + return; + + mutex_lock(&esw->offloads.encap_tbl_lock); + list_del(&flow->encaps[out_index].list); + flow->encaps[out_index].e = NULL; + if (!refcount_dec_and_test(&e->refcnt)) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + return; } + hash_del_rcu(&e->encap_hlist); + mutex_unlock(&esw->offloads.encap_tbl_lock); + + mlx5e_encap_dealloc(priv, e); }
static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
- if (!(flow->flags & MLX5E_TC_FLOW_ESWITCH) || - !(flow->flags & MLX5E_TC_FLOW_DUP)) + if (!flow_flag_test(flow, ESWITCH) || + !flow_flag_test(flow, DUP)) return;
mutex_lock(&esw->offloads.peer_mutex); list_del(&flow->peer); mutex_unlock(&esw->offloads.peer_mutex);
- flow->flags &= ~MLX5E_TC_FLOW_DUP; + flow_flag_clear(flow, DUP);
mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow); kvfree(flow->peer_flow); @@@ -1339,7 -1561,7 +1561,7 @@@ static void mlx5e_tc_del_fdb_peer_flow( static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow) { - if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + if (mlx5e_is_eswitch_flow(flow)) { mlx5e_tc_del_fdb_peer_flow(flow); mlx5e_tc_del_fdb_flow(priv, flow); } else { @@@ -1480,7 -1702,7 +1702,7 @@@ static int __parse_cls_flower(struct ml struct mlx5_flow_spec *spec, struct flow_cls_offload *f, struct net_device *filter_dev, - u8 *match_level, u8 *tunnel_match_level) + u8 *inner_match_level, u8 *outer_match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@@ -1495,9 -1717,8 +1717,9 @@@ struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; u8 ip_proto = 0; + u8 *match_level;
- *match_level = MLX5_MATCH_NONE; + match_level = outer_match_level;
if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_META) | @@@ -1525,14 -1746,12 +1747,14 @@@ }
if (mlx5e_get_tc_tun(filter_dev)) { - if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) + if (parse_tunnel_attr(priv, spec, f, filter_dev, + outer_match_level)) return -EOPNOTSUPP;
- /* In decap flow, header pointers should point to the inner + /* At this point, header pointers should point to the inner * headers, outer header were already set by parse_tunnel_attr */ + match_level = inner_match_level; headers_c = get_match_headers_criteria(MLX5_FLOW_CONTEXT_ACTION_DECAP, spec); headers_v = get_match_headers_value(MLX5_FLOW_CONTEXT_ACTION_DECAP, @@@ -1834,41 -2053,37 +2056,43 @@@ static int parse_cls_flower(struct mlx5 struct flow_cls_offload *f, struct net_device *filter_dev) { + u8 inner_match_level, outer_match_level, non_tunnel_match_level; struct netlink_ext_ack *extack = f->common.extack; struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; - u8 match_level, tunnel_match_level = MLX5_MATCH_NONE; struct mlx5_eswitch_rep *rep; + bool is_eswitch_flow; int err;
- err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level); + inner_match_level = MLX5_MATCH_NONE; + outer_match_level = MLX5_MATCH_NONE; + + err = __parse_cls_flower(priv, spec, f, filter_dev, &inner_match_level, + &outer_match_level); + non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ? + outer_match_level : inner_match_level;
- if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { + is_eswitch_flow = mlx5e_is_eswitch_flow(flow); + if (!err && is_eswitch_flow) { rep = rpriv->rep; if (rep->vport != MLX5_VPORT_UPLINK && (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE && - esw->offloads.inline_mode < match_level)) { + esw->offloads.inline_mode < non_tunnel_match_level)) { NL_SET_ERR_MSG_MOD(extack, "Flow is not offloaded due to min inline setting"); netdev_warn(priv->netdev, "Flow is not offloaded due to min inline setting, required %d actual %d\n", - match_level, esw->offloads.inline_mode); + non_tunnel_match_level, esw->offloads.inline_mode); return -EOPNOTSUPP; } }
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { + if (is_eswitch_flow) { - flow->esw_attr->match_level = match_level; - flow->esw_attr->tunnel_match_level = tunnel_match_level; + flow->esw_attr->inner_match_level = inner_match_level; + flow->esw_attr->outer_match_level = outer_match_level; } else { - flow->nic_attr->match_level = match_level; + flow->nic_attr->match_level = non_tunnel_match_level; }
return err; @@@ -2385,14 -2600,15 +2609,15 @@@ static bool actions_match_supported(str { u32 actions;
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (mlx5e_is_eswitch_flow(flow)) actions = flow->esw_attr->action; else actions = flow->nic_attr->action;
- if (flow->flags & MLX5E_TC_FLOW_EGRESS && + if (flow_flag_test(flow, EGRESS) && !((actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) || - (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP))) + (actions & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) || + (actions & MLX5_FLOW_CONTEXT_ACTION_DROP))) return false;
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) @@@ -2542,7 -2758,7 +2767,7 @@@ static int parse_tc_nic_actions(struct if (priv->netdev->netdev_ops == peer_dev->netdev_ops && same_hw_devs(priv, netdev_priv(peer_dev))) { parse_attr->mirred_ifindex[0] = peer_dev->ifindex; - flow->flags |= MLX5E_TC_FLOW_HAIRPIN; + flow_flag_set(flow, HAIRPIN); action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; } else { @@@ -2629,6 -2845,31 +2854,31 @@@ static bool is_merged_eswitch_dev(struc
+ bool mlx5e_encap_take(struct mlx5e_encap_entry *e) + { + return refcount_inc_not_zero(&e->refcnt); + } + + static struct mlx5e_encap_entry * + mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, + uintptr_t hash_key) + { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_encap_entry *e; + struct encap_key e_key; + + hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, + encap_hlist, hash_key) { + e_key.ip_tun_key = &e->tun_info->key; + e_key.tc_tunnel = e->tunnel; + if (!cmp_encap_info(&e_key, key) && + mlx5e_encap_take(e)) + return e; + } + + return NULL; + } + static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, struct net_device *mirred_dev, @@@ -2641,11 -2882,10 +2891,10 @@@ struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_tc_flow_parse_attr *parse_attr; const struct ip_tunnel_info *tun_info; - struct encap_key key, e_key; + struct encap_key key; struct mlx5e_encap_entry *e; unsigned short family; uintptr_t hash_key; - bool found = false; int err = 0;
parse_attr = attr->parse_attr; @@@ -2660,42 -2900,59 +2909,59 @@@
hash_key = hash_encap_info(&key);
- hash_for_each_possible_rcu(esw->offloads.encap_tbl, e, - encap_hlist, hash_key) { - e_key.ip_tun_key = &e->tun_info->key; - e_key.tc_tunnel = e->tunnel; - if (!cmp_encap_info(&e_key, &key)) { - found = true; - break; - } - } + mutex_lock(&esw->offloads.encap_tbl_lock); + e = mlx5e_encap_get(priv, &key, hash_key);
/* must verify if encap is valid or not */ - if (found) + if (e) { + mutex_unlock(&esw->offloads.encap_tbl_lock); + wait_for_completion(&e->res_ready); + + /* Protect against concurrent neigh update. */ + mutex_lock(&esw->offloads.encap_tbl_lock); + if (e->compl_result) { + err = -EREMOTEIO; + goto out_err; + } goto attach_flow; + }
e = kzalloc(sizeof(*e), GFP_KERNEL); - if (!e) - return -ENOMEM; + if (!e) { + err = -ENOMEM; + goto out_err; + } + + refcount_set(&e->refcnt, 1); + init_completion(&e->res_ready);
e->tun_info = tun_info; err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack); - if (err) + if (err) { + kfree(e); + e = NULL; goto out_err; + }
INIT_LIST_HEAD(&e->flows); + hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + mutex_unlock(&esw->offloads.encap_tbl_lock);
if (family == AF_INET) err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e); else if (family == AF_INET6) err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
- if (err) + /* Protect against concurrent neigh update. */ + mutex_lock(&esw->offloads.encap_tbl_lock); + complete_all(&e->res_ready); + if (err) { + e->compl_result = err; goto out_err; - - hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key); + }
attach_flow: + flow->encaps[out_index].e = e; list_add(&flow->encaps[out_index].list, &e->flows); flow->encaps[out_index].index = out_index; *encap_dev = e->out_dev; @@@ -2706,11 -2963,14 +2972,14 @@@ } else { *encap_valid = false; } + mutex_unlock(&esw->offloads.encap_tbl_lock);
return err;
out_err: - kfree(e); + mutex_unlock(&esw->offloads.encap_tbl_lock); + if (e) + mlx5e_encap_put(priv, e); return err; }
@@@ -2890,12 -3150,16 +3159,16 @@@ static int parse_tc_fdb_actions(struct if (netdev_port_same_parent_id(priv->netdev, out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); - struct net_device *uplink_upper = netdev_master_upper_dev_get(uplink_dev); + struct net_device *uplink_upper;
+ rcu_read_lock(); + uplink_upper = + netdev_master_upper_dev_get_rcu(uplink_dev); if (uplink_upper && netif_is_lag_master(uplink_upper) && uplink_upper == out_dev) out_dev = uplink_dev; + rcu_read_unlock();
if (is_vlan_dev(out_dev)) { err = add_vlan_push_action(priv, attr, @@@ -3066,19 -3330,19 +3339,19 @@@ return 0; }
- static void get_flags(int flags, u16 *flow_flags) + static void get_flags(int flags, unsigned long *flow_flags) { - u16 __flow_flags = 0; + unsigned long __flow_flags = 0;
- if (flags & MLX5E_TC_INGRESS) - __flow_flags |= MLX5E_TC_FLOW_INGRESS; - if (flags & MLX5E_TC_EGRESS) - __flow_flags |= MLX5E_TC_FLOW_EGRESS; + if (flags & MLX5_TC_FLAG(INGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS); + if (flags & MLX5_TC_FLAG(EGRESS)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
- if (flags & MLX5E_TC_ESW_OFFLOAD) - __flow_flags |= MLX5E_TC_FLOW_ESWITCH; - if (flags & MLX5E_TC_NIC_OFFLOAD) - __flow_flags |= MLX5E_TC_FLOW_NIC; + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); + if (flags & MLX5_TC_FLAG(NIC_OFFLOAD)) + __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
*flow_flags = __flow_flags; } @@@ -3090,12 -3354,13 +3363,13 @@@ static const struct rhashtable_params t .automatic_shrinking = true, };
- static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, int flags) + static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv, + unsigned long flags) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_rep_priv *uplink_rpriv;
- if (flags & MLX5E_TC_ESW_OFFLOAD) { + if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) { uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH); return &uplink_rpriv->uplink_priv.tc_ht; } else /* NIC offload */ @@@ -3106,7 -3371,7 +3380,7 @@@ static bool is_peer_flow_needed(struct { struct mlx5_esw_flow_attr *attr = flow->esw_attr; bool is_rep_ingress = attr->in_rep->vport != MLX5_VPORT_UPLINK && - flow->flags & MLX5E_TC_FLOW_INGRESS; + flow_flag_test(flow, INGRESS); bool act_is_encap = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT); bool esw_paired = mlx5_devcom_is_paired(attr->in_mdev->priv.devcom, @@@ -3125,13 -3390,13 +3399,13 @@@
static int mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, - struct flow_cls_offload *f, u16 flow_flags, + struct flow_cls_offload *f, unsigned long flow_flags, struct mlx5e_tc_flow_parse_attr **__parse_attr, struct mlx5e_tc_flow **__flow) { struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; - int err; + int out_index, err;
flow = kzalloc(sizeof(*flow) + attr_size, GFP_KERNEL); parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL); @@@ -3143,6 -3408,11 +3417,11 @@@ flow->cookie = f->cookie; flow->flags = flow_flags; flow->priv = priv; + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) + INIT_LIST_HEAD(&flow->encaps[out_index].list); + INIT_LIST_HEAD(&flow->mod_hdr); + INIT_LIST_HEAD(&flow->hairpin); + refcount_set(&flow->refcnt, 1);
*__flow = flow; *__parse_attr = parse_attr; @@@ -3182,7 -3452,7 +3461,7 @@@ mlx5e_flow_esw_attr_init(struct mlx5_es static struct mlx5e_tc_flow * __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5_eswitch_rep *in_rep, struct mlx5_core_dev *in_mdev) @@@ -3193,7 -3463,7 +3472,7 @@@ struct mlx5e_tc_flow *flow; int attr_size, err;
- flow_flags |= MLX5E_TC_FLOW_ESWITCH; + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH); attr_size = sizeof(struct mlx5_esw_flow_attr); err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, &parse_attr, &flow); @@@ -3225,15 -3495,14 +3504,14 @@@ return flow;
err_free: - kfree(flow); - kvfree(parse_attr); + mlx5e_flow_put(priv, flow); out: return ERR_PTR(err); }
static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f, struct mlx5e_tc_flow *flow, - u16 flow_flags) + unsigned long flow_flags) { struct mlx5e_priv *priv = flow->priv, *peer_priv; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw; @@@ -3271,7 -3540,7 +3549,7 @@@ }
flow->peer_flow = peer_flow; - flow->flags |= MLX5E_TC_FLOW_DUP; + flow_flag_set(flow, DUP); mutex_lock(&esw->offloads.peer_mutex); list_add_tail(&flow->peer, &esw->offloads.peer_flows); mutex_unlock(&esw->offloads.peer_mutex); @@@ -3284,7 -3553,7 +3562,7 @@@ out static int mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { @@@ -3318,7 -3587,7 +3596,7 @@@ out static int mlx5e_add_nic_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - u16 flow_flags, + unsigned long flow_flags, struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { @@@ -3332,7 -3601,7 +3610,7 @@@ if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common)) return -EOPNOTSUPP;
- flow_flags |= MLX5E_TC_FLOW_NIC; + flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC); attr_size = sizeof(struct mlx5_nic_flow_attr); err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags, &parse_attr, &flow); @@@ -3353,14 -3622,14 +3631,14 @@@ if (err) goto err_free;
- flow->flags |= MLX5E_TC_FLOW_OFFLOADED; + flow_flag_set(flow, OFFLOADED); kvfree(parse_attr); *__flow = flow;
return 0;
err_free: - kfree(flow); + mlx5e_flow_put(priv, flow); kvfree(parse_attr); out: return err; @@@ -3369,12 -3638,12 +3647,12 @@@ static int mlx5e_tc_add_flow(struct mlx5e_priv *priv, struct flow_cls_offload *f, - int flags, + unsigned long flags, struct net_device *filter_dev, struct mlx5e_tc_flow **flow) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - u16 flow_flags; + unsigned long flow_flags; int err;
get_flags(flags, &flow_flags); @@@ -3393,14 -3662,16 +3671,16 @@@ }
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct netlink_ext_ack *extack = f->common.extack; struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; int err = 0;
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); + rcu_read_lock(); + flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params); + rcu_read_unlock(); if (flow) { NL_SET_ERR_MSG_MOD(extack, "flow cookie already exists, ignoring"); @@@ -3415,51 -3686,62 +3695,62 @@@ if (err) goto out;
- err = rhashtable_insert_fast(tc_ht, &flow->node, tc_ht_params); + err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params); if (err) goto err_free;
return 0;
err_free: - mlx5e_tc_del_flow(priv, flow); - kfree(flow); + mlx5e_flow_put(priv, flow); out: return err; }
- #define DIRECTION_MASK (MLX5E_TC_INGRESS | MLX5E_TC_EGRESS) - #define FLOW_DIRECTION_MASK (MLX5E_TC_FLOW_INGRESS | MLX5E_TC_FLOW_EGRESS) - static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags) { - if ((flow->flags & FLOW_DIRECTION_MASK) == (flags & DIRECTION_MASK)) - return true; + bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS)); + bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
- return false; + return flow_flag_test(flow, INGRESS) == dir_ingress && + flow_flag_test(flow, EGRESS) == dir_egress; }
int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct rhashtable *tc_ht = get_tc_ht(priv, flags); struct mlx5e_tc_flow *flow; + int err;
+ rcu_read_lock(); flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (!flow || !same_flow_direction(flow, flags)) - return -EINVAL; + if (!flow || !same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + }
+ /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag + * set. + */ + if (flow_flag_test_and_set(flow, DELETED)) { + err = -EINVAL; + goto errout; + } rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params); + rcu_read_unlock();
- mlx5e_tc_del_flow(priv, flow); - - kfree(flow); + mlx5e_flow_put(priv, flow);
return 0; + + errout: + rcu_read_unlock(); + return err; }
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv, - struct flow_cls_offload *f, int flags) + struct flow_cls_offload *f, unsigned long flags) { struct mlx5_devcom *devcom = priv->mdev->priv.devcom; struct rhashtable *tc_ht = get_tc_ht(priv, flags); @@@ -3469,15 -3751,24 +3760,24 @@@ u64 lastuse = 0; u64 packets = 0; u64 bytes = 0; + int err = 0;
- flow = rhashtable_lookup_fast(tc_ht, &f->cookie, tc_ht_params); - if (!flow || !same_flow_direction(flow, flags)) - return -EINVAL; + rcu_read_lock(); + flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie, + tc_ht_params)); + rcu_read_unlock(); + if (IS_ERR(flow)) + return PTR_ERR(flow);
- if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) { + if (!same_flow_direction(flow, flags)) { + err = -EINVAL; + goto errout; + } + + if (mlx5e_is_offloaded_flow(flow)) { counter = mlx5e_tc_get_counter(flow); if (!counter) - return 0; + goto errout;
mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); } @@@ -3489,8 -3780,8 +3789,8 @@@ if (!peer_esw) goto out;
- if ((flow->flags & MLX5E_TC_FLOW_DUP) && - (flow->peer_flow->flags & MLX5E_TC_FLOW_OFFLOADED)) { + if (flow_flag_test(flow, DUP) && + flow_flag_test(flow->peer_flow, OFFLOADED)) { u64 bytes2; u64 packets2; u64 lastuse2; @@@ -3509,15 -3800,117 +3809,117 @@@ no_peer_counter mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); out: flow_stats_update(&f->stats, bytes, packets, lastuse); + errout: + mlx5e_flow_put(priv, flow); + return err; + } + + static int apply_police_params(struct mlx5e_priv *priv, u32 rate, + struct netlink_ext_ack *extack) + { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch *esw; + u16 vport_num; + u32 rate_mbps; + int err; + + esw = priv->mdev->priv.eswitch; + /* rate is given in bytes/sec. + * First convert to bits/sec and then round to the nearest mbit/secs. + * mbit means million bits. + * Moreover, if rate is non zero we choose to configure to a minimum of + * 1 mbit/sec. + */ + rate_mbps = rate ? max_t(u32, (rate * 8 + 500000) / 1000000, 1) : 0; + vport_num = rpriv->rep->vport; + + err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); + if (err) + NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware"); + + return err; + } + + static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, + struct netlink_ext_ack *extack) + { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + const struct flow_action_entry *act; + int err; + int i; + + if (!flow_action_has_entries(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall called with no action"); + return -EINVAL; + } + + if (!flow_offload_has_one_action(flow_action)) { + NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action"); + return -EOPNOTSUPP; + } + + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_POLICE: + err = apply_police_params(priv, act->police.rate_bytes_ps, extack); + if (err) + return err; + + rpriv->prev_vf_vport_stats = priv->stats.vf_vport; + break; + default: + NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); + return -EOPNOTSUPP; + } + }
return 0; }
+ int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) + { + struct netlink_ext_ack *extack = ma->common.extack; + int prio = TC_H_MAJ(ma->common.prio) >> 16; + + if (prio != 1) { + NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported"); + return -EINVAL; + } + + return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack); + } + + int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) + { + struct netlink_ext_ack *extack = ma->common.extack; + + return apply_police_params(priv, 0, extack); + } + + void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, + struct tc_cls_matchall_offload *ma) + { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct rtnl_link_stats64 cur_stats; + u64 dbytes; + u64 dpkts; + + cur_stats = priv->stats.vf_vport; + dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; + dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; + rpriv->prev_vf_vport_stats = cur_stats; + flow_stats_update(&ma->stats, dpkts, dbytes, jiffies); + } + static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv) { struct mlx5_core_dev *peer_mdev = peer_priv->mdev; - struct mlx5e_hairpin_entry *hpe; + struct mlx5e_hairpin_entry *hpe, *tmp; + LIST_HEAD(init_wait_list); u16 peer_vhca_id; int bkt;
@@@ -3526,9 -3919,18 +3928,18 @@@
peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
- hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) { - if (hpe->peer_vhca_id == peer_vhca_id) + mutex_lock(&priv->fs.tc.hairpin_tbl_lock); + hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist) + if (refcount_inc_not_zero(&hpe->refcnt)) + list_add(&hpe->dead_peer_wait_list, &init_wait_list); + mutex_unlock(&priv->fs.tc.hairpin_tbl_lock); + + list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) { + wait_for_completion(&hpe->res_ready); + if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id) hpe->hp->pair->peer_gone = true; + + mlx5e_hairpin_put(priv, hpe); } }
@@@ -3564,7 -3966,10 +3975,10 @@@ int mlx5e_tc_nic_init(struct mlx5e_pri struct mlx5e_tc_table *tc = &priv->fs.tc; int err;
- hash_init(tc->mod_hdr_tbl); + mutex_init(&tc->t_lock); + mutex_init(&tc->mod_hdr.lock); + hash_init(tc->mod_hdr.hlist); + mutex_init(&tc->hairpin_tbl_lock); hash_init(tc->hairpin_tbl);
err = rhashtable_init(&tc->ht, &tc_ht_params); @@@ -3596,12 -4001,16 +4010,16 @@@ void mlx5e_tc_nic_cleanup(struct mlx5e_ if (tc->netdevice_nb.notifier_call) unregister_netdevice_notifier(&tc->netdevice_nb);
+ mutex_destroy(&tc->mod_hdr.lock); + mutex_destroy(&tc->hairpin_tbl_lock); + rhashtable_destroy(&tc->ht);
if (!IS_ERR_OR_NULL(tc->t)) { mlx5_destroy_flow_table(tc->t); tc->t = NULL; } + mutex_destroy(&tc->t_lock); }
int mlx5e_tc_esw_init(struct rhashtable *tc_ht) @@@ -3614,7 -4023,7 +4032,7 @@@ void mlx5e_tc_esw_cleanup(struct rhasht rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL); }
- int mlx5e_tc_num_filters(struct mlx5e_priv *priv, int flags) + int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags) { struct rhashtable *tc_ht = get_tc_ht(priv, flags);
@@@ -3636,10 -4045,10 +4054,10 @@@ void mlx5e_tc_reoffload_flows_work(stru reoffload_flows_work); struct mlx5e_tc_flow *flow, *tmp;
- rtnl_lock(); + mutex_lock(&rpriv->unready_flows_lock); list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) { if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL)) - remove_unready_flow(flow); + unready_flow_del(flow); } - rtnl_unlock(); + mutex_unlock(&rpriv->unready_flows_lock); } diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 81e03e493a01,f0692407f617..30aae76b6a1d --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@@ -1413,7 -1413,7 +1413,7 @@@ out
static bool element_type_supported(struct mlx5_eswitch *esw, int type) { - struct mlx5_core_dev *dev = esw->dev = esw->dev; + const struct mlx5_core_dev *dev = esw->dev;
switch (type) { case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: @@@ -1585,6 -1585,22 +1585,22 @@@ static int esw_vport_qos_config(struct return 0; }
+ int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, + u32 rate_mbps) + { + u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); + + return mlx5_modify_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + ctx, + vport->qos.esw_tsar_ix, + MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW); + } + static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) { ((u8 *)node_guid)[7] = mac[0]; @@@ -1983,8 -1999,11 +1999,11 @@@ int mlx5_eswitch_init(struct mlx5_core_ if (err) goto abort;
+ mutex_init(&esw->offloads.encap_tbl_lock); hash_init(esw->offloads.encap_tbl); - hash_init(esw->offloads.mod_hdr_tbl); + mutex_init(&esw->offloads.mod_hdr.lock); + hash_init(esw->offloads.mod_hdr.hlist); + atomic64_set(&esw->offloads.num_flows, 0); mutex_init(&esw->state_lock);
mlx5_esw_for_all_vports(esw, i, vport) { @@@ -2020,6 -2039,8 +2039,8 @@@ void mlx5_eswitch_cleanup(struct mlx5_e esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); esw_offloads_cleanup_reps(esw); + mutex_destroy(&esw->offloads.mod_hdr.lock); + mutex_destroy(&esw->offloads.encap_tbl_lock); kfree(esw->vports); kfree(esw); } @@@ -2137,23 -2158,19 +2158,19 @@@ int __mlx5_eswitch_set_vport_vlan(struc if (vlan > 4095 || qos > 7) return -EINVAL;
- mutex_lock(&esw->state_lock); - err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); if (err) - goto unlock; + return err;
evport->info.vlan = vlan; evport->info.qos = qos; if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) - goto unlock; + return err; err = esw_vport_egress_config(esw, evport); }
- unlock: - mutex_unlock(&esw->state_lock); return err; }
@@@ -2161,11 -2178,16 +2178,16 @@@ int mlx5_eswitch_set_vport_vlan(struct u16 vport, u16 vlan, u8 qos) { u8 set_flags = 0; + int err;
if (vlan || qos) set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT;
- return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); + mutex_lock(&esw->state_lock); + err = __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); + mutex_unlock(&esw->state_lock); + + return err; }
int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 5ec7c6dfb88f,86db0e9776da..aba9e7a6ad3c --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@@ -35,6 -35,7 +35,7 @@@
#include <linux/if_ether.h> #include <linux/if_link.h> + #include <linux/atomic.h> #include <net/devlink.h> #include <linux/mlx5/device.h> #include <linux/mlx5/eswitch.h> @@@ -180,13 -181,14 +181,14 @@@ struct mlx5_esw_offload struct mlx5_eswitch_rep *vport_reps; struct list_head peer_flows; struct mutex peer_mutex; + struct mutex encap_tbl_lock; /* protects encap_tbl */ DECLARE_HASHTABLE(encap_tbl, 8); - DECLARE_HASHTABLE(mod_hdr_tbl, 8); + struct mod_hdr_tbl mod_hdr; DECLARE_HASHTABLE(termtbl_tbl, 8); struct mutex termtbl_mutex; /* protects termtbl hash */ const struct mlx5_eswitch_rep_ops *rep_ops[NUM_REP_TYPES]; u8 inline_mode; - u64 num_flows; + atomic64_t num_flows; enum devlink_eswitch_encap_mode encap; };
@@@ -260,6 -262,8 +262,8 @@@ void esw_vport_disable_ingress_acl(stru struct mlx5_vport *vport); void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, + u32 rate_mbps);
/* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); @@@ -386,8 -390,8 +390,8 @@@ struct mlx5_esw_flow_attr struct mlx5_termtbl_handle *termtbl; } dests[MLX5_MAX_FLOW_FWD_VPORTS]; u32 mod_hdr_id; - u8 match_level; - u8 tunnel_match_level; + u8 inner_match_level; + u8 outer_match_level; struct mlx5_fc *counter; u32 chain; u16 prio; diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 2adee1b867f6,42cc5001255b..7d3582ee66b7 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@@ -207,10 -207,14 +207,10 @@@ mlx5_eswitch_add_offloaded_rule(struct
mlx5_eswitch_set_rule_source_port(esw, spec, attr);
- if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP) { - if (attr->tunnel_match_level != MLX5_MATCH_NONE) - spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; - if (attr->match_level != MLX5_MATCH_NONE) - spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS; - } else if (attr->match_level != MLX5_MATCH_NONE) { + if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; - } + if (attr->inner_match_level != MLX5_MATCH_NONE) + spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) flow_act.modify_id = attr->mod_hdr_id; @@@ -229,7 -233,7 +229,7 @@@ if (IS_ERR(rule)) goto err_add_rule; else - esw->offloads.num_flows++; + atomic64_inc(&esw->offloads.num_flows);
return rule;
@@@ -286,7 -290,7 +286,7 @@@ mlx5_eswitch_add_fwd_rule(struct mlx5_e mlx5_eswitch_set_rule_source_port(esw, spec, attr);
spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS; - if (attr->match_level != MLX5_MATCH_NONE) + if (attr->outer_match_level != MLX5_MATCH_NONE) spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
rule = mlx5_add_flow_rules(fast_fdb, spec, &flow_act, dest, i); @@@ -294,7 -298,7 +294,7 @@@ if (IS_ERR(rule)) goto add_err;
- esw->offloads.num_flows++; + atomic64_inc(&esw->offloads.num_flows);
return rule; add_err: @@@ -322,7 -326,7 +322,7 @@@ __mlx5_eswitch_del_rule(struct mlx5_esw mlx5_eswitch_termtbl_put(esw, attr->dests[i].termtbl); }
- esw->offloads.num_flows--; + atomic64_dec(&esw->offloads.num_flows);
if (fwd_rule) { esw_put_prio_table(esw, attr->chain, attr->prio, 1); @@@ -438,9 -442,11 +438,11 @@@ int mlx5_eswitch_add_vlan_action(struc fwd = !!((attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && !attr->dest_chain);
+ mutex_lock(&esw->state_lock); + err = esw_add_vlan_action_check(attr, push, pop, fwd); if (err) - return err; + goto unlock;
attr->vlan_handled = false;
@@@ -453,11 -459,11 +455,11 @@@ attr->vlan_handled = true; }
- return 0; + goto unlock; }
if (!push && !pop) - return 0; + goto unlock;
if (!(offloads->vlan_push_pop_refcount)) { /* it's the 1st vlan rule, apply global vlan pop policy */ @@@ -482,6 -488,8 +484,8 @@@ skip_set_push out: if (!err) attr->vlan_handled = true; + unlock: + mutex_unlock(&esw->state_lock); return err; }
@@@ -504,6 -512,8 +508,8 @@@ int mlx5_eswitch_del_vlan_action(struc pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST);
+ mutex_lock(&esw->state_lock); + vport = esw_vlan_action_get_vport(attr, push, pop);
if (!push && !pop && fwd) { @@@ -511,7 -521,7 +517,7 @@@ if (attr->dests[0].rep->vport == MLX5_VPORT_UPLINK) vport->vlan_refcount--;
- return 0; + goto out; }
if (push) { @@@ -529,12 -539,13 +535,13 @@@ skip_unset_push: offloads->vlan_push_pop_refcount--; if (offloads->vlan_push_pop_refcount) - return 0; + goto out;
/* no more vlan rules, stop global vlan pop policy */ err = esw_set_global_vlan_pop(esw, 0);
out: + mutex_unlock(&esw->state_lock); return err; }
@@@ -1382,10 -1393,9 +1389,9 @@@ void esw_offloads_cleanup_reps(struct m int esw_offloads_init_reps(struct mlx5_eswitch *esw) { int total_vports = esw->total_vports; - struct mlx5_core_dev *dev = esw->dev; struct mlx5_eswitch_rep *rep; - u8 hw_id[ETH_ALEN], rep_type; int vport_index; + u8 rep_type;
esw->offloads.vport_reps = kcalloc(total_vports, sizeof(struct mlx5_eswitch_rep), @@@ -1393,12 -1403,9 +1399,9 @@@ if (!esw->offloads.vport_reps) return -ENOMEM;
- mlx5_query_mac_address(dev, hw_id); - mlx5_esw_for_all_reps(esw, vport_index, rep) { rep->vport = mlx5_eswitch_index_to_vport_num(esw, vport_index); rep->vport_index = vport_index; - ether_addr_copy(rep->hw_id, hw_id);
for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) atomic_set(&rep->rep_data[rep_type].state, @@@ -2325,7 -2332,7 +2328,7 @@@ int mlx5_devlink_eswitch_inline_mode_se break; }
- if (esw->offloads.num_flows > 0) { + if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set inline mode when flows are configured"); return -EOPNOTSUPP; @@@ -2435,7 -2442,7 +2438,7 @@@ int mlx5_devlink_eswitch_encap_mode_set if (esw->offloads.encap == encap) return 0;
- if (esw->offloads.num_flows > 0) { + if (atomic64_read(&esw->offloads.num_flows) > 0) { NL_SET_ERR_MSG_MOD(extack, "Can't set encapsulation when flows are configured"); return -EOPNOTSUPP; diff --combined drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 337b0cbfd153,61fe92719982..c979f38a2e0c --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@@ -1286,7 -1286,7 +1286,7 @@@ myri10ge_vlan_rx(struct net_device *dev { u8 *va; struct vlan_ethhdr *veh; - struct skb_frag_struct *frag; + skb_frag_t *frag; __wsum vsum;
va = addr; @@@ -1306,8 -1306,8 +1306,8 @@@ skb->len -= VLAN_HLEN; skb->data_len -= VLAN_HLEN; frag = skb_shinfo(skb)->frags; - frag->page_offset += VLAN_HLEN; - skb_frag_size_set(frag, skb_frag_size(frag) - VLAN_HLEN); + skb_frag_off_add(frag, VLAN_HLEN); + skb_frag_size_sub(frag, VLAN_HLEN); } }
@@@ -1318,7 -1318,7 +1318,7 @@@ myri10ge_rx_done(struct myri10ge_slice_ { struct myri10ge_priv *mgp = ss->mgp; struct sk_buff *skb; - struct skb_frag_struct *rx_frags; + skb_frag_t *rx_frags; struct myri10ge_rx_buf *rx; int i, idx, remainder, bytes; struct pci_dev *pdev = mgp->pdev; @@@ -1351,7 -1351,7 +1351,7 @@@ return 0; } rx_frags = skb_shinfo(skb)->frags; - /* Fill skb_frag_struct(s) with data from our receive */ + /* Fill skb_frag_t(s) with data from our receive */ for (i = 0, remainder = len; remainder > 0; i++) { myri10ge_unmap_rx_page(pdev, &rx->info[idx], bytes); skb_fill_page_desc(skb, i, rx->info[idx].page, @@@ -1364,8 -1364,8 +1364,8 @@@ }
/* remove padding */ - rx_frags[0].page_offset += MXGEFW_PAD; - rx_frags[0].size -= MXGEFW_PAD; + skb_frag_off_add(&rx_frags[0], MXGEFW_PAD); + skb_frag_size_sub(&rx_frags[0], MXGEFW_PAD); len -= MXGEFW_PAD;
skb->len = len; @@@ -2628,7 -2628,7 +2628,7 @@@ static netdev_tx_t myri10ge_xmit(struc struct myri10ge_slice_state *ss; struct mcp_kreq_ether_send *req; struct myri10ge_tx_buf *tx; - struct skb_frag_struct *frag; + skb_frag_t *frag; struct netdev_queue *netdev_queue; dma_addr_t bus; u32 low; @@@ -3037,7 -3037,6 +3037,6 @@@ static int myri10ge_set_mac_address(str static int myri10ge_change_mtu(struct net_device *dev, int new_mtu) { struct myri10ge_priv *mgp = netdev_priv(dev); - int error = 0;
netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu); if (mgp->running) { @@@ -3049,7 -3048,7 +3048,7 @@@ } else dev->mtu = new_mtu;
- return error; + return 0; }
/* @@@ -3919,7 -3918,7 +3918,7 @@@ static int myri10ge_probe(struct pci_de * setup (if available). */ status = myri10ge_request_irq(mgp); if (status != 0) - goto abort_with_firmware; + goto abort_with_slices; myri10ge_free_irq(mgp);
/* Save configuration space to be restored if the diff --combined drivers/net/hyperv/netvsc_drv.c index e8fce6d715ef,86884c863013..0a6cd2f1111f --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@@ -435,7 -435,7 +435,7 @@@ static u32 init_page_array(void *hdr, u skb_frag_t *frag = skb_shinfo(skb)->frags + i;
slots_used += fill_pg_buf(skb_frag_page(frag), - frag->page_offset, + skb_frag_off(frag), skb_frag_size(frag), &pb[slots_used]); } return slots_used; @@@ -449,7 -449,7 +449,7 @@@ static int count_skb_frag_slots(struct for (i = 0; i < frags; i++) { skb_frag_t *frag = skb_shinfo(skb)->frags + i; unsigned long size = skb_frag_size(frag); - unsigned long offset = frag->page_offset; + unsigned long offset = skb_frag_off(frag);
/* Skip unused frames from start of page */ offset &= ~PAGE_MASK; @@@ -1239,15 -1239,12 +1239,15 @@@ static void netvsc_get_stats64(struct n struct rtnl_link_stats64 *t) { struct net_device_context *ndev_ctx = netdev_priv(net); - struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev); + struct netvsc_device *nvdev; struct netvsc_vf_pcpu_stats vf_tot; int i;
+ rcu_read_lock(); + + nvdev = rcu_dereference(ndev_ctx->nvdev); if (!nvdev) - return; + goto out;
netdev_stats_to_stats64(t, &net->stats);
@@@ -1286,8 -1283,6 +1286,8 @@@ t->rx_packets += packets; t->multicast += multicast; } +out: + rcu_read_unlock(); }
static int netvsc_set_mac_addr(struct net_device *ndev, void *p) diff --combined drivers/net/netdevsim/dev.c index bcc40a236624,a570da406d1d..c67729c10940 --- a/drivers/net/netdevsim/dev.c +++ b/drivers/net/netdevsim/dev.c @@@ -27,6 -27,41 +27,41 @@@
static struct dentry *nsim_dev_ddir;
+ #define NSIM_DEV_DUMMY_REGION_SIZE (1024 * 32) + + static ssize_t nsim_dev_take_snapshot_write(struct file *file, + const char __user *data, + size_t count, loff_t *ppos) + { + struct nsim_dev *nsim_dev = file->private_data; + void *dummy_data; + int err; + u32 id; + + dummy_data = kmalloc(NSIM_DEV_DUMMY_REGION_SIZE, GFP_KERNEL); + if (!dummy_data) + return -ENOMEM; + + get_random_bytes(dummy_data, NSIM_DEV_DUMMY_REGION_SIZE); + + id = devlink_region_shapshot_id_get(priv_to_devlink(nsim_dev)); + err = devlink_region_snapshot_create(nsim_dev->dummy_region, + dummy_data, id, kfree); + if (err) { + pr_err("Failed to create region snapshot\n"); + kfree(dummy_data); + return err; + } + + return count; + } + + static const struct file_operations nsim_dev_take_snapshot_fops = { + .open = simple_open, + .write = nsim_dev_take_snapshot_write, + .llseek = generic_file_llseek, + }; + static int nsim_dev_debugfs_init(struct nsim_dev *nsim_dev) { char dev_ddir_name[16]; @@@ -40,6 -75,12 +75,12 @@@ return PTR_ERR_OR_ZERO(nsim_dev->ports_ddir) ?: -EINVAL; debugfs_create_bool("fw_update_status", 0600, nsim_dev->ddir, &nsim_dev->fw_update_status); + debugfs_create_u32("max_macs", 0600, nsim_dev->ddir, + &nsim_dev->max_macs); + debugfs_create_bool("test1", 0600, nsim_dev->ddir, + &nsim_dev->test1); + debugfs_create_file("take_snapshot", 0200, nsim_dev->ddir, nsim_dev, + &nsim_dev_take_snapshot_fops); return 0; }
@@@ -73,47 -114,46 +114,47 @@@ static void nsim_dev_port_debugfs_exit( debugfs_remove_recursive(nsim_dev_port->ddir); }
+static struct net *nsim_devlink_net(struct devlink *devlink) +{ + return &init_net; +} + static u64 nsim_dev_ipv4_fib_resource_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv;
- return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, false); }
static u64 nsim_dev_ipv4_fib_rules_res_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv;
- return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB_RULES, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, false); }
static u64 nsim_dev_ipv6_fib_resource_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv;
- return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, false); }
static u64 nsim_dev_ipv6_fib_rules_res_occ_get(void *priv) { - struct nsim_dev *nsim_dev = priv; + struct net *net = priv;
- return nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB_RULES, false); + return nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, false); }
static int nsim_dev_resources_register(struct devlink *devlink) { - struct nsim_dev *nsim_dev = devlink_priv(devlink); struct devlink_resource_size_params params = { .size_max = (u64)-1, .size_granularity = 1, .unit = DEVLINK_RESOURCE_UNIT_ENTRY }; + struct net *net = nsim_devlink_net(devlink); int err; u64 n;
@@@ -127,7 -167,8 +168,7 @@@ goto out; }
- n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB, true); err = devlink_resource_register(devlink, "fib", n, NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4, ¶ms); @@@ -136,7 -177,8 +177,7 @@@ return err; }
- n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV4_FIB_RULES, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV4_FIB_RULES, true); err = devlink_resource_register(devlink, "fib-rules", n, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV4, ¶ms); @@@ -155,7 -197,8 +196,7 @@@ goto out; }
- n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB, true); err = devlink_resource_register(devlink, "fib", n, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6, ¶ms); @@@ -164,7 -207,8 +205,7 @@@ return err; }
- n = nsim_fib_get_val(nsim_dev->fib_data, - NSIM_RESOURCE_IPV6_FIB_RULES, true); + n = nsim_fib_get_val(net, NSIM_RESOURCE_IPV6_FIB_RULES, true); err = devlink_resource_register(devlink, "fib-rules", n, NSIM_RESOURCE_IPV6_FIB_RULES, NSIM_RESOURCE_IPV6, ¶ms); @@@ -176,31 -220,96 +217,96 @@@ devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV4_FIB, nsim_dev_ipv4_fib_resource_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV4_FIB_RULES, nsim_dev_ipv4_fib_rules_res_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV6_FIB, nsim_dev_ipv6_fib_resource_occ_get, - nsim_dev); + net); devlink_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV6_FIB_RULES, nsim_dev_ipv6_fib_rules_res_occ_get, - nsim_dev); + net); out: return err; }
+ enum nsim_devlink_param_id { + NSIM_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX, + NSIM_DEVLINK_PARAM_ID_TEST1, + }; + + static const struct devlink_param nsim_devlink_params[] = { + DEVLINK_PARAM_GENERIC(MAX_MACS, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL), + DEVLINK_PARAM_DRIVER(NSIM_DEVLINK_PARAM_ID_TEST1, + "test1", DEVLINK_PARAM_TYPE_BOOL, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), + NULL, NULL, NULL), + }; + + static void nsim_devlink_set_params_init_values(struct nsim_dev *nsim_dev, + struct devlink *devlink) + { + union devlink_param_value value; + + value.vu32 = nsim_dev->max_macs; + devlink_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + value); + value.vbool = nsim_dev->test1; + devlink_param_driverinit_value_set(devlink, + NSIM_DEVLINK_PARAM_ID_TEST1, + value); + } + + static void nsim_devlink_param_load_driverinit_values(struct devlink *devlink) + { + struct nsim_dev *nsim_dev = devlink_priv(devlink); + union devlink_param_value saved_value; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_MAX_MACS, + &saved_value); + if (!err) + nsim_dev->max_macs = saved_value.vu32; + err = devlink_param_driverinit_value_get(devlink, + NSIM_DEVLINK_PARAM_ID_TEST1, + &saved_value); + if (!err) + nsim_dev->test1 = saved_value.vbool; + } + + #define NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX 16 + + static int nsim_dev_dummy_region_init(struct nsim_dev *nsim_dev, + struct devlink *devlink) + { + nsim_dev->dummy_region = + devlink_region_create(devlink, "dummy", + NSIM_DEV_DUMMY_REGION_SNAPSHOT_MAX, + NSIM_DEV_DUMMY_REGION_SIZE); + return PTR_ERR_OR_ZERO(nsim_dev->dummy_region); + } + + static void nsim_dev_dummy_region_exit(struct nsim_dev *nsim_dev) + { + devlink_region_destroy(nsim_dev->dummy_region); + } + static int nsim_dev_reload(struct devlink *devlink, struct netlink_ext_ack *extack) { - struct nsim_dev *nsim_dev = devlink_priv(devlink); enum nsim_resource_id res_ids[] = { NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES }; + struct net *net = nsim_devlink_net(devlink); int i;
for (i = 0; i < ARRAY_SIZE(res_ids); ++i) { @@@ -209,11 -318,13 +315,12 @@@
err = devlink_resource_size_get(devlink, res_ids[i], &val); if (!err) { - err = nsim_fib_set_max(nsim_dev->fib_data, - res_ids[i], val, extack); + err = nsim_fib_set_max(net, res_ids[i], val, extack); if (err) return err; } } + nsim_devlink_param_load_driverinit_values(devlink);
return 0; } @@@ -263,6 -374,9 +370,9 @@@ static const struct devlink_ops nsim_de .flash_update = nsim_dev_flash_update, };
+ #define NSIM_DEV_MAX_MACS_DEFAULT 32 + #define NSIM_DEV_TEST1_DEFAULT true + static struct nsim_dev * nsim_dev_create(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_count) { @@@ -280,31 -394,57 +390,49 @@@ INIT_LIST_HEAD(&nsim_dev->port_list); mutex_init(&nsim_dev->port_list_lock); nsim_dev->fw_update_status = true; + nsim_dev->max_macs = NSIM_DEV_MAX_MACS_DEFAULT; + nsim_dev->test1 = NSIM_DEV_TEST1_DEFAULT;
- nsim_dev->fib_data = nsim_fib_create(); - if (IS_ERR(nsim_dev->fib_data)) { - err = PTR_ERR(nsim_dev->fib_data); - goto err_devlink_free; - } - err = nsim_dev_resources_register(devlink); if (err) - goto err_fib_destroy; + goto err_devlink_free;
err = devlink_register(devlink, &nsim_bus_dev->dev); if (err) goto err_resources_unregister;
- err = nsim_dev_debugfs_init(nsim_dev); + err = devlink_params_register(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); if (err) goto err_dl_unregister; + nsim_devlink_set_params_init_values(nsim_dev, devlink); + + err = nsim_dev_dummy_region_init(nsim_dev, devlink); + if (err) + goto err_params_unregister; + + err = nsim_dev_debugfs_init(nsim_dev); + if (err) + goto err_dummy_region_exit;
err = nsim_bpf_dev_init(nsim_dev); if (err) goto err_debugfs_exit;
+ devlink_params_publish(devlink); return nsim_dev;
err_debugfs_exit: nsim_dev_debugfs_exit(nsim_dev); + err_dummy_region_exit: + nsim_dev_dummy_region_exit(nsim_dev); + err_params_unregister: + devlink_params_unregister(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); err_dl_unregister: devlink_unregister(devlink); err_resources_unregister: devlink_resources_unregister(devlink, NULL); -err_fib_destroy: - nsim_fib_destroy(nsim_dev->fib_data); err_devlink_free: devlink_free(devlink); return ERR_PTR(err); @@@ -316,8 -456,12 +444,11 @@@ static void nsim_dev_destroy(struct nsi
nsim_bpf_dev_exit(nsim_dev); nsim_dev_debugfs_exit(nsim_dev); + nsim_dev_dummy_region_exit(nsim_dev); + devlink_params_unregister(devlink, nsim_devlink_params, + ARRAY_SIZE(nsim_devlink_params)); devlink_unregister(devlink); devlink_resources_unregister(devlink, NULL); - nsim_fib_destroy(nsim_dev->fib_data); mutex_destroy(&nsim_dev->port_list_lock); devlink_free(devlink); } diff --combined drivers/net/netdevsim/netdevsim.h index 9404637d34b7,4c758c6919f5..e8ba59138bad --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@@ -158,6 -158,9 +158,9 @@@ struct nsim_dev struct list_head port_list; struct mutex port_list_lock; /* protects port list */ bool fw_update_status; + u32 max_macs; + bool test1; + struct devlink_region *dummy_region; };
int nsim_dev_init(void); @@@ -169,10 -172,12 +172,10 @@@ int nsim_dev_port_add(struct nsim_bus_d int nsim_dev_port_del(struct nsim_bus_dev *nsim_bus_dev, unsigned int port_index);
-struct nsim_fib_data *nsim_fib_create(void); -void nsim_fib_destroy(struct nsim_fib_data *fib_data); -u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, bool max); -int nsim_fib_set_max(struct nsim_fib_data *fib_data, - enum nsim_resource_id res_id, u64 val, +int nsim_fib_init(void); +void nsim_fib_exit(void); +u64 nsim_fib_get_val(struct net *net, enum nsim_resource_id res_id, bool max); +int nsim_fib_set_max(struct net *net, enum nsim_resource_id res_id, u64 val, struct netlink_ext_ack *extack);
#if IS_ENABLED(CONFIG_XFRM_OFFLOAD) diff --combined drivers/net/phy/phy_device.c index 27ebc2c6c2d0,9c546bae9ec9..7e7393f3cc2a --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@@ -1564,24 -1564,20 +1564,20 @@@ EXPORT_SYMBOL(phy_reset_after_clk_enabl */ static int genphy_config_advert(struct phy_device *phydev) { - u32 advertise; - int bmsr, adv; - int err, changed = 0; + int err, bmsr, changed = 0; + u32 adv;
/* Only allow advertising what this PHY supports */ linkmode_and(phydev->advertising, phydev->advertising, phydev->supported); - if (!ethtool_convert_link_mode_to_legacy_u32(&advertise, - phydev->advertising)) - phydev_warn(phydev, "PHY advertising (%*pb) more modes than genphy supports, some modes not advertised.\n", - __ETHTOOL_LINK_MODE_MASK_NBITS, - phydev->advertising); + + adv = linkmode_adv_to_mii_adv_t(phydev->advertising);
/* Setup standard advertisement */ err = phy_modify_changed(phydev, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM, - ethtool_adv_to_mii_adv_t(advertise)); + adv); if (err < 0) return err; if (err > 0) @@@ -1598,13 -1594,7 +1594,7 @@@ if (!(bmsr & BMSR_ESTATEN)) return changed;
- /* Configure gigabit if it's supported */ - adv = 0; - if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, - phydev->supported) || - linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, - phydev->supported)) - adv = ethtool_adv_to_mii_ctrl1000_t(advertise); + adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising);
err = phy_modify_changed(phydev, MII_CTRL1000, ADVERTISE_1000FULL | ADVERTISE_1000HALF, @@@ -1681,18 -1671,20 +1671,20 @@@ int genphy_restart_aneg(struct phy_devi EXPORT_SYMBOL(genphy_restart_aneg);
/** - * genphy_config_aneg - restart auto-negotiation or write BMCR + * __genphy_config_aneg - restart auto-negotiation or write BMCR * @phydev: target phy_device struct + * @changed: whether autoneg is requested * * Description: If auto-negotiation is enabled, we configure the * advertising, and then restart auto-negotiation. If it is not * enabled, then we write the BMCR. */ - int genphy_config_aneg(struct phy_device *phydev) + int __genphy_config_aneg(struct phy_device *phydev, bool changed) { - int err, changed; + int err;
- changed = genphy_config_eee_advert(phydev); + if (genphy_config_eee_advert(phydev)) + changed = true;
if (AUTONEG_ENABLE != phydev->autoneg) return genphy_setup_forced(phydev); @@@ -1700,10 -1692,10 +1692,10 @@@ err = genphy_config_advert(phydev); if (err < 0) /* error */ return err; + else if (err) + changed = true;
- changed |= err; - - if (changed == 0) { + if (!changed) { /* Advertisement hasn't changed, but maybe aneg was never on to * begin with? Or maybe phy was isolated? */ @@@ -1713,18 -1705,15 +1705,15 @@@ return ctl;
if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE)) - changed = 1; /* do restart aneg */ + changed = true; /* do restart aneg */ }
/* Only restart aneg if we are advertising something different * than we were before. */ - if (changed > 0) - return genphy_restart_aneg(phydev); - - return 0; + return changed ? genphy_restart_aneg(phydev) : 0; } - EXPORT_SYMBOL(genphy_config_aneg); + EXPORT_SYMBOL(__genphy_config_aneg);
/** * genphy_aneg_done - return auto-negotiation status @@@ -1752,17 -1741,7 +1741,17 @@@ EXPORT_SYMBOL(genphy_aneg_done) */ int genphy_update_link(struct phy_device *phydev) { - int status; + int status = 0, bmcr; + + bmcr = phy_read(phydev, MII_BMCR); + if (bmcr < 0) + return bmcr; + + /* Autoneg is being started, therefore disregard BMSR value and + * report link as down. + */ + if (bmcr & BMCR_ANRESTART) + goto done;
/* The link state is latched low so that momentary link * drops can be detected. Do not double-read the status @@@ -1805,7 -1784,7 +1794,7 @@@ EXPORT_SYMBOL(genphy_update_link) */ int genphy_read_status(struct phy_device *phydev) { - int adv, lpa, lpagb, err, old_link = phydev->link; + int lpa, lpagb, err, old_link = phydev->link;
/* Update the link, but return if there was an error */ err = genphy_update_link(phydev); @@@ -1821,19 -1800,18 +1810,18 @@@ phydev->pause = 0; phydev->asym_pause = 0;
- linkmode_zero(phydev->lp_advertising); - if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) { if (phydev->is_gigabit_capable) { lpagb = phy_read(phydev, MII_STAT1000); if (lpagb < 0) return lpagb;
- adv = phy_read(phydev, MII_CTRL1000); - if (adv < 0) - return adv; - if (lpagb & LPA_1000MSFAIL) { + int adv = phy_read(phydev, MII_CTRL1000); + + if (adv < 0) + return adv; + if (adv & CTL1000_ENABLE_MASTER) phydev_err(phydev, "Master/Slave resolution failed, maybe conflicting manual settings?\n"); else diff --combined drivers/net/xen-netback/netback.c index c9262ffeefe4,4679fcf1a1c4..0020b2e8c279 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@@ -136,12 -136,12 +136,12 @@@ static inline struct xenvif_queue *ubuf
static u16 frag_get_pending_idx(skb_frag_t *frag) { - return (u16)frag->page_offset; + return (u16)skb_frag_off(frag); }
static void frag_set_pending_idx(skb_frag_t *frag, u16 pending_idx) { - frag->page_offset = pending_idx; + skb_frag_off_set(frag, pending_idx); }
static inline pending_ring_idx_t pending_index(unsigned i) @@@ -925,7 -925,6 +925,7 @@@ static void xenvif_tx_build_gops(struc skb_shinfo(skb)->nr_frags = MAX_SKB_FRAGS; nskb = xenvif_alloc_skb(0); if (unlikely(nskb == NULL)) { + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); xenvif_tx_err(queue, &txreq, extra_count, idx); if (net_ratelimit()) @@@ -941,7 -940,6 +941,7 @@@
if (xenvif_set_skb_gso(queue->vif, skb, gso)) { /* Failure in xenvif_set_skb_gso is fatal. */ + skb_shinfo(skb)->nr_frags = 0; kfree_skb(skb); kfree_skb(nskb); break; @@@ -1057,7 -1055,7 +1057,7 @@@ static int xenvif_handle_frag_list(stru int j; skb->truesize += skb->data_len; for (j = 0; j < i; j++) - put_page(frags[j].page.p); + put_page(skb_frag_page(&frags[j])); return -ENOMEM; }
@@@ -1069,8 -1067,8 +1069,8 @@@ BUG();
offset += len; - frags[i].page.p = page; - frags[i].page_offset = 0; + __skb_frag_set_page(&frags[i], page); + skb_frag_off_set(&frags[i], 0); skb_frag_size_set(&frags[i], len); }
@@@ -1655,9 -1653,6 +1655,6 @@@ static int __init netback_init(void
#ifdef CONFIG_DEBUG_FS xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL); - if (IS_ERR_OR_NULL(xen_netback_dbg_root)) - pr_warn("Init of debugfs returned %ld!\n", - PTR_ERR(xen_netback_dbg_root)); #endif /* CONFIG_DEBUG_FS */
return 0; diff --combined drivers/s390/net/qeth_core_main.c index 9c3310c4d61d,5aa0f1268bca..0803070246aa --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@@ -544,7 -544,6 +544,7 @@@ static struct qeth_reply *qeth_alloc_re if (reply) { refcount_set(&reply->refcnt, 1); init_completion(&reply->received); + spin_lock_init(&reply->lock); } return reply; } @@@ -800,13 -799,6 +800,13 @@@ static void qeth_issue_next_read_cb(str
if (!reply->callback) { rc = 0; + goto no_callback; + } + + spin_lock_irqsave(&reply->lock, flags); + if (reply->rc) { + /* Bail out when the requestor has already left: */ + rc = reply->rc; } else { if (cmd) { reply->offset = (u16)((char *)cmd - (char *)iob->data); @@@ -815,9 -807,7 +815,9 @@@ rc = reply->callback(card, reply, (unsigned long)iob); } } + spin_unlock_irqrestore(&reply->lock, flags);
+no_callback: if (rc <= 0) qeth_notify_reply(reply, rc); qeth_put_reply(reply); @@@ -1759,16 -1749,6 +1759,16 @@@ static int qeth_send_control_data(struc rc = (timeout == -ERESTARTSYS) ? -EINTR : -ETIME;
qeth_dequeue_reply(card, reply); + + if (reply_cb) { + /* Wait until the callback for a late reply has completed: */ + spin_lock_irq(&reply->lock); + if (rc) + /* Zap any callback that's still pending: */ + reply->rc = rc; + spin_unlock_irq(&reply->lock); + } + if (!rc) rc = reply->rc; qeth_put_reply(reply); @@@ -3535,7 -3515,7 +3535,7 @@@ static int qeth_get_elements_for_frags( int cnt, elements = 0;
for (cnt = 0; cnt < skb_shinfo(skb)->nr_frags; cnt++) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[cnt]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[cnt];
elements += qeth_get_elements_for_range( (addr_t)skb_frag_address(frag), diff --combined drivers/staging/unisys/visornic/visornic_main.c index 40dd573e73c3,6fa7726185de..1d1440d43002 --- a/drivers/staging/unisys/visornic/visornic_main.c +++ b/drivers/staging/unisys/visornic/visornic_main.c @@@ -284,9 -284,9 +284,9 @@@ static int visor_copy_fragsinfo_from_sk for (frag = 0; frag < numfrags; frag++) { count = add_physinfo_entries(page_to_pfn( skb_frag_page(&skb_shinfo(skb)->frags[frag])), - skb_shinfo(skb)->frags[frag].page_offset, - skb_shinfo(skb)->frags[frag].size, count, - frags_max, frags); + skb_frag_off(&skb_shinfo(skb)->frags[frag]), + skb_frag_size(&skb_shinfo(skb)->frags[frag]), + count, frags_max, frags); /* add_physinfo_entries only returns * zero if the frags array is out of room * That should never happen because we @@@ -1750,8 -1750,7 +1750,8 @@@ static int visornic_poll(struct napi_st }
/* poll_for_irq - checks the status of the response queue - * @v: Void pointer to the visronic devdata struct. + * @t: pointer to the 'struct timer_list' from which we can retrieve the + * the visornic devdata struct. * * Main function of the vnic_incoming thread. Periodically check the response * queue and drain it if needed. diff --combined include/Kbuild index fc2aa4e20658,af498acb7cd2..ac284c7aa05b --- a/include/Kbuild +++ b/include/Kbuild @@@ -386,31 -386,6 +386,6 @@@ header-test- += linux/mvebu-pmsu. header-test- += linux/mxm-wmi.h header-test- += linux/n_r3964.h header-test- += linux/ndctl.h - header-test- += linux/netfilter/ipset/ip_set.h - header-test- += linux/netfilter/ipset/ip_set_bitmap.h - header-test- += linux/netfilter/ipset/ip_set_comment.h - header-test- += linux/netfilter/ipset/ip_set_counter.h - header-test- += linux/netfilter/ipset/ip_set_getport.h - header-test- += linux/netfilter/ipset/ip_set_hash.h - header-test- += linux/netfilter/ipset/ip_set_list.h - header-test- += linux/netfilter/ipset/ip_set_skbinfo.h - header-test- += linux/netfilter/ipset/ip_set_timeout.h - header-test- += linux/netfilter/nf_conntrack_amanda.h - header-test- += linux/netfilter/nf_conntrack_ftp.h - header-test- += linux/netfilter/nf_conntrack_h323.h - header-test- += linux/netfilter/nf_conntrack_h323_asn1.h - header-test- += linux/netfilter/nf_conntrack_irc.h - header-test- += linux/netfilter/nf_conntrack_pptp.h - header-test- += linux/netfilter/nf_conntrack_proto_gre.h - header-test- += linux/netfilter/nf_conntrack_sip.h - header-test- += linux/netfilter/nf_conntrack_snmp.h - header-test- += linux/netfilter/nf_conntrack_tftp.h - header-test- += linux/netfilter/x_tables.h - header-test- += linux/netfilter_arp/arp_tables.h - header-test- += linux/netfilter_bridge/ebtables.h - header-test- += linux/netfilter_ipv4/ip4_tables.h - header-test- += linux/netfilter_ipv4/ip_tables.h - header-test- += linux/netfilter_ipv6/ip6_tables.h header-test- += linux/nfs.h header-test- += linux/nfs_fs_i.h header-test- += linux/nfs_fs_sb.h @@@ -874,43 -849,6 +849,6 @@@ header-test- += net/mpls_iptunnel. header-test- += net/mrp.h header-test- += net/ncsi.h header-test- += net/netevent.h - header-test- += net/netfilter/br_netfilter.h - header-test- += net/netfilter/ipv4/nf_dup_ipv4.h - header-test- += net/netfilter/ipv6/nf_defrag_ipv6.h - header-test- += net/netfilter/ipv6/nf_dup_ipv6.h - header-test- += net/netfilter/nf_conntrack.h - header-test- += net/netfilter/nf_conntrack_acct.h - header-test- += net/netfilter/nf_conntrack_bridge.h - header-test- += net/netfilter/nf_conntrack_core.h - header-test- += net/netfilter/nf_conntrack_count.h - header-test- += net/netfilter/nf_conntrack_ecache.h - header-test- += net/netfilter/nf_conntrack_expect.h - header-test- += net/netfilter/nf_conntrack_extend.h - header-test- += net/netfilter/nf_conntrack_helper.h - header-test- += net/netfilter/nf_conntrack_l4proto.h - header-test- += net/netfilter/nf_conntrack_labels.h - header-test- += net/netfilter/nf_conntrack_seqadj.h - header-test- += net/netfilter/nf_conntrack_synproxy.h - header-test- += net/netfilter/nf_conntrack_timeout.h - header-test- += net/netfilter/nf_conntrack_timestamp.h - header-test- += net/netfilter/nf_conntrack_tuple.h - header-test- += net/netfilter/nf_dup_netdev.h - header-test- += net/netfilter/nf_flow_table.h - header-test- += net/netfilter/nf_nat.h - header-test- += net/netfilter/nf_nat_helper.h - header-test- += net/netfilter/nf_nat_masquerade.h - header-test- += net/netfilter/nf_nat_redirect.h - header-test- += net/netfilter/nf_queue.h - header-test- += net/netfilter/nf_reject.h - header-test- += net/netfilter/nf_synproxy.h - header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables.h - header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables_core.h - header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables_ipv4.h - header-test- += net/netfilter/nf_tables_ipv6.h - header-test-$(CONFIG_NF_TABLES) += net/netfilter/nf_tables_offload.h - header-test- += net/netfilter/nft_fib.h - header-test- += net/netfilter/nft_meta.h - header-test- += net/netfilter/nft_reject.h header-test- += net/netns/can.h header-test- += net/netns/generic.h header-test- += net/netns/ieee802154_6lowpan.h @@@ -945,6 -883,12 +883,6 @@@ header-test- += net/xdp. header-test- += net/xdp_priv.h header-test- += pcmcia/cistpl.h header-test- += pcmcia/ds.h -header-test- += rdma/ib.h -header-test- += rdma/iw_portmap.h -header-test- += rdma/opa_port_info.h -header-test- += rdma/rdmavt_cq.h -header-test- += rdma/restrack.h -header-test- += rdma/signature.h header-test- += rdma/tid_rdma_defs.h header-test- += scsi/fc/fc_encaps.h header-test- += scsi/fc/fc_fc2.h @@@ -1134,18 -1078,6 +1072,6 @@@ header-test- += uapi/linux/kvm_para. header-test- += uapi/linux/lightnvm.h header-test- += uapi/linux/mic_common.h header-test- += uapi/linux/mman.h - header-test- += uapi/linux/netfilter/ipset/ip_set_bitmap.h - header-test- += uapi/linux/netfilter/ipset/ip_set_hash.h - header-test- += uapi/linux/netfilter/ipset/ip_set_list.h - header-test- += uapi/linux/netfilter/nf_synproxy.h - header-test- += uapi/linux/netfilter/xt_policy.h - header-test- += uapi/linux/netfilter/xt_set.h - header-test- += uapi/linux/netfilter_arp/arp_tables.h - header-test- += uapi/linux/netfilter_arp/arpt_mangle.h - header-test- += uapi/linux/netfilter_ipv4/ip_tables.h - header-test- += uapi/linux/netfilter_ipv4/ipt_LOG.h - header-test- += uapi/linux/netfilter_ipv6/ip6_tables.h - header-test- += uapi/linux/netfilter_ipv6/ip6t_LOG.h header-test- += uapi/linux/nilfs2_ondisk.h header-test- += uapi/linux/patchkey.h header-test- += uapi/linux/ptrace.h diff --combined include/linux/mlx5/driver.h index 0acd28f2e62c,df23f17eed64..ec668f63e8d3 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@@ -47,7 -47,6 +47,7 @@@ #include <linux/interrupt.h> #include <linux/idr.h> #include <linux/notifier.h> +#include <linux/refcount.h>
#include <linux/mlx5/device.h> #include <linux/mlx5/doorbell.h> @@@ -190,7 -189,6 +190,6 @@@ enum mlx5_coredev_type };
struct mlx5_field_desc { - struct dentry *dent; int i; };
@@@ -243,11 -241,6 +242,6 @@@ struct mlx5_cmd_msg
struct mlx5_cmd_debug { struct dentry *dbg_root; - struct dentry *dbg_in; - struct dentry *dbg_out; - struct dentry *dbg_outlen; - struct dentry *dbg_status; - struct dentry *dbg_run; void *in_msg; void *out_msg; u8 status; @@@ -272,8 -265,6 +266,6 @@@ struct mlx5_cmd_stats u64 sum; u64 n; struct dentry *root; - struct dentry *avg; - struct dentry *count; /* protect command average calculations */ spinlock_t lock; }; @@@ -399,7 -390,7 +391,7 @@@ enum mlx5_res_type
struct mlx5_core_rsc_common { enum mlx5_res_type res; - atomic_t refcount; + refcount_t refcount; struct completion free; };
@@@ -478,6 -469,17 +470,17 @@@ struct mlx5_core_sriov u16 max_vfs; };
+ struct mlx5_fc_pool { + struct mlx5_core_dev *dev; + struct mutex pool_lock; /* protects pool lists */ + struct list_head fully_used; + struct list_head partially_used; + struct list_head unused; + int available_fcs; + int used_fcs; + int threshold; + }; + struct mlx5_fc_stats { spinlock_t counters_idr_lock; /* protects counters_idr */ struct idr counters_idr; @@@ -490,6 -492,7 +493,7 @@@ unsigned long next_query; unsigned long sampling_interval; /* jiffies */ u32 *bulk_query_out; + struct mlx5_fc_pool fc_pool; };
struct mlx5_events; @@@ -961,7 -964,7 +965,7 @@@ int mlx5_vector2eqn(struct mlx5_core_de int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn);
- int mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); + void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev); void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, @@@ -973,7 -976,7 +977,7 @@@ int mlx5_db_alloc_node(struct mlx5_core void mlx5_db_free(struct mlx5_core_dev *dev, struct mlx5_db *db);
const char *mlx5_command_str(int command); - int mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev); + void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev); void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int npsvs, u32 *sig_index); diff --combined include/linux/skbuff.h index ba5583522d24,7eb28b72d9ba..77c6dc88e95d --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@@ -14,6 -14,7 +14,7 @@@ #include <linux/compiler.h> #include <linux/time.h> #include <linux/bug.h> + #include <linux/bvec.h> #include <linux/cache.h> #include <linux/rbtree.h> #include <linux/socket.h> @@@ -308,58 -309,45 +309,45 @@@ extern int sysctl_max_skb_frags */ #define GSO_BY_FRAGS 0xFFFF
- typedef struct skb_frag_struct skb_frag_t; - - struct skb_frag_struct { - struct { - struct page *p; - } page; - #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 page_offset; - __u32 size; - #else - __u16 page_offset; - __u16 size; - #endif - }; + typedef struct bio_vec skb_frag_t;
/** - * skb_frag_size - Returns the size of a skb fragment + * skb_frag_size() - Returns the size of a skb fragment * @frag: skb fragment */ static inline unsigned int skb_frag_size(const skb_frag_t *frag) { - return frag->size; + return frag->bv_len; }
/** - * skb_frag_size_set - Sets the size of a skb fragment + * skb_frag_size_set() - Sets the size of a skb fragment * @frag: skb fragment * @size: size of fragment */ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) { - frag->size = size; + frag->bv_len = size; }
/** - * skb_frag_size_add - Incrementes the size of a skb fragment by %delta + * skb_frag_size_add() - Increments the size of a skb fragment by @delta * @frag: skb fragment * @delta: value to add */ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) { - frag->size += delta; + frag->bv_len += delta; }
/** - * skb_frag_size_sub - Decrements the size of a skb fragment by %delta + * skb_frag_size_sub() - Decrements the size of a skb fragment by @delta * @frag: skb fragment * @delta: value to subtract */ static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) { - frag->size -= delta; + frag->bv_len -= delta; }
/** @@@ -379,7 -367,7 +367,7 @@@ static inline bool skb_frag_must_loop(s * skb_frag_foreach_page - loop over pages in a fragment * * @f: skb frag to operate on - * @f_off: offset from start of f->page.p + * @f_off: offset from start of f->bv_page * @f_len: length from f_off to loop over * @p: (temp var) current page * @p_off: (temp var) offset from start of current page, @@@ -1283,7 -1271,7 +1271,7 @@@ static inline int skb_flow_dissector_bp
struct bpf_flow_dissector; bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, - __be16 proto, int nhoff, int hlen); + __be16 proto, int nhoff, int hlen, unsigned int flags);
bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, @@@ -1374,14 -1362,6 +1362,14 @@@ static inline void skb_copy_hash(struc to->l4_hash = from->l4_hash; };
+static inline void skb_copy_decrypted(struct sk_buff *to, + const struct sk_buff *from) +{ +#ifdef CONFIG_TLS_DEVICE + to->decrypted = from->decrypted; +#endif +} + #ifdef NET_SKBUFF_DATA_USES_OFFSET static inline unsigned char *skb_end_pointer(const struct sk_buff *skb) { @@@ -2097,8 -2077,8 +2085,8 @@@ static inline void __skb_fill_page_desc * that not all callers have unique ownership of the page but rely * on page_is_pfmemalloc doing the right thing(tm). */ - frag->page.p = page; - frag->page_offset = off; + frag->bv_page = page; + frag->bv_offset = off; skb_frag_size_set(frag, size);
page = compound_head(page); @@@ -2878,6 -2858,46 +2866,46 @@@ static inline void skb_propagate_pfmema }
/** + * skb_frag_off() - Returns the offset of a skb fragment + * @frag: the paged fragment + */ + static inline unsigned int skb_frag_off(const skb_frag_t *frag) + { + return frag->bv_offset; + } + + /** + * skb_frag_off_add() - Increments the offset of a skb fragment by @delta + * @frag: skb fragment + * @delta: value to add + */ + static inline void skb_frag_off_add(skb_frag_t *frag, int delta) + { + frag->bv_offset += delta; + } + + /** + * skb_frag_off_set() - Sets the offset of a skb fragment + * @frag: skb fragment + * @offset: offset of fragment + */ + static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) + { + frag->bv_offset = offset; + } + + /** + * skb_frag_off_copy() - Sets the offset of a skb fragment from another fragment + * @fragto: skb fragment where offset is set + * @fragfrom: skb fragment offset is copied from + */ + static inline void skb_frag_off_copy(skb_frag_t *fragto, + const skb_frag_t *fragfrom) + { + fragto->bv_offset = fragfrom->bv_offset; + } + + /** * skb_frag_page - retrieve the page referred to by a paged fragment * @frag: the paged fragment * @@@ -2885,7 -2905,7 +2913,7 @@@ */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { - return frag->page.p; + return frag->bv_page; }
/** @@@ -2943,7 -2963,7 +2971,7 @@@ static inline void skb_frag_unref(struc */ static inline void *skb_frag_address(const skb_frag_t *frag) { - return page_address(skb_frag_page(frag)) + frag->page_offset; + return page_address(skb_frag_page(frag)) + skb_frag_off(frag); }
/** @@@ -2959,7 -2979,18 +2987,18 @@@ static inline void *skb_frag_address_sa if (unlikely(!ptr)) return NULL;
- return ptr + frag->page_offset; + return ptr + skb_frag_off(frag); + } + + /** + * skb_frag_page_copy() - sets the page in a fragment from another fragment + * @fragto: skb fragment where page is set + * @fragfrom: skb fragment page is copied from + */ + static inline void skb_frag_page_copy(skb_frag_t *fragto, + const skb_frag_t *fragfrom) + { + fragto->bv_page = fragfrom->bv_page; }
/** @@@ -2971,7 -3002,7 +3010,7 @@@ */ static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page) { - frag->page.p = page; + frag->bv_page = page; }
/** @@@ -3007,7 -3038,7 +3046,7 @@@ static inline dma_addr_t skb_frag_dma_m enum dma_data_direction dir) { return dma_map_page(dev, skb_frag_page(frag), - frag->page_offset + offset, size, dir); + skb_frag_off(frag) + offset, size, dir); }
static inline struct sk_buff *pskb_copy(struct sk_buff *skb, @@@ -3174,10 -3205,10 +3213,10 @@@ static inline bool skb_can_coalesce(str if (skb_zcopy(skb)) return false; if (i) { - const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
return page == skb_frag_page(frag) && - off == frag->page_offset + skb_frag_size(frag); + off == skb_frag_off(frag) + skb_frag_size(frag); } return false; } diff --combined include/net/netfilter/nf_tables.h index 475d6f28ca67,dc301e3d6739..e73d16f8b870 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@@ -25,6 -25,7 +25,7 @@@ struct nft_pktinfo struct xt_action_param xt; };
+ #if IS_ENABLED(CONFIG_NETFILTER) static inline struct net *nft_net(const struct nft_pktinfo *pkt) { return pkt->xt.state->net; @@@ -57,6 -58,7 +58,7 @@@ static inline void nft_set_pktinfo(stru pkt->skb = skb; pkt->xt.state = state; } + #endif
static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt, struct sk_buff *skb) @@@ -421,7 -423,8 +423,7 @@@ struct nft_set unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - u16 flags:13, - bound:1, + u16 flags:14, genmask:2; u8 klen; u8 dlen; @@@ -926,9 -929,11 +928,11 @@@ struct nft_chain_type int family; struct module *owner; unsigned int hook_mask; + #if IS_ENABLED(CONFIG_NETFILTER) nf_hookfn *hooks[NF_MAX_HOOKS]; int (*ops_register)(struct net *net, const struct nf_hook_ops *ops); void (*ops_unregister)(struct net *net, const struct nf_hook_ops *ops); + #endif };
int nft_chain_validate_dependency(const struct nft_chain *chain, @@@ -954,7 -959,9 +958,9 @@@ struct nft_stats * @flow_block: flow block (for hardware offload) */ struct nft_base_chain { + #if IS_ENABLED(CONFIG_NETFILTER) struct nf_hook_ops ops; + #endif const struct nft_chain_type *type; u8 policy; u8 flags; @@@ -1151,7 -1158,9 +1157,9 @@@ struct nft_flowtable use:30; u64 handle; /* runtime data below here */ + #if IS_ENABLED(CONFIG_NETFILTER) struct nf_hook_ops *ops ____cacheline_aligned; + #endif struct nf_flowtable data; };
@@@ -1206,6 -1215,8 +1214,8 @@@ void nft_trace_notify(struct nft_tracei #define MODULE_ALIAS_NFT_OBJ(type) \ MODULE_ALIAS("nft-obj-" __stringify(type))
+ #if IS_ENABLED(CONFIG_NF_TABLES) + /* * The gencursor defines two generations, the currently active and the * next one. Objects contain a bitmask of 2 bits specifying the generations @@@ -1279,6 -1290,8 +1289,8 @@@ static inline void nft_set_elem_change_ ext->genmask ^= nft_genmask_next(net); }
+ #endif /* IS_ENABLED(CONFIG_NF_TABLES) */ + /* * We use a free bit in the genmask field to indicate the element * is busy, meaning it is currently being processed either by @@@ -1347,15 -1360,12 +1359,15 @@@ struct nft_trans_rule struct nft_trans_set { struct nft_set *set; u32 set_id; + bool bound; };
#define nft_trans_set(trans) \ (((struct nft_trans_set *)trans->data)->set) #define nft_trans_set_id(trans) \ (((struct nft_trans_set *)trans->data)->set_id) +#define nft_trans_set_bound(trans) \ + (((struct nft_trans_set *)trans->data)->bound)
struct nft_trans_chain { bool update; @@@ -1386,15 -1396,12 +1398,15 @@@ struct nft_trans_table struct nft_trans_elem { struct nft_set *set; struct nft_set_elem elem; + bool bound; };
#define nft_trans_elem_set(trans) \ (((struct nft_trans_elem *)trans->data)->set) #define nft_trans_elem(trans) \ (((struct nft_trans_elem *)trans->data)->elem) +#define nft_trans_elem_set_bound(trans) \ + (((struct nft_trans_elem *)trans->data)->bound)
struct nft_trans_obj { struct nft_object *obj; diff --combined include/uapi/linux/bpf.h index a5aa7d3ac6a1,4393bd4b2419..0e66371bea13 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@@ -134,6 -134,7 +134,7 @@@ enum bpf_map_type BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, + BPF_MAP_TYPE_DEVMAP_HASH, };
/* Note that tracing related programs such as @@@ -1466,8 -1467,8 +1467,8 @@@ union bpf_attr * If no cookie has been set yet, generate a new cookie. Once * generated, the socket cookie remains stable for the life of the * socket. This helper can be useful for monitoring per socket - * networking traffic statistics as it provides a unique socket - * identifier per namespace. + * networking traffic statistics as it provides a global socket + * identifier that can be assumed unique. * Return * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. @@@ -2713,6 -2714,33 +2714,33 @@@ * **-EPERM** if no permission to send the *sig*. * * **-EAGAIN** if bpf program can try again. + * + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. + * + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header. + * + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** SYN cookie cannot be issued due to error + * + * **-ENOENT** SYN cookie should not be issued (no SYN flood) + * + * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies + * + * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@@ -2824,7 -2852,8 +2852,8 @@@ FN(strtoul), \ FN(sk_storage_get), \ FN(sk_storage_delete), \ - FN(send_signal), + FN(send_signal), \ + FN(tcp_gen_syncookie),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@@ -3507,6 -3536,10 +3536,10 @@@ enum bpf_task_fd_type BPF_FD_TYPE_URETPROBE, /* filename + offset */ };
+ #define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) + #define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) + #define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) + struct bpf_flow_keys { __u16 nhoff; __u16 thoff; @@@ -3528,6 -3561,8 +3561,8 @@@ __u32 ipv6_dst[4]; /* in6_addr; network order */ }; }; + __u32 flags; + __be32 flow_label; };
struct bpf_func_info { diff --combined net/ipv4/tcp.c index 77b485d60b9d,f8fa1686f7f3..051ef10374f6 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@@ -984,9 -984,6 +984,9 @@@ new_segment if (!skb) goto wait_for_memory;
+#ifdef CONFIG_TLS_DEVICE + skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); +#endif skb_entail(sk, skb); copy = size_goal; } @@@ -1165,7 -1162,7 +1165,7 @@@ int tcp_sendmsg_locked(struct sock *sk struct sockcm_cookie sockc; int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; - bool process_backlog = false; + int process_backlog = 0; bool zc = false; long timeo;
@@@ -1257,9 -1254,10 +1257,10 @@@ new_segment if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf;
- if (process_backlog && sk_flush_backlog(sk)) { - process_backlog = false; - goto restart; + if (unlikely(process_backlog >= 16)) { + process_backlog = 0; + if (sk_flush_backlog(sk)) + goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, @@@ -1267,7 -1265,7 +1268,7 @@@ if (!skb) goto wait_for_memory;
- process_backlog = true; + process_backlog++; skb->ip_summed = CHECKSUM_PARTIAL;
skb_entail(sk, skb); @@@ -1779,19 -1777,21 +1780,21 @@@ static int tcp_zerocopy_receive(struct break; frags = skb_shinfo(skb)->frags; while (offset) { - if (frags->size > offset) + if (skb_frag_size(frags) > offset) goto out; - offset -= frags->size; + offset -= skb_frag_size(frags); frags++; } } - if (frags->size != PAGE_SIZE || frags->page_offset) { + if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) { int remaining = zc->recv_skip_hint; + int size = skb_frag_size(frags);
- while (remaining && (frags->size != PAGE_SIZE || - frags->page_offset)) { - remaining -= frags->size; + while (remaining && (size != PAGE_SIZE || + skb_frag_off(frags))) { + remaining -= size; frags++; + size = skb_frag_size(frags); } zc->recv_skip_hint -= remaining; break; @@@ -3784,8 -3784,8 +3787,8 @@@ int tcp_md5_hash_skb_data(struct tcp_md return 1;
for (i = 0; i < shi->nr_frags; ++i) { - const struct skb_frag_struct *f = &shi->frags[i]; - unsigned int offset = f->page_offset; + const skb_frag_t *f = &shi->frags[i]; + unsigned int offset = skb_frag_off(f); struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT);
sg_set_page(&sg, page, skb_frag_size(f), diff --combined net/ipv4/tcp_output.c index 979520e46e33,e6d02e05bb1c..5c46bc4c7e8d --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@@ -1320,7 -1320,6 +1320,7 @@@ int tcp_fragment(struct sock *sk, enum buff = sk_stream_alloc_skb(sk, nsize, gfp, true); if (!buff) return -ENOMEM; /* We'll just try again later. */ + skb_copy_decrypted(buff, skb);
sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); @@@ -1403,7 -1402,7 +1403,7 @@@ static int __pskb_trim_head(struct sk_b } else { shinfo->frags[k] = shinfo->frags[i]; if (eat) { - shinfo->frags[k].page_offset += eat; + skb_frag_off_add(&shinfo->frags[k], eat); skb_frag_size_sub(&shinfo->frags[k], eat); eat = 0; } @@@ -1875,7 -1874,6 +1875,7 @@@ static int tso_fragment(struct sock *sk buff = sk_stream_alloc_skb(sk, 0, gfp, true); if (unlikely(!buff)) return -ENOMEM; + skb_copy_decrypted(buff, skb);
sk->sk_wmem_queued += buff->truesize; sk_mem_charge(sk, buff->truesize); @@@ -2145,7 -2143,6 +2145,7 @@@ static int tcp_mtu_probe(struct sock *s sk_mem_charge(sk, nskb->truesize);
skb = tcp_send_head(sk); + skb_copy_decrypted(nskb, skb);
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; diff --combined net/netfilter/nf_tables_api.c index 88abbddf8967,fe3b7b0c6c66..926f87702b64 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@@ -138,14 -138,9 +138,14 @@@ static void nft_set_trans_bind(const st return;
list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { - if (trans->msg_type == NFT_MSG_NEWSET && - nft_trans_set(trans) == set) { - set->bound = true; + switch (trans->msg_type) { + case NFT_MSG_NEWSET: + if (nft_trans_set(trans) == set) + nft_trans_set_bound(trans) = true; + break; + case NFT_MSG_NEWSETELEM: + if (nft_trans_elem_set(trans) == set) + nft_trans_elem_set_bound(trans) = true; break; } } @@@ -6911,7 -6906,7 +6911,7 @@@ static int __nf_tables_abort(struct ne break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - if (nft_trans_set(trans)->bound) { + if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; } @@@ -6923,7 -6918,7 +6923,7 @@@ nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: - if (nft_trans_elem_set(trans)->bound) { + if (nft_trans_elem_set_bound(trans)) { nft_trans_destroy(trans); break; } @@@ -7598,6 -7593,11 +7598,11 @@@ static struct pernet_operations nf_tabl .exit = nf_tables_exit_net, };
+ static struct flow_indr_block_ing_entry block_ing_entry = { + .cb = nft_indr_block_get_and_ing_cmd, + .list = LIST_HEAD_INIT(block_ing_entry.list), + }; + static int __init nf_tables_module_init(void) { int err; @@@ -7629,6 -7629,7 +7634,7 @@@ goto err5;
nft_chain_route_init(); + flow_indr_add_block_ing_cb(&block_ing_entry); return err; err5: rhltable_destroy(&nft_objname_ht); @@@ -7645,6 -7646,7 +7651,7 @@@ err1
static void __exit nf_tables_module_exit(void) { + flow_indr_del_block_ing_cb(&block_ing_entry); nfnetlink_subsys_unregister(&nf_tables_subsys); unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); diff --combined net/rxrpc/ar-internal.h index a42d6b833675,63b26baa108a..b99823f461d7 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@@ -185,18 -185,11 +185,18 @@@ struct rxrpc_host_header * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { - union { - u8 nr_jumbo; /* Number of jumbo subpackets */ - }; + atomic_t nr_ring_pins; /* Number of rxtx ring pins */ + u8 nr_subpackets; /* Number of subpackets */ + u8 rx_flags; /* Received packet flags */ +#define RXRPC_SKB_INCL_LAST 0x01 /* - Includes last packet */ +#define RXRPC_SKB_TX_BUFFER 0x02 /* - Is transmit buffer */ +#define RXRPC_SKB_NEEDS_COW 0x04 /* - Needs skb_cow_data() calling */ union { int remain; /* amount of space remaining for next write */ + + /* List of requested ACKs on subpackets */ + unsigned long rx_req_ack[(RXRPC_MAX_NR_JUMBO + BITS_PER_LONG - 1) / + BITS_PER_LONG]; };
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ @@@ -233,6 -226,9 +233,9 @@@ struct rxrpc_security int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, unsigned int, unsigned int, rxrpc_seq_t, u16);
+ /* Free crypto request on a call */ + void (*free_call_crypto)(struct rxrpc_call *); + /* Locate the data in a received packet that has been verified. */ void (*locate_data)(struct rxrpc_call *, struct sk_buff *, unsigned int *, unsigned int *); @@@ -261,8 -257,7 +264,8 @@@ */ struct rxrpc_local { struct rcu_head rcu; - atomic_t usage; + atomic_t active_users; /* Number of users of the local endpoint */ + atomic_t usage; /* Number of references to the structure */ struct rxrpc_net *rxnet; /* The network ns in which this resides */ struct list_head link; struct socket *socket; /* my UDP socket */ @@@ -565,6 -560,7 +568,7 @@@ struct rxrpc_call unsigned long expect_term_by; /* When we expect call termination by */ u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ + struct skcipher_request *cipher_req; /* Packet cipher request buffer */ struct timer_list timer; /* Combined event timer */ struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ @@@ -620,7 -616,8 +624,7 @@@ #define RXRPC_TX_ANNO_LAST 0x04 #define RXRPC_TX_ANNO_RESENT 0x08
-#define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ -#define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ +#define RXRPC_RX_ANNO_SUBPACKET 0x3f /* Subpacket number in jumbogram */ #define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ rxrpc_seq_t tx_hard_ack; /* Dead slot in buffer; the first transmitted but * not hard-ACK'd packet follows this. @@@ -656,6 -653,7 +660,6 @@@
/* receive-phase ACK management */ u8 ackr_reason; /* reason to ACK */ - u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_serial_t ackr_first_seq; /* first sequence number received */ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ @@@ -749,7 -747,7 +753,7 @@@ int rxrpc_reject_call(struct rxrpc_soc /* * call_event.c */ -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool, enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *);
@@@ -1008,8 -1006,6 +1012,8 @@@ struct rxrpc_local *rxrpc_lookup_local( struct rxrpc_local *rxrpc_get_local(struct rxrpc_local *); struct rxrpc_local *rxrpc_get_local_maybe(struct rxrpc_local *); void rxrpc_put_local(struct rxrpc_local *); +struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *); +void rxrpc_unuse_local(struct rxrpc_local *); void rxrpc_queue_local(struct rxrpc_local *); void rxrpc_destroy_all_locals(struct rxrpc_net *);
@@@ -1114,8 -1110,6 +1118,8 @@@ void rxrpc_see_skb(struct sk_buff *, en void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); void rxrpc_purge_queue(struct sk_buff_head *); +void rxrpc_pin_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_unpin_skb(struct sk_buff *, enum rxrpc_skb_trace);
/* * sysctl.c diff --combined net/rxrpc/call_object.c index 830b6152dfa3,60cbc81dc461..7997075f134a --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@@ -422,19 -422,6 +422,19 @@@ void rxrpc_get_call(struct rxrpc_call * }
/* + * Clean up the RxTx skb ring. + */ +static void rxrpc_cleanup_ring(struct rxrpc_call *call) +{ + int i; + + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { + rxrpc_unpin_skb(call->rxtx_buffer[i], rxrpc_skb_cleaned); + call->rxtx_buffer[i] = NULL; + } +} + +/* * Detach a call from its owning socket. */ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) @@@ -442,6 -429,7 +442,6 @@@ const void *here = __builtin_return_address(0); struct rxrpc_connection *conn = call->conn; bool put = false; - int i;
_enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
@@@ -488,10 -476,18 +488,12 @@@
_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
- if (conn) + if (conn) { rxrpc_disconnect_call(call); + conn->security->free_call_crypto(call); + }
- for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { - rxrpc_free_skb(call->rxtx_buffer[i], - (call->tx_phase ? rxrpc_skb_tx_cleaned : - rxrpc_skb_rx_cleaned)); - call->rxtx_buffer[i] = NULL; - } - + rxrpc_cleanup_ring(call); _leave(""); }
@@@ -574,6 -570,8 +576,6 @@@ static void rxrpc_rcu_destroy_call(stru */ void rxrpc_cleanup_call(struct rxrpc_call *call) { - int i; - _net("DESTROY CALL %d", call->debug_id);
memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); @@@ -584,8 -582,13 +586,8 @@@ ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); ASSERTCMP(call->conn, ==, NULL);
- /* Clean up the Rx/Tx buffer */ - for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) - rxrpc_free_skb(call->rxtx_buffer[i], - (call->tx_phase ? rxrpc_skb_tx_cleaned : - rxrpc_skb_rx_cleaned)); - - rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned); + rxrpc_cleanup_ring(call); + rxrpc_free_skb(call->tx_pending, rxrpc_skb_cleaned);
call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } diff --combined net/rxrpc/rxkad.c index c60c520fde7c,dbb109da1835..8d8aa3c230b5 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@@ -43,6 -43,7 +43,7 @@@ struct rxkad_level2_hdr * packets */ static struct crypto_sync_skcipher *rxkad_ci; + static struct skcipher_request *rxkad_ci_req; static DEFINE_MUTEX(rxkad_ci_mutex);
/* @@@ -99,8 -100,8 +100,8 @@@ error */ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) { + struct skcipher_request *req; struct rxrpc_key_token *token; - SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher); struct scatterlist sg; struct rxrpc_crypt iv; __be32 *tmpbuf; @@@ -115,6 -116,12 +116,12 @@@ if (!tmpbuf) return -ENOMEM;
+ req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + if (!req) { + kfree(tmpbuf); + return -ENOMEM; + } + token = conn->params.key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv));
@@@ -128,7 -135,7 +135,7 @@@ skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x); crypto_skcipher_encrypt(req); - skcipher_request_zero(req); + skcipher_request_free(req);
memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv)); kfree(tmpbuf); @@@ -137,6 -144,35 +144,35 @@@ }
/* + * Allocate and prepare the crypto request on a call. For any particular call, + * this is called serially for the packets, so no lock should be necessary. + */ + static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call) + { + struct crypto_skcipher *tfm = &call->conn->cipher->base; + struct skcipher_request *cipher_req = call->cipher_req; + + if (!cipher_req) { + cipher_req = skcipher_request_alloc(tfm, GFP_NOFS); + if (!cipher_req) + return NULL; + call->cipher_req = cipher_req; + } + + return cipher_req; + } + + /* + * Clean up the crypto on a call. + */ + static void rxkad_free_call_crypto(struct rxrpc_call *call) + { + if (call->cipher_req) + skcipher_request_free(call->cipher_req); + call->cipher_req = NULL; + } + + /* * partially encrypt a packet (level 1 security) */ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, @@@ -187,8 -223,10 +223,8 @@@ static int rxkad_secure_packet_encrypt( struct rxrpc_skb_priv *sp; struct rxrpc_crypt iv; struct scatterlist sg[16]; - struct sk_buff *trailer; unsigned int len; u16 check; - int nsg; int err;
sp = rxrpc_skb(skb); @@@ -212,14 -250,15 +248,14 @@@ crypto_skcipher_encrypt(req);
/* we want to encrypt the skbuff in-place */ - nsg = skb_cow_data(skb, 0, &trailer); - err = -ENOMEM; - if (nsg < 0 || nsg > 16) + err = -EMSGSIZE; + if (skb_shinfo(skb)->nr_frags > 16) goto out;
len = data_size + call->conn->size_align - 1; len &= ~(call->conn->size_align - 1);
- sg_init_table(sg, nsg); + sg_init_table(sg, ARRAY_SIZE(sg)); err = skb_to_sgvec(skb, sg, 0, len); if (unlikely(err < 0)) goto out; @@@ -243,7 -282,7 +279,7 @@@ static int rxkad_secure_packet(struct r void *sechdr) { struct rxrpc_skb_priv *sp; - SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); + struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg; u32 x, y; @@@ -262,6 -301,10 +298,10 @@@ if (ret < 0) return ret;
+ req = rxkad_get_call_crypto(call); + if (!req) + return -ENOMEM; + /* continue encrypting from where we left off */ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
@@@ -316,10 -359,11 +356,10 @@@ static int rxkad_verify_packet_1(struc struct rxkad_level1_hdr sechdr; struct rxrpc_crypt iv; struct scatterlist sg[16]; - struct sk_buff *trailer; bool aborted; u32 data_size, buf; u16 check; - int nsg, ret; + int ret;
_enter("");
@@@ -332,7 -376,11 +372,7 @@@ /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. */ - nsg = skb_cow_data(skb, 0, &trailer); - if (nsg < 0 || nsg > 16) - goto nomem; - - sg_init_table(sg, nsg); + sg_init_table(sg, ARRAY_SIZE(sg)); ret = skb_to_sgvec(skb, sg, offset, 8); if (unlikely(ret < 0)) return ret; @@@ -380,6 -428,10 +420,6 @@@ protocol_error if (aborted) rxrpc_send_abort_packet(call); return -EPROTO; - -nomem: - _leave(" = -ENOMEM"); - return -ENOMEM; }
/* @@@ -394,6 -446,7 +434,6 @@@ static int rxkad_verify_packet_2(struc struct rxkad_level2_hdr sechdr; struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; - struct sk_buff *trailer; bool aborted; u32 data_size, buf; u16 check; @@@ -410,11 -463,12 +450,11 @@@ /* Decrypt the skbuff in-place. TODO: We really want to decrypt * directly into the target buffer. */ - nsg = skb_cow_data(skb, 0, &trailer); - if (nsg < 0) - goto nomem; - sg = _sg; - if (unlikely(nsg > 4)) { + nsg = skb_shinfo(skb)->nr_frags; + if (nsg <= 4) { + nsg = 4; + } else { sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO); if (!sg) goto nomem; @@@ -488,7 -542,7 +528,7 @@@ static int rxkad_verify_packet(struct r unsigned int offset, unsigned int len, rxrpc_seq_t seq, u16 expected_cksum) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); + struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg; bool aborted; @@@ -501,6 -555,10 +541,10 @@@ if (!call->conn->cipher) return 0;
+ req = rxkad_get_call_crypto(call); + if (!req) + return -ENOMEM; + /* continue encrypting from where we left off */ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
@@@ -733,14 -791,18 +777,18 @@@ static void rxkad_calc_response_checksu /* * encrypt the response packet */ - static void rxkad_encrypt_response(struct rxrpc_connection *conn, - struct rxkad_response *resp, - const struct rxkad_key *s2) + static int rxkad_encrypt_response(struct rxrpc_connection *conn, + struct rxkad_response *resp, + const struct rxkad_key *s2) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher); + struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg[1];
+ req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + if (!req) + return -ENOMEM; + /* continue encrypting from where we left off */ memcpy(&iv, s2->session_key, sizeof(iv));
@@@ -750,7 -812,8 +798,8 @@@ skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x); crypto_skcipher_encrypt(req); - skcipher_request_zero(req); + skcipher_request_free(req); + return 0; }
/* @@@ -825,8 -888,9 +874,9 @@@ static int rxkad_respond_to_challenge(s
/* calculate the response checksum and then do the encryption */ rxkad_calc_response_checksum(resp); - rxkad_encrypt_response(conn, resp, token->kad); - ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); + ret = rxkad_encrypt_response(conn, resp, token->kad); + if (ret == 0) + ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); kfree(resp); return ret;
@@@ -1003,18 -1067,16 +1053,16 @@@ static void rxkad_decrypt_response(stru struct rxkad_response *resp, const struct rxrpc_crypt *session_key) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci); + struct skcipher_request *req = rxkad_ci_req; struct scatterlist sg[1]; struct rxrpc_crypt iv;
_enter(",,%08x%08x", ntohl(session_key->n[0]), ntohl(session_key->n[1]));
- ASSERT(rxkad_ci != NULL); - mutex_lock(&rxkad_ci_mutex); if (crypto_sync_skcipher_setkey(rxkad_ci, session_key->x, - sizeof(*session_key)) < 0) + sizeof(*session_key)) < 0) BUG();
memcpy(&iv, session_key, sizeof(iv)); @@@ -1208,10 -1270,26 +1256,26 @@@ static void rxkad_clear(struct rxrpc_co */ static int rxkad_init(void) { + struct crypto_sync_skcipher *tfm; + struct skcipher_request *req; + /* pin the cipher we need so that the crypto layer doesn't invoke * keventd to go get it */ - rxkad_ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); - return PTR_ERR_OR_ZERO(rxkad_ci); + tfm = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + req = skcipher_request_alloc(&tfm->base, GFP_KERNEL); + if (!req) + goto nomem_tfm; + + rxkad_ci_req = req; + rxkad_ci = tfm; + return 0; + + nomem_tfm: + crypto_free_sync_skcipher(tfm); + return -ENOMEM; }
/* @@@ -1219,8 -1297,8 +1283,8 @@@ */ static void rxkad_exit(void) { - if (rxkad_ci) - crypto_free_sync_skcipher(rxkad_ci); + crypto_free_sync_skcipher(rxkad_ci); + skcipher_request_free(rxkad_ci_req); }
/* @@@ -1235,6 -1313,7 +1299,7 @@@ const struct rxrpc_security rxkad = .prime_packet_security = rxkad_prime_packet_security, .secure_packet = rxkad_secure_packet, .verify_packet = rxkad_verify_packet, + .free_call_crypto = rxkad_free_call_crypto, .locate_data = rxkad_locate_data, .issue_challenge = rxkad_issue_challenge, .respond_to_challenge = rxkad_respond_to_challenge, diff --combined net/sched/sch_taprio.c index e25d414ae12f,046fd2c102b4..540bde009ea5 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@@ -677,10 -677,6 +677,6 @@@ static const struct nla_policy entry_po [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, };
- static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = { - [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED }, - }; - static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) @@@ -1195,8 -1191,7 +1191,8 @@@ unlock spin_unlock_bh(qdisc_lock(sch));
free_sched: - kfree(new_admin); + if (new_admin) + call_rcu(&new_admin->rcu, taprio_free_sched_cb);
return err; } diff --combined net/tls/tls_device.c index 43922d86e510,d184230665eb..a470df7ffcf9 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@@ -243,14 -243,14 +243,14 @@@ static void tls_append_frag(struct tls_ skb_frag_t *frag;
frag = &record->frags[record->num_frags - 1]; - if (frag->page.p == pfrag->page && - frag->page_offset + frag->size == pfrag->offset) { - frag->size += size; + if (skb_frag_page(frag) == pfrag->page && + skb_frag_off(frag) + skb_frag_size(frag) == pfrag->offset) { + skb_frag_size_add(frag, size); } else { ++frag; - frag->page.p = pfrag->page; - frag->page_offset = pfrag->offset; - frag->size = size; + __skb_frag_set_page(frag, pfrag->page); + skb_frag_off_set(frag, pfrag->offset); + skb_frag_size_set(frag, size); ++record->num_frags; get_page(pfrag->page); } @@@ -301,8 -301,8 +301,8 @@@ static int tls_push_record(struct sock frag = &record->frags[i]; sg_unmark_end(&offload_ctx->sg_tx_data[i]); sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag), - frag->size, frag->page_offset); - sk_mem_charge(sk, frag->size); + skb_frag_size(frag), skb_frag_off(frag)); + sk_mem_charge(sk, skb_frag_size(frag)); get_page(skb_frag_page(frag)); } sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]); @@@ -324,7 -324,7 +324,7 @@@ static int tls_create_new_record(struc
frag = &record->frags[0]; __skb_frag_set_page(frag, pfrag->page); - frag->page_offset = pfrag->offset; + skb_frag_off_set(frag, pfrag->offset); skb_frag_size_set(frag, prepend_size);
get_page(pfrag->page); @@@ -373,9 -373,9 +373,9 @@@ static int tls_push_data(struct sock *s struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); - int tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; int more = flags & (MSG_SENDPAGE_NOTLAST | MSG_MORE); struct tls_record_info *record = ctx->open_record; + int tls_push_record_flags; struct page_frag *pfrag; size_t orig_size = size; u32 max_open_record_len; @@@ -390,9 -390,6 +390,9 @@@ if (sk->sk_err) return -sk->sk_err;
+ flags |= MSG_SENDPAGE_DECRYPTED; + tls_push_record_flags = flags | MSG_SENDPAGE_NOTLAST; + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); if (tls_is_partially_sent_record(tls_ctx)) { rc = tls_push_partial_record(sk, tls_ctx, flags); @@@ -579,9 -576,7 +579,9 @@@ void tls_device_write_space(struct soc gfp_t sk_allocation = sk->sk_allocation;
sk->sk_allocation = GFP_ATOMIC; - tls_push_partial_record(sk, ctx, MSG_DONTWAIT | MSG_NOSIGNAL); + tls_push_partial_record(sk, ctx, + MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_SENDPAGE_DECRYPTED); sk->sk_allocation = sk_allocation; } } diff --combined scripts/link-vmlinux.sh index 2438a9faf3f1,c31193340108..25b4ed3f9648 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@@ -56,11 -56,10 +56,11 @@@ modpost_link( }
# Link of vmlinux - # ${1} - optional extra .o files - # ${2} - output file + # ${1} - output file + # ${@:2} - optional extra .o files vmlinux_link() { + info LD ${2} local lds="${objtree}/${KBUILD_LDS}" local objects
@@@ -71,9 -70,9 +71,9 @@@ --start-group \ ${KBUILD_VMLINUX_LIBS} \ --end-group \ - ${1}" + ${@:2}"
- ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${2} \ + ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} -o ${1} \ -T ${lds} ${objects} else objects="-Wl,--whole-archive \ @@@ -82,9 -81,9 +82,9 @@@ -Wl,--start-group \ ${KBUILD_VMLINUX_LIBS} \ -Wl,--end-group \ - ${1}" + ${@:2}"
- ${CC} ${CFLAGS_vmlinux} -o ${2} \ + ${CC} ${CFLAGS_vmlinux} -o ${1} \ -Wl,-T,${lds} \ ${objects} \ -lutil -lrt -lpthread @@@ -93,23 -92,34 +93,34 @@@ }
# generate .BTF typeinfo from DWARF debuginfo + # ${1} - vmlinux image + # ${2} - file to dump raw BTF data into gen_btf() { - local pahole_ver; + local pahole_ver + local bin_arch
if ! [ -x "$(command -v ${PAHOLE})" ]; then info "BTF" "${1}: pahole (${PAHOLE}) is not available" - return 0 + return 1 fi
pahole_ver=$(${PAHOLE} --version | sed -E 's/v([0-9]+).([0-9]+)/\1\2/') if [ "${pahole_ver}" -lt "113" ]; then info "BTF" "${1}: pahole version $(${PAHOLE} --version) is too old, need at least v1.13" - return 0 + return 1 fi
- info "BTF" ${1} + info "BTF" ${2} + vmlinux_link ${1} LLVM_OBJCOPY=${OBJCOPY} ${PAHOLE} -J ${1} + + # dump .BTF section into raw binary file to link with final vmlinux + bin_arch=$(${OBJDUMP} -f ${1} | grep architecture | \ + cut -d, -f1 | cut -d' ' -f2) + ${OBJCOPY} --dump-section .BTF=.btf.vmlinux.bin ${1} 2>/dev/null + ${OBJCOPY} -I binary -O ${CONFIG_OUTPUT_FORMAT} -B ${bin_arch} \ + --rename-section .data=.BTF .btf.vmlinux.bin ${2} }
# Create ${2} .o file with all symbols from the ${1} object file @@@ -139,18 -149,6 +150,18 @@@ kallsyms( ${CC} ${aflags} -c -o ${2} ${afile} }
+# Perform one step in kallsyms generation, including temporary linking of +# vmlinux. +kallsyms_step() +{ + kallsymso_prev=${kallsymso} + kallsymso=.tmp_kallsyms${1}.o + kallsyms_vmlinux=.tmp_vmlinux${1} + - vmlinux_link "${kallsymso_prev}" ${kallsyms_vmlinux} ++ vmlinux_link ${kallsyms_vmlinux} "${kallsymso_prev}" ${btf_vmlinux_bin_o} + kallsyms ${kallsyms_vmlinux} ${kallsymso} +} + # Create map file with all symbols from ${1} # See mksymap for additional details mksysmap() @@@ -166,6 -164,7 +177,7 @@@ sortextable( # Delete output files in case of error cleanup() { + rm -f .btf.* rm -f .tmp_System.map rm -f .tmp_kallsyms* rm -f .tmp_vmlinux* @@@ -228,8 -227,14 +240,15 @@@ ${MAKE} -f "${srctree}/scripts/Makefile info MODINFO modules.builtin.modinfo ${OBJCOPY} -j .modinfo -O binary vmlinux.o modules.builtin.modinfo
+ btf_vmlinux_bin_o="" + if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then + if gen_btf .tmp_vmlinux.btf .btf.vmlinux.bin.o ; then + btf_vmlinux_bin_o=.btf.vmlinux.bin.o + fi + fi + kallsymso="" +kallsymso_prev="" kallsyms_vmlinux="" if [ -n "${CONFIG_KALLSYMS}" ]; then
@@@ -256,23 -261,32 +275,19 @@@ # a) Verify that the System.map from vmlinux matches the map from # ${kallsymso}.
- kallsymso=.tmp_kallsyms2.o - kallsyms_vmlinux=.tmp_vmlinux2 - - # step 1 - vmlinux_link .tmp_vmlinux1 ${btf_vmlinux_bin_o} - kallsyms .tmp_vmlinux1 .tmp_kallsyms1.o - - # step 2 - vmlinux_link .tmp_vmlinux2 .tmp_kallsyms1.o ${btf_vmlinux_bin_o} - kallsyms .tmp_vmlinux2 .tmp_kallsyms2.o + kallsyms_step 1 + kallsyms_step 2
# step 3 - size1=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" .tmp_kallsyms1.o) - size2=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" .tmp_kallsyms2.o) + size1=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" ${kallsymso_prev}) + size2=$(${CONFIG_SHELL} "${srctree}/scripts/file-size.sh" ${kallsymso})
if [ $size1 -ne $size2 ] || [ -n "${KALLSYMS_EXTRA_PASS}" ]; then - kallsymso=.tmp_kallsyms3.o - kallsyms_vmlinux=.tmp_vmlinux3 - - vmlinux_link .tmp_vmlinux3 .tmp_kallsyms2.o ${btf_vmlinux_bin_o} - kallsyms .tmp_vmlinux3 .tmp_kallsyms3.o + kallsyms_step 3 fi fi
- vmlinux_link "${kallsymso}" vmlinux - - if [ -n "${CONFIG_DEBUG_INFO_BTF}" ]; then - gen_btf vmlinux - fi -info LD vmlinux + vmlinux_link vmlinux "${kallsymso}" "${btf_vmlinux_bin_o}"
if [ -n "${CONFIG_BUILDTIME_EXTABLE_SORT}" ]; then info SORTEX vmlinux diff --combined tools/include/uapi/linux/bpf.h index a5aa7d3ac6a1,4393bd4b2419..0e66371bea13 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@@ -134,6 -134,7 +134,7 @@@ enum bpf_map_type BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, + BPF_MAP_TYPE_DEVMAP_HASH, };
/* Note that tracing related programs such as @@@ -1466,8 -1467,8 +1467,8 @@@ union bpf_attr * If no cookie has been set yet, generate a new cookie. Once * generated, the socket cookie remains stable for the life of the * socket. This helper can be useful for monitoring per socket - * networking traffic statistics as it provides a unique socket - * identifier per namespace. + * networking traffic statistics as it provides a global socket + * identifier that can be assumed unique. * Return * A 8-byte long non-decreasing number on success, or 0 if the * socket field is missing inside *skb*. @@@ -2713,6 -2714,33 +2714,33 @@@ * **-EPERM** if no permission to send the *sig*. * * **-EAGAIN** if bpf program can try again. + * + * s64 bpf_tcp_gen_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * Description + * Try to issue a SYN cookie for the packet with corresponding + * IP/TCP headers, *iph* and *th*, on the listening socket in *sk*. + * + * *iph* points to the start of the IPv4 or IPv6 header, while + * *iph_len* contains **sizeof**\ (**struct iphdr**) or + * **sizeof**\ (**struct ip6hdr**). + * + * *th* points to the start of the TCP header, while *th_len* + * contains the length of the TCP header. + * + * Return + * On success, lower 32 bits hold the generated SYN cookie in + * followed by 16 bits which hold the MSS value for that cookie, + * and the top 16 bits are unused. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** SYN cookie cannot be issued due to error + * + * **-ENOENT** SYN cookie should not be issued (no SYN flood) + * + * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies + * + * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@@ -2824,7 -2852,8 +2852,8 @@@ FN(strtoul), \ FN(sk_storage_get), \ FN(sk_storage_delete), \ - FN(send_signal), + FN(send_signal), \ + FN(tcp_gen_syncookie),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@@ -3507,6 -3536,10 +3536,10 @@@ enum bpf_task_fd_type BPF_FD_TYPE_URETPROBE, /* filename + offset */ };
+ #define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) + #define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) + #define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) + struct bpf_flow_keys { __u16 nhoff; __u16 thoff; @@@ -3528,6 -3561,8 +3561,8 @@@ __u32 ipv6_dst[4]; /* in6_addr; network order */ }; }; + __u32 flags; + __be32 flow_label; };
struct bpf_func_info { diff --combined tools/lib/bpf/libbpf.c index 2b57d7ea7836,2233f919dd88..e0276520171b --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@@ -39,6 -39,7 +39,7 @@@ #include <sys/stat.h> #include <sys/types.h> #include <sys/vfs.h> + #include <sys/utsname.h> #include <tools/libc_compat.h> #include <libelf.h> #include <gelf.h> @@@ -48,6 -49,7 +49,7 @@@ #include "btf.h" #include "str_error.h" #include "libbpf_internal.h" + #include "hashmap.h"
#ifndef EM_BPF #define EM_BPF 247 @@@ -75,9 -77,12 +77,12 @@@ static int __base_pr(enum libbpf_print_
static libbpf_print_fn_t __libbpf_pr = __base_pr;
- void libbpf_set_print(libbpf_print_fn_t fn) + libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn) { + libbpf_print_fn_t old_print_fn = __libbpf_pr; + __libbpf_pr = fn; + return old_print_fn; }
__printf(2, 3) @@@ -182,6 -187,7 +187,6 @@@ struct bpf_program bpf_program_clear_priv_t clear_priv;
enum bpf_attach_type expected_attach_type; - int btf_fd; void *func_info; __u32 func_info_rec_size; __u32 func_info_cnt; @@@ -312,6 -318,7 +317,6 @@@ void bpf_program__unload(struct bpf_pro prog->instances.nr = -1; zfree(&prog->instances.fds);
- zclose(prog->btf_fd); zfree(&prog->func_info); zfree(&prog->line_info); } @@@ -390,6 -397,7 +395,6 @@@ bpf_program__init(void *data, size_t si prog->instances.fds = NULL; prog->instances.nr = -1; prog->type = BPF_PROG_TYPE_UNSPEC; - prog->btf_fd = -1;
return 0; errout: @@@ -1010,23 -1018,21 +1015,21 @@@ static int bpf_object__init_user_maps(s return 0; }
- static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, - __u32 id) + static const struct btf_type * + skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id) { const struct btf_type *t = btf__type_by_id(btf, id);
- while (true) { - switch (BTF_INFO_KIND(t->info)) { - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - case BTF_KIND_TYPEDEF: - t = btf__type_by_id(btf, t->type); - break; - default: - return t; - } + if (res_id) + *res_id = id; + + while (btf_is_mod(t) || btf_is_typedef(t)) { + if (res_id) + *res_id = t->type; + t = btf__type_by_id(btf, t->type); } + + return t; }
/* @@@ -1039,14 -1045,14 +1042,14 @@@ static bool get_map_field_int(const char *map_name, const struct btf *btf, const struct btf_type *def, const struct btf_member *m, __u32 *res) { - const struct btf_type *t = skip_mods_and_typedefs(btf, m->type); + const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL); const char *name = btf__name_by_offset(btf, m->name_off); const struct btf_array *arr_info; const struct btf_type *arr_t;
- if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { + if (!btf_is_ptr(t)) { pr_warning("map '%s': attr '%s': expected PTR, got %u.\n", - map_name, name, BTF_INFO_KIND(t->info)); + map_name, name, btf_kind(t)); return false; }
@@@ -1056,12 -1062,12 +1059,12 @@@ map_name, name, t->type); return false; } - if (BTF_INFO_KIND(arr_t->info) != BTF_KIND_ARRAY) { + if (!btf_is_array(arr_t)) { pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n", - map_name, name, BTF_INFO_KIND(arr_t->info)); + map_name, name, btf_kind(arr_t)); return false; } - arr_info = (const void *)(arr_t + 1); + arr_info = btf_array(arr_t); *res = arr_info->nelems; return true; } @@@ -1079,11 -1085,11 +1082,11 @@@ static int bpf_object__init_user_btf_ma struct bpf_map *map; int vlen, i;
- vi = (const struct btf_var_secinfo *)(const void *)(sec + 1) + var_idx; + vi = btf_var_secinfos(sec) + var_idx; var = btf__type_by_id(obj->btf, vi->type); - var_extra = (const void *)(var + 1); + var_extra = btf_var(var); map_name = btf__name_by_offset(obj->btf, var->name_off); - vlen = BTF_INFO_VLEN(var->info); + vlen = btf_vlen(var);
if (map_name == NULL || map_name[0] == '\0') { pr_warning("map #%d: empty name.\n", var_idx); @@@ -1093,9 -1099,9 +1096,9 @@@ pr_warning("map '%s' BTF data is corrupted.\n", map_name); return -EINVAL; } - if (BTF_INFO_KIND(var->info) != BTF_KIND_VAR) { + if (!btf_is_var(var)) { pr_warning("map '%s': unexpected var kind %u.\n", - map_name, BTF_INFO_KIND(var->info)); + map_name, btf_kind(var)); return -EINVAL; } if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && @@@ -1105,10 -1111,10 +1108,10 @@@ return -EOPNOTSUPP; }
- def = skip_mods_and_typedefs(obj->btf, var->type); - if (BTF_INFO_KIND(def->info) != BTF_KIND_STRUCT) { + def = skip_mods_and_typedefs(obj->btf, var->type, NULL); + if (!btf_is_struct(def)) { pr_warning("map '%s': unexpected def kind %u.\n", - map_name, BTF_INFO_KIND(var->info)); + map_name, btf_kind(var)); return -EINVAL; } if (def->size > vi->size) { @@@ -1131,8 -1137,8 +1134,8 @@@ pr_debug("map '%s': at sec_idx %d, offset %zu.\n", map_name, map->sec_idx, map->sec_offset);
- vlen = BTF_INFO_VLEN(def->info); - m = (const void *)(def + 1); + vlen = btf_vlen(def); + m = btf_members(def); for (i = 0; i < vlen; i++, m++) { const char *name = btf__name_by_offset(obj->btf, m->name_off);
@@@ -1182,9 -1188,9 +1185,9 @@@ map_name, m->type); return -EINVAL; } - if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { + if (!btf_is_ptr(t)) { pr_warning("map '%s': key spec is not PTR: %u.\n", - map_name, BTF_INFO_KIND(t->info)); + map_name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); @@@ -1225,9 -1231,9 +1228,9 @@@ map_name, m->type); return -EINVAL; } - if (BTF_INFO_KIND(t->info) != BTF_KIND_PTR) { + if (!btf_is_ptr(t)) { pr_warning("map '%s': value spec is not PTR: %u.\n", - map_name, BTF_INFO_KIND(t->info)); + map_name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); @@@ -1288,7 -1294,7 +1291,7 @@@ static int bpf_object__init_user_btf_ma nr_types = btf__get_nr_types(obj->btf); for (i = 1; i <= nr_types; i++) { t = btf__type_by_id(obj->btf, i); - if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC) + if (!btf_is_datasec(t)) continue; name = btf__name_by_offset(obj->btf, t->name_off); if (strcmp(name, MAPS_ELF_SEC) == 0) { @@@ -1302,7 -1308,7 +1305,7 @@@ return -ENOENT; }
- vlen = BTF_INFO_VLEN(sec->info); + vlen = btf_vlen(sec); for (i = 0; i < vlen; i++) { err = bpf_object__init_user_btf_map(obj, sec, i, obj->efile.btf_maps_shndx, @@@ -1363,16 -1369,14 +1366,14 @@@ static void bpf_object__sanitize_btf(st struct btf *btf = obj->btf; struct btf_type *t; int i, j, vlen; - __u16 kind;
if (!obj->btf || (has_func && has_datasec)) return;
for (i = 1; i <= btf__get_nr_types(btf); i++) { t = (struct btf_type *)btf__type_by_id(btf, i); - kind = BTF_INFO_KIND(t->info);
- if (!has_datasec && kind == BTF_KIND_VAR) { + if (!has_datasec && btf_is_var(t)) { /* replace VAR with INT */ t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0); /* @@@ -1381,11 -1385,11 +1382,11 @@@ * original variable took less than 4 bytes */ t->size = 1; - *(int *)(t+1) = BTF_INT_ENC(0, 0, 8); - } else if (!has_datasec && kind == BTF_KIND_DATASEC) { + *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8); + } else if (!has_datasec && btf_is_datasec(t)) { /* replace DATASEC with STRUCT */ - struct btf_var_secinfo *v = (void *)(t + 1); - struct btf_member *m = (void *)(t + 1); + const struct btf_var_secinfo *v = btf_var_secinfos(t); + struct btf_member *m = btf_members(t); struct btf_type *vt; char *name;
@@@ -1396,7 -1400,7 +1397,7 @@@ name++; }
- vlen = BTF_INFO_VLEN(t->info); + vlen = btf_vlen(t); t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen); for (j = 0; j < vlen; j++, v++, m++) { /* order of field assignments is important */ @@@ -1406,12 -1410,12 +1407,12 @@@ vt = (void *)btf__type_by_id(btf, v->type); m->name_off = vt->name_off; } - } else if (!has_func && kind == BTF_KIND_FUNC_PROTO) { + } else if (!has_func && btf_is_func_proto(t)) { /* replace FUNC_PROTO with ENUM */ - vlen = BTF_INFO_VLEN(t->info); + vlen = btf_vlen(t); t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen); t->size = sizeof(__u32); /* kernel enforced */ - } else if (!has_func && kind == BTF_KIND_FUNC) { + } else if (!has_func && btf_is_func(t)) { /* replace FUNC with TYPEDEF */ t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0); } @@@ -1769,15 -1773,22 +1770,22 @@@ bpf_program__collect_reloc(struct bpf_p (long long) sym.st_value, sym.st_name, name);
shdr_idx = sym.st_shndx; + insn_idx = rel.r_offset / sizeof(struct bpf_insn); + pr_debug("relocation: insn_idx=%u, shdr_idx=%u\n", + insn_idx, shdr_idx); + + if (shdr_idx >= SHN_LORESERVE) { + pr_warning("relocation: not yet supported relo for non-static global '%s' variable in special section (0x%x) found in insns[%d].code 0x%x\n", + name, shdr_idx, insn_idx, + insns[insn_idx].code); + return -LIBBPF_ERRNO__RELOC; + } if (!bpf_object__relo_in_known_section(obj, shdr_idx)) { pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n", prog->section_name, shdr_idx); return -LIBBPF_ERRNO__RELOC; }
- insn_idx = rel.r_offset / sizeof(struct bpf_insn); - pr_debug("relocation: insn_idx=%u\n", insn_idx); - if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) { if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) { pr_warning("incorrect bpf_call opcode\n"); @@@ -2285,9 -2296,900 +2293,897 @@@ bpf_program_reloc_btf_ext(struct bpf_pr prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext); }
- if (!insn_offset) - prog->btf_fd = btf__fd(obj->btf); - return 0; }
+ #define BPF_CORE_SPEC_MAX_LEN 64 + + /* represents BPF CO-RE field or array element accessor */ + struct bpf_core_accessor { + __u32 type_id; /* struct/union type or array element type */ + __u32 idx; /* field index or array index */ + const char *name; /* field name or NULL for array accessor */ + }; + + struct bpf_core_spec { + const struct btf *btf; + /* high-level spec: named fields and array indices only */ + struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; + /* high-level spec length */ + int len; + /* raw, low-level spec: 1-to-1 with accessor spec string */ + int raw_spec[BPF_CORE_SPEC_MAX_LEN]; + /* raw spec length */ + int raw_len; + /* field byte offset represented by spec */ + __u32 offset; + }; + + static bool str_is_empty(const char *s) + { + return !s || !s[0]; + } + + /* + * Turn bpf_offset_reloc into a low- and high-level spec representation, + * validating correctness along the way, as well as calculating resulting + * field offset (in bytes), specified by accessor string. Low-level spec + * captures every single level of nestedness, including traversing anonymous + * struct/union members. High-level one only captures semantically meaningful + * "turning points": named fields and array indicies. + * E.g., for this case: + * + * struct sample { + * int __unimportant; + * struct { + * int __1; + * int __2; + * int a[7]; + * }; + * }; + * + * struct sample *s = ...; + * + * int x = &s->a[3]; // access string = '0:1:2:3' + * + * Low-level spec has 1:1 mapping with each element of access string (it's + * just a parsed access string representation): [0, 1, 2, 3]. + * + * High-level spec will capture only 3 points: + * - intial zero-index access by pointer (&s->... is the same as &s[0]...); + * - field 'a' access (corresponds to '2' in low-level spec); + * - array element #3 access (corresponds to '3' in low-level spec). + * + */ + static int bpf_core_spec_parse(const struct btf *btf, + __u32 type_id, + const char *spec_str, + struct bpf_core_spec *spec) + { + int access_idx, parsed_len, i; + const struct btf_type *t; + const char *name; + __u32 id; + __s64 sz; + + if (str_is_empty(spec_str) || *spec_str == ':') + return -EINVAL; + + memset(spec, 0, sizeof(*spec)); + spec->btf = btf; + + /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */ + while (*spec_str) { + if (*spec_str == ':') + ++spec_str; + if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1) + return -EINVAL; + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + spec_str += parsed_len; + spec->raw_spec[spec->raw_len++] = access_idx; + } + + if (spec->raw_len == 0) + return -EINVAL; + + /* first spec value is always reloc type array index */ + t = skip_mods_and_typedefs(btf, type_id, &id); + if (!t) + return -EINVAL; + + access_idx = spec->raw_spec[0]; + spec->spec[0].type_id = id; + spec->spec[0].idx = access_idx; + spec->len++; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->offset = access_idx * sz; + + for (i = 1; i < spec->raw_len; i++) { + t = skip_mods_and_typedefs(btf, id, &id); + if (!t) + return -EINVAL; + + access_idx = spec->raw_spec[i]; + + if (btf_is_composite(t)) { + const struct btf_member *m; + __u32 offset; + + if (access_idx >= btf_vlen(t)) + return -EINVAL; + if (btf_member_bitfield_size(t, access_idx)) + return -EINVAL; + + offset = btf_member_bit_offset(t, access_idx); + if (offset % 8) + return -EINVAL; + spec->offset += offset / 8; + + m = btf_members(t) + access_idx; + if (m->name_off) { + name = btf__name_by_offset(btf, m->name_off); + if (str_is_empty(name)) + return -EINVAL; + + spec->spec[spec->len].type_id = id; + spec->spec[spec->len].idx = access_idx; + spec->spec[spec->len].name = name; + spec->len++; + } + + id = m->type; + } else if (btf_is_array(t)) { + const struct btf_array *a = btf_array(t); + + t = skip_mods_and_typedefs(btf, a->type, &id); + if (!t || access_idx >= a->nelems) + return -EINVAL; + + spec->spec[spec->len].type_id = id; + spec->spec[spec->len].idx = access_idx; + spec->len++; + + sz = btf__resolve_size(btf, id); + if (sz < 0) + return sz; + spec->offset += access_idx * sz; + } else { + pr_warning("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", + type_id, spec_str, i, id, btf_kind(t)); + return -EINVAL; + } + } + + return 0; + } + + static bool bpf_core_is_flavor_sep(const char *s) + { + /* check X___Y name pattern, where X and Y are not underscores */ + return s[0] != '_' && /* X */ + s[1] == '_' && s[2] == '_' && s[3] == '_' && /* ___ */ + s[4] != '_'; /* Y */ + } + + /* Given 'some_struct_name___with_flavor' return the length of a name prefix + * before last triple underscore. Struct name part after last triple + * underscore is ignored by BPF CO-RE relocation during relocation matching. + */ + static size_t bpf_core_essential_name_len(const char *name) + { + size_t n = strlen(name); + int i; + + for (i = n - 5; i >= 0; i--) { + if (bpf_core_is_flavor_sep(name + i)) + return i + 1; + } + return n; + } + + /* dynamically sized list of type IDs */ + struct ids_vec { + __u32 *data; + int len; + }; + + static void bpf_core_free_cands(struct ids_vec *cand_ids) + { + free(cand_ids->data); + free(cand_ids); + } + + static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf, + __u32 local_type_id, + const struct btf *targ_btf) + { + size_t local_essent_len, targ_essent_len; + const char *local_name, *targ_name; + const struct btf_type *t; + struct ids_vec *cand_ids; + __u32 *new_ids; + int i, err, n; + + t = btf__type_by_id(local_btf, local_type_id); + if (!t) + return ERR_PTR(-EINVAL); + + local_name = btf__name_by_offset(local_btf, t->name_off); + if (str_is_empty(local_name)) + return ERR_PTR(-EINVAL); + local_essent_len = bpf_core_essential_name_len(local_name); + + cand_ids = calloc(1, sizeof(*cand_ids)); + if (!cand_ids) + return ERR_PTR(-ENOMEM); + + n = btf__get_nr_types(targ_btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(targ_btf, i); + targ_name = btf__name_by_offset(targ_btf, t->name_off); + if (str_is_empty(targ_name)) + continue; + + targ_essent_len = bpf_core_essential_name_len(targ_name); + if (targ_essent_len != local_essent_len) + continue; + + if (strncmp(local_name, targ_name, local_essent_len) == 0) { + pr_debug("[%d] %s: found candidate [%d] %s\n", + local_type_id, local_name, i, targ_name); + new_ids = realloc(cand_ids->data, cand_ids->len + 1); + if (!new_ids) { + err = -ENOMEM; + goto err_out; + } + cand_ids->data = new_ids; + cand_ids->data[cand_ids->len++] = i; + } + } + return cand_ids; + err_out: + bpf_core_free_cands(cand_ids); + return ERR_PTR(err); + } + + /* Check two types for compatibility, skipping const/volatile/restrict and + * typedefs, to ensure we are relocating offset to the compatible entities: + * - any two STRUCTs/UNIONs are compatible and can be mixed; + * - any two FWDs are compatible; + * - any two PTRs are always compatible; + * - for ENUMs, check sizes, names are ignored; + * - for INT, size and bitness should match, signedness is ignored; + * - for ARRAY, dimensionality is ignored, element types are checked for + * compatibility recursively; + * - everything else shouldn't be ever a target of relocation. + * These rules are not set in stone and probably will be adjusted as we get + * more experience with using BPF CO-RE relocations. + */ + static int bpf_core_fields_are_compat(const struct btf *local_btf, + __u32 local_id, + const struct btf *targ_btf, + __u32 targ_id) + { + const struct btf_type *local_type, *targ_type; + + recur: + local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id); + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!local_type || !targ_type) + return -EINVAL; + + if (btf_is_composite(local_type) && btf_is_composite(targ_type)) + return 1; + if (btf_kind(local_type) != btf_kind(targ_type)) + return 0; + + switch (btf_kind(local_type)) { + case BTF_KIND_FWD: + case BTF_KIND_PTR: + return 1; + case BTF_KIND_ENUM: + return local_type->size == targ_type->size; + case BTF_KIND_INT: + return btf_int_offset(local_type) == 0 && + btf_int_offset(targ_type) == 0 && + local_type->size == targ_type->size && + btf_int_bits(local_type) == btf_int_bits(targ_type); + case BTF_KIND_ARRAY: + local_id = btf_array(local_type)->type; + targ_id = btf_array(targ_type)->type; + goto recur; + default: + pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n", + btf_kind(local_type), local_id, targ_id); + return 0; + } + } + + /* + * Given single high-level named field accessor in local type, find + * corresponding high-level accessor for a target type. Along the way, + * maintain low-level spec for target as well. Also keep updating target + * offset. + * + * Searching is performed through recursive exhaustive enumeration of all + * fields of a struct/union. If there are any anonymous (embedded) + * structs/unions, they are recursively searched as well. If field with + * desired name is found, check compatibility between local and target types, + * before returning result. + * + * 1 is returned, if field is found. + * 0 is returned if no compatible field is found. + * <0 is returned on error. + */ + static int bpf_core_match_member(const struct btf *local_btf, + const struct bpf_core_accessor *local_acc, + const struct btf *targ_btf, + __u32 targ_id, + struct bpf_core_spec *spec, + __u32 *next_targ_id) + { + const struct btf_type *local_type, *targ_type; + const struct btf_member *local_member, *m; + const char *local_name, *targ_name; + __u32 local_id; + int i, n, found; + + targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id); + if (!targ_type) + return -EINVAL; + if (!btf_is_composite(targ_type)) + return 0; + + local_id = local_acc->type_id; + local_type = btf__type_by_id(local_btf, local_id); + local_member = btf_members(local_type) + local_acc->idx; + local_name = btf__name_by_offset(local_btf, local_member->name_off); + + n = btf_vlen(targ_type); + m = btf_members(targ_type); + for (i = 0; i < n; i++, m++) { + __u32 offset; + + /* bitfield relocations not supported */ + if (btf_member_bitfield_size(targ_type, i)) + continue; + offset = btf_member_bit_offset(targ_type, i); + if (offset % 8) + continue; + + /* too deep struct/union/array nesting */ + if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + /* speculate this member will be the good one */ + spec->offset += offset / 8; + spec->raw_spec[spec->raw_len++] = i; + + targ_name = btf__name_by_offset(targ_btf, m->name_off); + if (str_is_empty(targ_name)) { + /* embedded struct/union, we need to go deeper */ + found = bpf_core_match_member(local_btf, local_acc, + targ_btf, m->type, + spec, next_targ_id); + if (found) /* either found or error */ + return found; + } else if (strcmp(local_name, targ_name) == 0) { + /* matching named field */ + struct bpf_core_accessor *targ_acc; + + targ_acc = &spec->spec[spec->len++]; + targ_acc->type_id = targ_id; + targ_acc->idx = i; + targ_acc->name = targ_name; + + *next_targ_id = m->type; + found = bpf_core_fields_are_compat(local_btf, + local_member->type, + targ_btf, m->type); + if (!found) + spec->len--; /* pop accessor */ + return found; + } + /* member turned out not to be what we looked for */ + spec->offset -= offset / 8; + spec->raw_len--; + } + + return 0; + } + + /* + * Try to match local spec to a target type and, if successful, produce full + * target spec (high-level, low-level + offset). + */ + static int bpf_core_spec_match(struct bpf_core_spec *local_spec, + const struct btf *targ_btf, __u32 targ_id, + struct bpf_core_spec *targ_spec) + { + const struct btf_type *targ_type; + const struct bpf_core_accessor *local_acc; + struct bpf_core_accessor *targ_acc; + int i, sz, matched; + + memset(targ_spec, 0, sizeof(*targ_spec)); + targ_spec->btf = targ_btf; + + local_acc = &local_spec->spec[0]; + targ_acc = &targ_spec->spec[0]; + + for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) { + targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, + &targ_id); + if (!targ_type) + return -EINVAL; + + if (local_acc->name) { + matched = bpf_core_match_member(local_spec->btf, + local_acc, + targ_btf, targ_id, + targ_spec, &targ_id); + if (matched <= 0) + return matched; + } else { + /* for i=0, targ_id is already treated as array element + * type (because it's the original struct), for others + * we should find array element type first + */ + if (i > 0) { + const struct btf_array *a; + + if (!btf_is_array(targ_type)) + return 0; + + a = btf_array(targ_type); + if (local_acc->idx >= a->nelems) + return 0; + if (!skip_mods_and_typedefs(targ_btf, a->type, + &targ_id)) + return -EINVAL; + } + + /* too deep struct/union/array nesting */ + if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN) + return -E2BIG; + + targ_acc->type_id = targ_id; + targ_acc->idx = local_acc->idx; + targ_acc->name = NULL; + targ_spec->len++; + targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx; + targ_spec->raw_len++; + + sz = btf__resolve_size(targ_btf, targ_id); + if (sz < 0) + return sz; + targ_spec->offset += local_acc->idx * sz; + } + } + + return 1; + } + + /* + * Patch relocatable BPF instruction. + * Expected insn->imm value is provided for validation, as well as the new + * relocated value. + * + * Currently three kinds of BPF instructions are supported: + * 1. rX = <imm> (assignment with immediate operand); + * 2. rX += <imm> (arithmetic operations with immediate operand); + * 3. *(rX) = <imm> (indirect memory assignment with immediate operand). + * + * If actual insn->imm value is wrong, bail out. + */ + static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off, + __u32 orig_off, __u32 new_off) + { + struct bpf_insn *insn; + int insn_idx; + __u8 class; + + if (insn_off % sizeof(struct bpf_insn)) + return -EINVAL; + insn_idx = insn_off / sizeof(struct bpf_insn); + + insn = &prog->insns[insn_idx]; + class = BPF_CLASS(insn->code); + + if (class == BPF_ALU || class == BPF_ALU64) { + if (BPF_SRC(insn->code) != BPF_K) + return -EINVAL; + if (insn->imm != orig_off) + return -EINVAL; + insn->imm = new_off; + pr_debug("prog '%s': patched insn #%d (ALU/ALU64) imm %d -> %d\n", + bpf_program__title(prog, false), + insn_idx, orig_off, new_off); + } else { + pr_warning("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", + bpf_program__title(prog, false), + insn_idx, insn->code, insn->src_reg, insn->dst_reg, + insn->off, insn->imm); + return -EINVAL; + } + return 0; + } + + static struct btf *btf_load_raw(const char *path) + { + struct btf *btf; + size_t read_cnt; + struct stat st; + void *data; + FILE *f; + + if (stat(path, &st)) + return ERR_PTR(-errno); + + data = malloc(st.st_size); + if (!data) + return ERR_PTR(-ENOMEM); + + f = fopen(path, "rb"); + if (!f) { + btf = ERR_PTR(-errno); + goto cleanup; + } + + read_cnt = fread(data, 1, st.st_size, f); + fclose(f); + if (read_cnt < st.st_size) { + btf = ERR_PTR(-EBADF); + goto cleanup; + } + + btf = btf__new(data, read_cnt); + + cleanup: + free(data); + return btf; + } + + /* + * Probe few well-known locations for vmlinux kernel image and try to load BTF + * data out of it to use for target BTF. + */ + static struct btf *bpf_core_find_kernel_btf(void) + { + struct { + const char *path_fmt; + bool raw_btf; + } locations[] = { + /* try canonical vmlinux BTF through sysfs first */ + { "/sys/kernel/btf/vmlinux", true /* raw BTF */ }, + /* fall back to trying to find vmlinux ELF on disk otherwise */ + { "/boot/vmlinux-%1$s" }, + { "/lib/modules/%1$s/vmlinux-%1$s" }, + { "/lib/modules/%1$s/build/vmlinux" }, + { "/usr/lib/modules/%1$s/kernel/vmlinux" }, + { "/usr/lib/debug/boot/vmlinux-%1$s" }, + { "/usr/lib/debug/boot/vmlinux-%1$s.debug" }, + { "/usr/lib/debug/lib/modules/%1$s/vmlinux" }, + }; + char path[PATH_MAX + 1]; + struct utsname buf; + struct btf *btf; + int i; + + uname(&buf); + + for (i = 0; i < ARRAY_SIZE(locations); i++) { + snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release); + + if (access(path, R_OK)) + continue; + + if (locations[i].raw_btf) + btf = btf_load_raw(path); + else + btf = btf__parse_elf(path, NULL); + + pr_debug("loading kernel BTF '%s': %ld\n", + path, IS_ERR(btf) ? PTR_ERR(btf) : 0); + if (IS_ERR(btf)) + continue; + + return btf; + } + + pr_warning("failed to find valid kernel BTF\n"); + return ERR_PTR(-ESRCH); + } + + /* Output spec definition in the format: + * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>, + * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b + */ + static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) + { + const struct btf_type *t; + const char *s; + __u32 type_id; + int i; + + type_id = spec->spec[0].type_id; + t = btf__type_by_id(spec->btf, type_id); + s = btf__name_by_offset(spec->btf, t->name_off); + libbpf_print(level, "[%u] %s + ", type_id, s); + + for (i = 0; i < spec->raw_len; i++) + libbpf_print(level, "%d%s", spec->raw_spec[i], + i == spec->raw_len - 1 ? " => " : ":"); + + libbpf_print(level, "%u @ &x", spec->offset); + + for (i = 0; i < spec->len; i++) { + if (spec->spec[i].name) + libbpf_print(level, ".%s", spec->spec[i].name); + else + libbpf_print(level, "[%u]", spec->spec[i].idx); + } + + } + + static size_t bpf_core_hash_fn(const void *key, void *ctx) + { + return (size_t)key; + } + + static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx) + { + return k1 == k2; + } + + static void *u32_as_hash_key(__u32 x) + { + return (void *)(uintptr_t)x; + } + + /* + * CO-RE relocate single instruction. + * + * The outline and important points of the algorithm: + * 1. For given local type, find corresponding candidate target types. + * Candidate type is a type with the same "essential" name, ignoring + * everything after last triple underscore (___). E.g., `sample`, + * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates + * for each other. Names with triple underscore are referred to as + * "flavors" and are useful, among other things, to allow to + * specify/support incompatible variations of the same kernel struct, which + * might differ between different kernel versions and/or build + * configurations. + * + * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C + * converter, when deduplicated BTF of a kernel still contains more than + * one different types with the same name. In that case, ___2, ___3, etc + * are appended starting from second name conflict. But start flavors are + * also useful to be defined "locally", in BPF program, to extract same + * data from incompatible changes between different kernel + * versions/configurations. For instance, to handle field renames between + * kernel versions, one can use two flavors of the struct name with the + * same common name and use conditional relocations to extract that field, + * depending on target kernel version. + * 2. For each candidate type, try to match local specification to this + * candidate target type. Matching involves finding corresponding + * high-level spec accessors, meaning that all named fields should match, + * as well as all array accesses should be within the actual bounds. Also, + * types should be compatible (see bpf_core_fields_are_compat for details). + * 3. It is supported and expected that there might be multiple flavors + * matching the spec. As long as all the specs resolve to the same set of + * offsets across all candidates, there is not error. If there is any + * ambiguity, CO-RE relocation will fail. This is necessary to accomodate + * imprefection of BTF deduplication, which can cause slight duplication of + * the same BTF type, if some directly or indirectly referenced (by + * pointer) type gets resolved to different actual types in different + * object files. If such situation occurs, deduplicated BTF will end up + * with two (or more) structurally identical types, which differ only in + * types they refer to through pointer. This should be OK in most cases and + * is not an error. + * 4. Candidate types search is performed by linearly scanning through all + * types in target BTF. It is anticipated that this is overall more + * efficient memory-wise and not significantly worse (if not better) + * CPU-wise compared to prebuilding a map from all local type names to + * a list of candidate type names. It's also sped up by caching resolved + * list of matching candidates per each local "root" type ID, that has at + * least one bpf_offset_reloc associated with it. This list is shared + * between multiple relocations for the same type ID and is updated as some + * of the candidates are pruned due to structural incompatibility. + */ + static int bpf_core_reloc_offset(struct bpf_program *prog, + const struct bpf_offset_reloc *relo, + int relo_idx, + const struct btf *local_btf, + const struct btf *targ_btf, + struct hashmap *cand_cache) + { + const char *prog_name = bpf_program__title(prog, false); + struct bpf_core_spec local_spec, cand_spec, targ_spec; + const void *type_key = u32_as_hash_key(relo->type_id); + const struct btf_type *local_type, *cand_type; + const char *local_name, *cand_name; + struct ids_vec *cand_ids; + __u32 local_id, cand_id; + const char *spec_str; + int i, j, err; + + local_id = relo->type_id; + local_type = btf__type_by_id(local_btf, local_id); + if (!local_type) + return -EINVAL; + + local_name = btf__name_by_offset(local_btf, local_type->name_off); + if (str_is_empty(local_name)) + return -EINVAL; + + spec_str = btf__name_by_offset(local_btf, relo->access_str_off); + if (str_is_empty(spec_str)) + return -EINVAL; + + err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec); + if (err) { + pr_warning("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", + prog_name, relo_idx, local_id, local_name, spec_str, + err); + return -EINVAL; + } + + pr_debug("prog '%s': relo #%d: spec is ", prog_name, relo_idx); + bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); + libbpf_print(LIBBPF_DEBUG, "\n"); + + if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) { + cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); + if (IS_ERR(cand_ids)) { + pr_warning("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", + prog_name, relo_idx, local_id, local_name, + PTR_ERR(cand_ids)); + return PTR_ERR(cand_ids); + } + err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL); + if (err) { + bpf_core_free_cands(cand_ids); + return err; + } + } + + for (i = 0, j = 0; i < cand_ids->len; i++) { + cand_id = cand_ids->data[i]; + cand_type = btf__type_by_id(targ_btf, cand_id); + cand_name = btf__name_by_offset(targ_btf, cand_type->name_off); + + err = bpf_core_spec_match(&local_spec, targ_btf, + cand_id, &cand_spec); + pr_debug("prog '%s': relo #%d: matching candidate #%d %s against spec ", + prog_name, relo_idx, i, cand_name); + bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); + libbpf_print(LIBBPF_DEBUG, ": %d\n", err); + if (err < 0) { + pr_warning("prog '%s': relo #%d: matching error: %d\n", + prog_name, relo_idx, err); + return err; + } + if (err == 0) + continue; + + if (j == 0) { + targ_spec = cand_spec; + } else if (cand_spec.offset != targ_spec.offset) { + /* if there are many candidates, they should all + * resolve to the same offset + */ + pr_warning("prog '%s': relo #%d: offset ambiguity: %u != %u\n", + prog_name, relo_idx, cand_spec.offset, + targ_spec.offset); + return -EINVAL; + } + + cand_ids->data[j++] = cand_spec.spec[0].type_id; + } + + cand_ids->len = j; + if (cand_ids->len == 0) { + pr_warning("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", + prog_name, relo_idx, local_id, local_name, spec_str); + return -ESRCH; + } + + err = bpf_core_reloc_insn(prog, relo->insn_off, + local_spec.offset, targ_spec.offset); + if (err) { + pr_warning("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", + prog_name, relo_idx, relo->insn_off, err); + return -EINVAL; + } + + return 0; + } + + static int + bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path) + { + const struct btf_ext_info_sec *sec; + const struct bpf_offset_reloc *rec; + const struct btf_ext_info *seg; + struct hashmap_entry *entry; + struct hashmap *cand_cache = NULL; + struct bpf_program *prog; + struct btf *targ_btf; + const char *sec_name; + int i, err = 0; + + if (targ_btf_path) + targ_btf = btf__parse_elf(targ_btf_path, NULL); + else + targ_btf = bpf_core_find_kernel_btf(); + if (IS_ERR(targ_btf)) { + pr_warning("failed to get target BTF: %ld\n", + PTR_ERR(targ_btf)); + return PTR_ERR(targ_btf); + } + + cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL); + if (IS_ERR(cand_cache)) { + err = PTR_ERR(cand_cache); + goto out; + } + + seg = &obj->btf_ext->offset_reloc_info; + for_each_btf_ext_sec(seg, sec) { + sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); + if (str_is_empty(sec_name)) { + err = -EINVAL; + goto out; + } + prog = bpf_object__find_program_by_title(obj, sec_name); + if (!prog) { + pr_warning("failed to find program '%s' for CO-RE offset relocation\n", + sec_name); + err = -EINVAL; + goto out; + } + + pr_debug("prog '%s': performing %d CO-RE offset relocs\n", + sec_name, sec->num_info); + + for_each_btf_ext_rec(seg, sec, i, rec) { + err = bpf_core_reloc_offset(prog, rec, i, obj->btf, + targ_btf, cand_cache); + if (err) { + pr_warning("prog '%s': relo #%d: failed to relocate: %d\n", + sec_name, i, err); + goto out; + } + } + } + + out: + btf__free(targ_btf); + if (!IS_ERR_OR_NULL(cand_cache)) { + hashmap__for_each_entry(cand_cache, entry, i) { + bpf_core_free_cands(entry->value); + } + hashmap__free(cand_cache); + } + return err; + } + + static int + bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) + { + int err = 0; + + if (obj->btf_ext->offset_reloc_info.len) + err = bpf_core_reloc_offsets(obj, targ_btf_path); + + return err; + } + static int bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, struct reloc_desc *relo) @@@ -2395,14 -3297,21 +3291,21 @@@ bpf_program__relocate(struct bpf_progra return 0; }
- static int - bpf_object__relocate(struct bpf_object *obj) + bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) { struct bpf_program *prog; size_t i; int err;
+ if (obj->btf_ext) { + err = bpf_object__relocate_core(obj, targ_btf_path); + if (err) { + pr_warning("failed to perform CO-RE relocations: %d\n", + err); + return err; + } + } for (i = 0; i < obj->nr_programs; i++) { prog = &obj->programs[i];
@@@ -2457,7 -3366,7 +3360,7 @@@ load_program(struct bpf_program *prog, char *cp, errmsg[STRERR_BUFSIZE]; int log_buf_size = BPF_LOG_BUF_SIZE; char *log_buf; - int ret; + int btf_fd, ret;
if (!insns || !insns_cnt) return -EINVAL; @@@ -2472,12 -3381,7 +3375,12 @@@ load_attr.license = license; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; - load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0; + /* if .BTF.ext was loaded, kernel supports associated BTF for prog */ + if (prog->obj->btf_ext) + btf_fd = bpf_object__btf_fd(prog->obj); + else + btf_fd = -1; + load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0; load_attr.func_info = prog->func_info; load_attr.func_info_rec_size = prog->func_info_rec_size; load_attr.func_info_cnt = prog->func_info_cnt; @@@ -2808,7 -3712,7 +3711,7 @@@ int bpf_object__load_xattr(struct bpf_o obj->loaded = true;
CHECK_ERR(bpf_object__create_maps(obj), err, out); - CHECK_ERR(bpf_object__relocate(obj), err, out); + CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out); CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
return 0; @@@ -4999,15 -5903,13 +5902,15 @@@ int libbpf_num_possible_cpus(void static const char *fcpu = "/sys/devices/system/cpu/possible"; int len = 0, n = 0, il = 0, ir = 0; unsigned int start = 0, end = 0; + int tmp_cpus = 0; static int cpus; char buf[128]; int error = 0; int fd = -1;
- if (cpus > 0) - return cpus; + tmp_cpus = READ_ONCE(cpus); + if (tmp_cpus > 0) + return tmp_cpus;
fd = open(fcpu, O_RDONLY); if (fd < 0) { @@@ -5030,7 -5932,7 +5933,7 @@@ } buf[len] = '\0';
- for (ir = 0, cpus = 0; ir <= len; ir++) { + for (ir = 0, tmp_cpus = 0; ir <= len; ir++) { /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ if (buf[ir] == ',' || buf[ir] == '\0') { buf[ir] = '\0'; @@@ -5042,15 -5944,13 +5945,15 @@@ } else if (n == 1) { end = start; } - cpus += end - start + 1; + tmp_cpus += end - start + 1; il = ir + 1; } } - if (cpus <= 0) { - pr_warning("Invalid #CPUs %d from %s\n", cpus, fcpu); + if (tmp_cpus <= 0) { + pr_warning("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); return -EINVAL; } - return cpus; + + WRITE_ONCE(cpus, tmp_cpus); + return tmp_cpus; }