The following commit has been merged in the master branch: commit 832470c355798d65d010fdec6c42fab468c895ec Merge: f28a72b44f09eea6cf5fab2d2bd62da8fbe92ea1 8c2e6c904fd8701a8d02d2bdb86871dc3ec4e85b Author: Stephen Rothwell sfr@canb.auug.org.au Date: Fri Jan 12 10:42:21 2018 +1100
Merge remote-tracking branch 'net-next/master'
diff --combined MAINTAINERS index 6809623840de,e22ca0ae995d..52be6e038cf7 --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -321,7 -321,7 +321,7 @@@ F: drivers/acpi/apei
ACPI COMPONENT ARCHITECTURE (ACPICA) M: Robert Moore robert.moore@intel.com -M: Lv Zheng lv.zheng@intel.com +M: Erik Schmauss erik.schmauss@intel.com M: "Rafael J. Wysocki" rafael.j.wysocki@intel.com L: linux-acpi@vger.kernel.org L: devel@acpica.org @@@ -1255,12 -1255,6 +1255,12 @@@ L: linux-arm-kernel@lists.infradead.or S: Supported F: drivers/net/ethernet/cavium/thunder/
+ARM/CIRRUS LOGIC BK3 MACHINE SUPPORT +M: Lukasz Majewski lukma@denx.de +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: arch/arm/mach-ep93xx/ts72xx.c + ARM/CIRRUS LOGIC CLPS711X ARM ARCHITECTURE M: Alexander Shiyan shc_work@mail.ru L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@@ -1589,7 -1583,6 +1589,7 @@@ F: arch/arm/boot/dts/kirkwood F: arch/arm/configs/mvebu_*_defconfig F: arch/arm/mach-mvebu/ F: arch/arm64/boot/dts/marvell/armada* +F: drivers/cpufreq/armada-37xx-cpufreq.c F: drivers/cpufreq/mvebu-cpufreq.c F: drivers/irqchip/irq-armada-370-xp.c F: drivers/irqchip/irq-mvebu-* @@@ -1642,38 -1635,14 +1642,38 @@@ ARM/NEC MOBILEPRO 900/c MACHINE SUPPOR M: Michael Petchkovsky mkpetch@internode.on.net S: Maintained
-ARM/NOMADIK ARCHITECTURE -M: Alessandro Rubini rubini@unipv.it +ARM/NOMADIK/U300/Ux500 ARCHITECTURES M: Linus Walleij linus.walleij@linaro.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-nomadik/ -F: drivers/pinctrl/nomadik/ +F: arch/arm/mach-u300/ +F: arch/arm/mach-ux500/ +F: arch/arm/boot/dts/ste-* +F: drivers/clk/clk-nomadik.c +F: drivers/clk/clk-u300.c +F: drivers/clocksource/clksrc-dbx500-prcmu.c +F: drivers/clocksource/timer-u300.c +F: drivers/dma/coh901318* +F: drivers/dma/ste_dma40* +F: drivers/hwspinlock/u8500_hsem.c F: drivers/i2c/busses/i2c-nomadik.c +F: drivers/i2c/busses/i2c-stu300.c +F: drivers/mfd/ab3100* +F: drivers/mfd/ab8500* +F: drivers/mfd/abx500* +F: drivers/mfd/dbx500* +F: drivers/mfd/db8500* +F: drivers/pinctrl/nomadik/ +F: drivers/pinctrl/pinctrl-coh901* +F: drivers/pinctrl/pinctrl-u300.c +F: drivers/rtc/rtc-ab3100.c +F: drivers/rtc/rtc-ab8500.c +F: drivers/rtc/rtc-coh901331.c +F: drivers/rtc/rtc-pl031.c +F: drivers/watchdog/coh901327_wdt.c +F: Documentation/devicetree/bindings/arm/ste-* +F: Documentation/devicetree/bindings/arm/ux500/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git
ARM/NUVOTON W90X900 ARM ARCHITECTURE @@@ -1987,10 -1956,9 +1987,10 @@@ N: stm3 F: drivers/clocksource/armv7m_systick.c
ARM/TANGO ARCHITECTURE -M: Marc Gonzalez marc_gonzalez@sigmadesigns.com +M: Marc Gonzalez marc.w.gonzalez@free.fr +M: Mans Rullgard mans@mansr.com L: linux-arm-kernel@lists.infradead.org -S: Maintained +S: Odd Fixes N: tango
ARM/TECHNOLOGIC SYSTEMS TS7250 MACHINE SUPPORT @@@ -2054,6 -2022,21 +2054,6 @@@ M: Dmitry Eremin-Solenikov <dbaryshkov@ M: Dirk Opfer dirk@opfer-online.de S: Maintained
-ARM/U300 MACHINE SUPPORT -M: Linus Walleij linus.walleij@linaro.org -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Supported -F: arch/arm/mach-u300/ -F: drivers/clocksource/timer-u300.c -F: drivers/i2c/busses/i2c-stu300.c -F: drivers/rtc/rtc-coh901331.c -F: drivers/watchdog/coh901327_wdt.c -F: drivers/dma/coh901318* -F: drivers/mfd/ab3100* -F: drivers/rtc/rtc-ab3100.c -F: drivers/rtc/rtc-coh901331.c -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git - ARM/UNIPHIER ARCHITECTURE M: Masahiro Yamada yamada.masahiro@socionext.com L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@@ -2075,6 -2058,24 +2075,6 @@@ F: drivers/reset/reset-uniphier. F: drivers/tty/serial/8250/8250_uniphier.c N: uniphier
-ARM/Ux500 ARM ARCHITECTURE -M: Linus Walleij linus.walleij@linaro.org -L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) -S: Maintained -F: arch/arm/mach-ux500/ -F: drivers/clocksource/clksrc-dbx500-prcmu.c -F: drivers/dma/ste_dma40* -F: drivers/hwspinlock/u8500_hsem.c -F: drivers/mfd/abx500* -F: drivers/mfd/ab8500* -F: drivers/mfd/dbx500* -F: drivers/mfd/db8500* -F: drivers/pinctrl/nomadik/pinctrl-ab* -F: drivers/pinctrl/nomadik/pinctrl-nomadik* -F: drivers/rtc/rtc-ab8500.c -F: drivers/rtc/rtc-pl031.c -T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git - ARM/Ux500 CLOCK FRAMEWORK SUPPORT M: Ulf Hansson ulf.hansson@linaro.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) @@@ -2201,6 -2202,14 +2201,6 @@@ L: linux-leds@vger.kernel.or S: Maintained F: drivers/leds/leds-as3645a.c
-AS3645A LED FLASH CONTROLLER DRIVER -M: Laurent Pinchart laurent.pinchart@ideasonboard.com -L: linux-media@vger.kernel.org -T: git git://linuxtv.org/media_tree.git -S: Maintained -F: drivers/media/i2c/as3645a.c -F: include/media/i2c/as3645a.h - ASAHI KASEI AK8974 DRIVER M: Linus Walleij linus.walleij@linaro.org L: linux-iio@vger.kernel.org @@@ -2253,9 -2262,7 +2253,9 @@@ F: include/linux/async_tx. AT24 EEPROM DRIVER M: Bartosz Golaszewski brgl@bgdev.pl L: linux-i2c@vger.kernel.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git S: Maintained +F: Documentation/devicetree/bindings/eeprom/at24.txt F: drivers/misc/eeprom/at24.c F: include/linux/platform_data/at24.h
@@@ -2494,8 -2501,6 +2494,8 @@@ L: linux-arm-kernel@lists.infradead.or S: Maintained F: Documentation/devicetree/bindings/arm/axentia.txt F: arch/arm/boot/dts/at91-linea.dtsi +F: arch/arm/boot/dts/at91-natte.dtsi +F: arch/arm/boot/dts/at91-nattis-2-natte-2.dts F: arch/arm/boot/dts/at91-tse850-3.dts
AXENTIA ASOC DRIVERS @@@ -2559,6 -2564,7 +2559,7 @@@ S: Maintaine F: Documentation/ABI/testing/sysfs-class-net-batman-adv F: Documentation/ABI/testing/sysfs-class-net-mesh F: Documentation/networking/batman-adv.rst + F: include/uapi/linux/batadv_packet.h F: include/uapi/linux/batman_adv.h F: net/batman-adv/
@@@ -2682,7 -2688,6 +2683,6 @@@ F: drivers/mtd/devices/block2mtd.
BLUETOOTH DRIVERS M: Marcel Holtmann marcel@holtmann.org - M: Gustavo Padovan gustavo@padovan.org M: Johan Hedberg johan.hedberg@gmail.com L: linux-bluetooth@vger.kernel.org W: http://www.bluez.org/ @@@ -2693,7 -2698,6 +2693,6 @@@ F: drivers/bluetooth
BLUETOOTH SUBSYSTEM M: Marcel Holtmann marcel@holtmann.org - M: Gustavo Padovan gustavo@padovan.org M: Johan Hedberg johan.hedberg@gmail.com L: linux-bluetooth@vger.kernel.org W: http://www.bluez.org/ @@@ -2718,12 -2722,16 +2717,16 @@@ M: Alexei Starovoitov <ast@kernel.org M: Daniel Borkmann daniel@iogearbox.net L: netdev@vger.kernel.org L: linux-kernel@vger.kernel.org + T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git + T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git S: Supported F: arch/x86/net/bpf_jit* F: Documentation/networking/filter.txt F: Documentation/bpf/ F: include/linux/bpf* F: include/linux/filter.h + F: include/trace/events/bpf.h + F: include/trace/events/xdp.h F: include/uapi/linux/bpf* F: include/uapi/linux/filter.h F: kernel/bpf/ @@@ -2827,8 -2835,6 +2830,8 @@@ S: Maintaine F: arch/arm/mach-bcm/*brcmstb* F: arch/arm/boot/dts/bcm7*.dts* F: drivers/bus/brcmstb_gisb.c +F: arch/arm/mm/cache-b15-rac.c +F: arch/arm/include/asm/hardware/cache-b15-rac.h N: brcmstb
BROADCOM BMIPS CPUFREQ DRIVER @@@ -4936,6 -4942,11 +4939,11 @@@ S: Maintaine F: lib/dynamic_debug.c F: include/linux/dynamic_debug.h
+ DYNAMIC INTERRUPT MODERATION + M: Tal Gilboa talgi@mellanox.com + S: Maintained + F: include/linux/net_dim.h + DZ DECSTATION DZ11 SERIAL DRIVER M: "Maciej W. Rozycki" macro@linux-mips.org S: Maintained @@@ -7070,14 -7081,6 +7078,14 @@@ R: Dan Williams <dan.j.williams@intel.c S: Odd fixes F: drivers/dma/iop-adma.c
+INTEL IPU3 CSI-2 CIO2 DRIVER +M: Yong Zhi yong.zhi@intel.com +M: Sakari Ailus sakari.ailus@linux.intel.com +L: linux-media@vger.kernel.org +S: Maintained +F: drivers/media/pci/intel/ipu3/ +F: Documentation/media/uapi/v4l/pixfmt-srggb10-ipu3.rst + INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT M: Krzysztof Halasa khalasa@piap.pl S: Maintained @@@ -8683,15 -8686,6 +8691,15 @@@ T: git git://linuxtv.org/media_tree.gi S: Maintained F: drivers/media/dvb-frontends/stv6111*
+MEDIA DRIVERS FOR NVIDIA TEGRA - VDE +M: Dmitry Osipenko digetx@gmail.com +L: linux-media@vger.kernel.org +L: linux-tegra@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt +F: drivers/staging/media/tegra-vde/ + MEDIA INPUT INFRASTRUCTURE (V4L/DVB) M: Mauro Carvalho Chehab mchehab@s-opensource.com M: Mauro Carvalho Chehab mchehab@kernel.org @@@ -8735,6 -8729,13 +8743,13 @@@ L: netdev@vger.kernel.or S: Maintained F: drivers/net/ethernet/mediatek/
+ MEDIATEK SWITCH DRIVER + M: Sean Wang sean.wang@mediatek.com + L: netdev@vger.kernel.org + S: Maintained + F: drivers/net/dsa/mt7530.* + F: net/dsa/tag_mtk.c + MEDIATEK JPEG DRIVER M: Rick Chang rick.chang@mediatek.com M: Bin Liu bin.liu@mediatek.com @@@ -9107,7 -9108,6 +9122,7 @@@ S: Supporte F: Documentation/devicetree/bindings/mips/ F: Documentation/mips/ F: arch/mips/ +F: drivers/platform/mips/
MIPS BOSTON DEVELOPMENT BOARD M: Paul Burton paul.burton@mips.com @@@ -9135,25 -9135,6 +9150,25 @@@ F: arch/mips/include/asm/mach-loongson3 F: drivers/*/*loongson1* F: drivers/*/*/*loongson1*
+MIPS/LOONGSON2 ARCHITECTURE +M: Jiaxun Yang jiaxun.yang@flygoat.com +L: linux-mips@linux-mips.org +S: Maintained +F: arch/mips/loongson64/*{2e/2f}* +F: arch/mips/include/asm/mach-loongson64/ +F: drivers/*/*loongson2* +F: drivers/*/*/*loongson2* + +MIPS/LOONGSON3 ARCHITECTURE +M: Huacai Chen chenhc@lemote.com +L: linux-mips@linux-mips.org +S: Maintained +F: arch/mips/loongson64/ +F: arch/mips/include/asm/mach-loongson64/ +F: drivers/platform/mips/cpu_hwmon.c +F: drivers/*/*loongson3* +F: drivers/*/*/*loongson3* + MIPS RINT INSTRUCTION EMULATION M: Aleksandar Markovic aleksandar.markovic@mips.com L: linux-mips@linux-mips.org @@@ -9631,6 -9612,11 +9646,11 @@@ NETWORKING [WIRELESS L: linux-wireless@vger.kernel.org Q: http://patchwork.kernel.org/project/linux-wireless/list/
+ NETDEVSIM + M: Jakub Kicinski jakub.kicinski@netronome.com + S: Maintained + F: drivers/net/netdevsim/* + NETXEN (1/10) GbE SUPPORT M: Manish Chopra manish.chopra@cavium.com M: Rahul Verma rahul.verma@cavium.com @@@ -10085,14 -10071,6 +10105,14 @@@ S: Maintaine F: drivers/media/i2c/ov7670.c F: Documentation/devicetree/bindings/media/i2c/ov7670.txt
+OMNIVISION OV7740 SENSOR DRIVER +M: Wenyou Yang wenyou.yang@microchip.com +L: linux-media@vger.kernel.org +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: drivers/media/i2c/ov7740.c +F: Documentation/devicetree/bindings/media/i2c/ov7740.txt + ONENAND FLASH DRIVER M: Kyungmin Park kyungmin.park@samsung.com L: linux-mtd@lists.infradead.org @@@ -10591,12 -10569,8 +10611,12 @@@ T: git git://git.kernel.org/pub/scm/lin S: Supported F: Documentation/devicetree/bindings/pci/ F: Documentation/PCI/ +F: drivers/acpi/pci* F: drivers/pci/ +F: include/asm-generic/pci* F: include/linux/pci* +F: include/uapi/linux/pci* +F: lib/pci* F: arch/x86/pci/ F: arch/x86/kernel/quirks.c
@@@ -10935,7 -10909,6 +10955,7 @@@ F: include/linux/pm. F: include/linux/pm_* F: include/linux/powercap.h F: drivers/powercap/ +F: kernel/configs/nopm.config
POWER STATE COORDINATION INTERFACE (PSCI) M: Mark Rutland mark.rutland@arm.com @@@ -12383,14 -12356,6 +12403,14 @@@ T: git git://linuxtv.org/anttip/media_t S: Maintained F: drivers/media/tuners/si2157*
+SI2165 MEDIA DRIVER +M: Matthias Schwarzott zzam@gentoo.org +L: linux-media@vger.kernel.org +W: https://linuxtv.org +Q: http://patchwork.linuxtv.org/project/linux-media/list/ +S: Maintained +F: drivers/media/dvb-frontends/si2165* + SI2168 MEDIA DRIVER M: Antti Palosaari crope@iki.fi L: linux-media@vger.kernel.org @@@ -12682,6 -12647,13 +12702,13 @@@ F: drivers/md/raid F: include/linux/raid/ F: include/uapi/linux/raid/
+ SOCIONEXT (SNI) NETSEC NETWORK DRIVER + M: Jassi Brar jaswinder.singh@linaro.org + L: netdev@vger.kernel.org + S: Maintained + F: drivers/net/ethernet/socionext/netsec.c + F: Documentation/devicetree/bindings/net/socionext-netsec.txt + SONIC NETWORK DRIVER M: Thomas Bogendoerfer tsbogend@alpha.franken.de L: netdev@vger.kernel.org @@@ -12921,6 -12893,12 +12948,6 @@@ S: Odd Fixe F: Documentation/devicetree/bindings/staging/iio/ F: drivers/staging/iio/
-STAGING - LIRC (LINUX INFRARED REMOTE CONTROL) DRIVERS -M: Jarod Wilson jarod@wilsonet.com -W: http://www.lirc.org/ -S: Odd Fixes -F: drivers/staging/media/lirc/ - STAGING - LUSTRE PARALLEL FILESYSTEM M: Oleg Drokin oleg.drokin@intel.com M: Andreas Dilger andreas.dilger@intel.com @@@ -13302,15 -13280,6 +13329,15 @@@ T: git git://linuxtv.org/anttip/media_t S: Maintained F: drivers/media/tuners/tda18218*
+TDA18250 MEDIA DRIVER +M: Olli Salonen olli.salonen@iki.fi +L: linux-media@vger.kernel.org +W: https://linuxtv.org +Q: http://patchwork.linuxtv.org/project/linux-media/list/ +T: git git://linuxtv.org/media_tree.git +S: Maintained +F: drivers/media/tuners/tda18250* + TDA18271 MEDIA DRIVER M: Michael Krufky mkrufky@linuxtv.org L: linux-media@vger.kernel.org diff --combined arch/arm/boot/dts/imx25.dtsi index c43cf704b768,fcaff1c66bcb..9445f8e1473c --- a/arch/arm/boot/dts/imx25.dtsi +++ b/arch/arm/boot/dts/imx25.dtsi @@@ -122,7 -122,7 +122,7 @@@ };
can1: can@43f88000 { - compatible = "fsl,imx25-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx25-flexcan"; reg = <0x43f88000 0x4000>; interrupts = <43>; clocks = <&clks 75>, <&clks 75>; @@@ -131,7 -131,7 +131,7 @@@ };
can2: can@43f8c000 { - compatible = "fsl,imx25-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx25-flexcan"; reg = <0x43f8c000 0x4000>; interrupts = <44>; clocks = <&clks 76>, <&clks 76>; @@@ -628,13 -628,11 +628,13 @@@ usbphy0: usb-phy@0 { reg = <0>; compatible = "usb-nop-xceiv"; + #phy-cells = <0>; };
usbphy1: usb-phy@1 { reg = <1>; compatible = "usb-nop-xceiv"; + #phy-cells = <0>; }; }; }; diff --combined arch/arm/boot/dts/imx35.dtsi index f049c692c6b0,1f0e2203b576..e08c0c193767 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@@ -303,7 -303,7 +303,7 @@@ };
can1: can@53fe4000 { - compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx35-flexcan"; reg = <0x53fe4000 0x1000>; clocks = <&clks 33>, <&clks 33>; clock-names = "ipg", "per"; @@@ -312,7 -312,7 +312,7 @@@ };
can2: can@53fe8000 { - compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx35-flexcan"; reg = <0x53fe8000 0x1000>; clocks = <&clks 34>, <&clks 34>; clock-names = "ipg", "per"; @@@ -402,13 -402,11 +402,13 @@@ usbphy0: usb-phy@0 { reg = <0>; compatible = "usb-nop-xceiv"; + #phy-cells = <0>; };
usbphy1: usb-phy@1 { reg = <1>; compatible = "usb-nop-xceiv"; + #phy-cells = <0>; }; }; }; diff --combined arch/arm/boot/dts/imx53.dtsi index 38b31a37339b,85071ff8c639..1040251f2951 --- a/arch/arm/boot/dts/imx53.dtsi +++ b/arch/arm/boot/dts/imx53.dtsi @@@ -116,28 -116,6 +116,28 @@@ }; };
+ pmu { + compatible = "arm,cortex-a8-pmu"; + interrupt-parent = <&tzic>; + interrupts = <77>; + }; + + usbphy0: usbphy-0 { + compatible = "usb-nop-xceiv"; + clocks = <&clks IMX5_CLK_USB_PHY1_GATE>; + clock-names = "main_clk"; + #phy-cells = <0>; + status = "okay"; + }; + + usbphy1: usbphy-1 { + compatible = "usb-nop-xceiv"; + clocks = <&clks IMX5_CLK_USB_PHY2_GATE>; + clock-names = "main_clk"; + #phy-cells = <0>; + status = "okay"; + }; + soc { #address-cells = <1>; #size-cells = <1>; @@@ -321,6 -299,20 +321,6 @@@ reg = <0x53f00000 0x60>; };
- usbphy0: usbphy-0 { - compatible = "usb-nop-xceiv"; - clocks = <&clks IMX5_CLK_USB_PHY1_GATE>; - clock-names = "main_clk"; - status = "okay"; - }; - - usbphy1: usbphy-1 { - compatible = "usb-nop-xceiv"; - clocks = <&clks IMX5_CLK_USB_PHY2_GATE>; - clock-names = "main_clk"; - status = "okay"; - }; - usbotg: usb@53f80000 { compatible = "fsl,imx53-usb", "fsl,imx27-usb"; reg = <0x53f80000 0x0200>; @@@ -441,13 -433,6 +441,13 @@@ clock-names = "ipg", "per"; };
+ srtc: rtc@53fa4000 { + compatible = "fsl,imx53-rtc"; + reg = <0x53fa4000 0x4000>; + interrupts = <24>; + clocks = <&clks IMX5_CLK_SRTC_GATE>; + }; + iomuxc: iomuxc@53fa8000 { compatible = "fsl,imx53-iomuxc"; reg = <0x53fa8000 0x4000>; @@@ -551,7 -536,7 +551,7 @@@ };
can1: can@53fc8000 { - compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx53-flexcan"; reg = <0x53fc8000 0x4000>; interrupts = <82>; clocks = <&clks IMX5_CLK_CAN1_IPG_GATE>, @@@ -561,7 -546,7 +561,7 @@@ };
can2: can@53fcc000 { - compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan"; + compatible = "fsl,imx53-flexcan"; reg = <0x53fcc000 0x4000>; interrupts = <83>; clocks = <&clks IMX5_CLK_CAN2_IPG_GATE>, @@@ -828,5 -813,10 +828,5 @@@ reg = <0xf8000000 0x20000>; clocks = <&clks IMX5_CLK_OCRAM>; }; - - pmu { - compatible = "arm,cortex-a8-pmu"; - interrupts = <77>; - }; }; }; diff --combined arch/arm/boot/dts/ls1021a-qds.dts index bf15dc27ca53,7bb402d3e9d0..499f41a2c6f0 --- a/arch/arm/boot/dts/ls1021a-qds.dts +++ b/arch/arm/boot/dts/ls1021a-qds.dts @@@ -239,11 -239,6 +239,11 @@@ device-width = <1>; };
+ nand@2,0 { + compatible = "fsl,ifc-nand"; + reg = <0x2 0x0 0x10000>; + }; + fpga: board-control@3,0 { #address-cells = <1>; #size-cells = <1>; @@@ -336,3 -331,19 +336,19 @@@ &uart1 { status = "okay"; }; + + &can0 { + status = "okay"; + }; + + &can1 { + status = "okay"; + }; + + &can2 { + status = "disabled"; + }; + + &can3 { + status = "disabled"; + }; diff --combined arch/arm/boot/dts/ls1021a-twr.dts index b186c370ad54,860b898141f0..f0c949d74833 --- a/arch/arm/boot/dts/ls1021a-twr.dts +++ b/arch/arm/boot/dts/ls1021a-twr.dts @@@ -228,10 -228,6 +228,10 @@@ }; };
+&esdhc { + status = "okay"; +}; + &sai1 { status = "okay"; }; @@@ -247,3 -243,19 +247,19 @@@ &uart1 { status = "okay"; }; + + &can0 { + status = "okay"; + }; + + &can1 { + status = "okay"; + }; + + &can2 { + status = "disabled"; + }; + + &can3 { + status = "disabled"; + }; diff --combined arch/arm/boot/dts/ls1021a.dtsi index c5edfa9a68a6,7789031898b0..c31dad98f989 --- a/arch/arm/boot/dts/ls1021a.dtsi +++ b/arch/arm/boot/dts/ls1021a.dtsi @@@ -106,14 -106,6 +106,14 @@@ compatible = "arm,cortex-a7-pmu"; interrupts = <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>; + interrupt-affinity = <&cpu0>, <&cpu1>; + }; + + reboot { + compatible = "syscon-reboot"; + regmap = <&dcfg>; + offset = <0xb0>; + mask = <0x02>; };
soc { @@@ -162,22 -154,8 +162,22 @@@ big-endian; };
+ qspi: quadspi@1550000 { + compatible = "fsl,ls1021a-qspi"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0x0 0x1550000 0x0 0x10000>, + <0x0 0x40000000 0x0 0x40000000>; + reg-names = "QuadSPI", "QuadSPI-memory"; + interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>; + clock-names = "qspi_en", "qspi"; + clocks = <&clockgen 4 1>, <&clockgen 4 1>; + big-endian; + status = "disabled"; + }; + esdhc: esdhc@1560000 { - compatible = "fsl,esdhc"; + compatible = "fsl,ls1021a-esdhc", "fsl,esdhc"; reg = <0x0 0x1560000 0x0 0x10000>; interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>; clock-frequency = <0>; @@@ -597,7 -575,7 +597,7 @@@ fsl,tclk-period = <5>; fsl,tmr-prsc = <2>; fsl,tmr-add = <0xaaaaaaab>; - fsl,tmr-fiper1 = <999999990>; + fsl,tmr-fiper1 = <999999995>; fsl,tmr-fiper2 = <99990>; fsl,max-adj = <499999999>; }; @@@ -690,7 -668,7 +690,7 @@@ }; };
- usb@8600000 { + usb2: usb@8600000 { compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr"; reg = <0x0 0x8600000 0x0 0x1000>; interrupts = <GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>; @@@ -698,7 -676,7 +698,7 @@@ phy_type = "ulpi"; };
- usb3@3100000 { + usb3: usb3@3100000 { compatible = "snps,dwc3"; reg = <0x0 0x3100000 0x0 0x10000>; interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>; @@@ -752,5 -730,41 +752,41 @@@ <0000 0 0 3 &gic GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>, <0000 0 0 4 &gic GIC_SPI 193 IRQ_TYPE_LEVEL_HIGH>; }; + + can0: can@2a70000 { + compatible = "fsl,ls1021ar2-flexcan"; + reg = <0x0 0x2a70000 0x0 0x1000>; + interrupts = <GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clockgen 4 1>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + big-endian; + }; + + can1: can@2a80000 { + compatible = "fsl,ls1021ar2-flexcan"; + reg = <0x0 0x2a80000 0x0 0x1000>; + interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clockgen 4 1>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + big-endian; + }; + + can2: can@2a90000 { + compatible = "fsl,ls1021ar2-flexcan"; + reg = <0x0 0x2a90000 0x0 0x1000>; + interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clockgen 4 1>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + big-endian; + }; + + can3: can@2aa0000 { + compatible = "fsl,ls1021ar2-flexcan"; + reg = <0x0 0x2aa0000 0x0 0x1000>; + interrupts = <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clockgen 4 1>, <&clockgen 4 1>; + clock-names = "ipg", "per"; + big-endian; + }; }; }; diff --combined drivers/net/ethernet/broadcom/bcmsysport.c index 9d7a834c5f62,f15a8fc6dfc9..c2969b260aed --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@@ -1156,7 -1156,7 +1156,7 @@@ static struct sk_buff *bcm_sysport_inse memset(tsb, 0, sizeof(*tsb));
if (skb->ip_summed == CHECKSUM_PARTIAL) { - ip_ver = htons(skb->protocol); + ip_ver = ntohs(skb->protocol); switch (ip_ver) { case ETH_P_IP: ip_proto = ip_hdr(skb)->protocol; @@@ -1216,18 -1216,6 +1216,6 @@@ static netdev_tx_t bcm_sysport_xmit(str goto out; }
- /* The Ethernet switch we are interfaced with needs packets to be at - * least 64 bytes (including FCS) otherwise they will be discarded when - * they enter the switch port logic. When Broadcom tags are enabled, we - * need to make sure that packets are at least 68 bytes - * (including FCS and tag) because the length verification is done after - * the Broadcom tag is stripped off the ingress packet. - */ - if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) { - ret = NETDEV_TX_OK; - goto out; - } - /* Insert TSB and checksum infos */ if (priv->tsb_en) { skb = bcm_sysport_insert_tsb(skb, dev); diff --combined drivers/net/ethernet/broadcom/tg3.c index 86ff8b49ee57,a77ee2f8fb8d..2bd77d9990f2 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@@ -3227,7 -3227,7 +3227,7 @@@ static int tg3_nvram_read_using_eeprom( return 0; }
- #define NVRAM_CMD_TIMEOUT 5000 + #define NVRAM_CMD_TIMEOUT 10000
static int tg3_nvram_exec_cmd(struct tg3 *tp, u32 nvram_cmd) { @@@ -3744,7 -3744,7 +3744,7 @@@ static int tg3_load_firmware_cpu(struc }
do { - u32 *fw_data = (u32 *)(fw_hdr + 1); + __be32 *fw_data = (__be32 *)(fw_hdr + 1); for (i = 0; i < tg3_fw_data_len(tp, fw_hdr); i++) write_op(tp, cpu_scratch_base + (be32_to_cpu(fw_hdr->base_addr) & 0xffff) + @@@ -14789,7 -14789,7 +14789,7 @@@ static void tg3_get_5717_nvram_info(str
static void tg3_get_5720_nvram_info(struct tg3 *tp) { - u32 nvcfg1, nvmpinstrp; + u32 nvcfg1, nvmpinstrp, nv_status;
nvcfg1 = tr32(NVRAM_CFG1); nvmpinstrp = nvcfg1 & NVRAM_CFG1_5752VENDOR_MASK; @@@ -14801,6 -14801,23 +14801,23 @@@ }
switch (nvmpinstrp) { + case FLASH_5762_MX25L_100: + case FLASH_5762_MX25L_200: + case FLASH_5762_MX25L_400: + case FLASH_5762_MX25L_800: + case FLASH_5762_MX25L_160_320: + tp->nvram_pagesize = 4096; + tp->nvram_jedecnum = JEDEC_MACRONIX; + tg3_flag_set(tp, NVRAM_BUFFERED); + tg3_flag_set(tp, NO_NVRAM_ADDR_TRANS); + tg3_flag_set(tp, FLASH); + nv_status = tr32(NVRAM_AUTOSENSE_STATUS); + tp->nvram_size = + (1 << (nv_status >> AUTOSENSE_DEVID & + AUTOSENSE_DEVID_MASK) + << AUTOSENSE_SIZE_IN_MB); + return; + case FLASH_5762_EEPROM_HD: nvmpinstrp = FLASH_5720_EEPROM_HD; break; diff --combined drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index b5397da94d7f,273300b75a68..971f689dd833 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@@ -41,13 -41,144 +41,145 @@@ #include "spectrum.h" #include "reg.h"
+ enum mlxsw_sp_qdisc_type { + MLXSW_SP_QDISC_NO_QDISC, + MLXSW_SP_QDISC_RED, + }; + + struct mlxsw_sp_qdisc_ops { + enum mlxsw_sp_qdisc_type type; + int (*check_params)(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params); + int (*replace)(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); + int (*destroy)(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc); + int (*get_stats)(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr); + int (*get_xstats)(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *xstats_ptr); + void (*clean_stats)(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc); + }; + + struct mlxsw_sp_qdisc { + u32 handle; + u8 tclass_num; + union { + struct red_stats red; + } xstats_base; + struct mlxsw_sp_qdisc_stats { + u64 tx_bytes; + u64 tx_packets; + u64 drops; + u64 overlimits; + } stats_base; + + struct mlxsw_sp_qdisc_ops *ops; + }; + + static bool + mlxsw_sp_qdisc_compare(struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, u32 handle, + enum mlxsw_sp_qdisc_type type) + { + return mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops && + mlxsw_sp_qdisc->ops->type == type && + mlxsw_sp_qdisc->handle == handle; + } + + static int + mlxsw_sp_qdisc_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) + { + int err = 0; + + if (!mlxsw_sp_qdisc) + return 0; + + if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->destroy) + err = mlxsw_sp_qdisc->ops->destroy(mlxsw_sp_port, + mlxsw_sp_qdisc); + + mlxsw_sp_qdisc->handle = TC_H_UNSPEC; + mlxsw_sp_qdisc->ops = NULL; + return err; + } + + static int + mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct mlxsw_sp_qdisc_ops *ops, void *params) + { + int err; + + if (mlxsw_sp_qdisc->ops && mlxsw_sp_qdisc->ops->type != ops->type) + /* In case this location contained a different qdisc of the + * same type we can override the old qdisc configuration. + * Otherwise, we need to remove the old qdisc before setting the + * new one. + */ + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + err = ops->check_params(mlxsw_sp_port, mlxsw_sp_qdisc, params); + if (err) + goto err_bad_param; + + err = ops->replace(mlxsw_sp_port, mlxsw_sp_qdisc, params); + if (err) + goto err_config; + + if (mlxsw_sp_qdisc->handle != handle) { + mlxsw_sp_qdisc->ops = ops; + if (ops->clean_stats) + ops->clean_stats(mlxsw_sp_port, mlxsw_sp_qdisc); + } + + mlxsw_sp_qdisc->handle = handle; + return 0; + + err_bad_param: + err_config: + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + return err; + } + + static int + mlxsw_sp_qdisc_get_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) + { + if (mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops && + mlxsw_sp_qdisc->ops->get_stats) + return mlxsw_sp_qdisc->ops->get_stats(mlxsw_sp_port, + mlxsw_sp_qdisc, + stats_ptr); + + return -EOPNOTSUPP; + } + + static int + mlxsw_sp_qdisc_get_xstats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *xstats_ptr) + { + if (mlxsw_sp_qdisc && mlxsw_sp_qdisc->ops && + mlxsw_sp_qdisc->ops->get_xstats) + return mlxsw_sp_qdisc->ops->get_xstats(mlxsw_sp_port, + mlxsw_sp_qdisc, + xstats_ptr); + + return -EOPNOTSUPP; + } + static int mlxsw_sp_tclass_congestion_enable(struct mlxsw_sp_port *mlxsw_sp_port, int tclass_num, u32 min, u32 max, u32 probability, bool is_ecn) { - char cwtp_cmd[max_t(u8, MLXSW_REG_CWTP_LEN, MLXSW_REG_CWTPM_LEN)]; + char cwtpm_cmd[MLXSW_REG_CWTPM_LEN]; + char cwtp_cmd[MLXSW_REG_CWTP_LEN]; struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; int err;
@@@ -61,10 -192,10 +193,10 @@@ if (err) return err;
- mlxsw_reg_cwtpm_pack(cwtp_cmd, mlxsw_sp_port->local_port, tclass_num, + mlxsw_reg_cwtpm_pack(cwtpm_cmd, mlxsw_sp_port->local_port, tclass_num, MLXSW_REG_CWTP_DEFAULT_PROFILE, true, is_ecn);
- return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtp_cmd); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(cwtpm), cwtpm_cmd); }
static int @@@ -80,80 -211,76 +212,76 @@@ mlxsw_sp_tclass_congestion_disable(stru }
static void - mlxsw_sp_setup_tc_qdisc_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - int tclass_num) + mlxsw_sp_setup_tc_qdisc_red_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base; + u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; struct rtnl_link_stats64 *stats; + struct red_stats *red_base;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; stats = &mlxsw_sp_port->periodic_hw_stats.stats; + stats_base = &mlxsw_sp_qdisc->stats_base; + red_base = &mlxsw_sp_qdisc->xstats_base.red;
- mlxsw_sp_qdisc->tx_packets = stats->tx_packets; - mlxsw_sp_qdisc->tx_bytes = stats->tx_bytes; + stats_base->tx_packets = stats->tx_packets; + stats_base->tx_bytes = stats->tx_bytes;
- switch (mlxsw_sp_qdisc->type) { - case MLXSW_SP_QDISC_RED: - xstats_base->prob_mark = xstats->ecn; - xstats_base->prob_drop = xstats->wred_drop[tclass_num]; - xstats_base->pdrop = xstats->tail_drop[tclass_num]; + red_base->prob_mark = xstats->ecn; + red_base->prob_drop = xstats->wred_drop[tclass_num]; + red_base->pdrop = xstats->tail_drop[tclass_num];
- mlxsw_sp_qdisc->overlimits = xstats_base->prob_drop + - xstats_base->prob_mark; - mlxsw_sp_qdisc->drops = xstats_base->prob_drop + - xstats_base->pdrop; - break; - default: - break; - } + stats_base->overlimits = red_base->prob_drop + red_base->prob_mark; + stats_base->drops = red_base->prob_drop + red_base->pdrop; }
static int - mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - int tclass_num) + mlxsw_sp_qdisc_red_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) { - int err; - - if (mlxsw_sp_qdisc->handle != handle) - return 0; - - err = mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, tclass_num); - mlxsw_sp_qdisc->handle = TC_H_UNSPEC; - mlxsw_sp_qdisc->type = MLXSW_SP_QDISC_NO_QDISC; - - return err; + return mlxsw_sp_tclass_congestion_disable(mlxsw_sp_port, + mlxsw_sp_qdisc->tclass_num); }
static int - mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, - struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - int tclass_num, - struct tc_red_qopt_offload_params *p) + mlxsw_sp_qdisc_red_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - u32 min, max; - u64 prob; - int err = 0; + struct tc_red_qopt_offload_params *p = params;
if (p->min > p->max) { dev_err(mlxsw_sp->bus_info->dev, "spectrum: RED: min %u is bigger then max %u\n", p->min, p->max); - goto err_bad_param; + return -EINVAL; } if (p->max > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE)) { dev_err(mlxsw_sp->bus_info->dev, "spectrum: RED: max value %u is too big\n", p->max); - goto err_bad_param; + return -EINVAL; } if (p->min == 0 || p->max == 0) { dev_err(mlxsw_sp->bus_info->dev, "spectrum: RED: 0 value is illegal for min and max\n"); - goto err_bad_param; + return -EINVAL; } + return 0; + } + + static int + mlxsw_sp_qdisc_red_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) + { + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + struct tc_red_qopt_offload_params *p = params; + u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + u32 min, max; + u64 prob;
/* calculate probability in percentage */ prob = p->probability; @@@ -162,116 -289,132 +290,132 @@@ prob = DIV_ROUND_UP(prob, 1 << 16); min = mlxsw_sp_bytes_cells(mlxsw_sp, p->min); max = mlxsw_sp_bytes_cells(mlxsw_sp, p->max); - err = mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, min, - max, prob, p->is_ecn); - if (err) - goto err_config; - - mlxsw_sp_qdisc->type = MLXSW_SP_QDISC_RED; - if (mlxsw_sp_qdisc->handle != handle) - mlxsw_sp_setup_tc_qdisc_clean_stats(mlxsw_sp_port, - mlxsw_sp_qdisc, - tclass_num); - - mlxsw_sp_qdisc->handle = handle; - return 0; - - err_bad_param: - err = -EINVAL; - err_config: - mlxsw_sp_qdisc_red_destroy(mlxsw_sp_port, mlxsw_sp_qdisc->handle, - mlxsw_sp_qdisc, tclass_num); - return err; + return mlxsw_sp_tclass_congestion_enable(mlxsw_sp_port, tclass_num, min, + max, prob, p->is_ecn); }
static int - mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + mlxsw_sp_qdisc_get_red_xstats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - int tclass_num, struct red_stats *res) + void *xstats_ptr) { - struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base; + struct red_stats *xstats_base = &mlxsw_sp_qdisc->xstats_base.red; + u8 tclass_num = mlxsw_sp_qdisc->tclass_num; struct mlxsw_sp_port_xstats *xstats; - - if (mlxsw_sp_qdisc->handle != handle || - mlxsw_sp_qdisc->type != MLXSW_SP_QDISC_RED) - return -EOPNOTSUPP; + struct red_stats *res = xstats_ptr; + int early_drops, marks, pdrops;
xstats = &mlxsw_sp_port->periodic_hw_stats.xstats;
- res->prob_drop = xstats->wred_drop[tclass_num] - xstats_base->prob_drop; - res->prob_mark = xstats->ecn - xstats_base->prob_mark; - res->pdrop = xstats->tail_drop[tclass_num] - xstats_base->pdrop; + early_drops = xstats->wred_drop[tclass_num] - xstats_base->prob_drop; + marks = xstats->ecn - xstats_base->prob_mark; + pdrops = xstats->tail_drop[tclass_num] - xstats_base->pdrop; + + res->pdrop += pdrops; + res->prob_drop += early_drops; + res->prob_mark += marks; + + xstats_base->pdrop += pdrops; + xstats_base->prob_drop += early_drops; + xstats_base->prob_mark += marks; return 0; }
static int - mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, + mlxsw_sp_qdisc_get_red_stats(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, - int tclass_num, - struct tc_red_qopt_offload_stats *res) + struct tc_qopt_offload_stats *stats_ptr) { u64 tx_bytes, tx_packets, overlimits, drops; + u8 tclass_num = mlxsw_sp_qdisc->tclass_num; + struct mlxsw_sp_qdisc_stats *stats_base; struct mlxsw_sp_port_xstats *xstats; struct rtnl_link_stats64 *stats;
- if (mlxsw_sp_qdisc->handle != handle || - mlxsw_sp_qdisc->type != MLXSW_SP_QDISC_RED) - return -EOPNOTSUPP; - xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; stats = &mlxsw_sp_port->periodic_hw_stats.stats; + stats_base = &mlxsw_sp_qdisc->stats_base;
- tx_bytes = stats->tx_bytes - mlxsw_sp_qdisc->tx_bytes; - tx_packets = stats->tx_packets - mlxsw_sp_qdisc->tx_packets; + tx_bytes = stats->tx_bytes - stats_base->tx_bytes; + tx_packets = stats->tx_packets - stats_base->tx_packets; overlimits = xstats->wred_drop[tclass_num] + xstats->ecn - - mlxsw_sp_qdisc->overlimits; + stats_base->overlimits; drops = xstats->wred_drop[tclass_num] + xstats->tail_drop[tclass_num] - - mlxsw_sp_qdisc->drops; - - _bstats_update(res->bstats, tx_bytes, tx_packets); - res->qstats->overlimits += overlimits; - res->qstats->drops += drops; - res->qstats->backlog += mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, - xstats->backlog[tclass_num]); - - mlxsw_sp_qdisc->drops += drops; - mlxsw_sp_qdisc->overlimits += overlimits; - mlxsw_sp_qdisc->tx_bytes += tx_bytes; - mlxsw_sp_qdisc->tx_packets += tx_packets; + stats_base->drops; + + _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); + stats_ptr->qstats->overlimits += overlimits; + stats_ptr->qstats->drops += drops; + stats_ptr->qstats->backlog += + mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + xstats->backlog[tclass_num]); + + stats_base->drops += drops; + stats_base->overlimits += overlimits; + stats_base->tx_bytes += tx_bytes; + stats_base->tx_packets += tx_packets; return 0; }
#define MLXSW_SP_PORT_DEFAULT_TCLASS 0
+ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_red = { + .type = MLXSW_SP_QDISC_RED, + .check_params = mlxsw_sp_qdisc_red_check_params, + .replace = mlxsw_sp_qdisc_red_replace, + .destroy = mlxsw_sp_qdisc_red_destroy, + .get_stats = mlxsw_sp_qdisc_get_red_stats, + .get_xstats = mlxsw_sp_qdisc_get_red_xstats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_red_clean_stats, + }; + int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_red_qopt_offload *p) { struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; - int tclass_num;
if (p->parent != TC_H_ROOT) return -EOPNOTSUPP;
- mlxsw_sp_qdisc = &mlxsw_sp_port->root_qdisc; - tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; + mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc; + + if (p->command == TC_RED_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_red, + &p->set); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, + MLXSW_SP_QDISC_RED)) + return -EOPNOTSUPP;
switch (p->command) { - case TC_RED_REPLACE: - return mlxsw_sp_qdisc_red_replace(mlxsw_sp_port, p->handle, - mlxsw_sp_qdisc, tclass_num, - &p->set); case TC_RED_DESTROY: - return mlxsw_sp_qdisc_red_destroy(mlxsw_sp_port, p->handle, - mlxsw_sp_qdisc, tclass_num); + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); case TC_RED_XSTATS: - return mlxsw_sp_qdisc_get_red_xstats(mlxsw_sp_port, p->handle, - mlxsw_sp_qdisc, tclass_num, - p->xstats); + return mlxsw_sp_qdisc_get_xstats(mlxsw_sp_port, mlxsw_sp_qdisc, + p->xstats); case TC_RED_STATS: - return mlxsw_sp_qdisc_get_red_stats(mlxsw_sp_port, p->handle, - mlxsw_sp_qdisc, tclass_num, - &p->stats); + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); default: return -EOPNOTSUPP; } } + + int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) + { + mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc), + GFP_KERNEL); + if (!mlxsw_sp_port->root_qdisc) + return -ENOMEM; + + mlxsw_sp_port->root_qdisc->tclass_num = MLXSW_SP_PORT_DEFAULT_TCLASS; + + return 0; + } + + void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port) + { + kfree(mlxsw_sp_port->root_qdisc); + } diff --combined drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 99b0487b6d82,caee147fce04..07e0587dc14e --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@@ -568,7 -568,6 +568,7 @@@ nfp_net_aux_irq_request(struct nfp_net return err; } nn_writeb(nn, ctrl_offset, entry->entry); + nfp_net_irq_unmask(nn, entry->entry);
return 0; } @@@ -583,7 -582,6 +583,7 @@@ static void nfp_net_aux_irq_free(struc unsigned int vector_idx) { nn_writeb(nn, ctrl_offset, 0xff); + nn_pci_flush(nn); free_irq(nn->irq_entries[vector_idx].vector, nn); }
@@@ -1610,11 -1608,13 +1610,13 @@@ static int nfp_net_rx(struct nfp_net_rx unsigned int true_bufsz; struct sk_buff *skb; int pkts_polled = 0; + struct xdp_buff xdp; int idx;
rcu_read_lock(); xdp_prog = READ_ONCE(dp->xdp_prog); true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz; + xdp.rxq = &rx_ring->xdp_rxq; tx_ring = r_vec->xdp_ring;
while (pkts_polled < budget) { @@@ -1705,7 -1705,6 +1707,6 @@@ dp->bpf_offload_xdp) && !meta.portid) { void *orig_data = rxbuf->frag + pkt_off; unsigned int dma_off; - struct xdp_buff xdp; int act;
xdp.data_hard_start = rxbuf->frag + NFP_NET_RX_BUF_HEADROOM; @@@ -2254,6 -2253,8 +2255,8 @@@ static void nfp_net_rx_ring_free(struc struct nfp_net_r_vector *r_vec = rx_ring->r_vec; struct nfp_net_dp *dp = &r_vec->nfp_net->dp;
+ if (dp->netdev) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); kfree(rx_ring->rxbufs);
if (rx_ring->rxds) @@@ -2277,7 -2278,14 +2280,14 @@@ static int nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) { - int sz; + int sz, err; + + if (dp->netdev) { + err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, + rx_ring->idx); + if (err < 0) + return err; + }
rx_ring->cnt = dp->rxd_cnt; rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt; @@@ -2852,6 -2860,11 +2862,11 @@@ static void nfp_net_set_rx_mode(struct
new_ctrl = nn->dp.ctrl;
+ if (!netdev_mc_empty(netdev) || netdev->flags & IFF_ALLMULTI) + new_ctrl |= nn->cap & NFP_NET_CFG_CTRL_L2MC; + else + new_ctrl &= ~NFP_NET_CFG_CTRL_L2MC; + if (netdev->flags & IFF_PROMISC) { if (nn->cap & NFP_NET_CFG_CTRL_PROMISC) new_ctrl |= NFP_NET_CFG_CTRL_PROMISC; @@@ -3036,6 -3049,11 +3051,11 @@@ static int nfp_net_change_mtu(struct ne { struct nfp_net *nn = netdev_priv(netdev); struct nfp_net_dp *dp; + int err; + + err = nfp_app_change_mtu(nn->app, netdev, new_mtu); + if (err) + return err;
dp = nfp_net_clone_dp(nn); if (!dp) @@@ -3394,17 -3412,10 +3414,10 @@@ static int nfp_net_xdp(struct net_devic if (nn->dp.bpf_offload_xdp) xdp->prog_attached = XDP_ATTACHED_HW; xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0; + xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0; return 0; - case BPF_OFFLOAD_VERIFIER_PREP: - return nfp_app_bpf_verifier_prep(nn->app, nn, xdp); - case BPF_OFFLOAD_TRANSLATE: - return nfp_app_bpf_translate(nn->app, nn, - xdp->offload.prog); - case BPF_OFFLOAD_DESTROY: - return nfp_app_bpf_destroy(nn->app, nn, - xdp->offload.prog); default: - return -EINVAL; + return nfp_app_bpf(nn->app, nn, xdp); } }
@@@ -3563,9 -3574,6 +3576,6 @@@ struct nfp_net *nfp_net_alloc(struct pc */ void nfp_net_free(struct nfp_net *nn) { - if (nn->xdp_prog) - bpf_prog_put(nn->xdp_prog); - if (nn->dp.netdev) free_netdev(nn->dp.netdev); else @@@ -3791,8 -3799,6 +3801,6 @@@ int nfp_net_init(struct nfp_net *nn /* Allow L2 Broadcast and Multicast through by default, if supported */ if (nn->cap & NFP_NET_CFG_CTRL_L2BC) nn->dp.ctrl |= NFP_NET_CFG_CTRL_L2BC; - if (nn->cap & NFP_NET_CFG_CTRL_L2MC) - nn->dp.ctrl |= NFP_NET_CFG_CTRL_L2MC;
/* Allow IRQ moderation, if supported */ if (nn->cap & NFP_NET_CFG_CTRL_IRQMOD) { diff --combined drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index 5bbcaf8298f6,2fd8456999f6..b419229d7457 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@@ -241,13 -241,12 +241,13 @@@ static int dwmac4_rx_check_timestamp(vo u32 own, ctxt; int ret = 1;
- own = p->des3 & RDES3_OWN; - ctxt = ((p->des3 & RDES3_CONTEXT_DESCRIPTOR) + own = le32_to_cpu(p->des3) & RDES3_OWN; + ctxt = ((le32_to_cpu(p->des3) & RDES3_CONTEXT_DESCRIPTOR) >> RDES3_CONTEXT_DESCRIPTOR_SHIFT);
if (likely(!own && ctxt)) { - if ((p->des0 == 0xffffffff) && (p->des1 == 0xffffffff)) + if ((p->des0 == cpu_to_le32(0xffffffff)) && + (p->des1 == cpu_to_le32(0xffffffff))) /* Corrupted value */ ret = -EINVAL; else @@@ -266,7 -265,7 +266,7 @@@ static int dwmac4_wrback_get_rx_timesta int ret = -EINVAL;
/* Get the status from normal w/b descriptor */ - if (likely(p->des3 & TDES3_RS1V)) { + if (likely(p->des3 & cpu_to_le32(TDES3_RS1V))) { if (likely(le32_to_cpu(p->des1) & RDES1_TIMESTAMP_AVAILABLE)) { int i = 0;
@@@ -407,7 -406,7 +407,7 @@@ static void dwmac4_display_ring(void *h pr_info("%s descriptor ring:\n", rx ? "RX" : "TX");
for (i = 0; i < size; i++) { - pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", + pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", i, (unsigned int)virt_to_phys(p), le32_to_cpu(p->des0), le32_to_cpu(p->des1), le32_to_cpu(p->des2), le32_to_cpu(p->des3)); diff --combined drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 323464576fc0,f99f14c35063..cf0e16d1a068 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@@ -2003,22 -2003,60 +2003,60 @@@ static void stmmac_set_dma_operation_mo static void stmmac_dma_interrupt(struct stmmac_priv *priv) { u32 tx_channel_count = priv->plat->tx_queues_to_use; - int status; + u32 rx_channel_count = priv->plat->rx_queues_to_use; + u32 channels_to_check = tx_channel_count > rx_channel_count ? + tx_channel_count : rx_channel_count; u32 chan; + bool poll_scheduled = false; + int status[channels_to_check]; + + /* Each DMA channel can be used for rx and tx simultaneously, yet + * napi_struct is embedded in struct stmmac_rx_queue rather than in a + * stmmac_channel struct. + * Because of this, stmmac_poll currently checks (and possibly wakes) + * all tx queues rather than just a single tx queue. + */ + for (chan = 0; chan < channels_to_check; chan++) + status[chan] = priv->hw->dma->dma_interrupt(priv->ioaddr, + &priv->xstats, + chan);
- for (chan = 0; chan < tx_channel_count; chan++) { - struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan]; + for (chan = 0; chan < rx_channel_count; chan++) { + if (likely(status[chan] & handle_rx)) { + struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
- status = priv->hw->dma->dma_interrupt(priv->ioaddr, - &priv->xstats, chan); - if (likely((status & handle_rx)) || (status & handle_tx)) { if (likely(napi_schedule_prep(&rx_q->napi))) { stmmac_disable_dma_irq(priv, chan); __napi_schedule(&rx_q->napi); + poll_scheduled = true; } } + }
- if (unlikely(status & tx_hard_error_bump_tc)) { + /* If we scheduled poll, we already know that tx queues will be checked. + * If we didn't schedule poll, see if any DMA channel (used by tx) has a + * completed transmission, if so, call stmmac_poll (once). + */ + if (!poll_scheduled) { + for (chan = 0; chan < tx_channel_count; chan++) { + if (status[chan] & handle_tx) { + /* It doesn't matter what rx queue we choose + * here. We use 0 since it always exists. + */ + struct stmmac_rx_queue *rx_q = + &priv->rx_queue[0]; + + if (likely(napi_schedule_prep(&rx_q->napi))) { + stmmac_disable_dma_irq(priv, chan); + __napi_schedule(&rx_q->napi); + } + break; + } + } + } + + for (chan = 0; chan < tx_channel_count; chan++) { + if (unlikely(status[chan] & tx_hard_error_bump_tc)) { /* Try to bump up the dma threshold on this failure */ if (unlikely(priv->xstats.threshold != SF_DMA_MODE) && (tc <= 256)) { @@@ -2035,7 -2073,7 +2073,7 @@@ chan); priv->xstats.threshold = tc; } - } else if (unlikely(status == tx_hard_error)) { + } else if (unlikely(status[chan] == tx_hard_error)) { stmmac_tx_err(priv, chan); } } @@@ -2539,7 -2577,7 +2577,7 @@@ static int stmmac_hw_setup(struct net_d }
if (priv->hw->pcs && priv->hw->mac->pcs_ctrl_ane) - priv->hw->mac->pcs_ctrl_ane(priv->hw, 1, priv->hw->ps, 0); + priv->hw->mac->pcs_ctrl_ane(priv->ioaddr, 1, priv->hw->ps, 0);
/* set TX and RX rings length */ stmmac_set_rings_length(priv); @@@ -3404,9 -3442,8 +3442,8 @@@ static int stmmac_rx(struct stmmac_pri if (netif_msg_rx_status(priv)) { netdev_dbg(priv->dev, "\tdesc: %p [entry %d] buff=0x%x\n", p, entry, des); - if (frame_len > ETH_FRAME_LEN) - netdev_dbg(priv->dev, "frame size %d, COE: %d\n", - frame_len, status); + netdev_dbg(priv->dev, "frame size %d, COE: %d\n", + frame_len, status); }
/* The zero-copy is always used for all the sizes diff --combined drivers/net/tap.c index f39c6f876e67,7c38659b2a76..4f745eb878f3 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@@ -330,7 -330,7 +330,7 @@@ rx_handler_result_t tap_handle_frame(st if (!q) return RX_HANDLER_PASS;
- if (__skb_array_full(&q->skb_array)) + if (__ptr_ring_full(&q->ring)) goto drop;
skb_push(skb, ETH_HLEN); @@@ -348,7 -348,7 +348,7 @@@ goto drop;
if (!segs) { - if (skb_array_produce(&q->skb_array, skb)) + if (ptr_ring_produce(&q->ring, skb)) goto drop; goto wake_up; } @@@ -358,7 -358,7 +358,7 @@@ struct sk_buff *nskb = segs->next;
segs->next = NULL; - if (skb_array_produce(&q->skb_array, segs)) { + if (ptr_ring_produce(&q->ring, segs)) { kfree_skb(segs); kfree_skb_list(nskb); break; @@@ -375,7 -375,7 +375,7 @@@ !(features & NETIF_F_CSUM_MASK) && skb_checksum_help(skb)) goto drop; - if (skb_array_produce(&q->skb_array, skb)) + if (ptr_ring_produce(&q->ring, skb)) goto drop; }
@@@ -497,7 -497,7 +497,7 @@@ static void tap_sock_destruct(struct so { struct tap_queue *q = container_of(sk, struct tap_queue, sk);
- skb_array_cleanup(&q->skb_array); + ptr_ring_cleanup(&q->ring, __skb_array_destroy_skb); }
static int tap_open(struct inode *inode, struct file *file) @@@ -517,7 -517,7 +517,7 @@@ &tap_proto, 0); if (!q) goto err; - if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) { + if (ptr_ring_init(&q->ring, tap->dev->tx_queue_len, GFP_KERNEL)) { sk_free(&q->sk); goto err; } @@@ -546,7 -546,7 +546,7 @@@
err = tap_set_queue(tap, file, q); if (err) { - /* tap_sock_destruct() will take care of freeing skb_array */ + /* tap_sock_destruct() will take care of freeing ptr_ring */ goto err_put; }
@@@ -572,10 -572,10 +572,10 @@@ static int tap_release(struct inode *in return 0; }
-static unsigned int tap_poll(struct file *file, poll_table *wait) +static __poll_t tap_poll(struct file *file, poll_table *wait) { struct tap_queue *q = file->private_data; - unsigned int mask = POLLERR; + __poll_t mask = POLLERR;
if (!q) goto out; @@@ -583,7 -583,7 +583,7 @@@ mask = 0; poll_wait(file, &q->wq.wait, wait);
- if (!skb_array_empty(&q->skb_array)) + if (!ptr_ring_empty(&q->ring)) mask |= POLLIN | POLLRDNORM;
if (sock_writeable(&q->sk) || @@@ -844,7 -844,7 +844,7 @@@ static ssize_t tap_do_read(struct tap_q TASK_INTERRUPTIBLE);
/* Read frames from the queue */ - skb = skb_array_consume(&q->skb_array); + skb = ptr_ring_consume(&q->ring); if (skb) break; if (noblock) { @@@ -1176,7 -1176,7 +1176,7 @@@ static int tap_peek_len(struct socket * { struct tap_queue *q = container_of(sock, struct tap_queue, sock); - return skb_array_peek_len(&q->skb_array); + return PTR_RING_PEEK_CALL(&q->ring, __skb_array_len_with_tag); }
/* Ops structure to mimic raw sockets with tun */ @@@ -1202,7 -1202,7 +1202,7 @@@ struct socket *tap_get_socket(struct fi } EXPORT_SYMBOL_GPL(tap_get_socket);
- struct skb_array *tap_get_skb_array(struct file *file) + struct ptr_ring *tap_get_ptr_ring(struct file *file) { struct tap_queue *q;
@@@ -1211,29 -1211,30 +1211,30 @@@ q = file->private_data; if (!q) return ERR_PTR(-EBADFD); - return &q->skb_array; + return &q->ring; } - EXPORT_SYMBOL_GPL(tap_get_skb_array); + EXPORT_SYMBOL_GPL(tap_get_ptr_ring);
int tap_queue_resize(struct tap_dev *tap) { struct net_device *dev = tap->dev; struct tap_queue *q; - struct skb_array **arrays; + struct ptr_ring **rings; int n = tap->numqueues; int ret, i = 0;
- arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL); - if (!arrays) + rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL); + if (!rings) return -ENOMEM;
list_for_each_entry(q, &tap->queue_list, next) - arrays[i++] = &q->skb_array; + rings[i++] = &q->ring;
- ret = skb_array_resize_multiple(arrays, n, - dev->tx_queue_len, GFP_KERNEL); + ret = ptr_ring_resize_multiple(rings, n, + dev->tx_queue_len, GFP_KERNEL, + __skb_array_destroy_skb);
- kfree(arrays); + kfree(rings); return ret; } EXPORT_SYMBOL_GPL(tap_queue_resize); diff --combined drivers/net/tun.c index 2ffe5dba7e09,2fba3be5719e..55e6fee87375 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@@ -179,7 -179,8 +179,8 @@@ struct tun_file struct mutex napi_mutex; /* Protects access to the above napi */ struct list_head next; struct tun_struct *detached; - struct skb_array tx_array; + struct ptr_ring tx_ring; + struct xdp_rxq_info xdp_rxq; };
struct tun_flow_entry { @@@ -195,6 -196,11 +196,11 @@@
#define TUN_NUM_FLOW_ENTRIES 1024
+ struct tun_steering_prog { + struct rcu_head rcu; + struct bpf_prog *prog; + }; + /* Since the socket were moved to tun_file, to preserve the behavior of persist * device, socket filter, sndbuf and vnet header size were restore when the * file were attached to a persist device. @@@ -232,8 -238,27 +238,27 @@@ struct tun_struct u32 rx_batched; struct tun_pcpu_stats __percpu *pcpu_stats; struct bpf_prog __rcu *xdp_prog; + struct tun_steering_prog __rcu *steering_prog; };
+ bool tun_is_xdp_buff(void *ptr) + { + return (unsigned long)ptr & TUN_XDP_FLAG; + } + EXPORT_SYMBOL(tun_is_xdp_buff); + + void *tun_xdp_to_ptr(void *ptr) + { + return (void *)((unsigned long)ptr | TUN_XDP_FLAG); + } + EXPORT_SYMBOL(tun_xdp_to_ptr); + + void *tun_ptr_to_xdp(void *ptr) + { + return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG); + } + EXPORT_SYMBOL(tun_ptr_to_xdp); + static int tun_napi_receive(struct napi_struct *napi, int budget) { struct tun_file *tfile = container_of(napi, struct tun_file, napi); @@@ -537,15 -562,12 +562,12 @@@ static inline void tun_flow_save_rps_rx * different rxq no. here. If we could not get rxhash, then we would * hope the rxq no. may help here. */ - static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, - void *accel_priv, select_queue_fallback_t fallback) + static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb) { - struct tun_struct *tun = netdev_priv(dev); struct tun_flow_entry *e; u32 txq = 0; u32 numqueues = 0;
- rcu_read_lock(); numqueues = READ_ONCE(tun->numqueues);
txq = __skb_get_hash_symmetric(skb); @@@ -563,10 -585,37 +585,37 @@@ txq -= numqueues; }
- rcu_read_unlock(); return txq; }
+ static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb) + { + struct tun_steering_prog *prog; + u16 ret = 0; + + prog = rcu_dereference(tun->steering_prog); + if (prog) + ret = bpf_prog_run_clear_cb(prog->prog, skb); + + return ret % tun->numqueues; + } + + static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) + { + struct tun_struct *tun = netdev_priv(dev); + u16 ret; + + rcu_read_lock(); + if (rcu_dereference(tun->steering_prog)) + ret = tun_ebpf_select_queue(tun, skb); + else + ret = tun_automq_select_queue(tun, skb); + rcu_read_unlock(); + + return ret; + } + static inline bool tun_not_capable(struct tun_struct *tun) { const struct cred *cred = current_cred(); @@@ -600,12 -649,25 +649,25 @@@ static struct tun_struct *tun_enable_qu return tun; }
+ static void tun_ptr_free(void *ptr) + { + if (!ptr) + return; + if (tun_is_xdp_buff(ptr)) { + struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + + put_page(virt_to_head_page(xdp->data)); + } else { + __skb_array_destroy_skb(ptr); + } + } + static void tun_queue_purge(struct tun_file *tfile) { - struct sk_buff *skb; + void *ptr;
- while ((skb = skb_array_consume(&tfile->tx_array)) != NULL) - kfree_skb(skb); + while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL) + tun_ptr_free(ptr);
skb_queue_purge(&tfile->sk.sk_write_queue); skb_queue_purge(&tfile->sk.sk_error_queue); @@@ -657,8 -719,10 +719,10 @@@ static void __tun_detach(struct tun_fil tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - if (tun) - skb_array_cleanup(&tfile->tx_array); + if (tun) { + ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); + xdp_rxq_info_unreg(&tfile->xdp_rxq); + } sock_put(&tfile->sk); } } @@@ -673,7 -737,6 +737,6 @@@ static void tun_detach(struct tun_file static void tun_detach_all(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); - struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog); struct tun_file *tfile, *tmp; int i, n = tun->numqueues;
@@@ -699,18 -762,17 +762,17 @@@ tun_napi_del(tun, tfile); /* Drop read queue */ tun_queue_purge(tfile); + xdp_rxq_info_unreg(&tfile->xdp_rxq); sock_put(&tfile->sk); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { tun_enable_queue(tfile); tun_queue_purge(tfile); + xdp_rxq_info_unreg(&tfile->xdp_rxq); sock_put(&tfile->sk); } BUG_ON(tun->numdisabled != 0);
- if (xdp_prog) - bpf_prog_put(xdp_prog); - if (tun->flags & IFF_PERSIST) module_put(THIS_MODULE); } @@@ -751,13 -813,29 +813,29 @@@ static int tun_attach(struct tun_struc }
if (!tfile->detached && - skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) { + ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) { err = -ENOMEM; goto out; }
tfile->queue_index = tun->numqueues; tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN; + + if (tfile->detached) { + /* Re-attach detached tfile, updating XDP queue_index */ + WARN_ON(!xdp_rxq_info_is_reg(&tfile->xdp_rxq)); + + if (tfile->xdp_rxq.queue_index != tfile->queue_index) + tfile->xdp_rxq.queue_index = tfile->queue_index; + } else { + /* Setup XDP RX-queue info, for new tfile getting attached */ + err = xdp_rxq_info_reg(&tfile->xdp_rxq, + tun->dev, tfile->queue_index); + if (err < 0) + goto out; + err = 0; + } + rcu_assign_pointer(tfile->tun, tun); rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile); tun->numqueues++; @@@ -937,23 -1015,10 +1015,10 @@@ static int tun_net_close(struct net_dev }
/* Net device start xmit */ - static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) + static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb) { - struct tun_struct *tun = netdev_priv(dev); - int txq = skb->queue_mapping; - struct tun_file *tfile; - u32 numqueues = 0; - - rcu_read_lock(); - tfile = rcu_dereference(tun->tfiles[txq]); - numqueues = READ_ONCE(tun->numqueues); - - /* Drop packet if interface is not attached */ - if (txq >= numqueues) - goto drop; - #ifdef CONFIG_RPS - if (numqueues == 1 && static_key_false(&rps_needed)) { + if (tun->numqueues == 1 && static_key_false(&rps_needed)) { /* Select queue was not called for the skbuff, so we extract the * RPS hash and save it into the flow_table here. */ @@@ -969,6 -1034,24 +1034,24 @@@ } } #endif + } + + /* Net device start xmit */ + static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) + { + struct tun_struct *tun = netdev_priv(dev); + int txq = skb->queue_mapping; + struct tun_file *tfile; + + rcu_read_lock(); + tfile = rcu_dereference(tun->tfiles[txq]); + + /* Drop packet if interface is not attached */ + if (txq >= tun->numqueues) + goto drop; + + if (!rcu_dereference(tun->steering_prog)) + tun_automq_xmit(tun, skb);
tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
@@@ -996,7 -1079,7 +1079,7 @@@
nf_reset(skb);
- if (skb_array_produce(&tfile->tx_array, skb)) + if (ptr_ring_produce(&tfile->tx_ring, skb)) goto drop;
/* Notify and wake up reader process */ @@@ -1169,6 -1252,67 +1252,67 @@@ static const struct net_device_ops tun_ .ndo_get_stats64 = tun_net_get_stats64, };
+ static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) + { + struct tun_struct *tun = netdev_priv(dev); + struct xdp_buff *buff = xdp->data_hard_start; + int headroom = xdp->data - xdp->data_hard_start; + struct tun_file *tfile; + u32 numqueues; + int ret = 0; + + /* Assure headroom is available and buff is properly aligned */ + if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp))) + return -ENOSPC; + + *buff = *xdp; + + rcu_read_lock(); + + numqueues = READ_ONCE(tun->numqueues); + if (!numqueues) { + ret = -ENOSPC; + goto out; + } + + tfile = rcu_dereference(tun->tfiles[smp_processor_id() % + numqueues]); + /* Encode the XDP flag into lowest bit for consumer to differ + * XDP buffer from sk_buff. + */ + if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) { + this_cpu_inc(tun->pcpu_stats->tx_dropped); + ret = -ENOSPC; + } + + out: + rcu_read_unlock(); + return ret; + } + + static void tun_xdp_flush(struct net_device *dev) + { + struct tun_struct *tun = netdev_priv(dev); + struct tun_file *tfile; + u32 numqueues; + + rcu_read_lock(); + + numqueues = READ_ONCE(tun->numqueues); + if (!numqueues) + goto out; + + tfile = rcu_dereference(tun->tfiles[smp_processor_id() % + numqueues]); + /* Notify and wake up reader process */ + if (tfile->flags & TUN_FASYNC) + kill_fasync(&tfile->fasync, SIGIO, POLL_IN); + tfile->socket.sk->sk_data_ready(tfile->socket.sk); + + out: + rcu_read_unlock(); + } + static const struct net_device_ops tap_netdev_ops = { .ndo_uninit = tun_net_uninit, .ndo_open = tun_net_open, @@@ -1186,6 -1330,8 +1330,8 @@@ .ndo_set_rx_headroom = tun_set_headroom, .ndo_get_stats64 = tun_net_get_stats64, .ndo_bpf = tun_xdp, + .ndo_xdp_xmit = tun_xdp_xmit, + .ndo_xdp_flush = tun_xdp_flush, };
static void tun_flow_init(struct tun_struct *tun) @@@ -1248,12 -1394,12 +1394,12 @@@ static void tun_net_init(struct net_dev /* Character device part */
/* Poll */ -static unsigned int tun_chr_poll(struct file *file, poll_table *wait) +static __poll_t tun_chr_poll(struct file *file, poll_table *wait) { struct tun_file *tfile = file->private_data; struct tun_struct *tun = tun_get(tfile); struct sock *sk; - unsigned int mask = 0; + __poll_t mask = 0;
if (!tun) return POLLERR; @@@ -1264,7 -1410,7 +1410,7 @@@
poll_wait(file, sk_sleep(sk), wait);
- if (!skb_array_empty(&tfile->tx_array)) + if (!ptr_ring_empty(&tfile->tx_ring)) mask |= POLLIN | POLLRDNORM;
if (tun->dev->flags & IFF_UP && @@@ -1477,6 -1623,7 +1623,7 @@@ static struct sk_buff *tun_build_skb(st xdp.data = buf + pad; xdp_set_data_meta_invalid(&xdp); xdp.data_end = xdp.data + len; + xdp.rxq = &tfile->xdp_rxq; orig_data = xdp.data; act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@@ -1551,7 -1698,7 +1698,7 @@@ static ssize_t tun_get_user(struct tun_ int copylen; bool zerocopy = false; int err; - u32 rxhash; + u32 rxhash = 0; int skb_xdp = 1; bool frags = tun_napi_frags_enabled(tun);
@@@ -1739,7 -1886,10 +1886,10 @@@ rcu_read_unlock(); }
- rxhash = __skb_get_hash_symmetric(skb); + rcu_read_lock(); + if (!rcu_dereference(tun->steering_prog)) + rxhash = __skb_get_hash_symmetric(skb); + rcu_read_unlock();
if (frags) { /* Exercise flow dissector code path. */ @@@ -1783,7 -1933,9 +1933,9 @@@ u64_stats_update_end(&stats->syncp); put_cpu_ptr(stats);
- tun_flow_update(tun, rxhash, tfile); + if (rxhash) + tun_flow_update(tun, rxhash, tfile); + return total_len; }
@@@ -1804,6 -1956,40 +1956,40 @@@ static ssize_t tun_chr_write_iter(struc return result; }
+ static ssize_t tun_put_user_xdp(struct tun_struct *tun, + struct tun_file *tfile, + struct xdp_buff *xdp, + struct iov_iter *iter) + { + int vnet_hdr_sz = 0; + size_t size = xdp->data_end - xdp->data; + struct tun_pcpu_stats *stats; + size_t ret; + + if (tun->flags & IFF_VNET_HDR) { + struct virtio_net_hdr gso = { 0 }; + + vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz); + if (unlikely(iov_iter_count(iter) < vnet_hdr_sz)) + return -EINVAL; + if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) != + sizeof(gso))) + return -EFAULT; + iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso)); + } + + ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz; + + stats = get_cpu_ptr(tun->pcpu_stats); + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += ret; + u64_stats_update_end(&stats->syncp); + put_cpu_ptr(tun->pcpu_stats); + + return ret; + } + /* Put packet to the user space buffer */ static ssize_t tun_put_user(struct tun_struct *tun, struct tun_file *tfile, @@@ -1901,15 -2087,14 +2087,14 @@@ done return total; }
- static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock, - int *err) + static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err) { DECLARE_WAITQUEUE(wait, current); - struct sk_buff *skb = NULL; + void *ptr = NULL; int error = 0;
- skb = skb_array_consume(&tfile->tx_array); - if (skb) + ptr = ptr_ring_consume(&tfile->tx_ring); + if (ptr) goto out; if (noblock) { error = -EAGAIN; @@@ -1920,8 -2105,8 +2105,8 @@@ current->state = TASK_INTERRUPTIBLE;
while (1) { - skb = skb_array_consume(&tfile->tx_array); - if (skb) + ptr = ptr_ring_consume(&tfile->tx_ring); + if (ptr) break; if (signal_pending(current)) { error = -ERESTARTSYS; @@@ -1940,12 -2125,12 +2125,12 @@@
out: *err = error; - return skb; + return ptr; }
static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, struct iov_iter *to, - int noblock, struct sk_buff *skb) + int noblock, void *ptr) { ssize_t ret; int err; @@@ -1953,23 -2138,31 +2138,31 @@@ tun_debug(KERN_INFO, tun, "tun_do_read\n");
if (!iov_iter_count(to)) { - if (skb) - kfree_skb(skb); + tun_ptr_free(ptr); return 0; }
- if (!skb) { + if (!ptr) { /* Read frames from ring */ - skb = tun_ring_recv(tfile, noblock, &err); - if (!skb) + ptr = tun_ring_recv(tfile, noblock, &err); + if (!ptr) return err; }
- ret = tun_put_user(tun, tfile, skb, to); - if (unlikely(ret < 0)) - kfree_skb(skb); - else - consume_skb(skb); + if (tun_is_xdp_buff(ptr)) { + struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + + ret = tun_put_user_xdp(tun, tfile, xdp, to); + put_page(virt_to_head_page(xdp->data)); + } else { + struct sk_buff *skb = ptr; + + ret = tun_put_user(tun, tfile, skb, to); + if (unlikely(ret < 0)) + kfree_skb(skb); + else + consume_skb(skb); + }
return ret; } @@@ -1991,6 -2184,39 +2184,39 @@@ static ssize_t tun_chr_read_iter(struc return ret; }
+ static void tun_steering_prog_free(struct rcu_head *rcu) + { + struct tun_steering_prog *prog = container_of(rcu, + struct tun_steering_prog, rcu); + + bpf_prog_destroy(prog->prog); + kfree(prog); + } + + static int __tun_set_steering_ebpf(struct tun_struct *tun, + struct bpf_prog *prog) + { + struct tun_steering_prog *old, *new = NULL; + + if (prog) { + new = kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; + new->prog = prog; + } + + spin_lock_bh(&tun->lock); + old = rcu_dereference_protected(tun->steering_prog, + lockdep_is_held(&tun->lock)); + rcu_assign_pointer(tun->steering_prog, new); + spin_unlock_bh(&tun->lock); + + if (old) + call_rcu(&old->rcu, tun_steering_prog_free); + + return 0; + } + static void tun_free_netdev(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); @@@ -1999,6 -2225,7 +2225,7 @@@ free_percpu(tun->pcpu_stats); tun_flow_uninit(tun); security_tun_dev_free_security(tun->security); + __tun_set_steering_ebpf(tun, NULL); }
static void tun_setup(struct net_device *dev) @@@ -2072,12 -2299,12 +2299,12 @@@ static int tun_recvmsg(struct socket *s { struct tun_file *tfile = container_of(sock, struct tun_file, socket); struct tun_struct *tun = tun_get(tfile); - struct sk_buff *skb = m->msg_control; + void *ptr = m->msg_control; int ret;
if (!tun) { ret = -EBADFD; - goto out_free_skb; + goto out_free; }
if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) { @@@ -2089,7 -2316,7 +2316,7 @@@ SOL_PACKET, TUN_TX_TIMESTAMP); goto out; } - ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb); + ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr); if (ret > (ssize_t)total_len) { m->msg_flags |= MSG_TRUNC; ret = flags & MSG_TRUNC ? ret : total_len; @@@ -2100,12 -2327,25 +2327,25 @@@ out
out_put_tun: tun_put(tun); - out_free_skb: - if (skb) - kfree_skb(skb); + out_free: + tun_ptr_free(ptr); return ret; }
+ static int tun_ptr_peek_len(void *ptr) + { + if (likely(ptr)) { + if (tun_is_xdp_buff(ptr)) { + struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + + return xdp->data_end - xdp->data; + } + return __skb_array_len_with_tag(ptr); + } else { + return 0; + } + } + static int tun_peek_len(struct socket *sock) { struct tun_file *tfile = container_of(sock, struct tun_file, socket); @@@ -2116,7 -2356,7 +2356,7 @@@ if (!tun) return 0;
- ret = skb_array_peek_len(&tfile->tx_array); + ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len); tun_put(tun);
return ret; @@@ -2287,6 -2527,7 +2527,7 @@@ static int tun_set_iff(struct net *net tun->filter_attached = false; tun->sndbuf = tfile->socket.sk->sk_sndbuf; tun->rx_batched = 0; + RCU_INIT_POINTER(tun->steering_prog, NULL);
tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats); if (!tun->pcpu_stats) { @@@ -2479,6 -2720,25 +2720,25 @@@ unlock return ret; }
+ static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data) + { + struct bpf_prog *prog; + int fd; + + if (copy_from_user(&fd, data, sizeof(fd))) + return -EFAULT; + + if (fd == -1) { + prog = NULL; + } else { + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + + return __tun_set_steering_ebpf(tun, prog); + } + static long __tun_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg, int ifreq_len) { @@@ -2755,6 -3015,10 +3015,10 @@@ ret = 0; break;
+ case TUNSETSTEERINGEBPF: + ret = tun_set_steering_ebpf(tun, argp); + break; + default: ret = -EINVAL; break; @@@ -2998,25 -3262,26 +3262,26 @@@ static int tun_queue_resize(struct tun_ { struct net_device *dev = tun->dev; struct tun_file *tfile; - struct skb_array **arrays; + struct ptr_ring **rings; int n = tun->numqueues + tun->numdisabled; int ret, i;
- arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL); - if (!arrays) + rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL); + if (!rings) return -ENOMEM;
for (i = 0; i < tun->numqueues; i++) { tfile = rtnl_dereference(tun->tfiles[i]); - arrays[i] = &tfile->tx_array; + rings[i] = &tfile->tx_ring; } list_for_each_entry(tfile, &tun->disabled, next) - arrays[i++] = &tfile->tx_array; + rings[i++] = &tfile->tx_ring;
- ret = skb_array_resize_multiple(arrays, n, - dev->tx_queue_len, GFP_KERNEL); + ret = ptr_ring_resize_multiple(rings, n, + dev->tx_queue_len, GFP_KERNEL, + tun_ptr_free);
- kfree(arrays); + kfree(rings); return ret; }
@@@ -3102,7 -3367,7 +3367,7 @@@ struct socket *tun_get_socket(struct fi } EXPORT_SYMBOL_GPL(tun_get_socket);
- struct skb_array *tun_get_skb_array(struct file *file) + struct ptr_ring *tun_get_tx_ring(struct file *file) { struct tun_file *tfile;
@@@ -3111,9 -3376,9 +3376,9 @@@ tfile = file->private_data; if (!tfile) return ERR_PTR(-EBADFD); - return &tfile->tx_array; + return &tfile->tx_ring; } - EXPORT_SYMBOL_GPL(tun_get_skb_array); + EXPORT_SYMBOL_GPL(tun_get_tx_ring);
module_init(tun_init); module_exit(tun_cleanup); diff --combined drivers/net/wireless/ath/wcn36xx/main.c index 987f1252a3cf,5bed323f1100..ab5be6d2c691 --- a/drivers/net/wireless/ath/wcn36xx/main.c +++ b/drivers/net/wireless/ath/wcn36xx/main.c @@@ -384,18 -384,6 +384,18 @@@ static int wcn36xx_config(struct ieee80 } }
+ if (changed & IEEE80211_CONF_CHANGE_PS) { + list_for_each_entry(tmp, &wcn->vif_list, list) { + vif = wcn36xx_priv_to_vif(tmp); + if (hw->conf.flags & IEEE80211_CONF_PS) { + if (vif->bss_conf.ps) /* ps allowed ? */ + wcn36xx_pmc_enter_bmps_state(wcn, vif); + } else { + wcn36xx_pmc_exit_bmps_state(wcn, vif); + } + } + } + mutex_unlock(&wcn->conf_mutex);
return 0; @@@ -641,7 -629,6 +641,6 @@@ static int wcn36xx_hw_scan(struct ieee8 struct ieee80211_scan_request *hw_req) { struct wcn36xx *wcn = hw->priv; - mutex_lock(&wcn->scan_lock); if (wcn->scan_req) { mutex_unlock(&wcn->scan_lock); @@@ -650,11 -637,16 +649,16 @@@
wcn->scan_aborted = false; wcn->scan_req = &hw_req->req; + mutex_unlock(&wcn->scan_lock);
- schedule_work(&wcn->scan_work); + if (!get_feat_caps(wcn->fw_feat_caps, SCAN_OFFLOAD)) { + /* legacy manual/sw scan */ + schedule_work(&wcn->scan_work); + return 0; + }
- return 0; + return wcn36xx_smd_start_hw_scan(wcn, vif, &hw_req->req); }
static void wcn36xx_cancel_hw_scan(struct ieee80211_hw *hw, @@@ -662,6 -654,12 +666,12 @@@ { struct wcn36xx *wcn = hw->priv;
+ if (!wcn36xx_smd_stop_hw_scan(wcn)) { + struct cfg80211_scan_info scan_info = { .aborted = true }; + + ieee80211_scan_completed(wcn->hw, &scan_info); + } + mutex_lock(&wcn->scan_lock); wcn->scan_aborted = true; mutex_unlock(&wcn->scan_lock); @@@ -759,6 -757,17 +769,6 @@@ static void wcn36xx_bss_info_changed(st vif_priv->dtim_period = bss_conf->dtim_period; }
- if (changed & BSS_CHANGED_PS) { - wcn36xx_dbg(WCN36XX_DBG_MAC, - "mac bss PS set %d\n", - bss_conf->ps); - if (bss_conf->ps) { - wcn36xx_pmc_enter_bmps_state(wcn, vif); - } else { - wcn36xx_pmc_exit_bmps_state(wcn, vif); - } - } - if (changed & BSS_CHANGED_BSSID) { wcn36xx_dbg(WCN36XX_DBG_MAC, "mac bss changed_bssid %pM\n", bss_conf->bssid); diff --combined drivers/of/of_mdio.c index a327be1d264b,1b9ef35cf0d9..8c0c92712fc9 --- a/drivers/of/of_mdio.c +++ b/drivers/of/of_mdio.c @@@ -77,6 -77,11 +77,11 @@@ static int of_mdiobus_register_phy(stru if (of_property_read_bool(child, "broken-turn-around")) mdio->phy_ignore_ta_mask |= 1 << addr;
+ of_property_read_u32(child, "reset-assert-us", + &phy->mdio.reset_assert_delay); + of_property_read_u32(child, "reset-deassert-us", + &phy->mdio.reset_deassert_delay); + /* Associate the OF node with the device structure so it * can be looked up later */ of_node_get(child); @@@ -231,12 -236,7 +236,12 @@@ int of_mdiobus_register(struct mii_bus rc = of_mdiobus_register_phy(mdio, child, addr); else rc = of_mdiobus_register_device(mdio, child, addr); - if (rc) + + if (rc == -ENODEV) + dev_err(&mdio->dev, + "MDIO device at address %d is missing.\n", + addr); + else if (rc) goto unregister; }
@@@ -260,7 -260,7 +265,7 @@@
if (of_mdiobus_child_is_phy(child)) { rc = of_mdiobus_register_phy(mdio, child, addr); - if (rc) + if (rc && rc != -ENODEV) goto unregister; } } diff --combined drivers/vhost/net.c index 9524ee16878a,7baa90abe097..7cf00872c189 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@@ -89,7 -89,7 +89,7 @@@ struct vhost_net_ubuf_ref
#define VHOST_RX_BATCH 64 struct vhost_net_buf { - struct sk_buff **queue; + void **queue; int tail; int head; }; @@@ -108,7 -108,7 +108,7 @@@ struct vhost_net_virtqueue /* Reference counting for outstanding ubufs. * Protected by vq mutex. Writers must also take device mutex. */ struct vhost_net_ubuf_ref *ubufs; - struct skb_array *rx_array; + struct ptr_ring *rx_ring; struct vhost_net_buf rxq; };
@@@ -158,7 -158,7 +158,7 @@@ static int vhost_net_buf_produce(struc struct vhost_net_buf *rxq = &nvq->rxq;
rxq->head = 0; - rxq->tail = skb_array_consume_batched(nvq->rx_array, rxq->queue, + rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue, VHOST_RX_BATCH); return rxq->tail; } @@@ -167,13 -167,25 +167,25 @@@ static void vhost_net_buf_unproduce(str { struct vhost_net_buf *rxq = &nvq->rxq;
- if (nvq->rx_array && !vhost_net_buf_is_empty(rxq)) { - skb_array_unconsume(nvq->rx_array, rxq->queue + rxq->head, - vhost_net_buf_get_size(rxq)); + if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) { + ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head, + vhost_net_buf_get_size(rxq), + __skb_array_destroy_skb); rxq->head = rxq->tail = 0; } }
+ static int vhost_net_buf_peek_len(void *ptr) + { + if (tun_is_xdp_buff(ptr)) { + struct xdp_buff *xdp = tun_ptr_to_xdp(ptr); + + return xdp->data_end - xdp->data; + } + + return __skb_array_len_with_tag(ptr); + } + static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq) { struct vhost_net_buf *rxq = &nvq->rxq; @@@ -185,7 -197,7 +197,7 @@@ return 0;
out: - return __skb_array_len_with_tag(vhost_net_buf_get_ptr(rxq)); + return vhost_net_buf_peek_len(vhost_net_buf_get_ptr(rxq)); }
static void vhost_net_buf_init(struct vhost_net_buf *rxq) @@@ -583,7 -595,7 +595,7 @@@ static int peek_head_len(struct vhost_n int len = 0; unsigned long flags;
- if (rvq->rx_array) + if (rvq->rx_ring) return vhost_net_buf_peek(rvq);
spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); @@@ -744,7 -756,7 +756,7 @@@ static void handle_rx(struct vhost_net }; size_t total_len = 0; int err, mergeable; - s16 headcount; + s16 headcount, nheads = 0; size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; struct socket *sock; @@@ -772,7 -784,7 +784,7 @@@ while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk))) { sock_len += sock_hlen; vhost_len = sock_len + vhost_hlen; - headcount = get_rx_bufs(vq, vq->heads, vhost_len, + headcount = get_rx_bufs(vq, vq->heads + nheads, vhost_len, &in, vq_log, &log, likely(mergeable) ? UIO_MAXIOV : 1); /* On error, stop handling until the next kick. */ @@@ -790,7 -802,7 +802,7 @@@ * they refilled. */ goto out; } - if (nvq->rx_array) + if (nvq->rx_ring) msg.msg_control = vhost_net_buf_consume(&nvq->rxq); /* On overrun, truncate and discard */ if (unlikely(headcount > UIO_MAXIOV)) { @@@ -844,8 -856,12 +856,12 @@@ vhost_discard_vq_desc(vq, headcount); goto out; } - vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, - headcount); + nheads += headcount; + if (nheads > VHOST_RX_BATCH) { + vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, + nheads); + nheads = 0; + } if (unlikely(vq_log)) vhost_log_write(vq, vq_log, log, vhost_len); total_len += vhost_len; @@@ -856,6 -872,9 +872,9 @@@ } vhost_net_enable_vq(net, vq); out: + if (nheads) + vhost_add_used_and_signal_n(&net->dev, vq, vq->heads, + nheads); mutex_unlock(&vq->mutex); }
@@@ -896,7 -915,7 +915,7 @@@ static int vhost_net_open(struct inode struct vhost_net *n; struct vhost_dev *dev; struct vhost_virtqueue **vqs; - struct sk_buff **queue; + void **queue; int i;
n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL); @@@ -908,7 -927,7 +927,7 @@@ return -ENOMEM; }
- queue = kmalloc_array(VHOST_RX_BATCH, sizeof(struct sk_buff *), + queue = kmalloc_array(VHOST_RX_BATCH, sizeof(void *), GFP_KERNEL); if (!queue) { kfree(vqs); @@@ -1046,23 -1065,23 +1065,23 @@@ err return ERR_PTR(r); }
- static struct skb_array *get_tap_skb_array(int fd) + static struct ptr_ring *get_tap_ptr_ring(int fd) { - struct skb_array *array; + struct ptr_ring *ring; struct file *file = fget(fd);
if (!file) return NULL; - array = tun_get_skb_array(file); - if (!IS_ERR(array)) + ring = tun_get_tx_ring(file); + if (!IS_ERR(ring)) goto out; - array = tap_get_skb_array(file); - if (!IS_ERR(array)) + ring = tap_get_ptr_ring(file); + if (!IS_ERR(ring)) goto out; - array = NULL; + ring = NULL; out: fput(file); - return array; + return ring; }
static struct socket *get_tap_socket(int fd) @@@ -1143,7 -1162,7 +1162,7 @@@ static long vhost_net_set_backend(struc vq->private_data = sock; vhost_net_buf_unproduce(nvq); if (index == VHOST_NET_VQ_RX) - nvq->rx_array = get_tap_skb_array(fd); + nvq->rx_ring = get_tap_ptr_ring(fd); r = vhost_vq_init_access(vq); if (r) goto err_used; @@@ -1353,7 -1372,7 +1372,7 @@@ static ssize_t vhost_net_chr_write_iter return vhost_chr_write_iter(dev, from); }
-static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait) +static __poll_t vhost_net_chr_poll(struct file *file, poll_table *wait) { struct vhost_net *n = file->private_data; struct vhost_dev *dev = &n->dev; diff --combined fs/btrfs/disk-io.c index e5a4faf9e304,5da18ebc9222..bf31663de6b7 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@@ -30,6 -30,7 +30,7 @@@ #include <linux/ratelimit.h> #include <linux/uuid.h> #include <linux/semaphore.h> + #include <linux/bpf.h> #include <asm/unaligned.h> #include "ctree.h" #include "disk-io.h" @@@ -220,7 -221,7 +221,7 @@@ void btrfs_set_buffer_lockdep_class(u6 * extents on the btree inode are pretty simple, there's one extent * that covers the entire device */ -static struct extent_map *btree_get_extent(struct btrfs_inode *inode, +struct extent_map *btree_get_extent(struct btrfs_inode *inode, struct page *page, size_t pg_offset, u64 start, u64 len, int create) { @@@ -285,7 -286,7 +286,7 @@@ static int csum_tree_block(struct btrfs int verify) { u16 csum_size = btrfs_super_csum_size(fs_info->super_copy); - char *result = NULL; + char result[BTRFS_CSUM_SIZE]; unsigned long len; unsigned long cur_len; unsigned long offset = BTRFS_CSUM_SIZE; @@@ -294,6 -295,7 +295,6 @@@ unsigned long map_len; int err; u32 crc = ~(u32)0; - unsigned long inline_result;
len = buf->len - offset; while (len > 0) { @@@ -307,7 -309,13 +308,7 @@@ len -= cur_len; offset += cur_len; } - if (csum_size > sizeof(inline_result)) { - result = kzalloc(csum_size, GFP_NOFS); - if (!result) - return -ENOMEM; - } else { - result = (char *)&inline_result; - } + memset(result, 0, BTRFS_CSUM_SIZE);
btrfs_csum_final(crc, result);
@@@ -322,12 -330,15 +323,12 @@@ "%s checksum verify failed on %llu wanted %X found %X level %d", fs_info->sb->s_id, buf->start, val, found, btrfs_header_level(buf)); - if (result != (char *)&inline_result) - kfree(result); return -EUCLEAN; } } else { write_extent_buffer(buf, result, 0, csum_size); } - if (result != (char *)&inline_result) - kfree(result); + return 0; }
@@@ -381,7 -392,7 +382,7 @@@ static int verify_parent_transid(struc clear_extent_buffer_uptodate(eb); out: unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, - &cached_state, GFP_NOFS); + &cached_state); if (need_lock) btrfs_tree_read_unlock_blocking(eb); return ret; @@@ -445,7 -456,7 +446,7 @@@ static int btree_read_extent_buffer_pag io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; while (1) { ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE, - btree_get_extent, mirror_num); + mirror_num); if (!ret) { if (!verify_parent_transid(io_tree, eb, parent_transid, 0)) @@@ -855,8 -866,6 +856,8 @@@ static blk_status_t btree_submit_bio_ho int async = check_async_write(BTRFS_I(inode)); blk_status_t ret;
+ bio_associate_blkcg(bio, blkcg_root_css); + if (bio_op(bio) != REQ_OP_WRITE) { /* * called for a read, do the setup so that checksum validation @@@ -1004,7 -1013,7 +1005,7 @@@ void readahead_tree_block(struct btrfs_ if (IS_ERR(buf)) return; read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, - buf, WAIT_NONE, btree_get_extent, 0); + buf, WAIT_NONE, 0); free_extent_buffer(buf); }
@@@ -1023,7 -1032,7 +1024,7 @@@ int reada_tree_block_flagged(struct btr set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK, - btree_get_extent, mirror_num); + mirror_num); if (ret) { free_extent_buffer(buf); return ret; @@@ -1160,7 -1169,6 +1161,7 @@@ static void __setup_root(struct btrfs_r spin_lock_init(&root->accounting_lock); spin_lock_init(&root->log_extents_lock[0]); spin_lock_init(&root->log_extents_lock[1]); + spin_lock_init(&root->qgroup_meta_rsv_lock); mutex_init(&root->objectid_mutex); mutex_init(&root->log_mutex); mutex_init(&root->ordered_extent_mutex); @@@ -1177,6 -1185,7 +1178,6 @@@ atomic_set(&root->orphan_inodes, 0); refcount_set(&root->refs, 1); atomic_set(&root->will_be_snapshotted, 0); - atomic64_set(&root->qgroup_meta_rsv, 0); root->log_transid = 0; root->log_transid_committed = -1; root->last_log_commit = 0; @@@ -1235,7 -1244,7 +1236,7 @@@ struct btrfs_root *btrfs_create_tree(st struct btrfs_root *root; struct btrfs_key key; int ret = 0; - uuid_le uuid; + uuid_le uuid = NULL_UUID_LE;
root = btrfs_alloc_root(fs_info, GFP_KERNEL); if (!root) @@@ -1276,8 -1285,7 +1277,8 @@@ btrfs_set_root_used(&root->root_item, leaf->len); btrfs_set_root_last_snapshot(&root->root_item, 0); btrfs_set_root_dirid(&root->root_item, 0); - uuid_le_gen(&uuid); + if (is_fstree(objectid)) + uuid_le_gen(&uuid); memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE); root->root_item.drop_level = 0;
@@@ -2868,7 -2876,7 +2869,7 @@@ retry_root_backup goto fail_sysfs; }
- if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info)) { + if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) { btrfs_warn(fs_info, "writeable mount is not allowed due to too many missing devices"); goto fail_sysfs; @@@ -3116,6 -3124,7 +3117,7 @@@ recovery_tree_root goto fail_block_groups; goto retry_root_backup; } + BPF_ALLOW_ERROR_INJECTION(open_ctree);
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) { @@@ -3343,8 -3352,6 +3345,8 @@@ static void write_dev_flush(struct btrf return;
bio_reset(bio); + bio_associate_blkcg(bio, blkcg_root_css); + bio->bi_end_io = btrfs_end_empty_barrier; bio_set_dev(bio, device->bdev); bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; @@@ -3352,7 -3359,7 +3354,7 @@@ bio->bi_private = &device->flush_wait;
btrfsic_submit_bio(bio); - device->flush_bio_sent = 1; + set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state); }
/* @@@ -3362,10 -3369,10 +3364,10 @@@ static blk_status_t wait_dev_flush(stru { struct bio *bio = device->flush_bio;
- if (!device->flush_bio_sent) + if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state)) return BLK_STS_OK;
- device->flush_bio_sent = 0; + clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state); wait_for_completion_io(&device->flush_wait);
return bio->bi_status; @@@ -3373,7 -3380,7 +3375,7 @@@
static int check_barrier_error(struct btrfs_fs_info *fs_info) { - if (!btrfs_check_rw_degradable(fs_info)) + if (!btrfs_check_rw_degradable(fs_info, NULL)) return -EIO; return 0; } @@@ -3389,16 -3396,14 +3391,16 @@@ static int barrier_all_devices(struct b int errors_wait = 0; blk_status_t ret;
+ lockdep_assert_held(&info->fs_devices->device_list_mutex); /* send down all the barriers */ head = &info->fs_devices->devices; - list_for_each_entry_rcu(dev, head, dev_list) { - if (dev->missing) + list_for_each_entry(dev, head, dev_list) { + if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) continue; if (!dev->bdev) continue; - if (!dev->in_fs_metadata || !dev->writeable) + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue;
write_dev_flush(dev); @@@ -3406,15 -3411,14 +3408,15 @@@ }
/* wait for all the barriers */ - list_for_each_entry_rcu(dev, head, dev_list) { - if (dev->missing) + list_for_each_entry(dev, head, dev_list) { + if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state)) continue; if (!dev->bdev) { errors_wait++; continue; } - if (!dev->in_fs_metadata || !dev->writeable) + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue;
ret = wait_dev_flush(dev); @@@ -3506,13 -3510,12 +3508,13 @@@ int write_all_supers(struct btrfs_fs_in } }
- list_for_each_entry_rcu(dev, head, dev_list) { + list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) { total_errors++; continue; } - if (!dev->in_fs_metadata || !dev->writeable) + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue;
btrfs_set_stack_device_generation(dev_item, 0); @@@ -3548,11 -3551,10 +3550,11 @@@ }
total_errors = 0; - list_for_each_entry_rcu(dev, head, dev_list) { + list_for_each_entry(dev, head, dev_list) { if (!dev->bdev) continue; - if (!dev->in_fs_metadata || !dev->writeable) + if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) || + !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) continue;
ret = wait_dev_supers(dev, max_mirrors); diff --combined fs/btrfs/free-space-cache.c index 9e8c1f046e02,fb1382893bfc..9088b0b0d10f --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@@ -22,6 -22,7 +22,7 @@@ #include <linux/slab.h> #include <linux/math64.h> #include <linux/ratelimit.h> + #include <linux/bpf.h> #include "ctree.h" #include "free-space-cache.h" #include "transaction.h" @@@ -332,6 -333,7 +333,7 @@@ static int io_ctl_init(struct btrfs_io_
return 0; } + BPF_ALLOW_ERROR_INJECTION(io_ctl_init);
static void io_ctl_free(struct btrfs_io_ctl *io_ctl) { @@@ -993,7 -995,8 +995,7 @@@ update_cache_item(struct btrfs_trans_ha ret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (ret < 0) { clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, - GFP_NOFS); + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL); goto fail; } leaf = path->nodes[0]; @@@ -1007,7 -1010,7 +1009,7 @@@ clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, - NULL, GFP_NOFS); + NULL); btrfs_release_path(path); goto fail; } @@@ -1104,7 -1107,8 +1106,7 @@@ static int flush_dirty_cache(struct ino ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); if (ret) clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, - EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, - GFP_NOFS); + EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
return ret; } @@@ -1125,7 -1129,8 +1127,7 @@@ cleanup_write_cache_enospc(struct inod { io_ctl_drop_pages(io_ctl); unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, - i_size_read(inode) - 1, cached_state, - GFP_NOFS); + i_size_read(inode) - 1, cached_state); }
static int __btrfs_wait_cache_io(struct btrfs_root *root, @@@ -1319,7 -1324,7 +1321,7 @@@ static int __btrfs_write_out_cache(stru io_ctl_drop_pages(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, - i_size_read(inode) - 1, &cached_state, GFP_NOFS); + i_size_read(inode) - 1, &cached_state);
/* * at this point the pages are under IO and we're happy, @@@ -3545,7 -3550,7 +3547,7 @@@ int btrfs_write_out_ino_cache(struct bt if (ret) { if (release_metadata) btrfs_delalloc_release_metadata(BTRFS_I(inode), - inode->i_size); + inode->i_size, true); #ifdef DEBUG btrfs_err(fs_info, "failed to write free ino cache for root %llu", diff --combined include/linux/bpf.h index 0b25cf87b6d6,6be837c063c3..44f26f6df8fc --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@@ -17,6 -17,7 +17,7 @@@ #include <linux/numa.h> #include <linux/wait.h>
+ struct bpf_verifier_env; struct perf_event; struct bpf_prog; struct bpf_map; @@@ -43,14 -44,7 +44,14 @@@ struct bpf_map_ops };
struct bpf_map { - atomic_t refcnt; + /* 1st cacheline with read-mostly members of which some + * are also accessed in fast-path (e.g. ops, max_entries). + */ + const struct bpf_map_ops *ops ____cacheline_aligned; + struct bpf_map *inner_map_meta; +#ifdef CONFIG_SECURITY + void *security; +#endif enum bpf_map_type map_type; u32 key_size; u32 value_size; @@@ -59,17 -53,15 +60,17 @@@ u32 pages; u32 id; int numa_node; - struct user_struct *user; - const struct bpf_map_ops *ops; - struct work_struct work; + bool unpriv_array; + /* 7 bytes hole */ + + /* 2nd cacheline with misc members to avoid false sharing + * particularly with refcounting. + */ + struct user_struct *user ____cacheline_aligned; + atomic_t refcnt; atomic_t usercnt; - struct bpf_map *inner_map_meta; + struct work_struct work; char name[BPF_OBJ_NAME_LEN]; -#ifdef CONFIG_SECURITY - void *security; -#endif };
/* function argument constraints */ @@@ -193,14 -185,18 +194,18 @@@ struct bpf_verifier_ops struct bpf_prog *prog, u32 *target_size); };
+ struct bpf_prog_offload_ops { + int (*insn_hook)(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx); + }; + struct bpf_dev_offload { struct bpf_prog *prog; struct net_device *netdev; void *dev_priv; struct list_head offloads; bool dev_state; - bool verifier_running; - wait_queue_head_t verifier_done; + const struct bpf_prog_offload_ops *dev_ops; };
struct bpf_prog_aux { @@@ -209,6 -205,10 +214,10 @@@ u32 max_ctx_offset; u32 stack_depth; u32 id; + u32 func_cnt; + bool offload_requested; + struct bpf_prog **func; + void *jit_data; /* JIT specific data. arch dependent */ struct latch_tree_node ksym_tnode; struct list_head ksym_lnode; const struct bpf_prog_ops *ops; @@@ -230,7 -230,6 +239,7 @@@ struct bpf_array { struct bpf_map map; u32 elem_size; + u32 index_mask; /* 'ownership' of prog_array is claimed by the first program that * is going to use this map or by the first program which FD is stored * in the map to make sure that all callers and callees have the same @@@ -295,6 -294,9 +304,9 @@@ int bpf_prog_array_copy_to_user(struct
void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, struct bpf_prog *old_prog); + int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, + __u32 __user *prog_ids, u32 request_cnt, + __u32 __user *prog_cnt); int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, struct bpf_prog *exclude_prog, struct bpf_prog *include_prog, @@@ -355,6 -357,8 +367,8 @@@ void bpf_prog_put(struct bpf_prog *prog int __bpf_prog_charge(struct user_struct *user, u32 pages); void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
+ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); + struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref); @@@ -409,6 -413,7 +423,7 @@@ static inline void bpf_long_memcpy(voi
/* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); + void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
/* Map specifics */ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); @@@ -536,13 -541,15 +551,15 @@@ bool bpf_prog_get_ok(struct bpf_prog *
int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); + int bpf_prog_offload_info_fill(struct bpf_prog_info *info, + struct bpf_prog *prog);
#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) { - return aux->offload; + return aux->offload_requested; } #else static inline int bpf_prog_offload_init(struct bpf_prog *prog, @@@ -557,7 -564,7 +574,7 @@@ static inline bool bpf_prog_is_dev_boun } #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
- #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) + #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET) struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); #else @@@ -596,4 -603,15 +613,15 @@@ extern const struct bpf_func_proto bpf_ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) + #ifdef CONFIG_BPF_KPROBE_OVERRIDE + #define BPF_ALLOW_ERROR_INJECTION(fname) \ + static unsigned long __used \ + __attribute__((__section__("_kprobe_error_inject_list"))) \ + _eil_addr_##fname = (unsigned long)fname; + #else + #define BPF_ALLOW_ERROR_INJECTION(fname) + #endif + #endif + #endif /* _LINUX_BPF_H */ diff --combined include/linux/module.h index e6249795f9e2,548fa09fa806..0fd65481c045 --- a/include/linux/module.h +++ b/include/linux/module.h @@@ -475,6 -475,11 +475,11 @@@ struct module ctor_fn_t *ctors; unsigned int num_ctors; #endif + + #ifdef CONFIG_BPF_KPROBE_OVERRIDE + unsigned int num_kprobe_ei_funcs; + unsigned long *kprobe_ei_funcs; + #endif } ____cacheline_aligned __randomize_layout; #ifndef MODULE_ARCH_INIT #define MODULE_ARCH_INIT {} @@@ -606,9 -611,6 +611,9 @@@ int ref_module(struct module *a, struc __mod ? __mod->name : "kernel"; \ })
+/* Dereference module function descriptor */ +void *dereference_module_function_descriptor(struct module *mod, void *ptr); + /* For kallsyms to ask for address resolution. namebuf should be at * least KSYM_NAME_LEN long: a pointer to namebuf is returned if * found, otherwise NULL. */ @@@ -763,13 -765,6 +768,13 @@@ static inline bool is_module_sig_enforc return false; }
+/* Dereference module function descriptor */ +static inline +void *dereference_module_function_descriptor(struct module *mod, void *ptr) +{ + return ptr; +} + #endif /* CONFIG_MODULES */
#ifdef CONFIG_SYSFS diff --combined include/linux/pci.h index 95807535d175,0314e0716c30..66cca1c6f742 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@@ -48,17 -48,17 +48,17 @@@ * In the interest of not exposing interfaces to user-space unnecessarily, * the following kernel-only defines are being added here. */ -#define PCI_DEVID(bus, devfn) ((((u16)(bus)) << 8) | (devfn)) +#define PCI_DEVID(bus, devfn) ((((u16)(bus)) << 8) | (devfn)) /* return bus from PCI devid = ((u16)bus_number) << 8) | devfn */ #define PCI_BUS_NUM(x) (((x) >> 8) & 0xff)
/* pci_slot represents a physical slot */ struct pci_slot { - struct pci_bus *bus; /* The bus this slot is on */ - struct list_head list; /* node in list of slots on this bus */ - struct hotplug_slot *hotplug; /* Hotplug info (migrate over time) */ - unsigned char number; /* PCI_SLOT(pci_dev->devfn) */ - struct kobject kobj; + struct pci_bus *bus; /* Bus this slot is on */ + struct list_head list; /* Node in list of slots */ + struct hotplug_slot *hotplug; /* Hotplug info (move here) */ + unsigned char number; /* PCI_SLOT(pci_dev->devfn) */ + struct kobject kobj; };
static inline const char *pci_slot_name(const struct pci_slot *slot) @@@ -72,7 -72,9 +72,7 @@@ enum pci_mmap_state pci_mmap_mem };
-/* - * For PCI devices, the region numbers are assigned this way: - */ +/* For PCI devices, the region numbers are assigned this way: */ enum { /* #0-5: standard PCI resources */ PCI_STD_RESOURCES, @@@ -81,23 -83,23 +81,23 @@@ /* #6: expansion ROM resource */ PCI_ROM_RESOURCE,
- /* device specific resources */ + /* Device-specific resources */ #ifdef CONFIG_PCI_IOV PCI_IOV_RESOURCES, PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1, #endif
- /* resources assigned to buses behind the bridge */ + /* Resources assigned to buses behind the bridge */ #define PCI_BRIDGE_RESOURCE_NUM 4
PCI_BRIDGE_RESOURCES, PCI_BRIDGE_RESOURCE_END = PCI_BRIDGE_RESOURCES + PCI_BRIDGE_RESOURCE_NUM - 1,
- /* total resources associated with a PCI device */ + /* Total resources associated with a PCI device */ PCI_NUM_RESOURCES,
- /* preserve this for compatibility */ + /* Preserve this for compatibility */ DEVICE_COUNT_RESOURCE = PCI_NUM_RESOURCES, };
@@@ -150,10 -152,9 +150,10 @@@ static inline const char *pci_power_nam #define PCI_PM_D3COLD_WAIT 100 #define PCI_PM_BUS_WAIT 50
-/** The pci_channel state describes connectivity between the CPU and - * the pci device. If some PCI bus between here and the pci device - * has crashed or locked up, this info is reflected here. +/** + * The pci_channel state describes connectivity between the CPU and + * the PCI device. If some PCI bus between here and the PCI device + * has crashed or locked up, this info is reflected here. */ typedef unsigned int __bitwise pci_channel_state_t;
@@@ -183,7 -184,9 +183,7 @@@ enum pcie_reset_state
typedef unsigned short __bitwise pci_dev_flags_t; enum pci_dev_flags { - /* INTX_DISABLE in PCI_COMMAND register disables MSI - * generation too. - */ + /* INTX_DISABLE in PCI_COMMAND register disables MSI too */ PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) (1 << 0), /* Device configuration is irrevocably lost if disabled into D3 */ PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) (1 << 1), @@@ -199,7 -202,7 +199,7 @@@ PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7), /* Get VPD from function 0 VPD */ PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8), - /* a non-root bridge where translation occurs, stop alias search here */ + /* A non-root bridge where translation occurs, stop alias search here */ PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9), /* Do not use FLR even if device advertises PCI_AF_CAP */ PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), @@@ -219,17 -222,17 +219,17 @@@ enum pci_bus_flags PCI_BUS_FLAGS_NO_AERSID = (__force pci_bus_flags_t) 4, };
-/* These values come from the PCI Express Spec */ +/* Values from Link Status register, PCIe r3.1, sec 7.8.8 */ enum pcie_link_width { PCIE_LNK_WIDTH_RESRV = 0x00, PCIE_LNK_X1 = 0x01, PCIE_LNK_X2 = 0x02, PCIE_LNK_X4 = 0x04, PCIE_LNK_X8 = 0x08, - PCIE_LNK_X12 = 0x0C, + PCIE_LNK_X12 = 0x0c, PCIE_LNK_X16 = 0x10, PCIE_LNK_X32 = 0x20, - PCIE_LNK_WIDTH_UNKNOWN = 0xFF, + PCIE_LNK_WIDTH_UNKNOWN = 0xff, };
/* Based on the PCI Hotplug Spec, but some values are made up by us */ @@@ -260,15 -263,15 +260,15 @@@ enum pci_bus_speed };
struct pci_cap_saved_data { - u16 cap_nr; - bool cap_extended; - unsigned int size; - u32 data[0]; + u16 cap_nr; + bool cap_extended; + unsigned int size; + u32 data[0]; };
struct pci_cap_saved_state { - struct hlist_node next; - struct pci_cap_saved_data cap; + struct hlist_node next; + struct pci_cap_saved_data cap; };
struct irq_affinity; @@@ -277,17 -280,19 +277,17 @@@ struct pci_vpd struct pci_sriov; struct pci_ats;
-/* - * The pci_dev structure is used to describe PCI devices. - */ +/* The pci_dev structure describes PCI devices */ struct pci_dev { - struct list_head bus_list; /* node in per-bus list */ - struct pci_bus *bus; /* bus this device is on */ - struct pci_bus *subordinate; /* bus this device bridges to */ + struct list_head bus_list; /* Node in per-bus list */ + struct pci_bus *bus; /* Bus this device is on */ + struct pci_bus *subordinate; /* Bus this device bridges to */
- void *sysdata; /* hook for sys-specific extension */ - struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */ + void *sysdata; /* Hook for sys-specific extension */ + struct proc_dir_entry *procent; /* Device entry in /proc/bus/pci */ struct pci_slot *slot; /* Physical slot this device is in */
- unsigned int devfn; /* encoded device & function index */ + unsigned int devfn; /* Encoded device & function index */ unsigned short vendor; unsigned short device; unsigned short subsystem_vendor; @@@ -302,12 -307,12 +302,12 @@@ u8 msi_cap; /* MSI capability offset */ u8 msix_cap; /* MSI-X capability offset */ u8 pcie_mpss:3; /* PCIe Max Payload Size Supported */ - u8 rom_base_reg; /* which config register controls the ROM */ - u8 pin; /* which interrupt pin this device uses */ - u16 pcie_flags_reg; /* cached PCIe Capabilities Register */ - unsigned long *dma_alias_mask;/* mask of enabled devfn aliases */ + u8 rom_base_reg; /* Config register controlling ROM */ + u8 pin; /* Interrupt pin this device uses */ + u16 pcie_flags_reg; /* Cached PCIe Capabilities Register */ + unsigned long *dma_alias_mask;/* Mask of enabled devfn aliases */
- struct pci_driver *driver; /* which driver has allocated this device */ + struct pci_driver *driver; /* Driver bound to this device */ u64 dma_mask; /* Mask of the bits of bus address this device implements. Normally this is 0xffffffff. You only need to change @@@ -316,9 -321,9 +316,9 @@@
struct device_dma_parameters dma_parms;
- pci_power_t current_state; /* Current operating state. In ACPI-speak, - this is D0-D3, D0 being fully functional, - and D3 being off. */ + pci_power_t current_state; /* Current operating state. In ACPI, + this is D0-D3, D0 being fully + functional, and D3 being off. */ u8 pm_cap; /* PM capability offset */ unsigned int pme_support:5; /* Bitmask of states from which PME# can be generated */ @@@ -329,10 -334,10 +329,10 @@@ unsigned int no_d3cold:1; /* D3cold is forbidden */ unsigned int bridge_d3:1; /* Allow D3 for bridge */ unsigned int d3cold_allowed:1; /* D3cold is allowed by user */ - unsigned int mmio_always_on:1; /* disallow turning off io/mem - decoding during bar sizing */ + unsigned int mmio_always_on:1; /* Disallow turning off io/mem + decoding during BAR sizing */ unsigned int wakeup_prepared:1; - unsigned int runtime_d3cold:1; /* whether go through runtime + unsigned int runtime_d3cold:1; /* Whether go through runtime D3cold, not set for devices powered on/off by the corresponding bridge */ @@@ -345,14 -350,12 +345,14 @@@
#ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state */ + unsigned int ltr_path:1; /* Latency Tolerance Reporting + supported from root to here */ #endif
- pci_channel_state_t error_state; /* current connectivity state */ - struct device dev; /* Generic device interface */ + pci_channel_state_t error_state; /* Current connectivity state */ + struct device dev; /* Generic device interface */
- int cfg_size; /* Size of configuration space */ + int cfg_size; /* Size of config space */
/* * Instead of touching interrupt line and base address registers @@@ -361,47 -364,47 +361,47 @@@ unsigned int irq; struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
- bool match_driver; /* Skip attaching driver */ - /* These fields are used by common fixups */ - unsigned int transparent:1; /* Subtractive decode PCI bridge */ - unsigned int multifunction:1;/* Part of multi-function device */ - /* keep track of device state */ + bool match_driver; /* Skip attaching driver */ + + unsigned int transparent:1; /* Subtractive decode bridge */ + unsigned int multifunction:1; /* Multi-function device */ + unsigned int is_added:1; - unsigned int is_busmaster:1; /* device is busmaster */ - unsigned int no_msi:1; /* device may not use msi */ - unsigned int no_64bit_msi:1; /* device may only use 32-bit MSIs */ - unsigned int block_cfg_access:1; /* config space access is blocked */ - unsigned int broken_parity_status:1; /* Device generates false positive parity */ - unsigned int irq_reroute_variant:2; /* device needs IRQ rerouting variant */ + unsigned int is_busmaster:1; /* Is busmaster */ + unsigned int no_msi:1; /* May not use MSI */ + unsigned int no_64bit_msi:1; /* May only use 32-bit MSIs */ + unsigned int block_cfg_access:1; /* Config space access blocked */ + unsigned int broken_parity_status:1; /* Generates false positive parity */ + unsigned int irq_reroute_variant:2; /* Needs IRQ rerouting variant */ unsigned int msi_enabled:1; unsigned int msix_enabled:1; - unsigned int ari_enabled:1; /* ARI forwarding */ - unsigned int ats_enabled:1; /* Address Translation Service */ + unsigned int ari_enabled:1; /* ARI forwarding */ + unsigned int ats_enabled:1; /* Address Translation Svc */ unsigned int pasid_enabled:1; /* Process Address Space ID */ unsigned int pri_enabled:1; /* Page Request Interface */ unsigned int is_managed:1; - unsigned int needs_freset:1; /* Dev requires fundamental reset */ + unsigned int needs_freset:1; /* Requires fundamental reset */ unsigned int state_saved:1; unsigned int is_physfn:1; unsigned int is_virtfn:1; unsigned int reset_fn:1; - unsigned int is_hotplug_bridge:1; - unsigned int is_thunderbolt:1; /* Thunderbolt controller */ - unsigned int __aer_firmware_first_valid:1; + unsigned int is_hotplug_bridge:1; + unsigned int is_thunderbolt:1; /* Thunderbolt controller */ + unsigned int __aer_firmware_first_valid:1; unsigned int __aer_firmware_first:1; - unsigned int broken_intx_masking:1; /* INTx masking can't be used */ - unsigned int io_window_1k:1; /* Intel P2P bridge 1K I/O windows */ + unsigned int broken_intx_masking:1; /* INTx masking can't be used */ + unsigned int io_window_1k:1; /* Intel bridge 1K I/O windows */ unsigned int irq_managed:1; unsigned int has_secondary_link:1; - unsigned int non_compliant_bars:1; /* broken BARs; ignore them */ - unsigned int is_probed:1; /* device probing in progress */ + unsigned int non_compliant_bars:1; /* Broken BARs; ignore them */ + unsigned int is_probed:1; /* Device probing in progress */ pci_dev_flags_t dev_flags; atomic_t enable_cnt; /* pci_enable_device has been called */
- u32 saved_config_space[16]; /* config space saved at suspend time */ + u32 saved_config_space[16]; /* Config space saved at suspend time */ struct hlist_head saved_cap_space; - struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */ - int rom_attr_enabled; /* has display of the rom attribute been enabled? */ + struct bin_attribute *rom_attr; /* Attribute descriptor for sysfs ROM entry */ + int rom_attr_enabled; /* Display of ROM attribute enabled? */ struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */ struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
@@@ -416,12 -419,12 +416,12 @@@ struct pci_vpd *vpd; #ifdef CONFIG_PCI_ATS union { - struct pci_sriov *sriov; /* SR-IOV capability related */ - struct pci_dev *physfn; /* the PF this VF is associated with */ + struct pci_sriov *sriov; /* PF: SR-IOV info */ + struct pci_dev *physfn; /* VF: related PF */ }; u16 ats_cap; /* ATS Capability offset */ u8 ats_stu; /* ATS Smallest Translation Unit */ - atomic_t ats_ref_cnt; /* number of VFs with ATS enabled */ + atomic_t ats_ref_cnt; /* Number of VFs with ATS enabled */ #endif #ifdef CONFIG_PCI_PRI u32 pri_reqs_alloc; /* Number of PRI requests allocated */ @@@ -429,11 -432,11 +429,11 @@@ #ifdef CONFIG_PCI_PASID u16 pasid_features; #endif - phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */ - size_t romlen; /* Length of ROM if it's not from the BAR */ - char *driver_override; /* Driver name to force a match */ + phys_addr_t rom; /* Physical address if not from BAR */ + size_t romlen; /* Length if not from BAR */ + char *driver_override; /* Driver name to force a match */
- unsigned long priv_flags; /* Private flags for the pci driver */ + unsigned long priv_flags; /* Private flags for the PCI driver */ };
static inline struct pci_dev *pci_physfn(struct pci_dev *dev) @@@ -456,26 -459,26 +456,26 @@@ static inline int pci_channel_offline(s }
struct pci_host_bridge { - struct device dev; - struct pci_bus *bus; /* root bus */ - struct pci_ops *ops; - void *sysdata; - int busnr; + struct device dev; + struct pci_bus *bus; /* Root bus */ + struct pci_ops *ops; + void *sysdata; + int busnr; struct list_head windows; /* resource_entry */ - u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* platform IRQ swizzler */ + u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* Platform IRQ swizzler */ int (*map_irq)(const struct pci_dev *, u8, u8); void (*release_fn)(struct pci_host_bridge *); - void *release_data; + void *release_data; struct msi_controller *msi; - unsigned int ignore_reset_delay:1; /* for entire hierarchy */ - unsigned int no_ext_tags:1; /* no Extended Tags */ + unsigned int ignore_reset_delay:1; /* For entire hierarchy */ + unsigned int no_ext_tags:1; /* No Extended Tags */ /* Resource alignment requirements */ resource_size_t (*align_resource)(struct pci_dev *dev, const struct resource *res, resource_size_t start, resource_size_t size, resource_size_t align); - unsigned long private[0] ____cacheline_aligned; + unsigned long private[0] ____cacheline_aligned; };
#define to_pci_host_bridge(n) container_of(n, struct pci_host_bridge, dev) @@@ -497,8 -500,8 +497,8 @@@ void pci_free_host_bridge(struct pci_ho struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
void pci_set_host_bridge_release(struct pci_host_bridge *bridge, - void (*release_fn)(struct pci_host_bridge *), - void *release_data); + void (*release_fn)(struct pci_host_bridge *), + void *release_data);
int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge);
@@@ -518,32 -521,32 +518,32 @@@ #define PCI_SUBTRACTIVE_DECODE 0x1
struct pci_bus_resource { - struct list_head list; - struct resource *res; - unsigned int flags; + struct list_head list; + struct resource *res; + unsigned int flags; };
#define PCI_REGION_FLAG_MASK 0x0fU /* These bits of resource flags tell us the PCI region flags */
struct pci_bus { - struct list_head node; /* node in list of buses */ - struct pci_bus *parent; /* parent bus this bridge is on */ - struct list_head children; /* list of child buses */ - struct list_head devices; /* list of devices on this bus */ - struct pci_dev *self; /* bridge device as seen by parent */ - struct list_head slots; /* list of slots on this bus; + struct list_head node; /* Node in list of buses */ + struct pci_bus *parent; /* Parent bus this bridge is on */ + struct list_head children; /* List of child buses */ + struct list_head devices; /* List of devices on this bus */ + struct pci_dev *self; /* Bridge device as seen by parent */ + struct list_head slots; /* List of slots on this bus; protected by pci_slot_mutex */ struct resource *resource[PCI_BRIDGE_RESOURCE_NUM]; - struct list_head resources; /* address space routed to this bus */ - struct resource busn_res; /* bus numbers routed to this bus */ + struct list_head resources; /* Address space routed to this bus */ + struct resource busn_res; /* Bus numbers routed to this bus */
- struct pci_ops *ops; /* configuration access functions */ + struct pci_ops *ops; /* Configuration access functions */ struct msi_controller *msi; /* MSI controller */ - void *sysdata; /* hook for sys-specific extension */ - struct proc_dir_entry *procdir; /* directory entry in /proc/bus/pci */ + void *sysdata; /* Hook for sys-specific extension */ + struct proc_dir_entry *procdir; /* Directory entry in /proc/bus/pci */
- unsigned char number; /* bus number */ - unsigned char primary; /* number of primary bridge */ + unsigned char number; /* Bus number */ + unsigned char primary; /* Number of primary bridge */ unsigned char max_bus_speed; /* enum pci_bus_speed */ unsigned char cur_bus_speed; /* enum pci_bus_speed */ #ifdef CONFIG_PCI_DOMAINS_GENERIC @@@ -552,12 -555,12 +552,12 @@@
char name[48];
- unsigned short bridge_ctl; /* manage NO_ISA/FBB/et al behaviors */ - pci_bus_flags_t bus_flags; /* inherited by child buses */ + unsigned short bridge_ctl; /* Manage NO_ISA/FBB/et al behaviors */ + pci_bus_flags_t bus_flags; /* Inherited by child buses */ struct device *bridge; struct device dev; - struct bin_attribute *legacy_io; /* legacy I/O for this bus */ - struct bin_attribute *legacy_mem; /* legacy mem */ + struct bin_attribute *legacy_io; /* Legacy I/O for this bus */ + struct bin_attribute *legacy_mem; /* Legacy mem */ unsigned int is_added:1; };
@@@ -614,7 -617,9 +614,7 @@@ static inline bool pci_dev_msi_enabled( static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { return false; } #endif
-/* - * Error values that may be returned by PCI functions. - */ +/* Error values that may be returned by PCI functions */ #define PCIBIOS_SUCCESSFUL 0x00 #define PCIBIOS_FUNC_NOT_SUPPORTED 0x81 #define PCIBIOS_BAD_VENDOR_ID 0x83 @@@ -623,7 -628,9 +623,7 @@@ #define PCIBIOS_SET_FAILED 0x88 #define PCIBIOS_BUFFER_TOO_SMALL 0x89
-/* - * Translate above to generic errno for passing back through non-PCI code. - */ +/* Translate above to generic errno for passing back through non-PCI code */ static inline int pcibios_err_to_errno(int err) { if (err <= PCIBIOS_SUCCESSFUL) @@@ -673,13 -680,13 +673,13 @@@ typedef u32 pci_bus_addr_t #endif
struct pci_bus_region { - pci_bus_addr_t start; - pci_bus_addr_t end; + pci_bus_addr_t start; + pci_bus_addr_t end; };
struct pci_dynids { - spinlock_t lock; /* protects list, index */ - struct list_head list; /* for IDs added at runtime */ + spinlock_t lock; /* Protects list, index */ + struct list_head list; /* For IDs added at runtime */ };
@@@ -693,13 -700,13 +693,13 @@@ typedef unsigned int __bitwise pci_ers_result_t;
enum pci_ers_result { - /* no result/none/not supported in device driver */ + /* No result/none/not supported in device driver */ PCI_ERS_RESULT_NONE = (__force pci_ers_result_t) 1,
/* Device driver can recover without slot reset */ PCI_ERS_RESULT_CAN_RECOVER = (__force pci_ers_result_t) 2,
- /* Device driver wants slot to be reset. */ + /* Device driver wants slot to be reset */ PCI_ERS_RESULT_NEED_RESET = (__force pci_ers_result_t) 3,
/* Device has completely failed, is unrecoverable */ @@@ -735,27 -742,27 +735,27 @@@ struct pci_error_handlers
struct module; struct pci_driver { - struct list_head node; - const char *name; - const struct pci_device_id *id_table; /* must be non-NULL for probe to be called */ - int (*probe) (struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ - void (*remove) (struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ - int (*suspend) (struct pci_dev *dev, pm_message_t state); /* Device suspended */ - int (*suspend_late) (struct pci_dev *dev, pm_message_t state); - int (*resume_early) (struct pci_dev *dev); - int (*resume) (struct pci_dev *dev); /* Device woken up */ + struct list_head node; + const char *name; + const struct pci_device_id *id_table; /* Must be non-NULL for probe to be called */ + int (*probe)(struct pci_dev *dev, const struct pci_device_id *id); /* New device inserted */ + void (*remove)(struct pci_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ + int (*suspend)(struct pci_dev *dev, pm_message_t state); /* Device suspended */ + int (*suspend_late)(struct pci_dev *dev, pm_message_t state); + int (*resume_early)(struct pci_dev *dev); + int (*resume) (struct pci_dev *dev); /* Device woken up */ void (*shutdown) (struct pci_dev *dev); - int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* PF pdev */ + int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* On PF */ const struct pci_error_handlers *err_handler; const struct attribute_group **groups; struct device_driver driver; - struct pci_dynids dynids; + struct pci_dynids dynids; };
#define to_pci_driver(drv) container_of(drv, struct pci_driver, driver)
/** - * PCI_DEVICE - macro used to describe a specific pci device + * PCI_DEVICE - macro used to describe a specific PCI device * @vend: the 16 bit PCI Vendor ID * @dev: the 16 bit PCI Device ID * @@@ -768,7 -775,7 +768,7 @@@ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
/** - * PCI_DEVICE_SUB - macro used to describe a specific pci device with subsystem + * PCI_DEVICE_SUB - macro used to describe a specific PCI device with subsystem * @vend: the 16 bit PCI Vendor ID * @dev: the 16 bit PCI Device ID * @subvend: the 16 bit PCI Subvendor ID @@@ -782,7 -789,7 +782,7 @@@ .subvendor = (subvend), .subdevice = (subdev)
/** - * PCI_DEVICE_CLASS - macro used to describe a specific pci device class + * PCI_DEVICE_CLASS - macro used to describe a specific PCI device class * @dev_class: the class, subclass, prog-if triple for this device * @dev_class_mask: the class mask for this device * @@@ -796,7 -803,7 +796,7 @@@ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
/** - * PCI_VDEVICE - macro used to describe a specific pci device in short form + * PCI_VDEVICE - macro used to describe a specific PCI device in short form * @vend: the vendor name * @dev: the 16 bit PCI Device ID * @@@ -805,21 -812,22 +805,21 @@@ * to PCI_ANY_ID. The macro allows the next field to follow as the device * private data. */ - #define PCI_VDEVICE(vend, dev) \ .vendor = PCI_VENDOR_ID_##vend, .device = (dev), \ .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0
enum { - PCI_REASSIGN_ALL_RSRC = 0x00000001, /* ignore firmware setup */ - PCI_REASSIGN_ALL_BUS = 0x00000002, /* reassign all bus numbers */ - PCI_PROBE_ONLY = 0x00000004, /* use existing setup */ - PCI_CAN_SKIP_ISA_ALIGN = 0x00000008, /* don't do ISA alignment */ - PCI_ENABLE_PROC_DOMAINS = 0x00000010, /* enable domains in /proc */ + PCI_REASSIGN_ALL_RSRC = 0x00000001, /* Ignore firmware setup */ + PCI_REASSIGN_ALL_BUS = 0x00000002, /* Reassign all bus numbers */ + PCI_PROBE_ONLY = 0x00000004, /* Use existing setup */ + PCI_CAN_SKIP_ISA_ALIGN = 0x00000008, /* Don't do ISA alignment */ + PCI_ENABLE_PROC_DOMAINS = 0x00000010, /* Enable domains in /proc */ PCI_COMPAT_DOMAIN_0 = 0x00000020, /* ... except domain 0 */ - PCI_SCAN_ALL_PCIE_DEVS = 0x00000040, /* scan all, not just dev 0 */ + PCI_SCAN_ALL_PCIE_DEVS = 0x00000040, /* Scan all, not just dev 0 */ };
-/* these external functions are only available when PCI support is enabled */ +/* These external functions are only available when PCI support is enabled */ #ifdef CONFIG_PCI
extern unsigned int pci_flags; @@@ -832,11 -840,11 +832,11 @@@ static inline int pci_has_flag(int flag void pcie_bus_configure_settings(struct pci_bus *bus);
enum pcie_bus_config_types { - PCIE_BUS_TUNE_OFF, /* don't touch MPS at all */ - PCIE_BUS_DEFAULT, /* ensure MPS matches upstream bridge */ - PCIE_BUS_SAFE, /* use largest MPS boot-time devices support */ - PCIE_BUS_PERFORMANCE, /* use MPS and MRRS for best performance */ - PCIE_BUS_PEER2PEER, /* set MPS = 128 for all devices */ + PCIE_BUS_TUNE_OFF, /* Don't touch MPS at all */ + PCIE_BUS_DEFAULT, /* Ensure MPS matches upstream bridge */ + PCIE_BUS_SAFE, /* Use largest MPS boot-time devices support */ + PCIE_BUS_PERFORMANCE, /* Use MPS and MRRS for best performance */ + PCIE_BUS_PEER2PEER, /* Set MPS = 128 for all devices */ };
extern enum pcie_bus_config_types pcie_bus_config; @@@ -845,7 -853,7 +845,7 @@@ extern struct bus_type pci_bus_type
/* Do NOT directly access these two variables, unless you are arch-specific PCI * code, or PCI core code. */ -extern struct list_head pci_root_buses; /* list of all known PCI buses */ +extern struct list_head pci_root_buses; /* List of all known PCI buses */ /* Some device drivers need know if PCI is initiated */ int no_pci_devices(void);
@@@ -883,8 -891,8 +883,8 @@@ int pci_bus_insert_busn_res(struct pci_ int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax); void pci_bus_release_busn_res(struct pci_bus *b); struct pci_bus *pci_scan_root_bus(struct device *parent, int bus, - struct pci_ops *ops, void *sysdata, - struct list_head *resources); + struct pci_ops *ops, void *sysdata, + struct list_head *resources); int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge); struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev, int busnr); @@@ -941,10 -949,10 +941,10 @@@ int pci_find_next_ht_capability(struct struct pci_bus *pci_find_next_bus(const struct pci_bus *from);
struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device, - struct pci_dev *from); + struct pci_dev *from); struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device, - unsigned int ss_vendor, unsigned int ss_device, - struct pci_dev *from); + unsigned int ss_vendor, unsigned int ss_device, + struct pci_dev *from); struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn); struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus, unsigned int devfn); @@@ -1020,7 -1028,7 +1020,7 @@@ static inline int pcie_capability_clear return pcie_capability_clear_and_set_dword(dev, pos, clear, 0); }
-/* user-space driven config access */ +/* User-space driven config access */ int pci_user_read_config_byte(struct pci_dev *dev, int where, u8 *val); int pci_user_read_config_word(struct pci_dev *dev, int where, u16 *val); int pci_user_read_config_dword(struct pci_dev *dev, int where, u32 *val); @@@ -1064,6 -1072,7 +1064,7 @@@ int pci_set_pcie_reset_state(struct pci int pci_set_cacheline_size(struct pci_dev *dev); #define HAVE_PCI_SET_MWI int __must_check pci_set_mwi(struct pci_dev *dev); + int __must_check pcim_set_mwi(struct pci_dev *dev); int pci_try_set_mwi(struct pci_dev *dev); void pci_clear_mwi(struct pci_dev *dev); void pci_intx(struct pci_dev *dev, int enable); @@@ -1162,7 -1171,7 +1163,7 @@@ unsigned int pci_rescan_bus(struct pci_ void pci_lock_rescan_remove(void); void pci_unlock_rescan_remove(void);
-/* Vital product data routines */ +/* Vital Product Data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); int pci_set_vpd_size(struct pci_dev *dev, size_t len); @@@ -1247,7 -1256,9 +1248,7 @@@ static inline pci_bus_addr_t pci_bus_ad int __must_check __pci_register_driver(struct pci_driver *, struct module *, const char *mod_name);
-/* - * pci_register_driver must be a macro so that KBUILD_MODNAME can be expanded - */ +/* pci_register_driver() must be a macro so KBUILD_MODNAME can be expanded */ #define pci_register_driver(driver) \ __pci_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
@@@ -1262,7 -1273,8 +1263,7 @@@ void pci_unregister_driver(struct pci_d * use this macro once, and calling it replaces module_init() and module_exit() */ #define module_pci_driver(__pci_driver) \ - module_driver(__pci_driver, pci_register_driver, \ - pci_unregister_driver) + module_driver(__pci_driver, pci_register_driver, pci_unregister_driver)
/** * builtin_pci_driver() - Helper macro for registering a PCI driver @@@ -1301,10 -1313,10 +1302,10 @@@ resource_size_t pcibios_iov_resource_al int pci_set_vga_state(struct pci_dev *pdev, bool decode, unsigned int command_bits, u32 flags);
-#define PCI_IRQ_LEGACY (1 << 0) /* allow legacy interrupts */ -#define PCI_IRQ_MSI (1 << 1) /* allow MSI interrupts */ -#define PCI_IRQ_MSIX (1 << 2) /* allow MSI-X interrupts */ -#define PCI_IRQ_AFFINITY (1 << 3) /* auto-assign affinity */ +#define PCI_IRQ_LEGACY (1 << 0) /* Allow legacy interrupts */ +#define PCI_IRQ_MSI (1 << 1) /* Allow MSI interrupts */ +#define PCI_IRQ_MSIX (1 << 2) /* Allow MSI-X interrupts */ +#define PCI_IRQ_AFFINITY (1 << 3) /* Auto-assign affinity */ #define PCI_IRQ_ALL_TYPES \ (PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
@@@ -1323,8 -1335,8 +1324,8 @@@ #define pci_pool_free(pool, vaddr, addr) dma_pool_free(pool, vaddr, addr)
struct msix_entry { - u32 vector; /* kernel uses to write allocated vector */ - u16 entry; /* driver uses to specify entry, OS writes */ + u32 vector; /* Kernel uses to write allocated vector */ + u16 entry; /* Driver uses to specify entry, OS writes */ };
#ifdef CONFIG_PCI_MSI @@@ -1364,10 -1376,10 +1365,10 @@@ static inline int pci_msi_enabled(void static inline int pci_enable_msi(struct pci_dev *dev) { return -ENOSYS; } static inline int pci_enable_msix_range(struct pci_dev *dev, - struct msix_entry *entries, int minvec, int maxvec) + struct msix_entry *entries, int minvec, int maxvec) { return -ENOSYS; } static inline int pci_enable_msix_exact(struct pci_dev *dev, - struct msix_entry *entries, int nvec) + struct msix_entry *entries, int nvec) { return -ENOSYS; }
static inline int @@@ -1532,9 -1544,9 +1533,9 @@@ static inline int acpi_pci_bus_find_dom int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent); #endif
-/* some architectures require additional setup to direct VGA traffic */ +/* Some architectures require additional setup to direct VGA traffic */ typedef int (*arch_set_vga_state_t)(struct pci_dev *pdev, bool decode, - unsigned int command_bits, u32 flags); + unsigned int command_bits, u32 flags); void pci_register_set_vga_state(arch_set_vga_state_t func);
static inline int @@@ -1573,9 -1585,10 +1574,9 @@@ static inline void pci_clear_flags(int static inline int pci_has_flag(int flag) { return 0; }
/* - * If the system does not have PCI, clearly these return errors. Define - * these as simple inline functions to avoid hair in drivers. + * If the system does not have PCI, clearly these return errors. Define + * these as simple inline functions to avoid hair in drivers. */ - #define _PCI_NOP(o, s, t) \ static inline int pci_##o##_config_##s(struct pci_dev *dev, \ int where, t val) \ @@@ -1714,10 -1727,8 +1715,10 @@@ int pci_iobar_pfn(struct pci_dev *pdev #define pci_root_bus_fwnode(bus) NULL #endif
-/* these helpers provide future and backwards compatibility - * for accessing popular PCI BAR info */ +/* + * These helpers provide future and backwards compatibility + * for accessing popular PCI BAR info + */ #define pci_resource_start(dev, bar) ((dev)->resource[(bar)].start) #define pci_resource_end(dev, bar) ((dev)->resource[(bar)].end) #define pci_resource_flags(dev, bar) ((dev)->resource[(bar)].flags) @@@ -1729,8 -1740,7 +1730,8 @@@ (pci_resource_end((dev), (bar)) - \ pci_resource_start((dev), (bar)) + 1))
-/* Similar to the helpers above, these manipulate per-pci_dev +/* + * Similar to the helpers above, these manipulate per-pci_dev * driver-specific data. They are really just a wrapper around * the generic device structure functions of these calls. */ @@@ -1744,14 -1754,16 +1745,14 @@@ static inline void pci_set_drvdata(stru dev_set_drvdata(&pdev->dev, data); }
-/* If you want to know what to call your pci_dev, ask this function. - * Again, it's a wrapper around the generic device. - */ static inline const char *pci_name(const struct pci_dev *pdev) { return dev_name(&pdev->dev); }
-/* Some archs don't want to expose struct resource to userland as-is +/* + * Some archs don't want to expose struct resource to userland as-is * in sysfs and /proc */ #ifdef HAVE_ARCH_PCI_RESOURCE_TO_USER @@@ -1770,16 -1782,16 +1771,16 @@@ static inline void pci_resource_to_user
/* - * The world is not perfect and supplies us with broken PCI devices. - * For at least a part of these bugs we need a work-around, so both - * generic (drivers/pci/quirks.c) and per-architecture code can define - * fixup hooks to be called for particular buggy devices. + * The world is not perfect and supplies us with broken PCI devices. + * For at least a part of these bugs we need a work-around, so both + * generic (drivers/pci/quirks.c) and per-architecture code can define + * fixup hooks to be called for particular buggy devices. */
struct pci_fixup { - u16 vendor; /* You can use PCI_ANY_ID here of course */ - u16 device; /* You can use PCI_ANY_ID here of course */ - u32 class; /* You can use PCI_ANY_ID here too */ + u16 vendor; /* Or PCI_ANY_ID */ + u16 device; /* Or PCI_ANY_ID */ + u32 class; /* Or PCI_ANY_ID */ unsigned int class_shift; /* should be 0, 8, 16 */ void (*hook)(struct pci_dev *dev); }; @@@ -1821,19 -1833,23 +1822,19 @@@ enum pci_fixup_pass #define DECLARE_PCI_FIXUP_CLASS_RESUME(vendor, device, class, \ class_shift, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume, \ - resume##hook, vendor, device, class, \ - class_shift, hook) + resume##hook, vendor, device, class, class_shift, hook) #define DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(vendor, device, class, \ class_shift, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early, \ - resume_early##hook, vendor, device, \ - class, class_shift, hook) + resume_early##hook, vendor, device, class, class_shift, hook) #define DECLARE_PCI_FIXUP_CLASS_SUSPEND(vendor, device, class, \ class_shift, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend, \ - suspend##hook, vendor, device, class, \ - class_shift, hook) + suspend##hook, vendor, device, class, class_shift, hook) #define DECLARE_PCI_FIXUP_CLASS_SUSPEND_LATE(vendor, device, class, \ class_shift, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend_late, \ - suspend_late##hook, vendor, device, \ - class, class_shift, hook) + suspend_late##hook, vendor, device, class, class_shift, hook)
#define DECLARE_PCI_FIXUP_EARLY(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early, \ @@@ -1849,16 -1865,20 +1850,16 @@@ hook, vendor, device, PCI_ANY_ID, 0, hook) #define DECLARE_PCI_FIXUP_RESUME(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume, \ - resume##hook, vendor, device, \ - PCI_ANY_ID, 0, hook) + resume##hook, vendor, device, PCI_ANY_ID, 0, hook) #define DECLARE_PCI_FIXUP_RESUME_EARLY(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early, \ - resume_early##hook, vendor, device, \ - PCI_ANY_ID, 0, hook) + resume_early##hook, vendor, device, PCI_ANY_ID, 0, hook) #define DECLARE_PCI_FIXUP_SUSPEND(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend, \ - suspend##hook, vendor, device, \ - PCI_ANY_ID, 0, hook) + suspend##hook, vendor, device, PCI_ANY_ID, 0, hook) #define DECLARE_PCI_FIXUP_SUSPEND_LATE(vendor, device, hook) \ DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend_late, \ - suspend_late##hook, vendor, device, \ - PCI_ANY_ID, 0, hook) + suspend_late##hook, vendor, device, PCI_ANY_ID, 0, hook)
#ifdef CONFIG_PCI_QUIRKS void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev); @@@ -1945,7 -1965,6 +1946,7 @@@ int pci_vfs_assigned(struct pci_dev *de int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); int pci_sriov_get_totalvfs(struct pci_dev *dev); resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); +void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe); #else static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id) { @@@ -1973,7 -1992,6 +1974,7 @@@ static inline int pci_sriov_get_totalvf { return 0; } static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) { return 0; } +static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { } #endif
#if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE) @@@ -2095,7 -2113,7 +2096,7 @@@ static inline u16 pci_vpd_lrdt_size(con */ static inline u16 pci_vpd_lrdt_tag(const u8 *lrdt) { - return (u16)(lrdt[0] & PCI_VPD_LRDT_TIN_MASK); + return (u16)(lrdt[0] & PCI_VPD_LRDT_TIN_MASK); }
/** @@@ -2180,7 -2198,7 +2181,7 @@@ static inline struct device_node *pci_b return bus ? bus->dev.of_node : NULL; }
-#else /* CONFIG_OF */ +#else /* CONFIG_OF */ static inline void pci_set_of_node(struct pci_dev *dev) { } static inline void pci_release_of_node(struct pci_dev *dev) { } static inline void pci_set_bus_of_node(struct pci_bus *bus) { } @@@ -2189,7 -2207,7 +2190,7 @@@ static inline struct device_node pci_device_to_OF_node(const struct pci_dev *pdev) { return NULL; } static inline struct irq_domain * pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; } -#endif /* CONFIG_OF */ +#endif /* CONFIG_OF */
#ifdef CONFIG_ACPI struct irq_domain *pci_host_bridge_acpi_msi_domain(struct pci_bus *bus); @@@ -2214,7 -2232,7 +2215,7 @@@ int pci_for_each_dma_alias(struct pci_d int (*fn)(struct pci_dev *pdev, u16 alias, void *data), void *data);
-/* helper functions for operation of device flag */ +/* Helper functions for operation of device flag */ static inline void pci_set_dev_assigned(struct pci_dev *pdev) { pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; @@@ -2261,7 -2279,7 +2262,7 @@@ static inline bool pci_is_thunderbolt_a return false; }
-/* provide the legacy pci_dma_* API */ +/* Provide the legacy pci_dma_* API */ #include <linux/pci-dma-compat.h>
#endif /* LINUX_PCI_H */ diff --combined include/linux/skbuff.h index a87e43d16f44,b8e0da6c27d6..ac89a93b7c83 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@@ -1211,6 -1211,11 +1211,11 @@@ static inline bool skb_flow_dissect_flo data, proto, nhoff, hlen, flags); }
+ void + skb_flow_dissect_tunnel_info(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container); + static inline __u32 skb_get_hash(struct sk_buff *skb) { if (!skb->l4_hash && !skb->sw_hash) @@@ -3241,7 -3246,7 +3246,7 @@@ struct sk_buff *__skb_recv_datagram(str int *peeked, int *off, int *err); struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); -unsigned int datagram_poll(struct file *file, struct socket *sock, +__poll_t datagram_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int skb_copy_datagram_iter(const struct sk_buff *from, int offset, struct iov_iter *to, int size); diff --combined include/net/inet_connection_sock.h index ec72cdb5bc39,8e1bf9ae4a5e..6692d67e9245 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@@ -77,6 -77,7 +77,7 @@@ struct inet_connection_sock_af_ops * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data + * @icsk_listen_portaddr_node hash to the portaddr listener hashtable * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event @@@ -101,6 -102,7 +102,7 @@@ struct inet_connection_sock const struct inet_connection_sock_af_ops *icsk_af_ops; const struct tcp_ulp_ops *icsk_ulp_ops; void *icsk_ulp_data; + struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:6, icsk_ca_setsockopt:1, @@@ -305,7 -307,7 +307,7 @@@ void inet_csk_prepare_forced_close(stru /* * LISTEN is a special case for poll.. */ -static inline unsigned int inet_csk_listen_poll(const struct sock *sk) +static inline __poll_t inet_csk_listen_poll(const struct sock *sk) { return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? (POLLIN | POLLRDNORM) : 0; diff --combined include/net/sctp/sctp.h index 608d123ef25f,20c0c1be2ca7..f7ae6b0a21d0 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@@ -107,7 -107,7 +107,7 @@@ int sctp_backlog_rcv(struct sock *sk, s int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); void sctp_data_ready(struct sock *sk); -unsigned int sctp_poll(struct file *file, struct socket *sock, +__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait); void sctp_sock_rfree(struct sk_buff *skb); void sctp_copy_sock(struct sock *newsk, struct sock *sk, @@@ -116,7 -116,7 +116,7 @@@ extern struct percpu_counter sctp_socke int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *); struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
- int sctp_transport_walk_start(struct rhashtable_iter *iter); + void sctp_transport_walk_start(struct rhashtable_iter *iter); void sctp_transport_walk_stop(struct rhashtable_iter *iter); struct sctp_transport *sctp_transport_get_next(struct net *net, struct rhashtable_iter *iter); @@@ -444,13 -444,13 +444,13 @@@ static inline int sctp_frag_point(cons int frag = pmtu;
frag -= sp->pf->af->net_header_len; - frag -= sizeof(struct sctphdr) + sizeof(struct sctp_data_chunk); + frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream);
if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag);
frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN - - sizeof(struct sctp_data_chunk))); + sctp_datachk_len(&asoc->stream)));
return frag; } diff --combined include/net/sock.h index 4fd74e0d1bbb,73b7830b0bb8..0752f034f1bf --- a/include/net/sock.h +++ b/include/net/sock.h @@@ -72,6 -72,7 +72,7 @@@ #include <net/tcp_states.h> #include <linux/net_tstamp.h> #include <net/smc.h> + #include <net/l3mdev.h>
/* * This structure really needs to be cleaned up. @@@ -1262,6 -1263,7 +1263,7 @@@ proto_memory_pressure(struct proto *pro /* Called with local bh disabled */ void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); int sock_prot_inuse_get(struct net *net, struct proto *proto); + int sock_inuse_get(struct net *net); #else static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc) @@@ -1583,7 -1585,7 +1585,7 @@@ int sock_no_connect(struct socket *, st int sock_no_socketpair(struct socket *, struct socket *); int sock_no_accept(struct socket *, struct socket *, int, bool); int sock_no_getname(struct socket *, struct sockaddr *, int *, int); -unsigned int sock_no_poll(struct file *, struct socket *, +__poll_t sock_no_poll(struct file *, struct socket *, struct poll_table_struct *); int sock_no_ioctl(struct socket *, unsigned int, unsigned long); int sock_no_listen(struct socket *, int); @@@ -2337,31 -2339,6 +2339,6 @@@ static inline bool sk_listener(const st return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV); }
- /** - * sk_state_load - read sk->sk_state for lockless contexts - * @sk: socket pointer - * - * Paired with sk_state_store(). Used in places we do not hold socket lock : - * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ... - */ - static inline int sk_state_load(const struct sock *sk) - { - return smp_load_acquire(&sk->sk_state); - } - - /** - * sk_state_store - update sk->sk_state - * @sk: socket pointer - * @newstate: new state - * - * Paired with sk_state_load(). Should be used in contexts where - * state change might impact lockless readers. - */ - static inline void sk_state_store(struct sock *sk, int newstate) - { - smp_store_release(&sk->sk_state, newstate); - } - void sock_enable_timestamp(struct sock *sk, int flag); int sock_get_timestamp(struct sock *, struct timeval __user *); int sock_get_timestampns(struct sock *, struct timespec __user *); @@@ -2412,4 -2389,34 +2389,34 @@@ static inline int sk_get_rmem0(const st return *proto->sysctl_rmem; }
+ /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10) + * Some wifi drivers need to tweak it to get more chunks. + * They can use this helper from their ndo_start_xmit() + */ + static inline void sk_pacing_shift_update(struct sock *sk, int val) + { + if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val) + return; + sk->sk_pacing_shift = val; + } + + /* if a socket is bound to a device, check that the given device + * index is either the same or that the socket is bound to an L3 + * master device and the given device index is also enslaved to + * that L3 master + */ + static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif) + { + int mdif; + + if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif) + return true; + + mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif); + if (mdif && mdif == sk->sk_bound_dev_if) + return true; + + return false; + } + #endif /* _SOCK_H */ diff --combined include/net/tcp.h index 50b21a49d870,6939e69d3c37..26c2793846a1 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@@ -387,7 -387,7 +387,7 @@@ bool tcp_peer_is_proven(struct request_ void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); void tcp_init_transfer(struct sock *sk, int bpf_op); -unsigned int tcp_poll(struct file *file, struct socket *sock, +__poll_t tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); @@@ -1507,8 -1507,7 +1507,7 @@@ int tcp_md5_hash_key(struct tcp_md5sig_
/* From tcp_fastopen.c */ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, - struct tcp_fastopen_cookie *cookie, int *syn_loss, - unsigned long *last_syn_loss); + struct tcp_fastopen_cookie *cookie); void tcp_fastopen_cache_set(struct sock *sk, u16 mss, struct tcp_fastopen_cookie *cookie, bool syn_lost, u16 try_exp); @@@ -1546,7 -1545,7 +1545,7 @@@ extern unsigned int sysctl_tcp_fastopen void tcp_fastopen_active_disable(struct sock *sk); bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); - void tcp_fastopen_active_timeout_reset(void); + void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
/* Latencies incurred by various limits for a sender. They are * chronograph-like stats that are mutually exclusive. @@@ -2011,10 -2010,12 +2010,12 @@@ static inline int tcp_call_bpf(struct s struct bpf_sock_ops_kern sock_ops; int ret;
- if (sk_fullsock(sk)) + memset(&sock_ops, 0, sizeof(sock_ops)); + if (sk_fullsock(sk)) { + sock_ops.is_fullsock = 1; sock_owned_by_me(sk); + }
- memset(&sock_ops, 0, sizeof(sock_ops)); sock_ops.sk = sk; sock_ops.op = op;
diff --combined kernel/bpf/sockmap.c index 1712d319c2d8,3f662ee23a34..079968680bc3 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@@ -96,14 -96,6 +96,6 @@@ static inline struct smap_psock *smap_p return rcu_dereference_sk_user_data(sk); }
- /* compute the linear packet data range [data, data_end) for skb when - * sk_skb type programs are in use. - */ - static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb) - { - TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb); - } - enum __sk_action { __SK_DROP = 0, __SK_PASS, @@@ -591,15 -583,8 +583,15 @@@ static void sock_map_free(struct bpf_ma
write_lock_bh(&sock->sk_callback_lock); psock = smap_psock_sk(sock); - smap_list_remove(psock, &stab->sock_map[i]); - smap_release_sock(psock, sock); + /* This check handles a racing sock event that can get the + * sk_callback_lock before this case but after xchg happens + * causing the refcnt to hit zero and sock user data (psock) + * to be null and queued for garbage collection. + */ + if (likely(psock)) { + smap_list_remove(psock, &stab->sock_map[i]); + smap_release_sock(psock, sock); + } write_unlock_bh(&sock->sk_callback_lock); } rcu_read_unlock(); diff --combined kernel/bpf/verifier.c index 20eb04fd155e,3b2b47666180..281e7528b822 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@@ -20,6 -20,8 +20,8 @@@ #include <linux/file.h> #include <linux/vmalloc.h> #include <linux/stringify.h> + #include <linux/bsearch.h> + #include <linux/sort.h>
#include "disasm.h"
@@@ -167,11 -169,11 +169,11 @@@ struct bpf_call_arg_meta static DEFINE_MUTEX(bpf_verifier_lock);
/* log_level controls verbosity level of eBPF verifier. - * verbose() is used to dump the verification trace to the log, so the user - * can figure out what's wrong with the program + * bpf_verifier_log_write() is used to dump the verification trace to the log, + * so the user can figure out what's wrong with the program */ - static __printf(2, 3) void verbose(struct bpf_verifier_env *env, - const char *fmt, ...) + __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, + const char *fmt, ...) { struct bpf_verifer_log *log = &env->log; unsigned int n; @@@ -195,6 -197,14 +197,14 @@@ else log->ubuf = NULL; } + EXPORT_SYMBOL_GPL(bpf_verifier_log_write); + /* Historically bpf_verifier_log_write was called verbose, but the name was too + * generic for symbol export. The function was renamed, but not the calls in + * the verifier to avoid complicating backports. Hence the alias below. + */ + static __printf(2, 3) void verbose(struct bpf_verifier_env *env, + const char *fmt, ...) + __attribute__((alias("bpf_verifier_log_write")));
static bool type_is_pkt_pointer(enum bpf_reg_type type) { @@@ -216,23 -226,48 +226,48 @@@ static const char * const reg_type_str[ [PTR_TO_PACKET_END] = "pkt_end", };
+ static void print_liveness(struct bpf_verifier_env *env, + enum bpf_reg_liveness live) + { + if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN)) + verbose(env, "_"); + if (live & REG_LIVE_READ) + verbose(env, "r"); + if (live & REG_LIVE_WRITTEN) + verbose(env, "w"); + } + + static struct bpf_func_state *func(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg) + { + struct bpf_verifier_state *cur = env->cur_state; + + return cur->frame[reg->frameno]; + } + static void print_verifier_state(struct bpf_verifier_env *env, - struct bpf_verifier_state *state) + const struct bpf_func_state *state) { - struct bpf_reg_state *reg; + const struct bpf_reg_state *reg; enum bpf_reg_type t; int i;
+ if (state->frameno) + verbose(env, " frame%d:", state->frameno); for (i = 0; i < MAX_BPF_REG; i++) { reg = &state->regs[i]; t = reg->type; if (t == NOT_INIT) continue; - verbose(env, " R%d=%s", i, reg_type_str[t]); + verbose(env, " R%d", i); + print_liveness(env, reg->live); + verbose(env, "=%s", reg_type_str[t]); if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && tnum_is_const(reg->var_off)) { /* reg->off should be 0 for SCALAR_VALUE */ verbose(env, "%lld", reg->var_off.value + reg->off); + if (t == PTR_TO_STACK) + verbose(env, ",call_%d", func(env, reg)->callsite); } else { verbose(env, "(id=%d", reg->id); if (t != SCALAR_VALUE) @@@ -277,16 -312,21 +312,21 @@@ } } for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { - if (state->stack[i].slot_type[0] == STACK_SPILL) - verbose(env, " fp%d=%s", - -MAX_BPF_STACK + i * BPF_REG_SIZE, + if (state->stack[i].slot_type[0] == STACK_SPILL) { + verbose(env, " fp%d", + (-i - 1) * BPF_REG_SIZE); + print_liveness(env, state->stack[i].spilled_ptr.live); + verbose(env, "=%s", reg_type_str[state->stack[i].spilled_ptr.type]); + } + if (state->stack[i].slot_type[0] == STACK_ZERO) + verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE); } verbose(env, "\n"); }
- static int copy_stack_state(struct bpf_verifier_state *dst, - const struct bpf_verifier_state *src) + static int copy_stack_state(struct bpf_func_state *dst, + const struct bpf_func_state *src) { if (!src->stack) return 0; @@@ -302,13 -342,13 +342,13 @@@
/* do_check() starts with zero-sized stack in struct bpf_verifier_state to * make it consume minimal amount of memory. check_stack_write() access from - * the program calls into realloc_verifier_state() to grow the stack size. + * the program calls into realloc_func_state() to grow the stack size. * Note there is a non-zero 'parent' pointer inside bpf_verifier_state * which this function copies over. It points to previous bpf_verifier_state * which is never reallocated */ - static int realloc_verifier_state(struct bpf_verifier_state *state, int size, - bool copy_old) + static int realloc_func_state(struct bpf_func_state *state, int size, + bool copy_old) { u32 old_size = state->allocated_stack; struct bpf_stack_state *new_stack; @@@ -341,10 -381,23 +381,23 @@@ return 0; }
+ static void free_func_state(struct bpf_func_state *state) + { + if (!state) + return; + kfree(state->stack); + kfree(state); + } + static void free_verifier_state(struct bpf_verifier_state *state, bool free_self) { - kfree(state->stack); + int i; + + for (i = 0; i <= state->curframe; i++) { + free_func_state(state->frame[i]); + state->frame[i] = NULL; + } if (free_self) kfree(state); } @@@ -352,18 -405,46 +405,46 @@@ /* copy verifier state from src to dst growing dst stack space * when necessary to accommodate larger src stack */ - static int copy_verifier_state(struct bpf_verifier_state *dst, - const struct bpf_verifier_state *src) + static int copy_func_state(struct bpf_func_state *dst, + const struct bpf_func_state *src) { int err;
- err = realloc_verifier_state(dst, src->allocated_stack, false); + err = realloc_func_state(dst, src->allocated_stack, false); if (err) return err; - memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack)); + memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack)); return copy_stack_state(dst, src); }
+ static int copy_verifier_state(struct bpf_verifier_state *dst_state, + const struct bpf_verifier_state *src) + { + struct bpf_func_state *dst; + int i, err; + + /* if dst has more stack frames then src frame, free them */ + for (i = src->curframe + 1; i <= dst_state->curframe; i++) { + free_func_state(dst_state->frame[i]); + dst_state->frame[i] = NULL; + } + dst_state->curframe = src->curframe; + dst_state->parent = src->parent; + for (i = 0; i <= src->curframe; i++) { + dst = dst_state->frame[i]; + if (!dst) { + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return -ENOMEM; + dst_state->frame[i] = dst; + } + err = copy_func_state(dst, src->frame[i]); + if (err) + return err; + } + return 0; + } + static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, int *insn_idx) { @@@ -416,6 -497,8 +497,8 @@@ static struct bpf_verifier_state *push_ } return &elem->st; err: + free_verifier_state(env->cur_state, true); + env->cur_state = NULL; /* pop all elements and return */ while (!pop_stack(env, NULL, NULL)); return NULL; @@@ -425,6 -508,10 +508,10 @@@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; + #define CALLEE_SAVED_REGS 5 + static const int callee_saved[CALLEE_SAVED_REGS] = { + BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9 + };
static void __mark_reg_not_init(struct bpf_reg_state *reg);
@@@ -449,6 -536,13 +536,13 @@@ static void __mark_reg_known_zero(struc __mark_reg_known(reg, 0); }
+ static void __mark_reg_const_zero(struct bpf_reg_state *reg) + { + __mark_reg_known(reg, 0); + reg->off = 0; + reg->type = SCALAR_VALUE; + } + static void mark_reg_known_zero(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno) { @@@ -560,6 -654,7 +654,7 @@@ static void __mark_reg_unknown(struct b reg->id = 0; reg->off = 0; reg->var_off = tnum_unknown; + reg->frameno = 0; __mark_reg_unbounded(reg); }
@@@ -568,8 -663,8 +663,8 @@@ static void mark_reg_unknown(struct bpf { if (WARN_ON(regno >= MAX_BPF_REG)) { verbose(env, "mark_reg_unknown(regs, %u)\n", regno); - /* Something bad happened, let's kill all regs */ - for (regno = 0; regno < MAX_BPF_REG; regno++) + /* Something bad happened, let's kill all regs except FP */ + for (regno = 0; regno < BPF_REG_FP; regno++) __mark_reg_not_init(regs + regno); return; } @@@ -587,8 -682,8 +682,8 @@@ static void mark_reg_not_init(struct bp { if (WARN_ON(regno >= MAX_BPF_REG)) { verbose(env, "mark_reg_not_init(regs, %u)\n", regno); - /* Something bad happened, let's kill all regs */ - for (regno = 0; regno < MAX_BPF_REG; regno++) + /* Something bad happened, let's kill all regs except FP */ + for (regno = 0; regno < BPF_REG_FP; regno++) __mark_reg_not_init(regs + regno); return; } @@@ -596,8 -691,9 +691,9 @@@ }
static void init_reg_state(struct bpf_verifier_env *env, - struct bpf_reg_state *regs) + struct bpf_func_state *state) { + struct bpf_reg_state *regs = state->regs; int i;
for (i = 0; i < MAX_BPF_REG; i++) { @@@ -608,41 -704,218 +704,218 @@@ /* frame pointer */ regs[BPF_REG_FP].type = PTR_TO_STACK; mark_reg_known_zero(env, regs, BPF_REG_FP); + regs[BPF_REG_FP].frameno = state->frameno;
/* 1st arg to a function */ regs[BPF_REG_1].type = PTR_TO_CTX; mark_reg_known_zero(env, regs, BPF_REG_1); }
+ #define BPF_MAIN_FUNC (-1) + static void init_func_state(struct bpf_verifier_env *env, + struct bpf_func_state *state, + int callsite, int frameno, int subprogno) + { + state->callsite = callsite; + state->frameno = frameno; + state->subprogno = subprogno; + init_reg_state(env, state); + } + enum reg_arg_type { SRC_OP, /* register is used as source operand */ DST_OP, /* register is used as destination operand */ DST_OP_NO_MARK /* same as above, check only, don't mark */ };
- static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno) + static int cmp_subprogs(const void *a, const void *b) + { + return *(int *)a - *(int *)b; + } + + static int find_subprog(struct bpf_verifier_env *env, int off) + { + u32 *p; + + p = bsearch(&off, env->subprog_starts, env->subprog_cnt, + sizeof(env->subprog_starts[0]), cmp_subprogs); + if (!p) + return -ENOENT; + return p - env->subprog_starts; + + } + + static int add_subprog(struct bpf_verifier_env *env, int off) + { + int insn_cnt = env->prog->len; + int ret; + + if (off >= insn_cnt || off < 0) { + verbose(env, "call to invalid destination\n"); + return -EINVAL; + } + ret = find_subprog(env, off); + if (ret >= 0) + return 0; + if (env->subprog_cnt >= BPF_MAX_SUBPROGS) { + verbose(env, "too many subprograms\n"); + return -E2BIG; + } + env->subprog_starts[env->subprog_cnt++] = off; + sort(env->subprog_starts, env->subprog_cnt, + sizeof(env->subprog_starts[0]), cmp_subprogs, NULL); + return 0; + } + + static int check_subprogs(struct bpf_verifier_env *env) + { + int i, ret, subprog_start, subprog_end, off, cur_subprog = 0; + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + + /* determine subprog starts. The end is one before the next starts */ + for (i = 0; i < insn_cnt; i++) { + if (insn[i].code != (BPF_JMP | BPF_CALL)) + continue; + if (insn[i].src_reg != BPF_PSEUDO_CALL) + continue; + if (!env->allow_ptr_leaks) { + verbose(env, "function calls to other bpf functions are allowed for root only\n"); + return -EPERM; + } + if (bpf_prog_is_dev_bound(env->prog->aux)) { + verbose(env, "function calls in offloaded programs are not supported yet\n"); + return -EINVAL; + } + ret = add_subprog(env, i + insn[i].imm + 1); + if (ret < 0) + return ret; + } + + if (env->log.level > 1) + for (i = 0; i < env->subprog_cnt; i++) + verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]); + + /* now check that all jumps are within the same subprog */ + subprog_start = 0; + if (env->subprog_cnt == cur_subprog) + subprog_end = insn_cnt; + else + subprog_end = env->subprog_starts[cur_subprog++]; + for (i = 0; i < insn_cnt; i++) { + u8 code = insn[i].code; + + if (BPF_CLASS(code) != BPF_JMP) + goto next; + if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) + goto next; + off = i + insn[i].off + 1; + if (off < subprog_start || off >= subprog_end) { + verbose(env, "jump out of range from insn %d to %d\n", i, off); + return -EINVAL; + } + next: + if (i == subprog_end - 1) { + /* to avoid fall-through from one subprog into another + * the last insn of the subprog should be either exit + * or unconditional jump back + */ + if (code != (BPF_JMP | BPF_EXIT) && + code != (BPF_JMP | BPF_JA)) { + verbose(env, "last insn is not an exit or jmp\n"); + return -EINVAL; + } + subprog_start = subprog_end; + if (env->subprog_cnt == cur_subprog) + subprog_end = insn_cnt; + else + subprog_end = env->subprog_starts[cur_subprog++]; + } + } + return 0; + } + + static + struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env, + const struct bpf_verifier_state *state, + struct bpf_verifier_state *parent, + u32 regno) + { + struct bpf_verifier_state *tmp = NULL; + + /* 'parent' could be a state of caller and + * 'state' could be a state of callee. In such case + * parent->curframe < state->curframe + * and it's ok for r1 - r5 registers + * + * 'parent' could be a callee's state after it bpf_exit-ed. + * In such case parent->curframe > state->curframe + * and it's ok for r0 only + */ + if (parent->curframe == state->curframe || + (parent->curframe < state->curframe && + regno >= BPF_REG_1 && regno <= BPF_REG_5) || + (parent->curframe > state->curframe && + regno == BPF_REG_0)) + return parent; + + if (parent->curframe > state->curframe && + regno >= BPF_REG_6) { + /* for callee saved regs we have to skip the whole chain + * of states that belong to callee and mark as LIVE_READ + * the registers before the call + */ + tmp = parent; + while (tmp && tmp->curframe != state->curframe) { + tmp = tmp->parent; + } + if (!tmp) + goto bug; + parent = tmp; + } else { + goto bug; + } + return parent; + bug: + verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp); + verbose(env, "regno %d parent frame %d current frame %d\n", + regno, parent->curframe, state->curframe); + return NULL; + } + + static int mark_reg_read(struct bpf_verifier_env *env, + const struct bpf_verifier_state *state, + struct bpf_verifier_state *parent, + u32 regno) { - struct bpf_verifier_state *parent = state->parent; + bool writes = parent == state->parent; /* Observe write marks */
if (regno == BPF_REG_FP) /* We don't need to worry about FP liveness because it's read-only */ - return; + return 0;
while (parent) { /* if read wasn't screened by an earlier write ... */ - if (state->regs[regno].live & REG_LIVE_WRITTEN) + if (writes && state->frame[state->curframe]->regs[regno].live & REG_LIVE_WRITTEN) break; + parent = skip_callee(env, state, parent, regno); + if (!parent) + return -EFAULT; /* ... then we depend on parent's value */ - parent->regs[regno].live |= REG_LIVE_READ; + parent->frame[parent->curframe]->regs[regno].live |= REG_LIVE_READ; state = parent; parent = state->parent; + writes = true; } + return 0; }
static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, enum reg_arg_type t) { - struct bpf_reg_state *regs = env->cur_state->regs; + struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_func_state *state = vstate->frame[vstate->curframe]; + struct bpf_reg_state *regs = state->regs;
if (regno >= MAX_BPF_REG) { verbose(env, "R%d is invalid\n", regno); @@@ -655,7 -928,7 +928,7 @@@ verbose(env, "R%d !read_ok\n", regno); return -EACCES; } - mark_reg_read(env->cur_state, regno); + return mark_reg_read(env, vstate, vstate->parent, regno); } else { /* check whether register used as dest operand can be written to */ if (regno == BPF_REG_FP) { @@@ -686,17 -959,25 +959,25 @@@ static bool is_spillable_regtype(enum b } }
+ /* Does this register contain a constant zero? */ + static bool register_is_null(struct bpf_reg_state *reg) + { + return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); + } + /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ static int check_stack_write(struct bpf_verifier_env *env, - struct bpf_verifier_state *state, int off, - int size, int value_regno) + struct bpf_func_state *state, /* func where register points to */ + int off, int size, int value_regno) { + struct bpf_func_state *cur; /* state of the current function */ int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; + enum bpf_reg_type type;
- err = realloc_verifier_state(state, round_up(slot + 1, BPF_REG_SIZE), - true); + err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), + true); if (err) return err; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, @@@ -709,8 -990,9 +990,9 @@@ return -EACCES; }
+ cur = env->cur_state->frame[env->cur_state->curframe]; if (value_regno >= 0 && - is_spillable_regtype(state->regs[value_regno].type)) { + is_spillable_regtype((type = cur->regs[value_regno].type))) {
/* register containing pointer is being spilled into stack */ if (size != BPF_REG_SIZE) { @@@ -718,51 -1000,116 +1000,116 @@@ return -EACCES; }
+ if (state != cur && type == PTR_TO_STACK) { + verbose(env, "cannot spill pointers to stack into stack frame of the caller\n"); + return -EINVAL; + } + /* save register state */ - state->stack[spi].spilled_ptr = state->regs[value_regno]; + state->stack[spi].spilled_ptr = cur->regs[value_regno]; state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
for (i = 0; i < BPF_REG_SIZE; i++) state->stack[spi].slot_type[i] = STACK_SPILL; } else { + u8 type = STACK_MISC; + /* regular write of data into stack */ state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
+ /* only mark the slot as written if all 8 bytes were written + * otherwise read propagation may incorrectly stop too soon + * when stack slots are partially written. + * This heuristic means that read propagation will be + * conservative, since it will add reg_live_read marks + * to stack slots all the way to first state when programs + * writes+reads less than 8 bytes + */ + if (size == BPF_REG_SIZE) + state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; + + /* when we zero initialize stack slots mark them as such */ + if (value_regno >= 0 && + register_is_null(&cur->regs[value_regno])) + type = STACK_ZERO; + for (i = 0; i < size; i++) state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = - STACK_MISC; + type; } return 0; }
- static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slot) + /* registers of every function are unique and mark_reg_read() propagates + * the liveness in the following cases: + * - from callee into caller for R1 - R5 that were used as arguments + * - from caller into callee for R0 that used as result of the call + * - from caller to the same caller skipping states of the callee for R6 - R9, + * since R6 - R9 are callee saved by implicit function prologue and + * caller's R6 != callee's R6, so when we propagate liveness up to + * parent states we need to skip callee states for R6 - R9. + * + * stack slot marking is different, since stacks of caller and callee are + * accessible in both (since caller can pass a pointer to caller's stack to + * callee which can pass it to another function), hence mark_stack_slot_read() + * has to propagate the stack liveness to all parent states at given frame number. + * Consider code: + * f1() { + * ptr = fp - 8; + * *ptr = ctx; + * call f2 { + * .. = *ptr; + * } + * .. = *ptr; + * } + * First *ptr is reading from f1's stack and mark_stack_slot_read() has + * to mark liveness at the f1's frame and not f2's frame. + * Second *ptr is also reading from f1's stack and mark_stack_slot_read() has + * to propagate liveness to f2 states at f1's frame level and further into + * f1 states at f1's frame level until write into that stack slot + */ + static void mark_stack_slot_read(struct bpf_verifier_env *env, + const struct bpf_verifier_state *state, + struct bpf_verifier_state *parent, + int slot, int frameno) { - struct bpf_verifier_state *parent = state->parent; + bool writes = parent == state->parent; /* Observe write marks */
while (parent) { + if (parent->frame[frameno]->allocated_stack <= slot * BPF_REG_SIZE) + /* since LIVE_WRITTEN mark is only done for full 8-byte + * write the read marks are conservative and parent + * state may not even have the stack allocated. In such case + * end the propagation, since the loop reached beginning + * of the function + */ + break; /* if read wasn't screened by an earlier write ... */ - if (state->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN) + if (writes && state->frame[frameno]->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN) break; /* ... then we depend on parent's value */ - parent->stack[slot].spilled_ptr.live |= REG_LIVE_READ; + parent->frame[frameno]->stack[slot].spilled_ptr.live |= REG_LIVE_READ; state = parent; parent = state->parent; + writes = true; } }
static int check_stack_read(struct bpf_verifier_env *env, - struct bpf_verifier_state *state, int off, int size, - int value_regno) + struct bpf_func_state *reg_state /* func where register points to */, + int off, int size, int value_regno) { + struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_func_state *state = vstate->frame[vstate->curframe]; int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; u8 *stype;
- if (state->allocated_stack <= slot) { + if (reg_state->allocated_stack <= slot) { verbose(env, "invalid read from stack off %d+0 size %d\n", off, size); return -EACCES; } - stype = state->stack[spi].slot_type; + stype = reg_state->stack[spi].slot_type;
if (stype[0] == STACK_SPILL) { if (size != BPF_REG_SIZE) { @@@ -778,21 -1125,44 +1125,44 @@@
if (value_regno >= 0) { /* restore register state from stack */ - state->regs[value_regno] = state->stack[spi].spilled_ptr; - mark_stack_slot_read(state, spi); + state->regs[value_regno] = reg_state->stack[spi].spilled_ptr; + /* mark reg as written since spilled pointer state likely + * has its liveness marks cleared by is_state_visited() + * which resets stack/reg liveness for state transitions + */ + state->regs[value_regno].live |= REG_LIVE_WRITTEN; } + mark_stack_slot_read(env, vstate, vstate->parent, spi, + reg_state->frameno); return 0; } else { + int zeros = 0; + for (i = 0; i < size; i++) { - if (stype[(slot - i) % BPF_REG_SIZE] != STACK_MISC) { - verbose(env, "invalid read from stack off %d+%d size %d\n", - off, i, size); - return -EACCES; + if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC) + continue; + if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) { + zeros++; + continue; + } + verbose(env, "invalid read from stack off %d+%d size %d\n", + off, i, size); + return -EACCES; + } + mark_stack_slot_read(env, vstate, vstate->parent, spi, + reg_state->frameno); + if (value_regno >= 0) { + if (zeros == size) { + /* any size read into register is zero extended, + * so the whole register == const_zero + */ + __mark_reg_const_zero(&state->regs[value_regno]); + } else { + /* have read misc data from the stack */ + mark_reg_unknown(env, state->regs, value_regno); } + state->regs[value_regno].live |= REG_LIVE_WRITTEN; } - if (value_regno >= 0) - /* have read misc data from the stack */ - mark_reg_unknown(env, state->regs, value_regno); return 0; } } @@@ -817,7 -1187,8 +1187,8 @@@ static int __check_map_access(struct bp static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size, bool zero_size_allowed) { - struct bpf_verifier_state *state = env->cur_state; + struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_reg_state *reg = &state->regs[regno]; int err;
@@@ -1072,6 -1443,101 +1443,101 @@@ static int check_ptr_alignment(struct b strict); }
+ static int update_stack_depth(struct bpf_verifier_env *env, + const struct bpf_func_state *func, + int off) + { + u16 stack = env->subprog_stack_depth[func->subprogno]; + + if (stack >= -off) + return 0; + + /* update known max for given subprogram */ + env->subprog_stack_depth[func->subprogno] = -off; + return 0; + } + + /* starting from main bpf function walk all instructions of the function + * and recursively walk all callees that given function can call. + * Ignore jump and exit insns. + * Since recursion is prevented by check_cfg() this algorithm + * only needs a local stack of MAX_CALL_FRAMES to remember callsites + */ + static int check_max_stack_depth(struct bpf_verifier_env *env) + { + int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end; + struct bpf_insn *insn = env->prog->insnsi; + int insn_cnt = env->prog->len; + int ret_insn[MAX_CALL_FRAMES]; + int ret_prog[MAX_CALL_FRAMES]; + + process_func: + /* round up to 32-bytes, since this is granularity + * of interpreter stack size + */ + depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32); + if (depth > MAX_BPF_STACK) { + verbose(env, "combined stack size of %d calls is %d. Too large\n", + frame + 1, depth); + return -EACCES; + } + continue_func: + if (env->subprog_cnt == subprog) + subprog_end = insn_cnt; + else + subprog_end = env->subprog_starts[subprog]; + for (; i < subprog_end; i++) { + if (insn[i].code != (BPF_JMP | BPF_CALL)) + continue; + if (insn[i].src_reg != BPF_PSEUDO_CALL) + continue; + /* remember insn and function to return to */ + ret_insn[frame] = i + 1; + ret_prog[frame] = subprog; + + /* find the callee */ + i = i + insn[i].imm + 1; + subprog = find_subprog(env, i); + if (subprog < 0) { + WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", + i); + return -EFAULT; + } + subprog++; + frame++; + if (frame >= MAX_CALL_FRAMES) { + WARN_ONCE(1, "verifier bug. Call stack is too deep\n"); + return -EFAULT; + } + goto process_func; + } + /* end of for() loop means the last insn of the 'subprog' + * was reached. Doesn't matter whether it was JA or EXIT + */ + if (frame == 0) + return 0; + depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32); + frame--; + i = ret_insn[frame]; + subprog = ret_prog[frame]; + goto continue_func; + } + + static int get_callee_stack_depth(struct bpf_verifier_env *env, + const struct bpf_insn *insn, int idx) + { + int start = idx + insn->imm + 1, subprog; + + subprog = find_subprog(env, start); + if (subprog < 0) { + WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", + start); + return -EFAULT; + } + subprog++; + return env->subprog_stack_depth[subprog]; + } + /* truncate register to smaller size (in bytes) * must be called with size < BPF_REG_SIZE */ @@@ -1105,9 -1571,9 +1571,9 @@@ static int check_mem_access(struct bpf_ int bpf_size, enum bpf_access_type t, int value_regno) { - struct bpf_verifier_state *state = env->cur_state; struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = regs + regno; + struct bpf_func_state *state; int size, err = 0;
size = bpf_size_to_bytes(bpf_size); @@@ -1196,8 -1662,10 +1662,10 @@@ return -EACCES; }
- if (env->prog->aux->stack_depth < -off) - env->prog->aux->stack_depth = -off; + state = func(env, reg); + err = update_stack_depth(env, state, off); + if (err) + return err;
if (t == BPF_WRITE) err = check_stack_write(env, state, off, size, @@@ -1269,12 -1737,6 +1737,6 @@@ static int check_xadd(struct bpf_verifi BPF_SIZE(insn->code), BPF_WRITE, -1); }
- /* Does this register contain a constant zero? */ - static bool register_is_null(struct bpf_reg_state reg) - { - return reg.type == SCALAR_VALUE && tnum_equals_const(reg.var_off, 0); - } - /* when register 'regno' is passed into function that will read 'access_size' * bytes from that pointer, make sure that it's within stack boundary * and all elements of stack are initialized. @@@ -1285,32 -1747,32 +1747,32 @@@ static int check_stack_boundary(struct int access_size, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { - struct bpf_verifier_state *state = env->cur_state; - struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *reg = cur_regs(env) + regno; + struct bpf_func_state *state = func(env, reg); int off, i, slot, spi;
- if (regs[regno].type != PTR_TO_STACK) { + if (reg->type != PTR_TO_STACK) { /* Allow zero-byte read from NULL, regardless of pointer type */ if (zero_size_allowed && access_size == 0 && - register_is_null(regs[regno])) + register_is_null(reg)) return 0;
verbose(env, "R%d type=%s expected=%s\n", regno, - reg_type_str[regs[regno].type], + reg_type_str[reg->type], reg_type_str[PTR_TO_STACK]); return -EACCES; }
/* Only allow fixed-offset stack reads */ - if (!tnum_is_const(regs[regno].var_off)) { + if (!tnum_is_const(reg->var_off)) { char tn_buf[48];
- tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off); + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); verbose(env, "invalid variable stack read R%d var_off=%s\n", regno, tn_buf); return -EACCES; } - off = regs[regno].off + regs[regno].var_off.value; + off = reg->off + reg->var_off.value; if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || access_size < 0 || (access_size == 0 && !zero_size_allowed)) { verbose(env, "invalid stack type R%d off=%d access_size=%d\n", @@@ -1318,9 -1780,6 +1780,6 @@@ return -EACCES; }
- if (env->prog->aux->stack_depth < -off) - env->prog->aux->stack_depth = -off; - if (meta && meta->raw_mode) { meta->access_size = access_size; meta->regno = regno; @@@ -1328,17 -1787,32 +1787,32 @@@ }
for (i = 0; i < access_size; i++) { + u8 *stype; + slot = -(off + i) - 1; spi = slot / BPF_REG_SIZE; - if (state->allocated_stack <= slot || - state->stack[spi].slot_type[slot % BPF_REG_SIZE] != - STACK_MISC) { - verbose(env, "invalid indirect read from stack off %d+%d size %d\n", - off, i, access_size); - return -EACCES; + if (state->allocated_stack <= slot) + goto err; + stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; + if (*stype == STACK_MISC) + goto mark; + if (*stype == STACK_ZERO) { + /* helper can write anything into the stack */ + *stype = STACK_MISC; + goto mark; } + err: + verbose(env, "invalid indirect read from stack off %d+%d size %d\n", + off, i, access_size); + return -EACCES; + mark: + /* reading any byte out of 8-byte 'spill_slot' will cause + * the whole slot to be marked as 'read' + */ + mark_stack_slot_read(env, env->cur_state, env->cur_state->parent, + spi, state->frameno); } - return 0; + return update_stack_depth(env, state, off); }
static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, @@@ -1418,7 -1892,7 +1892,7 @@@ static int check_func_arg(struct bpf_ve * passed in as argument, it's a SCALAR_VALUE type. Final test * happens during stack boundary checking. */ - if (register_is_null(*reg) && + if (register_is_null(reg) && arg_type == ARG_PTR_TO_MEM_OR_NULL) /* final test in check_stack_boundary() */; else if (!type_is_pkt_pointer(type) && @@@ -1591,6 -2065,10 +2065,10 @@@ static int check_map_func_compatibility case BPF_FUNC_tail_call: if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) goto error; + if (env->subprog_cnt) { + verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n"); + return -EINVAL; + } break; case BPF_FUNC_perf_event_read: case BPF_FUNC_perf_event_output: @@@ -1652,9 -2130,9 +2130,9 @@@ static int check_raw_mode(const struct /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] * are now invalid, so turn them into unknown SCALAR_VALUE. */ - static void clear_all_pkt_pointers(struct bpf_verifier_env *env) + static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, + struct bpf_func_state *state) { - struct bpf_verifier_state *state = env->cur_state; struct bpf_reg_state *regs = state->regs, *reg; int i;
@@@ -1671,7 -2149,121 +2149,121 @@@ } }
- static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) + static void clear_all_pkt_pointers(struct bpf_verifier_env *env) + { + struct bpf_verifier_state *vstate = env->cur_state; + int i; + + for (i = 0; i <= vstate->curframe; i++) + __clear_all_pkt_pointers(env, vstate->frame[i]); + } + + static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, + int *insn_idx) + { + struct bpf_verifier_state *state = env->cur_state; + struct bpf_func_state *caller, *callee; + int i, subprog, target_insn; + + if (state->curframe + 1 >= MAX_CALL_FRAMES) { + verbose(env, "the call stack of %d frames is too deep\n", + state->curframe + 2); + return -E2BIG; + } + + target_insn = *insn_idx + insn->imm; + subprog = find_subprog(env, target_insn + 1); + if (subprog < 0) { + verbose(env, "verifier bug. No program starts at insn %d\n", + target_insn + 1); + return -EFAULT; + } + + caller = state->frame[state->curframe]; + if (state->frame[state->curframe + 1]) { + verbose(env, "verifier bug. Frame %d already allocated\n", + state->curframe + 1); + return -EFAULT; + } + + callee = kzalloc(sizeof(*callee), GFP_KERNEL); + if (!callee) + return -ENOMEM; + state->frame[state->curframe + 1] = callee; + + /* callee cannot access r0, r6 - r9 for reading and has to write + * into its own stack before reading from it. + * callee can read/write into caller's stack + */ + init_func_state(env, callee, + /* remember the callsite, it will be used by bpf_exit */ + *insn_idx /* callsite */, + state->curframe + 1 /* frameno within this callchain */, + subprog + 1 /* subprog number within this prog */); + + /* copy r1 - r5 args that callee can access */ + for (i = BPF_REG_1; i <= BPF_REG_5; i++) + callee->regs[i] = caller->regs[i]; + + /* after the call regsiters r0 - r5 were scratched */ + for (i = 0; i < CALLER_SAVED_REGS; i++) { + mark_reg_not_init(env, caller->regs, caller_saved[i]); + check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); + } + + /* only increment it after check_reg_arg() finished */ + state->curframe++; + + /* and go analyze first insn of the callee */ + *insn_idx = target_insn; + + if (env->log.level) { + verbose(env, "caller:\n"); + print_verifier_state(env, caller); + verbose(env, "callee:\n"); + print_verifier_state(env, callee); + } + return 0; + } + + static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) + { + struct bpf_verifier_state *state = env->cur_state; + struct bpf_func_state *caller, *callee; + struct bpf_reg_state *r0; + + callee = state->frame[state->curframe]; + r0 = &callee->regs[BPF_REG_0]; + if (r0->type == PTR_TO_STACK) { + /* technically it's ok to return caller's stack pointer + * (or caller's caller's pointer) back to the caller, + * since these pointers are valid. Only current stack + * pointer will be invalid as soon as function exits, + * but let's be conservative + */ + verbose(env, "cannot return stack pointer to the caller\n"); + return -EINVAL; + } + + state->curframe--; + caller = state->frame[state->curframe]; + /* return to the caller whatever r0 had in the callee */ + caller->regs[BPF_REG_0] = *r0; + + *insn_idx = callee->callsite + 1; + if (env->log.level) { + verbose(env, "returning from callee:\n"); + print_verifier_state(env, callee); + verbose(env, "to caller at %d:\n", *insn_idx); + print_verifier_state(env, caller); + } + /* clear everything in the callee */ + free_func_state(callee); + state->frame[state->curframe + 1] = NULL; + return 0; + } + + static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) { const struct bpf_func_proto *fn = NULL; struct bpf_reg_state *regs; @@@ -1729,13 -2321,6 +2321,13 @@@ err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); if (err) return err; + if (func_id == BPF_FUNC_tail_call) { + if (meta.map_ptr == NULL) { + verbose(env, "verifier bug\n"); + return -EINVAL; + } + env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr; + } err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); if (err) return err; @@@ -1871,7 -2456,9 +2463,9 @@@ static int adjust_ptr_min_max_vals(stru const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg) { - struct bpf_reg_state *regs = cur_regs(env), *dst_reg; + struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_func_state *state = vstate->frame[vstate->curframe]; + struct bpf_reg_state *regs = state->regs, *dst_reg; bool known = tnum_is_const(off_reg->var_off); s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; @@@ -1883,13 -2470,13 +2477,13 @@@ dst_reg = ®s[dst];
if (WARN_ON_ONCE(known && (smin_val != smax_val))) { - print_verifier_state(env, env->cur_state); + print_verifier_state(env, state); verbose(env, "verifier internal error: known but bad sbounds\n"); return -EINVAL; } if (WARN_ON_ONCE(known && (umin_val != umax_val))) { - print_verifier_state(env, env->cur_state); + print_verifier_state(env, state); verbose(env, "verifier internal error: known but bad ubounds\n"); return -EINVAL; @@@ -2301,7 -2888,9 +2895,9 @@@ static int adjust_scalar_min_max_vals(s static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg; + struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_func_state *state = vstate->frame[vstate->curframe]; + struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg; struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; u8 opcode = BPF_OP(insn->code);
@@@ -2352,12 -2941,12 +2948,12 @@@
/* Got here implies adding two SCALAR_VALUEs */ if (WARN_ON_ONCE(ptr_reg)) { - print_verifier_state(env, env->cur_state); + print_verifier_state(env, state); verbose(env, "verifier internal error: unexpected ptr_reg\n"); return -EINVAL; } if (WARN_ON(!src_reg)) { - print_verifier_state(env, env->cur_state); + print_verifier_state(env, state); verbose(env, "verifier internal error: no src_reg\n"); return -EINVAL; } @@@ -2493,11 -3082,6 +3089,11 @@@ static int check_alu_op(struct bpf_veri return -EINVAL; }
+ if (opcode == BPF_ARSH && BPF_CLASS(insn->code) != BPF_ALU64) { + verbose(env, "BPF_ARSH not supported for 32 bit ALU\n"); + return -EINVAL; + } + if ((opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) { int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32; @@@ -2519,14 -3103,15 +3115,15 @@@ return 0; }
- static void find_good_pkt_pointers(struct bpf_verifier_state *state, + static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, struct bpf_reg_state *dst_reg, enum bpf_reg_type type, bool range_right_open) { + struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_reg_state *regs = state->regs, *reg; u16 new_range; - int i; + int i, j;
if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) @@@ -2596,12 -3181,15 +3193,15 @@@ /* keep the maximum range already checked */ regs[i].range = max(regs[i].range, new_range);
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { - if (state->stack[i].slot_type[0] != STACK_SPILL) - continue; - reg = &state->stack[i].spilled_ptr; - if (reg->type == type && reg->id == dst_reg->id) - reg->range = max(reg->range, new_range); + for (j = 0; j <= vstate->curframe; j++) { + state = vstate->frame[j]; + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].slot_type[0] != STACK_SPILL) + continue; + reg = &state->stack[i].spilled_ptr; + if (reg->type == type && reg->id == dst_reg->id) + reg->range = max(reg->range, new_range); + } } }
@@@ -2839,20 -3427,24 +3439,24 @@@ static void mark_map_reg(struct bpf_reg /* The logic is similar to find_good_pkt_pointers(), both could eventually * be folded together at some point. */ - static void mark_map_regs(struct bpf_verifier_state *state, u32 regno, + static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno, bool is_null) { + struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_reg_state *regs = state->regs; u32 id = regs[regno].id; - int i; + int i, j;
for (i = 0; i < MAX_BPF_REG; i++) mark_map_reg(regs, i, id, is_null);
- for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { - if (state->stack[i].slot_type[0] != STACK_SPILL) - continue; - mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null); + for (j = 0; j <= vstate->curframe; j++) { + state = vstate->frame[j]; + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].slot_type[0] != STACK_SPILL) + continue; + mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null); + } } }
@@@ -2952,8 -3544,10 +3556,10 @@@ static bool try_match_pkt_pointers(cons static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { - struct bpf_verifier_state *other_branch, *this_branch = env->cur_state; - struct bpf_reg_state *regs = this_branch->regs, *dst_reg; + struct bpf_verifier_state *this_branch = env->cur_state; + struct bpf_verifier_state *other_branch; + struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs; + struct bpf_reg_state *dst_reg, *other_branch_regs; u8 opcode = BPF_OP(insn->code); int err;
@@@ -2996,8 -3590,9 +3602,9 @@@ if (BPF_SRC(insn->code) == BPF_K && (opcode == BPF_JEQ || opcode == BPF_JNE) && dst_reg->type == SCALAR_VALUE && - tnum_equals_const(dst_reg->var_off, insn->imm)) { - if (opcode == BPF_JEQ) { + tnum_is_const(dst_reg->var_off)) { + if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) || + (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) { /* if (imm == imm) goto pc+off; * only follow the goto, ignore fall-through */ @@@ -3015,6 -3610,7 +3622,7 @@@ other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx); if (!other_branch) return -EFAULT; + other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
/* detect if we are comparing against a constant value so we can adjust * our min/max values for our dst register. @@@ -3027,22 -3623,22 +3635,22 @@@ if (dst_reg->type == SCALAR_VALUE && regs[insn->src_reg].type == SCALAR_VALUE) { if (tnum_is_const(regs[insn->src_reg].var_off)) - reg_set_min_max(&other_branch->regs[insn->dst_reg], + reg_set_min_max(&other_branch_regs[insn->dst_reg], dst_reg, regs[insn->src_reg].var_off.value, opcode); else if (tnum_is_const(dst_reg->var_off)) - reg_set_min_max_inv(&other_branch->regs[insn->src_reg], + reg_set_min_max_inv(&other_branch_regs[insn->src_reg], ®s[insn->src_reg], dst_reg->var_off.value, opcode); else if (opcode == BPF_JEQ || opcode == BPF_JNE) /* Comparing for equality, we can combine knowledge */ - reg_combine_min_max(&other_branch->regs[insn->src_reg], - &other_branch->regs[insn->dst_reg], + reg_combine_min_max(&other_branch_regs[insn->src_reg], + &other_branch_regs[insn->dst_reg], ®s[insn->src_reg], ®s[insn->dst_reg], opcode); } } else if (dst_reg->type == SCALAR_VALUE) { - reg_set_min_max(&other_branch->regs[insn->dst_reg], + reg_set_min_max(&other_branch_regs[insn->dst_reg], dst_reg, insn->imm, opcode); }
@@@ -3063,7 -3659,7 +3671,7 @@@ return -EACCES; } if (env->log.level) - print_verifier_state(env, this_branch); + print_verifier_state(env, this_branch->frame[this_branch->curframe]); return 0; }
@@@ -3148,6 -3744,18 +3756,18 @@@ static int check_ld_abs(struct bpf_veri return -EINVAL; }
+ if (env->subprog_cnt) { + /* when program has LD_ABS insn JITs and interpreter assume + * that r1 == ctx == skb which is not the case for callees + * that can have arbitrary arguments. It's problematic + * for main prog as well since JITs would need to analyze + * all functions in order to make proper register save/restore + * decisions in the main prog. Hence disallow LD_ABS with calls + */ + verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n"); + return -EINVAL; + } + if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || BPF_SIZE(insn->code) == BPF_DW || (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { @@@ -3324,6 -3932,10 +3944,10 @@@ static int check_cfg(struct bpf_verifie int ret = 0; int i, t;
+ ret = check_subprogs(env); + if (ret < 0) + return ret; + insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); if (!insn_state) return -ENOMEM; @@@ -3356,6 -3968,14 +3980,14 @@@ peek_stack goto err_free; if (t + 1 < insn_cnt) env->explored_states[t + 1] = STATE_LIST_MARK; + if (insns[t].src_reg == BPF_PSEUDO_CALL) { + env->explored_states[t] = STATE_LIST_MARK; + ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env); + if (ret == 1) + goto peek_stack; + else if (ret < 0) + goto err_free; + } } else if (opcode == BPF_JA) { if (BPF_SRC(insns[t].code) != BPF_K) { ret = -EINVAL; @@@ -3474,11 -4094,21 +4106,21 @@@ static bool check_ids(u32 old_id, u32 c static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, struct idpair *idmap) { + bool equal; + if (!(rold->live & REG_LIVE_READ)) /* explored state didn't use this */ return true;
- if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, live)) == 0) + equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, frameno)) == 0; + + if (rold->type == PTR_TO_STACK) + /* two stack pointers are equal only if they're pointing to + * the same stack frame, since fp-8 in foo != fp-8 in bar + */ + return equal && rold->frameno == rcur->frameno; + + if (equal) return true;
if (rold->type == NOT_INIT) @@@ -3550,7 -4180,6 +4192,6 @@@ tnum_in(rold->var_off, rcur->var_off); case PTR_TO_CTX: case CONST_PTR_TO_MAP: - case PTR_TO_STACK: case PTR_TO_PACKET_END: /* Only valid matches are exact, which memcmp() above * would have accepted @@@ -3565,8 -4194,8 +4206,8 @@@ return false; }
- static bool stacksafe(struct bpf_verifier_state *old, - struct bpf_verifier_state *cur, + static bool stacksafe(struct bpf_func_state *old, + struct bpf_func_state *cur, struct idpair *idmap) { int i, spi; @@@ -3584,8 -4213,19 +4225,19 @@@ for (i = 0; i < old->allocated_stack; i++) { spi = i / BPF_REG_SIZE;
+ if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) + /* explored state didn't use this */ + continue; + if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) continue; + /* if old state was safe with misc data in the stack + * it will be safe with zero-initialized stack. + * The opposite is not true + */ + if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC && + cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO) + continue; if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != cur->stack[spi].slot_type[i % BPF_REG_SIZE]) /* Ex: old explored (safe) state has STACK_SPILL in @@@ -3642,9 -4282,8 +4294,8 @@@ * whereas register type in current state is meaningful, it means that * the current state will reach 'bpf_exit' instruction safely */ - static bool states_equal(struct bpf_verifier_env *env, - struct bpf_verifier_state *old, - struct bpf_verifier_state *cur) + static bool func_states_equal(struct bpf_func_state *old, + struct bpf_func_state *cur) { struct idpair *idmap; bool ret = false; @@@ -3668,71 -4307,72 +4319,72 @@@ out_free return ret; }
+ static bool states_equal(struct bpf_verifier_env *env, + struct bpf_verifier_state *old, + struct bpf_verifier_state *cur) + { + int i; + + if (old->curframe != cur->curframe) + return false; + + /* for states to be equal callsites have to be the same + * and all frame states need to be equivalent + */ + for (i = 0; i <= old->curframe; i++) { + if (old->frame[i]->callsite != cur->frame[i]->callsite) + return false; + if (!func_states_equal(old->frame[i], cur->frame[i])) + return false; + } + return true; + } + /* A write screens off any subsequent reads; but write marks come from the - * straight-line code between a state and its parent. When we arrive at a - * jump target (in the first iteration of the propagate_liveness() loop), - * we didn't arrive by the straight-line code, so read marks in state must - * propagate to parent regardless of state's write marks. + * straight-line code between a state and its parent. When we arrive at an + * equivalent state (jump target or such) we didn't arrive by the straight-line + * code, so read marks in the state must propagate to the parent regardless + * of the state's write marks. That's what 'parent == state->parent' comparison + * in mark_reg_read() and mark_stack_slot_read() is for. */ - static bool do_propagate_liveness(const struct bpf_verifier_state *state, - struct bpf_verifier_state *parent) + static int propagate_liveness(struct bpf_verifier_env *env, + const struct bpf_verifier_state *vstate, + struct bpf_verifier_state *vparent) { - bool writes = parent == state->parent; /* Observe write marks */ - bool touched = false; /* any changes made? */ - int i; + int i, frame, err = 0; + struct bpf_func_state *state, *parent;
- if (!parent) - return touched; + if (vparent->curframe != vstate->curframe) { + WARN(1, "propagate_live: parent frame %d current frame %d\n", + vparent->curframe, vstate->curframe); + return -EFAULT; + } /* Propagate read liveness of registers... */ BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); /* We don't need to worry about FP liveness because it's read-only */ for (i = 0; i < BPF_REG_FP; i++) { - if (parent->regs[i].live & REG_LIVE_READ) - continue; - if (writes && (state->regs[i].live & REG_LIVE_WRITTEN)) + if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ) continue; - if (state->regs[i].live & REG_LIVE_READ) { - parent->regs[i].live |= REG_LIVE_READ; - touched = true; + if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) { + err = mark_reg_read(env, vstate, vparent, i); + if (err) + return err; } } + /* ... and stack slots */ - for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && - i < parent->allocated_stack / BPF_REG_SIZE; i++) { - if (parent->stack[i].slot_type[0] != STACK_SPILL) - continue; - if (state->stack[i].slot_type[0] != STACK_SPILL) - continue; - if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ) - continue; - if (writes && - (state->stack[i].spilled_ptr.live & REG_LIVE_WRITTEN)) - continue; - if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) { - parent->stack[i].spilled_ptr.live |= REG_LIVE_READ; - touched = true; + for (frame = 0; frame <= vstate->curframe; frame++) { + state = vstate->frame[frame]; + parent = vparent->frame[frame]; + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && + i < parent->allocated_stack / BPF_REG_SIZE; i++) { + if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ) + continue; + if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) + mark_stack_slot_read(env, vstate, vparent, i, frame); } } - return touched; - } - - /* "parent" is "a state from which we reach the current state", but initially - * it is not the state->parent (i.e. "the state whose straight-line code leads - * to the current state"), instead it is the state that happened to arrive at - * a (prunable) equivalent of the current state. See comment above - * do_propagate_liveness() for consequences of this. - * This function is just a more efficient way of calling mark_reg_read() or - * mark_stack_slot_read() on each reg in "parent" that is read in "state", - * though it requires that parent != state->parent in the call arguments. - */ - static void propagate_liveness(const struct bpf_verifier_state *state, - struct bpf_verifier_state *parent) - { - while (do_propagate_liveness(state, parent)) { - /* Something changed, so we need to feed those changes onward */ - state = parent; - parent = state->parent; - } + return err; }
static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) @@@ -3740,7 -4380,7 +4392,7 @@@ struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl; struct bpf_verifier_state *cur = env->cur_state; - int i, err; + int i, j, err;
sl = env->explored_states[insn_idx]; if (!sl) @@@ -3761,7 -4401,9 +4413,9 @@@ * they'll be immediately forgotten as we're pruning * this state and will pop a new one. */ - propagate_liveness(&sl->state, cur); + err = propagate_liveness(env, &sl->state, cur); + if (err) + return err; return 1; } sl = sl->next; @@@ -3769,9 -4411,10 +4423,10 @@@
/* there were no equivalent states, remember current one. * technically the current state is not proven to be safe yet, - * but it will either reach bpf_exit (which means it's safe) or - * it will be rejected. Since there are no loops, we won't be - * seeing this 'insn_idx' instruction again on the way to bpf_exit + * but it will either reach outer most bpf_exit (which means it's safe) + * or it will be rejected. Since there are no loops, we won't be + * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx) + * again on the way to bpf_exit */ new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); if (!new_sl) @@@ -3795,19 -4438,15 +4450,15 @@@ * explored_states can get read marks.) */ for (i = 0; i < BPF_REG_FP; i++) - cur->regs[i].live = REG_LIVE_NONE; - for (i = 0; i < cur->allocated_stack / BPF_REG_SIZE; i++) - if (cur->stack[i].slot_type[0] == STACK_SPILL) - cur->stack[i].spilled_ptr.live = REG_LIVE_NONE; - return 0; - } + cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
- static int ext_analyzer_insn_hook(struct bpf_verifier_env *env, - int insn_idx, int prev_insn_idx) - { - if (env->dev_ops && env->dev_ops->insn_hook) - return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx); + /* all stack frames are accessible from callee, clear them all */ + for (j = 0; j <= cur->curframe; j++) { + struct bpf_func_state *frame = cur->frame[j];
+ for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) + frame->stack[i].spilled_ptr.live = REG_LIVE_NONE; + } return 0; }
@@@ -3816,7 -4455,7 +4467,7 @@@ static int do_check(struct bpf_verifier struct bpf_verifier_state *state; struct bpf_insn *insns = env->prog->insnsi; struct bpf_reg_state *regs; - int insn_cnt = env->prog->len; + int insn_cnt = env->prog->len, i; int insn_idx, prev_insn_idx = 0; int insn_processed = 0; bool do_print_state = false; @@@ -3824,9 -4463,18 +4475,18 @@@ state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); if (!state) return -ENOMEM; - env->cur_state = state; - init_reg_state(env, state->regs); + state->curframe = 0; state->parent = NULL; + state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); + if (!state->frame[0]) { + kfree(state); + return -ENOMEM; + } + env->cur_state = state; + init_func_state(env, state->frame[0], + BPF_MAIN_FUNC /* callsite */, + 0 /* frameno */, + 0 /* subprogno, zero == main subprog */); insn_idx = 0; for (;;) { struct bpf_insn *insn; @@@ -3873,19 -4521,25 +4533,25 @@@ else verbose(env, "\nfrom %d to %d:", prev_insn_idx, insn_idx); - print_verifier_state(env, state); + print_verifier_state(env, state->frame[state->curframe]); do_print_state = false; }
if (env->log.level) { + const struct bpf_insn_cbs cbs = { + .cb_print = verbose, + }; + verbose(env, "%d: ", insn_idx); - print_bpf_insn(verbose, env, insn, - env->allow_ptr_leaks); + print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks); }
- err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); - if (err) - return err; + if (bpf_prog_is_dev_bound(env->prog->aux)) { + err = bpf_prog_offload_verify_insn(env, insn_idx, + prev_insn_idx); + if (err) + return err; + }
regs = cur_regs(env); env->insn_aux_data[insn_idx].seen = true; @@@ -4006,13 -4660,17 +4672,17 @@@ if (opcode == BPF_CALL) { if (BPF_SRC(insn->code) != BPF_K || insn->off != 0 || - insn->src_reg != BPF_REG_0 || + (insn->src_reg != BPF_REG_0 && + insn->src_reg != BPF_PSEUDO_CALL) || insn->dst_reg != BPF_REG_0) { verbose(env, "BPF_CALL uses reserved fields\n"); return -EINVAL; }
- err = check_call(env, insn->imm, insn_idx); + if (insn->src_reg == BPF_PSEUDO_CALL) + err = check_func_call(env, insn, &insn_idx); + else + err = check_helper_call(env, insn->imm, insn_idx); if (err) return err;
@@@ -4037,6 -4695,16 +4707,16 @@@ return -EINVAL; }
+ if (state->curframe) { + /* exit from nested function */ + prev_insn_idx = insn_idx; + err = prepare_func_exit(env, &insn_idx); + if (err) + return err; + do_print_state = true; + continue; + } + /* eBPF calling convetion is such that R0 is used * to return the value from eBPF program. * Make sure that it's readable at this time @@@ -4097,8 -4765,16 +4777,16 @@@ process_bpf_exit insn_idx++; }
- verbose(env, "processed %d insns, stack depth %d\n", insn_processed, - env->prog->aux->stack_depth); + verbose(env, "processed %d insns, stack depth ", insn_processed); + for (i = 0; i < env->subprog_cnt + 1; i++) { + u32 depth = env->subprog_stack_depth[i]; + + verbose(env, "%d", depth); + if (i + 1 < env->subprog_cnt + 1) + verbose(env, "+"); + } + verbose(env, "\n"); + env->prog->aux->stack_depth = env->subprog_stack_depth[0]; return 0; }
@@@ -4284,6 -4960,19 +4972,19 @@@ static int adjust_insn_aux_data(struct return 0; }
+ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len) + { + int i; + + if (len == 1) + return; + for (i = 0; i < env->subprog_cnt; i++) { + if (env->subprog_starts[i] < off) + continue; + env->subprog_starts[i] += len - 1; + } + } + static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, const struct bpf_insn *patch, u32 len) { @@@ -4294,6 -4983,7 +4995,7 @@@ return NULL; if (adjust_insn_aux_data(env, new_prog->len, off, len)) return NULL; + adjust_subprog_starts(env, off, len); return new_prog; }
@@@ -4428,6 -5118,172 +5130,172 @@@ static int convert_ctx_accesses(struct return 0; }
+ static int jit_subprogs(struct bpf_verifier_env *env) + { + struct bpf_prog *prog = env->prog, **func, *tmp; + int i, j, subprog_start, subprog_end = 0, len, subprog; + struct bpf_insn *insn; + void *old_bpf_func; + int err = -ENOMEM; + + if (env->subprog_cnt == 0) + return 0; + + for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (insn->code != (BPF_JMP | BPF_CALL) || + insn->src_reg != BPF_PSEUDO_CALL) + continue; + subprog = find_subprog(env, i + insn->imm + 1); + if (subprog < 0) { + WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", + i + insn->imm + 1); + return -EFAULT; + } + /* temporarily remember subprog id inside insn instead of + * aux_data, since next loop will split up all insns into funcs + */ + insn->off = subprog + 1; + /* remember original imm in case JIT fails and fallback + * to interpreter will be needed + */ + env->insn_aux_data[i].call_imm = insn->imm; + /* point imm to __bpf_call_base+1 from JITs point of view */ + insn->imm = 1; + } + + func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL); + if (!func) + return -ENOMEM; + + for (i = 0; i <= env->subprog_cnt; i++) { + subprog_start = subprog_end; + if (env->subprog_cnt == i) + subprog_end = prog->len; + else + subprog_end = env->subprog_starts[i]; + + len = subprog_end - subprog_start; + func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER); + if (!func[i]) + goto out_free; + memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], + len * sizeof(struct bpf_insn)); + func[i]->type = prog->type; + func[i]->len = len; + if (bpf_prog_calc_tag(func[i])) + goto out_free; + func[i]->is_func = 1; + /* Use bpf_prog_F_tag to indicate functions in stack traces. + * Long term would need debug info to populate names + */ + func[i]->aux->name[0] = 'F'; + func[i]->aux->stack_depth = env->subprog_stack_depth[i]; + func[i]->jit_requested = 1; + func[i] = bpf_int_jit_compile(func[i]); + if (!func[i]->jited) { + err = -ENOTSUPP; + goto out_free; + } + cond_resched(); + } + /* at this point all bpf functions were successfully JITed + * now populate all bpf_calls with correct addresses and + * run last pass of JIT + */ + for (i = 0; i <= env->subprog_cnt; i++) { + insn = func[i]->insnsi; + for (j = 0; j < func[i]->len; j++, insn++) { + if (insn->code != (BPF_JMP | BPF_CALL) || + insn->src_reg != BPF_PSEUDO_CALL) + continue; + subprog = insn->off; + insn->off = 0; + insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) + func[subprog]->bpf_func - + __bpf_call_base; + } + } + for (i = 0; i <= env->subprog_cnt; i++) { + old_bpf_func = func[i]->bpf_func; + tmp = bpf_int_jit_compile(func[i]); + if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) { + verbose(env, "JIT doesn't support bpf-to-bpf calls\n"); + err = -EFAULT; + goto out_free; + } + cond_resched(); + } + + /* finally lock prog and jit images for all functions and + * populate kallsysm + */ + for (i = 0; i <= env->subprog_cnt; i++) { + bpf_prog_lock_ro(func[i]); + bpf_prog_kallsyms_add(func[i]); + } + + /* Last step: make now unused interpreter insns from main + * prog consistent for later dump requests, so they can + * later look the same as if they were interpreted only. + */ + for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + unsigned long addr; + + if (insn->code != (BPF_JMP | BPF_CALL) || + insn->src_reg != BPF_PSEUDO_CALL) + continue; + insn->off = env->insn_aux_data[i].call_imm; + subprog = find_subprog(env, i + insn->off + 1); + addr = (unsigned long)func[subprog + 1]->bpf_func; + addr &= PAGE_MASK; + insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) + addr - __bpf_call_base; + } + + prog->jited = 1; + prog->bpf_func = func[0]->bpf_func; + prog->aux->func = func; + prog->aux->func_cnt = env->subprog_cnt + 1; + return 0; + out_free: + for (i = 0; i <= env->subprog_cnt; i++) + if (func[i]) + bpf_jit_free(func[i]); + kfree(func); + /* cleanup main prog to be interpreted */ + prog->jit_requested = 0; + for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) { + if (insn->code != (BPF_JMP | BPF_CALL) || + insn->src_reg != BPF_PSEUDO_CALL) + continue; + insn->off = 0; + insn->imm = env->insn_aux_data[i].call_imm; + } + return err; + } + + static int fixup_call_args(struct bpf_verifier_env *env) + { + struct bpf_prog *prog = env->prog; + struct bpf_insn *insn = prog->insnsi; + int i, depth; + + if (env->prog->jit_requested) + if (jit_subprogs(env) == 0) + return 0; + + for (i = 0; i < prog->len; i++, insn++) { + if (insn->code != (BPF_JMP | BPF_CALL) || + insn->src_reg != BPF_PSEUDO_CALL) + continue; + depth = get_callee_stack_depth(env, insn, i); + if (depth < 0) + return depth; + bpf_patch_call_args(insn, depth); + } + return 0; + } + /* fixup insn->imm field of bpf_call instructions * and inline eligible helpers as explicit sequence of BPF instructions * @@@ -4447,11 -5303,15 +5315,15 @@@ static int fixup_bpf_calls(struct bpf_v for (i = 0; i < insn_cnt; i++, insn++) { if (insn->code != (BPF_JMP | BPF_CALL)) continue; + if (insn->src_reg == BPF_PSEUDO_CALL) + continue;
if (insn->imm == BPF_FUNC_get_route_realm) prog->dst_needed = 1; if (insn->imm == BPF_FUNC_get_prandom_u32) bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_override_return) + prog->kprobe_override = 1; if (insn->imm == BPF_FUNC_tail_call) { /* If we tail call into other programs, we * cannot make any assumptions since they can @@@ -4468,42 -5328,13 +5340,42 @@@ */ insn->imm = 0; insn->code = BPF_JMP | BPF_TAIL_CALL; + + /* instead of changing every JIT dealing with tail_call + * emit two extra insns: + * if (index >= max_entries) goto out; + * index &= array->index_mask; + * to avoid out-of-bounds cpu speculation + */ + map_ptr = env->insn_aux_data[i + delta].map_ptr; + if (map_ptr == BPF_MAP_PTR_POISON) { + verbose(env, "tail_call abusing map_ptr\n"); + return -EINVAL; + } + if (!map_ptr->unpriv_array) + continue; + insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, + map_ptr->max_entries, 2); + insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, + container_of(map_ptr, + struct bpf_array, + map)->index_mask); + insn_buf[2] = *insn; + cnt = 3; + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; continue; }
/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup * handlers are currently limited to 64 bit only. */ - if (ebpf_jit_enabled() && BITS_PER_LONG == 64 && + if (prog->jit_requested && BITS_PER_LONG == 64 && insn->imm == BPF_FUNC_map_lookup_elem) { map_ptr = env->insn_aux_data[i + delta].map_ptr; if (map_ptr == BPF_MAP_PTR_POISON || @@@ -4638,7 -5469,7 +5510,7 @@@ int bpf_check(struct bpf_prog **prog, u if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true;
- if (env->prog->aux->offload) { + if (bpf_prog_is_dev_bound(env->prog->aux)) { ret = bpf_prog_offload_verifier_prep(env); if (ret) goto err_unlock; @@@ -4655,12 -5486,12 +5527,12 @@@ if (!env->explored_states) goto skip_full_check;
+ env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + ret = check_cfg(env); if (ret < 0) goto skip_full_check;
- env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); - ret = do_check(env); if (env->cur_state) { free_verifier_state(env->cur_state, true); @@@ -4675,12 -5506,18 +5547,18 @@@ skip_full_check sanitize_dead_code(env);
if (ret == 0) + ret = check_max_stack_depth(env); + + if (ret == 0) /* program is valid, convert *(u32*)(ctx + off) accesses */ ret = convert_ctx_accesses(env);
if (ret == 0) ret = fixup_bpf_calls(env);
+ if (ret == 0) + ret = fixup_call_args(env); + if (log->level && bpf_verifier_log_full(log)) ret = -ENOSPC; if (log->level && !log->ubuf) { diff --combined kernel/events/core.c index 56d2b99de409,878d86c513d6..0f2fe78c2fa2 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@@ -4511,11 -4511,11 +4511,11 @@@ perf_read(struct file *file, char __use return ret; }
-static unsigned int perf_poll(struct file *file, poll_table *wait) +static __poll_t perf_poll(struct file *file, poll_table *wait) { struct perf_event *event = file->private_data; struct ring_buffer *rb; - unsigned int events = POLLHUP; + __poll_t events = POLLHUP;
poll_wait(file, &event->waitq, wait);
@@@ -4723,6 -4723,9 +4723,9 @@@ static long _perf_ioctl(struct perf_eve rcu_read_unlock(); return 0; } + + case PERF_EVENT_IOC_QUERY_BPF: + return perf_event_query_prog_array(event, (void __user *)arg); default: return -ENOTTY; } @@@ -4904,7 -4907,6 +4907,7 @@@ void perf_event_update_userpage(struct unlock: rcu_read_unlock(); } +EXPORT_SYMBOL_GPL(perf_event_update_userpage);
static int perf_mmap_fault(struct vm_fault *vmf) { @@@ -8081,6 -8083,13 +8084,13 @@@ static int perf_event_set_bpf_prog(stru return -EINVAL; }
+ /* Kprobe override only works for kprobes, not uprobes. */ + if (prog->kprobe_override && + !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) { + bpf_prog_put(prog); + return -EINVAL; + } + if (is_tracepoint || is_syscall_tp) { int off = trace_event_get_offsets(event->tp_event);
diff --combined kernel/module.c index 8042b8fcbf14,bd695bfdc5c4..83075a104710 --- a/kernel/module.c +++ b/kernel/module.c @@@ -3118,7 -3118,11 +3118,11 @@@ static int find_module_sections(struct sizeof(*mod->ftrace_callsites), &mod->num_ftrace_callsites); #endif - + #ifdef CONFIG_BPF_KPROBE_OVERRIDE + mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list", + sizeof(*mod->kprobe_ei_funcs), + &mod->num_kprobe_ei_funcs); + #endif mod->extable = section_objs(info, "__ex_table", sizeof(*mod->extable), &mod->num_exentries);
@@@ -3938,12 -3942,6 +3942,12 @@@ static const char *get_ksymbol(struct m return symname(kallsyms, best); }
+void * __weak dereference_module_function_descriptor(struct module *mod, + void *ptr) +{ + return ptr; +} + /* For kallsyms to ask for address resolution. NULL means not found. Careful * not to lock to avoid deadlock on oopses, simply disable preemption. */ const char *module_address_lookup(unsigned long addr, diff --combined net/atm/common.c index 8f12f1c6fa14,5763fd241dc3..6523f38c4957 --- a/net/atm/common.c +++ b/net/atm/common.c @@@ -14,7 -14,7 +14,7 @@@ #include <linux/capability.h> #include <linux/mm.h> #include <linux/sched/signal.h> - #include <linux/time.h> /* struct timeval */ + #include <linux/time64.h> /* 64-bit time for seconds */ #include <linux/skbuff.h> #include <linux/bitops.h> #include <linux/init.h> @@@ -648,11 -648,11 +648,11 @@@ out return error; }
-unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait) +__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct atm_vcc *vcc; - unsigned int mask; + __poll_t mask;
sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; diff --combined net/batman-adv/icmp_socket.c index a98e0a986cef,8041cf106c37..581375d0eed2 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@@ -1,3 -1,4 +1,4 @@@ + // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2007-2017 B.A.T.M.A.N. contributors: * * Marek Lindner @@@ -26,6 -27,7 +27,7 @@@ #include <linux/export.h> #include <linux/fcntl.h> #include <linux/fs.h> + #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/kernel.h> #include <linux/list.h> @@@ -42,11 -44,11 +44,11 @@@ #include <linux/string.h> #include <linux/uaccess.h> #include <linux/wait.h> + #include <uapi/linux/batadv_packet.h>
#include "hard-interface.h" #include "log.h" #include "originator.h" - #include "packet.h" #include "send.h"
static struct batadv_socket_client *batadv_socket_client_hash[256]; @@@ -55,6 -57,9 +57,9 @@@ static void batadv_socket_add_packet(st struct batadv_icmp_header *icmph, size_t icmp_len);
+ /** + * batadv_socket_init() - Initialize soft interface independent socket data + */ void batadv_socket_init(void) { memset(batadv_socket_client_hash, 0, sizeof(batadv_socket_client_hash)); @@@ -292,7 -297,7 +297,7 @@@ out return len; }
-static unsigned int batadv_socket_poll(struct file *file, poll_table *wait) +static __poll_t batadv_socket_poll(struct file *file, poll_table *wait) { struct batadv_socket_client *socket_client = file->private_data;
@@@ -314,6 -319,12 +319,12 @@@ static const struct file_operations bat .llseek = no_llseek, };
+ /** + * batadv_socket_setup() - Create debugfs "socket" file + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_socket_setup(struct batadv_priv *bat_priv) { struct dentry *d; @@@ -333,7 -344,7 +344,7 @@@ err }
/** - * batadv_socket_add_packet - schedule an icmp packet to be sent to + * batadv_socket_add_packet() - schedule an icmp packet to be sent to * userspace on an icmp socket. * @socket_client: the socket this packet belongs to * @icmph: pointer to the header of the icmp packet @@@ -390,7 -401,7 +401,7 @@@ static void batadv_socket_add_packet(st }
/** - * batadv_socket_receive_packet - schedule an icmp packet to be received + * batadv_socket_receive_packet() - schedule an icmp packet to be received * locally and sent to userspace. * @icmph: pointer to the header of the icmp packet * @icmp_len: total length of the icmp packet diff --combined net/batman-adv/log.c index 76451460c98d,da004980ab8b..9be74a44e99d --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@@ -1,3 -1,4 +1,4 @@@ + // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2010-2017 B.A.T.M.A.N. contributors: * * Marek Lindner @@@ -24,6 -25,7 +25,7 @@@ #include <linux/export.h> #include <linux/fcntl.h> #include <linux/fs.h> + #include <linux/gfp.h> #include <linux/jiffies.h> #include <linux/kernel.h> #include <linux/module.h> @@@ -86,6 -88,13 +88,13 @@@ static int batadv_fdebug_log(struct bat return 0; }
+ /** + * batadv_debug_log() - Add debug log entry + * @bat_priv: the bat priv with all the soft interface information + * @fmt: format string + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...) { va_list args; @@@ -176,7 -185,7 +185,7 @@@ static ssize_t batadv_log_read(struct f return error; }
-static unsigned int batadv_log_poll(struct file *file, poll_table *wait) +static __poll_t batadv_log_poll(struct file *file, poll_table *wait) { struct batadv_priv *bat_priv = file->private_data; struct batadv_priv_debug_log *debug_log = bat_priv->debug_log; @@@ -197,6 -206,12 +206,12 @@@ static const struct file_operations bat .llseek = no_llseek, };
+ /** + * batadv_debug_log_setup() - Initialize debug log + * @bat_priv: the bat priv with all the soft interface information + * + * Return: 0 on success or negative error number in case of failure + */ int batadv_debug_log_setup(struct batadv_priv *bat_priv) { struct dentry *d; @@@ -222,6 -237,10 +237,10 @@@ err return -ENOMEM; }
+ /** + * batadv_debug_log_cleanup() - Destroy debug log + * @bat_priv: the bat priv with all the soft interface information + */ void batadv_debug_log_cleanup(struct batadv_priv *bat_priv) { kfree(bat_priv->debug_log); diff --combined net/bluetooth/af_bluetooth.c index 671b907ba678,f044202346c6..f897681780db --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@@ -421,7 -421,7 +421,7 @@@ out } EXPORT_SYMBOL(bt_sock_stream_recvmsg);
-static inline unsigned int bt_accept_poll(struct sock *parent) +static inline __poll_t bt_accept_poll(struct sock *parent) { struct bt_sock *s, *n; struct sock *sk; @@@ -437,11 -437,11 +437,11 @@@ return 0; }
-unsigned int bt_sock_poll(struct file *file, struct socket *sock, +__poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - unsigned int mask = 0; + __poll_t mask = 0;
BT_DBG("sock %p, sk %p", sock, sk);
@@@ -766,43 -766,39 +766,39 @@@ static int __init bt_init(void return err;
err = sock_register(&bt_sock_family_ops); - if (err < 0) { - bt_sysfs_cleanup(); - return err; - } + if (err) + goto cleanup_sysfs;
BT_INFO("HCI device and connection manager initialized");
err = hci_sock_init(); - if (err < 0) - goto error; + if (err) + goto unregister_socket;
err = l2cap_init(); - if (err < 0) - goto sock_err; + if (err) + goto cleanup_socket;
err = sco_init(); - if (err < 0) { - l2cap_exit(); - goto sock_err; - } + if (err) + goto cleanup_cap;
err = mgmt_init(); - if (err < 0) { - sco_exit(); - l2cap_exit(); - goto sock_err; - } + if (err) + goto cleanup_sco;
return 0;
- sock_err: + cleanup_sco: + sco_exit(); + cleanup_cap: + l2cap_exit(); + cleanup_socket: hci_sock_cleanup(); - - error: + unregister_socket: sock_unregister(PF_BLUETOOTH); + cleanup_sysfs: bt_sysfs_cleanup(); - return err; }
diff --combined net/core/sock.c index 1211159718ad,72d14b221784..420c380bc61d --- a/net/core/sock.c +++ b/net/core/sock.c @@@ -145,6 -145,8 +145,8 @@@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list);
+ static void sock_inuse_add(struct net *net, int val); + /** * sk_ns_capable - General socket capability test * @sk: Socket to use a capability on or through @@@ -1531,8 -1533,11 +1533,11 @@@ struct sock *sk_alloc(struct net *net, sk->sk_kern_sock = kern; sock_lock_init(sk); sk->sk_net_refcnt = kern ? 0 : 1; - if (likely(sk->sk_net_refcnt)) + if (likely(sk->sk_net_refcnt)) { get_net(net); + sock_inuse_add(net, 1); + } + sock_net_set(sk, net); refcount_set(&sk->sk_wmem_alloc, 1);
@@@ -1595,6 -1600,9 +1600,9 @@@ void sk_destruct(struct sock *sk
static void __sk_free(struct sock *sk) { + if (likely(sk->sk_net_refcnt)) + sock_inuse_add(sock_net(sk), -1); + if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) sock_diag_broadcast_destroy(sk); else @@@ -1716,6 -1724,8 +1724,8 @@@ struct sock *sk_clone_lock(const struc newsk->sk_priority = 0; newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); + if (likely(newsk->sk_net_refcnt)) + sock_inuse_add(sock_net(newsk), 1);
/* * Before updating sk_refcnt, we must commit prior changes to memory @@@ -2496,7 -2506,7 +2506,7 @@@ int sock_no_getname(struct socket *sock } EXPORT_SYMBOL(sock_no_getname);
-unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt) +__poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt) { return 0; } @@@ -3045,7 -3055,7 +3055,7 @@@ static DECLARE_BITMAP(proto_inuse_idx,
void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) { - __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val); + __this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val); } EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
@@@ -3055,21 -3065,50 +3065,50 @@@ int sock_prot_inuse_get(struct net *net int res = 0;
for_each_possible_cpu(cpu) - res += per_cpu_ptr(net->core.inuse, cpu)->val[idx]; + res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
return res >= 0 ? res : 0; } EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
+ static void sock_inuse_add(struct net *net, int val) + { + this_cpu_add(*net->core.sock_inuse, val); + } + + int sock_inuse_get(struct net *net) + { + int cpu, res = 0; + + for_each_possible_cpu(cpu) + res += *per_cpu_ptr(net->core.sock_inuse, cpu); + + return res; + } + + EXPORT_SYMBOL_GPL(sock_inuse_get); + static int __net_init sock_inuse_init_net(struct net *net) { - net->core.inuse = alloc_percpu(struct prot_inuse); - return net->core.inuse ? 0 : -ENOMEM; + net->core.prot_inuse = alloc_percpu(struct prot_inuse); + if (net->core.prot_inuse == NULL) + return -ENOMEM; + + net->core.sock_inuse = alloc_percpu(int); + if (net->core.sock_inuse == NULL) + goto out; + + return 0; + + out: + free_percpu(net->core.prot_inuse); + return -ENOMEM; }
static void __net_exit sock_inuse_exit_net(struct net *net) { - free_percpu(net->core.inuse); + free_percpu(net->core.prot_inuse); + free_percpu(net->core.sock_inuse); }
static struct pernet_operations net_inuse_ops = { @@@ -3112,6 -3151,10 +3151,10 @@@ static inline void assign_proto_idx(str static inline void release_proto_idx(struct proto *prot) { } + + static void sock_inuse_add(struct net *net, int val) + { + } #endif
static void req_prot_cleanup(struct request_sock_ops *rsk_prot) diff --combined net/dccp/proto.c index 8b8db3d481bd,fa7e92e08920..74685fecfdb9 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@@ -38,6 -38,9 +38,9 @@@ #include "dccp.h" #include "feat.h"
+ #define CREATE_TRACE_POINTS + #include "trace.h" + DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
EXPORT_SYMBOL_GPL(dccp_statistics); @@@ -110,7 -113,7 +113,7 @@@ void dccp_set_state(struct sock *sk, co /* Change state AFTER socket is unhashed to avoid closed * socket sitting in hash tables. */ - sk->sk_state = state; + inet_sk_set_state(sk, state); }
EXPORT_SYMBOL_GPL(dccp_set_state); @@@ -318,10 -321,10 +321,10 @@@ EXPORT_SYMBOL_GPL(dccp_disconnect) * take care of normal races (between the test and the event) and we don't * go look at any of the socket buffers directly. */ -unsigned int dccp_poll(struct file *file, struct socket *sock, +__poll_t dccp_poll(struct file *file, struct socket *sock, poll_table *wait) { - unsigned int mask; + __poll_t mask; struct sock *sk = sock->sk;
sock_poll_wait(file, sk_sleep(sk), wait); @@@ -761,6 -764,8 +764,8 @@@ int dccp_sendmsg(struct sock *sk, struc int rc, size; long timeo;
+ trace_dccp_probe(sk, len); + if (len > dp->dccps_mss_cache) return -EMSGSIZE;
diff --combined net/ipv4/esp4.c index 61fe6e4d23fc,6f00e43120a8..296d0b956bfe --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@@ -121,14 -121,32 +121,32 @@@ static void esp_ssg_unref(struct xfrm_s static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; + struct xfrm_offload *xo = xfrm_offload(skb); void *tmp; - struct dst_entry *dst = skb_dst(skb); - struct xfrm_state *x = dst->xfrm; + struct xfrm_state *x; + + if (xo && (xo->flags & XFRM_DEV_RESUME)) + x = skb->sp->xvec[skb->sp->len - 1]; + else + x = skb_dst(skb)->xfrm;
tmp = ESP_SKB_CB(skb)->tmp; esp_ssg_unref(x, tmp); kfree(tmp); - xfrm_output_resume(skb, err); + + if (xo && (xo->flags & XFRM_DEV_RESUME)) { + if (err) { + XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); + kfree_skb(skb); + return; + } + + skb_push(skb, skb->data - skb_mac_header(skb)); + secpath_reset(skb); + xfrm_dev_resume(skb); + } else { + xfrm_output_resume(skb, err); + } }
/* Move ESP header back into place. */ @@@ -825,17 -843,13 +843,13 @@@ static int esp_init_aead(struct xfrm_st char aead_name[CRYPTO_MAX_ALG_NAME]; struct crypto_aead *aead; int err; - u32 mask = 0;
err = -ENAMETOOLONG; if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) goto error;
- if (x->xso.offload_handle) - mask |= CRYPTO_ALG_ASYNC; - - aead = crypto_alloc_aead(aead_name, 0, mask); + aead = crypto_alloc_aead(aead_name, 0, 0); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; @@@ -865,7 -879,6 +879,6 @@@ static int esp_init_authenc(struct xfrm char authenc_name[CRYPTO_MAX_ALG_NAME]; unsigned int keylen; int err; - u32 mask = 0;
err = -EINVAL; if (!x->ealg) @@@ -891,10 -904,7 +904,7 @@@ goto error; }
- if (x->xso.offload_handle) - mask |= CRYPTO_ALG_ASYNC; - - aead = crypto_alloc_aead(authenc_name, 0, mask); + aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; @@@ -981,7 -991,6 +991,7 @@@ static int esp_init_state(struct xfrm_s
switch (encap->encap_type) { default: + err = -EINVAL; goto error; case UDP_ENCAP_ESPINUDP: x->props.header_len += sizeof(struct udphdr); diff --combined net/ipv4/esp4_offload.c index b1338e576d00,c359f3cfeec3..32fbd9ba3609 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@@ -38,8 -38,7 +38,8 @@@ static struct sk_buff **esp4_gro_receiv __be32 spi; int err;
- skb_pull(skb, offset); + if (!pskb_pull(skb, offset)) + return NULL;
if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) goto out; @@@ -109,75 -108,36 +109,36 @@@ static void esp4_gso_encap(struct xfrm_ static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { - __u32 seq; - int err = 0; - struct sk_buff *skb2; struct xfrm_state *x; struct ip_esp_hdr *esph; struct crypto_aead *aead; - struct sk_buff *segs = ERR_PTR(-EINVAL); netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb);
if (!xo) - goto out; - - seq = xo->seq.low; + return ERR_PTR(-EINVAL);
x = skb->sp->xvec[skb->sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb);
if (esph->spi != x->id.spi) - goto out; + return ERR_PTR(-EINVAL);
if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) - goto out; + return ERR_PTR(-EINVAL);
__skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
skb->encap_hdr_csum = 1;
- if (!(features & NETIF_F_HW_ESP)) + if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle || + (x->xso.dev != skb->dev)) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
- segs = x->outer_mode->gso_segment(x, skb, esp_features); - if (IS_ERR_OR_NULL(segs)) - goto out; - - __skb_pull(skb, skb->data - skb_mac_header(skb)); - - skb2 = segs; - do { - struct sk_buff *nskb = skb2->next; - - xo = xfrm_offload(skb2); - xo->flags |= XFRM_GSO_SEGMENT; - xo->seq.low = seq; - xo->seq.hi = xfrm_replay_seqhi(x, seq); + xo->flags |= XFRM_GSO_SEGMENT;
- if(!(features & NETIF_F_HW_ESP)) - xo->flags |= CRYPTO_FALLBACK; - - x->outer_mode->xmit(x, skb2); - - err = x->type_offload->xmit(x, skb2, esp_features); - if (err) { - kfree_skb_list(segs); - return ERR_PTR(err); - } - - if (!skb_is_gso(skb2)) - seq++; - else - seq += skb_shinfo(skb2)->gso_segs; - - skb_push(skb2, skb2->mac_len); - skb2 = nskb; - } while (skb2); - - out: - return segs; + return x->outer_mode->gso_segment(x, skb, esp_features); }
static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb) @@@ -204,6 -164,7 +165,7 @@@ static int esp_xmit(struct xfrm_state * struct crypto_aead *aead; struct esp_info esp; bool hw_offload = true; + __u32 seq;
esp.inplace = true;
@@@ -242,23 -203,30 +204,30 @@@ return esp.nfrags; }
+ seq = xo->seq.low; + esph = esp.esph; esph->spi = x->id.spi;
skb_push(skb, -skb_network_offset(skb));
if (xo->flags & XFRM_GSO_SEGMENT) { - esph->seq_no = htonl(xo->seq.low); - } else { - ip_hdr(skb)->tot_len = htons(skb->len); - ip_send_check(ip_hdr(skb)); + esph->seq_no = htonl(seq); + + if (!skb_is_gso(skb)) + xo->seq.low++; + else + xo->seq.low += skb_shinfo(skb)->gso_segs; }
+ esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32)); + + ip_hdr(skb)->tot_len = htons(skb->len); + ip_send_check(ip_hdr(skb)); + if (hw_offload) return 0;
- esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); - err = esp_output_tail(x, skb, &esp); if (err) return err; diff --combined net/ipv4/tcp.c index c4a7ee7f6721,d7cf861bf699..c1a295d93b4a --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@@ -283,8 -283,6 +283,6 @@@ #include <asm/ioctls.h> #include <net/busy_poll.h>
- #include <trace/events/tcp.h> - struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@@ -493,18 -491,16 +491,16 @@@ static void tcp_tx_timestamp(struct soc * take care of normal races (between the test and the event) and we don't * go look at any of the socket buffers directly. */ -unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) +__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait) { - unsigned int mask; + __poll_t mask; struct sock *sk = sock->sk; const struct tcp_sock *tp = tcp_sk(sk); int state;
- sock_rps_record_flow(sk); - sock_poll_wait(file, sk_sleep(sk), wait);
- state = sk_state_load(sk); + state = inet_sk_state_load(sk); if (state == TCP_LISTEN) return inet_csk_listen_poll(sk);
@@@ -1106,12 -1102,15 +1102,15 @@@ static int linear_payload_sz(bool first return 0; }
- static int select_size(const struct sock *sk, bool sg, bool first_skb) + static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc) { const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache;
if (sg) { + if (zc) + return 0; + if (sk_can_gso(sk)) { tmp = linear_payload_sz(first_skb); } else { @@@ -1188,7 -1187,7 +1187,7 @@@ int tcp_sendmsg_locked(struct sock *sk int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; bool process_backlog = false; - bool sg; + bool sg, zc = false; long timeo;
flags = msg->msg_flags; @@@ -1206,7 -1205,8 +1205,8 @@@ goto out_err; }
- if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG)) + zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG; + if (!zc) uarg->zerocopy = 0; }
@@@ -1283,6 -1283,7 +1283,7 @@@ restart
if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { bool first_skb; + int linear;
new_segment: /* Allocate new segment. If the interface is SG, @@@ -1296,9 -1297,8 +1297,8 @@@ goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); - skb = sk_stream_alloc_skb(sk, - select_size(sk, sg, first_skb), - sk->sk_allocation, + linear = select_size(sk, sg, first_skb, zc); + skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation, first_skb); if (!skb) goto wait_for_memory; @@@ -1327,13 -1327,13 +1327,13 @@@ copy = msg_data_left(msg);
/* Where to copy to? */ - if (skb_availroom(skb) > 0) { + if (skb_availroom(skb) > 0 && !zc) { /* We have some space in skb head. Superb! */ copy = min_t(int, copy, skb_availroom(skb)); err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); if (err) goto do_fault; - } else if (!uarg || !uarg->zerocopy) { + } else if (!zc) { bool merge = true; int i = skb_shinfo(skb)->nr_frags; struct page_frag *pfrag = sk_page_frag(sk); @@@ -1373,8 -1373,10 +1373,10 @@@ pfrag->offset += copy; } else { err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg); - if (err == -EMSGSIZE || err == -EEXIST) + if (err == -EMSGSIZE || err == -EEXIST) { + tcp_mark_push(tp, skb); goto new_segment; + } if (err < 0) goto do_error; copy = err; @@@ -1731,8 -1733,8 +1733,8 @@@ static void tcp_update_recv_tstamps(str }
/* Similar to __sock_recv_timestamp, but does not require an skb */ - void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, - struct scm_timestamping *tss) + static void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, + struct scm_timestamping *tss) { struct timeval tv; bool has_timestamping = false; @@@ -2040,8 -2042,6 +2042,6 @@@ void tcp_set_state(struct sock *sk, in { int oldstate = sk->sk_state;
- trace_tcp_set_state(sk, oldstate, state); - switch (state) { case TCP_ESTABLISHED: if (oldstate != TCP_ESTABLISHED) @@@ -2065,7 -2065,7 +2065,7 @@@ /* Change state AFTER socket is unhashed to avoid closed * socket sitting in hash tables. */ - sk_state_store(sk, state); + inet_sk_state_store(sk, state);
#ifdef STATE_TRACE SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]); @@@ -2920,7 -2920,7 +2920,7 @@@ void tcp_get_info(struct sock *sk, stru if (sk->sk_type != SOCK_STREAM) return;
- info->tcpi_state = sk_state_load(sk); + info->tcpi_state = inet_sk_state_load(sk);
/* Report meaningful fields for all TCP states, including listeners */ rate = READ_ONCE(sk->sk_pacing_rate); @@@ -3578,6 -3578,9 +3578,9 @@@ void __init tcp_init(void percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); inet_hashinfo_init(&tcp_hashinfo); + inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash", + thash_entries, 21, /* one slot per 2 MB*/ + 0, 64 * 1024); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --combined net/ipv4/udp.c index ef45adfc0edb,db72619e07e4..6eddd0602813 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@@ -357,18 -357,12 +357,12 @@@ fail } EXPORT_SYMBOL(udp_lib_get_port);
- static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr, - unsigned int port) - { - return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port; - } - int udp_v4_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = - udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); + ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); unsigned int hash2_partial = - udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0); + ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
/* precompute partial secondary hash */ udp_sk(sk)->udp_portaddr_hash = hash2_partial; @@@ -445,7 -439,7 +439,7 @@@ static struct sock *udp4_lib_lookup2(st struct sk_buff *skb) { struct sock *sk, *result; - int score, badness, matches = 0, reuseport = 0; + int score, badness; u32 hash = 0;
result = NULL; @@@ -454,23 -448,16 +448,16 @@@ score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif, exact_dif); if (score > badness) { - reuseport = sk->sk_reuseport; - if (reuseport) { + if (sk->sk_reuseport) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); if (result) return result; - matches = 1; } badness = score; result = sk; - } else if (score == badness && reuseport) { - matches++; - if (reciprocal_scale(hash, matches) == 0) - result = sk; - hash = next_pseudo_random32(hash); } } return result; @@@ -488,11 -475,11 +475,11 @@@ struct sock *__udp4_lib_lookup(struct n unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask); struct udp_hslot *hslot2, *hslot = &udptable->hash[slot]; bool exact_dif = udp_lib_exact_dif_match(net, skb); - int score, badness, matches = 0, reuseport = 0; + int score, badness; u32 hash = 0;
if (hslot->count > 10) { - hash2 = udp4_portaddr_hash(net, daddr, hnum); + hash2 = ipv4_portaddr_hash(net, daddr, hnum); slot2 = hash2 & udptable->mask; hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) @@@ -503,7 -490,7 +490,7 @@@ exact_dif, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; - hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); + hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; /* avoid searching the same slot again. */ if (unlikely(slot2 == old_slot2)) @@@ -526,23 -513,16 +513,16 @@@ begin score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif, exact_dif); if (score > badness) { - reuseport = sk->sk_reuseport; - if (reuseport) { + if (sk->sk_reuseport) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); if (result) return result; - matches = 1; } result = sk; badness = score; - } else if (score == badness && reuseport) { - matches++; - if (reciprocal_scale(hash, matches) == 0) - result = sk; - hash = next_pseudo_random32(hash); } } return result; @@@ -1775,7 -1755,7 +1755,7 @@@ EXPORT_SYMBOL(udp_lib_rehash)
static void udp_v4_rehash(struct sock *sk) { - u16 new_hash = udp4_portaddr_hash(sock_net(sk), + u16 new_hash = ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, inet_sk(sk)->inet_num); udp_lib_rehash(sk, new_hash); @@@ -1966,9 -1946,9 +1946,9 @@@ static int __udp4_lib_mcast_deliver(str struct sk_buff *nskb;
if (use_hash2) { - hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & + hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & udptable->mask; - hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask; + hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask; start_lookup: hslot = &udptable->hash2[hash2]; offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node); @@@ -2200,7 -2180,7 +2180,7 @@@ static struct sock *__udp4_lib_demux_lo int dif, int sdif) { unsigned short hnum = ntohs(loc_port); - unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); + unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum); unsigned int slot2 = hash2 & udp_table.mask; struct udp_hslot *hslot2 = &udp_table.hash2[slot2]; INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr); @@@ -2502,16 -2482,14 +2482,14 @@@ int compat_udp_getsockopt(struct sock * * but then block when reading it. Add special case code * to work around these arguably broken applications. */ -unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) +__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) { - unsigned int mask = datagram_poll(file, sock, wait); + __poll_t mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk;
if (!skb_queue_empty(&udp_sk(sk)->reader_queue)) mask |= POLLIN | POLLRDNORM;
- sock_rps_record_flow(sk); - /* Check for false positives due to checksum errors */ if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1) diff --combined net/ipv6/esp6.c index 1a7f00cd4803,7c888c6e53a9..97513f35bcc5 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@@ -141,14 -141,32 +141,32 @@@ static void esp_ssg_unref(struct xfrm_s static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; + struct xfrm_offload *xo = xfrm_offload(skb); void *tmp; - struct dst_entry *dst = skb_dst(skb); - struct xfrm_state *x = dst->xfrm; + struct xfrm_state *x; + + if (xo && (xo->flags & XFRM_DEV_RESUME)) + x = skb->sp->xvec[skb->sp->len - 1]; + else + x = skb_dst(skb)->xfrm;
tmp = ESP_SKB_CB(skb)->tmp; esp_ssg_unref(x, tmp); kfree(tmp); - xfrm_output_resume(skb, err); + + if (xo && (xo->flags & XFRM_DEV_RESUME)) { + if (err) { + XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR); + kfree_skb(skb); + return; + } + + skb_push(skb, skb->data - skb_mac_header(skb)); + secpath_reset(skb); + xfrm_dev_resume(skb); + } else { + xfrm_output_resume(skb, err); + } }
/* Move ESP header back into place. */ @@@ -734,17 -752,13 +752,13 @@@ static int esp_init_aead(struct xfrm_st char aead_name[CRYPTO_MAX_ALG_NAME]; struct crypto_aead *aead; int err; - u32 mask = 0;
err = -ENAMETOOLONG; if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) goto error;
- if (x->xso.offload_handle) - mask |= CRYPTO_ALG_ASYNC; - - aead = crypto_alloc_aead(aead_name, 0, mask); + aead = crypto_alloc_aead(aead_name, 0, 0); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; @@@ -774,7 -788,6 +788,6 @@@ static int esp_init_authenc(struct xfrm char authenc_name[CRYPTO_MAX_ALG_NAME]; unsigned int keylen; int err; - u32 mask = 0;
err = -EINVAL; if (!x->ealg) @@@ -800,10 -813,7 +813,7 @@@ goto error; }
- if (x->xso.offload_handle) - mask |= CRYPTO_ALG_ASYNC; - - aead = crypto_alloc_aead(authenc_name, 0, mask); + aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; @@@ -890,12 -900,13 +900,12 @@@ static int esp6_init_state(struct xfrm_ x->props.header_len += IPV4_BEET_PHMAXLEN + (sizeof(struct ipv6hdr) - sizeof(struct iphdr)); break; + default: case XFRM_MODE_TRANSPORT: break; case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); break; - default: - goto error; }
align = ALIGN(crypto_aead_blocksize(aead), 4); diff --combined net/ipv6/esp6_offload.c index dd9627490c7c,0bb7d54cf2cb..44d109c435bc --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@@ -60,8 -60,7 +60,8 @@@ static struct sk_buff **esp6_gro_receiv int nhoff; int err;
- skb_pull(skb, offset); + if (!pskb_pull(skb, offset)) + return NULL;
if ((err = xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq)) != 0) goto out; @@@ -136,75 -135,36 +136,36 @@@ static void esp6_gso_encap(struct xfrm_ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, netdev_features_t features) { - __u32 seq; - int err = 0; - struct sk_buff *skb2; struct xfrm_state *x; struct ip_esp_hdr *esph; struct crypto_aead *aead; - struct sk_buff *segs = ERR_PTR(-EINVAL); netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb);
if (!xo) - goto out; - - seq = xo->seq.low; + return ERR_PTR(-EINVAL);
x = skb->sp->xvec[skb->sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb);
if (esph->spi != x->id.spi) - goto out; + return ERR_PTR(-EINVAL);
if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) - goto out; + return ERR_PTR(-EINVAL);
__skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
skb->encap_hdr_csum = 1;
- if (!(features & NETIF_F_HW_ESP)) + if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle || + (x->xso.dev != skb->dev)) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
- segs = x->outer_mode->gso_segment(x, skb, esp_features); - if (IS_ERR_OR_NULL(segs)) - goto out; - - __skb_pull(skb, skb->data - skb_mac_header(skb)); - - skb2 = segs; - do { - struct sk_buff *nskb = skb2->next; - - xo = xfrm_offload(skb2); - xo->flags |= XFRM_GSO_SEGMENT; - xo->seq.low = seq; - xo->seq.hi = xfrm_replay_seqhi(x, seq); - - if(!(features & NETIF_F_HW_ESP)) - xo->flags |= CRYPTO_FALLBACK; - - x->outer_mode->xmit(x, skb2); - - err = x->type_offload->xmit(x, skb2, esp_features); - if (err) { - kfree_skb_list(segs); - return ERR_PTR(err); - } - - if (!skb_is_gso(skb2)) - seq++; - else - seq += skb_shinfo(skb2)->gso_segs; - - skb_push(skb2, skb2->mac_len); - skb2 = nskb; - } while (skb2); + xo->flags |= XFRM_GSO_SEGMENT;
- out: - return segs; + return x->outer_mode->gso_segment(x, skb, esp_features); }
static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb) @@@ -223,6 -183,7 +184,7 @@@
static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { + int len; int err; int alen; int blksize; @@@ -231,6 -192,7 +193,7 @@@ struct crypto_aead *aead; struct esp_info esp; bool hw_offload = true; + __u32 seq;
esp.inplace = true;
@@@ -266,28 -228,33 +229,33 @@@ return esp.nfrags; }
+ seq = xo->seq.low; + esph = ip_esp_hdr(skb); esph->spi = x->id.spi;
skb_push(skb, -skb_network_offset(skb));
if (xo->flags & XFRM_GSO_SEGMENT) { - esph->seq_no = htonl(xo->seq.low); - } else { - int len; - - len = skb->len - sizeof(struct ipv6hdr); - if (len > IPV6_MAXPLEN) - len = 0; + esph->seq_no = htonl(seq);
- ipv6_hdr(skb)->payload_len = htons(len); + if (!skb_is_gso(skb)) + xo->seq.low++; + else + xo->seq.low += skb_shinfo(skb)->gso_segs; }
+ esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); + + len = skb->len - sizeof(struct ipv6hdr); + if (len > IPV6_MAXPLEN) + len = 0; + + ipv6_hdr(skb)->payload_len = htons(len); + if (hw_offload) return 0;
- esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); - err = esp6_output_tail(x, skb, &esp); if (err) return err; diff --combined net/ipv6/ip6_fib.c index 9dcc3924a975,e31118f417b4..b7c4befe67ec --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@@ -107,16 -107,13 +107,13 @@@ enum
void fib6_update_sernum(struct rt6_info *rt) { - struct fib6_table *table = rt->rt6i_table; struct net *net = dev_net(rt->dst.dev); struct fib6_node *fn;
- spin_lock_bh(&table->tb6_lock); fn = rcu_dereference_protected(rt->rt6i_node, - lockdep_is_held(&table->tb6_lock)); + lockdep_is_held(&rt->rt6i_table->tb6_lock)); if (fn) fn->fn_sernum = fib6_new_sernum(net); - spin_unlock_bh(&table->tb6_lock); }
/* @@@ -640,11 -637,6 +637,11 @@@ static struct fib6_node *fib6_add_1(str if (!(fn->fn_flags & RTN_RTINFO)) { RCU_INIT_POINTER(fn->leaf, NULL); rt6_release(leaf); + /* remove null_entry in the root node */ + } else if (fn->fn_flags & RTN_TL_ROOT && + rcu_access_pointer(fn->leaf) == + net->ipv6.ip6_null_entry) { + RCU_INIT_POINTER(fn->leaf, NULL); }
return fn; @@@ -804,12 -796,6 +801,6 @@@ insert_above return ln; }
- static bool rt6_qualify_for_ecmp(struct rt6_info *rt) - { - return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == - RTF_GATEWAY; - } - static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) { int i; @@@ -898,7 -884,7 +889,7 @@@ static int fib6_add_rt2node(struct fib6 ins = &fn->leaf;
for (iter = leaf; iter; - iter = rcu_dereference_protected(iter->dst.rt6_next, + iter = rcu_dereference_protected(iter->rt6_next, lockdep_is_held(&rt->rt6i_table->tb6_lock))) { /* * Search for duplicates @@@ -955,7 -941,7 +946,7 @@@ break;
next_iter: - ins = &iter->dst.rt6_next; + ins = &iter->rt6_next; }
if (fallback_ins && !found) { @@@ -984,7 -970,7 +975,7 @@@ &sibling->rt6i_siblings); break; } - sibling = rcu_dereference_protected(sibling->dst.rt6_next, + sibling = rcu_dereference_protected(sibling->rt6_next, lockdep_is_held(&rt->rt6i_table->tb6_lock)); } /* For each sibling in the list, increment the counter of @@@ -999,6 -985,7 +990,7 @@@ rt6i_nsiblings++; } BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings); + rt6_multipath_rebalance(temp_sibling); }
/* @@@ -1014,7 -1001,7 +1006,7 @@@ add if (err) return err;
- rcu_assign_pointer(rt->dst.rt6_next, iter); + rcu_assign_pointer(rt->rt6_next, iter); atomic_inc(&rt->rt6i_ref); rcu_assign_pointer(rt->rt6i_node, fn); rcu_assign_pointer(*ins, rt); @@@ -1045,7 -1032,7 +1037,7 @@@
atomic_inc(&rt->rt6i_ref); rcu_assign_pointer(rt->rt6i_node, fn); - rt->dst.rt6_next = iter->dst.rt6_next; + rt->rt6_next = iter->rt6_next; rcu_assign_pointer(*ins, rt); call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, rt, extack); @@@ -1064,14 -1051,14 +1056,14 @@@
if (nsiblings) { /* Replacing an ECMP route, remove all siblings */ - ins = &rt->dst.rt6_next; + ins = &rt->rt6_next; iter = rcu_dereference_protected(*ins, lockdep_is_held(&rt->rt6i_table->tb6_lock)); while (iter) { if (iter->rt6i_metric > rt->rt6i_metric) break; if (rt6_qualify_for_ecmp(iter)) { - *ins = iter->dst.rt6_next; + *ins = iter->rt6_next; iter->rt6i_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); if (rcu_access_pointer(fn->rr_ptr) == iter) @@@ -1080,7 -1067,7 +1072,7 @@@ nsiblings--; info->nl_net->ipv6.rt6_stats->fib_rt_entries--; } else { - ins = &iter->dst.rt6_next; + ins = &iter->rt6_next; } iter = rcu_dereference_protected(*ins, lockdep_is_held(&rt->rt6i_table->tb6_lock)); @@@ -1107,8 -1094,8 +1099,8 @@@ void fib6_force_start_gc(struct net *ne jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); }
- static void fib6_update_sernum_upto_root(struct rt6_info *rt, - int sernum) + static void __fib6_update_sernum_upto_root(struct rt6_info *rt, + int sernum) { struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, lockdep_is_held(&rt->rt6i_table->tb6_lock)); @@@ -1122,6 -1109,11 +1114,11 @@@ } }
+ void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt) + { + __fib6_update_sernum_upto_root(rt, fib6_new_sernum(net)); + } + /* * Add routing information to the routing tree. * <destination addr>/<source addr> @@@ -1235,7 -1227,7 +1232,7 @@@ int fib6_add(struct fib6_node *root, st
err = fib6_add_rt2node(fn, rt, info, mxc, extack); if (!err) { - fib6_update_sernum_upto_root(rt, sernum); + __fib6_update_sernum_upto_root(rt, sernum); fib6_start_gc(info->nl_net, rt); }
@@@ -1275,17 -1267,13 +1272,17 @@@ out return err;
failure: - /* fn->leaf could be NULL if fn is an intermediate node and we - * failed to add the new route to it in both subtree creation - * failure and fib6_add_rt2node() failure case. - * In both cases, fib6_repair_tree() should be called to fix - * fn->leaf. + /* fn->leaf could be NULL and fib6_repair_tree() needs to be called if: + * 1. fn is an intermediate node and we failed to add the new + * route to it in both subtree creation failure and fib6_add_rt2node() + * failure case. + * 2. fn is the root node in the table and we fail to add the first + * default route to it. */ - if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) + if (fn && + (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) || + (fn->fn_flags & RTN_TL_ROOT && + !rcu_access_pointer(fn->leaf)))) fib6_repair_tree(info->nl_net, table, fn); /* Always release dst as dst->__refcnt is guaranteed * to be taken before entering this function @@@ -1540,12 -1528,6 +1537,12 @@@ static struct fib6_node *fib6_repair_tr struct fib6_walker *w; int iter = 0;
+ /* Set fn->leaf to null_entry for root node. */ + if (fn->fn_flags & RTN_TL_ROOT) { + rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry); + return fn; + } + for (;;) { struct fib6_node *fn_r = rcu_dereference_protected(fn->right, lockdep_is_held(&table->tb6_lock)); @@@ -1664,7 -1646,7 +1661,7 @@@ static void fib6_del_route(struct fib6_ WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
/* Unlink it */ - *rtp = rt->dst.rt6_next; + *rtp = rt->rt6_next; rt->rt6i_node = NULL; net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; @@@ -1685,6 -1667,7 +1682,7 @@@ sibling->rt6i_nsiblings--; rt->rt6i_nsiblings = 0; list_del_init(&rt->rt6i_siblings); + rt6_multipath_rebalance(next_sibling); }
/* Adjust walkers */ @@@ -1692,7 -1675,7 +1690,7 @@@ FOR_WALKERS(net, w) { if (w->state == FWS_C && w->leaf == rt) { RT6_TRACE("walker %p adjusted by delroute\n", w); - w->leaf = rcu_dereference_protected(rt->dst.rt6_next, + w->leaf = rcu_dereference_protected(rt->rt6_next, lockdep_is_held(&table->tb6_lock)); if (!w->leaf) w->state = FWS_U; @@@ -1700,15 -1683,10 +1698,15 @@@ } read_unlock(&net->ipv6.fib6_walker_lock);
- /* If it was last route, expunge its radix tree node */ + /* If it was last route, call fib6_repair_tree() to: + * 1. For root node, put back null_entry as how the table was created. + * 2. For other nodes, expunge its radix tree node. + */ if (!rcu_access_pointer(fn->leaf)) { - fn->fn_flags &= ~RTN_RTINFO; - net->ipv6.rt6_stats->fib_route_nodes--; + if (!(fn->fn_flags & RTN_TL_ROOT)) { + fn->fn_flags &= ~RTN_RTINFO; + net->ipv6.rt6_stats->fib_route_nodes--; + } fn = fib6_repair_tree(net, table, fn); }
@@@ -1756,7 -1734,7 +1754,7 @@@ int fib6_del(struct rt6_info *rt, struc fib6_del_route(table, fn, rtp, info); return 0; } - rtp_next = &cur->dst.rt6_next; + rtp_next = &cur->rt6_next; } return -ENOENT; } @@@ -1912,7 -1890,7 +1910,7 @@@ static int fib6_clean_node(struct fib6_
for_each_fib6_walker_rt(w) { res = c->func(rt, c->arg); - if (res < 0) { + if (res == -1) { w->leaf = rt; res = fib6_del(rt, &info); if (res) { @@@ -1925,6 -1903,12 +1923,12 @@@ continue; } return 0; + } else if (res == -2) { + if (WARN_ON(!rt->rt6i_nsiblings)) + continue; + rt = list_last_entry(&rt->rt6i_siblings, + struct rt6_info, rt6i_siblings); + continue; } WARN_ON(res != 0); } @@@ -1936,7 -1920,8 +1940,8 @@@ * Convenient frontend to tree walker. * * func is called on each route. - * It may return -1 -> delete this route. + * It may return -2 -> skip multipath route. + * -1 -> delete this route. * 0 -> continue walking */
@@@ -2128,7 -2113,6 +2133,6 @@@ static void fib6_net_exit(struct net *n { unsigned int i;
- rt6_ifdown(net, NULL); del_timer_sync(&net->ipv6.ip6_fib_timer);
for (i = 0; i < FIB6_TABLE_HASHSZ; i++) { @@@ -2167,8 -2151,8 +2171,8 @@@ int __init fib6_init(void if (ret) goto out_kmem_cache_create;
- ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, - 0); + ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL, + inet6_dump_fib, 0); if (ret) goto out_unregister_subsys;
@@@ -2233,7 -2217,7 +2237,7 @@@ static int ipv6_route_yield(struct fib6
do { iter->w.leaf = rcu_dereference_protected( - iter->w.leaf->dst.rt6_next, + iter->w.leaf->rt6_next, lockdep_is_held(&iter->tbl->tb6_lock)); iter->skip--; if (!iter->skip && iter->w.leaf) @@@ -2299,7 -2283,7 +2303,7 @@@ static void *ipv6_route_seq_next(struc if (!v) goto iter_table;
- n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next); + n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next); if (n) { ++*pos; return n; diff --combined net/ipv6/ip6_output.c index 688ba5f7516b,19adad6d90bc..18547a44bdaf --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@@ -138,6 -138,14 +138,14 @@@ static int ip6_finish_output(struct ne return ret; }
+ #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) + /* Policy lookup after SNAT yielded a new policy */ + if (skb_dst(skb)->xfrm) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(net, sk, skb); + } + #endif + if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) @@@ -370,7 -378,7 +378,7 @@@ static inline int ip6_forward_finish(st return dst_output(net, sk, skb); }
- static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) + unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) { unsigned int mtu; struct inet6_dev *idev; @@@ -390,6 -398,7 +398,7 @@@
return mtu; } + EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) { @@@ -1209,13 -1218,13 +1218,13 @@@ static int ip6_setup_cork(struct sock * rt->dst.dev->mtu : dst_mtu(&rt->dst); else mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); + rt->dst.dev->mtu : dst_mtu(xfrm_dst_path(&rt->dst)); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } cork->base.fragsize = mtu; - if (dst_allfrag(rt->dst.path)) + if (dst_allfrag(xfrm_dst_path(&rt->dst))) cork->base.flags |= IPCORK_ALLFRAG; cork->base.length = 0;
@@@ -1735,10 -1744,9 +1744,10 @@@ struct sk_buff *ip6_make_skb(struct soc cork.base.opt = NULL; v6_cork.opt = NULL; err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6); - if (err) + if (err) { + ip6_cork_release(&cork, &v6_cork); return ERR_PTR(err); - + } if (ipc6->dontfrag < 0) ipc6->dontfrag = inet6_sk(sk)->dontfrag;
diff --combined net/netfilter/x_tables.c index e02a21549c99,10c19a3f4cbd..ecbdea0431d4 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@@ -39,6 -39,7 +39,6 @@@ MODULE_LICENSE("GPL") MODULE_AUTHOR("Harald Welte laforge@netfilter.org"); MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
-#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) #define XT_PCPU_BLOCK_SIZE 4096
struct compat_delta { @@@ -999,7 -1000,7 +999,7 @@@ struct xt_table_info *xt_alloc_table_in return NULL;
/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ - if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages) + if ((size >> PAGE_SHIFT) + 2 > totalram_pages) return NULL;
info = kvmalloc(sz, GFP_KERNEL); @@@ -1026,7 -1027,7 +1026,7 @@@ void xt_free_table_info(struct xt_table } EXPORT_SYMBOL(xt_free_table_info);
- /* Find table by name, grabs mutex & ref. Returns NULL on error. */ + /* Find table by name, grabs mutex & ref. Returns ERR_PTR on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) { @@@ -1042,17 -1043,17 +1042,17 @@@
/* Table doesn't exist in this netns, re-try init */ list_for_each_entry(t, &init_net.xt.tables[af], list) { + int err; + if (strcmp(t->name, name)) continue; - if (!try_module_get(t->me)) { - mutex_unlock(&xt[af].mutex); - return NULL; - } - + if (!try_module_get(t->me)) + goto out; mutex_unlock(&xt[af].mutex); - if (t->table_init(net) != 0) { + err = t->table_init(net); + if (err < 0) { module_put(t->me); - return NULL; + return ERR_PTR(err); }
found = t; @@@ -1072,10 -1073,28 +1072,28 @@@ module_put(found->me); out: mutex_unlock(&xt[af].mutex); - return NULL; + return ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(xt_find_table_lock);
+ struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af, + const char *name) + { + struct xt_table *t = xt_find_table_lock(net, af, name); + + #ifdef CONFIG_MODULE + if (IS_ERR(t)) { + int err = request_module("%stable_%s", xt_prefix[af], name); + if (err) + return ERR_PTR(err); + t = xt_find_table_lock(net, af, name); + } + #endif + + return t; + } + EXPORT_SYMBOL_GPL(xt_request_find_table_lock); + void xt_table_unlock(struct xt_table *table) { mutex_unlock(&xt[table->af].mutex); @@@ -1396,7 -1415,7 +1414,7 @@@ static void *xt_mttg_seq_next(struct se trav->curr = trav->curr->next; if (trav->curr != trav->head) break; - /* fallthru, _stop will unlock */ + /* fall through */ default: return NULL; } diff --combined net/packet/af_packet.c index 3b4d6a3cf190,ee7aa0ba3a67..2a80f19f0913 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@@ -247,12 -247,13 +247,13 @@@ static int packet_direct_xmit(struct sk struct sk_buff *orig_skb = skb; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; + bool again = false;
if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) goto drop;
- skb = validate_xmit_skb_list(skb, dev); + skb = validate_xmit_skb_list(skb, dev, &again); if (skb != orig_skb) goto drop;
@@@ -4073,12 -4074,12 +4074,12 @@@ static int packet_ioctl(struct socket * return 0; }
-static unsigned int packet_poll(struct file *file, struct socket *sock, +static __poll_t packet_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct packet_sock *po = pkt_sk(sk); - unsigned int mask = datagram_poll(file, sock, wait); + __poll_t mask = datagram_poll(file, sock, wait);
spin_lock_bh(&sk->sk_receive_queue.lock); if (po->rx_ring.pg_vec) { diff --combined net/sctp/socket.c index 2a16db8c4a1f,a5e2150ab013..112cda6426a3 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@@ -201,6 -201,22 +201,22 @@@ static void sctp_for_each_tx_datachunk( cb(chunk); }
+ static void sctp_for_each_rx_skb(struct sctp_association *asoc, struct sock *sk, + void (*cb)(struct sk_buff *, struct sock *)) + + { + struct sk_buff *skb, *tmp; + + sctp_skb_for_each(skb, &asoc->ulpq.lobby, tmp) + cb(skb, sk); + + sctp_skb_for_each(skb, &asoc->ulpq.reasm, tmp) + cb(skb, sk); + + sctp_skb_for_each(skb, &asoc->ulpq.reasm_uo, tmp) + cb(skb, sk); + } + /* Verify that this is a valid address. */ static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, int len) @@@ -1528,7 -1544,7 +1544,7 @@@ static void sctp_close(struct sock *sk
lock_sock_nested(sk, SINGLE_DEPTH_NESTING); sk->sk_shutdown = SHUTDOWN_MASK; - sk->sk_state = SCTP_SS_CLOSING; + inet_sk_set_state(sk, SCTP_SS_CLOSING);
ep = sctp_sk(sk)->ep;
@@@ -1554,6 -1570,7 +1570,7 @@@
if (data_was_unread || !skb_queue_empty(&asoc->ulpq.lobby) || !skb_queue_empty(&asoc->ulpq.reasm) || + !skb_queue_empty(&asoc->ulpq.reasm_uo) || (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) { struct sctp_chunk *chunk;
@@@ -2002,7 -2019,20 +2019,20 @@@ static int sctp_sendmsg(struct sock *sk if (err < 0) goto out_free;
- wait_connect = true; + /* If stream interleave is enabled, wait_connect has to be + * done earlier than data enqueue, as it needs to make data + * or idata according to asoc->intl_enable which is set + * after connection is done. + */ + if (sctp_sk(asoc->base.sk)->strm_interleave) { + timeo = sock_sndtimeo(sk, 0); + err = sctp_wait_for_connect(asoc, &timeo); + if (err) + goto out_unlock; + } else { + wait_connect = true; + } + pr_debug("%s: we associated primitively\n", __func__); }
@@@ -2277,11 -2307,11 +2307,11 @@@ static int sctp_setsockopt_events(struc
if (asoc && sctp_outq_is_empty(&asoc->outqueue)) { event = sctp_ulpevent_make_sender_dry_event(asoc, - GFP_ATOMIC); + GFP_USER | __GFP_NOWARN); if (!event) return -ENOMEM;
- sctp_ulpq_tail_event(&asoc->ulpq, event); + asoc->stream.si->enqueue_event(&asoc->ulpq, event); } }
@@@ -3180,7 -3210,7 +3210,7 @@@ static int sctp_setsockopt_maxseg(struc if (val == 0) { val = asoc->pathmtu - sp->pf->af->net_header_len; val -= sizeof(struct sctphdr) + - sizeof(struct sctp_data_chunk); + sctp_datachk_len(&asoc->stream); } asoc->user_frag = val; asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); @@@ -3350,7 -3380,10 +3380,10 @@@ static int sctp_setsockopt_fragment_int if (get_user(val, (int __user *)optval)) return -EFAULT;
- sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1; + sctp_sk(sk)->frag_interleave = !!val; + + if (!sctp_sk(sk)->frag_interleave) + sctp_sk(sk)->strm_interleave = 0;
return 0; } @@@ -3498,8 -3531,6 +3531,8 @@@ static int sctp_setsockopt_hmac_ident(s
if (optlen < sizeof(struct sctp_hmacalgo)) return -EINVAL; + optlen = min_t(unsigned int, optlen, sizeof(struct sctp_hmacalgo) + + SCTP_AUTH_NUM_HMACS * sizeof(u16));
hmacs = memdup_user(optval, optlen); if (IS_ERR(hmacs)) @@@ -3538,11 -3569,6 +3571,11 @@@ static int sctp_setsockopt_auth_key(str
if (optlen <= sizeof(struct sctp_authkey)) return -EINVAL; + /* authkey->sca_keylength is u16, so optlen can't be bigger than + * this. + */ + optlen = min_t(unsigned int, optlen, USHRT_MAX + + sizeof(struct sctp_authkey));
authkey = memdup_user(optval, optlen); if (IS_ERR(authkey)) @@@ -3900,9 -3926,6 +3933,9 @@@ static int sctp_setsockopt_reset_stream
if (optlen < sizeof(*params)) return -EINVAL; + /* srs_number_streams is u16, so optlen can't be bigger than this. */ + optlen = min_t(unsigned int, optlen, USHRT_MAX + + sizeof(__u16) * sizeof(*params));
params = memdup_user(optval, optlen); if (IS_ERR(params)) @@@ -4033,6 -4056,40 +4066,40 @@@ out return retval; }
+ static int sctp_setsockopt_interleaving_supported(struct sock *sk, + char __user *optval, + unsigned int optlen) + { + struct sctp_sock *sp = sctp_sk(sk); + struct net *net = sock_net(sk); + struct sctp_assoc_value params; + int retval = -EINVAL; + + if (optlen < sizeof(params)) + goto out; + + optlen = sizeof(params); + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + if (params.assoc_id) + goto out; + + if (!net->sctp.intl_enable || !sp->frag_interleave) { + retval = -EPERM; + goto out; + } + + sp->strm_interleave = !!params.assoc_value; + + retval = 0; + + out: + return retval; + } + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@@ -4220,6 -4277,10 +4287,10 @@@ static int sctp_setsockopt(struct sock case SCTP_STREAM_SCHEDULER_VALUE: retval = sctp_setsockopt_scheduler_value(sk, optval, optlen); break; + case SCTP_INTERLEAVING_SUPPORTED: + retval = sctp_setsockopt_interleaving_supported(sk, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; @@@ -4596,7 -4657,7 +4667,7 @@@ static void sctp_shutdown(struct sock * if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) { struct sctp_association *asoc;
- sk->sk_state = SCTP_SS_CLOSING; + inet_sk_set_state(sk, SCTP_SS_CLOSING); asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); sctp_primitive_SHUTDOWN(net, asoc, NULL); @@@ -4690,20 -4751,11 +4761,11 @@@ int sctp_get_sctp_info(struct sock *sk EXPORT_SYMBOL_GPL(sctp_get_sctp_info);
/* use callback to avoid exporting the core structure */ - int sctp_transport_walk_start(struct rhashtable_iter *iter) + void sctp_transport_walk_start(struct rhashtable_iter *iter) { - int err; - rhltable_walk_enter(&sctp_transport_hashtable, iter);
- err = rhashtable_walk_start(iter); - if (err && err != -EAGAIN) { - rhashtable_walk_stop(iter); - rhashtable_walk_exit(iter); - return err; - } - - return 0; + rhashtable_walk_start(iter); }
void sctp_transport_walk_stop(struct rhashtable_iter *iter) @@@ -4794,12 -4846,10 +4856,10 @@@ int sctp_for_each_transport(int (*cb)(s struct net *net, int *pos, void *p) { struct rhashtable_iter hti; struct sctp_transport *tsp; - int ret; + int ret = 0;
again: - ret = sctp_transport_walk_start(&hti); - if (ret) - return ret; + sctp_transport_walk_start(&hti);
tsp = sctp_transport_get_idx(net, &hti, *pos + 1); for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { @@@ -5025,7 -5075,7 +5085,7 @@@ static int sctp_getsockopt_autoclose(st len = sizeof(int); if (put_user(len, optlen)) return -EFAULT; - if (copy_to_user(optval, &sctp_sk(sk)->autoclose, sizeof(int))) + if (copy_to_user(optval, &sctp_sk(sk)->autoclose, len)) return -EFAULT; return 0; } @@@ -5655,9 -5705,6 +5715,9 @@@ copy_getaddrs err = -EFAULT; goto out; } + /* XXX: We should have accounted for sizeof(struct sctp_getaddrs) too, + * but we can't change it anymore. + */ if (put_user(bytes_copied, optlen)) err = -EFAULT; out: @@@ -6094,7 -6141,7 +6154,7 @@@ static int sctp_getsockopt_maxseg(struc params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); - if (copy_from_user(¶ms, optval, sizeof(params))) + if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else return -EINVAL; @@@ -6264,9 -6311,7 +6324,9 @@@ static int sctp_getsockopt_active_key(s
if (len < sizeof(struct sctp_authkeyid)) return -EINVAL; - if (copy_from_user(&val, optval, sizeof(struct sctp_authkeyid))) + + len = sizeof(struct sctp_authkeyid); + if (copy_from_user(&val, optval, len)) return -EFAULT;
asoc = sctp_id2assoc(sk, val.scact_assoc_id); @@@ -6278,6 -6323,7 +6338,6 @@@ else val.scact_keynumber = ep->active_key_id;
- len = sizeof(struct sctp_authkeyid); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, &val, len)) @@@ -6303,7 -6349,7 +6363,7 @@@ static int sctp_getsockopt_peer_auth_ch if (len < sizeof(struct sctp_authchunks)) return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks))) + if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT;
to = p->gauth_chunks; @@@ -6348,7 -6394,7 +6408,7 @@@ static int sctp_getsockopt_local_auth_c if (len < sizeof(struct sctp_authchunks)) return -EINVAL;
- if (copy_from_user(&val, optval, sizeof(struct sctp_authchunks))) + if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT;
to = p->gauth_chunks; @@@ -6998,6 -7044,47 +7058,47 @@@ out return retval; }
+ static int sctp_getsockopt_interleaving_supported(struct sock *sk, int len, + char __user *optval, + int __user *optlen) + { + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (asoc) { + params.assoc_value = asoc->intl_enable; + } else if (!params.assoc_id) { + struct sctp_sock *sp = sctp_sk(sk); + + params.assoc_value = sp->strm_interleave; + } else { + retval = -EINVAL; + goto out; + } + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, ¶ms, len)) + goto out; + + retval = 0; + + out: + return retval; + } + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@@ -7188,6 -7275,10 +7289,10 @@@ retval = sctp_getsockopt_scheduler_value(sk, len, optval, optlen); break; + case SCTP_INTERLEAVING_SUPPORTED: + retval = sctp_getsockopt_interleaving_supported(sk, len, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; @@@ -7422,13 -7513,13 +7527,13 @@@ static int sctp_listen_start(struct soc * sockets. * */ - sk->sk_state = SCTP_SS_LISTENING; + inet_sk_set_state(sk, SCTP_SS_LISTENING); if (!ep->base.bind_addr.port) { if (sctp_autobind(sk)) return -EAGAIN; } else { if (sctp_get_port(sk, inet_sk(sk)->inet_num)) { - sk->sk_state = SCTP_SS_CLOSED; + inet_sk_set_state(sk, SCTP_SS_CLOSED); return -EADDRINUSE; } } @@@ -7514,11 -7605,11 +7619,11 @@@ out * here, again, by modeling the current TCP/UDP code. We don't have * a good way to test with it yet. */ -unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) +__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct sctp_sock *sp = sctp_sk(sk); - unsigned int mask; + __poll_t mask;
poll_wait(file, sk_sleep(sk), wait);
@@@ -8425,11 -8516,7 +8530,7 @@@ static void sctp_sock_migrate(struct so
}
- sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp) - sctp_skb_set_owner_r_frag(skb, newsk); - - sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp) - sctp_skb_set_owner_r_frag(skb, newsk); + sctp_for_each_rx_skb(assoc, newsk, sctp_skb_set_owner_r_frag);
/* Set the type of socket to indicate that it is peeled off from the * original UDP-style socket or created with the accept() call on a @@@ -8455,10 -8542,10 +8556,10 @@@ * is called, set RCV_SHUTDOWN flag. */ if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) { - newsk->sk_state = SCTP_SS_CLOSED; + inet_sk_set_state(newsk, SCTP_SS_CLOSED); newsk->sk_shutdown |= RCV_SHUTDOWN; } else { - newsk->sk_state = SCTP_SS_ESTABLISHED; + inet_sk_set_state(newsk, SCTP_SS_ESTABLISHED); }
release_sock(newsk); diff --combined net/smc/af_smc.c index 449f62e1e270,daf8075f5a4c..b6e4e2e4fe12 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@@ -520,7 -520,7 +520,7 @@@ decline_rdma smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { rc = smc_clc_send_decline(smc, reason_code); - if (rc < sizeof(struct smc_clc_msg_decline)) + if (rc < 0) goto out_err; } goto out_connected; @@@ -751,14 -751,16 +751,16 @@@ static void smc_listen_work(struct work { struct smc_sock *new_smc = container_of(work, struct smc_sock, smc_listen_work); + struct smc_clc_msg_proposal_prefix *pclc_prfx; struct socket *newclcsock = new_smc->clcsock; struct smc_sock *lsmc = new_smc->listen_smc; struct smc_clc_msg_accept_confirm cclc; int local_contact = SMC_REUSE_CONTACT; struct sock *newsmcsk = &new_smc->sk; - struct smc_clc_msg_proposal pclc; + struct smc_clc_msg_proposal *pclc; struct smc_ib_device *smcibdev; struct sockaddr_in peeraddr; + u8 buf[SMC_CLC_MAX_LEN]; struct smc_link *link; int reason_code = 0; int rc = 0, len; @@@ -775,7 -777,7 +777,7 @@@ /* do inband token exchange - *wait for and receive SMC Proposal CLC message */ - reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc), + reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf), SMC_CLC_PROPOSAL); if (reason_code < 0) goto out_err; @@@ -804,8 -806,11 +806,11 @@@ reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } - if ((pclc.outgoing_subnet != subnet) || - (pclc.prefix_len != prefix_len)) { + + pclc = (struct smc_clc_msg_proposal *)&buf; + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (pclc_prfx->outgoing_subnet != subnet || + pclc_prfx->prefix_len != prefix_len) { reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */ goto decline_rdma; } @@@ -816,7 -821,7 +821,7 @@@ /* allocate connection / link group */ mutex_lock(&smc_create_lgr_pending); local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, - smcibdev, ibport, &pclc.lcl, 0); + smcibdev, ibport, &pclc->lcl, 0); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) @@@ -879,11 -884,9 +884,9 @@@ } /* QP confirmation over RoCE fabric */ reason_code = smc_serv_conf_first_link(new_smc); - if (reason_code < 0) { + if (reason_code < 0) /* peer is not aware of a problem */ - rc = reason_code; goto out_err_unlock; - } if (reason_code > 0) goto decline_rdma_unlock; } @@@ -916,8 -919,7 +919,7 @@@ decline_rdma smc_conn_free(&new_smc->conn); new_smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { - rc = smc_clc_send_decline(new_smc, reason_code); - if (rc < sizeof(struct smc_clc_msg_decline)) + if (smc_clc_send_decline(new_smc, reason_code) < 0) goto out_err; } goto out_connected; @@@ -1107,7 -1109,7 +1109,7 @@@ out return rc; }
-static unsigned int smc_accept_poll(struct sock *parent) +static __poll_t smc_accept_poll(struct sock *parent) { struct smc_sock *isk; struct sock *sk; @@@ -1126,11 -1128,11 +1128,11 @@@ return 0; }
-static unsigned int smc_poll(struct file *file, struct socket *sock, +static __poll_t smc_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - unsigned int mask = 0; + __poll_t mask = 0; struct smc_sock *smc; int rc;
diff --combined net/smc/smc_clc.c index 511548085d16,abf7ceb6690b..8ac51583a063 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@@ -22,6 -22,54 +22,54 @@@ #include "smc_clc.h" #include "smc_ib.h"
+ /* check if received message has a correct header length and contains valid + * heading and trailing eyecatchers + */ + static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm) + { + struct smc_clc_msg_proposal_prefix *pclc_prfx; + struct smc_clc_msg_accept_confirm *clc; + struct smc_clc_msg_proposal *pclc; + struct smc_clc_msg_decline *dclc; + struct smc_clc_msg_trail *trl; + + if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + return false; + switch (clcm->type) { + case SMC_CLC_PROPOSAL: + pclc = (struct smc_clc_msg_proposal *)clcm; + pclc_prfx = smc_clc_proposal_get_prefix(pclc); + if (ntohs(pclc->hdr.length) != + sizeof(*pclc) + ntohs(pclc->iparea_offset) + + sizeof(*pclc_prfx) + + pclc_prfx->ipv6_prefixes_cnt * + sizeof(struct smc_clc_ipv6_prefix) + + sizeof(*trl)) + return false; + trl = (struct smc_clc_msg_trail *) + ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); + break; + case SMC_CLC_ACCEPT: + case SMC_CLC_CONFIRM: + clc = (struct smc_clc_msg_accept_confirm *)clcm; + if (ntohs(clc->hdr.length) != sizeof(*clc)) + return false; + trl = &clc->trl; + break; + case SMC_CLC_DECLINE: + dclc = (struct smc_clc_msg_decline *)clcm; + if (ntohs(dclc->hdr.length) != sizeof(*dclc)) + return false; + trl = &dclc->trl; + break; + default: + return false; + } + if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER))) + return false; + return true; + } + /* Wait for data on the tcp-socket, analyze received data * Returns: * 0 if success and it was not a decline that we received. @@@ -35,7 -83,7 +83,7 @@@ int smc_clc_wait_msg(struct smc_sock *s struct smc_clc_msg_hdr *clcm = buf; struct msghdr msg = {NULL, 0}; int reason_code = 0; - struct kvec vec; + struct kvec vec = {buf, buflen}; int len, datlen; int krflags;
@@@ -43,15 -91,12 +91,15 @@@ * so we don't consume any subsequent CLC message or payload data * in the TCP byte stream */ - vec.iov_base = buf; - vec.iov_len = buflen; + /* + * Caller must make sure that buflen is no less than + * sizeof(struct smc_clc_msg_hdr) + */ krflags = MSG_PEEK | MSG_WAITALL; smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; - len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, - sizeof(struct smc_clc_msg_hdr), krflags); + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, + sizeof(struct smc_clc_msg_hdr)); + len = sock_recvmsg(smc->clcsock, &msg, krflags); if (signal_pending(current)) { reason_code = -EINTR; clc_sk->sk_err = EINTR; @@@ -75,9 -120,7 +123,7 @@@ } datlen = ntohs(clcm->length); if ((len < sizeof(struct smc_clc_msg_hdr)) || - (datlen < sizeof(struct smc_clc_msg_decline)) || - (datlen > sizeof(struct smc_clc_msg_accept_confirm)) || - memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) || + (datlen > buflen) || ((clcm->type != SMC_CLC_DECLINE) && (clcm->type != expected_type))) { smc->sk.sk_err = EPROTO; @@@ -86,12 -129,13 +132,12 @@@ }
/* receive the complete CLC message */ - vec.iov_base = buf; - vec.iov_len = buflen; memset(&msg, 0, sizeof(struct msghdr)); + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen); krflags = MSG_WAITALL; smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; - len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags); + len = sock_recvmsg(smc->clcsock, &msg, krflags); - if (len < datlen) { + if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) { smc->sk.sk_err = EPROTO; reason_code = -EPROTO; goto out; @@@ -135,7 -179,7 +181,7 @@@ int smc_clc_send_decline(struct smc_soc smc->sk.sk_err = EPROTO; if (len < 0) smc->sk.sk_err = -len; - return len; + return sock_error(&smc->sk); }
/* send CLC PROPOSAL message across internal TCP socket */ @@@ -143,33 -187,43 +189,43 @@@ int smc_clc_send_proposal(struct smc_so struct smc_ib_device *smcibdev, u8 ibport) { + struct smc_clc_msg_proposal_prefix pclc_prfx; struct smc_clc_msg_proposal pclc; + struct smc_clc_msg_trail trl; int reason_code = 0; + struct kvec vec[3]; struct msghdr msg; - struct kvec vec; - int len, rc; + int len, plen, rc;
/* send SMC Proposal CLC message */ + plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl); memset(&pclc, 0, sizeof(pclc)); memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); pclc.hdr.type = SMC_CLC_PROPOSAL; - pclc.hdr.length = htons(sizeof(pclc)); + pclc.hdr.length = htons(plen); pclc.hdr.version = SMC_CLC_V1; /* SMC version */ memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE); memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN); + pclc.iparea_offset = htons(0);
+ memset(&pclc_prfx, 0, sizeof(pclc_prfx)); /* determine subnet and mask from internal TCP socket */ - rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet, - &pclc.prefix_len); + rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet, + &pclc_prfx.prefix_len); if (rc) return SMC_CLC_DECL_CNFERR; /* configuration error */ - memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); + pclc_prfx.ipv6_prefixes_cnt = 0; + memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); memset(&msg, 0, sizeof(msg)); - vec.iov_base = &pclc; - vec.iov_len = sizeof(pclc); + vec[0].iov_base = &pclc; + vec[0].iov_len = sizeof(pclc); + vec[1].iov_base = &pclc_prfx; + vec[1].iov_len = sizeof(pclc_prfx); + vec[2].iov_base = &trl; + vec[2].iov_len = sizeof(trl); /* due to the few bytes needed for clc-handshake this cannot block */ - len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc)); + len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen); if (len < sizeof(pclc)) { if (len >= 0) { reason_code = -ENETUNREACH; diff --combined net/socket.c index ad09aa97154b,bbd2e9ceb692..60d05479b2c1 --- a/net/socket.c +++ b/net/socket.c @@@ -118,7 -118,7 +118,7 @@@ static ssize_t sock_write_iter(struct k static int sock_mmap(struct file *file, struct vm_area_struct *vma);
static int sock_close(struct inode *inode, struct file *file); -static unsigned int sock_poll(struct file *file, +static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait); static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT @@@ -163,12 -163,6 +163,6 @@@ static DEFINE_SPINLOCK(net_family_lock) static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
/* - * Statistics counters of the socket lists - */ - - static DEFINE_PER_CPU(int, sockets_in_use); - - /* * Support routines. * Move socket addresses back and forth across the kernel/user * divide and look after the messy bits. @@@ -578,7 -572,6 +572,6 @@@ struct socket *sock_alloc(void inode->i_gid = current_fsgid(); inode->i_op = &sockfs_inode_ops;
- this_cpu_add(sockets_in_use, 1); return sock; } EXPORT_SYMBOL(sock_alloc); @@@ -605,7 -598,6 +598,6 @@@ void sock_release(struct socket *sock if (rcu_dereference_protected(sock->wq, 1)->fasync_list) pr_err("%s: fasync list not empty!\n", __func__);
- this_cpu_sub(sockets_in_use, 1); if (!sock->file) { iput(SOCK_INODE(sock)); return; @@@ -1095,9 -1087,9 +1087,9 @@@ out_release EXPORT_SYMBOL(sock_create_lite);
/* No kernel lock held - perfect */ -static unsigned int sock_poll(struct file *file, poll_table *wait) +static __poll_t sock_poll(struct file *file, poll_table *wait) { - unsigned int busy_flag = 0; + __poll_t busy_flag = 0; struct socket *sock;
/* @@@ -2619,29 -2611,11 +2611,11 @@@ out_fs
core_initcall(sock_init); /* early initcall */
- static int __init jit_init(void) - { - #ifdef CONFIG_BPF_JIT_ALWAYS_ON - bpf_jit_enable = 1; - #endif - return 0; - } - pure_initcall(jit_init); - #ifdef CONFIG_PROC_FS void socket_seq_show(struct seq_file *seq) { - int cpu; - int counter = 0; - - for_each_possible_cpu(cpu) - counter += per_cpu(sockets_in_use, cpu); - - /* It can be negative, by the way. 8) */ - if (counter < 0) - counter = 0; - - seq_printf(seq, "sockets: used %d\n", counter); + seq_printf(seq, "sockets: used %d\n", + sock_inuse_get(seq->private)); } #endif /* CONFIG_PROC_FS */
diff --combined net/tipc/server.c index 78a292a84afc,8ee5e86b7870..125da165e523 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@@ -264,8 -264,8 +264,8 @@@ static int tipc_receive_from_sock(struc iov.iov_base = buf; iov.iov_len = s->max_rcvbuf_size; msg.msg_name = &addr; - ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, - MSG_DONTWAIT); + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len); + ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); if (ret <= 0) { kmem_cache_free(s->rcvbuf_cache, buf); goto out_close; @@@ -489,8 -489,8 +489,8 @@@ void tipc_conn_terminate(struct tipc_se } }
- bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, - u32 lower, u32 upper, int *conid) + bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid) { struct tipc_subscriber *scbr; struct tipc_subscr sub; @@@ -501,7 -501,7 +501,7 @@@ sub.seq.lower = lower; sub.seq.upper = upper; sub.timeout = TIPC_WAIT_FOREVER; - sub.filter = TIPC_SUB_PORTS; + sub.filter = filter; *(u32 *)&sub.usr_handle = port;
con = tipc_alloc_conn(tipc_topsrv(net)); diff --combined net/tipc/socket.c index 2aa46e8cd8fe,1f236271766c..cbabc44cd19a --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@@ -710,13 -710,13 +710,13 @@@ static int tipc_getname(struct socket * * imply that the operation will succeed, merely that it should be performed * and will not block. */ -static unsigned int tipc_poll(struct file *file, struct socket *sock, +static __poll_t tipc_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_group *grp = tsk->group; + struct tipc_group *grp; - u32 revents = 0; + __poll_t revents = 0;
sock_poll_wait(file, sk_sleep(sk), wait);
@@@ -736,9 -736,9 +736,9 @@@ revents |= POLLIN | POLLRDNORM; break; case TIPC_OPEN: - if (!grp || tipc_group_size(grp)) - if (!tsk->cong_link_cnt) - revents |= POLLOUT; + grp = tsk->group; + if ((!grp || tipc_group_is_open(grp)) && !tsk->cong_link_cnt) + revents |= POLLOUT; if (!tipc_sk_type_connectionless(sk)) break; if (skb_queue_empty(&sk->sk_receive_queue)) @@@ -928,21 -928,22 +928,22 @@@ static int tipc_send_group_anycast(stru struct list_head *cong_links = &tsk->cong_links; int blks = tsk_blocks(GROUP_H_SIZE + dlen); struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; struct tipc_member *first = NULL; struct tipc_member *mbr = NULL; struct net *net = sock_net(sk); u32 node, port, exclude; - u32 type, inst, domain; struct list_head dsts; + u32 type, inst, scope; int lookups = 0; int dstcnt, rc; bool cong;
INIT_LIST_HEAD(&dsts);
- type = dest->addr.name.name.type; + type = msg_nametype(hdr); inst = dest->addr.name.name.instance; - domain = addr_domain(net, dest->scope); + scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp);
while (++lookups < 4) { @@@ -950,7 -951,7 +951,7 @@@
/* Look for a non-congested destination member, if any */ while (1) { - if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts, + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, &dstcnt, exclude, false)) return -EHOSTUNREACH; tipc_dest_pop(&dsts, &node, &port); @@@ -1079,22 -1080,23 +1080,23 @@@ static int tipc_send_group_mcast(struc { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - struct tipc_name_seq *seq = &dest->addr.nameseq; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); - u32 domain, exclude, dstcnt; + u32 type, inst, scope, exclude; struct list_head dsts; + u32 dstcnt;
INIT_LIST_HEAD(&dsts);
- if (seq->lower != seq->upper) - return -ENOTSUPP; - - domain = addr_domain(net, dest->scope); + type = msg_nametype(hdr); + inst = dest->addr.name.name.instance; + scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp); - if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain, - &dsts, &dstcnt, exclude, true)) + + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, + &dstcnt, exclude, true)) return -EHOSTUNREACH;
if (dstcnt == 1) { @@@ -1116,24 -1118,29 +1118,29 @@@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq) { - u32 scope = TIPC_CLUSTER_SCOPE; u32 self = tipc_own_addr(net); + u32 type, lower, upper, scope; struct sk_buff *skb, *_skb; - u32 lower = 0, upper = ~0; - struct sk_buff_head tmpq; u32 portid, oport, onode; + struct sk_buff_head tmpq; struct list_head dports; - struct tipc_msg *msg; - int user, mtyp, hsz; + struct tipc_msg *hdr; + int user, mtyp, hlen; + bool exact;
__skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports);
skb = tipc_skb_peek(arrvq, &inputq->lock); for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { - msg = buf_msg(skb); - user = msg_user(msg); - mtyp = msg_type(msg); + hdr = buf_msg(skb); + user = msg_user(hdr); + mtyp = msg_type(hdr); + hlen = skb_headroom(skb) + msg_hdr_sz(hdr); + oport = msg_origport(hdr); + onode = msg_orignode(hdr); + type = msg_nametype(hdr); + if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { @@@ -1144,21 -1151,31 +1151,31 @@@ spin_unlock_bh(&inputq->lock); continue; } - hsz = skb_headroom(skb) + msg_hdr_sz(msg); - oport = msg_origport(msg); - onode = msg_orignode(msg); - if (onode == self) - scope = TIPC_NODE_SCOPE; - - /* Create destination port list and message clones: */ - if (!msg_in_group(msg)) { - lower = msg_namelower(msg); - upper = msg_nameupper(msg); + + /* Group messages require exact scope match */ + if (msg_in_group(hdr)) { + lower = 0; + upper = ~0; + scope = msg_lookup_scope(hdr); + exact = true; + } else { + /* TIPC_NODE_SCOPE means "any scope" in this context */ + if (onode == self) + scope = TIPC_NODE_SCOPE; + else + scope = TIPC_CLUSTER_SCOPE; + exact = false; + lower = msg_namelower(hdr); + upper = msg_nameupper(hdr); } - tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper, - scope, &dports); + + /* Create destination port list: */ + tipc_nametbl_mc_lookup(net, type, lower, upper, + scope, exact, &dports); + + /* Clone message per destination */ while (tipc_dest_pop(&dports, NULL, &portid)) { - _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); + _skb = __pskb_copy(skb, hlen, GFP_ATOMIC); if (_skb) { msg_set_destport(buf_msg(_skb), portid); __skb_queue_tail(&tmpq, _skb); @@@ -1933,8 -1950,7 +1950,7 @@@ static void tipc_sk_proto_rcv(struct so break; case TOP_SRV: tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf, - skb, inputq, xmitq); - skb = NULL; + hdr, inputq, xmitq); break; default: break; @@@ -2640,9 -2656,7 +2656,7 @@@ void tipc_sk_reinit(struct net *net rhashtable_walk_enter(&tn->sk_rht, &iter);
do { - tsk = ERR_PTR(rhashtable_walk_start(&iter)); - if (IS_ERR(tsk)) - goto walk_stop; + rhashtable_walk_start(&iter);
while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@@ -2651,7 -2665,7 +2665,7 @@@ msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } - walk_stop: + rhashtable_walk_stop(&iter); } while (tsk == ERR_PTR(-EAGAIN)); } @@@ -2734,7 -2748,6 +2748,6 @@@ void tipc_sk_rht_destroy(struct net *ne static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) { struct net *net = sock_net(&tsk->sk); - u32 domain = addr_domain(net, mreq->scope); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct tipc_name_seq seq; @@@ -2742,6 -2755,8 +2755,8 @@@
if (mreq->type < TIPC_RESERVED_TYPES) return -EACCES; + if (mreq->scope > TIPC_NODE_SCOPE) + return -EINVAL; if (grp) return -EACCES; grp = tipc_group_create(net, tsk->portid, mreq); @@@ -2754,16 -2769,16 +2769,16 @@@ seq.type = mreq->type; seq.lower = mreq->instance; seq.upper = seq.lower; - tipc_nametbl_build_group(net, grp, mreq->type, domain); + tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope); rc = tipc_sk_publish(tsk, mreq->scope, &seq); if (rc) { tipc_group_delete(net, grp); tsk->group = NULL; } - - /* Eliminate any risk that a broadcast overtakes the sent JOIN */ + /* Eliminate any risk that a broadcast overtakes sent JOINs */ tsk->mc_method.rcast = true; tsk->mc_method.mandatory = true; + tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf); return rc; }
diff --combined net/xfrm/xfrm_input.c index 5b2409746ae0,26b10eb7a206..1472c0857975 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@@ -257,7 -257,6 +257,6 @@@ int xfrm_input(struct sk_buff *skb, in
if (xo && (xo->flags & CRYPTO_DONE)) { crypto_done = true; - x = xfrm_input_state(skb); family = XFRM_SPI_SKB_CB(skb)->family;
if (!(xo->status & CRYPTO_SUCCESS)) { @@@ -518,7 -517,7 +517,7 @@@ int xfrm_trans_queue(struct sk_buff *sk return -ENOBUFS;
XFRM_TRANS_SKB_CB(skb)->finish = finish; - skb_queue_tail(&trans->queue, skb); + __skb_queue_tail(&trans->queue, skb); tasklet_schedule(&trans->tasklet); return 0; } diff --combined net/xfrm/xfrm_policy.c index bd6b0e7a0ee4,d8a8129b9232..7a23078132cf --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@@ -54,7 -54,7 +54,7 @@@ static struct xfrm_policy_afinfo const static struct kmem_cache *xfrm_dst_cache __read_mostly; static __read_mostly seqcount_t xfrm_policy_hash_generation;
- static void xfrm_init_pmtu(struct dst_entry *dst); + static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); static void xfrm_policy_queue_process(struct timer_list *t); @@@ -609,8 -609,7 +609,8 @@@ static void xfrm_hash_rebuild(struct wo
/* re-insert all policies by order of creation */ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { - if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) { + if (policy->walk.dead || + xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) { /* skip socket policies */ continue; } @@@ -975,6 -974,8 +975,6 @@@ int xfrm_policy_flush(struct net *net, } if (!cnt) err = -ESRCH; - else - xfrm_policy_cache_flush(); out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; @@@ -1256,7 -1257,7 +1256,7 @@@ EXPORT_SYMBOL(xfrm_policy_delete)
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { - struct net *net = xp_net(pol); + struct net *net = sock_net(sk); struct xfrm_policy *old_pol;
#ifdef CONFIG_XFRM_SUB_POLICY @@@ -1543,7 -1544,9 +1543,9 @@@ static inline int xfrm_fill_dst(struct */
static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, - struct xfrm_state **xfrm, int nx, + struct xfrm_state **xfrm, + struct xfrm_dst **bundle, + int nx, const struct flowi *fl, struct dst_entry *dst) { @@@ -1551,8 -1554,8 +1553,8 @@@ unsigned long now = jiffies; struct net_device *dev; struct xfrm_mode *inner_mode; - struct dst_entry *dst_prev = NULL; - struct dst_entry *dst0 = NULL; + struct xfrm_dst *xdst_prev = NULL; + struct xfrm_dst *xdst0 = NULL; int i = 0; int err; int header_len = 0; @@@ -1578,13 -1581,14 +1580,14 @@@ goto put_states; }
- if (!dst_prev) - dst0 = dst1; + bundle[i] = xdst; + if (!xdst_prev) + xdst0 = xdst; else /* Ref count is taken during xfrm_alloc_dst() * No need to do dst_clone() on dst1 */ - dst_prev->child = dst1; + xfrm_dst_set_child(xdst_prev, &xdst->u.dst);
if (xfrm[i]->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(xfrm[i], @@@ -1621,8 -1625,7 +1624,7 @@@ dst1->input = dst_discard; dst1->output = inner_mode->afinfo->output;
- dst1->next = dst_prev; - dst_prev = dst1; + xdst_prev = xdst;
header_len += xfrm[i]->props.header_len; if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT) @@@ -1630,40 -1633,39 +1632,39 @@@ trailer_len += xfrm[i]->props.trailer_len; }
- dst_prev->child = dst; - dst0->path = dst; + xfrm_dst_set_child(xdst_prev, dst); + xdst0->path = dst;
err = -ENODEV; dev = dst->dev; if (!dev) goto free_dst;
- xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len); - xfrm_init_pmtu(dst_prev); - - for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) { - struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev; + xfrm_init_path(xdst0, dst, nfheader_len); + xfrm_init_pmtu(bundle, nx);
- err = xfrm_fill_dst(xdst, dev, fl); + for (xdst_prev = xdst0; xdst_prev != (struct xfrm_dst *)dst; + xdst_prev = (struct xfrm_dst *) xfrm_dst_child(&xdst_prev->u.dst)) { + err = xfrm_fill_dst(xdst_prev, dev, fl); if (err) goto free_dst;
- dst_prev->header_len = header_len; - dst_prev->trailer_len = trailer_len; - header_len -= xdst->u.dst.xfrm->props.header_len; - trailer_len -= xdst->u.dst.xfrm->props.trailer_len; + xdst_prev->u.dst.header_len = header_len; + xdst_prev->u.dst.trailer_len = trailer_len; + header_len -= xdst_prev->u.dst.xfrm->props.header_len; + trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len; }
out: - return dst0; + return &xdst0->u.dst;
put_states: for (; i < nx; i++) xfrm_state_put(xfrm[i]); free_dst: - if (dst0) - dst_release_immediate(dst0); - dst0 = ERR_PTR(err); + if (xdst0) + dst_release_immediate(&xdst0->u.dst); + xdst0 = ERR_PTR(err); goto out; }
@@@ -1742,8 -1744,6 +1743,8 @@@ void xfrm_policy_cache_flush(void bool found = 0; int cpu;
+ might_sleep(); + local_bh_disable(); rcu_read_lock(); for_each_possible_cpu(cpu) { @@@ -1807,7 -1807,7 +1808,7 @@@ static bool xfrm_xdst_can_reuse(struct for (i = 0; i < num; i++) { if (!dst || dst->xfrm != xfrm[i]) return false; - dst = dst->child; + dst = xfrm_dst_child(dst); }
return xfrm_bundle_ok(xdst); @@@ -1820,6 -1820,7 +1821,7 @@@ xfrm_resolve_and_create_bundle(struct x { struct net *net = xp_net(pols[0]); struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; + struct xfrm_dst *bundle[XFRM_MAX_DEPTH]; struct xfrm_dst *xdst, *old; struct dst_entry *dst; int err; @@@ -1848,7 -1849,7 +1850,7 @@@
old = xdst;
- dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); + dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig); if (IS_ERR(dst)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); return ERR_CAST(dst); @@@ -1888,8 -1889,8 +1890,8 @@@ static void xfrm_policy_queue_process(s xfrm_decode_session(skb, &fl, dst->ops->family); spin_unlock(&pq->hold_queue.lock);
- dst_hold(dst->path); - dst = xfrm_lookup(net, dst->path, &fl, sk, 0); + dst_hold(xfrm_dst_path(dst)); + dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0); if (IS_ERR(dst)) goto purge_queue;
@@@ -1918,8 -1919,8 +1920,8 @@@ skb = __skb_dequeue(&list);
xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); - dst_hold(skb_dst(skb)->path); - dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0); + dst_hold(xfrm_dst_path(skb_dst(skb))); + dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0); if (IS_ERR(dst)) { kfree_skb(skb); continue; @@@ -2020,8 -2021,8 +2022,8 @@@ static struct xfrm_dst *xfrm_create_dum dst1->output = xdst_queue_output;
dst_hold(dst); - dst1->child = dst; - dst1->path = dst; + xfrm_dst_set_child(xdst, dst); + xdst->path = dst;
xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);
@@@ -2063,11 -2064,8 +2065,11 @@@ xfrm_bundle_lookup(struct net *net, con if (num_xfrms <= 0) goto make_dummy_bundle;
+ local_bh_disable(); xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, - xflo->dst_orig); + xflo->dst_orig); + local_bh_enable(); + if (IS_ERR(xdst)) { err = PTR_ERR(xdst); if (err != -EAGAIN) @@@ -2154,12 -2152,9 +2156,12 @@@ struct dst_entry *xfrm_lookup(struct ne goto no_transform; }
+ local_bh_disable(); xdst = xfrm_resolve_and_create_bundle( pols, num_pols, fl, family, dst_orig); + local_bh_enable(); + if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); err = PTR_ERR(xdst); @@@ -2590,7 -2585,7 +2592,7 @@@ static int stale_bundle(struct dst_entr
void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { - while ((dst = dst->child) && dst->xfrm && dst->dev == dev) { + while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) { dst->dev = dev_net(dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); @@@ -2614,13 -2609,15 +2616,15 @@@ static struct dst_entry *xfrm_negative_ return dst; }
- static void xfrm_init_pmtu(struct dst_entry *dst) + static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr) { - do { - struct xfrm_dst *xdst = (struct xfrm_dst *)dst; + while (nr--) { + struct xfrm_dst *xdst = bundle[nr]; u32 pmtu, route_mtu_cached; + struct dst_entry *dst;
- pmtu = dst_mtu(dst->child); + dst = &xdst->u.dst; + pmtu = dst_mtu(xfrm_dst_child(dst)); xdst->child_mtu_cached = pmtu;
pmtu = xfrm_state_mtu(dst->xfrm, pmtu); @@@ -2632,7 -2629,7 +2636,7 @@@ pmtu = route_mtu_cached;
dst_metric_set(dst, RTAX_MTU, pmtu); - } while ((dst = dst->next)); + } }
/* Check that the bundle accepts the flow and its components are @@@ -2641,19 -2638,20 +2645,20 @@@
static int xfrm_bundle_ok(struct xfrm_dst *first) { + struct xfrm_dst *bundle[XFRM_MAX_DEPTH]; struct dst_entry *dst = &first->u.dst; - struct xfrm_dst *last; + struct xfrm_dst *xdst; + int start_from, nr; u32 mtu;
- if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || + if (!dst_check(xfrm_dst_path(dst), ((struct xfrm_dst *)dst)->path_cookie) || (dst->dev && !netif_running(dst->dev))) return 0;
if (dst->flags & DST_XFRM_QUEUE) return 1;
- last = NULL; - + start_from = nr = 0; do { struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
@@@ -2665,9 -2663,11 +2670,11 @@@ xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) return 0;
- mtu = dst_mtu(dst->child); + bundle[nr++] = xdst; + + mtu = dst_mtu(xfrm_dst_child(dst)); if (xdst->child_mtu_cached != mtu) { - last = xdst; + start_from = nr; xdst->child_mtu_cached = mtu; }
@@@ -2675,30 -2675,30 +2682,30 @@@ return 0; mtu = dst_mtu(xdst->route); if (xdst->route_mtu_cached != mtu) { - last = xdst; + start_from = nr; xdst->route_mtu_cached = mtu; }
- dst = dst->child; + dst = xfrm_dst_child(dst); } while (dst->xfrm);
- if (likely(!last)) + if (likely(!start_from)) return 1;
- mtu = last->child_mtu_cached; - for (;;) { - dst = &last->u.dst; + xdst = bundle[start_from - 1]; + mtu = xdst->child_mtu_cached; + while (start_from--) { + dst = &xdst->u.dst;
mtu = xfrm_state_mtu(dst->xfrm, mtu); - if (mtu > last->route_mtu_cached) - mtu = last->route_mtu_cached; + if (mtu > xdst->route_mtu_cached) + mtu = xdst->route_mtu_cached; dst_metric_set(dst, RTAX_MTU, mtu); - - if (last == first) + if (!start_from) break;
- last = (struct xfrm_dst *)last->u.dst.next; - last->child_mtu_cached = mtu; + xdst = bundle[start_from - 1]; + xdst->child_mtu_cached = mtu; }
return 1; @@@ -2706,22 -2706,20 +2713,20 @@@
static unsigned int xfrm_default_advmss(const struct dst_entry *dst) { - return dst_metric_advmss(dst->path); + return dst_metric_advmss(xfrm_dst_path(dst)); }
static unsigned int xfrm_mtu(const struct dst_entry *dst) { unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
- return mtu ? : dst_mtu(dst->path); + return mtu ? : dst_mtu(xfrm_dst_path(dst)); }
static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst, const void *daddr) { - const struct dst_entry *path = dst->path; - - for (; dst != path; dst = dst->child) { + while (dst->xfrm) { const struct xfrm_state *xfrm = dst->xfrm;
if (xfrm->props.mode == XFRM_MODE_TRANSPORT) @@@ -2730,6 -2728,8 +2735,8 @@@ daddr = xfrm->coaddr; else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR)) daddr = &xfrm->id.daddr; + + dst = xfrm_dst_child(dst); } return daddr; } @@@ -2738,7 -2738,7 +2745,7 @@@ static struct neighbour *xfrm_neigh_loo struct sk_buff *skb, const void *daddr) { - const struct dst_entry *path = dst->path; + const struct dst_entry *path = xfrm_dst_path(dst);
if (!skb) daddr = xfrm_get_dst_nexthop(dst, daddr); @@@ -2747,7 -2747,7 +2754,7 @@@
static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr) { - const struct dst_entry *path = dst->path; + const struct dst_entry *path = xfrm_dst_path(dst);
daddr = xfrm_get_dst_nexthop(dst, daddr); path->ops->confirm_neigh(path, daddr); diff --combined net/xfrm/xfrm_state.c index 429957412633,cc4c519cad76..20b1e414dbee --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@@ -313,14 -313,13 +313,14 @@@ retry if ((type && !try_module_get(type->owner))) type = NULL;
+ rcu_read_unlock(); + if (!type && try_load) { request_module("xfrm-offload-%d-%d", family, proto); try_load = 0; goto retry; }
- rcu_read_unlock(); return type; }
@@@ -1535,12 -1534,8 +1535,12 @@@ out err = -EINVAL; spin_lock_bh(&x1->lock); if (likely(x1->km.state == XFRM_STATE_VALID)) { - if (x->encap && x1->encap) + if (x->encap && x1->encap && + x->encap->encap_type == x1->encap->encap_type) memcpy(x1->encap, x->encap, sizeof(*x1->encap)); + else if (x->encap || x1->encap) + goto fail; + if (x->coaddr && x1->coaddr) { memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); } @@@ -1557,8 -1552,6 +1557,8 @@@ x->km.state = XFRM_STATE_DEAD; __xfrm_state_put(x); } + +fail: spin_unlock_bh(&x1->lock);
xfrm_state_put(x1); @@@ -2056,6 -2049,13 +2056,13 @@@ int xfrm_user_policy(struct sock *sk, i struct xfrm_mgr *km; struct xfrm_policy *pol = NULL;
+ if (!optval && !optlen) { + xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL); + xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL); + __sk_dst_reset(sk); + return 0; + } + if (optlen <= 0 || optlen > PAGE_SIZE) return -EMSGSIZE;
diff --combined tools/testing/selftests/bpf/test_align.c index 471bbbdb94db,fe916d29e166..e19b410125eb --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@@ -64,11 -64,11 +64,11 @@@ static struct bpf_align_test tests[] = .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv2"}, - {2, "R3=inv4"}, - {3, "R3=inv8"}, - {4, "R3=inv16"}, - {5, "R3=inv32"}, + {1, "R3_w=inv2"}, + {2, "R3_w=inv4"}, + {3, "R3_w=inv8"}, + {4, "R3_w=inv16"}, + {5, "R3_w=inv32"}, }, }, { @@@ -92,17 -92,17 +92,17 @@@ .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv1"}, - {2, "R3=inv2"}, - {3, "R3=inv4"}, - {4, "R3=inv8"}, - {5, "R3=inv16"}, - {6, "R3=inv1"}, - {7, "R4=inv32"}, - {8, "R4=inv16"}, - {9, "R4=inv8"}, - {10, "R4=inv4"}, - {11, "R4=inv2"}, + {1, "R3_w=inv1"}, + {2, "R3_w=inv2"}, + {3, "R3_w=inv4"}, + {4, "R3_w=inv8"}, + {5, "R3_w=inv16"}, + {6, "R3_w=inv1"}, + {7, "R4_w=inv32"}, + {8, "R4_w=inv16"}, + {9, "R4_w=inv8"}, + {10, "R4_w=inv4"}, + {11, "R4_w=inv2"}, }, }, { @@@ -121,12 -121,12 +121,12 @@@ .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv4"}, - {2, "R3=inv8"}, - {3, "R3=inv10"}, - {4, "R4=inv8"}, - {5, "R4=inv12"}, - {6, "R4=inv14"}, + {1, "R3_w=inv4"}, + {2, "R3_w=inv8"}, + {3, "R3_w=inv10"}, + {4, "R4_w=inv8"}, + {5, "R4_w=inv12"}, + {6, "R4_w=inv14"}, }, }, { @@@ -143,10 -143,10 +143,10 @@@ .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv7"}, - {2, "R3=inv7"}, - {3, "R3=inv14"}, - {4, "R3=inv56"}, + {1, "R3_w=inv7"}, + {2, "R3_w=inv7"}, + {3, "R3_w=inv14"}, + {4, "R3_w=inv56"}, }, },
@@@ -185,18 -185,18 +185,18 @@@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { {7, "R0=pkt(id=0,off=8,r=8,imm=0)"}, - {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R3=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {9, "R3=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {10, "R3=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {11, "R3=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, {18, "R3=pkt_end(id=0,off=0,imm=0)"}, - {18, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {19, "R4=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, - {20, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, - {21, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {22, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {23, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, + {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, }, }, { @@@ -217,16 -217,16 +217,16 @@@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {9, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {11, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {12, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {13, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {14, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {15, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {16, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, }, }, { @@@ -257,14 -257,14 +257,14 @@@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, - {5, "R5=pkt(id=0,off=14,r=0,imm=0)"}, - {6, "R4=pkt(id=0,off=14,r=0,imm=0)"}, + {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, + {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"}, + {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"}, {10, "R2=pkt(id=0,off=0,r=18,imm=0)"}, {10, "R5=pkt(id=0,off=14,r=18,imm=0)"}, - {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {14, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, - {15, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, }, }, { @@@ -320,11 -320,11 +320,11 @@@ * alignment of 4. */ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. */ - {11, "R5=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * it's total offset is NET_IP_ALIGN + reg->off (0) + * reg->aux_off (14) which is 16. Then the variable @@@ -336,11 -336,11 +336,11 @@@ /* Variable offset is added to R5 packet pointer, * resulting in auxiliary alignment of 4. */ - {18, "R5=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {19, "R5=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte @@@ -352,18 -352,18 +352,18 @@@ /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5=pkt(id=0,off=14,r=8"}, + {26, "R5_w=pkt(id=0,off=14,r=8"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). */ - {27, "R5=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {28, "R5=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {29, "R5=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, + {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so @@@ -410,11 -410,11 +410,11 @@@ * alignment of 4. */ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {9, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* Packet pointer has (4n+2) offset */ - {11, "R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@@ -426,11 -426,11 +426,11 @@@ /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ - {18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). */ - {19, "R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, + {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@@ -473,8 -473,28 +473,8 @@@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, + {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, - /* ptr & 0x40 == either 0 or 0x40 */ - {5, "R5_w=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"}, - /* ptr << 2 == unknown, (4n) */ - {7, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"}, - /* (4n) + 14 == (4n+2). We blow our bounds, because - * the add could overflow. - */ - {8, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"}, - /* Checked s>=0 */ - {10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, - /* packet pointer + nonnegative (4n+2) */ - {12, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, - {14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, - /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. - * We checked the bounds, but it might have been able - * to overflow if the packet pointer started in the - * upper half of the address space. - * So we did not get a 'range' on R6, and the access - * attempt will fail. - */ - {16, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + /* R5 bitwise operator &= on pointer prohibited */ } }, { @@@ -510,11 -530,11 +510,11 @@@ * alignment of 4. */ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {10, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* New unknown value in R7 is (4n) */ - {11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Subtracting it from R6 blows our unsigned bounds */ {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"}, /* Checked s>= 0 */ @@@ -563,15 -583,15 +563,15 @@@ * alignment of 4. */ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {10, "R6=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, + {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, /* Adding 14 makes R6 be (4n+2) */ - {11, "R6=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, + {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, + {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, /* New unknown value in R7 is (4n), >= 76 */ - {15, "R7=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, + {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, + {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so diff --combined tools/testing/selftests/bpf/test_verifier.c index 6bafa5456568,543847957fdd..960179882a1c --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@@ -2,6 -2,7 +2,7 @@@ * Testsuite for eBPF verifier * * Copyright (c) 2014 PLUMgrid, http://plumgrid.com + * Copyright (c) 2017 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@@ -273,51 -274,11 +274,51 @@@ static struct bpf_test tests[] = .result = REJECT, }, { + "arsh32 on imm", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "BPF_ARSH not supported for 32 bit ALU", + }, + { + "arsh32 on reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "BPF_ARSH not supported for 32 bit ALU", + }, + { + "arsh64 on imm", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_ALU64_IMM(BPF_ARSH, BPF_REG_0, 5), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { + "arsh64 on reg", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 5), + BPF_ALU64_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + }, + { "no bpf_exit", .insns = { BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2), }, - .errstr = "jump out of range", + .errstr = "not an exit", .result = REJECT, }, { @@@ -5675,7 -5636,7 +5676,7 @@@ "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)", .insns = { BPF_MOV64_IMM(BPF_REG_1, 0), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), @@@ -5910,7 -5871,7 +5911,7 @@@ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), @@@ -8680,6 -8641,1864 +8681,1864 @@@ .result = REJECT, .prog_type = BPF_PROG_TYPE_CGROUP_SOCK, }, + { + "calls: basic sanity", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: not on unpriviledged", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "calls: overlapping caller/callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn is not an exit or jmp", + .result = REJECT, + }, + { + "calls: wrong recursive calls", + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: wrong src reg", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, + }, + { + "calls: wrong off value", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "BPF_CALL uses reserved fields", + .result = REJECT, + }, + { + "calls: jump back loop", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn 0 to 0", + .result = REJECT, + }, + { + "calls: conditional call", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: conditional call 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: conditional call 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: conditional call 4", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -5), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: conditional call 5", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -6), + BPF_MOV64_IMM(BPF_REG_0, 3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: conditional call 6", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, mark)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge from insn", + .result = REJECT, + }, + { + "calls: using r0 returned by callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .result = ACCEPT, + }, + { + "calls: using uninit r0 from callee", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "!read_ok", + .result = REJECT, + }, + { + "calls: callee is using r1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_ACT, + .result = ACCEPT, + }, + { + "calls: callee using args1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "calls: callee using wrong args2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "R2 !read_ok", + .result = REJECT, + }, + { + "calls: callee using two args", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6, + offsetof(struct __sk_buff, len)), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "calls: callee changing pkt pointers", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* clear_all_pkt_pointers() has to walk all frames + * to make sure that pkt pointers in the caller + * are cleared when callee is calling a helper that + * adjusts packet size + */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_xdp_adjust_head), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R6 invalid mem access 'inv'", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "calls: two calls with args", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: calls with stack arith", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: calls with misaligned stack access", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63), + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .flags = F_LOAD_WITH_STRICT_ALIGNMENT, + .errstr = "misaligned stack access", + .result = REJECT, + }, + { + "calls: calls control flow, jump test", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: calls control flow, jump test 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 42), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 43), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "jump out of range from insn 1 to 4", + .result = REJECT, + }, + { + "calls: two calls with bad jump", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range from insn 11 to 9", + .result = REJECT, + }, + { + "calls: recursive call. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, + }, + { + "calls: recursive call. test2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "back-edge", + .result = REJECT, + }, + { + "calls: unreachable code", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "unreachable insn 6", + .result = REJECT, + }, + { + "calls: invalid call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, + }, + { + "calls: invalid call 2", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "invalid destination", + .result = REJECT, + }, + { + "calls: jumping across function bodies. test1", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: jumping across function bodies. test2", + .insns = { + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "jump out of range", + .result = REJECT, + }, + { + "calls: call without exit", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, + }, + { + "calls: call into middle of ld_imm64", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_IMM64(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, + }, + { + "calls: call into middle of other call", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "last insn", + .result = REJECT, + }, + { + "calls: ld_abs with changing ctx data in callee", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_LD_ABS(BPF_B, 0), + BPF_LD_ABS(BPF_H, 0), + BPF_LD_ABS(BPF_W, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_3, 2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_vlan_push), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed", + .result = REJECT, + }, + { + "calls: two calls with bad fallthrough", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_0), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, + offsetof(struct __sk_buff, len)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr = "not an exit", + .result = REJECT, + }, + { + "calls: two calls with stack read", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_7), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: two calls with stack write", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_8), + /* write into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* read from stack frame of main prog */ + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: stack overflow using two frames (pre-call access)", + .insns = { + /* prog 1 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, + }, + { + "calls: stack overflow using two frames (post-call access)", + .insns = { + /* prog 1 */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + + /* prog 2 */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "combined stack size", + .result = REJECT, + }, + { + "calls: stack depth check using three frames. test1", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=256, stack_B=64 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, + }, + { + "calls: stack depth check using three frames. test2", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=32, stack_A=64, stack_B=256 + * and max(main+A, main+A+B) < 512 + */ + .result = ACCEPT, + }, + { + "calls: stack depth check using three frames. test3", + .insns = { + /* main */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 8), /* call B */ + BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 10, 1), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_B, BPF_REG_10, -224, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -3), + /* B */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -6), /* call A */ + BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + /* stack_main=64, stack_A=224, stack_B=256 + * and max(main+A, main+A+B) > 512 + */ + .errstr = "combined stack", + .result = REJECT, + }, + { + "calls: stack depth check using three frames. test4", + /* void main(void) { + * func1(0); + * func1(1); + * func2(1); + * } + * void func1(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } else { + * func2(alloc_or_recurse); + * } + * } + * void func2(int alloc_or_recurse) { + * if (alloc_or_recurse) { + * frame_pointer[-300] = 1; + * } + * } + */ + .insns = { + /* main */ + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */ + BPF_MOV64_IMM(BPF_REG_1, 1), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 7), /* call B */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + /* A */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = REJECT, + .errstr = "combined stack", + }, + { + "calls: stack depth check using three frames. test5", + .insns = { + /* main */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */ + BPF_EXIT_INSN(), + /* A */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */ + BPF_EXIT_INSN(), + /* B */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */ + BPF_EXIT_INSN(), + /* C */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */ + BPF_EXIT_INSN(), + /* D */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */ + BPF_EXIT_INSN(), + /* E */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */ + BPF_EXIT_INSN(), + /* F */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */ + BPF_EXIT_INSN(), + /* G */ + BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */ + BPF_EXIT_INSN(), + /* H */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "call stack", + .result = REJECT, + }, + { + "calls: spill into caller stack frame", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot spill", + .result = REJECT, + }, + { + "calls: write into caller stack frame", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: write into callee stack frame", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_XDP, + .errstr = "cannot return stack pointer", + .result = REJECT, + }, + { + "calls: two calls with stack write and void return", + .insns = { + /* main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* write into stack frame of main prog */ + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), /* void return */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = ACCEPT, + }, + { + "calls: ambiguous return value", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr_unpriv = "allowed for root only", + .result_unpriv = REJECT, + .errstr = "R0 !read_ok", + .result = REJECT, + }, + { + "calls: two calls that return map_value", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8), + + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = ACCEPT, + }, + { + "calls: two calls that return map_value with bool condition", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = ACCEPT, + }, + { + "calls: two calls that return map_value with incorrect bool check", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* call 3rd function twice */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* first time with fp-8 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + /* second time with fp-16 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + /* fetch secound map_value_ptr from the stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 2 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + /* lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), /* return 0 */ + /* write map_value_ptr into stack frame of main prog */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), /* return 1 */ + }, + .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map1 = { 23 }, + .result = REJECT, + .errstr = "invalid read from stack off -16+0 size 8", + }, + { + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + }, + { + "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */ + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */ + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = ACCEPT, + }, + { + "calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), // 26 + BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* write map_value_ptr into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), // 30 + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), // 34 + BPF_JMP_IMM(BPF_JA, 0, 0, -30), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, -8), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=8 off=2 size=8", + }, + { + "calls: two calls that receive map_value_ptr_or_null via arg. test1", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 1 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = ACCEPT, + }, + { + "calls: two calls that receive map_value_ptr_or_null via arg. test2", + .insns = { + /* main prog */ + /* pass fp-16, fp-8 into a function */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_2), + /* 1st lookup from map */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + + /* 2nd lookup from map */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */ + BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + + /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */ + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_9), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 2 */ + /* if arg2 == 1 do *arg1 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + + /* if arg4 == 0 do *arg3 = 0 */ + BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2), + /* fetch map_value_ptr from the stack of this function */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + /* write into map value */ + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .fixup_map1 = { 12, 22 }, + .result = REJECT, + .errstr = "R0 invalid mem access 'inv'", + }, + { + "calls: pkt_ptr spill into caller stack", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "calls: pkt_ptr spill into caller stack 2", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + /* Marking is still kept, but not in all cases safe. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 3", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Marking is still kept and safe here. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* now the pkt range is verified, read pkt_ptr from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0), + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: pkt_ptr spill into caller stack 4", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + /* Check marking propagated. */ + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: pkt_ptr spill into caller stack 5", + .insns = { + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "same insn cannot be used with different", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 6", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 7", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "R4 invalid mem access", + .result = REJECT, + }, + { + "calls: pkt_ptr spill into caller stack 8", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3), + /* spill checked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + }, + { + "calls: pkt_ptr spill into caller stack 9", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8), + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3), + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_5, 0), + /* spill unchecked pkt_ptr into stack of caller */ + BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_5, 1), + /* don't read back pkt_ptr from stack here */ + /* write 4 bytes into packet */ + BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_5), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .errstr = "invalid access to packet", + .result = REJECT, + }, + { + "calls: caller stack init to zero or map_value_or_null", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + /* fetch map_value_or_null or const_zero from stack */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + /* store into map_value */ + BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0), + BPF_EXIT_INSN(), + + /* subprog 1 */ + /* if (ctx == 0) return; */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8), + /* else bpf_map_lookup() and *(fp - 8) = r0 */ + BPF_MOV64_REG(BPF_REG_6, BPF_REG_2), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */ + BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 13 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "calls: stack init to zero and pruning", + .insns = { + /* first make allocated_stack 16 byte */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), + /* now fork the execution such that the false branch + * of JGT insn will be verified second and it skisp zero + * init of fp-8 stack slot. If stack liveness marking + * is missing live_read marks from call map_lookup + * processing then pruning will incorrectly assume + * that fp-8 stack slot was unused in the fall-through + * branch and will accept the program incorrectly + */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_map_lookup_elem), + BPF_EXIT_INSN(), + }, + .fixup_map2 = { 6 }, + .errstr = "invalid indirect read from stack off -8+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "search pruning: all branches should be verified (nop operation)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_A(1), + BPF_MOV64_IMM(BPF_REG_4, 1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "R6 invalid mem access 'inv'", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, + { + "search pruning: all branches should be verified (invalid stack access)", + .insns = { + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16), + BPF_JMP_A(1), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24), + BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns), + BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16), + BPF_EXIT_INSN(), + }, + .fixup_map1 = { 3 }, + .errstr = "invalid read from stack off -16+0 size 8", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + }, };
static int probe_filter_length(const struct bpf_insn *fp)