[linux-next] LinuxNextTracking branch, master, updated. next-20171222

batman at open-mesh.org
Sat Dec 23 00:19:08 CET 2017


The following commit has been merged in the master branch:
commit 903628bbc3a7505bc388d025a95c4f444d66fa7a
Merge: ba4bfc36630762e97164c810a5560a2712404b01 0a80f0c26bf5a131892b91db5318eb67608006d2
Author: Stephen Rothwell <sfr at canb.auug.org.au>
Date:   Fri Dec 22 11:10:09 2017 +1100

    Merge remote-tracking branch 'net-next/master'

diff --combined MAINTAINERS
index 428e478a20d1,753799d24cd9..def69b76f8e9
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -321,7 -321,7 +321,7 @@@ F:	drivers/acpi/apei
  
  ACPI COMPONENT ARCHITECTURE (ACPICA)
  M:	Robert Moore <robert.moore at intel.com>
 -M:	Lv Zheng <lv.zheng at intel.com>
 +M:	Erik Schmauss <erik.schmauss at intel.com>
  M:	"Rafael J. Wysocki" <rafael.j.wysocki at intel.com>
  L:	linux-acpi at vger.kernel.org
  L:	devel at acpica.org
@@@ -1255,12 -1255,6 +1255,12 @@@ L:	linux-arm-kernel at lists.infradead.or
  S:	Supported
  F:	drivers/net/ethernet/cavium/thunder/
  
 +ARM/CIRRUS LOGIC BK3 MACHINE SUPPORT
 +M:	Lukasz Majewski <lukma at denx.de>
 +L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
 +S:	Maintained
 +F:	arch/arm/mach-ep93xx/ts72xx.c
 +
  ARM/CIRRUS LOGIC CLPS711X ARM ARCHITECTURE
  M:	Alexander Shiyan <shc_work at mail.ru>
  L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
@@@ -1589,7 -1583,6 +1589,7 @@@ F:	arch/arm/boot/dts/kirkwood
  F:	arch/arm/configs/mvebu_*_defconfig
  F:	arch/arm/mach-mvebu/
  F:	arch/arm64/boot/dts/marvell/armada*
 +F:	drivers/cpufreq/armada-37xx-cpufreq.c
  F:	drivers/cpufreq/mvebu-cpufreq.c
  F:	drivers/irqchip/irq-armada-370-xp.c
  F:	drivers/irqchip/irq-mvebu-*
@@@ -1642,38 -1635,14 +1642,38 @@@ ARM/NEC MOBILEPRO 900/c MACHINE SUPPOR
  M:	Michael Petchkovsky <mkpetch at internode.on.net>
  S:	Maintained
  
 -ARM/NOMADIK ARCHITECTURE
 -M:	Alessandro Rubini <rubini at unipv.it>
 +ARM/NOMADIK/U300/Ux500 ARCHITECTURES
  M:	Linus Walleij <linus.walleij at linaro.org>
  L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
  S:	Maintained
  F:	arch/arm/mach-nomadik/
 -F:	drivers/pinctrl/nomadik/
 +F:	arch/arm/mach-u300/
 +F:	arch/arm/mach-ux500/
 +F:	arch/arm/boot/dts/ste-*
 +F:	drivers/clk/clk-nomadik.c
 +F:	drivers/clk/clk-u300.c
 +F:	drivers/clocksource/clksrc-dbx500-prcmu.c
 +F:	drivers/clocksource/timer-u300.c
 +F:	drivers/dma/coh901318*
 +F:	drivers/dma/ste_dma40*
 +F:	drivers/hwspinlock/u8500_hsem.c
  F:	drivers/i2c/busses/i2c-nomadik.c
 +F:	drivers/i2c/busses/i2c-stu300.c
 +F:	drivers/mfd/ab3100*
 +F:	drivers/mfd/ab8500*
 +F:	drivers/mfd/abx500*
 +F:	drivers/mfd/dbx500*
 +F:	drivers/mfd/db8500*
 +F:	drivers/pinctrl/nomadik/
 +F:	drivers/pinctrl/pinctrl-coh901*
 +F:	drivers/pinctrl/pinctrl-u300.c
 +F:	drivers/rtc/rtc-ab3100.c
 +F:	drivers/rtc/rtc-ab8500.c
 +F:	drivers/rtc/rtc-coh901331.c
 +F:	drivers/rtc/rtc-pl031.c
 +F:	drivers/watchdog/coh901327_wdt.c
 +F:	Documentation/devicetree/bindings/arm/ste-*
 +F:	Documentation/devicetree/bindings/arm/ux500/
  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git
  
  ARM/NUVOTON W90X900 ARM ARCHITECTURE
@@@ -2053,6 -2022,21 +2053,6 @@@ M:	Dmitry Eremin-Solenikov <dbaryshkov@
  M:	Dirk Opfer <dirk at opfer-online.de>
  S:	Maintained
  
 -ARM/U300 MACHINE SUPPORT
 -M:	Linus Walleij <linus.walleij at linaro.org>
 -L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
 -S:	Supported
 -F:	arch/arm/mach-u300/
 -F:	drivers/clocksource/timer-u300.c
 -F:	drivers/i2c/busses/i2c-stu300.c
 -F:	drivers/rtc/rtc-coh901331.c
 -F:	drivers/watchdog/coh901327_wdt.c
 -F:	drivers/dma/coh901318*
 -F:	drivers/mfd/ab3100*
 -F:	drivers/rtc/rtc-ab3100.c
 -F:	drivers/rtc/rtc-coh901331.c
 -T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
 -
  ARM/UNIPHIER ARCHITECTURE
  M:	Masahiro Yamada <yamada.masahiro at socionext.com>
  L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
@@@ -2074,6 -2058,24 +2074,6 @@@ F:	drivers/reset/reset-uniphier.
  F:	drivers/tty/serial/8250/8250_uniphier.c
  N:	uniphier
  
 -ARM/Ux500 ARM ARCHITECTURE
 -M:	Linus Walleij <linus.walleij at linaro.org>
 -L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
 -S:	Maintained
 -F:	arch/arm/mach-ux500/
 -F:	drivers/clocksource/clksrc-dbx500-prcmu.c
 -F:	drivers/dma/ste_dma40*
 -F:	drivers/hwspinlock/u8500_hsem.c
 -F:	drivers/mfd/abx500*
 -F:	drivers/mfd/ab8500*
 -F:	drivers/mfd/dbx500*
 -F:	drivers/mfd/db8500*
 -F:	drivers/pinctrl/nomadik/pinctrl-ab*
 -F:	drivers/pinctrl/nomadik/pinctrl-nomadik*
 -F:	drivers/rtc/rtc-ab8500.c
 -F:	drivers/rtc/rtc-pl031.c
 -T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
 -
  ARM/Ux500 CLOCK FRAMEWORK SUPPORT
  M:	Ulf Hansson <ulf.hansson at linaro.org>
  L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
@@@ -2499,8 -2501,6 +2499,8 @@@ L:	linux-arm-kernel at lists.infradead.or
  S:	Maintained
  F:	Documentation/devicetree/bindings/arm/axentia.txt
  F:	arch/arm/boot/dts/at91-linea.dtsi
 +F:	arch/arm/boot/dts/at91-natte.dtsi
 +F:	arch/arm/boot/dts/at91-nattis-2-natte-2.dts
  F:	arch/arm/boot/dts/at91-tse850-3.dts
  
  AXENTIA ASOC DRIVERS
@@@ -2564,6 -2564,7 +2564,7 @@@ S:	Maintaine
  F:	Documentation/ABI/testing/sysfs-class-net-batman-adv
  F:	Documentation/ABI/testing/sysfs-class-net-mesh
  F:	Documentation/networking/batman-adv.rst
+ F:	include/uapi/linux/batadv_packet.h
  F:	include/uapi/linux/batman_adv.h
  F:	net/batman-adv/
  
@@@ -2689,7 -2690,6 +2690,6 @@@ F:	drivers/mtd/devices/block2mtd.
  
  BLUETOOTH DRIVERS
  M:	Marcel Holtmann <marcel at holtmann.org>
- M:	Gustavo Padovan <gustavo at padovan.org>
  M:	Johan Hedberg <johan.hedberg at gmail.com>
  L:	linux-bluetooth at vger.kernel.org
  W:	http://www.bluez.org/
@@@ -2700,7 -2700,6 +2700,6 @@@ F:	drivers/bluetooth
  
  BLUETOOTH SUBSYSTEM
  M:	Marcel Holtmann <marcel at holtmann.org>
- M:	Gustavo Padovan <gustavo at padovan.org>
  M:	Johan Hedberg <johan.hedberg at gmail.com>
  L:	linux-bluetooth at vger.kernel.org
  W:	http://www.bluez.org/
@@@ -2725,12 -2724,16 +2724,16 @@@ M:	Alexei Starovoitov <ast at kernel.org
  M:	Daniel Borkmann <daniel at iogearbox.net>
  L:	netdev at vger.kernel.org
  L:	linux-kernel at vger.kernel.org
+ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
+ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
  S:	Supported
  F:	arch/x86/net/bpf_jit*
  F:	Documentation/networking/filter.txt
  F:	Documentation/bpf/
  F:	include/linux/bpf*
  F:	include/linux/filter.h
+ F:	include/trace/events/bpf.h
+ F:	include/trace/events/xdp.h
  F:	include/uapi/linux/bpf*
  F:	include/uapi/linux/filter.h
  F:	kernel/bpf/
@@@ -2834,8 -2837,6 +2837,8 @@@ S:	Maintaine
  F:	arch/arm/mach-bcm/*brcmstb*
  F:	arch/arm/boot/dts/bcm7*.dts*
  F:	drivers/bus/brcmstb_gisb.c
 +F:	arch/arm/mm/cache-b15-rac.c
 +F:	arch/arm/include/asm/hardware/cache-b15-rac.h
  N:	brcmstb
  
  BROADCOM BMIPS CPUFREQ DRIVER
@@@ -8683,15 -8684,6 +8686,15 @@@ T:	git git://linuxtv.org/media_tree.gi
  S:	Maintained
  F:	drivers/media/dvb-frontends/stv6111*
  
 +MEDIA DRIVERS FOR NVIDIA TEGRA - VDE
 +M:	Dmitry Osipenko <digetx at gmail.com>
 +L:	linux-media at vger.kernel.org
 +L:	linux-tegra at vger.kernel.org
 +T:	git git://linuxtv.org/media_tree.git
 +S:	Maintained
 +F:	Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
 +F:	drivers/staging/media/tegra-vde/
 +
  MEDIA INPUT INFRASTRUCTURE (V4L/DVB)
  M:	Mauro Carvalho Chehab <mchehab at s-opensource.com>
  M:	Mauro Carvalho Chehab <mchehab at kernel.org>
@@@ -8735,6 -8727,13 +8738,13 @@@ L:	netdev at vger.kernel.or
  S:	Maintained
  F:	drivers/net/ethernet/mediatek/
  
+ MEDIATEK SWITCH DRIVER
+ M:	Sean Wang <sean.wang at mediatek.com>
+ L:	netdev at vger.kernel.org
+ S:	Maintained
+ F:	drivers/net/dsa/mt7530.*
+ F:	net/dsa/tag_mtk.c
+ 
  MEDIATEK JPEG DRIVER
  M:	Rick Chang <rick.chang at mediatek.com>
  M:	Bin Liu <bin.liu at mediatek.com>
@@@ -9107,7 -9106,6 +9117,7 @@@ S:	Supporte
  F:	Documentation/devicetree/bindings/mips/
  F:	Documentation/mips/
  F:	arch/mips/
 +F:	drivers/platform/mips/
  
  MIPS BOSTON DEVELOPMENT BOARD
  M:	Paul Burton <paul.burton at mips.com>
@@@ -9135,25 -9133,6 +9145,25 @@@ F:	arch/mips/include/asm/mach-loongson3
  F:	drivers/*/*loongson1*
  F:	drivers/*/*/*loongson1*
  
 +MIPS/LOONGSON2 ARCHITECTURE
 +M:	Jiaxun Yang <jiaxun.yang at flygoat.com>
 +L:	linux-mips at linux-mips.org
 +S:	Maintained
 +F:	arch/mips/loongson64/*{2e/2f}*
 +F:	arch/mips/include/asm/mach-loongson64/
 +F:	drivers/*/*loongson2*
 +F:	drivers/*/*/*loongson2*
 +
 +MIPS/LOONGSON3 ARCHITECTURE
 +M:	Huacai Chen <chenhc at lemote.com>
 +L:	linux-mips at linux-mips.org
 +S:	Maintained
 +F:	arch/mips/loongson64/
 +F:	arch/mips/include/asm/mach-loongson64/
 +F:	drivers/platform/mips/cpu_hwmon.c
 +F:	drivers/*/*loongson3*
 +F:	drivers/*/*/*loongson3*
 +
  MIPS RINT INSTRUCTION EMULATION
  M:	Aleksandar Markovic <aleksandar.markovic at mips.com>
  L:	linux-mips at linux-mips.org
@@@ -9631,6 -9610,11 +9641,11 @@@ NETWORKING [WIRELESS
  L:	linux-wireless at vger.kernel.org
  Q:	http://patchwork.kernel.org/project/linux-wireless/list/
  
+ NETDEVSIM
+ M:	Jakub Kicinski <jakub.kicinski at netronome.com>
+ S:	Maintained
+ F:	drivers/net/netdevsim/*
+ 
  NETXEN (1/10) GbE SUPPORT
  M:	Manish Chopra <manish.chopra at cavium.com>
  M:	Rahul Verma <rahul.verma at cavium.com>
@@@ -10583,12 -10567,8 +10598,12 @@@ T:	git git://git.kernel.org/pub/scm/lin
  S:	Supported
  F:	Documentation/devicetree/bindings/pci/
  F:	Documentation/PCI/
 +F:	drivers/acpi/pci*
  F:	drivers/pci/
 +F:	include/asm-generic/pci*
  F:	include/linux/pci*
 +F:	include/uapi/linux/pci*
 +F:	lib/pci*
  F:	arch/x86/pci/
  F:	arch/x86/kernel/quirks.c
  
@@@ -10927,7 -10907,6 +10942,7 @@@ F:	include/linux/pm.
  F:	include/linux/pm_*
  F:	include/linux/powercap.h
  F:	drivers/powercap/
 +F:	kernel/configs/nopm.config
  
  POWER STATE COORDINATION INTERFACE (PSCI)
  M:	Mark Rutland <mark.rutland at arm.com>
@@@ -12375,14 -12354,6 +12390,14 @@@ T:	git git://linuxtv.org/anttip/media_t
  S:	Maintained
  F:	drivers/media/tuners/si2157*
  
 +SI2165 MEDIA DRIVER
 +M:	Matthias Schwarzott <zzam at gentoo.org>
 +L:	linux-media at vger.kernel.org
 +W:	https://linuxtv.org
 +Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 +S:	Maintained
 +F:	drivers/media/dvb-frontends/si2165*
 +
  SI2168 MEDIA DRIVER
  M:	Antti Palosaari <crope at iki.fi>
  L:	linux-media at vger.kernel.org
@@@ -12913,6 -12884,12 +12928,6 @@@ S:	Odd Fixe
  F:	Documentation/devicetree/bindings/staging/iio/
  F:	drivers/staging/iio/
  
 -STAGING - LIRC (LINUX INFRARED REMOTE CONTROL) DRIVERS
 -M:	Jarod Wilson <jarod at wilsonet.com>
 -W:	http://www.lirc.org/
 -S:	Odd Fixes
 -F:	drivers/staging/media/lirc/
 -
  STAGING - LUSTRE PARALLEL FILESYSTEM
  M:	Oleg Drokin <oleg.drokin at intel.com>
  M:	Andreas Dilger <andreas.dilger at intel.com>
@@@ -13294,15 -13271,6 +13309,15 @@@ T:	git git://linuxtv.org/anttip/media_t
  S:	Maintained
  F:	drivers/media/tuners/tda18218*
  
 +TDA18250 MEDIA DRIVER
 +M:	Olli Salonen <olli.salonen at iki.fi>
 +L:	linux-media at vger.kernel.org
 +W:	https://linuxtv.org
 +Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 +T:	git git://linuxtv.org/media_tree.git
 +S:	Maintained
 +F:	drivers/media/tuners/tda18250*
 +
  TDA18271 MEDIA DRIVER
  M:	Michael Krufky <mkrufky at linuxtv.org>
  L:	linux-media at vger.kernel.org
@@@ -13540,7 -13508,6 +13555,7 @@@ M:	Mika Westerberg <mika.westerberg at lin
  M:	Yehezkel Bernat <yehezkel.bernat at intel.com>
  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/westeri/thunderbolt.git
  S:	Maintained
 +F:	Documentation/admin-guide/thunderbolt.rst
  F:	drivers/thunderbolt/
  F:	include/linux/thunderbolt.h
  
diff --combined arch/arm/boot/dts/imx25.dtsi
index c43cf704b768,fcaff1c66bcb..9445f8e1473c
--- a/arch/arm/boot/dts/imx25.dtsi
+++ b/arch/arm/boot/dts/imx25.dtsi
@@@ -122,7 -122,7 +122,7 @@@
  			};
  
  			can1: can at 43f88000 {
- 				compatible = "fsl,imx25-flexcan", "fsl,p1010-flexcan";
+ 				compatible = "fsl,imx25-flexcan";
  				reg = <0x43f88000 0x4000>;
  				interrupts = <43>;
  				clocks = <&clks 75>, <&clks 75>;
@@@ -131,7 -131,7 +131,7 @@@
  			};
  
  			can2: can at 43f8c000 {
- 				compatible = "fsl,imx25-flexcan", "fsl,p1010-flexcan";
+ 				compatible = "fsl,imx25-flexcan";
  				reg = <0x43f8c000 0x4000>;
  				interrupts = <44>;
  				clocks = <&clks 76>, <&clks 76>;
@@@ -628,13 -628,11 +628,13 @@@
  		usbphy0: usb-phy at 0 {
  			reg = <0>;
  			compatible = "usb-nop-xceiv";
 +			#phy-cells = <0>;
  		};
  
  		usbphy1: usb-phy at 1 {
  			reg = <1>;
  			compatible = "usb-nop-xceiv";
 +			#phy-cells = <0>;
  		};
  	};
  };
diff --combined arch/arm/boot/dts/imx35.dtsi
index f049c692c6b0,1f0e2203b576..e08c0c193767
--- a/arch/arm/boot/dts/imx35.dtsi
+++ b/arch/arm/boot/dts/imx35.dtsi
@@@ -303,7 -303,7 +303,7 @@@
  			};
  
  			can1: can at 53fe4000 {
- 				compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan";
+ 				compatible = "fsl,imx35-flexcan";
  				reg = <0x53fe4000 0x1000>;
  				clocks = <&clks 33>, <&clks 33>;
  				clock-names = "ipg", "per";
@@@ -312,7 -312,7 +312,7 @@@
  			};
  
  			can2: can at 53fe8000 {
- 				compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan";
+ 				compatible = "fsl,imx35-flexcan";
  				reg = <0x53fe8000 0x1000>;
  				clocks = <&clks 34>, <&clks 34>;
  				clock-names = "ipg", "per";
@@@ -402,13 -402,11 +402,13 @@@
  		usbphy0: usb-phy at 0 {
  			reg = <0>;
  			compatible = "usb-nop-xceiv";
 +			#phy-cells = <0>;
  		};
  
  		usbphy1: usb-phy at 1 {
  			reg = <1>;
  			compatible = "usb-nop-xceiv";
 +			#phy-cells = <0>;
  		};
  	};
  };
diff --combined arch/arm/boot/dts/imx53.dtsi
index fb6cdd629ee7,85071ff8c639..d55b0755a36e
--- a/arch/arm/boot/dts/imx53.dtsi
+++ b/arch/arm/boot/dts/imx53.dtsi
@@@ -303,7 -303,6 +303,7 @@@
  				compatible = "usb-nop-xceiv";
  				clocks = <&clks IMX5_CLK_USB_PHY1_GATE>;
  				clock-names = "main_clk";
 +				#phy-cells = <0>;
  				status = "okay";
  			};
  
@@@ -311,7 -310,6 +311,7 @@@
  				compatible = "usb-nop-xceiv";
  				clocks = <&clks IMX5_CLK_USB_PHY2_GATE>;
  				clock-names = "main_clk";
 +				#phy-cells = <0>;
  				status = "okay";
  			};
  
@@@ -538,7 -536,7 +538,7 @@@
  			};
  
  			can1: can at 53fc8000 {
- 				compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan";
+ 				compatible = "fsl,imx53-flexcan";
  				reg = <0x53fc8000 0x4000>;
  				interrupts = <82>;
  				clocks = <&clks IMX5_CLK_CAN1_IPG_GATE>,
@@@ -548,7 -546,7 +548,7 @@@
  			};
  
  			can2: can at 53fcc000 {
- 				compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan";
+ 				compatible = "fsl,imx53-flexcan";
  				reg = <0x53fcc000 0x4000>;
  				interrupts = <83>;
  				clocks = <&clks IMX5_CLK_CAN2_IPG_GATE>,
diff --combined arch/arm/boot/dts/ls1021a-twr.dts
index f7946f40d35d,7202d9c504be..3adf79372057
--- a/arch/arm/boot/dts/ls1021a-twr.dts
+++ b/arch/arm/boot/dts/ls1021a-twr.dts
@@@ -228,10 -228,6 +228,10 @@@
  	};
  };
  
 +&esdhc {
 +        status = "okay";
 +};
 +
  &sai1 {
  	status = "okay";
  };
@@@ -247,3 -243,19 +247,19 @@@
  &uart1 {
  	status = "okay";
  };
+ 
+ &can0 {
+ 	status = "okay";
+ };
+ 
+ &can1 {
+ 	status = "okay";
+ };
+ 
+ &can2 {
+ 	status = "disabled";
+ };
+ 
+ &can3 {
+ 	status = "disabled";
+ };
diff --combined arch/arm/boot/dts/ls1021a.dtsi
index 64249726b3cb,7789031898b0..a121c9130271
--- a/arch/arm/boot/dts/ls1021a.dtsi
+++ b/arch/arm/boot/dts/ls1021a.dtsi
@@@ -154,22 -154,8 +154,22 @@@
  			big-endian;
  		};
  
 +		qspi: quadspi at 1550000 {
 +			compatible = "fsl,ls1021a-qspi";
 +			#address-cells = <1>;
 +			#size-cells = <0>;
 +			reg = <0x0 0x1550000 0x0 0x10000>,
 +			      <0x0 0x40000000 0x0 0x40000000>;
 +			reg-names = "QuadSPI", "QuadSPI-memory";
 +			interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
 +			clock-names = "qspi_en", "qspi";
 +			clocks = <&clockgen 4 1>, <&clockgen 4 1>;
 +			big-endian;
 +			status = "disabled";
 +		};
 +
  		esdhc: esdhc at 1560000 {
 -			compatible = "fsl,esdhc";
 +			compatible = "fsl,ls1021a-esdhc", "fsl,esdhc";
  			reg = <0x0 0x1560000 0x0 0x10000>;
  			interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
  			clock-frequency = <0>;
@@@ -744,5 -730,41 +744,41 @@@
  					<0000 0 0 3 &gic GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>,
  					<0000 0 0 4 &gic GIC_SPI 193 IRQ_TYPE_LEVEL_HIGH>;
  		};
+ 
+ 		can0: can at 2a70000 {
+ 			compatible = "fsl,ls1021ar2-flexcan";
+ 			reg = <0x0 0x2a70000 0x0 0x1000>;
+ 			interrupts = <GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>;
+ 			clocks = <&clockgen 4 1>, <&clockgen 4 1>;
+ 			clock-names = "ipg", "per";
+ 			big-endian;
+ 		};
+ 
+ 		can1: can at 2a80000 {
+ 			compatible = "fsl,ls1021ar2-flexcan";
+ 			reg = <0x0 0x2a80000 0x0 0x1000>;
+ 			interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
+ 			clocks = <&clockgen 4 1>, <&clockgen 4 1>;
+ 			clock-names = "ipg", "per";
+ 			big-endian;
+ 		};
+ 
+ 		can2: can at 2a90000 {
+ 			compatible = "fsl,ls1021ar2-flexcan";
+ 			reg = <0x0 0x2a90000 0x0 0x1000>;
+ 			interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>;
+ 			clocks = <&clockgen 4 1>, <&clockgen 4 1>;
+ 			clock-names = "ipg", "per";
+ 			big-endian;
+ 		};
+ 
+ 		can3: can at 2aa0000 {
+ 			compatible = "fsl,ls1021ar2-flexcan";
+ 			reg = <0x0 0x2aa0000 0x0 0x1000>;
+ 			interrupts = <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>;
+ 			clocks = <&clockgen 4 1>, <&clockgen 4 1>;
+ 			clock-names = "ipg", "per";
+ 			big-endian;
+ 		};
  	};
  };
diff --combined arch/powerpc/net/bpf_jit_comp64.c
index d183b4801bdb,d5a5bc43cf8f..6771c63b2bec
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@@ -763,8 -763,7 +763,8 @@@ emit_clear
  			func = (u8 *) __bpf_call_base + imm;
  
  			/* Save skb pointer if we need to re-cache skb data */
 -			if (bpf_helper_changes_pkt_data(func))
 +			if ((ctx->seen & SEEN_SKB) &&
 +			    bpf_helper_changes_pkt_data(func))
  				PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
  
  			bpf_jit_emit_func_call(image, ctx, (u64)func);
@@@ -773,8 -772,7 +773,8 @@@
  			PPC_MR(b2p[BPF_REG_0], 3);
  
  			/* refresh skb cache */
 -			if (bpf_helper_changes_pkt_data(func)) {
 +			if ((ctx->seen & SEEN_SKB) &&
 +			    bpf_helper_changes_pkt_data(func)) {
  				/* reload skb pointer to r3 */
  				PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
  				bpf_jit_emit_skb_loads(image, ctx);
@@@ -995,7 -993,7 +995,7 @@@ struct bpf_prog *bpf_int_jit_compile(st
  	struct bpf_prog *tmp_fp;
  	bool bpf_blinded = false;
  
- 	if (!bpf_jit_enable)
+ 	if (!fp->jit_requested)
  		return org_fp;
  
  	tmp_fp = bpf_jit_blind_constants(org_fp);
diff --combined arch/s390/net/bpf_jit_comp.c
index 9557d8b516df,f4baa8c514d3..1dfadbd126f3
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@@ -55,7 -55,8 +55,7 @@@ struct bpf_jit 
  #define SEEN_LITERAL	8	/* code uses literals */
  #define SEEN_FUNC	16	/* calls C functions */
  #define SEEN_TAIL_CALL	32	/* code uses tail calls */
 -#define SEEN_SKB_CHANGE	64	/* code changes skb data */
 -#define SEEN_REG_AX	128	/* code uses constant blinding */
 +#define SEEN_REG_AX	64	/* code uses constant blinding */
  #define SEEN_STACK	(SEEN_FUNC | SEEN_MEM | SEEN_SKB)
  
  /*
@@@ -447,12 -448,12 +447,12 @@@ static void bpf_jit_prologue(struct bpf
  			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
  				      REG_15, 152);
  	}
 -	if (jit->seen & SEEN_SKB)
 +	if (jit->seen & SEEN_SKB) {
  		emit_load_skb_data_hlen(jit);
 -	if (jit->seen & SEEN_SKB_CHANGE)
  		/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
  		EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
  			      STK_OFF_SKBP);
 +	}
  }
  
  /*
@@@ -982,8 -983,8 +982,8 @@@ static noinline int bpf_jit_insn(struc
  		EMIT2(0x0d00, REG_14, REG_W1);
  		/* lgr %b0,%r2: load return value into %b0 */
  		EMIT4(0xb9040000, BPF_REG_0, REG_2);
 -		if (bpf_helper_changes_pkt_data((void *)func)) {
 -			jit->seen |= SEEN_SKB_CHANGE;
 +		if ((jit->seen & SEEN_SKB) &&
 +		    bpf_helper_changes_pkt_data((void *)func)) {
  			/* lg %b1,ST_OFF_SKBP(%r15) */
  			EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
  				      REG_15, STK_OFF_SKBP);
@@@ -1299,7 -1300,7 +1299,7 @@@ struct bpf_prog *bpf_int_jit_compile(st
  	struct bpf_jit jit;
  	int pass;
  
- 	if (!bpf_jit_enable)
+ 	if (!fp->jit_requested)
  		return orig_fp;
  
  	tmp = bpf_jit_blind_constants(fp);
diff --combined arch/sparc/net/bpf_jit_comp_64.c
index ff5f9cb3039a,a2f1b5e774a7..22aff21fa44d
--- a/arch/sparc/net/bpf_jit_comp_64.c
+++ b/arch/sparc/net/bpf_jit_comp_64.c
@@@ -1245,16 -1245,14 +1245,16 @@@ static int build_insn(const struct bpf_
  		u8 *func = ((u8 *)__bpf_call_base) + imm;
  
  		ctx->saw_call = true;
 +		if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
 +			emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
  
  		emit_call((u32 *)func, ctx);
  		emit_nop(ctx);
  
  		emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
  
 -		if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
 -			load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
 +		if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
 +			load_skb_regs(ctx, L7);
  		break;
  	}
  
@@@ -1519,7 -1517,7 +1519,7 @@@ struct bpf_prog *bpf_int_jit_compile(st
  	u8 *image_ptr;
  	int pass;
  
- 	if (!bpf_jit_enable)
+ 	if (!prog->jit_requested)
  		return orig_prog;
  
  	tmp = bpf_jit_blind_constants(prog);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index d9d8227f195f,0f5c012de52e..3aa1c90e7c86
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@@ -71,6 -71,11 +71,6 @@@ struct mlx5e_channel_param 
  	struct mlx5e_cq_param      icosq_cq;
  };
  
 -static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
 -{
 -	return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
 -}
 -
  static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
  {
  	return MLX5_CAP_GEN(mdev, striding_rq) &&
@@@ -78,8 -83,8 +78,8 @@@
  		MLX5_CAP_ETH(mdev, reg_umr_sq);
  }
  
 -void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
 -			      struct mlx5e_params *params, u8 rq_type)
 +void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
 +			       struct mlx5e_params *params, u8 rq_type)
  {
  	params->rq_wq_type = rq_type;
  	params->lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ;
@@@ -88,8 -93,10 +88,8 @@@
  		params->log_rq_size = is_kdump_kernel() ?
  			MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
  			MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
 -		params->mpwqe_log_stride_sz =
 -			MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
 -			MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) :
 -			MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
 +		params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
 +			MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
  		params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
  			params->mpwqe_log_stride_sz;
  		break;
@@@ -113,14 -120,13 +113,14 @@@
  		       MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
  }
  
 -static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
 +static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
 +				struct mlx5e_params *params)
  {
  	u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
  		    !params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
  		    MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
  		    MLX5_WQ_TYPE_LINKED_LIST;
 -	mlx5e_set_rq_type_params(mdev, params, rq_type);
 +	mlx5e_init_rq_type_params(mdev, params, rq_type);
  }
  
  static void mlx5e_update_carrier(struct mlx5e_priv *priv)
@@@ -438,16 -444,17 +438,16 @@@ static int mlx5e_rq_alloc_mpwqe_info(st
  	int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
  	int mtt_sz = mlx5e_get_wqe_mtt_sz();
  	int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
 -	int node = mlx5e_get_node(c->priv, c->ix);
  	int i;
  
  	rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
 -					GFP_KERNEL, node);
 +				      GFP_KERNEL, cpu_to_node(c->cpu));
  	if (!rq->mpwqe.info)
  		goto err_out;
  
  	/* We allocate more than mtt_sz as we will align the pointer */
 -	rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
 -					GFP_KERNEL, node);
 +	rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
 +					cpu_to_node(c->cpu));
  	if (unlikely(!rq->mpwqe.mtt_no_align))
  		goto err_free_wqe_info;
  
@@@ -555,7 -562,7 +555,7 @@@ static int mlx5e_alloc_rq(struct mlx5e_
  	int err;
  	int i;
  
 -	rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
 +	rqp->wq.db_numa_node = cpu_to_node(c->cpu);
  
  	err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
  				&rq->wq_ctrl);
@@@ -622,7 -629,8 +622,7 @@@
  	default: /* MLX5_WQ_TYPE_LINKED_LIST */
  		rq->wqe.frag_info =
  			kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
 -				     GFP_KERNEL,
 -				     mlx5e_get_node(c->priv, c->ix));
 +				     GFP_KERNEL, cpu_to_node(c->cpu));
  		if (!rq->wqe.frag_info) {
  			err = -ENOMEM;
  			goto err_rq_wq_destroy;
@@@ -992,13 -1000,13 +992,13 @@@ static int mlx5e_alloc_xdpsq(struct mlx
  	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
  	sq->min_inline_mode = params->tx_min_inline_mode;
  
 -	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
 +	param->wq.db_numa_node = cpu_to_node(c->cpu);
  	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
  	if (err)
  		return err;
  	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
  
 -	err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
 +	err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
  	if (err)
  		goto err_sq_wq_destroy;
  
@@@ -1045,13 -1053,13 +1045,13 @@@ static int mlx5e_alloc_icosq(struct mlx
  	sq->channel   = c;
  	sq->uar_map   = mdev->mlx5e_res.bfreg.map;
  
 -	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
 +	param->wq.db_numa_node = cpu_to_node(c->cpu);
  	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
  	if (err)
  		return err;
  	sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
  
 -	err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
 +	err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
  	if (err)
  		goto err_sq_wq_destroy;
  
@@@ -1118,13 -1126,13 +1118,13 @@@ static int mlx5e_alloc_txqsq(struct mlx
  	if (MLX5_IPSEC_DEV(c->priv->mdev))
  		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
  
 -	param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
 +	param->wq.db_numa_node = cpu_to_node(c->cpu);
  	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
  	if (err)
  		return err;
  	sq->wq.db    = &sq->wq.db[MLX5_SND_DBR];
  
 -	err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
 +	err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
  	if (err)
  		goto err_sq_wq_destroy;
  
@@@ -1496,8 -1504,8 +1496,8 @@@ static int mlx5e_alloc_cq(struct mlx5e_
  	struct mlx5_core_dev *mdev = c->priv->mdev;
  	int err;
  
 -	param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
 -	param->wq.db_numa_node  = mlx5e_get_node(c->priv, c->ix);
 +	param->wq.buf_numa_node = cpu_to_node(c->cpu);
 +	param->wq.db_numa_node  = cpu_to_node(c->cpu);
  	param->eq_ix   = c->ix;
  
  	err = mlx5e_alloc_cq_common(mdev, param, cq);
@@@ -1596,11 -1604,6 +1596,11 @@@ static void mlx5e_close_cq(struct mlx5e
  	mlx5e_free_cq(cq);
  }
  
 +static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
 +{
 +	return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
 +}
 +
  static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
  			     struct mlx5e_params *params,
  			     struct mlx5e_channel_param *cparam)
@@@ -1749,13 -1752,12 +1749,13 @@@ static int mlx5e_open_channel(struct ml
  {
  	struct mlx5e_cq_moder icocq_moder = {0, 0};
  	struct net_device *netdev = priv->netdev;
 +	int cpu = mlx5e_get_cpu(priv, ix);
  	struct mlx5e_channel *c;
  	unsigned int irq;
  	int err;
  	int eqn;
  
 -	c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
 +	c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
  	if (!c)
  		return -ENOMEM;
  
@@@ -1763,7 -1765,6 +1763,7 @@@
  	c->mdev     = priv->mdev;
  	c->tstamp   = &priv->tstamp;
  	c->ix       = ix;
 +	c->cpu      = cpu;
  	c->pdev     = &priv->mdev->pdev->dev;
  	c->netdev   = priv->netdev;
  	c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
@@@ -1852,7 -1853,8 +1852,7 @@@ static void mlx5e_activate_channel(stru
  	for (tc = 0; tc < c->num_tc; tc++)
  		mlx5e_activate_txqsq(&c->sq[tc]);
  	mlx5e_activate_rq(&c->rq);
 -	netif_set_xps_queue(c->netdev,
 -		mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
 +	netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
  }
  
  static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@@ -3677,7 -3679,6 +3677,7 @@@ static netdev_features_t mlx5e_tunnel_f
  						     struct sk_buff *skb,
  						     netdev_features_t features)
  {
 +	unsigned int offset = 0;
  	struct udphdr *udph;
  	u8 proto;
  	u16 port;
@@@ -3687,7 -3688,7 +3687,7 @@@
  		proto = ip_hdr(skb)->protocol;
  		break;
  	case htons(ETH_P_IPV6):
 -		proto = ipv6_hdr(skb)->nexthdr;
 +		proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
  		break;
  	default:
  		goto out;
@@@ -4307,9 -4308,6 +4307,6 @@@ static void mlx5e_nic_cleanup(struct ml
  {
  	mlx5e_ipsec_cleanup(priv);
  	mlx5e_vxlan_cleanup(priv);
- 
- 	if (priv->channels.params.xdp_prog)
- 		bpf_prog_put(priv->channels.params.xdp_prog);
  }
  
  static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
diff --combined drivers/net/ethernet/netronome/nfp/bpf/main.c
index 13190aa09faf,4f6553f01178..348471fae6a2
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@@ -34,10 -34,12 +34,12 @@@
  #include <net/pkt_cls.h>
  
  #include "../nfpcore/nfp_cpp.h"
+ #include "../nfpcore/nfp_nffw.h"
  #include "../nfp_app.h"
  #include "../nfp_main.h"
  #include "../nfp_net.h"
  #include "../nfp_port.h"
+ #include "fw.h"
  #include "main.h"
  
  static bool nfp_net_ebpf_capable(struct nfp_net *nn)
@@@ -82,43 -84,11 +84,11 @@@ static const char *nfp_bpf_extra_cap(st
  	return nfp_net_ebpf_capable(nn) ? "BPF" : "";
  }
  
- static int
- nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id)
- {
- 	int err;
- 
- 	nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL);
- 	if (!nn->app_priv)
- 		return -ENOMEM;
- 
- 	err = nfp_app_nic_vnic_alloc(app, nn, id);
- 	if (err)
- 		goto err_free_priv;
- 
- 	return 0;
- err_free_priv:
- 	kfree(nn->app_priv);
- 	return err;
- }
- 
- static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn)
- {
- 	struct nfp_bpf_vnic *bv = nn->app_priv;
- 
- 	if (nn->dp.bpf_offload_xdp)
- 		nfp_bpf_xdp_offload(app, nn, NULL);
- 	WARN_ON(bv->tc_prog);
- 	kfree(bv);
- }
- 
  static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type,
  				     void *type_data, void *cb_priv)
  {
  	struct tc_cls_bpf_offload *cls_bpf = type_data;
  	struct nfp_net *nn = cb_priv;
- 	struct bpf_prog *oldprog;
- 	struct nfp_bpf_vnic *bv;
- 	int err;
  
  	if (type != TC_SETUP_CLSBPF ||
  	    !tc_can_offload(nn->dp.netdev) ||
@@@ -126,6 -96,8 +96,8 @@@
  	    cls_bpf->common.protocol != htons(ETH_P_ALL) ||
  	    cls_bpf->common.chain_index)
  		return -EOPNOTSUPP;
+ 	if (nn->dp.bpf_offload_xdp)
+ 		return -EBUSY;
  
  	/* Only support TC direct action */
  	if (!cls_bpf->exts_integrated ||
@@@ -134,25 -106,16 +106,10 @@@
  		return -EOPNOTSUPP;
  	}
  
 -	switch (cls_bpf->command) {
 -	case TC_CLSBPF_REPLACE:
 -		return nfp_net_bpf_offload(nn, cls_bpf->prog, true);
 -	case TC_CLSBPF_ADD:
 -		return nfp_net_bpf_offload(nn, cls_bpf->prog, false);
 -	case TC_CLSBPF_DESTROY:
 -		return nfp_net_bpf_offload(nn, NULL, true);
 -	default:
 +	if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
  		return -EOPNOTSUPP;
 -	}
 +
- 	bv = nn->app_priv;
- 	oldprog = cls_bpf->oldprog;
- 
- 	/* Don't remove if oldprog doesn't match driver's state */
- 	if (bv->tc_prog != oldprog) {
- 		oldprog = NULL;
- 		if (!cls_bpf->prog)
- 			return 0;
- 	}
- 
- 	err = nfp_net_bpf_offload(nn, cls_bpf->prog, oldprog);
- 	if (err)
- 		return err;
- 
- 	bv->tc_prog = cls_bpf->prog;
- 	return 0;
++	return nfp_net_bpf_offload(nn, cls_bpf->prog, cls_bpf->oldprog);
  }
  
  static int nfp_bpf_setup_tc_block(struct net_device *netdev,
@@@ -194,14 -157,126 +151,126 @@@ static bool nfp_bpf_tc_busy(struct nfp_
  	return nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF;
  }
  
+ static int
+ nfp_bpf_parse_cap_adjust_head(struct nfp_app_bpf *bpf, void __iomem *value,
+ 			      u32 length)
+ {
+ 	struct nfp_bpf_cap_tlv_adjust_head __iomem *cap = value;
+ 	struct nfp_cpp *cpp = bpf->app->pf->cpp;
+ 
+ 	if (length < sizeof(*cap)) {
+ 		nfp_err(cpp, "truncated adjust_head TLV: %d\n", length);
+ 		return -EINVAL;
+ 	}
+ 
+ 	bpf->adjust_head.flags = readl(&cap->flags);
+ 	bpf->adjust_head.off_min = readl(&cap->off_min);
+ 	bpf->adjust_head.off_max = readl(&cap->off_max);
+ 	bpf->adjust_head.guaranteed_sub = readl(&cap->guaranteed_sub);
+ 	bpf->adjust_head.guaranteed_add = readl(&cap->guaranteed_add);
+ 
+ 	if (bpf->adjust_head.off_min > bpf->adjust_head.off_max) {
+ 		nfp_err(cpp, "invalid adjust_head TLV: min > max\n");
+ 		return -EINVAL;
+ 	}
+ 	if (!FIELD_FIT(UR_REG_IMM_MAX, bpf->adjust_head.off_min) ||
+ 	    !FIELD_FIT(UR_REG_IMM_MAX, bpf->adjust_head.off_max)) {
+ 		nfp_warn(cpp, "disabling adjust_head - driver expects min/max to fit in as immediates\n");
+ 		memset(&bpf->adjust_head, 0, sizeof(bpf->adjust_head));
+ 		return 0;
+ 	}
+ 
+ 	return 0;
+ }
+ 
+ static int nfp_bpf_parse_capabilities(struct nfp_app *app)
+ {
+ 	struct nfp_cpp *cpp = app->pf->cpp;
+ 	struct nfp_cpp_area *area;
+ 	u8 __iomem *mem, *start;
+ 
+ 	mem = nfp_rtsym_map(app->pf->rtbl, "_abi_bpf_capabilities", "bpf.cap",
+ 			    8, &area);
+ 	if (IS_ERR(mem))
+ 		return PTR_ERR(mem) == -ENOENT ? 0 : PTR_ERR(mem);
+ 
+ 	start = mem;
+ 	while (mem - start + 8 < nfp_cpp_area_size(area)) {
+ 		u8 __iomem *value;
+ 		u32 type, length;
+ 
+ 		type = readl(mem);
+ 		length = readl(mem + 4);
+ 		value = mem + 8;
+ 
+ 		mem += 8 + length;
+ 		if (mem - start > nfp_cpp_area_size(area))
+ 			goto err_release_free;
+ 
+ 		switch (type) {
+ 		case NFP_BPF_CAP_TYPE_ADJUST_HEAD:
+ 			if (nfp_bpf_parse_cap_adjust_head(app->priv, value,
+ 							  length))
+ 				goto err_release_free;
+ 			break;
+ 		default:
+ 			nfp_dbg(cpp, "unknown BPF capability: %d\n", type);
+ 			break;
+ 		}
+ 	}
+ 	if (mem - start != nfp_cpp_area_size(area)) {
+ 		nfp_err(cpp, "BPF capabilities left after parsing, parsed:%zd total length:%zu\n",
+ 			mem - start, nfp_cpp_area_size(area));
+ 		goto err_release_free;
+ 	}
+ 
+ 	nfp_cpp_area_release_free(area);
+ 
+ 	return 0;
+ 
+ err_release_free:
+ 	nfp_err(cpp, "invalid BPF capabilities at offset:%zd\n", mem - start);
+ 	nfp_cpp_area_release_free(area);
+ 	return -EINVAL;
+ }
+ 
+ static int nfp_bpf_init(struct nfp_app *app)
+ {
+ 	struct nfp_app_bpf *bpf;
+ 	int err;
+ 
+ 	bpf = kzalloc(sizeof(*bpf), GFP_KERNEL);
+ 	if (!bpf)
+ 		return -ENOMEM;
+ 	bpf->app = app;
+ 	app->priv = bpf;
+ 
+ 	err = nfp_bpf_parse_capabilities(app);
+ 	if (err)
+ 		goto err_free_bpf;
+ 
+ 	return 0;
+ 
+ err_free_bpf:
+ 	kfree(bpf);
+ 	return err;
+ }
+ 
+ static void nfp_bpf_clean(struct nfp_app *app)
+ {
+ 	kfree(app->priv);
+ }
+ 
  const struct nfp_app_type app_bpf = {
  	.id		= NFP_APP_BPF_NIC,
  	.name		= "ebpf",
  
+ 	.init		= nfp_bpf_init,
+ 	.clean		= nfp_bpf_clean,
+ 
  	.extra_cap	= nfp_bpf_extra_cap,
  
- 	.vnic_alloc	= nfp_bpf_vnic_alloc,
- 	.vnic_free	= nfp_bpf_vnic_free,
+ 	.vnic_alloc	= nfp_app_nic_vnic_alloc,
  
  	.setup_tc	= nfp_bpf_setup_tc,
  	.tc_busy	= nfp_bpf_tc_busy,
diff --combined drivers/net/ethernet/netronome/nfp/bpf/main.h
index 57b6043177a3,f49669bf6b44..aae1be9ed056
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@@ -1,5 -1,5 +1,5 @@@
  /*
-  * Copyright (C) 2016 Netronome Systems, Inc.
+  * Copyright (C) 2016-2017 Netronome Systems, Inc.
   *
   * This software is dual licensed under the GNU General License Version 2,
   * June 1991 as shown in the file COPYING in the top-level directory of this
@@@ -78,6 -78,29 +78,29 @@@ enum pkt_vec 
  #define NFP_BPF_ABI_FLAGS	reg_imm(0)
  #define   NFP_BPF_ABI_FLAG_MARK	1
  
+ /**
+  * struct nfp_app_bpf - bpf app priv structure
+  * @app:		backpointer to the app
+  *
+  * @adjust_head:	adjust head capability
+  * @flags:		extra flags for adjust head
+  * @off_min:		minimal packet offset within buffer required
+  * @off_max:		maximum packet offset within buffer required
+  * @guaranteed_sub:	amount of negative adjustment guaranteed possible
+  * @guaranteed_add:	amount of positive adjustment guaranteed possible
+  */
+ struct nfp_app_bpf {
+ 	struct nfp_app *app;
+ 
+ 	struct nfp_bpf_cap_adjust_head {
+ 		u32 flags;
+ 		int off_min;
+ 		int off_max;
+ 		int guaranteed_sub;
+ 		int guaranteed_add;
+ 	} adjust_head;
+ };
+ 
  struct nfp_prog;
  struct nfp_insn_meta;
  typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
@@@ -89,23 -112,39 +112,39 @@@
  #define nfp_meta_next(meta)	list_next_entry(meta, l)
  #define nfp_meta_prev(meta)	list_prev_entry(meta, l)
  
+ #define FLAG_INSN_IS_JUMP_DST	BIT(0)
+ 
  /**
   * struct nfp_insn_meta - BPF instruction wrapper
   * @insn: BPF instruction
   * @ptr: pointer type for memory operations
+  * @ldst_gather_len: memcpy length gathered from load/store sequence
+  * @paired_st: the paired store insn at the head of the sequence
+  * @arg2: arg2 for call instructions
   * @ptr_not_const: pointer is not always constant
+  * @jmp_dst: destination info for jump instructions
   * @off: index of first generated machine instruction (in nfp_prog.prog)
   * @n: eBPF instruction number
+  * @flags: eBPF instruction extra optimization flags
   * @skip: skip this instruction (optimized out)
   * @double_cb: callback for second part of the instruction
   * @l: link on nfp_prog->insns list
   */
  struct nfp_insn_meta {
  	struct bpf_insn insn;
- 	struct bpf_reg_state ptr;
- 	bool ptr_not_const;
+ 	union {
+ 		struct {
+ 			struct bpf_reg_state ptr;
+ 			struct bpf_insn *paired_st;
+ 			s16 ldst_gather_len;
+ 			bool ptr_not_const;
+ 		};
+ 		struct nfp_insn_meta *jmp_dst;
+ 		struct bpf_reg_state arg2;
+ 	};
  	unsigned int off;
  	unsigned short n;
+ 	unsigned short flags;
  	bool skip;
  	instr_cb_t double_cb;
  
@@@ -134,23 -173,38 +173,38 @@@ static inline u8 mbpf_mode(const struc
  	return BPF_MODE(meta->insn.code);
  }
  
+ static inline bool is_mbpf_load(const struct nfp_insn_meta *meta)
+ {
+ 	return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM);
+ }
+ 
+ static inline bool is_mbpf_store(const struct nfp_insn_meta *meta)
+ {
+ 	return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM);
+ }
+ 
  /**
   * struct nfp_prog - nfp BPF program
+  * @bpf: backpointer to the bpf app priv structure
   * @prog: machine code
   * @prog_len: number of valid instructions in @prog array
   * @__prog_alloc_len: alloc size of @prog array
   * @verifier_meta: temporary storage for verifier's insn meta
   * @type: BPF program type
   * @start_off: address of the first instruction in the memory
+  * @last_bpf_off: address of the last instruction translated from BPF
   * @tgt_out: jump target for normal exit
   * @tgt_abort: jump target for abort (e.g. access outside of packet buffer)
   * @tgt_done: jump target to get the next packet
   * @n_translated: number of successfully translated instructions (for errors)
   * @error: error code if something went wrong
   * @stack_depth: max stack depth from the verifier
+  * @adjust_head_location: if program has single adjust head call - the insn no.
   * @insns: list of BPF instruction wrappers (struct nfp_insn_meta)
   */
  struct nfp_prog {
+ 	struct nfp_app_bpf *bpf;
+ 
  	u64 *prog;
  	unsigned int prog_len;
  	unsigned int __prog_alloc_len;
@@@ -160,6 -214,7 +214,7 @@@
  	enum bpf_prog_type type;
  
  	unsigned int start_off;
+ 	unsigned int last_bpf_off;
  	unsigned int tgt_out;
  	unsigned int tgt_abort;
  	unsigned int tgt_done;
@@@ -168,18 -223,11 +223,19 @@@
  	int error;
  
  	unsigned int stack_depth;
+ 	unsigned int adjust_head_location;
  
  	struct list_head insns;
  };
  
 +/**
 + * struct nfp_bpf_vnic - per-vNIC BPF priv structure
 + * @tc_prog:	currently loaded cls_bpf program
 + */
 +struct nfp_bpf_vnic {
 +	struct bpf_prog *tc_prog;
 +};
 +
  int nfp_bpf_jit(struct nfp_prog *prog);
  
  extern const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops;
@@@ -197,4 -245,7 +253,7 @@@ int nfp_bpf_translate(struct nfp_app *a
  		      struct bpf_prog *prog);
  int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn,
  		    struct bpf_prog *prog);
+ struct nfp_insn_meta *
+ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
+ 		  unsigned int insn_idx, unsigned int n_insns);
  #endif
diff --combined drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f1063dc00398,c52a9963c19d..beb9f5d070e1
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@@ -482,7 -482,7 +482,7 @@@ static void stmmac_get_rx_hwtstamp(stru
  		desc = np;
  
  	/* Check if timestamp is available */
 -	if (priv->hw->desc->get_rx_timestamp_status(desc, priv->adv_ts)) {
 +	if (priv->hw->desc->get_rx_timestamp_status(p, np, priv->adv_ts)) {
  		ns = priv->hw->desc->get_timestamp(desc, priv->adv_ts);
  		netdev_dbg(priv->dev, "get valid RX hw timestamp %llu\n", ns);
  		shhwtstamp = skb_hwtstamps(skb);
@@@ -1997,22 -1997,60 +1997,60 @@@ static void stmmac_set_dma_operation_mo
  static void stmmac_dma_interrupt(struct stmmac_priv *priv)
  {
  	u32 tx_channel_count = priv->plat->tx_queues_to_use;
- 	int status;
+ 	u32 rx_channel_count = priv->plat->rx_queues_to_use;
+ 	u32 channels_to_check = tx_channel_count > rx_channel_count ?
+ 				tx_channel_count : rx_channel_count;
  	u32 chan;
+ 	bool poll_scheduled = false;
+ 	int status[channels_to_check];
+ 
+ 	/* Each DMA channel can be used for rx and tx simultaneously, yet
+ 	 * napi_struct is embedded in struct stmmac_rx_queue rather than in a
+ 	 * stmmac_channel struct.
+ 	 * Because of this, stmmac_poll currently checks (and possibly wakes)
+ 	 * all tx queues rather than just a single tx queue.
+ 	 */
+ 	for (chan = 0; chan < channels_to_check; chan++)
+ 		status[chan] = priv->hw->dma->dma_interrupt(priv->ioaddr,
+ 							    &priv->xstats,
+ 							    chan);
  
- 	for (chan = 0; chan < tx_channel_count; chan++) {
- 		struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
+ 	for (chan = 0; chan < rx_channel_count; chan++) {
+ 		if (likely(status[chan] & handle_rx)) {
+ 			struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
  
- 		status = priv->hw->dma->dma_interrupt(priv->ioaddr,
- 						      &priv->xstats, chan);
- 		if (likely((status & handle_rx)) || (status & handle_tx)) {
  			if (likely(napi_schedule_prep(&rx_q->napi))) {
  				stmmac_disable_dma_irq(priv, chan);
  				__napi_schedule(&rx_q->napi);
+ 				poll_scheduled = true;
+ 			}
+ 		}
+ 	}
+ 
+ 	/* If we scheduled poll, we already know that tx queues will be checked.
+ 	 * If we didn't schedule poll, see if any DMA channel (used by tx) has a
+ 	 * completed transmission, if so, call stmmac_poll (once).
+ 	 */
+ 	if (!poll_scheduled) {
+ 		for (chan = 0; chan < tx_channel_count; chan++) {
+ 			if (status[chan] & handle_tx) {
+ 				/* It doesn't matter what rx queue we choose
+ 				 * here. We use 0 since it always exists.
+ 				 */
+ 				struct stmmac_rx_queue *rx_q =
+ 					&priv->rx_queue[0];
+ 
+ 				if (likely(napi_schedule_prep(&rx_q->napi))) {
+ 					stmmac_disable_dma_irq(priv, chan);
+ 					__napi_schedule(&rx_q->napi);
+ 				}
+ 				break;
  			}
  		}
+ 	}
  
- 		if (unlikely(status & tx_hard_error_bump_tc)) {
+ 	for (chan = 0; chan < tx_channel_count; chan++) {
+ 		if (unlikely(status[chan] & tx_hard_error_bump_tc)) {
  			/* Try to bump up the dma threshold on this failure */
  			if (unlikely(priv->xstats.threshold != SF_DMA_MODE) &&
  			    (tc <= 256)) {
@@@ -2029,7 -2067,7 +2067,7 @@@
  								    chan);
  				priv->xstats.threshold = tc;
  			}
- 		} else if (unlikely(status == tx_hard_error)) {
+ 		} else if (unlikely(status[chan] == tx_hard_error)) {
  			stmmac_tx_err(priv, chan);
  		}
  	}
@@@ -2533,7 -2571,7 +2571,7 @@@ static int stmmac_hw_setup(struct net_d
  	}
  
  	if (priv->hw->pcs && priv->hw->mac->pcs_ctrl_ane)
 -		priv->hw->mac->pcs_ctrl_ane(priv->hw, 1, priv->hw->ps, 0);
 +		priv->hw->mac->pcs_ctrl_ane(priv->ioaddr, 1, priv->hw->ps, 0);
  
  	/* set TX and RX rings length */
  	stmmac_set_rings_length(priv);
diff --combined drivers/net/netdevsim/bpf.c
index 000000000000,7da814686ad9..afaf980bbbe7
mode 000000,100644..100644
--- a/drivers/net/netdevsim/bpf.c
+++ b/drivers/net/netdevsim/bpf.c
@@@ -1,0 -1,370 +1,364 @@@
+ /*
+  * Copyright (C) 2017 Netronome Systems, Inc.
+  *
+  * This software is licensed under the GNU General License Version 2,
+  * June 1991 as shown in the file COPYING in the top-level directory of this
+  * source tree.
+  *
+  * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+  * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+  * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+  * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+  * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+  */
+ 
+ #include <linux/bpf.h>
+ #include <linux/bpf_verifier.h>
+ #include <linux/debugfs.h>
+ #include <linux/kernel.h>
+ #include <linux/rtnetlink.h>
+ #include <net/pkt_cls.h>
+ 
+ #include "netdevsim.h"
+ 
+ struct nsim_bpf_bound_prog {
+ 	struct netdevsim *ns;
+ 	struct bpf_prog *prog;
+ 	struct dentry *ddir;
+ 	const char *state;
+ 	bool is_loaded;
+ 	struct list_head l;
+ };
+ 
+ static int nsim_debugfs_bpf_string_read(struct seq_file *file, void *data)
+ {
+ 	const char **str = file->private;
+ 
+ 	if (*str)
+ 		seq_printf(file, "%s\n", *str);
+ 
+ 	return 0;
+ }
+ 
+ static int nsim_debugfs_bpf_string_open(struct inode *inode, struct file *f)
+ {
+ 	return single_open(f, nsim_debugfs_bpf_string_read, inode->i_private);
+ }
+ 
+ static const struct file_operations nsim_bpf_string_fops = {
+ 	.owner = THIS_MODULE,
+ 	.open = nsim_debugfs_bpf_string_open,
+ 	.release = single_release,
+ 	.read = seq_read,
+ 	.llseek = seq_lseek
+ };
+ 
+ static int
+ nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn)
+ {
+ 	struct nsim_bpf_bound_prog *state;
+ 
+ 	state = env->prog->aux->offload->dev_priv;
+ 	if (state->ns->bpf_bind_verifier_delay && !insn_idx)
+ 		msleep(state->ns->bpf_bind_verifier_delay);
+ 
+ 	return 0;
+ }
+ 
+ static const struct bpf_ext_analyzer_ops nsim_bpf_analyzer_ops = {
+ 	.insn_hook = nsim_bpf_verify_insn,
+ };
+ 
+ static bool nsim_xdp_offload_active(struct netdevsim *ns)
+ {
+ 	return ns->xdp_prog_mode == XDP_ATTACHED_HW;
+ }
+ 
+ static void nsim_prog_set_loaded(struct bpf_prog *prog, bool loaded)
+ {
+ 	struct nsim_bpf_bound_prog *state;
+ 
+ 	if (!prog || !prog->aux->offload)
+ 		return;
+ 
+ 	state = prog->aux->offload->dev_priv;
+ 	state->is_loaded = loaded;
+ }
+ 
+ static int
+ nsim_bpf_offload(struct netdevsim *ns, struct bpf_prog *prog, bool oldprog)
+ {
+ 	nsim_prog_set_loaded(ns->bpf_offloaded, false);
+ 
+ 	WARN(!!ns->bpf_offloaded != oldprog,
+ 	     "bad offload state, expected offload %sto be active",
+ 	     oldprog ? "" : "not ");
+ 	ns->bpf_offloaded = prog;
+ 	ns->bpf_offloaded_id = prog ? prog->aux->id : 0;
+ 	nsim_prog_set_loaded(prog, true);
+ 
+ 	return 0;
+ }
+ 
+ int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type,
+ 			       void *type_data, void *cb_priv)
+ {
+ 	struct tc_cls_bpf_offload *cls_bpf = type_data;
+ 	struct bpf_prog *prog = cls_bpf->prog;
+ 	struct netdevsim *ns = cb_priv;
+ 
+ 	if (type != TC_SETUP_CLSBPF ||
+ 	    !tc_can_offload(ns->netdev) ||
+ 	    cls_bpf->common.protocol != htons(ETH_P_ALL) ||
+ 	    cls_bpf->common.chain_index)
+ 		return -EOPNOTSUPP;
+ 
+ 	if (nsim_xdp_offload_active(ns))
+ 		return -EBUSY;
+ 
+ 	if (!ns->bpf_tc_accept)
+ 		return -EOPNOTSUPP;
+ 	/* Note: progs without skip_sw will probably not be dev bound */
+ 	if (prog && !prog->aux->offload && !ns->bpf_tc_non_bound_accept)
+ 		return -EOPNOTSUPP;
+ 
 -	switch (cls_bpf->command) {
 -	case TC_CLSBPF_REPLACE:
 -		return nsim_bpf_offload(ns, prog, true);
 -	case TC_CLSBPF_ADD:
 -		return nsim_bpf_offload(ns, prog, false);
 -	case TC_CLSBPF_DESTROY:
 -		return nsim_bpf_offload(ns, NULL, true);
 -	default:
++	if (cls_bpf->command != TC_CLSBPF_OFFLOAD)
+ 		return -EOPNOTSUPP;
 -	}
++
++	return nsim_bpf_offload(ns, prog, cls_bpf->oldprog);
+ }
+ 
+ int nsim_bpf_disable_tc(struct netdevsim *ns)
+ {
+ 	if (ns->bpf_offloaded && !nsim_xdp_offload_active(ns))
+ 		return -EBUSY;
+ 	return 0;
+ }
+ 
+ static int nsim_xdp_offload_prog(struct netdevsim *ns, struct netdev_bpf *bpf)
+ {
+ 	if (!nsim_xdp_offload_active(ns) && !bpf->prog)
+ 		return 0;
+ 	if (!nsim_xdp_offload_active(ns) && bpf->prog && ns->bpf_offloaded) {
+ 		NSIM_EA(bpf->extack, "TC program is already loaded");
+ 		return -EBUSY;
+ 	}
+ 
+ 	return nsim_bpf_offload(ns, bpf->prog, nsim_xdp_offload_active(ns));
+ }
+ 
+ static int nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf)
+ {
+ 	int err;
+ 
+ 	if (ns->xdp_prog && (bpf->flags ^ ns->xdp_flags) & XDP_FLAGS_MODES) {
+ 		NSIM_EA(bpf->extack, "program loaded with different flags");
+ 		return -EBUSY;
+ 	}
+ 
+ 	if (bpf->command == XDP_SETUP_PROG && !ns->bpf_xdpdrv_accept) {
+ 		NSIM_EA(bpf->extack, "driver XDP disabled in DebugFS");
+ 		return -EOPNOTSUPP;
+ 	}
+ 	if (bpf->command == XDP_SETUP_PROG_HW && !ns->bpf_xdpoffload_accept) {
+ 		NSIM_EA(bpf->extack, "XDP offload disabled in DebugFS");
+ 		return -EOPNOTSUPP;
+ 	}
+ 
+ 	if (bpf->command == XDP_SETUP_PROG_HW) {
+ 		err = nsim_xdp_offload_prog(ns, bpf);
+ 		if (err)
+ 			return err;
+ 	}
+ 
+ 	if (ns->xdp_prog)
+ 		bpf_prog_put(ns->xdp_prog);
+ 
+ 	ns->xdp_prog = bpf->prog;
+ 	ns->xdp_flags = bpf->flags;
+ 
+ 	if (!bpf->prog)
+ 		ns->xdp_prog_mode = XDP_ATTACHED_NONE;
+ 	else if (bpf->command == XDP_SETUP_PROG)
+ 		ns->xdp_prog_mode = XDP_ATTACHED_DRV;
+ 	else
+ 		ns->xdp_prog_mode = XDP_ATTACHED_HW;
+ 
+ 	return 0;
+ }
+ 
+ static int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog)
+ {
+ 	struct nsim_bpf_bound_prog *state;
+ 	char name[16];
+ 
+ 	state = kzalloc(sizeof(*state), GFP_KERNEL);
+ 	if (!state)
+ 		return -ENOMEM;
+ 
+ 	state->ns = ns;
+ 	state->prog = prog;
+ 	state->state = "verify";
+ 
+ 	/* Program id is not populated yet when we create the state. */
+ 	sprintf(name, "%u", ns->prog_id_gen++);
+ 	state->ddir = debugfs_create_dir(name, ns->ddir_bpf_bound_progs);
+ 	if (IS_ERR_OR_NULL(state->ddir)) {
+ 		kfree(state);
+ 		return -ENOMEM;
+ 	}
+ 
+ 	debugfs_create_u32("id", 0400, state->ddir, &prog->aux->id);
+ 	debugfs_create_file("state", 0400, state->ddir,
+ 			    &state->state, &nsim_bpf_string_fops);
+ 	debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded);
+ 
+ 	list_add_tail(&state->l, &ns->bpf_bound_progs);
+ 
+ 	prog->aux->offload->dev_priv = state;
+ 
+ 	return 0;
+ }
+ 
+ static void nsim_bpf_destroy_prog(struct bpf_prog *prog)
+ {
+ 	struct nsim_bpf_bound_prog *state;
+ 
+ 	state = prog->aux->offload->dev_priv;
+ 	WARN(state->is_loaded,
+ 	     "offload state destroyed while program still bound");
+ 	debugfs_remove_recursive(state->ddir);
+ 	list_del(&state->l);
+ 	kfree(state);
+ }
+ 
+ static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
+ {
+ 	if (bpf->prog && bpf->prog->aux->offload) {
+ 		NSIM_EA(bpf->extack, "attempt to load offloaded prog to drv");
+ 		return -EINVAL;
+ 	}
+ 	if (ns->netdev->mtu > NSIM_XDP_MAX_MTU) {
+ 		NSIM_EA(bpf->extack, "MTU too large w/ XDP enabled");
+ 		return -EINVAL;
+ 	}
+ 	if (nsim_xdp_offload_active(ns)) {
+ 		NSIM_EA(bpf->extack, "xdp offload active, can't load drv prog");
+ 		return -EBUSY;
+ 	}
+ 	return 0;
+ }
+ 
+ static int
+ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
+ {
+ 	struct nsim_bpf_bound_prog *state;
+ 
+ 	if (!bpf->prog)
+ 		return 0;
+ 
+ 	if (!bpf->prog->aux->offload) {
+ 		NSIM_EA(bpf->extack, "xdpoffload of non-bound program");
+ 		return -EINVAL;
+ 	}
+ 	if (bpf->prog->aux->offload->netdev != ns->netdev) {
+ 		NSIM_EA(bpf->extack, "program bound to different dev");
+ 		return -EINVAL;
+ 	}
+ 
+ 	state = bpf->prog->aux->offload->dev_priv;
+ 	if (WARN_ON(strcmp(state->state, "xlated"))) {
+ 		NSIM_EA(bpf->extack, "offloading program in bad state");
+ 		return -EINVAL;
+ 	}
+ 	return 0;
+ }
+ 
+ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+ {
+ 	struct netdevsim *ns = netdev_priv(dev);
+ 	struct nsim_bpf_bound_prog *state;
+ 	int err;
+ 
+ 	ASSERT_RTNL();
+ 
+ 	switch (bpf->command) {
+ 	case BPF_OFFLOAD_VERIFIER_PREP:
+ 		if (!ns->bpf_bind_accept)
+ 			return -EOPNOTSUPP;
+ 
+ 		err = nsim_bpf_create_prog(ns, bpf->verifier.prog);
+ 		if (err)
+ 			return err;
+ 
+ 		bpf->verifier.ops = &nsim_bpf_analyzer_ops;
+ 		return 0;
+ 	case BPF_OFFLOAD_TRANSLATE:
+ 		state = bpf->offload.prog->aux->offload->dev_priv;
+ 
+ 		state->state = "xlated";
+ 		return 0;
+ 	case BPF_OFFLOAD_DESTROY:
+ 		nsim_bpf_destroy_prog(bpf->offload.prog);
+ 		return 0;
+ 	case XDP_QUERY_PROG:
+ 		bpf->prog_attached = ns->xdp_prog_mode;
+ 		bpf->prog_id = ns->xdp_prog ? ns->xdp_prog->aux->id : 0;
+ 		bpf->prog_flags = ns->xdp_prog ? ns->xdp_flags : 0;
+ 		return 0;
+ 	case XDP_SETUP_PROG:
+ 		err = nsim_setup_prog_checks(ns, bpf);
+ 		if (err)
+ 			return err;
+ 
+ 		return nsim_xdp_set_prog(ns, bpf);
+ 	case XDP_SETUP_PROG_HW:
+ 		err = nsim_setup_prog_hw_checks(ns, bpf);
+ 		if (err)
+ 			return err;
+ 
+ 		return nsim_xdp_set_prog(ns, bpf);
+ 	default:
+ 		return -EINVAL;
+ 	}
+ }
+ 
+ int nsim_bpf_init(struct netdevsim *ns)
+ {
+ 	INIT_LIST_HEAD(&ns->bpf_bound_progs);
+ 
+ 	debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir,
+ 			   &ns->bpf_offloaded_id);
+ 
+ 	ns->bpf_bind_accept = true;
+ 	debugfs_create_bool("bpf_bind_accept", 0600, ns->ddir,
+ 			    &ns->bpf_bind_accept);
+ 	debugfs_create_u32("bpf_bind_verifier_delay", 0600, ns->ddir,
+ 			   &ns->bpf_bind_verifier_delay);
+ 	ns->ddir_bpf_bound_progs =
+ 		debugfs_create_dir("bpf_bound_progs", ns->ddir);
+ 	if (IS_ERR_OR_NULL(ns->ddir_bpf_bound_progs))
+ 		return -ENOMEM;
+ 
+ 	ns->bpf_tc_accept = true;
+ 	debugfs_create_bool("bpf_tc_accept", 0600, ns->ddir,
+ 			    &ns->bpf_tc_accept);
+ 	debugfs_create_bool("bpf_tc_non_bound_accept", 0600, ns->ddir,
+ 			    &ns->bpf_tc_non_bound_accept);
+ 	ns->bpf_xdpdrv_accept = true;
+ 	debugfs_create_bool("bpf_xdpdrv_accept", 0600, ns->ddir,
+ 			    &ns->bpf_xdpdrv_accept);
+ 	ns->bpf_xdpoffload_accept = true;
+ 	debugfs_create_bool("bpf_xdpoffload_accept", 0600, ns->ddir,
+ 			    &ns->bpf_xdpoffload_accept);
+ 
+ 	return 0;
+ }
+ 
+ void nsim_bpf_uninit(struct netdevsim *ns)
+ {
+ 	WARN_ON(!list_empty(&ns->bpf_bound_progs));
+ 	WARN_ON(ns->xdp_prog);
+ 	WARN_ON(ns->bpf_offloaded);
+ }
diff --combined drivers/net/phy/marvell.c
index 82104edca393,2fc026dc170a..80c120a9f2f3
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@@ -879,8 -879,6 +879,8 @@@ static int m88e1510_config_init(struct 
  
  	/* SGMII-to-Copper mode initialization */
  	if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
 +		u32 pause;
 +
  		/* Select page 18 */
  		err = marvell_set_page(phydev, 18);
  		if (err < 0)
@@@ -904,16 -902,6 +904,16 @@@
  		err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE);
  		if (err < 0)
  			return err;
 +
 +		/* There appears to be a bug in the 88e1512 when used in
 +		 * SGMII to copper mode, where the AN advertisment register
 +		 * clears the pause bits each time a negotiation occurs.
 +		 * This means we can never be truely sure what was advertised,
 +		 * so disable Pause support.
 +		 */
 +		pause = SUPPORTED_Pause | SUPPORTED_Asym_Pause;
 +		phydev->supported &= ~pause;
 +		phydev->advertising &= ~pause;
  	}
  
  	return m88e1121_config_init(phydev);
@@@ -1974,7 -1962,6 +1974,6 @@@ static struct phy_driver marvell_driver
  		.probe = marvell_probe,
  		.config_init = &marvell_config_init,
  		.config_aneg = &m88e1101_config_aneg,
- 		.read_status = &genphy_read_status,
  		.ack_interrupt = &marvell_ack_interrupt,
  		.config_intr = &marvell_config_intr,
  		.resume = &genphy_resume,
@@@ -1992,7 -1979,6 +1991,6 @@@
  		.probe = marvell_probe,
  		.config_init = &m88e1111_config_init,
  		.config_aneg = &marvell_config_aneg,
- 		.read_status = &genphy_read_status,
  		.ack_interrupt = &marvell_ack_interrupt,
  		.config_intr = &marvell_config_intr,
  		.resume = &genphy_resume,
@@@ -2028,7 -2014,6 +2026,6 @@@
  		.probe = marvell_probe,
  		.config_init = &m88e1118_config_init,
  		.config_aneg = &m88e1118_config_aneg,
- 		.read_status = &genphy_read_status,
  		.ack_interrupt = &marvell_ack_interrupt,
  		.config_intr = &marvell_config_intr,
  		.resume = &genphy_resume,
@@@ -2085,8 -2070,7 +2082,7 @@@
  		.flags = PHY_HAS_INTERRUPT,
  		.probe = marvell_probe,
  		.config_init = &m88e1145_config_init,
 -		.config_aneg = &marvell_config_aneg,
 +		.config_aneg = &m88e1101_config_aneg,
- 		.read_status = &genphy_read_status,
  		.ack_interrupt = &marvell_ack_interrupt,
  		.config_intr = &marvell_config_intr,
  		.resume = &genphy_resume,
@@@ -2104,7 -2088,6 +2100,6 @@@
  		.probe = marvell_probe,
  		.config_init = &m88e1149_config_init,
  		.config_aneg = &m88e1118_config_aneg,
- 		.read_status = &genphy_read_status,
  		.ack_interrupt = &marvell_ack_interrupt,
  		.config_intr = &marvell_config_intr,
  		.resume = &genphy_resume,
@@@ -2122,7 -2105,6 +2117,6 @@@
  		.probe = marvell_probe,
  		.config_init = &m88e1111_config_init,
  		.config_aneg = &marvell_config_aneg,
- 		.read_status = &genphy_read_status,
  		.ack_interrupt = &marvell_ack_interrupt,
  		.config_intr = &marvell_config_intr,
  		.resume = &genphy_resume,
@@@ -2139,8 -2121,6 +2133,6 @@@
  		.flags = PHY_HAS_INTERRUPT,
  		.probe = marvell_probe,
  		.config_init = &m88e1116r_config_init,
- 		.config_aneg = &genphy_config_aneg,
- 		.read_status = &genphy_read_status,
  		.ack_interrupt = &marvell_ack_interrupt,
  		.config_intr = &marvell_config_intr,
  		.resume = &genphy_resume,
@@@ -2216,7 -2196,6 +2208,6 @@@
  		.features = PHY_BASIC_FEATURES,
  		.flags = PHY_HAS_INTERRUPT,
  		.probe = marvell_probe,
- 		.config_aneg = &genphy_config_aneg,
  		.config_init = &m88e3016_config_init,
  		.aneg_done = &marvell_aneg_done,
  		.read_status = &marvell_read_status,
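
[Editor's note] The marvell.c hunks drop ".read_status = &genphy_read_status" (and one ".config_aneg = &genphy_config_aneg") from several entries because the phylib core already falls back to the generic helpers when a driver leaves those callbacks NULL. A paraphrased sketch of that fallback, written from memory rather than copied from the phylib sources:

#include <linux/phy.h>

/* Paraphrased fallback logic: use the driver's .read_status if it provides
 * one, otherwise fall back to the generic MII implementation. */
static inline int phy_read_status_fallback(struct phy_device *phydev)
{
	if (phydev->drv && phydev->drv->read_status)
		return phydev->drv->read_status(phydev);

	return genphy_read_status(phydev);
}
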
diff --combined drivers/net/tun.c
index 2ffe5dba7e09,e367d6310353..164fef1d1cf3
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@@ -195,6 -195,11 +195,11 @@@ struct tun_flow_entry 
  
  #define TUN_NUM_FLOW_ENTRIES 1024
  
+ struct tun_steering_prog {
+ 	struct rcu_head rcu;
+ 	struct bpf_prog *prog;
+ };
+ 
  /* Since the socket was moved to tun_file, to preserve the behavior of the
   * persist device, the socket filter, sndbuf and vnet header size are restored
   * when the file is attached to a persist device.
@@@ -232,6 -237,7 +237,7 @@@ struct tun_struct 
  	u32 rx_batched;
  	struct tun_pcpu_stats __percpu *pcpu_stats;
  	struct bpf_prog __rcu *xdp_prog;
+ 	struct tun_steering_prog __rcu *steering_prog;
  };
  
  static int tun_napi_receive(struct napi_struct *napi, int budget)
@@@ -537,15 -543,12 +543,12 @@@ static inline void tun_flow_save_rps_rx
   * different rxq no. here. If we could not get rxhash, then we would
   * hope the rxq no. may help here.
   */
- static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
- 			    void *accel_priv, select_queue_fallback_t fallback)
+ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
  {
- 	struct tun_struct *tun = netdev_priv(dev);
  	struct tun_flow_entry *e;
  	u32 txq = 0;
  	u32 numqueues = 0;
  
- 	rcu_read_lock();
  	numqueues = READ_ONCE(tun->numqueues);
  
  	txq = __skb_get_hash_symmetric(skb);
@@@ -563,10 -566,37 +566,37 @@@
  			txq -= numqueues;
  	}
  
- 	rcu_read_unlock();
  	return txq;
  }
  
+ static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
+ {
+ 	struct tun_steering_prog *prog;
+ 	u16 ret = 0;
+ 
+ 	prog = rcu_dereference(tun->steering_prog);
+ 	if (prog)
+ 		ret = bpf_prog_run_clear_cb(prog->prog, skb);
+ 
+ 	return ret % tun->numqueues;
+ }
+ 
+ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
+ 			    void *accel_priv, select_queue_fallback_t fallback)
+ {
+ 	struct tun_struct *tun = netdev_priv(dev);
+ 	u16 ret;
+ 
+ 	rcu_read_lock();
+ 	if (rcu_dereference(tun->steering_prog))
+ 		ret = tun_ebpf_select_queue(tun, skb);
+ 	else
+ 		ret = tun_automq_select_queue(tun, skb);
+ 	rcu_read_unlock();
+ 
+ 	return ret;
+ }
+ 
  static inline bool tun_not_capable(struct tun_struct *tun)
  {
  	const struct cred *cred = current_cred();
@@@ -673,7 -703,6 +703,6 @@@ static void tun_detach(struct tun_file 
  static void tun_detach_all(struct net_device *dev)
  {
  	struct tun_struct *tun = netdev_priv(dev);
- 	struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog);
  	struct tun_file *tfile, *tmp;
  	int i, n = tun->numqueues;
  
@@@ -708,9 -737,6 +737,6 @@@
  	}
  	BUG_ON(tun->numdisabled != 0);
  
- 	if (xdp_prog)
- 		bpf_prog_put(xdp_prog);
- 
  	if (tun->flags & IFF_PERSIST)
  		module_put(THIS_MODULE);
  }
@@@ -937,23 -963,10 +963,10 @@@ static int tun_net_close(struct net_dev
  }
  
  /* Net device start xmit */
- static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
+ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
  {
- 	struct tun_struct *tun = netdev_priv(dev);
- 	int txq = skb->queue_mapping;
- 	struct tun_file *tfile;
- 	u32 numqueues = 0;
- 
- 	rcu_read_lock();
- 	tfile = rcu_dereference(tun->tfiles[txq]);
- 	numqueues = READ_ONCE(tun->numqueues);
- 
- 	/* Drop packet if interface is not attached */
- 	if (txq >= numqueues)
- 		goto drop;
- 
  #ifdef CONFIG_RPS
- 	if (numqueues == 1 && static_key_false(&rps_needed)) {
+ 	if (tun->numqueues == 1 && static_key_false(&rps_needed)) {
  		/* Select queue was not called for the skbuff, so we extract the
  		 * RPS hash and save it into the flow_table here.
  		 */
@@@ -969,6 -982,24 +982,24 @@@
  		}
  	}
  #endif
+ }
+ 
+ /* Net device start xmit */
+ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ 	struct tun_struct *tun = netdev_priv(dev);
+ 	int txq = skb->queue_mapping;
+ 	struct tun_file *tfile;
+ 
+ 	rcu_read_lock();
+ 	tfile = rcu_dereference(tun->tfiles[txq]);
+ 
+ 	/* Drop packet if interface is not attached */
+ 	if (txq >= tun->numqueues)
+ 		goto drop;
+ 
+ 	if (!rcu_dereference(tun->steering_prog))
+ 		tun_automq_xmit(tun, skb);
  
  	tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
  
@@@ -1248,12 -1279,12 +1279,12 @@@ static void tun_net_init(struct net_dev
  /* Character device part */
  
  /* Poll */
 -static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
 +static __poll_t tun_chr_poll(struct file *file, poll_table *wait)
  {
  	struct tun_file *tfile = file->private_data;
  	struct tun_struct *tun = tun_get(tfile);
  	struct sock *sk;
 -	unsigned int mask = 0;
 +	__poll_t mask = 0;
  
  	if (!tun)
  		return POLLERR;
@@@ -1551,7 -1582,7 +1582,7 @@@ static ssize_t tun_get_user(struct tun_
  	int copylen;
  	bool zerocopy = false;
  	int err;
- 	u32 rxhash;
+ 	u32 rxhash = 0;
  	int skb_xdp = 1;
  	bool frags = tun_napi_frags_enabled(tun);
  
@@@ -1739,7 -1770,10 +1770,10 @@@
  		rcu_read_unlock();
  	}
  
- 	rxhash = __skb_get_hash_symmetric(skb);
+ 	rcu_read_lock();
+ 	if (!rcu_dereference(tun->steering_prog))
+ 		rxhash = __skb_get_hash_symmetric(skb);
+ 	rcu_read_unlock();
  
  	if (frags) {
  		/* Exercise flow dissector code path. */
@@@ -1783,7 -1817,9 +1817,9 @@@
  	u64_stats_update_end(&stats->syncp);
  	put_cpu_ptr(stats);
  
- 	tun_flow_update(tun, rxhash, tfile);
+ 	if (rxhash)
+ 		tun_flow_update(tun, rxhash, tfile);
+ 
  	return total_len;
  }
  
@@@ -1991,6 -2027,39 +2027,39 @@@ static ssize_t tun_chr_read_iter(struc
  	return ret;
  }
  
+ static void tun_steering_prog_free(struct rcu_head *rcu)
+ {
+ 	struct tun_steering_prog *prog = container_of(rcu,
+ 					 struct tun_steering_prog, rcu);
+ 
+ 	bpf_prog_destroy(prog->prog);
+ 	kfree(prog);
+ }
+ 
+ static int __tun_set_steering_ebpf(struct tun_struct *tun,
+ 				   struct bpf_prog *prog)
+ {
+ 	struct tun_steering_prog *old, *new = NULL;
+ 
+ 	if (prog) {
+ 		new = kmalloc(sizeof(*new), GFP_KERNEL);
+ 		if (!new)
+ 			return -ENOMEM;
+ 		new->prog = prog;
+ 	}
+ 
+ 	spin_lock_bh(&tun->lock);
+ 	old = rcu_dereference_protected(tun->steering_prog,
+ 					lockdep_is_held(&tun->lock));
+ 	rcu_assign_pointer(tun->steering_prog, new);
+ 	spin_unlock_bh(&tun->lock);
+ 
+ 	if (old)
+ 		call_rcu(&old->rcu, tun_steering_prog_free);
+ 
+ 	return 0;
+ }
+ 
  static void tun_free_netdev(struct net_device *dev)
  {
  	struct tun_struct *tun = netdev_priv(dev);
@@@ -1999,6 -2068,7 +2068,7 @@@
  	free_percpu(tun->pcpu_stats);
  	tun_flow_uninit(tun);
  	security_tun_dev_free_security(tun->security);
+ 	__tun_set_steering_ebpf(tun, NULL);
  }
  
  static void tun_setup(struct net_device *dev)
@@@ -2287,6 -2357,7 +2357,7 @@@ static int tun_set_iff(struct net *net
  		tun->filter_attached = false;
  		tun->sndbuf = tfile->socket.sk->sk_sndbuf;
  		tun->rx_batched = 0;
+ 		RCU_INIT_POINTER(tun->steering_prog, NULL);
  
  		tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats);
  		if (!tun->pcpu_stats) {
@@@ -2479,6 -2550,25 +2550,25 @@@ unlock
  	return ret;
  }
  
+ static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data)
+ {
+ 	struct bpf_prog *prog;
+ 	int fd;
+ 
+ 	if (copy_from_user(&fd, data, sizeof(fd)))
+ 		return -EFAULT;
+ 
+ 	if (fd == -1) {
+ 		prog = NULL;
+ 	} else {
+ 		prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+ 		if (IS_ERR(prog))
+ 			return PTR_ERR(prog);
+ 	}
+ 
+ 	return __tun_set_steering_ebpf(tun, prog);
+ }
+ 
  static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
  			    unsigned long arg, int ifreq_len)
  {
@@@ -2755,6 -2845,10 +2845,10 @@@
  		ret = 0;
  		break;
  
+ 	case TUNSETSTEERINGEBPF:
+ 		ret = tun_set_steering_ebpf(tun, argp);
+ 		break;
+ 
  	default:
  		ret = -EINVAL;
  		break;
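
[Editor's note] The tun.c changes let userspace install a socket-filter BPF program whose return value selects the transmit queue; tun_set_steering_ebpf() reads a program fd from the ioctl argument (or -1 to clear the program). A hedged userspace sketch, assuming a tun fd already attached to a multi-queue device, a program fd obtained from bpf(BPF_PROG_LOAD, ...) with type BPF_PROG_TYPE_SOCKET_FILTER, and a <linux/if_tun.h> that exports TUNSETSTEERINGEBPF (that uapi part of the series is not in this hunk):

#include <sys/ioctl.h>
#include <linux/if_tun.h>

/* Install (or, with bpf_prog_fd == -1, remove) a queue-steering program on
 * a tun device.  The kernel checks that the fd refers to a
 * BPF_PROG_TYPE_SOCKET_FILTER program. */
static int tun_set_steering_prog(int tun_fd, int bpf_prog_fd)
{
	return ioctl(tun_fd, TUNSETSTEERINGEBPF, &bpf_prog_fd);
}
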
diff --combined drivers/net/vxlan.c
index 31f4b7911ef8,48a0dc238f73..82090ae7ced1
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@@ -2155,13 -2155,6 +2155,13 @@@ static void vxlan_xmit_one(struct sk_bu
  		}
  
  		ndst = &rt->dst;
 +		if (skb_dst(skb)) {
 +			int mtu = dst_mtu(ndst) - VXLAN_HEADROOM;
 +
 +			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
 +						       skb, mtu);
 +		}
 +
  		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
  		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
  		err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
@@@ -2197,13 -2190,6 +2197,13 @@@
  				goto out_unlock;
  		}
  
 +		if (skb_dst(skb)) {
 +			int mtu = dst_mtu(ndst) - VXLAN6_HEADROOM;
 +
 +			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL,
 +						       skb, mtu);
 +		}
 +
  		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
  		ttl = ttl ? : ip6_dst_hoplimit(ndst);
  		skb_scrub_packet(skb, xnet);
@@@ -3117,11 -3103,6 +3117,11 @@@ static void vxlan_config_apply(struct n
  
  		max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
  					   VXLAN_HEADROOM);
 +		if (max_mtu < ETH_MIN_MTU)
 +			max_mtu = ETH_MIN_MTU;
 +
 +		if (!changelink && !conf->mtu)
 +			dev->mtu = max_mtu;
  	}
  
  	if (dev->mtu > max_mtu)
@@@ -3711,18 -3692,16 +3711,16 @@@ static __net_init int vxlan_init_net(st
  	return 0;
  }
  
- static void __net_exit vxlan_exit_net(struct net *net)
+ static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
  {
  	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
  	struct vxlan_dev *vxlan, *next;
  	struct net_device *dev, *aux;
  	unsigned int h;
- 	LIST_HEAD(list);
  
- 	rtnl_lock();
  	for_each_netdev_safe(net, dev, aux)
  		if (dev->rtnl_link_ops == &vxlan_link_ops)
- 			unregister_netdevice_queue(dev, &list);
+ 			unregister_netdevice_queue(dev, head);
  
  	list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
  		/* If vxlan->dev is in the same netns, it has already been added
@@@ -3730,20 -3709,30 +3728,30 @@@
  		 */
  		if (!net_eq(dev_net(vxlan->dev), net)) {
  			gro_cells_destroy(&vxlan->gro_cells);
- 			unregister_netdevice_queue(vxlan->dev, &list);
+ 			unregister_netdevice_queue(vxlan->dev, head);
  		}
  	}
  
- 	unregister_netdevice_many(&list);
- 	rtnl_unlock();
- 
  	for (h = 0; h < PORT_HASH_SIZE; ++h)
  		WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
  }
  
+ static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
+ {
+ 	struct net *net;
+ 	LIST_HEAD(list);
+ 
+ 	rtnl_lock();
+ 	list_for_each_entry(net, net_list, exit_list)
+ 		vxlan_destroy_tunnels(net, &list);
+ 
+ 	unregister_netdevice_many(&list);
+ 	rtnl_unlock();
+ }
+ 
  static struct pernet_operations vxlan_net_ops = {
  	.init = vxlan_init_net,
- 	.exit = vxlan_exit_net,
+ 	.exit_batch = vxlan_exit_batch_net,
  	.id   = &vxlan_net_id,
  	.size = sizeof(struct vxlan_net),
  };
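
[Editor's note] The vxlan hunk converts per-netns cleanup from .exit to .exit_batch so that many network namespaces can be torn down under a single rtnl_lock()/unregister_netdevice_many() pass instead of one pass per netns. The same pattern, reduced to a hedged skeleton for an imaginary "foo" tunnel driver (all names below are illustrative):

#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

static unsigned int foo_net_id;
static struct rtnl_link_ops foo_link_ops;

/* Collect every device of this driver in @net onto @head for batched
 * unregistration; the caller holds rtnl_lock(). */
static void foo_destroy_tunnels(struct net *net, struct list_head *head)
{
	struct net_device *dev, *aux;

	for_each_netdev_safe(net, dev, aux)
		if (dev->rtnl_link_ops == &foo_link_ops)
			unregister_netdevice_queue(dev, head);
}

static void __net_exit foo_exit_batch_net(struct list_head *net_list)
{
	struct net *net;
	LIST_HEAD(list);

	rtnl_lock();
	list_for_each_entry(net, net_list, exit_list)
		foo_destroy_tunnels(net, &list);
	unregister_netdevice_many(&list);	/* one pass for all netns */
	rtnl_unlock();
}

static struct pernet_operations foo_net_ops = {
	.exit_batch = foo_exit_batch_net,
	.id	    = &foo_net_id,
};
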
diff --combined drivers/s390/net/qeth_core_main.c
index 3614df68830f,a007f6249166..bdc28330800e
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@@ -564,7 -564,7 +564,7 @@@ static struct qeth_reply *qeth_alloc_re
  
  	reply = kzalloc(sizeof(struct qeth_reply), GFP_ATOMIC);
  	if (reply) {
- 		atomic_set(&reply->refcnt, 1);
+ 		refcount_set(&reply->refcnt, 1);
  		atomic_set(&reply->received, 0);
  		reply->card = card;
  	}
@@@ -573,14 -573,12 +573,12 @@@
  
  static void qeth_get_reply(struct qeth_reply *reply)
  {
- 	WARN_ON(atomic_read(&reply->refcnt) <= 0);
- 	atomic_inc(&reply->refcnt);
+ 	refcount_inc(&reply->refcnt);
  }
  
  static void qeth_put_reply(struct qeth_reply *reply)
  {
- 	WARN_ON(atomic_read(&reply->refcnt) <= 0);
- 	if (atomic_dec_and_test(&reply->refcnt))
+ 	if (refcount_dec_and_test(&reply->refcnt))
  		kfree(reply);
  }
  
@@@ -4218,9 -4216,8 +4216,8 @@@ static int qeth_setadpparms_change_maca
  	cmd = (struct qeth_ipa_cmd *) data;
  	if (!card->options.layer2 ||
  	    !(card->info.mac_bits & QETH_LAYER2_MAC_READ)) {
- 		memcpy(card->dev->dev_addr,
- 		       &cmd->data.setadapterparms.data.change_addr.addr,
- 		       OSA_ADDR_LEN);
+ 		ether_addr_copy(card->dev->dev_addr,
+ 				cmd->data.setadapterparms.data.change_addr.addr);
  		card->info.mac_bits |= QETH_LAYER2_MAC_READ;
  	}
  	qeth_default_setadapterparms_cb(card, reply, (unsigned long) cmd);
@@@ -4242,9 -4239,9 +4239,9 @@@ int qeth_setadpparms_change_macaddr(str
  		return -ENOMEM;
  	cmd = (struct qeth_ipa_cmd *)(iob->data+IPA_PDU_HEADER_SIZE);
  	cmd->data.setadapterparms.data.change_addr.cmd = CHANGE_ADDR_READ_MAC;
- 	cmd->data.setadapterparms.data.change_addr.addr_size = OSA_ADDR_LEN;
- 	memcpy(&cmd->data.setadapterparms.data.change_addr.addr,
- 	       card->dev->dev_addr, OSA_ADDR_LEN);
+ 	cmd->data.setadapterparms.data.change_addr.addr_size = ETH_ALEN;
+ 	ether_addr_copy(cmd->data.setadapterparms.data.change_addr.addr,
+ 			card->dev->dev_addr);
  	rc = qeth_send_ipa_cmd(card, iob, qeth_setadpparms_change_macaddr_cb,
  			       NULL);
  	return rc;
@@@ -5386,13 -5383,6 +5383,13 @@@ out
  }
  EXPORT_SYMBOL_GPL(qeth_poll);
  
 +static int qeth_setassparms_inspect_rc(struct qeth_ipa_cmd *cmd)
 +{
 +	if (!cmd->hdr.return_code)
 +		cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
 +	return cmd->hdr.return_code;
 +}
 +
  int qeth_setassparms_cb(struct qeth_card *card,
  			struct qeth_reply *reply, unsigned long data)
  {
@@@ -6249,7 -6239,7 +6246,7 @@@ static int qeth_ipa_checksum_run_cmd_cb
  				(struct qeth_checksum_cmd *)reply->param;
  
  	QETH_CARD_TEXT(card, 4, "chkdoccb");
 -	if (cmd->hdr.return_code)
 +	if (qeth_setassparms_inspect_rc(cmd))
  		return 0;
  
  	memset(chksum_cb, 0, sizeof(*chksum_cb));
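
[Editor's note] The qeth hunks above replace an open-coded atomic_t reference count (plus manual WARN_ONs) with refcount_t, which saturates and warns on overflow/underflow by itself. The general shape of that conversion as a stand-alone sketch (names are illustrative, not taken from qeth):

#include <linux/refcount.h>
#include <linux/slab.h>

struct reply {
	refcount_t refcnt;
	/* payload ... */
};

static struct reply *reply_alloc(void)
{
	struct reply *r = kzalloc(sizeof(*r), GFP_ATOMIC);

	if (r)
		refcount_set(&r->refcnt, 1);	/* initial reference */
	return r;
}

static void reply_get(struct reply *r)
{
	refcount_inc(&r->refcnt);	/* WARNs if the count was already 0 */
}

static void reply_put(struct reply *r)
{
	if (refcount_dec_and_test(&r->refcnt))
		kfree(r);
}
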
diff --combined fs/btrfs/disk-io.c
index ad4eeca7a068,5da18ebc9222..1336a7da0444
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@@ -30,6 -30,7 +30,7 @@@
  #include <linux/ratelimit.h>
  #include <linux/uuid.h>
  #include <linux/semaphore.h>
+ #include <linux/bpf.h>
  #include <asm/unaligned.h>
  #include "ctree.h"
  #include "disk-io.h"
@@@ -220,7 -221,7 +221,7 @@@ void btrfs_set_buffer_lockdep_class(u6
   * extents on the btree inode are pretty simple, there's one extent
   * that covers the entire device
   */
 -static struct extent_map *btree_get_extent(struct btrfs_inode *inode,
 +struct extent_map *btree_get_extent(struct btrfs_inode *inode,
  		struct page *page, size_t pg_offset, u64 start, u64 len,
  		int create)
  {
@@@ -285,7 -286,7 +286,7 @@@ static int csum_tree_block(struct btrfs
  			   int verify)
  {
  	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
 -	char *result = NULL;
 +	char result[BTRFS_CSUM_SIZE];
  	unsigned long len;
  	unsigned long cur_len;
  	unsigned long offset = BTRFS_CSUM_SIZE;
@@@ -294,6 -295,7 +295,6 @@@
  	unsigned long map_len;
  	int err;
  	u32 crc = ~(u32)0;
 -	unsigned long inline_result;
  
  	len = buf->len - offset;
  	while (len > 0) {
@@@ -307,7 -309,13 +308,7 @@@
  		len -= cur_len;
  		offset += cur_len;
  	}
 -	if (csum_size > sizeof(inline_result)) {
 -		result = kzalloc(csum_size, GFP_NOFS);
 -		if (!result)
 -			return -ENOMEM;
 -	} else {
 -		result = (char *)&inline_result;
 -	}
 +	memset(result, 0, BTRFS_CSUM_SIZE);
  
  	btrfs_csum_final(crc, result);
  
@@@ -322,12 -330,15 +323,12 @@@
  				"%s checksum verify failed on %llu wanted %X found %X level %d",
  				fs_info->sb->s_id, buf->start,
  				val, found, btrfs_header_level(buf));
 -			if (result != (char *)&inline_result)
 -				kfree(result);
  			return -EUCLEAN;
  		}
  	} else {
  		write_extent_buffer(buf, result, 0, csum_size);
  	}
 -	if (result != (char *)&inline_result)
 -		kfree(result);
 +
  	return 0;
  }
  
@@@ -381,7 -392,7 +382,7 @@@ static int verify_parent_transid(struc
  		clear_extent_buffer_uptodate(eb);
  out:
  	unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
 -			     &cached_state, GFP_NOFS);
 +			     &cached_state);
  	if (need_lock)
  		btrfs_tree_read_unlock_blocking(eb);
  	return ret;
@@@ -445,7 -456,7 +446,7 @@@ static int btree_read_extent_buffer_pag
  	io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
  	while (1) {
  		ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
 -					       btree_get_extent, mirror_num);
 +					       mirror_num);
  		if (!ret) {
  			if (!verify_parent_transid(io_tree, eb,
  						   parent_transid, 0))
@@@ -855,8 -866,6 +856,8 @@@ static blk_status_t btree_submit_bio_ho
  	int async = check_async_write(BTRFS_I(inode));
  	blk_status_t ret;
  
 +	bio_associate_blkcg(bio, blkcg_root_css);
 +
  	if (bio_op(bio) != REQ_OP_WRITE) {
  		/*
  		 * called for a read, do the setup so that checksum validation
@@@ -1004,7 -1013,7 +1005,7 @@@ void readahead_tree_block(struct btrfs_
  	if (IS_ERR(buf))
  		return;
  	read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
 -				 buf, WAIT_NONE, btree_get_extent, 0);
 +				 buf, WAIT_NONE, 0);
  	free_extent_buffer(buf);
  }
  
@@@ -1023,7 -1032,7 +1024,7 @@@ int reada_tree_block_flagged(struct btr
  	set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
  
  	ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
 -				       btree_get_extent, mirror_num);
 +				       mirror_num);
  	if (ret) {
  		free_extent_buffer(buf);
  		return ret;
@@@ -1160,7 -1169,6 +1161,7 @@@ static void __setup_root(struct btrfs_r
  	spin_lock_init(&root->accounting_lock);
  	spin_lock_init(&root->log_extents_lock[0]);
  	spin_lock_init(&root->log_extents_lock[1]);
 +	spin_lock_init(&root->qgroup_meta_rsv_lock);
  	mutex_init(&root->objectid_mutex);
  	mutex_init(&root->log_mutex);
  	mutex_init(&root->ordered_extent_mutex);
@@@ -1177,6 -1185,7 +1178,6 @@@
  	atomic_set(&root->orphan_inodes, 0);
  	refcount_set(&root->refs, 1);
  	atomic_set(&root->will_be_snapshotted, 0);
 -	atomic64_set(&root->qgroup_meta_rsv, 0);
  	root->log_transid = 0;
  	root->log_transid_committed = -1;
  	root->last_log_commit = 0;
@@@ -1235,7 -1244,7 +1236,7 @@@ struct btrfs_root *btrfs_create_tree(st
  	struct btrfs_root *root;
  	struct btrfs_key key;
  	int ret = 0;
 -	uuid_le uuid;
 +	uuid_le uuid = { 0 };
  
  	root = btrfs_alloc_root(fs_info, GFP_KERNEL);
  	if (!root)
@@@ -1276,8 -1285,7 +1277,8 @@@
  	btrfs_set_root_used(&root->root_item, leaf->len);
  	btrfs_set_root_last_snapshot(&root->root_item, 0);
  	btrfs_set_root_dirid(&root->root_item, 0);
 -	uuid_le_gen(&uuid);
 +	if (is_fstree(objectid))
 +		uuid_le_gen(&uuid);
  	memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE);
  	root->root_item.drop_level = 0;
  
@@@ -3116,6 -3124,7 +3117,7 @@@ recovery_tree_root
  		goto fail_block_groups;
  	goto retry_root_backup;
  }
+ BPF_ALLOW_ERROR_INJECTION(open_ctree);
  
  static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
  {
@@@ -3343,8 -3352,6 +3345,8 @@@ static void write_dev_flush(struct btrf
  		return;
  
  	bio_reset(bio);
 +	bio_associate_blkcg(bio, blkcg_root_css);
 +
  	bio->bi_end_io = btrfs_end_empty_barrier;
  	bio_set_dev(bio, device->bdev);
  	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
@@@ -3352,7 -3359,7 +3354,7 @@@
  	bio->bi_private = &device->flush_wait;
  
  	btrfsic_submit_bio(bio);
 -	device->flush_bio_sent = 1;
 +	set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
  }
  
  /*
@@@ -3362,10 -3369,10 +3364,10 @@@ static blk_status_t wait_dev_flush(stru
  {
  	struct bio *bio = device->flush_bio;
  
 -	if (!device->flush_bio_sent)
 +	if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
  		return BLK_STS_OK;
  
 -	device->flush_bio_sent = 0;
 +	clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
  	wait_for_completion_io(&device->flush_wait);
  
  	return bio->bi_status;
@@@ -3389,16 -3396,14 +3391,16 @@@ static int barrier_all_devices(struct b
  	int errors_wait = 0;
  	blk_status_t ret;
  
 +	lockdep_assert_held(&info->fs_devices->device_list_mutex);
  	/* send down all the barriers */
  	head = &info->fs_devices->devices;
 -	list_for_each_entry_rcu(dev, head, dev_list) {
 -		if (dev->missing)
 +	list_for_each_entry(dev, head, dev_list) {
 +		if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
  			continue;
  		if (!dev->bdev)
  			continue;
 -		if (!dev->in_fs_metadata || !dev->writeable)
 +		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
 +		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
  			continue;
  
  		write_dev_flush(dev);
@@@ -3406,15 -3411,14 +3408,15 @@@
  	}
  
  	/* wait for all the barriers */
 -	list_for_each_entry_rcu(dev, head, dev_list) {
 -		if (dev->missing)
 +	list_for_each_entry(dev, head, dev_list) {
 +		if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
  			continue;
  		if (!dev->bdev) {
  			errors_wait++;
  			continue;
  		}
 -		if (!dev->in_fs_metadata || !dev->writeable)
 +		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
 +		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
  			continue;
  
  		ret = wait_dev_flush(dev);
@@@ -3506,13 -3510,12 +3508,13 @@@ int write_all_supers(struct btrfs_fs_in
  		}
  	}
  
 -	list_for_each_entry_rcu(dev, head, dev_list) {
 +	list_for_each_entry(dev, head, dev_list) {
  		if (!dev->bdev) {
  			total_errors++;
  			continue;
  		}
 -		if (!dev->in_fs_metadata || !dev->writeable)
 +		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
 +		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
  			continue;
  
  		btrfs_set_stack_device_generation(dev_item, 0);
@@@ -3548,11 -3551,10 +3550,11 @@@
  	}
  
  	total_errors = 0;
 -	list_for_each_entry_rcu(dev, head, dev_list) {
 +	list_for_each_entry(dev, head, dev_list) {
  		if (!dev->bdev)
  			continue;
 -		if (!dev->in_fs_metadata || !dev->writeable)
 +		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
 +		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
  			continue;
  
  		ret = wait_dev_supers(dev, max_mirrors);
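
[Editor's note] Several disk-io.c hunks stop testing per-device bool members (dev->missing, dev->writeable, dev->in_fs_metadata, flush_bio_sent) and instead use set_bit()/test_bit()/clear_bit() on a BTRFS_DEV_STATE_* bitmap. The same idiom in isolation, with made-up names:

#include <linux/bitops.h>
#include <linux/types.h>

/* Bit numbers, not masks: set_bit()/test_bit() take bit indices. */
#define MYDEV_STATE_WRITEABLE	0
#define MYDEV_STATE_MISSING	1
#define MYDEV_STATE_FLUSH_SENT	2

struct mydev {
	unsigned long dev_state;
};

static void mydev_mark_flush_sent(struct mydev *d)
{
	set_bit(MYDEV_STATE_FLUSH_SENT, &d->dev_state);
}

static bool mydev_usable(const struct mydev *d)
{
	return test_bit(MYDEV_STATE_WRITEABLE, &d->dev_state) &&
	       !test_bit(MYDEV_STATE_MISSING, &d->dev_state);
}
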
diff --combined fs/btrfs/free-space-cache.c
index 9e8c1f046e02,fb1382893bfc..9088b0b0d10f
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@@ -22,6 -22,7 +22,7 @@@
  #include <linux/slab.h>
  #include <linux/math64.h>
  #include <linux/ratelimit.h>
+ #include <linux/bpf.h>
  #include "ctree.h"
  #include "free-space-cache.h"
  #include "transaction.h"
@@@ -332,6 -333,7 +333,7 @@@ static int io_ctl_init(struct btrfs_io_
  
  	return 0;
  }
+ BPF_ALLOW_ERROR_INJECTION(io_ctl_init);
  
  static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
  {
@@@ -993,7 -995,8 +995,7 @@@ update_cache_item(struct btrfs_trans_ha
  	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
  	if (ret < 0) {
  		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
 -				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
 -				 GFP_NOFS);
 +				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
  		goto fail;
  	}
  	leaf = path->nodes[0];
@@@ -1007,7 -1010,7 +1009,7 @@@
  			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
  					 inode->i_size - 1,
  					 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
 -					 NULL, GFP_NOFS);
 +					 NULL);
  			btrfs_release_path(path);
  			goto fail;
  		}
@@@ -1104,7 -1107,8 +1106,7 @@@ static int flush_dirty_cache(struct ino
  	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
  	if (ret)
  		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
 -				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
 -				 GFP_NOFS);
 +				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
  
  	return ret;
  }
@@@ -1125,7 -1129,8 +1127,7 @@@ cleanup_write_cache_enospc(struct inod
  {
  	io_ctl_drop_pages(io_ctl);
  	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
 -			     i_size_read(inode) - 1, cached_state,
 -			     GFP_NOFS);
 +			     i_size_read(inode) - 1, cached_state);
  }
  
  static int __btrfs_wait_cache_io(struct btrfs_root *root,
@@@ -1319,7 -1324,7 +1321,7 @@@ static int __btrfs_write_out_cache(stru
  	io_ctl_drop_pages(io_ctl);
  
  	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
 -			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
 +			     i_size_read(inode) - 1, &cached_state);
  
  	/*
  	 * at this point the pages are under IO and we're happy,
@@@ -3545,7 -3550,7 +3547,7 @@@ int btrfs_write_out_ino_cache(struct bt
  	if (ret) {
  		if (release_metadata)
  			btrfs_delalloc_release_metadata(BTRFS_I(inode),
 -					inode->i_size);
 +					inode->i_size, true);
  #ifdef DEBUG
  		btrfs_err(fs_info,
  			  "failed to write free ino cache for root %llu",
diff --combined include/linux/bpf.h
index b63a592ad29d,da54ef644fcd..0dcd1d7c9825
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@@ -200,6 -200,9 +200,9 @@@ struct bpf_prog_aux 
  	u32 max_ctx_offset;
  	u32 stack_depth;
  	u32 id;
+ 	u32 func_cnt;
+ 	struct bpf_prog **func;
+ 	void *jit_data; /* JIT specific data. arch dependent */
  	struct latch_tree_node ksym_tnode;
  	struct list_head ksym_lnode;
  	const struct bpf_prog_ops *ops;
@@@ -285,6 -288,9 +288,9 @@@ int bpf_prog_array_copy_to_user(struct 
  
  void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
  				struct bpf_prog *old_prog);
+ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
+ 			     __u32 __user *prog_ids, u32 request_cnt,
+ 			     __u32 __user *prog_cnt);
  int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
  			struct bpf_prog *exclude_prog,
  			struct bpf_prog *include_prog,
@@@ -399,6 -405,7 +405,7 @@@ static inline void bpf_long_memcpy(voi
  
  /* verify correctness of eBPF program */
  int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
+ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
  
  /* Map specifics */
  struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
@@@ -419,8 -426,6 +426,8 @@@ static inline int bpf_map_attr_numa_nod
  		attr->numa_node : NUMA_NO_NODE;
  }
  
 +struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
 +
  #else /* !CONFIG_BPF_SYSCALL */
  static inline struct bpf_prog *bpf_prog_get(u32 ufd)
  {
@@@ -508,12 -513,6 +515,12 @@@ static inline int cpu_map_enqueue(struc
  {
  	return 0;
  }
 +
 +static inline struct bpf_prog *bpf_prog_get_type_path(const char *name,
 +				enum bpf_prog_type type)
 +{
 +	return ERR_PTR(-EOPNOTSUPP);
 +}
  #endif /* CONFIG_BPF_SYSCALL */
  
  static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
@@@ -522,8 -521,6 +529,8 @@@
  	return bpf_prog_get_type_dev(ufd, type, false);
  }
  
 +bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);
 +
  int bpf_prog_offload_compile(struct bpf_prog *prog);
  void bpf_prog_offload_destroy(struct bpf_prog *prog);
  
@@@ -586,4 -583,15 +593,15 @@@ extern const struct bpf_func_proto bpf_
  void bpf_user_rnd_init_once(void);
  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
  
+ #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+ #ifdef CONFIG_BPF_KPROBE_OVERRIDE
+ #define BPF_ALLOW_ERROR_INJECTION(fname)				\
+ static unsigned long __used						\
+ 	__attribute__((__section__("_kprobe_error_inject_list")))	\
+ 	_eil_addr_##fname = (unsigned long)fname;
+ #else
+ #define BPF_ALLOW_ERROR_INJECTION(fname)
+ #endif
+ #endif
+ 
  #endif /* _LINUX_BPF_H */
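
[Editor's note] The new BPF_ALLOW_ERROR_INJECTION() macro records a function's address in a dedicated section so that, with CONFIG_BPF_KPROBE_OVERRIDE, a BPF program attached via kprobe may override its return value; the btrfs hunks above tag open_ctree() and io_ctl_init() this way. Tagging some other fallible, int-returning function would look like this hedged, entirely made-up example:

#include <linux/bpf.h>

struct mydrv_priv;

/* Hypothetical driver helper; the name and body are illustrative only. */
static int mydrv_alloc_rings(struct mydrv_priv *priv)
{
	/* real allocation work would go here */
	return 0;
}
BPF_ALLOW_ERROR_INJECTION(mydrv_alloc_rings);

With CONFIG_BPF_KPROBE_OVERRIDE disabled, the macro expands to nothing and the annotation has no cost.
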
diff --combined include/linux/bpf_verifier.h
index 1632bb13ad8a,aaac589e490c..c009e472f647
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@@ -15,11 -15,11 +15,11 @@@
   * In practice this is far bigger than any realistic pointer offset; this limit
   * ensures that umax_value + (int)off + (int)size cannot overflow a u64.
   */
 -#define BPF_MAX_VAR_OFF	(1ULL << 31)
 +#define BPF_MAX_VAR_OFF	(1 << 29)
  /* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO].  This ensures
   * that converting umax_value to int cannot overflow.
   */
 -#define BPF_MAX_VAR_SIZ	INT_MAX
 +#define BPF_MAX_VAR_SIZ	(1 << 29)
  
  /* Liveness marks, used for registers and spilled-regs (in stack slots).
   * Read marks propagate upwards until they find a write mark; they record that
@@@ -76,6 -76,14 +76,14 @@@ struct bpf_reg_state 
  	s64 smax_value; /* maximum possible (s64)value */
  	u64 umin_value; /* minimum possible (u64)value */
  	u64 umax_value; /* maximum possible (u64)value */
+ 	/* Inside the callee two registers can be both PTR_TO_STACK like
+ 	 * R1=fp-8 and R2=fp-8, but one of them points to this function stack
+ 	 * while another to the caller's stack. To differentiate them 'frameno'
+ 	 * is used which is an index in bpf_verifier_state->frame[] array
+ 	 * pointing to bpf_func_state.
+ 	 * This field must be second to last, for states_equal() reasons.
+ 	 */
+ 	u32 frameno;
  	/* This field must be last, for states_equal() reasons. */
  	enum bpf_reg_liveness live;
  };
@@@ -83,7 -91,8 +91,8 @@@
  enum bpf_stack_slot_type {
  	STACK_INVALID,    /* nothing was stored in this stack slot */
  	STACK_SPILL,      /* register spilled into stack */
- 	STACK_MISC	  /* BPF program wrote some data into this slot */
+ 	STACK_MISC,	  /* BPF program wrote some data into this slot */
+ 	STACK_ZERO,	  /* BPF program wrote constant zero */
  };
  
  #define BPF_REG_SIZE 8	/* size of eBPF register in bytes */
@@@ -96,13 -105,34 +105,34 @@@ struct bpf_stack_state 
  /* state of the program:
   * type of all registers and stack info
   */
- struct bpf_verifier_state {
+ struct bpf_func_state {
  	struct bpf_reg_state regs[MAX_BPF_REG];
  	struct bpf_verifier_state *parent;
+ 	/* index of call instruction that called into this func */
+ 	int callsite;
+ 	/* stack frame number of this function state from pov of
+ 	 * enclosing bpf_verifier_state.
+ 	 * 0 = main function, 1 = first callee.
+ 	 */
+ 	u32 frameno;
+ 	/* subprog number == index within subprog_stack_depth
+ 	 * zero == main subprog
+ 	 */
+ 	u32 subprogno;
+ 
+ 	/* should be second to last. See copy_func_state() */
  	int allocated_stack;
  	struct bpf_stack_state *stack;
  };
  
+ #define MAX_CALL_FRAMES 8
+ struct bpf_verifier_state {
+ 	/* call stack tracking */
+ 	struct bpf_func_state *frame[MAX_CALL_FRAMES];
+ 	struct bpf_verifier_state *parent;
+ 	u32 curframe;
+ };
+ 
  /* linked list of verifier states used to prune search */
  struct bpf_verifier_state_list {
  	struct bpf_verifier_state state;
@@@ -113,6 -143,7 +143,7 @@@ struct bpf_insn_aux_data 
  	union {
  		enum bpf_reg_type ptr_type;	/* pointer type for load/store insns */
  		struct bpf_map *map_ptr;	/* pointer for call insn into lookup_elem */
+ 		s32 call_imm;			/* saved imm field of call insn */
  	};
  	int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
  	bool seen; /* this insn was processed by the verifier */
@@@ -141,6 -172,8 +172,8 @@@ struct bpf_ext_analyzer_ops 
  			 int insn_idx, int prev_insn_idx);
  };
  
+ #define BPF_MAX_SUBPROGS 256
+ 
  /* single container for all structs
   * one verifier_env per bpf_check() call
   */
@@@ -159,13 -192,17 +192,17 @@@ struct bpf_verifier_env 
  	bool allow_ptr_leaks;
  	bool seen_direct_write;
  	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
- 
  	struct bpf_verifer_log log;
+ 	u32 subprog_starts[BPF_MAX_SUBPROGS];
+ 	u16 subprog_stack_depth[BPF_MAX_SUBPROGS + 1];
+ 	u32 subprog_cnt;
  };
  
  static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
  {
- 	return env->cur_state->regs;
+ 	struct bpf_verifier_state *cur = env->cur_state;
+ 
+ 	return cur->frame[cur->curframe]->regs;
  }
  
  #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
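
[Editor's note] With bpf-to-bpf calls the verifier state becomes a stack of bpf_func_state frames, and cur_regs() above shows how the innermost frame's registers are reached. A companion accessor for the whole frame might look like the following sketch; the upstream verifier has an equivalent helper, but the exact form here is an assumption:

#include <linux/bpf_verifier.h>

/* Return the bpf_func_state of the innermost (currently analysed) frame. */
static inline struct bpf_func_state *
cur_func_state(struct bpf_verifier_env *env)
{
	struct bpf_verifier_state *cur = env->cur_state;

	return cur->frame[cur->curframe];
}
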
diff --combined include/linux/module.h
index e6249795f9e2,548fa09fa806..0fd65481c045
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@@ -475,6 -475,11 +475,11 @@@ struct module 
  	ctor_fn_t *ctors;
  	unsigned int num_ctors;
  #endif
+ 
+ #ifdef CONFIG_BPF_KPROBE_OVERRIDE
+ 	unsigned int num_kprobe_ei_funcs;
+ 	unsigned long *kprobe_ei_funcs;
+ #endif
  } ____cacheline_aligned __randomize_layout;
  #ifndef MODULE_ARCH_INIT
  #define MODULE_ARCH_INIT {}
@@@ -606,9 -611,6 +611,9 @@@ int ref_module(struct module *a, struc
  	__mod ? __mod->name : "kernel";		\
  })
  
 +/* Dereference module function descriptor */
 +void *dereference_module_function_descriptor(struct module *mod, void *ptr);
 +
  /* For kallsyms to ask for address resolution.  namebuf should be at
   * least KSYM_NAME_LEN long: a pointer to namebuf is returned if
   * found, otherwise NULL. */
@@@ -763,13 -765,6 +768,13 @@@ static inline bool is_module_sig_enforc
  	return false;
  }
  
 +/* Dereference module function descriptor */
 +static inline
 +void *dereference_module_function_descriptor(struct module *mod, void *ptr)
 +{
 +	return ptr;
 +}
 +
  #endif /* CONFIG_MODULES */
  
  #ifdef CONFIG_SYSFS
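
[Editor's note] dereference_module_function_descriptor() is aimed at architectures whose ABI represents function pointers as descriptors; on everything else the inline stub above simply returns the pointer unchanged. A hedged caller-side sketch, with made-up names:

#include <linux/module.h>

/* Turn a symbol value that may be a function descriptor into the actual
 * entry (text) address.  Illustrative helper, not part of this patch. */
static void *module_sym_text_addr(struct module *mod, void *sym_value)
{
	return dereference_module_function_descriptor(mod, sym_value);
}
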
diff --combined include/linux/pci.h
index 95807535d175,0314e0716c30..66cca1c6f742
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@@ -48,17 -48,17 +48,17 @@@
   * In the interest of not exposing interfaces to user-space unnecessarily,
   * the following kernel-only defines are being added here.
   */
 -#define PCI_DEVID(bus, devfn)  ((((u16)(bus)) << 8) | (devfn))
 +#define PCI_DEVID(bus, devfn)	((((u16)(bus)) << 8) | (devfn))
  /* return bus from PCI devid = ((u16)bus_number) << 8) | devfn */
  #define PCI_BUS_NUM(x) (((x) >> 8) & 0xff)
  
  /* pci_slot represents a physical slot */
  struct pci_slot {
 -	struct pci_bus *bus;		/* The bus this slot is on */
 -	struct list_head list;		/* node in list of slots on this bus */
 -	struct hotplug_slot *hotplug;	/* Hotplug info (migrate over time) */
 -	unsigned char number;		/* PCI_SLOT(pci_dev->devfn) */
 -	struct kobject kobj;
 +	struct pci_bus		*bus;		/* Bus this slot is on */
 +	struct list_head	list;		/* Node in list of slots */
 +	struct hotplug_slot	*hotplug;	/* Hotplug info (move here) */
 +	unsigned char		number;		/* PCI_SLOT(pci_dev->devfn) */
 +	struct kobject		kobj;
  };
  
  static inline const char *pci_slot_name(const struct pci_slot *slot)
@@@ -72,7 -72,9 +72,7 @@@ enum pci_mmap_state 
  	pci_mmap_mem
  };
  
 -/*
 - *  For PCI devices, the region numbers are assigned this way:
 - */
 +/* For PCI devices, the region numbers are assigned this way: */
  enum {
  	/* #0-5: standard PCI resources */
  	PCI_STD_RESOURCES,
@@@ -81,23 -83,23 +81,23 @@@
  	/* #6: expansion ROM resource */
  	PCI_ROM_RESOURCE,
  
 -	/* device specific resources */
 +	/* Device-specific resources */
  #ifdef CONFIG_PCI_IOV
  	PCI_IOV_RESOURCES,
  	PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1,
  #endif
  
 -	/* resources assigned to buses behind the bridge */
 +	/* Resources assigned to buses behind the bridge */
  #define PCI_BRIDGE_RESOURCE_NUM 4
  
  	PCI_BRIDGE_RESOURCES,
  	PCI_BRIDGE_RESOURCE_END = PCI_BRIDGE_RESOURCES +
  				  PCI_BRIDGE_RESOURCE_NUM - 1,
  
 -	/* total resources associated with a PCI device */
 +	/* Total resources associated with a PCI device */
  	PCI_NUM_RESOURCES,
  
 -	/* preserve this for compatibility */
 +	/* Preserve this for compatibility */
  	DEVICE_COUNT_RESOURCE = PCI_NUM_RESOURCES,
  };
  
@@@ -150,10 -152,9 +150,10 @@@ static inline const char *pci_power_nam
  #define PCI_PM_D3COLD_WAIT	100
  #define PCI_PM_BUS_WAIT		50
  
 -/** The pci_channel state describes connectivity between the CPU and
 - *  the pci device.  If some PCI bus between here and the pci device
 - *  has crashed or locked up, this info is reflected here.
 +/**
 + * The pci_channel state describes connectivity between the CPU and
 + * the PCI device.  If some PCI bus between here and the PCI device
 + * has crashed or locked up, this info is reflected here.
   */
  typedef unsigned int __bitwise pci_channel_state_t;
  
@@@ -183,7 -184,9 +183,7 @@@ enum pcie_reset_state 
  
  typedef unsigned short __bitwise pci_dev_flags_t;
  enum pci_dev_flags {
 -	/* INTX_DISABLE in PCI_COMMAND register disables MSI
 -	 * generation too.
 -	 */
 +	/* INTX_DISABLE in PCI_COMMAND register disables MSI too */
  	PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) (1 << 0),
  	/* Device configuration is irrevocably lost if disabled into D3 */
  	PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) (1 << 1),
@@@ -199,7 -202,7 +199,7 @@@
  	PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7),
  	/* Get VPD from function 0 VPD */
  	PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8),
 -	/* a non-root bridge where translation occurs, stop alias search here */
 +	/* A non-root bridge where translation occurs, stop alias search here */
  	PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9),
  	/* Do not use FLR even if device advertises PCI_AF_CAP */
  	PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
@@@ -219,17 -222,17 +219,17 @@@ enum pci_bus_flags 
  	PCI_BUS_FLAGS_NO_AERSID	= (__force pci_bus_flags_t) 4,
  };
  
 -/* These values come from the PCI Express Spec */
 +/* Values from Link Status register, PCIe r3.1, sec 7.8.8 */
  enum pcie_link_width {
  	PCIE_LNK_WIDTH_RESRV	= 0x00,
  	PCIE_LNK_X1		= 0x01,
  	PCIE_LNK_X2		= 0x02,
  	PCIE_LNK_X4		= 0x04,
  	PCIE_LNK_X8		= 0x08,
 -	PCIE_LNK_X12		= 0x0C,
 +	PCIE_LNK_X12		= 0x0c,
  	PCIE_LNK_X16		= 0x10,
  	PCIE_LNK_X32		= 0x20,
 -	PCIE_LNK_WIDTH_UNKNOWN  = 0xFF,
 +	PCIE_LNK_WIDTH_UNKNOWN	= 0xff,
  };
  
  /* Based on the PCI Hotplug Spec, but some values are made up by us */
@@@ -260,15 -263,15 +260,15 @@@ enum pci_bus_speed 
  };
  
  struct pci_cap_saved_data {
 -	u16 cap_nr;
 -	bool cap_extended;
 -	unsigned int size;
 -	u32 data[0];
 +	u16		cap_nr;
 +	bool		cap_extended;
 +	unsigned int	size;
 +	u32		data[0];
  };
  
  struct pci_cap_saved_state {
 -	struct hlist_node next;
 -	struct pci_cap_saved_data cap;
 +	struct hlist_node		next;
 +	struct pci_cap_saved_data	cap;
  };
  
  struct irq_affinity;
@@@ -277,17 -280,19 +277,17 @@@ struct pci_vpd
  struct pci_sriov;
  struct pci_ats;
  
 -/*
 - * The pci_dev structure is used to describe PCI devices.
 - */
 +/* The pci_dev structure describes PCI devices */
  struct pci_dev {
 -	struct list_head bus_list;	/* node in per-bus list */
 -	struct pci_bus	*bus;		/* bus this device is on */
 -	struct pci_bus	*subordinate;	/* bus this device bridges to */
 +	struct list_head bus_list;	/* Node in per-bus list */
 +	struct pci_bus	*bus;		/* Bus this device is on */
 +	struct pci_bus	*subordinate;	/* Bus this device bridges to */
  
 -	void		*sysdata;	/* hook for sys-specific extension */
 -	struct proc_dir_entry *procent;	/* device entry in /proc/bus/pci */
 +	void		*sysdata;	/* Hook for sys-specific extension */
 +	struct proc_dir_entry *procent;	/* Device entry in /proc/bus/pci */
  	struct pci_slot	*slot;		/* Physical slot this device is in */
  
 -	unsigned int	devfn;		/* encoded device & function index */
 +	unsigned int	devfn;		/* Encoded device & function index */
  	unsigned short	vendor;
  	unsigned short	device;
  	unsigned short	subsystem_vendor;
@@@ -302,12 -307,12 +302,12 @@@
  	u8		msi_cap;	/* MSI capability offset */
  	u8		msix_cap;	/* MSI-X capability offset */
  	u8		pcie_mpss:3;	/* PCIe Max Payload Size Supported */
 -	u8		rom_base_reg;	/* which config register controls the ROM */
 -	u8		pin;		/* which interrupt pin this device uses */
 -	u16		pcie_flags_reg;	/* cached PCIe Capabilities Register */
 -	unsigned long	*dma_alias_mask;/* mask of enabled devfn aliases */
 +	u8		rom_base_reg;	/* Config register controlling ROM */
 +	u8		pin;		/* Interrupt pin this device uses */
 +	u16		pcie_flags_reg;	/* Cached PCIe Capabilities Register */
 +	unsigned long	*dma_alias_mask;/* Mask of enabled devfn aliases */
  
 -	struct pci_driver *driver;	/* which driver has allocated this device */
 +	struct pci_driver *driver;	/* Driver bound to this device */
  	u64		dma_mask;	/* Mask of the bits of bus address this
  					   device implements.  Normally this is
  					   0xffffffff.  You only need to change
@@@ -316,9 -321,9 +316,9 @@@
  
  	struct device_dma_parameters dma_parms;
  
 -	pci_power_t     current_state;  /* Current operating state. In ACPI-speak,
 -					   this is D0-D3, D0 being fully functional,
 -					   and D3 being off. */
 +	pci_power_t	current_state;	/* Current operating state. In ACPI,
 +					   this is D0-D3, D0 being fully
 +					   functional, and D3 being off. */
  	u8		pm_cap;		/* PM capability offset */
  	unsigned int	pme_support:5;	/* Bitmask of states from which PME#
  					   can be generated */
@@@ -329,10 -334,10 +329,10 @@@
  	unsigned int	no_d3cold:1;	/* D3cold is forbidden */
  	unsigned int	bridge_d3:1;	/* Allow D3 for bridge */
  	unsigned int	d3cold_allowed:1;	/* D3cold is allowed by user */
 -	unsigned int	mmio_always_on:1;	/* disallow turning off io/mem
 -						   decoding during bar sizing */
 +	unsigned int	mmio_always_on:1;	/* Disallow turning off io/mem
 +						   decoding during BAR sizing */
  	unsigned int	wakeup_prepared:1;
 -	unsigned int	runtime_d3cold:1;	/* whether go through runtime
 +	unsigned int	runtime_d3cold:1;	/* Whether go through runtime
  						   D3cold, not set for devices
  						   powered on/off by the
  						   corresponding bridge */
@@@ -345,14 -350,12 +345,14 @@@
  
  #ifdef CONFIG_PCIEASPM
  	struct pcie_link_state	*link_state;	/* ASPM link state */
 +	unsigned int	ltr_path:1;	/* Latency Tolerance Reporting
 +					   supported from root to here */
  #endif
  
 -	pci_channel_state_t error_state;	/* current connectivity state */
 -	struct	device	dev;		/* Generic device interface */
 +	pci_channel_state_t error_state;	/* Current connectivity state */
 +	struct device	dev;			/* Generic device interface */
  
 -	int		cfg_size;	/* Size of configuration space */
 +	int		cfg_size;		/* Size of config space */
  
  	/*
  	 * Instead of touching interrupt line and base address registers
@@@ -361,47 -364,47 +361,47 @@@
  	unsigned int	irq;
  	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
  
 -	bool match_driver;		/* Skip attaching driver */
 -	/* These fields are used by common fixups */
 -	unsigned int	transparent:1;	/* Subtractive decode PCI bridge */
 -	unsigned int	multifunction:1;/* Part of multi-function device */
 -	/* keep track of device state */
 +	bool		match_driver;		/* Skip attaching driver */
 +
 +	unsigned int	transparent:1;		/* Subtractive decode bridge */
 +	unsigned int	multifunction:1;	/* Multi-function device */
 +
  	unsigned int	is_added:1;
 -	unsigned int	is_busmaster:1; /* device is busmaster */
 -	unsigned int	no_msi:1;	/* device may not use msi */
 -	unsigned int	no_64bit_msi:1; /* device may only use 32-bit MSIs */
 -	unsigned int	block_cfg_access:1;	/* config space access is blocked */
 -	unsigned int	broken_parity_status:1;	/* Device generates false positive parity */
 -	unsigned int	irq_reroute_variant:2;	/* device needs IRQ rerouting variant */
 +	unsigned int	is_busmaster:1;		/* Is busmaster */
 +	unsigned int	no_msi:1;		/* May not use MSI */
 +	unsigned int	no_64bit_msi:1; 	/* May only use 32-bit MSIs */
 +	unsigned int	block_cfg_access:1;	/* Config space access blocked */
 +	unsigned int	broken_parity_status:1;	/* Generates false positive parity */
 +	unsigned int	irq_reroute_variant:2;	/* Needs IRQ rerouting variant */
  	unsigned int	msi_enabled:1;
  	unsigned int	msix_enabled:1;
 -	unsigned int	ari_enabled:1;	/* ARI forwarding */
 -	unsigned int	ats_enabled:1;	/* Address Translation Service */
 +	unsigned int	ari_enabled:1;		/* ARI forwarding */
 +	unsigned int	ats_enabled:1;		/* Address Translation Svc */
  	unsigned int	pasid_enabled:1;	/* Process Address Space ID */
  	unsigned int	pri_enabled:1;		/* Page Request Interface */
  	unsigned int	is_managed:1;
 -	unsigned int    needs_freset:1; /* Dev requires fundamental reset */
 +	unsigned int	needs_freset:1;		/* Requires fundamental reset */
  	unsigned int	state_saved:1;
  	unsigned int	is_physfn:1;
  	unsigned int	is_virtfn:1;
  	unsigned int	reset_fn:1;
 -	unsigned int    is_hotplug_bridge:1;
 -	unsigned int	is_thunderbolt:1; /* Thunderbolt controller */
 -	unsigned int    __aer_firmware_first_valid:1;
 +	unsigned int	is_hotplug_bridge:1;
 +	unsigned int	is_thunderbolt:1;	/* Thunderbolt controller */
 +	unsigned int	__aer_firmware_first_valid:1;
  	unsigned int	__aer_firmware_first:1;
 -	unsigned int	broken_intx_masking:1; /* INTx masking can't be used */
 -	unsigned int	io_window_1k:1;	/* Intel P2P bridge 1K I/O windows */
 +	unsigned int	broken_intx_masking:1;	/* INTx masking can't be used */
 +	unsigned int	io_window_1k:1;		/* Intel bridge 1K I/O windows */
  	unsigned int	irq_managed:1;
  	unsigned int	has_secondary_link:1;
 -	unsigned int	non_compliant_bars:1;	/* broken BARs; ignore them */
 -	unsigned int	is_probed:1;		/* device probing in progress */
 +	unsigned int	non_compliant_bars:1;	/* Broken BARs; ignore them */
 +	unsigned int	is_probed:1;		/* Device probing in progress */
  	pci_dev_flags_t dev_flags;
  	atomic_t	enable_cnt;	/* pci_enable_device has been called */
  
 -	u32		saved_config_space[16]; /* config space saved at suspend time */
 +	u32		saved_config_space[16]; /* Config space saved at suspend time */
  	struct hlist_head saved_cap_space;
 -	struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */
 -	int rom_attr_enabled;		/* has display of the rom attribute been enabled? */
 +	struct bin_attribute *rom_attr;		/* Attribute descriptor for sysfs ROM entry */
 +	int		rom_attr_enabled;	/* Display of ROM attribute enabled? */
  	struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
  	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
  
@@@ -416,12 -419,12 +416,12 @@@
  	struct pci_vpd *vpd;
  #ifdef CONFIG_PCI_ATS
  	union {
 -		struct pci_sriov *sriov;	/* SR-IOV capability related */
 -		struct pci_dev *physfn;	/* the PF this VF is associated with */
 +		struct pci_sriov	*sriov;		/* PF: SR-IOV info */
 +		struct pci_dev		*physfn;	/* VF: related PF */
  	};
  	u16		ats_cap;	/* ATS Capability offset */
  	u8		ats_stu;	/* ATS Smallest Translation Unit */
 -	atomic_t	ats_ref_cnt;	/* number of VFs with ATS enabled */
 +	atomic_t	ats_ref_cnt;	/* Number of VFs with ATS enabled */
  #endif
  #ifdef CONFIG_PCI_PRI
  	u32		pri_reqs_alloc; /* Number of PRI requests allocated */
@@@ -429,11 -432,11 +429,11 @@@
  #ifdef CONFIG_PCI_PASID
  	u16		pasid_features;
  #endif
 -	phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */
 -	size_t romlen; /* Length of ROM if it's not from the BAR */
 -	char *driver_override; /* Driver name to force a match */
 +	phys_addr_t	rom;		/* Physical address if not from BAR */
 +	size_t		romlen;		/* Length if not from BAR */
 +	char		*driver_override; /* Driver name to force a match */
  
 -	unsigned long priv_flags; /* Private flags for the pci driver */
 +	unsigned long	priv_flags;	/* Private flags for the PCI driver */
  };
  
  static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
@@@ -456,26 -459,26 +456,26 @@@ static inline int pci_channel_offline(s
  }
  
  struct pci_host_bridge {
 -	struct device dev;
 -	struct pci_bus *bus;		/* root bus */
 -	struct pci_ops *ops;
 -	void *sysdata;
 -	int busnr;
 +	struct device	dev;
 +	struct pci_bus	*bus;		/* Root bus */
 +	struct pci_ops	*ops;
 +	void		*sysdata;
 +	int		busnr;
  	struct list_head windows;	/* resource_entry */
 -	u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* platform IRQ swizzler */
 +	u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* Platform IRQ swizzler */
  	int (*map_irq)(const struct pci_dev *, u8, u8);
  	void (*release_fn)(struct pci_host_bridge *);
 -	void *release_data;
 +	void		*release_data;
  	struct msi_controller *msi;
 -	unsigned int ignore_reset_delay:1;	/* for entire hierarchy */
 -	unsigned int no_ext_tags:1;		/* no Extended Tags */
 +	unsigned int	ignore_reset_delay:1;	/* For entire hierarchy */
 +	unsigned int	no_ext_tags:1;		/* No Extended Tags */
  	/* Resource alignment requirements */
  	resource_size_t (*align_resource)(struct pci_dev *dev,
  			const struct resource *res,
  			resource_size_t start,
  			resource_size_t size,
  			resource_size_t align);
 -	unsigned long private[0] ____cacheline_aligned;
 +	unsigned long	private[0] ____cacheline_aligned;
  };
  
  #define	to_pci_host_bridge(n) container_of(n, struct pci_host_bridge, dev)
@@@ -497,8 -500,8 +497,8 @@@ void pci_free_host_bridge(struct pci_ho
  struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
  
  void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
 -		     void (*release_fn)(struct pci_host_bridge *),
 -		     void *release_data);
 +				 void (*release_fn)(struct pci_host_bridge *),
 +				 void *release_data);
  
  int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge);
  
@@@ -518,32 -521,32 +518,32 @@@
  #define PCI_SUBTRACTIVE_DECODE	0x1
  
  struct pci_bus_resource {
 -	struct list_head list;
 -	struct resource *res;
 -	unsigned int flags;
 +	struct list_head	list;
 +	struct resource		*res;
 +	unsigned int		flags;
  };
  
  #define PCI_REGION_FLAG_MASK	0x0fU	/* These bits of resource flags tell us the PCI region flags */
  
  struct pci_bus {
 -	struct list_head node;		/* node in list of buses */
 -	struct pci_bus	*parent;	/* parent bus this bridge is on */
 -	struct list_head children;	/* list of child buses */
 -	struct list_head devices;	/* list of devices on this bus */
 -	struct pci_dev	*self;		/* bridge device as seen by parent */
 -	struct list_head slots;		/* list of slots on this bus;
 +	struct list_head node;		/* Node in list of buses */
 +	struct pci_bus	*parent;	/* Parent bus this bridge is on */
 +	struct list_head children;	/* List of child buses */
 +	struct list_head devices;	/* List of devices on this bus */
 +	struct pci_dev	*self;		/* Bridge device as seen by parent */
 +	struct list_head slots;		/* List of slots on this bus;
  					   protected by pci_slot_mutex */
  	struct resource *resource[PCI_BRIDGE_RESOURCE_NUM];
 -	struct list_head resources;	/* address space routed to this bus */
 -	struct resource busn_res;	/* bus numbers routed to this bus */
 +	struct list_head resources;	/* Address space routed to this bus */
 +	struct resource busn_res;	/* Bus numbers routed to this bus */
  
 -	struct pci_ops	*ops;		/* configuration access functions */
 +	struct pci_ops	*ops;		/* Configuration access functions */
  	struct msi_controller *msi;	/* MSI controller */
 -	void		*sysdata;	/* hook for sys-specific extension */
 -	struct proc_dir_entry *procdir;	/* directory entry in /proc/bus/pci */
 +	void		*sysdata;	/* Hook for sys-specific extension */
 +	struct proc_dir_entry *procdir;	/* Directory entry in /proc/bus/pci */
  
 -	unsigned char	number;		/* bus number */
 -	unsigned char	primary;	/* number of primary bridge */
 +	unsigned char	number;		/* Bus number */
 +	unsigned char	primary;	/* Number of primary bridge */
  	unsigned char	max_bus_speed;	/* enum pci_bus_speed */
  	unsigned char	cur_bus_speed;	/* enum pci_bus_speed */
  #ifdef CONFIG_PCI_DOMAINS_GENERIC
@@@ -552,12 -555,12 +552,12 @@@
  
  	char		name[48];
  
 -	unsigned short  bridge_ctl;	/* manage NO_ISA/FBB/et al behaviors */
 -	pci_bus_flags_t bus_flags;	/* inherited by child buses */
 +	unsigned short	bridge_ctl;	/* Manage NO_ISA/FBB/et al behaviors */
 +	pci_bus_flags_t bus_flags;	/* Inherited by child buses */
  	struct device		*bridge;
  	struct device		dev;
 -	struct bin_attribute	*legacy_io; /* legacy I/O for this bus */
 -	struct bin_attribute	*legacy_mem; /* legacy mem */
 +	struct bin_attribute	*legacy_io;	/* Legacy I/O for this bus */
 +	struct bin_attribute	*legacy_mem;	/* Legacy mem */
  	unsigned int		is_added:1;
  };
  
@@@ -614,7 -617,9 +614,7 @@@ static inline bool pci_dev_msi_enabled(
  static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { return false; }
  #endif
  
 -/*
 - * Error values that may be returned by PCI functions.
 - */
 +/* Error values that may be returned by PCI functions */
  #define PCIBIOS_SUCCESSFUL		0x00
  #define PCIBIOS_FUNC_NOT_SUPPORTED	0x81
  #define PCIBIOS_BAD_VENDOR_ID		0x83
@@@ -623,7 -628,9 +623,7 @@@
  #define PCIBIOS_SET_FAILED		0x88
  #define PCIBIOS_BUFFER_TOO_SMALL	0x89
  
 -/*
 - * Translate above to generic errno for passing back through non-PCI code.
 - */
 +/* Translate above to generic errno for passing back through non-PCI code */
  static inline int pcibios_err_to_errno(int err)
  {
  	if (err <= PCIBIOS_SUCCESSFUL)
@@@ -673,13 -680,13 +673,13 @@@ typedef u32 pci_bus_addr_t
  #endif
  
  struct pci_bus_region {
 -	pci_bus_addr_t start;
 -	pci_bus_addr_t end;
 +	pci_bus_addr_t	start;
 +	pci_bus_addr_t	end;
  };
  
  struct pci_dynids {
 -	spinlock_t lock;            /* protects list, index */
 -	struct list_head list;      /* for IDs added at runtime */
 +	spinlock_t		lock;	/* Protects list, index */
 +	struct list_head	list;	/* For IDs added at runtime */
  };
  
  
@@@ -693,13 -700,13 +693,13 @@@
  typedef unsigned int __bitwise pci_ers_result_t;
  
  enum pci_ers_result {
 -	/* no result/none/not supported in device driver */
 +	/* No result/none/not supported in device driver */
  	PCI_ERS_RESULT_NONE = (__force pci_ers_result_t) 1,
  
  	/* Device driver can recover without slot reset */
  	PCI_ERS_RESULT_CAN_RECOVER = (__force pci_ers_result_t) 2,
  
 -	/* Device driver wants slot to be reset. */
 +	/* Device driver wants slot to be reset */
  	PCI_ERS_RESULT_NEED_RESET = (__force pci_ers_result_t) 3,
  
  	/* Device has completely failed, is unrecoverable */
@@@ -735,27 -742,27 +735,27 @@@ struct pci_error_handlers 
  
  struct module;
  struct pci_driver {
 -	struct list_head node;
 -	const char *name;
 -	const struct pci_device_id *id_table;	/* must be non-NULL for probe to be called */
 -	int  (*probe)  (struct pci_dev *dev, const struct pci_device_id *id);	/* New device inserted */
 -	void (*remove) (struct pci_dev *dev);	/* Device removed (NULL if not a hot-plug capable driver) */
 -	int  (*suspend) (struct pci_dev *dev, pm_message_t state);	/* Device suspended */
 -	int  (*suspend_late) (struct pci_dev *dev, pm_message_t state);
 -	int  (*resume_early) (struct pci_dev *dev);
 -	int  (*resume) (struct pci_dev *dev);	                /* Device woken up */
 +	struct list_head	node;
 +	const char		*name;
 +	const struct pci_device_id *id_table;	/* Must be non-NULL for probe to be called */
 +	int  (*probe)(struct pci_dev *dev, const struct pci_device_id *id);	/* New device inserted */
 +	void (*remove)(struct pci_dev *dev);	/* Device removed (NULL if not a hot-plug capable driver) */
 +	int  (*suspend)(struct pci_dev *dev, pm_message_t state);	/* Device suspended */
 +	int  (*suspend_late)(struct pci_dev *dev, pm_message_t state);
 +	int  (*resume_early)(struct pci_dev *dev);
 +	int  (*resume) (struct pci_dev *dev);	/* Device woken up */
  	void (*shutdown) (struct pci_dev *dev);
 -	int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* PF pdev */
 +	int  (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* On PF */
  	const struct pci_error_handlers *err_handler;
  	const struct attribute_group **groups;
  	struct device_driver	driver;
 -	struct pci_dynids dynids;
 +	struct pci_dynids	dynids;
  };
  
  #define	to_pci_driver(drv) container_of(drv, struct pci_driver, driver)
  
  /**
 - * PCI_DEVICE - macro used to describe a specific pci device
 + * PCI_DEVICE - macro used to describe a specific PCI device
   * @vend: the 16 bit PCI Vendor ID
   * @dev: the 16 bit PCI Device ID
   *
@@@ -768,7 -775,7 +768,7 @@@
  	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
  
  /**
 - * PCI_DEVICE_SUB - macro used to describe a specific pci device with subsystem
 + * PCI_DEVICE_SUB - macro used to describe a specific PCI device with subsystem
   * @vend: the 16 bit PCI Vendor ID
   * @dev: the 16 bit PCI Device ID
   * @subvend: the 16 bit PCI Subvendor ID
@@@ -782,7 -789,7 +782,7 @@@
  	.subvendor = (subvend), .subdevice = (subdev)
  
  /**
 - * PCI_DEVICE_CLASS - macro used to describe a specific pci device class
 + * PCI_DEVICE_CLASS - macro used to describe a specific PCI device class
   * @dev_class: the class, subclass, prog-if triple for this device
   * @dev_class_mask: the class mask for this device
   *
@@@ -796,7 -803,7 +796,7 @@@
  	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
  
  /**
 - * PCI_VDEVICE - macro used to describe a specific pci device in short form
 + * PCI_VDEVICE - macro used to describe a specific PCI device in short form
   * @vend: the vendor name
   * @dev: the 16 bit PCI Device ID
   *
@@@ -805,21 -812,22 +805,21 @@@
   * to PCI_ANY_ID. The macro allows the next field to follow as the device
   * private data.
   */
 -
  #define PCI_VDEVICE(vend, dev) \
  	.vendor = PCI_VENDOR_ID_##vend, .device = (dev), \
  	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0
  
  enum {
 -	PCI_REASSIGN_ALL_RSRC	= 0x00000001,	/* ignore firmware setup */
 -	PCI_REASSIGN_ALL_BUS	= 0x00000002,	/* reassign all bus numbers */
 -	PCI_PROBE_ONLY		= 0x00000004,	/* use existing setup */
 -	PCI_CAN_SKIP_ISA_ALIGN	= 0x00000008,	/* don't do ISA alignment */
 -	PCI_ENABLE_PROC_DOMAINS	= 0x00000010,	/* enable domains in /proc */
 +	PCI_REASSIGN_ALL_RSRC	= 0x00000001,	/* Ignore firmware setup */
 +	PCI_REASSIGN_ALL_BUS	= 0x00000002,	/* Reassign all bus numbers */
 +	PCI_PROBE_ONLY		= 0x00000004,	/* Use existing setup */
 +	PCI_CAN_SKIP_ISA_ALIGN	= 0x00000008,	/* Don't do ISA alignment */
 +	PCI_ENABLE_PROC_DOMAINS	= 0x00000010,	/* Enable domains in /proc */
  	PCI_COMPAT_DOMAIN_0	= 0x00000020,	/* ... except domain 0 */
 -	PCI_SCAN_ALL_PCIE_DEVS	= 0x00000040,	/* scan all, not just dev 0 */
 +	PCI_SCAN_ALL_PCIE_DEVS	= 0x00000040,	/* Scan all, not just dev 0 */
  };
  
 -/* these external functions are only available when PCI support is enabled */
 +/* These external functions are only available when PCI support is enabled */
  #ifdef CONFIG_PCI
  
  extern unsigned int pci_flags;
@@@ -832,11 -840,11 +832,11 @@@ static inline int pci_has_flag(int flag
  void pcie_bus_configure_settings(struct pci_bus *bus);
  
  enum pcie_bus_config_types {
 -	PCIE_BUS_TUNE_OFF,	/* don't touch MPS at all */
 -	PCIE_BUS_DEFAULT,	/* ensure MPS matches upstream bridge */
 -	PCIE_BUS_SAFE,		/* use largest MPS boot-time devices support */
 -	PCIE_BUS_PERFORMANCE,	/* use MPS and MRRS for best performance */
 -	PCIE_BUS_PEER2PEER,	/* set MPS = 128 for all devices */
 +	PCIE_BUS_TUNE_OFF,	/* Don't touch MPS at all */
 +	PCIE_BUS_DEFAULT,	/* Ensure MPS matches upstream bridge */
 +	PCIE_BUS_SAFE,		/* Use largest MPS boot-time devices support */
 +	PCIE_BUS_PERFORMANCE,	/* Use MPS and MRRS for best performance */
 +	PCIE_BUS_PEER2PEER,	/* Set MPS = 128 for all devices */
  };
  
  extern enum pcie_bus_config_types pcie_bus_config;
@@@ -845,7 -853,7 +845,7 @@@ extern struct bus_type pci_bus_type
  
  /* Do NOT directly access these two variables, unless you are arch-specific PCI
   * code, or PCI core code. */
 -extern struct list_head pci_root_buses;	/* list of all known PCI buses */
 +extern struct list_head pci_root_buses;	/* List of all known PCI buses */
  /* Some device drivers need to know if PCI is initiated */
  int no_pci_devices(void);
  
@@@ -883,8 -891,8 +883,8 @@@ int pci_bus_insert_busn_res(struct pci_
  int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax);
  void pci_bus_release_busn_res(struct pci_bus *b);
  struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
 -					     struct pci_ops *ops, void *sysdata,
 -					     struct list_head *resources);
 +				  struct pci_ops *ops, void *sysdata,
 +				  struct list_head *resources);
  int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge);
  struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
  				int busnr);
@@@ -941,10 -949,10 +941,10 @@@ int pci_find_next_ht_capability(struct 
  struct pci_bus *pci_find_next_bus(const struct pci_bus *from);
  
  struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device,
 -				struct pci_dev *from);
 +			       struct pci_dev *from);
  struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device,
 -				unsigned int ss_vendor, unsigned int ss_device,
 -				struct pci_dev *from);
 +			       unsigned int ss_vendor, unsigned int ss_device,
 +			       struct pci_dev *from);
  struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn);
  struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus,
  					    unsigned int devfn);
@@@ -1020,7 -1028,7 +1020,7 @@@ static inline int pcie_capability_clear
  	return pcie_capability_clear_and_set_dword(dev, pos, clear, 0);
  }
  
 -/* user-space driven config access */
 +/* User-space driven config access */
  int pci_user_read_config_byte(struct pci_dev *dev, int where, u8 *val);
  int pci_user_read_config_word(struct pci_dev *dev, int where, u16 *val);
  int pci_user_read_config_dword(struct pci_dev *dev, int where, u32 *val);
@@@ -1064,6 -1072,7 +1064,7 @@@ int pci_set_pcie_reset_state(struct pci
  int pci_set_cacheline_size(struct pci_dev *dev);
  #define HAVE_PCI_SET_MWI
  int __must_check pci_set_mwi(struct pci_dev *dev);
+ int __must_check pcim_set_mwi(struct pci_dev *dev);
  int pci_try_set_mwi(struct pci_dev *dev);
  void pci_clear_mwi(struct pci_dev *dev);
  void pci_intx(struct pci_dev *dev, int enable);
@@@ -1162,7 -1171,7 +1163,7 @@@ unsigned int pci_rescan_bus(struct pci_
  void pci_lock_rescan_remove(void);
  void pci_unlock_rescan_remove(void);
  
 -/* Vital product data routines */
 +/* Vital Product Data routines */
  ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
  ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
  int pci_set_vpd_size(struct pci_dev *dev, size_t len);
@@@ -1247,7 -1256,9 +1248,7 @@@ static inline pci_bus_addr_t pci_bus_ad
  int __must_check __pci_register_driver(struct pci_driver *, struct module *,
  				       const char *mod_name);
  
 -/*
 - * pci_register_driver must be a macro so that KBUILD_MODNAME can be expanded
 - */
 +/* pci_register_driver() must be a macro so KBUILD_MODNAME can be expanded */
  #define pci_register_driver(driver)		\
  	__pci_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
  
@@@ -1262,7 -1273,8 +1263,7 @@@ void pci_unregister_driver(struct pci_d
   * use this macro once, and calling it replaces module_init() and module_exit()
   */
  #define module_pci_driver(__pci_driver) \
 -	module_driver(__pci_driver, pci_register_driver, \
 -		       pci_unregister_driver)
 +	module_driver(__pci_driver, pci_register_driver, pci_unregister_driver)
  
  /**
   * builtin_pci_driver() - Helper macro for registering a PCI driver
@@@ -1301,10 -1313,10 +1302,10 @@@ resource_size_t pcibios_iov_resource_al
  int pci_set_vga_state(struct pci_dev *pdev, bool decode,
  		      unsigned int command_bits, u32 flags);
  
 -#define PCI_IRQ_LEGACY		(1 << 0) /* allow legacy interrupts */
 -#define PCI_IRQ_MSI		(1 << 1) /* allow MSI interrupts */
 -#define PCI_IRQ_MSIX		(1 << 2) /* allow MSI-X interrupts */
 -#define PCI_IRQ_AFFINITY	(1 << 3) /* auto-assign affinity */
 +#define PCI_IRQ_LEGACY		(1 << 0) /* Allow legacy interrupts */
 +#define PCI_IRQ_MSI		(1 << 1) /* Allow MSI interrupts */
 +#define PCI_IRQ_MSIX		(1 << 2) /* Allow MSI-X interrupts */
 +#define PCI_IRQ_AFFINITY	(1 << 3) /* Auto-assign affinity */
  #define PCI_IRQ_ALL_TYPES \
  	(PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
  
@@@ -1323,8 -1335,8 +1324,8 @@@
  #define	pci_pool_free(pool, vaddr, addr) dma_pool_free(pool, vaddr, addr)
  
  struct msix_entry {
 -	u32	vector;	/* kernel uses to write allocated vector */
 -	u16	entry;	/* driver uses to specify entry, OS writes */
 +	u32	vector;	/* Kernel uses to write allocated vector */
 +	u16	entry;	/* Driver uses to specify entry, OS writes */
  };
  
  #ifdef CONFIG_PCI_MSI
@@@ -1364,10 -1376,10 +1365,10 @@@ static inline int pci_msi_enabled(void
  static inline int pci_enable_msi(struct pci_dev *dev)
  { return -ENOSYS; }
  static inline int pci_enable_msix_range(struct pci_dev *dev,
 -		      struct msix_entry *entries, int minvec, int maxvec)
 +			struct msix_entry *entries, int minvec, int maxvec)
  { return -ENOSYS; }
  static inline int pci_enable_msix_exact(struct pci_dev *dev,
 -		      struct msix_entry *entries, int nvec)
 +			struct msix_entry *entries, int nvec)
  { return -ENOSYS; }
  
  static inline int
@@@ -1532,9 -1544,9 +1533,9 @@@ static inline int acpi_pci_bus_find_dom
  int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent);
  #endif
  
 -/* some architectures require additional setup to direct VGA traffic */
 +/* Some architectures require additional setup to direct VGA traffic */
  typedef int (*arch_set_vga_state_t)(struct pci_dev *pdev, bool decode,
 -		      unsigned int command_bits, u32 flags);
 +				    unsigned int command_bits, u32 flags);
  void pci_register_set_vga_state(arch_set_vga_state_t func);
  
  static inline int
@@@ -1573,9 -1585,10 +1574,9 @@@ static inline void pci_clear_flags(int 
  static inline int pci_has_flag(int flag) { return 0; }
  
  /*
 - *  If the system does not have PCI, clearly these return errors.  Define
 - *  these as simple inline functions to avoid hair in drivers.
 + * If the system does not have PCI, clearly these return errors.  Define
 + * these as simple inline functions to avoid hair in drivers.
   */
 -
  #define _PCI_NOP(o, s, t) \
  	static inline int pci_##o##_config_##s(struct pci_dev *dev, \
  						int where, t val) \
@@@ -1714,10 -1727,8 +1715,10 @@@ int pci_iobar_pfn(struct pci_dev *pdev
  #define pci_root_bus_fwnode(bus)	NULL
  #endif
  
 -/* these helpers provide future and backwards compatibility
 - * for accessing popular PCI BAR info */
 +/*
 + * These helpers provide future and backwards compatibility
 + * for accessing popular PCI BAR info
 + */
  #define pci_resource_start(dev, bar)	((dev)->resource[(bar)].start)
  #define pci_resource_end(dev, bar)	((dev)->resource[(bar)].end)
  #define pci_resource_flags(dev, bar)	((dev)->resource[(bar)].flags)
@@@ -1729,8 -1740,7 +1730,8 @@@
  	 (pci_resource_end((dev), (bar)) -		\
  	  pci_resource_start((dev), (bar)) + 1))
  
 -/* Similar to the helpers above, these manipulate per-pci_dev
 +/*
 + * Similar to the helpers above, these manipulate per-pci_dev
   * driver-specific data.  They are really just a wrapper around
   * the generic device structure functions of these calls.
   */
@@@ -1744,14 -1754,16 +1745,14 @@@ static inline void pci_set_drvdata(stru
  	dev_set_drvdata(&pdev->dev, data);
  }
  
 -/* If you want to know what to call your pci_dev, ask this function.
 - * Again, it's a wrapper around the generic device.
 - */
  static inline const char *pci_name(const struct pci_dev *pdev)
  {
  	return dev_name(&pdev->dev);
  }
  
  
 -/* Some archs don't want to expose struct resource to userland as-is
 +/*
 + * Some archs don't want to expose struct resource to userland as-is
   * in sysfs and /proc
   */
  #ifdef HAVE_ARCH_PCI_RESOURCE_TO_USER
@@@ -1770,16 -1782,16 +1771,16 @@@ static inline void pci_resource_to_user
  
  
  /*
 - *  The world is not perfect and supplies us with broken PCI devices.
 - *  For at least a part of these bugs we need a work-around, so both
 - *  generic (drivers/pci/quirks.c) and per-architecture code can define
 - *  fixup hooks to be called for particular buggy devices.
 + * The world is not perfect and supplies us with broken PCI devices.
 + * For at least a part of these bugs we need a work-around, so both
 + * generic (drivers/pci/quirks.c) and per-architecture code can define
 + * fixup hooks to be called for particular buggy devices.
   */
  
  struct pci_fixup {
 -	u16 vendor;		/* You can use PCI_ANY_ID here of course */
 -	u16 device;		/* You can use PCI_ANY_ID here of course */
 -	u32 class;		/* You can use PCI_ANY_ID here too */
 +	u16 vendor;			/* Or PCI_ANY_ID */
 +	u16 device;			/* Or PCI_ANY_ID */
 +	u32 class;			/* Or PCI_ANY_ID */
  	unsigned int class_shift;	/* should be 0, 8, 16 */
  	void (*hook)(struct pci_dev *dev);
  };
@@@ -1821,19 -1833,23 +1822,19 @@@ enum pci_fixup_pass 
  #define DECLARE_PCI_FIXUP_CLASS_RESUME(vendor, device, class,		\
  					 class_shift, hook)		\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume,			\
 -		resume##hook, vendor, device, class,	\
 -		class_shift, hook)
 +		resume##hook, vendor, device, class, class_shift, hook)
  #define DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(vendor, device, class,	\
  					 class_shift, hook)		\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early,		\
 -		resume_early##hook, vendor, device,	\
 -		class, class_shift, hook)
 +		resume_early##hook, vendor, device, class, class_shift, hook)
  #define DECLARE_PCI_FIXUP_CLASS_SUSPEND(vendor, device, class,		\
  					 class_shift, hook)		\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend,			\
 -		suspend##hook, vendor, device, class,	\
 -		class_shift, hook)
 +		suspend##hook, vendor, device, class, class_shift, hook)
  #define DECLARE_PCI_FIXUP_CLASS_SUSPEND_LATE(vendor, device, class,	\
  					 class_shift, hook)		\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend_late,		\
 -		suspend_late##hook, vendor, device,	\
 -		class, class_shift, hook)
 +		suspend_late##hook, vendor, device, class, class_shift, hook)
  
  #define DECLARE_PCI_FIXUP_EARLY(vendor, device, hook)			\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early,			\
@@@ -1849,16 -1865,20 +1850,16 @@@
  		hook, vendor, device, PCI_ANY_ID, 0, hook)
  #define DECLARE_PCI_FIXUP_RESUME(vendor, device, hook)			\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume,			\
 -		resume##hook, vendor, device,		\
 -		PCI_ANY_ID, 0, hook)
 +		resume##hook, vendor, device, PCI_ANY_ID, 0, hook)
  #define DECLARE_PCI_FIXUP_RESUME_EARLY(vendor, device, hook)		\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early,		\
 -		resume_early##hook, vendor, device,	\
 -		PCI_ANY_ID, 0, hook)
 +		resume_early##hook, vendor, device, PCI_ANY_ID, 0, hook)
  #define DECLARE_PCI_FIXUP_SUSPEND(vendor, device, hook)			\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend,			\
 -		suspend##hook, vendor, device,		\
 -		PCI_ANY_ID, 0, hook)
 +		suspend##hook, vendor, device, PCI_ANY_ID, 0, hook)
  #define DECLARE_PCI_FIXUP_SUSPEND_LATE(vendor, device, hook)		\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend_late,		\
 -		suspend_late##hook, vendor, device,	\
 -		PCI_ANY_ID, 0, hook)
 +		suspend_late##hook, vendor, device, PCI_ANY_ID, 0, hook)
  
  #ifdef CONFIG_PCI_QUIRKS
  void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
@@@ -1945,7 -1965,6 +1946,7 @@@ int pci_vfs_assigned(struct pci_dev *de
  int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs);
  int pci_sriov_get_totalvfs(struct pci_dev *dev);
  resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno);
 +void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe);
  #else
  static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id)
  {
@@@ -1973,7 -1992,6 +1974,7 @@@ static inline int pci_sriov_get_totalvf
  { return 0; }
  static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
  { return 0; }
 +static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { }
  #endif
  
  #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE)
@@@ -2095,7 -2113,7 +2096,7 @@@ static inline u16 pci_vpd_lrdt_size(con
   */
  static inline u16 pci_vpd_lrdt_tag(const u8 *lrdt)
  {
 -    return (u16)(lrdt[0] & PCI_VPD_LRDT_TIN_MASK);
 +	return (u16)(lrdt[0] & PCI_VPD_LRDT_TIN_MASK);
  }
  
  /**
@@@ -2180,7 -2198,7 +2181,7 @@@ static inline struct device_node *pci_b
  	return bus ? bus->dev.of_node : NULL;
  }
  
 -#else /* CONFIG_OF */
 +#else	/* CONFIG_OF */
  static inline void pci_set_of_node(struct pci_dev *dev) { }
  static inline void pci_release_of_node(struct pci_dev *dev) { }
  static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
@@@ -2189,7 -2207,7 +2190,7 @@@ static inline struct device_node 
  pci_device_to_OF_node(const struct pci_dev *pdev) { return NULL; }
  static inline struct irq_domain *
  pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; }
 -#endif  /* CONFIG_OF */
 +#endif	/* CONFIG_OF */
  
  #ifdef CONFIG_ACPI
  struct irq_domain *pci_host_bridge_acpi_msi_domain(struct pci_bus *bus);
@@@ -2214,7 -2232,7 +2215,7 @@@ int pci_for_each_dma_alias(struct pci_d
  			   int (*fn)(struct pci_dev *pdev,
  				     u16 alias, void *data), void *data);
  
 -/* helper functions for operation of device flag */
 +/* Helper functions for operation of device flag */
  static inline void pci_set_dev_assigned(struct pci_dev *pdev)
  {
  	pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
@@@ -2261,7 -2279,7 +2262,7 @@@ static inline bool pci_is_thunderbolt_a
  	return false;
  }
  
 -/* provide the legacy pci_dma_* API */
 +/* Provide the legacy pci_dma_* API */
  #include <linux/pci-dma-compat.h>
  
  #endif /* LINUX_PCI_H */
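Reader's note, not part of the patch: a minimal sketch of how the pci_driver members, the PCI_VDEVICE() table macro and module_pci_driver() touched above fit together in a driver. The "demo" names, the 0x1234 device ID and the use of pcim_enable_device() are illustrative assumptions only.

#include <linux/module.h>
#include <linux/pci.h>

static const struct pci_device_id demo_pci_ids[] = {
	{ PCI_VDEVICE(INTEL, 0x1234) },		/* made-up device ID */
	{ }
};
MODULE_DEVICE_TABLE(pci, demo_pci_ids);

static int demo_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	int err;

	err = pcim_enable_device(pdev);		/* managed: undone on unbind */
	if (err)
		return err;

	pci_set_drvdata(pdev, NULL);		/* stash per-device state here */
	return 0;
}

static void demo_remove(struct pci_dev *pdev)
{
	/* nothing to do; devres releases what pcim_enable_device() took */
}

static struct pci_driver demo_pci_driver = {
	.name		= "demo-pci",
	.id_table	= demo_pci_ids,		/* must be non-NULL for probe */
	.probe		= demo_probe,
	.remove		= demo_remove,
};
module_pci_driver(demo_pci_driver);

MODULE_LICENSE("GPL");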
diff --combined include/linux/skbuff.h
index a87e43d16f44,b8e0da6c27d6..ac89a93b7c83
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@@ -1211,6 -1211,11 +1211,11 @@@ static inline bool skb_flow_dissect_flo
  				  data, proto, nhoff, hlen, flags);
  }
  
+ void
+ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
+ 			     struct flow_dissector *flow_dissector,
+ 			     void *target_container);
+ 
  static inline __u32 skb_get_hash(struct sk_buff *skb)
  {
  	if (!skb->l4_hash && !skb->sw_hash)
@@@ -3241,7 -3246,7 +3246,7 @@@ struct sk_buff *__skb_recv_datagram(str
  				    int *peeked, int *off, int *err);
  struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
  				  int *err);
 -unsigned int datagram_poll(struct file *file, struct socket *sock,
 +__poll_t datagram_poll(struct file *file, struct socket *sock,
  			   struct poll_table_struct *wait);
  int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
  			   struct iov_iter *to, int size);
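Reader's note, not part of the patch: the unsigned int -> __poll_t conversions in this and the following headers all follow one pattern; a hypothetical poll handler after conversion could look like the sketch below (the readiness test and the names are illustrative assumptions, assuming the usual <linux/poll.h> and <net/sock.h> includes).

static __poll_t demo_poll(struct file *file, struct socket *sock,
			  struct poll_table_struct *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask = 0;

	poll_wait(file, sk_sleep(sk), wait);	/* arm the waitqueue */
	if (!skb_queue_empty(&sk->sk_receive_queue))
		mask |= POLLIN | POLLRDNORM;	/* readable */
	return mask;
}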
diff --combined include/net/inet_connection_sock.h
index ec72cdb5bc39,8e1bf9ae4a5e..6692d67e9245
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@@ -77,6 -77,7 +77,7 @@@ struct inet_connection_sock_af_ops 
   * @icsk_af_ops		   Operations which are AF_INET{4,6} specific
   * @icsk_ulp_ops	   Pluggable ULP control hook
   * @icsk_ulp_data	   ULP private data
+  * @icsk_listen_portaddr_node	hash node for the portaddr listener hashtable
   * @icsk_ca_state:	   Congestion control state
   * @icsk_retransmits:	   Number of unrecovered [RTO] timeouts
   * @icsk_pending:	   Scheduled timer event
@@@ -101,6 -102,7 +102,7 @@@ struct inet_connection_sock 
  	const struct inet_connection_sock_af_ops *icsk_af_ops;
  	const struct tcp_ulp_ops  *icsk_ulp_ops;
  	void			  *icsk_ulp_data;
+ 	struct hlist_node         icsk_listen_portaddr_node;
  	unsigned int		  (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
  	__u8			  icsk_ca_state:6,
  				  icsk_ca_setsockopt:1,
@@@ -305,7 -307,7 +307,7 @@@ void inet_csk_prepare_forced_close(stru
  /*
   * LISTEN is a special case for poll..
   */
 -static inline unsigned int inet_csk_listen_poll(const struct sock *sk)
 +static inline __poll_t inet_csk_listen_poll(const struct sock *sk)
  {
  	return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ?
  			(POLLIN | POLLRDNORM) : 0;
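Reader's note, not part of the patch: icsk_listen_portaddr_node added above is a plain hlist node; the kind of insertion it is meant for looks roughly like the sketch below (the hash head and the function name are made up).

static void demo_hash_listener(struct sock *sk, struct hlist_head *head)
{
	/* chain this listening socket on a per-(port, address) bucket */
	hlist_add_head_rcu(&inet_csk(sk)->icsk_listen_portaddr_node, head);
}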
diff --combined include/net/pkt_cls.h
index 8e08b6da72f3,58bba9c769ea..31574c958673
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@@ -39,9 -39,11 +39,11 @@@ struct tcf_chain *tcf_chain_get(struct 
  				bool create);
  void tcf_chain_put(struct tcf_chain *chain);
  int tcf_block_get(struct tcf_block **p_block,
- 		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q);
+ 		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
+ 		  struct netlink_ext_ack *extack);
  int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
- 		      struct tcf_block_ext_info *ei);
+ 		      struct tcf_block_ext_info *ei,
+ 		      struct netlink_ext_ack *extack);
  void tcf_block_put(struct tcf_block *block);
  void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
  		       struct tcf_block_ext_info *ei);
@@@ -694,7 -696,9 +696,7 @@@ struct tc_cls_matchall_offload 
  };
  
  enum tc_clsbpf_command {
 -	TC_CLSBPF_ADD,
 -	TC_CLSBPF_REPLACE,
 -	TC_CLSBPF_DESTROY,
 +	TC_CLSBPF_OFFLOAD,
  	TC_CLSBPF_STATS,
  };
  
@@@ -703,7 -707,6 +705,7 @@@ struct tc_cls_bpf_offload 
  	enum tc_clsbpf_command command;
  	struct tcf_exts *exts;
  	struct bpf_prog *prog;
 +	struct bpf_prog *oldprog;
  	const char *name;
  	bool exts_integrated;
  	u32 gen_flags;
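Reader's note, not part of the patch: tcf_block_get() and tcf_block_get_ext() now take a struct netlink_ext_ack * so binding failures can carry a message back to user space; a caller simply forwards the pointer it already has, roughly as in the sketch below (the qdisc private struct and names are made up).

struct demo_sched_data {			/* illustrative qdisc privates */
	struct tcf_block *block;
	struct tcf_proto __rcu *filter_list;
};

static int demo_attach_block(struct Qdisc *sch, struct demo_sched_data *q,
			     struct netlink_ext_ack *extack)
{
	/* extack lets tcf_block_get() report why the bind failed */
	return tcf_block_get(&q->block, &q->filter_list, sch, extack);
}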
diff --combined include/net/sctp/sctp.h
index 608d123ef25f,20c0c1be2ca7..f7ae6b0a21d0
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@@ -107,7 -107,7 +107,7 @@@ int sctp_backlog_rcv(struct sock *sk, s
  int sctp_inet_listen(struct socket *sock, int backlog);
  void sctp_write_space(struct sock *sk);
  void sctp_data_ready(struct sock *sk);
 -unsigned int sctp_poll(struct file *file, struct socket *sock,
 +__poll_t sctp_poll(struct file *file, struct socket *sock,
  		poll_table *wait);
  void sctp_sock_rfree(struct sk_buff *skb);
  void sctp_copy_sock(struct sock *newsk, struct sock *sk,
@@@ -116,7 -116,7 +116,7 @@@ extern struct percpu_counter sctp_socke
  int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
  struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
  
- int sctp_transport_walk_start(struct rhashtable_iter *iter);
+ void sctp_transport_walk_start(struct rhashtable_iter *iter);
  void sctp_transport_walk_stop(struct rhashtable_iter *iter);
  struct sctp_transport *sctp_transport_get_next(struct net *net,
  			struct rhashtable_iter *iter);
@@@ -444,13 -444,13 +444,13 @@@ static inline int sctp_frag_point(cons
  	int frag = pmtu;
  
  	frag -= sp->pf->af->net_header_len;
- 	frag -= sizeof(struct sctphdr) + sizeof(struct sctp_data_chunk);
+ 	frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream);
  
  	if (asoc->user_frag)
  		frag = min_t(int, frag, asoc->user_frag);
  
  	frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN -
- 					    sizeof(struct sctp_data_chunk)));
+ 					    sctp_datachk_len(&asoc->stream)));
  
  	return frag;
  }
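Reader's note, not part of the patch: sctp_transport_walk_start() returning void means iterator users no longer have a setup error to check; a walk now reads roughly as below (sketch only, mirroring how the existing proc/diag walkers use the iterator).

static void demo_walk_transports(struct net *net, struct rhashtable_iter *iter)
{
	struct sctp_transport *t;

	sctp_transport_walk_start(iter);	/* no return value any more */
	while ((t = sctp_transport_get_next(net, iter)) && !IS_ERR(t)) {
		/* inspect *t here */
	}
	sctp_transport_walk_stop(iter);
}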
diff --combined include/net/sock.h
index f90685441143,6c1db823f8b9..ae68e1be0c1d
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@@ -1262,6 -1262,7 +1262,7 @@@ proto_memory_pressure(struct proto *pro
  /* Called with local bh disabled */
  void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc);
  int sock_prot_inuse_get(struct net *net, struct proto *proto);
+ int sock_inuse_get(struct net *net);
  #else
  static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
  		int inc)
@@@ -1578,7 -1579,7 +1579,7 @@@ int sock_no_connect(struct socket *, st
  int sock_no_socketpair(struct socket *, struct socket *);
  int sock_no_accept(struct socket *, struct socket *, int, bool);
  int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
 -unsigned int sock_no_poll(struct file *, struct socket *,
 +__poll_t sock_no_poll(struct file *, struct socket *,
  			  struct poll_table_struct *);
  int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
  int sock_no_listen(struct socket *, int);
@@@ -2332,31 -2333,6 +2333,6 @@@ static inline bool sk_listener(const st
  	return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
  }
  
- /**
-  * sk_state_load - read sk->sk_state for lockless contexts
-  * @sk: socket pointer
-  *
-  * Paired with sk_state_store(). Used in places we do not hold socket lock :
-  * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
-  */
- static inline int sk_state_load(const struct sock *sk)
- {
- 	return smp_load_acquire(&sk->sk_state);
- }
- 
- /**
-  * sk_state_store - update sk->sk_state
-  * @sk: socket pointer
-  * @newstate: new state
-  *
-  * Paired with sk_state_load(). Should be used in contexts where
-  * state change might impact lockless readers.
-  */
- static inline void sk_state_store(struct sock *sk, int newstate)
- {
- 	smp_store_release(&sk->sk_state, newstate);
- }
- 
  void sock_enable_timestamp(struct sock *sk, int flag);
  int sock_get_timestamp(struct sock *, struct timeval __user *);
  int sock_get_timestampns(struct sock *, struct timespec __user *);
@@@ -2407,4 -2383,15 +2383,15 @@@ static inline int sk_get_rmem0(const st
  	return *proto->sysctl_rmem;
  }
  
+ /* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
+  * Some wifi drivers need to tweak it to get more chunks.
+  * They can use this helper from their ndo_start_xmit()
+  */
+ static inline void sk_pacing_shift_update(struct sock *sk, int val)
+ {
+ 	if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val)
+ 		return;
+ 	sk->sk_pacing_shift = val;
+ }
+ 
  #endif	/* _SOCK_H */
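Reader's note, not part of the patch: sk_pacing_shift_update() added above is meant to be called from a driver's transmit path, per its comment; a hypothetical ndo_start_xmit() could use it as below (the shift value 8, roughly 1 sec >> 8 ~= 4 ms of queued data, is only an example).

static netdev_tx_t demo_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	/* the helper tolerates skb->sk == NULL and skips redundant updates */
	sk_pacing_shift_update(skb->sk, 8);

	/* ... hand the skb to the hardware queues here ... */
	return NETDEV_TX_OK;
}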
diff --combined include/net/tcp.h
index 50b21a49d870,6939e69d3c37..26c2793846a1
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@@ -387,7 -387,7 +387,7 @@@ bool tcp_peer_is_proven(struct request_
  void tcp_close(struct sock *sk, long timeout);
  void tcp_init_sock(struct sock *sk);
  void tcp_init_transfer(struct sock *sk, int bpf_op);
 -unsigned int tcp_poll(struct file *file, struct socket *sock,
 +__poll_t tcp_poll(struct file *file, struct socket *sock,
  		      struct poll_table_struct *wait);
  int tcp_getsockopt(struct sock *sk, int level, int optname,
  		   char __user *optval, int __user *optlen);
@@@ -1507,8 -1507,7 +1507,7 @@@ int tcp_md5_hash_key(struct tcp_md5sig_
  
  /* From tcp_fastopen.c */
  void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
- 			    struct tcp_fastopen_cookie *cookie, int *syn_loss,
- 			    unsigned long *last_syn_loss);
+ 			    struct tcp_fastopen_cookie *cookie);
  void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
  			    struct tcp_fastopen_cookie *cookie, bool syn_lost,
  			    u16 try_exp);
@@@ -1546,7 -1545,7 +1545,7 @@@ extern unsigned int sysctl_tcp_fastopen
  void tcp_fastopen_active_disable(struct sock *sk);
  bool tcp_fastopen_active_should_disable(struct sock *sk);
  void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
- void tcp_fastopen_active_timeout_reset(void);
+ void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
  
  /* Latencies incurred by various limits for a sender. They are
   * chronograph-like stats that are mutually exclusive.
@@@ -2011,10 -2010,12 +2010,12 @@@ static inline int tcp_call_bpf(struct s
  	struct bpf_sock_ops_kern sock_ops;
  	int ret;
  
- 	if (sk_fullsock(sk))
+ 	memset(&sock_ops, 0, sizeof(sock_ops));
+ 	if (sk_fullsock(sk)) {
+ 		sock_ops.is_fullsock = 1;
  		sock_owned_by_me(sk);
+ 	}
  
- 	memset(&sock_ops, 0, sizeof(sock_ops));
  	sock_ops.sk = sk;
  	sock_ops.op = op;
  
diff --combined include/net/xfrm.h
index ae35991b5877,1ec0c4760646..059213a4096e
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@@ -968,7 -968,7 +968,7 @@@ static inline bool xfrm_sec_ctx_match(s
  
  /* A struct encoding bundle of transformations to apply to some set of flow.
   *
-  * dst->child points to the next element of bundle.
+  * xdst->child points to the next element of bundle.
   * dst->xfrm  points to an instance of a transformer.
   *
   * Due to unfortunate limitations of current routing cache, which we
@@@ -984,6 -984,8 +984,8 @@@ struct xfrm_dst 
  		struct rt6_info		rt6;
  	} u;
  	struct dst_entry *route;
+ 	struct dst_entry *child;
+ 	struct dst_entry *path;
  	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
  	int num_pols, num_xfrms;
  	u32 xfrm_genid;
@@@ -994,7 -996,35 +996,35 @@@
  	u32 path_cookie;
  };
  
+ static inline struct dst_entry *xfrm_dst_path(const struct dst_entry *dst)
+ {
  #ifdef CONFIG_XFRM
+ 	if (dst->xfrm) {
+ 		const struct xfrm_dst *xdst = (const struct xfrm_dst *) dst;
+ 
+ 		return xdst->path;
+ 	}
+ #endif
+ 	return (struct dst_entry *) dst;
+ }
+ 
+ static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
+ {
+ #ifdef CONFIG_XFRM
+ 	if (dst->xfrm) {
+ 		struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
+ 		return xdst->child;
+ 	}
+ #endif
+ 	return NULL;
+ }
+ 
+ #ifdef CONFIG_XFRM
+ static inline void xfrm_dst_set_child(struct xfrm_dst *xdst, struct dst_entry *child)
+ {
+ 	xdst->child = child;
+ }
+ 
  static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
  {
  	xfrm_pols_put(xdst->pols, xdst->num_pols);
@@@ -1570,9 -1600,6 +1600,9 @@@ int xfrm_init_state(struct xfrm_state *
  int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
  int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
  int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
 +int xfrm_trans_queue(struct sk_buff *skb,
 +		     int (*finish)(struct net *, struct sock *,
 +				   struct sk_buff *));
  int xfrm_output_resume(struct sk_buff *skb, int err);
  int xfrm_output(struct sock *sk, struct sk_buff *skb);
  int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
@@@ -1869,12 -1896,14 +1899,14 @@@ bool xfrm_dev_offload_ok(struct sk_buf
  static inline bool xfrm_dst_offload_ok(struct dst_entry *dst)
  {
  	struct xfrm_state *x = dst->xfrm;
+ 	struct xfrm_dst *xdst;
  
  	if (!x || !x->type_offload)
  		return false;
  
- 	if (x->xso.offload_handle && (x->xso.dev == dst->path->dev) &&
- 	    !dst->child->xfrm)
+ 	xdst = (struct xfrm_dst *) dst;
+ 	if (x->xso.offload_handle && (x->xso.dev == xfrm_dst_path(dst)->dev) &&
+ 	    !xdst->child->xfrm)
  		return true;
  
  	return false;
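Reader's note, not part of the patch: with xfrm_dst_path()/xfrm_dst_child() replacing direct dst->path/dst->child dereferences, walking a bundle looks roughly like the sketch below (illustrative helper, assumes CONFIG_XFRM so dst->xfrm exists).

static int demo_count_transforms(struct dst_entry *dst)
{
	int n = 0;

	/* every xfrm_dst in the bundle carries one transform; the chain
	 * ends at the ordinary route, which has no dst->xfrm
	 */
	while (dst && dst->xfrm) {
		n++;
		dst = xfrm_dst_child(dst);
	}
	return n;
}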
diff --combined kernel/bpf/syscall.c
index 5cb783fc8224,e2e1c78ce1dc..da932743e116
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@@ -1057,7 -1057,7 +1057,7 @@@ struct bpf_prog *bpf_prog_inc_not_zero(
  }
  EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
  
 -static bool bpf_prog_get_ok(struct bpf_prog *prog,
 +bool bpf_prog_get_ok(struct bpf_prog *prog,
  			    enum bpf_prog_type *attach_type, bool attach_drv)
  {
  	/* not an attachment, just a refcount inc, always allow */
@@@ -1194,7 -1194,8 +1194,8 @@@ static int bpf_prog_load(union bpf_att
  		goto free_used_maps;
  
  	/* eBPF program is ready to be JITed */
- 	prog = bpf_prog_select_runtime(prog, &err);
+ 	if (!prog->bpf_func)
+ 		prog = bpf_prog_select_runtime(prog, &err);
  	if (err < 0)
  		goto free_used_maps;
  
diff --combined kernel/bpf/verifier.c
index 04b24876cd23,48b2901cf483..8e480706722e
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@@ -20,6 -20,8 +20,8 @@@
  #include <linux/file.h>
  #include <linux/vmalloc.h>
  #include <linux/stringify.h>
+ #include <linux/bsearch.h>
+ #include <linux/sort.h>
  
  #include "disasm.h"
  
@@@ -216,23 -218,48 +218,48 @@@ static const char * const reg_type_str[
  	[PTR_TO_PACKET_END]	= "pkt_end",
  };
  
+ static void print_liveness(struct bpf_verifier_env *env,
+ 			   enum bpf_reg_liveness live)
+ {
+ 	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN))
+ 	    verbose(env, "_");
+ 	if (live & REG_LIVE_READ)
+ 		verbose(env, "r");
+ 	if (live & REG_LIVE_WRITTEN)
+ 		verbose(env, "w");
+ }
+ 
+ static struct bpf_func_state *func(struct bpf_verifier_env *env,
+ 				   const struct bpf_reg_state *reg)
+ {
+ 	struct bpf_verifier_state *cur = env->cur_state;
+ 
+ 	return cur->frame[reg->frameno];
+ }
+ 
  static void print_verifier_state(struct bpf_verifier_env *env,
- 				 struct bpf_verifier_state *state)
+ 				 const struct bpf_func_state *state)
  {
- 	struct bpf_reg_state *reg;
+ 	const struct bpf_reg_state *reg;
  	enum bpf_reg_type t;
  	int i;
  
+ 	if (state->frameno)
+ 		verbose(env, " frame%d:", state->frameno);
  	for (i = 0; i < MAX_BPF_REG; i++) {
  		reg = &state->regs[i];
  		t = reg->type;
  		if (t == NOT_INIT)
  			continue;
- 		verbose(env, " R%d=%s", i, reg_type_str[t]);
+ 		verbose(env, " R%d", i);
+ 		print_liveness(env, reg->live);
+ 		verbose(env, "=%s", reg_type_str[t]);
  		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
  		    tnum_is_const(reg->var_off)) {
  			/* reg->off should be 0 for SCALAR_VALUE */
  			verbose(env, "%lld", reg->var_off.value + reg->off);
+ 			if (t == PTR_TO_STACK)
+ 				verbose(env, ",call_%d", func(env, reg)->callsite);
  		} else {
  			verbose(env, "(id=%d", reg->id);
  			if (t != SCALAR_VALUE)
@@@ -277,16 -304,21 +304,21 @@@
  		}
  	}
  	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- 		if (state->stack[i].slot_type[0] == STACK_SPILL)
- 			verbose(env, " fp%d=%s",
- 				-MAX_BPF_STACK + i * BPF_REG_SIZE,
+ 		if (state->stack[i].slot_type[0] == STACK_SPILL) {
+ 			verbose(env, " fp%d",
+ 				(-i - 1) * BPF_REG_SIZE);
+ 			print_liveness(env, state->stack[i].spilled_ptr.live);
+ 			verbose(env, "=%s",
  				reg_type_str[state->stack[i].spilled_ptr.type]);
+ 		}
+ 		if (state->stack[i].slot_type[0] == STACK_ZERO)
+ 			verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE);
  	}
  	verbose(env, "\n");
  }
  
- static int copy_stack_state(struct bpf_verifier_state *dst,
- 			    const struct bpf_verifier_state *src)
+ static int copy_stack_state(struct bpf_func_state *dst,
+ 			    const struct bpf_func_state *src)
  {
  	if (!src->stack)
  		return 0;
@@@ -302,13 -334,13 +334,13 @@@
  
  /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
   * make it consume minimal amount of memory. check_stack_write() access from
-  * the program calls into realloc_verifier_state() to grow the stack size.
+  * the program calls into realloc_func_state() to grow the stack size.
   * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
   * which this function copies over. It points to previous bpf_verifier_state
   * which is never reallocated
   */
- static int realloc_verifier_state(struct bpf_verifier_state *state, int size,
- 				  bool copy_old)
+ static int realloc_func_state(struct bpf_func_state *state, int size,
+ 			      bool copy_old)
  {
  	u32 old_size = state->allocated_stack;
  	struct bpf_stack_state *new_stack;
@@@ -341,10 -373,21 +373,21 @@@
  	return 0;
  }
  
+ static void free_func_state(struct bpf_func_state *state)
+ {
+ 	kfree(state->stack);
+ 	kfree(state);
+ }
+ 
  static void free_verifier_state(struct bpf_verifier_state *state,
  				bool free_self)
  {
- 	kfree(state->stack);
+ 	int i;
+ 
+ 	for (i = 0; i <= state->curframe; i++) {
+ 		free_func_state(state->frame[i]);
+ 		state->frame[i] = NULL;
+ 	}
  	if (free_self)
  		kfree(state);
  }
@@@ -352,18 -395,46 +395,46 @@@
  /* copy verifier state from src to dst growing dst stack space
   * when necessary to accommodate larger src stack
   */
- static int copy_verifier_state(struct bpf_verifier_state *dst,
- 			       const struct bpf_verifier_state *src)
+ static int copy_func_state(struct bpf_func_state *dst,
+ 			   const struct bpf_func_state *src)
  {
  	int err;
  
- 	err = realloc_verifier_state(dst, src->allocated_stack, false);
+ 	err = realloc_func_state(dst, src->allocated_stack, false);
  	if (err)
  		return err;
- 	memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack));
+ 	memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
  	return copy_stack_state(dst, src);
  }
  
+ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
+ 			       const struct bpf_verifier_state *src)
+ {
+ 	struct bpf_func_state *dst;
+ 	int i, err;
+ 
+ 	/* if dst has more stack frames then src frame, free them */
+ 	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
+ 		free_func_state(dst_state->frame[i]);
+ 		dst_state->frame[i] = NULL;
+ 	}
+ 	dst_state->curframe = src->curframe;
+ 	dst_state->parent = src->parent;
+ 	for (i = 0; i <= src->curframe; i++) {
+ 		dst = dst_state->frame[i];
+ 		if (!dst) {
+ 			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ 			if (!dst)
+ 				return -ENOMEM;
+ 			dst_state->frame[i] = dst;
+ 		}
+ 		err = copy_func_state(dst, src->frame[i]);
+ 		if (err)
+ 			return err;
+ 	}
+ 	return 0;
+ }
+ 
  static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
  		     int *insn_idx)
  {
@@@ -425,6 -496,10 +496,10 @@@ err
  static const int caller_saved[CALLER_SAVED_REGS] = {
  	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
  };
+ #define CALLEE_SAVED_REGS 5
+ static const int callee_saved[CALLEE_SAVED_REGS] = {
+ 	BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
+ };
  
  static void __mark_reg_not_init(struct bpf_reg_state *reg);
  
@@@ -449,6 -524,13 +524,13 @@@ static void __mark_reg_known_zero(struc
  	__mark_reg_known(reg, 0);
  }
  
+ static void __mark_reg_const_zero(struct bpf_reg_state *reg)
+ {
+ 	__mark_reg_known(reg, 0);
+ 	reg->off = 0;
+ 	reg->type = SCALAR_VALUE;
+ }
+ 
  static void mark_reg_known_zero(struct bpf_verifier_env *env,
  				struct bpf_reg_state *regs, u32 regno)
  {
@@@ -560,6 -642,7 +642,7 @@@ static void __mark_reg_unknown(struct b
  	reg->id = 0;
  	reg->off = 0;
  	reg->var_off = tnum_unknown;
+ 	reg->frameno = 0;
  	__mark_reg_unbounded(reg);
  }
  
@@@ -568,8 -651,8 +651,8 @@@ static void mark_reg_unknown(struct bpf
  {
  	if (WARN_ON(regno >= MAX_BPF_REG)) {
  		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
- 		/* Something bad happened, let's kill all regs */
- 		for (regno = 0; regno < MAX_BPF_REG; regno++)
+ 		/* Something bad happened, let's kill all regs except FP */
+ 		for (regno = 0; regno < BPF_REG_FP; regno++)
  			__mark_reg_not_init(regs + regno);
  		return;
  	}
@@@ -587,8 -670,8 +670,8 @@@ static void mark_reg_not_init(struct bp
  {
  	if (WARN_ON(regno >= MAX_BPF_REG)) {
  		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
- 		/* Something bad happened, let's kill all regs */
- 		for (regno = 0; regno < MAX_BPF_REG; regno++)
+ 		/* Something bad happened, let's kill all regs except FP */
+ 		for (regno = 0; regno < BPF_REG_FP; regno++)
  			__mark_reg_not_init(regs + regno);
  		return;
  	}
@@@ -596,8 -679,9 +679,9 @@@
  }
  
  static void init_reg_state(struct bpf_verifier_env *env,
- 			   struct bpf_reg_state *regs)
+ 			   struct bpf_func_state *state)
  {
+ 	struct bpf_reg_state *regs = state->regs;
  	int i;
  
  	for (i = 0; i < MAX_BPF_REG; i++) {
@@@ -608,41 -692,217 +692,217 @@@
  	/* frame pointer */
  	regs[BPF_REG_FP].type = PTR_TO_STACK;
  	mark_reg_known_zero(env, regs, BPF_REG_FP);
+ 	regs[BPF_REG_FP].frameno = state->frameno;
  
  	/* 1st arg to a function */
  	regs[BPF_REG_1].type = PTR_TO_CTX;
  	mark_reg_known_zero(env, regs, BPF_REG_1);
  }
  
+ #define BPF_MAIN_FUNC (-1)
+ static void init_func_state(struct bpf_verifier_env *env,
+ 			    struct bpf_func_state *state,
+ 			    int callsite, int frameno, int subprogno)
+ {
+ 	state->callsite = callsite;
+ 	state->frameno = frameno;
+ 	state->subprogno = subprogno;
+ 	init_reg_state(env, state);
+ }
+ 
  enum reg_arg_type {
  	SRC_OP,		/* register is used as source operand */
  	DST_OP,		/* register is used as destination operand */
  	DST_OP_NO_MARK	/* same as above, check only, don't mark */
  };
  
- static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno)
+ static int cmp_subprogs(const void *a, const void *b)
+ {
+ 	return *(int *)a - *(int *)b;
+ }
+ 
+ static int find_subprog(struct bpf_verifier_env *env, int off)
+ {
+ 	u32 *p;
+ 
+ 	p = bsearch(&off, env->subprog_starts, env->subprog_cnt,
+ 		    sizeof(env->subprog_starts[0]), cmp_subprogs);
+ 	if (!p)
+ 		return -ENOENT;
+ 	return p - env->subprog_starts;
+ 
+ }
+ 
+ static int add_subprog(struct bpf_verifier_env *env, int off)
+ {
+ 	int insn_cnt = env->prog->len;
+ 	int ret;
+ 
+ 	if (off >= insn_cnt || off < 0) {
+ 		verbose(env, "call to invalid destination\n");
+ 		return -EINVAL;
+ 	}
+ 	ret = find_subprog(env, off);
+ 	if (ret >= 0)
+ 		return 0;
+ 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
+ 		verbose(env, "too many subprograms\n");
+ 		return -E2BIG;
+ 	}
+ 	env->subprog_starts[env->subprog_cnt++] = off;
+ 	sort(env->subprog_starts, env->subprog_cnt,
+ 	     sizeof(env->subprog_starts[0]), cmp_subprogs, NULL);
+ 	return 0;
+ }
+ 
+ static int check_subprogs(struct bpf_verifier_env *env)
+ {
+ 	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
+ 	struct bpf_insn *insn = env->prog->insnsi;
+ 	int insn_cnt = env->prog->len;
+ 
+ 	/* determine subprog starts. The end is one before the next starts */
+ 	for (i = 0; i < insn_cnt; i++) {
+ 		if (insn[i].code != (BPF_JMP | BPF_CALL))
+ 			continue;
+ 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
+ 			continue;
+ 		if (!env->allow_ptr_leaks) {
+ 			verbose(env, "function calls to other bpf functions are allowed for root only\n");
+ 			return -EPERM;
+ 		}
+ 		if (bpf_prog_is_dev_bound(env->prog->aux)) {
+ 			verbose(env, "function calls in offloaded programs are not supported yet\n");
+ 			return -EINVAL;
+ 		}
+ 		ret = add_subprog(env, i + insn[i].imm + 1);
+ 		if (ret < 0)
+ 			return ret;
+ 	}
+ 
+ 	if (env->log.level > 1)
+ 		for (i = 0; i < env->subprog_cnt; i++)
+ 			verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]);
+ 
+ 	/* now check that all jumps are within the same subprog */
+ 	subprog_start = 0;
+ 	if (env->subprog_cnt == cur_subprog)
+ 		subprog_end = insn_cnt;
+ 	else
+ 		subprog_end = env->subprog_starts[cur_subprog++];
+ 	for (i = 0; i < insn_cnt; i++) {
+ 		u8 code = insn[i].code;
+ 
+ 		if (BPF_CLASS(code) != BPF_JMP)
+ 			goto next;
+ 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
+ 			goto next;
+ 		off = i + insn[i].off + 1;
+ 		if (off < subprog_start || off >= subprog_end) {
+ 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
+ 			return -EINVAL;
+ 		}
+ next:
+ 		if (i == subprog_end - 1) {
+ 			/* to avoid fall-through from one subprog into another
+ 			 * the last insn of the subprog should be either exit
+ 			 * or unconditional jump back
+ 			 */
+ 			if (code != (BPF_JMP | BPF_EXIT) &&
+ 			    code != (BPF_JMP | BPF_JA)) {
+ 				verbose(env, "last insn is not an exit or jmp\n");
+ 				return -EINVAL;
+ 			}
+ 			subprog_start = subprog_end;
+ 			if (env->subprog_cnt == cur_subprog)
+ 				subprog_end = insn_cnt;
+ 			else
+ 				subprog_end = env->subprog_starts[cur_subprog++];
+ 		}
+ 	}
+ 	return 0;
+ }
+ 
+ struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env,
+ 				       const struct bpf_verifier_state *state,
+ 				       struct bpf_verifier_state *parent,
+ 				       u32 regno)
+ {
+ 	struct bpf_verifier_state *tmp = NULL;
+ 
+ 	/* 'parent' could be a state of caller and
+ 	 * 'state' could be a state of callee. In such case
+ 	 * parent->curframe < state->curframe
+ 	 * and it's ok for r1 - r5 registers
+ 	 *
+ 	 * 'parent' could be a callee's state after it bpf_exit-ed.
+ 	 * In such case parent->curframe > state->curframe
+ 	 * and it's ok for r0 only
+ 	 */
+ 	if (parent->curframe == state->curframe ||
+ 	    (parent->curframe < state->curframe &&
+ 	     regno >= BPF_REG_1 && regno <= BPF_REG_5) ||
+ 	    (parent->curframe > state->curframe &&
+ 	       regno == BPF_REG_0))
+ 		return parent;
+ 
+ 	if (parent->curframe > state->curframe &&
+ 	    regno >= BPF_REG_6) {
+ 		/* for callee saved regs we have to skip the whole chain
+ 		 * of states that belong to callee and mark as LIVE_READ
+ 		 * the registers before the call
+ 		 */
+ 		tmp = parent;
+ 		while (tmp && tmp->curframe != state->curframe) {
+ 			tmp = tmp->parent;
+ 		}
+ 		if (!tmp)
+ 			goto bug;
+ 		parent = tmp;
+ 	} else {
+ 		goto bug;
+ 	}
+ 	return parent;
+ bug:
+ 	verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp);
+ 	verbose(env, "regno %d parent frame %d current frame %d\n",
+ 		regno, parent->curframe, state->curframe);
+ 	return 0;
+ }
+ 
+ static int mark_reg_read(struct bpf_verifier_env *env,
+ 			 const struct bpf_verifier_state *state,
+ 			 struct bpf_verifier_state *parent,
+ 			 u32 regno)
  {
- 	struct bpf_verifier_state *parent = state->parent;
+ 	bool writes = parent == state->parent; /* Observe write marks */
  
  	if (regno == BPF_REG_FP)
  		/* We don't need to worry about FP liveness because it's read-only */
- 		return;
+ 		return 0;
  
  	while (parent) {
  		/* if read wasn't screened by an earlier write ... */
- 		if (state->regs[regno].live & REG_LIVE_WRITTEN)
+ 		if (writes && state->frame[state->curframe]->regs[regno].live & REG_LIVE_WRITTEN)
  			break;
+ 		parent = skip_callee(env, state, parent, regno);
+ 		if (!parent)
+ 			return -EFAULT;
  		/* ... then we depend on parent's value */
- 		parent->regs[regno].live |= REG_LIVE_READ;
+ 		parent->frame[parent->curframe]->regs[regno].live |= REG_LIVE_READ;
  		state = parent;
  		parent = state->parent;
+ 		writes = true;
  	}
+ 	return 0;
  }
  
  static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
  			 enum reg_arg_type t)
  {
- 	struct bpf_reg_state *regs = env->cur_state->regs;
+ 	struct bpf_verifier_state *vstate = env->cur_state;
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
+ 	struct bpf_reg_state *regs = state->regs;
  
  	if (regno >= MAX_BPF_REG) {
  		verbose(env, "R%d is invalid\n", regno);
@@@ -655,7 -915,7 +915,7 @@@
  			verbose(env, "R%d !read_ok\n", regno);
  			return -EACCES;
  		}
- 		mark_reg_read(env->cur_state, regno);
+ 		return mark_reg_read(env, vstate, vstate->parent, regno);
  	} else {
  		/* check whether register used as dest operand can be written to */
  		if (regno == BPF_REG_FP) {
@@@ -686,17 -946,25 +946,25 @@@ static bool is_spillable_regtype(enum b
  	}
  }
  
+ /* Does this register contain a constant zero? */
+ static bool register_is_null(struct bpf_reg_state *reg)
+ {
+ 	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
+ }
+ 
  /* check_stack_read/write functions track spill/fill of registers,
   * stack boundary and alignment are checked in check_mem_access()
   */
  static int check_stack_write(struct bpf_verifier_env *env,
- 			     struct bpf_verifier_state *state, int off,
- 			     int size, int value_regno)
+ 			     struct bpf_func_state *state, /* func where register points to */
+ 			     int off, int size, int value_regno)
  {
+ 	struct bpf_func_state *cur; /* state of the current function */
  	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
+ 	enum bpf_reg_type type;
  
- 	err = realloc_verifier_state(state, round_up(slot + 1, BPF_REG_SIZE),
- 				     true);
+ 	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
+ 				 true);
  	if (err)
  		return err;
  	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@@ -709,8 -977,9 +977,9 @@@
  		return -EACCES;
  	}
  
+ 	cur = env->cur_state->frame[env->cur_state->curframe];
  	if (value_regno >= 0 &&
- 	    is_spillable_regtype(state->regs[value_regno].type)) {
+ 	    is_spillable_regtype((type = cur->regs[value_regno].type))) {
  
  		/* register containing pointer is being spilled into stack */
  		if (size != BPF_REG_SIZE) {
@@@ -718,51 -987,116 +987,116 @@@
  			return -EACCES;
  		}
  
+ 		if (state != cur && type == PTR_TO_STACK) {
+ 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
+ 			return -EINVAL;
+ 		}
+ 
  		/* save register state */
- 		state->stack[spi].spilled_ptr = state->regs[value_regno];
+ 		state->stack[spi].spilled_ptr = cur->regs[value_regno];
  		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
  
  		for (i = 0; i < BPF_REG_SIZE; i++)
  			state->stack[spi].slot_type[i] = STACK_SPILL;
  	} else {
+ 		u8 type = STACK_MISC;
+ 
  		/* regular write of data into stack */
  		state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
  
+ 		/* only mark the slot as written if all 8 bytes were written
+ 		 * otherwise read propagation may incorrectly stop too soon
+ 		 * when stack slots are partially written.
+ 		 * This heuristic means that read propagation will be
+ 		 * conservative, since it will add reg_live_read marks
+ 		 * to stack slots all the way to the first state when a program
+ 		 * writes+reads less than 8 bytes
+ 		 */
+ 		if (size == BPF_REG_SIZE)
+ 			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+ 
+ 		/* when we zero initialize stack slots mark them as such */
+ 		if (value_regno >= 0 &&
+ 		    register_is_null(&cur->regs[value_regno]))
+ 			type = STACK_ZERO;
+ 
  		for (i = 0; i < size; i++)
  			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
- 				STACK_MISC;
+ 				type;
  	}
  	return 0;
  }
  
- static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slot)
+ /* registers of every function are unique and mark_reg_read() propagates
+  * the liveness in the following cases:
+  * - from callee into caller for R1 - R5 that were used as arguments
+  * - from caller into callee for R0 that used as result of the call
+  * - from caller to the same caller skipping states of the callee for R6 - R9,
+  *   since R6 - R9 are callee saved by implicit function prologue and
+  *   caller's R6 != callee's R6, so when we propagate liveness up to
+  *   parent states we need to skip callee states for R6 - R9.
+  *
+  * stack slot marking is different, since stacks of caller and callee are
+  * accessible in both (since caller can pass a pointer to caller's stack to
+  * callee which can pass it to another function), hence mark_stack_slot_read()
+  * has to propagate the stack liveness to all parent states at given frame number.
+  * Consider code:
+  * f1() {
+  *   ptr = fp - 8;
+  *   *ptr = ctx;
+  *   call f2 {
+  *      .. = *ptr;
+  *   }
+  *   .. = *ptr;
+  * }
+  * First *ptr is reading from f1's stack and mark_stack_slot_read() has
+  * to mark liveness at the f1's frame and not f2's frame.
+  * Second *ptr is also reading from f1's stack and mark_stack_slot_read() has
+  * to propagate liveness to f2 states at f1's frame level and further into
+  * f1 states at f1's frame level until write into that stack slot
+  */
+ static void mark_stack_slot_read(struct bpf_verifier_env *env,
+ 				 const struct bpf_verifier_state *state,
+ 				 struct bpf_verifier_state *parent,
+ 				 int slot, int frameno)
  {
- 	struct bpf_verifier_state *parent = state->parent;
+ 	bool writes = parent == state->parent; /* Observe write marks */
  
  	while (parent) {
+ 		if (parent->frame[frameno]->allocated_stack <= slot * BPF_REG_SIZE)
+ 			/* since LIVE_WRITTEN mark is only done for full 8-byte
+ 			 * write the read marks are conservative and parent
+ 			 * state may not even have the stack allocated. In such case
+ 			 * end the propagation, since the loop reached beginning
+ 			 * of the function
+ 			 */
+ 			break;
  		/* if read wasn't screened by an earlier write ... */
- 		if (state->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
+ 		if (writes && state->frame[frameno]->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
  			break;
  		/* ... then we depend on parent's value */
- 		parent->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
+ 		parent->frame[frameno]->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
  		state = parent;
  		parent = state->parent;
+ 		writes = true;
  	}
  }
  
  static int check_stack_read(struct bpf_verifier_env *env,
- 			    struct bpf_verifier_state *state, int off, int size,
- 			    int value_regno)
+ 			    struct bpf_func_state *reg_state /* func where register points to */,
+ 			    int off, int size, int value_regno)
  {
+ 	struct bpf_verifier_state *vstate = env->cur_state;
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
  	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
  	u8 *stype;
  
- 	if (state->allocated_stack <= slot) {
+ 	if (reg_state->allocated_stack <= slot) {
  		verbose(env, "invalid read from stack off %d+0 size %d\n",
  			off, size);
  		return -EACCES;
  	}
- 	stype = state->stack[spi].slot_type;
+ 	stype = reg_state->stack[spi].slot_type;
  
  	if (stype[0] == STACK_SPILL) {
  		if (size != BPF_REG_SIZE) {
@@@ -778,21 -1112,44 +1112,44 @@@
  
  		if (value_regno >= 0) {
  			/* restore register state from stack */
- 			state->regs[value_regno] = state->stack[spi].spilled_ptr;
- 			mark_stack_slot_read(state, spi);
+ 			state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
+ 			/* mark reg as written since spilled pointer state likely
+ 			 * has its liveness marks cleared by is_state_visited()
+ 			 * which resets stack/reg liveness for state transitions
+ 			 */
+ 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
  		}
+ 		mark_stack_slot_read(env, vstate, vstate->parent, spi,
+ 				     reg_state->frameno);
  		return 0;
  	} else {
+ 		int zeros = 0;
+ 
  		for (i = 0; i < size; i++) {
- 			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_MISC) {
- 				verbose(env, "invalid read from stack off %d+%d size %d\n",
- 					off, i, size);
- 				return -EACCES;
+ 			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
+ 				continue;
+ 			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
+ 				zeros++;
+ 				continue;
+ 			}
+ 			verbose(env, "invalid read from stack off %d+%d size %d\n",
+ 				off, i, size);
+ 			return -EACCES;
+ 		}
+ 		mark_stack_slot_read(env, vstate, vstate->parent, spi,
+ 				     reg_state->frameno);
+ 		if (value_regno >= 0) {
+ 			if (zeros == size) {
+ 				/* any size read into register is zero extended,
+ 				 * so the whole register == const_zero
+ 				 */
+ 				__mark_reg_const_zero(&state->regs[value_regno]);
+ 			} else {
+ 				/* have read misc data from the stack */
+ 				mark_reg_unknown(env, state->regs, value_regno);
  			}
+ 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
  		}
- 		if (value_regno >= 0)
- 			/* have read misc data from the stack */
- 			mark_reg_unknown(env, state->regs, value_regno);
  		return 0;
  	}
  }
@@@ -817,7 -1174,8 +1174,8 @@@ static int __check_map_access(struct bp
  static int check_map_access(struct bpf_verifier_env *env, u32 regno,
  			    int off, int size, bool zero_size_allowed)
  {
- 	struct bpf_verifier_state *state = env->cur_state;
+ 	struct bpf_verifier_state *vstate = env->cur_state;
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
  	struct bpf_reg_state *reg = &state->regs[regno];
  	int err;
  
@@@ -1059,11 -1417,6 +1417,11 @@@ static int check_ptr_alignment(struct b
  		break;
  	case PTR_TO_STACK:
  		pointer_desc = "stack ";
 +		/* The stack spill tracking logic in check_stack_write()
 +		 * and check_stack_read() relies on stack accesses being
 +		 * aligned.
 +		 */
 +		strict = true;
  		break;
  	default:
  		break;
@@@ -1072,29 -1425,54 +1430,77 @@@
  					   strict);
  }
  
 +/* truncate register to smaller size (in bytes)
 + * must be called with size < BPF_REG_SIZE
 + */
 +static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
 +{
 +	u64 mask;
 +
 +	/* clear high bits in bit representation */
 +	reg->var_off = tnum_cast(reg->var_off, size);
 +
 +	/* fix arithmetic bounds */
 +	mask = ((u64)1 << (size * 8)) - 1;
 +	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
 +		reg->umin_value &= mask;
 +		reg->umax_value &= mask;
 +	} else {
 +		reg->umin_value = 0;
 +		reg->umax_value = mask;
 +	}
 +	reg->smin_value = reg->umin_value;
 +	reg->smax_value = reg->umax_value;
 +}
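/* Reader's note, not part of the patch: the same truncation arithmetic as
 * coerce_reg_to_size() above, in a standalone user-space form so the bounds
 * behaviour is easy to try out.  Purely illustrative; size must be < 8 here,
 * just as in the verifier.
 */
#include <stdint.h>
#include <stdio.h>

static void coerce_bounds(uint64_t *umin, uint64_t *umax, int size)
{
	uint64_t mask = ((uint64_t)1 << (size * 8)) - 1;

	if ((*umin & ~mask) == (*umax & ~mask)) {
		*umin &= mask;			/* high bits agree: keep low bits */
		*umax &= mask;
	} else {
		*umin = 0;			/* range crosses a wrap: widen */
		*umax = mask;
	}
}

int main(void)
{
	uint64_t lo = 0x105, hi = 0x13a;

	coerce_bounds(&lo, &hi, 1);
	printf("[%#llx, %#llx]\n", (unsigned long long)lo,
	       (unsigned long long)hi);		/* prints [0x5, 0x3a] */
	return 0;
}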
 +
+ static int update_stack_depth(struct bpf_verifier_env *env,
+ 			      const struct bpf_func_state *func,
+ 			      int off)
+ {
+ 	u16 stack = env->subprog_stack_depth[func->subprogno], total = 0;
+ 	struct bpf_verifier_state *cur = env->cur_state;
+ 	int i;
+ 
+ 	if (stack >= -off)
+ 		return 0;
+ 
+ 	/* update known max for given subprogram */
+ 	env->subprog_stack_depth[func->subprogno] = -off;
+ 
+ 	/* compute the total for current call chain */
+ 	for (i = 0; i <= cur->curframe; i++) {
+ 		u32 depth = env->subprog_stack_depth[cur->frame[i]->subprogno];
+ 
+ 		/* round up to 32-bytes, since this is granularity
+ 		 * of interpreter stack sizes
+ 		 */
+ 		depth = round_up(depth, 32);
+ 		total += depth;
+ 	}
+ 
+ 	if (total > MAX_BPF_STACK) {
+ 		verbose(env, "combined stack size of %d calls is %d. Too large\n",
+ 			cur->curframe, total);
+ 		return -EACCES;
+ 	}
+ 	return 0;
+ }
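/* Reader's note, not part of the patch: a worked example of the accounting
 * above.  Subprograms needing 40, 16 and 100 bytes of stack are rounded up
 * to 64 + 32 + 128 = 224 bytes for the whole call chain, well under
 * MAX_BPF_STACK (512), so the chain is accepted; four frames of 160 bytes
 * each (640 total) would be rejected.
 */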
+ 
+ static int get_callee_stack_depth(struct bpf_verifier_env *env,
+ 				  const struct bpf_insn *insn, int idx)
+ {
+ 	int start = idx + insn->imm + 1, subprog;
+ 
+ 	subprog = find_subprog(env, start);
+ 	if (subprog < 0) {
+ 		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
+ 			  start);
+ 		return -EFAULT;
+ 	}
+ 	subprog++;
+ 	return env->subprog_stack_depth[subprog];
+ }
+ 
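
A rough stand-alone sketch of the combined-stack check performed by
update_stack_depth() above (per-frame depths are made-up example values;
MAX_BPF_STACK is 512 bytes per frame):

/* Rough userspace sketch, not kernel code: each frame's depth is
 * rounded up to the interpreter's 32-byte granularity and the call
 * chain's total must stay within MAX_BPF_STACK.
 */
#include <stdio.h>

#define MAX_BPF_STACK	512

static unsigned int round_up_32(unsigned int x)
{
	return (x + 31) & ~31u;
}

int main(void)
{
	unsigned int depth[] = { 120, 200, 48 };	/* made-up per-frame depths */
	unsigned int total = 0;
	unsigned int i;

	for (i = 0; i < sizeof(depth) / sizeof(depth[0]); i++)
		total += round_up_32(depth[i]);		/* 128 + 224 + 64 = 416 */

	printf("combined stack: %u bytes (%s)\n", total,
	       total > MAX_BPF_STACK ? "too large" : "ok");
	return 0;
}
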
  /* check whether memory at (regno + off) is accessible for t = (read | write)
   * if t==write, value_regno is a register which value is stored into memory
   * if t==read, value_regno is a register which will receive the value from memory
@@@ -1105,9 -1483,9 +1511,9 @@@ static int check_mem_access(struct bpf_
  			    int bpf_size, enum bpf_access_type t,
  			    int value_regno)
  {
- 	struct bpf_verifier_state *state = env->cur_state;
  	struct bpf_reg_state *regs = cur_regs(env);
  	struct bpf_reg_state *reg = regs + regno;
+ 	struct bpf_func_state *state;
  	int size, err = 0;
  
  	size = bpf_size_to_bytes(bpf_size);
@@@ -1196,8 -1574,10 +1602,10 @@@
  			return -EACCES;
  		}
  
- 		if (env->prog->aux->stack_depth < -off)
- 			env->prog->aux->stack_depth = -off;
+ 		state = func(env, reg);
+ 		err = update_stack_depth(env, state, off);
+ 		if (err)
+ 			return err;
  
  		if (t == BPF_WRITE)
  			err = check_stack_write(env, state, off, size,
@@@ -1228,7 -1608,9 +1636,7 @@@
  	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
  	    regs[value_regno].type == SCALAR_VALUE) {
  		/* b/h/w load zero-extends, mark upper bits as known 0 */
 -		regs[value_regno].var_off =
 -			tnum_cast(regs[value_regno].var_off, size);
 -		__update_reg_bounds(&regs[value_regno]);
 +		coerce_reg_to_size(&regs[value_regno], size);
  	}
  	return err;
  }
@@@ -1269,12 -1651,6 +1677,6 @@@ static int check_xadd(struct bpf_verifi
  				BPF_SIZE(insn->code), BPF_WRITE, -1);
  }
  
- /* Does this register contain a constant zero? */
- static bool register_is_null(struct bpf_reg_state reg)
- {
- 	return reg.type == SCALAR_VALUE && tnum_equals_const(reg.var_off, 0);
- }
- 
  /* when register 'regno' is passed into function that will read 'access_size'
   * bytes from that pointer, make sure that it's within stack boundary
   * and all elements of stack are initialized.
@@@ -1285,32 -1661,31 +1687,32 @@@ static int check_stack_boundary(struct 
  				int access_size, bool zero_size_allowed,
  				struct bpf_call_arg_meta *meta)
  {
- 	struct bpf_verifier_state *state = env->cur_state;
- 	struct bpf_reg_state *regs = state->regs;
+ 	struct bpf_reg_state *reg = cur_regs(env) + regno;
+ 	struct bpf_func_state *state = func(env, reg);
  	int off, i, slot, spi;
  
- 	if (regs[regno].type != PTR_TO_STACK) {
+ 	if (reg->type != PTR_TO_STACK) {
  		/* Allow zero-byte read from NULL, regardless of pointer type */
  		if (zero_size_allowed && access_size == 0 &&
- 		    register_is_null(regs[regno]))
+ 		    register_is_null(reg))
  			return 0;
  
  		verbose(env, "R%d type=%s expected=%s\n", regno,
- 			reg_type_str[regs[regno].type],
+ 			reg_type_str[reg->type],
  			reg_type_str[PTR_TO_STACK]);
  		return -EACCES;
  	}
  
  	/* Only allow fixed-offset stack reads */
- 	if (!tnum_is_const(regs[regno].var_off)) {
+ 	if (!tnum_is_const(reg->var_off)) {
  		char tn_buf[48];
  
- 		tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
+ 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
  		verbose(env, "invalid variable stack read R%d var_off=%s\n",
  			regno, tn_buf);
 +		return -EACCES;
  	}
- 	off = regs[regno].off + regs[regno].var_off.value;
+ 	off = reg->off + reg->var_off.value;
  	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
  	    access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
  		verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
@@@ -1318,9 -1693,6 +1720,6 @@@
  		return -EACCES;
  	}
  
- 	if (env->prog->aux->stack_depth < -off)
- 		env->prog->aux->stack_depth = -off;
- 
  	if (meta && meta->raw_mode) {
  		meta->access_size = access_size;
  		meta->regno = regno;
@@@ -1328,17 -1700,32 +1727,32 @@@
  	}
  
  	for (i = 0; i < access_size; i++) {
+ 		u8 *stype;
+ 
  		slot = -(off + i) - 1;
  		spi = slot / BPF_REG_SIZE;
- 		if (state->allocated_stack <= slot ||
- 		    state->stack[spi].slot_type[slot % BPF_REG_SIZE] !=
- 			STACK_MISC) {
- 			verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
- 				off, i, access_size);
- 			return -EACCES;
+ 		if (state->allocated_stack <= slot)
+ 			goto err;
+ 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
+ 		if (*stype == STACK_MISC)
+ 			goto mark;
+ 		if (*stype == STACK_ZERO) {
+ 			/* helper can write anything into the stack */
+ 			*stype = STACK_MISC;
+ 			goto mark;
  		}
+ err:
+ 		verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
+ 			off, i, access_size);
+ 		return -EACCES;
+ mark:
+ 		/* reading any byte out of 8-byte 'spill_slot' will cause
+ 		 * the whole slot to be marked as 'read'
+ 		 */
+ 		mark_stack_slot_read(env, env->cur_state, env->cur_state->parent,
+ 				     spi, state->frameno);
  	}
- 	return 0;
+ 	return update_stack_depth(env, state, off);
  }
  
  static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
@@@ -1418,7 -1805,7 +1832,7 @@@ static int check_func_arg(struct bpf_ve
  		 * passed in as argument, it's a SCALAR_VALUE type. Final test
  		 * happens during stack boundary checking.
  		 */
- 		if (register_is_null(*reg) &&
+ 		if (register_is_null(reg) &&
  		    arg_type == ARG_PTR_TO_MEM_OR_NULL)
  			/* final test in check_stack_boundary() */;
  		else if (!type_is_pkt_pointer(type) &&
@@@ -1591,6 -1978,10 +2005,10 @@@ static int check_map_func_compatibility
  	case BPF_FUNC_tail_call:
  		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
  			goto error;
+ 		if (env->subprog_cnt) {
+ 			verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
+ 			return -EINVAL;
+ 		}
  		break;
  	case BPF_FUNC_perf_event_read:
  	case BPF_FUNC_perf_event_output:
@@@ -1652,9 -2043,9 +2070,9 @@@ static int check_raw_mode(const struct 
  /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
   * are now invalid, so turn them into unknown SCALAR_VALUE.
   */
- static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
+ static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
+ 				     struct bpf_func_state *state)
  {
- 	struct bpf_verifier_state *state = env->cur_state;
  	struct bpf_reg_state *regs = state->regs, *reg;
  	int i;
  
@@@ -1671,7 -2062,121 +2089,121 @@@
  	}
  }
  
- static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
+ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
+ {
+ 	struct bpf_verifier_state *vstate = env->cur_state;
+ 	int i;
+ 
+ 	for (i = 0; i <= vstate->curframe; i++)
+ 		__clear_all_pkt_pointers(env, vstate->frame[i]);
+ }
+ 
+ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ 			   int *insn_idx)
+ {
+ 	struct bpf_verifier_state *state = env->cur_state;
+ 	struct bpf_func_state *caller, *callee;
+ 	int i, subprog, target_insn;
+ 
+ 	if (state->curframe >= MAX_CALL_FRAMES) {
+ 		verbose(env, "the call stack of %d frames is too deep\n",
+ 			state->curframe);
+ 		return -E2BIG;
+ 	}
+ 
+ 	target_insn = *insn_idx + insn->imm;
+ 	subprog = find_subprog(env, target_insn + 1);
+ 	if (subprog < 0) {
+ 		verbose(env, "verifier bug. No program starts at insn %d\n",
+ 			target_insn + 1);
+ 		return -EFAULT;
+ 	}
+ 
+ 	caller = state->frame[state->curframe];
+ 	if (state->frame[state->curframe + 1]) {
+ 		verbose(env, "verifier bug. Frame %d already allocated\n",
+ 			state->curframe + 1);
+ 		return -EFAULT;
+ 	}
+ 
+ 	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
+ 	if (!callee)
+ 		return -ENOMEM;
+ 	state->frame[state->curframe + 1] = callee;
+ 
+ 	/* callee cannot access r0, r6 - r9 for reading and has to write
+ 	 * into its own stack before reading from it.
+ 	 * callee can read/write into caller's stack
+ 	 */
+ 	init_func_state(env, callee,
+ 			/* remember the callsite, it will be used by bpf_exit */
+ 			*insn_idx /* callsite */,
+ 			state->curframe + 1 /* frameno within this callchain */,
+ 			subprog + 1 /* subprog number within this prog */);
+ 
+ 	/* copy r1 - r5 args that callee can access */
+ 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
+ 		callee->regs[i] = caller->regs[i];
+ 
+ 	/* after the call registers r0 - r5 were scratched */
+ 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
+ 		mark_reg_not_init(env, caller->regs, caller_saved[i]);
+ 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
+ 	}
+ 
+ 	/* only increment it after check_reg_arg() finished */
+ 	state->curframe++;
+ 
+ 	/* and go analyze first insn of the callee */
+ 	*insn_idx = target_insn;
+ 
+ 	if (env->log.level) {
+ 		verbose(env, "caller:\n");
+ 		print_verifier_state(env, caller);
+ 		verbose(env, "callee:\n");
+ 		print_verifier_state(env, callee);
+ 	}
+ 	return 0;
+ }
+ 
+ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
+ {
+ 	struct bpf_verifier_state *state = env->cur_state;
+ 	struct bpf_func_state *caller, *callee;
+ 	struct bpf_reg_state *r0;
+ 
+ 	callee = state->frame[state->curframe];
+ 	r0 = &callee->regs[BPF_REG_0];
+ 	if (r0->type == PTR_TO_STACK) {
+ 		/* technically it's ok to return caller's stack pointer
+ 		 * (or caller's caller's pointer) back to the caller,
+ 		 * since these pointers are valid. Only current stack
+ 		 * pointer will be invalid as soon as function exits,
+ 		 * but let's be conservative
+ 		 */
+ 		verbose(env, "cannot return stack pointer to the caller\n");
+ 		return -EINVAL;
+ 	}
+ 
+ 	state->curframe--;
+ 	caller = state->frame[state->curframe];
+ 	/* return to the caller whatever r0 had in the callee */
+ 	caller->regs[BPF_REG_0] = *r0;
+ 
+ 	*insn_idx = callee->callsite + 1;
+ 	if (env->log.level) {
+ 		verbose(env, "returning from callee:\n");
+ 		print_verifier_state(env, callee);
+ 		verbose(env, "to caller at %d:\n", *insn_idx);
+ 		print_verifier_state(env, caller);
+ 	}
+ 	/* clear everything in the callee */
+ 	free_func_state(callee);
+ 	state->frame[state->curframe + 1] = NULL;
+ 	return 0;
+ }
+ 
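
The program shape that check_func_call() and prepare_func_exit() above
are built to verify looks roughly like this from C (illustrative only,
assuming a clang BPF build; the names are made up):

/* Illustrative sketch, not code from the patch: clang emits the call to
 * the noinline helper as a BPF_CALL insn with src_reg == BPF_PSEUDO_CALL,
 * the verifier allocates a new bpf_func_state frame and copies r1-r5 into
 * it, and prepare_func_exit() hands r0 back to the caller's frame.
 */
static __attribute__((noinline)) int times_two(int x)
{
	return x * 2;		/* runs in its own stack frame */
}

int main_prog(void *ctx)
{
	/* returning a pointer to times_two()'s stack would be rejected */
	return times_two(21);
}
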
+ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
  {
  	const struct bpf_func_proto *fn = NULL;
  	struct bpf_reg_state *regs;
@@@ -1701,13 -2206,7 +2233,13 @@@
  		return -EINVAL;
  	}
  
 +	/* With LD_ABS/IND some JITs save/restore skb from r1. */
  	changes_data = bpf_helper_changes_pkt_data(fn->func);
 +	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
 +		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
 +			func_id_name(func_id), func_id);
 +		return -EINVAL;
 +	}
  
  	memset(&meta, 0, sizeof(meta));
  	meta.pkt_access = fn->pkt_access;
@@@ -1799,6 -2298,14 +2331,6 @@@
  	return 0;
  }
  
 -static void coerce_reg_to_32(struct bpf_reg_state *reg)
 -{
 -	/* clear high 32 bits */
 -	reg->var_off = tnum_cast(reg->var_off, 4);
 -	/* Update bounds */
 -	__update_reg_bounds(reg);
 -}
 -
  static bool signed_add_overflows(s64 a, s64 b)
  {
  	/* Do the add in u64, where overflow is well-defined */
@@@ -1819,41 -2326,6 +2351,41 @@@ static bool signed_sub_overflows(s64 a
  	return res > a;
  }
  
 +static bool check_reg_sane_offset(struct bpf_verifier_env *env,
 +				  const struct bpf_reg_state *reg,
 +				  enum bpf_reg_type type)
 +{
 +	bool known = tnum_is_const(reg->var_off);
 +	s64 val = reg->var_off.value;
 +	s64 smin = reg->smin_value;
 +
 +	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
 +		verbose(env, "math between %s pointer and %lld is not allowed\n",
 +			reg_type_str[type], val);
 +		return false;
 +	}
 +
 +	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
 +		verbose(env, "%s pointer offset %d is not allowed\n",
 +			reg_type_str[type], reg->off);
 +		return false;
 +	}
 +
 +	if (smin == S64_MIN) {
 +		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
 +			reg_type_str[type]);
 +		return false;
 +	}
 +
 +	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
 +		verbose(env, "value %lld makes %s pointer be out of bounds\n",
 +			smin, reg_type_str[type]);
 +		return false;
 +	}
 +
 +	return true;
 +}
 +
  /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
   * Caller should also handle BPF_MOV case separately.
   * If we return -EACCES, caller may want to try again treating pointer as a
@@@ -1864,7 -2336,9 +2396,9 @@@ static int adjust_ptr_min_max_vals(stru
  				   const struct bpf_reg_state *ptr_reg,
  				   const struct bpf_reg_state *off_reg)
  {
- 	struct bpf_reg_state *regs = cur_regs(env), *dst_reg;
+ 	struct bpf_verifier_state *vstate = env->cur_state;
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
+ 	struct bpf_reg_state *regs = state->regs, *dst_reg;
  	bool known = tnum_is_const(off_reg->var_off);
  	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
  	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
@@@ -1876,13 -2350,13 +2410,13 @@@
  	dst_reg = &regs[dst];
  
  	if (WARN_ON_ONCE(known && (smin_val != smax_val))) {
- 		print_verifier_state(env, env->cur_state);
+ 		print_verifier_state(env, state);
  		verbose(env,
  			"verifier internal error: known but bad sbounds\n");
  		return -EINVAL;
  	}
  	if (WARN_ON_ONCE(known && (umin_val != umax_val))) {
- 		print_verifier_state(env, env->cur_state);
+ 		print_verifier_state(env, state);
  		verbose(env,
  			"verifier internal error: known but bad ubounds\n");
  		return -EINVAL;
@@@ -1890,25 -2364,29 +2424,25 @@@
  
  	if (BPF_CLASS(insn->code) != BPF_ALU64) {
  		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
 -		if (!env->allow_ptr_leaks)
 -			verbose(env,
 -				"R%d 32-bit pointer arithmetic prohibited\n",
 -				dst);
 +		verbose(env,
 +			"R%d 32-bit pointer arithmetic prohibited\n",
 +			dst);
  		return -EACCES;
  	}
  
  	if (ptr_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
 -		if (!env->allow_ptr_leaks)
 -			verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
 -				dst);
 +		verbose(env, "R%d pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL prohibited, null-check it first\n",
 +			dst);
  		return -EACCES;
  	}
  	if (ptr_reg->type == CONST_PTR_TO_MAP) {
 -		if (!env->allow_ptr_leaks)
 -			verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
 -				dst);
 +		verbose(env, "R%d pointer arithmetic on CONST_PTR_TO_MAP prohibited\n",
 +			dst);
  		return -EACCES;
  	}
  	if (ptr_reg->type == PTR_TO_PACKET_END) {
 -		if (!env->allow_ptr_leaks)
 -			verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
 -				dst);
 +		verbose(env, "R%d pointer arithmetic on PTR_TO_PACKET_END prohibited\n",
 +			dst);
  		return -EACCES;
  	}
  
@@@ -1918,10 -2396,6 +2452,10 @@@
  	dst_reg->type = ptr_reg->type;
  	dst_reg->id = ptr_reg->id;
  
 +	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
 +	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
 +		return -EINVAL;
 +
  	switch (opcode) {
  	case BPF_ADD:
  		/* We can take a fixed offset as long as it doesn't overflow
@@@ -1975,8 -2449,9 +2509,8 @@@
  	case BPF_SUB:
  		if (dst_reg == off_reg) {
  			/* scalar -= pointer.  Creates an unknown scalar */
 -			if (!env->allow_ptr_leaks)
 -				verbose(env, "R%d tried to subtract pointer from scalar\n",
 -					dst);
 +			verbose(env, "R%d tried to subtract pointer from scalar\n",
 +				dst);
  			return -EACCES;
  		}
  		/* We don't allow subtraction from FP, because (according to
@@@ -1984,8 -2459,9 +2518,8 @@@
  		 * be able to deal with it.
  		 */
  		if (ptr_reg->type == PTR_TO_STACK) {
 -			if (!env->allow_ptr_leaks)
 -				verbose(env, "R%d subtraction from stack pointer prohibited\n",
 -					dst);
 +			verbose(env, "R%d subtraction from stack pointer prohibited\n",
 +				dst);
  			return -EACCES;
  		}
  		if (known && (ptr_reg->off - smin_val ==
@@@ -2034,30 -2510,28 +2568,30 @@@
  	case BPF_AND:
  	case BPF_OR:
  	case BPF_XOR:
 -		/* bitwise ops on pointers are troublesome, prohibit for now.
 -		 * (However, in principle we could allow some cases, e.g.
 -		 * ptr &= ~3 which would reduce min_value by 3.)
 -		 */
 -		if (!env->allow_ptr_leaks)
 -			verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
 -				dst, bpf_alu_string[opcode >> 4]);
 +		/* bitwise ops on pointers are troublesome, prohibit. */
 +		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
 +			dst, bpf_alu_string[opcode >> 4]);
  		return -EACCES;
  	default:
  		/* other operators (e.g. MUL,LSH) produce non-pointer results */
 -		if (!env->allow_ptr_leaks)
 -			verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
 -				dst, bpf_alu_string[opcode >> 4]);
 +		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
 +			dst, bpf_alu_string[opcode >> 4]);
  		return -EACCES;
  	}
  
 +	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
 +		return -EINVAL;
 +
  	__update_reg_bounds(dst_reg);
  	__reg_deduce_bounds(dst_reg);
  	__reg_bound_offset(dst_reg);
  	return 0;
  }
  
 +/* WARNING: This function does calculations on 64-bit values, but the actual
 + * execution may occur on 32-bit values. Therefore, things like bitshifts
 + * need extra checks in the 32-bit case.
 + */
  static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
  				      struct bpf_insn *insn,
  				      struct bpf_reg_state *dst_reg,
@@@ -2068,8 -2542,12 +2602,8 @@@
  	bool src_known, dst_known;
  	s64 smin_val, smax_val;
  	u64 umin_val, umax_val;
 +	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
  
 -	if (BPF_CLASS(insn->code) != BPF_ALU64) {
 -		/* 32-bit ALU ops are (32,32)->64 */
 -		coerce_reg_to_32(dst_reg);
 -		coerce_reg_to_32(&src_reg);
 -	}
  	smin_val = src_reg.smin_value;
  	smax_val = src_reg.smax_value;
  	umin_val = src_reg.umin_value;
@@@ -2077,12 -2555,6 +2611,12 @@@
  	src_known = tnum_is_const(src_reg.var_off);
  	dst_known = tnum_is_const(dst_reg->var_off);
  
 +	if (!src_known &&
 +	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
 +		__mark_reg_unknown(dst_reg);
 +		return 0;
 +	}
 +
  	switch (opcode) {
  	case BPF_ADD:
  		if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
@@@ -2211,9 -2683,9 +2745,9 @@@
  		__update_reg_bounds(dst_reg);
  		break;
  	case BPF_LSH:
 -		if (umax_val > 63) {
 -			/* Shifts greater than 63 are undefined.  This includes
 -			 * shifts by a negative number.
 +		if (umax_val >= insn_bitness) {
 +			/* Shifts greater than 31 or 63 are undefined.
 +			 * This includes shifts by a negative number.
  			 */
  			mark_reg_unknown(env, regs, insn->dst_reg);
  			break;
@@@ -2239,29 -2711,27 +2773,29 @@@
  		__update_reg_bounds(dst_reg);
  		break;
  	case BPF_RSH:
 -		if (umax_val > 63) {
 -			/* Shifts greater than 63 are undefined.  This includes
 -			 * shifts by a negative number.
 +		if (umax_val >= insn_bitness) {
 +			/* Shifts greater than 31 or 63 are undefined.
 +			 * This includes shifts by a negative number.
  			 */
  			mark_reg_unknown(env, regs, insn->dst_reg);
  			break;
  		}
 -		/* BPF_RSH is an unsigned shift, so make the appropriate casts */
 -		if (dst_reg->smin_value < 0) {
 -			if (umin_val) {
 -				/* Sign bit will be cleared */
 -				dst_reg->smin_value = 0;
 -			} else {
 -				/* Lost sign bit information */
 -				dst_reg->smin_value = S64_MIN;
 -				dst_reg->smax_value = S64_MAX;
 -			}
 -		} else {
 -			dst_reg->smin_value =
 -				(u64)(dst_reg->smin_value) >> umax_val;
 -		}
 +		/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
 +		 * be negative, then either:
 +		 * 1) src_reg might be zero, so the sign bit of the result is
 +		 *    unknown, so we lose our signed bounds
 +		 * 2) it's known negative, thus the unsigned bounds capture the
 +		 *    signed bounds
 +		 * 3) the signed bounds cross zero, so they tell us nothing
 +		 *    about the result
 +		 * If the value in dst_reg is known nonnegative, then again the
 +		 * unsigned bounds capture the signed bounds.
 +		 * Thus, in all cases it suffices to blow away our signed bounds
 +		 * and rely on inferring new ones from the unsigned bounds and
 +		 * var_off of the result.
 +		 */
 +		dst_reg->smin_value = S64_MIN;
 +		dst_reg->smax_value = S64_MAX;
  		if (src_known)
  			dst_reg->var_off = tnum_rshift(dst_reg->var_off,
  						       umin_val);
@@@ -2277,12 -2747,6 +2811,12 @@@
  		break;
  	}
  
 +	if (BPF_CLASS(insn->code) != BPF_ALU64) {
 +		/* 32-bit ALU ops are (32,32)->32 */
 +		coerce_reg_to_size(dst_reg, 4);
 +		coerce_reg_to_size(&src_reg, 4);
 +	}
 +
  	__reg_deduce_bounds(dst_reg);
  	__reg_bound_offset(dst_reg);
  	return 0;
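
The hunk above moves the coercion after the bounds computation, so 32-bit
ALU ops are modelled as (32,32)->32 instead of being widened to 64 bit.
A small stand-alone sketch with made-up operand values:

/* Rough sketch, not kernel code: BPF_ALU (as opposed to BPF_ALU64)
 * writes only the low 32 bits of the result and zero-extends them.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t dst = 0xfffffff0ULL;			/* made-up operands */
	uint64_t src = 0x20ULL;
	uint64_t alu64 = dst + src;			/* 0x100000010 */
	uint64_t alu32 = (uint32_t)(dst + src);		/* wraps to 0x10, zero-extended */

	printf("alu64=%#llx alu32=%#llx\n",
	       (unsigned long long)alu64, (unsigned long long)alu32);
	return 0;
}
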
@@@ -2294,9 -2758,12 +2828,11 @@@
  static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
  				   struct bpf_insn *insn)
  {
- 	struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
+ 	struct bpf_verifier_state *vstate = env->cur_state;
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
+ 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
  	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
  	u8 opcode = BPF_OP(insn->code);
 -	int rc;
  
  	dst_reg = &regs[insn->dst_reg];
  	src_reg = NULL;
@@@ -2307,29 -2774,43 +2843,29 @@@
  		if (src_reg->type != SCALAR_VALUE) {
  			if (dst_reg->type != SCALAR_VALUE) {
  				/* Combining two pointers by any ALU op yields
 -				 * an arbitrary scalar.
 +				 * an arbitrary scalar. Disallow all math except
 +				 * pointer subtraction
  				 */
 -				if (!env->allow_ptr_leaks) {
 -					verbose(env, "R%d pointer %s pointer prohibited\n",
 -						insn->dst_reg,
 -						bpf_alu_string[opcode >> 4]);
 -					return -EACCES;
 +				if (opcode == BPF_SUB) {
 +					mark_reg_unknown(env, regs, insn->dst_reg);
 +					return 0;
  				}
 -				mark_reg_unknown(env, regs, insn->dst_reg);
 -				return 0;
 +				verbose(env, "R%d pointer %s pointer prohibited\n",
 +					insn->dst_reg,
 +					bpf_alu_string[opcode >> 4]);
 +				return -EACCES;
  			} else {
  				/* scalar += pointer
  				 * This is legal, but we have to reverse our
  				 * src/dest handling in computing the range
  				 */
 -				rc = adjust_ptr_min_max_vals(env, insn,
 -							     src_reg, dst_reg);
 -				if (rc == -EACCES && env->allow_ptr_leaks) {
 -					/* scalar += unknown scalar */
 -					__mark_reg_unknown(&off_reg);
 -					return adjust_scalar_min_max_vals(
 -							env, insn,
 -							dst_reg, off_reg);
 -				}
 -				return rc;
 +				return adjust_ptr_min_max_vals(env, insn,
 +							       src_reg, dst_reg);
  			}
  		} else if (ptr_reg) {
  			/* pointer += scalar */
 -			rc = adjust_ptr_min_max_vals(env, insn,
 -						     dst_reg, src_reg);
 -			if (rc == -EACCES && env->allow_ptr_leaks) {
 -				/* unknown scalar += scalar */
 -				__mark_reg_unknown(dst_reg);
 -				return adjust_scalar_min_max_vals(
 -						env, insn, dst_reg, *src_reg);
 -			}
 -			return rc;
 +			return adjust_ptr_min_max_vals(env, insn,
 +						       dst_reg, src_reg);
  		}
  	} else {
  		/* Pretend the src is a reg with a known value, since we only
@@@ -2338,19 -2819,27 +2874,19 @@@
  		off_reg.type = SCALAR_VALUE;
  		__mark_reg_known(&off_reg, insn->imm);
  		src_reg = &off_reg;
 -		if (ptr_reg) { /* pointer += K */
 -			rc = adjust_ptr_min_max_vals(env, insn,
 -						     ptr_reg, src_reg);
 -			if (rc == -EACCES && env->allow_ptr_leaks) {
 -				/* unknown scalar += K */
 -				__mark_reg_unknown(dst_reg);
 -				return adjust_scalar_min_max_vals(
 -						env, insn, dst_reg, off_reg);
 -			}
 -			return rc;
 -		}
 +		if (ptr_reg) /* pointer += K */
 +			return adjust_ptr_min_max_vals(env, insn,
 +						       ptr_reg, src_reg);
  	}
  
  	/* Got here implies adding two SCALAR_VALUEs */
  	if (WARN_ON_ONCE(ptr_reg)) {
- 		print_verifier_state(env, env->cur_state);
+ 		print_verifier_state(env, state);
  		verbose(env, "verifier internal error: unexpected ptr_reg\n");
  		return -EINVAL;
  	}
  	if (WARN_ON(!src_reg)) {
- 		print_verifier_state(env, env->cur_state);
+ 		print_verifier_state(env, state);
  		verbose(env, "verifier internal error: no src_reg\n");
  		return -EINVAL;
  	}
@@@ -2437,20 -2926,17 +2973,20 @@@ static int check_alu_op(struct bpf_veri
  					return -EACCES;
  				}
  				mark_reg_unknown(env, regs, insn->dst_reg);
 -				/* high 32 bits are known zero. */
 -				regs[insn->dst_reg].var_off = tnum_cast(
 -						regs[insn->dst_reg].var_off, 4);
 -				__update_reg_bounds(&regs[insn->dst_reg]);
 +				coerce_reg_to_size(&regs[insn->dst_reg], 4);
  			}
  		} else {
  			/* case: R = imm
  			 * remember the value we stored into this reg
  			 */
  			regs[insn->dst_reg].type = SCALAR_VALUE;
 -			__mark_reg_known(regs + insn->dst_reg, insn->imm);
 +			if (BPF_CLASS(insn->code) == BPF_ALU64) {
 +				__mark_reg_known(regs + insn->dst_reg,
 +						 insn->imm);
 +			} else {
 +				__mark_reg_known(regs + insn->dst_reg,
 +						 (u32)insn->imm);
 +			}
  		}
  
  	} else if (opcode > BPF_END) {
@@@ -2507,14 -2993,15 +3043,15 @@@
  	return 0;
  }
  
- static void find_good_pkt_pointers(struct bpf_verifier_state *state,
+ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
  				   struct bpf_reg_state *dst_reg,
  				   enum bpf_reg_type type,
  				   bool range_right_open)
  {
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
  	struct bpf_reg_state *regs = state->regs, *reg;
  	u16 new_range;
- 	int i;
+ 	int i, j;
  
  	if (dst_reg->off < 0 ||
  	    (dst_reg->off == 0 && range_right_open))
@@@ -2584,12 -3071,15 +3121,15 @@@
  			/* keep the maximum range already checked */
  			regs[i].range = max(regs[i].range, new_range);
  
- 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- 		if (state->stack[i].slot_type[0] != STACK_SPILL)
- 			continue;
- 		reg = &state->stack[i].spilled_ptr;
- 		if (reg->type == type && reg->id == dst_reg->id)
- 			reg->range = max(reg->range, new_range);
+ 	for (j = 0; j <= vstate->curframe; j++) {
+ 		state = vstate->frame[j];
+ 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ 			if (state->stack[i].slot_type[0] != STACK_SPILL)
+ 				continue;
+ 			reg = &state->stack[i].spilled_ptr;
+ 			if (reg->type == type && reg->id == dst_reg->id)
+ 				reg->range = max(reg->range, new_range);
+ 		}
  	}
  }
  
@@@ -2827,20 -3317,24 +3367,24 @@@ static void mark_map_reg(struct bpf_reg
  /* The logic is similar to find_good_pkt_pointers(), both could eventually
   * be folded together at some point.
   */
- static void mark_map_regs(struct bpf_verifier_state *state, u32 regno,
+ static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno,
  			  bool is_null)
  {
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
  	struct bpf_reg_state *regs = state->regs;
  	u32 id = regs[regno].id;
- 	int i;
+ 	int i, j;
  
  	for (i = 0; i < MAX_BPF_REG; i++)
  		mark_map_reg(regs, i, id, is_null);
  
- 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- 		if (state->stack[i].slot_type[0] != STACK_SPILL)
- 			continue;
- 		mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
+ 	for (j = 0; j <= vstate->curframe; j++) {
+ 		state = vstate->frame[j];
+ 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ 			if (state->stack[i].slot_type[0] != STACK_SPILL)
+ 				continue;
+ 			mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
+ 		}
  	}
  }
  
@@@ -2940,8 -3434,10 +3484,10 @@@ static bool try_match_pkt_pointers(cons
  static int check_cond_jmp_op(struct bpf_verifier_env *env,
  			     struct bpf_insn *insn, int *insn_idx)
  {
- 	struct bpf_verifier_state *other_branch, *this_branch = env->cur_state;
- 	struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
+ 	struct bpf_verifier_state *this_branch = env->cur_state;
+ 	struct bpf_verifier_state *other_branch;
+ 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
+ 	struct bpf_reg_state *dst_reg, *other_branch_regs;
  	u8 opcode = BPF_OP(insn->code);
  	int err;
  
@@@ -2984,8 -3480,9 +3530,9 @@@
  	if (BPF_SRC(insn->code) == BPF_K &&
  	    (opcode == BPF_JEQ || opcode == BPF_JNE) &&
  	    dst_reg->type == SCALAR_VALUE &&
- 	    tnum_equals_const(dst_reg->var_off, insn->imm)) {
- 		if (opcode == BPF_JEQ) {
+ 	    tnum_is_const(dst_reg->var_off)) {
+ 		if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) ||
+ 		    (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) {
  			/* if (imm == imm) goto pc+off;
  			 * only follow the goto, ignore fall-through
  			 */
@@@ -3003,6 -3500,7 +3550,7 @@@
  	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
  	if (!other_branch)
  		return -EFAULT;
+ 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
  
  	/* detect if we are comparing against a constant value so we can adjust
  	 * our min/max values for our dst register.
@@@ -3015,22 -3513,22 +3563,22 @@@
  		if (dst_reg->type == SCALAR_VALUE &&
  		    regs[insn->src_reg].type == SCALAR_VALUE) {
  			if (tnum_is_const(regs[insn->src_reg].var_off))
- 				reg_set_min_max(&other_branch->regs[insn->dst_reg],
+ 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
  						dst_reg, regs[insn->src_reg].var_off.value,
  						opcode);
  			else if (tnum_is_const(dst_reg->var_off))
- 				reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
+ 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
  						    &regs[insn->src_reg],
  						    dst_reg->var_off.value, opcode);
  			else if (opcode == BPF_JEQ || opcode == BPF_JNE)
  				/* Comparing for equality, we can combine knowledge */
- 				reg_combine_min_max(&other_branch->regs[insn->src_reg],
- 						    &other_branch->regs[insn->dst_reg],
+ 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
+ 						    &other_branch_regs[insn->dst_reg],
  						    &regs[insn->src_reg],
  						    &regs[insn->dst_reg], opcode);
  		}
  	} else if (dst_reg->type == SCALAR_VALUE) {
- 		reg_set_min_max(&other_branch->regs[insn->dst_reg],
+ 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
  					dst_reg, insn->imm, opcode);
  	}
  
@@@ -3051,7 -3549,7 +3599,7 @@@
  		return -EACCES;
  	}
  	if (env->log.level)
- 		print_verifier_state(env, this_branch);
+ 		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
  	return 0;
  }
  
@@@ -3136,6 -3634,18 +3684,18 @@@ static int check_ld_abs(struct bpf_veri
  		return -EINVAL;
  	}
  
+ 	if (env->subprog_cnt) {
+ 		/* when a program has an LD_ABS insn, JITs and the interpreter
+ 		 * assume that r1 == ctx == skb, which is not the case for
+ 		 * callees that can have arbitrary arguments. It's problematic
+ 		 * for main prog as well since JITs would need to analyze
+ 		 * all functions in order to make proper register save/restore
+ 		 * decisions in the main prog. Hence disallow LD_ABS with calls
+ 		 */
+ 		verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
+ 		return -EINVAL;
+ 	}
+ 
  	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
  	    BPF_SIZE(insn->code) == BPF_DW ||
  	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
@@@ -3312,6 -3822,10 +3872,10 @@@ static int check_cfg(struct bpf_verifie
  	int ret = 0;
  	int i, t;
  
+ 	ret = check_subprogs(env);
+ 	if (ret < 0)
+ 		return ret;
+ 
  	insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
  	if (!insn_state)
  		return -ENOMEM;
@@@ -3344,6 -3858,14 +3908,14 @@@ peek_stack
  				goto err_free;
  			if (t + 1 < insn_cnt)
  				env->explored_states[t + 1] = STATE_LIST_MARK;
+ 			if (insns[t].src_reg == BPF_PSEUDO_CALL) {
+ 				env->explored_states[t] = STATE_LIST_MARK;
+ 				ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
+ 				if (ret == 1)
+ 					goto peek_stack;
+ 				else if (ret < 0)
+ 					goto err_free;
+ 			}
  		} else if (opcode == BPF_JA) {
  			if (BPF_SRC(insns[t].code) != BPF_K) {
  				ret = -EINVAL;
@@@ -3462,11 -3984,21 +4034,21 @@@ static bool check_ids(u32 old_id, u32 c
  static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
  		    struct idpair *idmap)
  {
+ 	bool equal;
+ 
  	if (!(rold->live & REG_LIVE_READ))
  		/* explored state didn't use this */
  		return true;
  
- 	if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, live)) == 0)
+ 	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, frameno)) == 0;
+ 
+ 	if (rold->type == PTR_TO_STACK)
+ 		/* two stack pointers are equal only if they're pointing to
+ 		 * the same stack frame, since fp-8 in foo != fp-8 in bar
+ 		 */
+ 		return equal && rold->frameno == rcur->frameno;
+ 
+ 	if (equal)
  		return true;
  
  	if (rold->type == NOT_INIT)
@@@ -3481,14 -4013,15 +4063,14 @@@
  			return range_within(rold, rcur) &&
  			       tnum_in(rold->var_off, rcur->var_off);
  		} else {
 -			/* if we knew anything about the old value, we're not
 -			 * equal, because we can't know anything about the
 -			 * scalar value of the pointer in the new value.
 +			/* We're trying to use a pointer in place of a scalar.
 +			 * Even if the scalar was unbounded, this could lead to
 +			 * pointer leaks because scalars are allowed to leak
 +			 * while pointers are not. We could make this safe in
 +			 * special cases if root is calling us, but it's
 +			 * probably not worth the hassle.
  			 */
 -			return rold->umin_value == 0 &&
 -			       rold->umax_value == U64_MAX &&
 -			       rold->smin_value == S64_MIN &&
 -			       rold->smax_value == S64_MAX &&
 -			       tnum_is_unknown(rold->var_off);
 +			return false;
  		}
  	case PTR_TO_MAP_VALUE:
  		/* If the new min/max/var_off satisfy the old ones and
@@@ -3538,7 -4071,6 +4120,6 @@@
  		       tnum_in(rold->var_off, rcur->var_off);
  	case PTR_TO_CTX:
  	case CONST_PTR_TO_MAP:
- 	case PTR_TO_STACK:
  	case PTR_TO_PACKET_END:
  		/* Only valid matches are exact, which memcmp() above
  		 * would have accepted
@@@ -3553,8 -4085,8 +4134,8 @@@
  	return false;
  }
  
- static bool stacksafe(struct bpf_verifier_state *old,
- 		      struct bpf_verifier_state *cur,
+ static bool stacksafe(struct bpf_func_state *old,
+ 		      struct bpf_func_state *cur,
  		      struct idpair *idmap)
  {
  	int i, spi;
@@@ -3572,8 -4104,19 +4153,19 @@@
  	for (i = 0; i < old->allocated_stack; i++) {
  		spi = i / BPF_REG_SIZE;
  
+ 		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
+ 			/* explored state didn't use this */
+ 			return true;
+ 
  		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
  			continue;
+ 		/* if old state was safe with misc data in the stack
+ 		 * it will be safe with zero-initialized stack.
+ 		 * The opposite is not true
+ 		 */
+ 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
+ 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
+ 			continue;
  		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
  		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
  			/* Ex: old explored (safe) state has STACK_SPILL in
@@@ -3630,9 -4173,8 +4222,8 @@@
   * whereas register type in current state is meaningful, it means that
   * the current state will reach 'bpf_exit' instruction safely
   */
- static bool states_equal(struct bpf_verifier_env *env,
- 			 struct bpf_verifier_state *old,
- 			 struct bpf_verifier_state *cur)
+ static bool func_states_equal(struct bpf_func_state *old,
+ 			      struct bpf_func_state *cur)
  {
  	struct idpair *idmap;
  	bool ret = false;
@@@ -3656,71 -4198,72 +4247,72 @@@ out_free
  	return ret;
  }
  
+ static bool states_equal(struct bpf_verifier_env *env,
+ 			 struct bpf_verifier_state *old,
+ 			 struct bpf_verifier_state *cur)
+ {
+ 	int i;
+ 
+ 	if (old->curframe != cur->curframe)
+ 		return false;
+ 
+ 	/* for states to be equal callsites have to be the same
+ 	 * and all frame states need to be equivalent
+ 	 */
+ 	for (i = 0; i <= old->curframe; i++) {
+ 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
+ 			return false;
+ 		if (!func_states_equal(old->frame[i], cur->frame[i]))
+ 			return false;
+ 	}
+ 	return true;
+ }
+ 
  /* A write screens off any subsequent reads; but write marks come from the
-  * straight-line code between a state and its parent.  When we arrive at a
-  * jump target (in the first iteration of the propagate_liveness() loop),
-  * we didn't arrive by the straight-line code, so read marks in state must
-  * propagate to parent regardless of state's write marks.
+  * straight-line code between a state and its parent.  When we arrive at an
+  * equivalent state (jump target or such) we didn't arrive by the straight-line
+  * code, so read marks in the state must propagate to the parent regardless
+  * of the state's write marks. That's what 'parent == state->parent' comparison
+  * in mark_reg_read() and mark_stack_slot_read() is for.
   */
- static bool do_propagate_liveness(const struct bpf_verifier_state *state,
- 				  struct bpf_verifier_state *parent)
+ static int propagate_liveness(struct bpf_verifier_env *env,
+ 			      const struct bpf_verifier_state *vstate,
+ 			      struct bpf_verifier_state *vparent)
  {
- 	bool writes = parent == state->parent; /* Observe write marks */
- 	bool touched = false; /* any changes made? */
- 	int i;
+ 	int i, frame, err = 0;
+ 	struct bpf_func_state *state, *parent;
  
- 	if (!parent)
- 		return touched;
+ 	if (vparent->curframe != vstate->curframe) {
+ 		WARN(1, "propagate_live: parent frame %d current frame %d\n",
+ 		     vparent->curframe, vstate->curframe);
+ 		return -EFAULT;
+ 	}
  	/* Propagate read liveness of registers... */
  	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
  	/* We don't need to worry about FP liveness because it's read-only */
  	for (i = 0; i < BPF_REG_FP; i++) {
- 		if (parent->regs[i].live & REG_LIVE_READ)
+ 		if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
  			continue;
- 		if (writes && (state->regs[i].live & REG_LIVE_WRITTEN))
- 			continue;
- 		if (state->regs[i].live & REG_LIVE_READ) {
- 			parent->regs[i].live |= REG_LIVE_READ;
- 			touched = true;
+ 		if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
+ 			err = mark_reg_read(env, vstate, vparent, i);
+ 			if (err)
+ 				return err;
  		}
  	}
+ 
  	/* ... and stack slots */
- 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
- 		    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
- 		if (parent->stack[i].slot_type[0] != STACK_SPILL)
- 			continue;
- 		if (state->stack[i].slot_type[0] != STACK_SPILL)
- 			continue;
- 		if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
- 			continue;
- 		if (writes &&
- 		    (state->stack[i].spilled_ptr.live & REG_LIVE_WRITTEN))
- 			continue;
- 		if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) {
- 			parent->stack[i].spilled_ptr.live |= REG_LIVE_READ;
- 			touched = true;
+ 	for (frame = 0; frame <= vstate->curframe; frame++) {
+ 		state = vstate->frame[frame];
+ 		parent = vparent->frame[frame];
+ 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
+ 			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
+ 			if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
+ 				continue;
+ 			if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
+ 				mark_stack_slot_read(env, vstate, vparent, i, frame);
  		}
  	}
- 	return touched;
- }
- 
- /* "parent" is "a state from which we reach the current state", but initially
-  * it is not the state->parent (i.e. "the state whose straight-line code leads
-  * to the current state"), instead it is the state that happened to arrive at
-  * a (prunable) equivalent of the current state.  See comment above
-  * do_propagate_liveness() for consequences of this.
-  * This function is just a more efficient way of calling mark_reg_read() or
-  * mark_stack_slot_read() on each reg in "parent" that is read in "state",
-  * though it requires that parent != state->parent in the call arguments.
-  */
- static void propagate_liveness(const struct bpf_verifier_state *state,
- 			       struct bpf_verifier_state *parent)
- {
- 	while (do_propagate_liveness(state, parent)) {
- 		/* Something changed, so we need to feed those changes onward */
- 		state = parent;
- 		parent = state->parent;
- 	}
+ 	return err;
  }
  
  static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
@@@ -3728,7 -4271,7 +4320,7 @@@
  	struct bpf_verifier_state_list *new_sl;
  	struct bpf_verifier_state_list *sl;
  	struct bpf_verifier_state *cur = env->cur_state;
- 	int i, err;
+ 	int i, j, err;
  
  	sl = env->explored_states[insn_idx];
  	if (!sl)
@@@ -3749,7 -4292,9 +4341,9 @@@
  			 * they'll be immediately forgotten as we're pruning
  			 * this state and will pop a new one.
  			 */
- 			propagate_liveness(&sl->state, cur);
+ 			err = propagate_liveness(env, &sl->state, cur);
+ 			if (err)
+ 				return err;
  			return 1;
  		}
  		sl = sl->next;
@@@ -3757,9 -4302,10 +4351,10 @@@
  
  	/* there were no equivalent states, remember current one.
  	 * technically the current state is not proven to be safe yet,
- 	 * but it will either reach bpf_exit (which means it's safe) or
- 	 * it will be rejected. Since there are no loops, we won't be
- 	 * seeing this 'insn_idx' instruction again on the way to bpf_exit
+ 	 * but it will either reach the outermost bpf_exit (which means it's safe)
+ 	 * or it will be rejected. Since there are no loops, we won't be
+ 	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
+ 	 * again on the way to bpf_exit
  	 */
  	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
  	if (!new_sl)
@@@ -3783,10 -4329,15 +4378,15 @@@
  	 * explored_states can get read marks.)
  	 */
  	for (i = 0; i < BPF_REG_FP; i++)
- 		cur->regs[i].live = REG_LIVE_NONE;
- 	for (i = 0; i < cur->allocated_stack / BPF_REG_SIZE; i++)
- 		if (cur->stack[i].slot_type[0] == STACK_SPILL)
- 			cur->stack[i].spilled_ptr.live = REG_LIVE_NONE;
+ 		cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
+ 
+ 	/* all stack frames are accessible from callee, clear them all */
+ 	for (j = 0; j <= cur->curframe; j++) {
+ 		struct bpf_func_state *frame = cur->frame[j];
+ 
+ 		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++)
+ 			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
+ 	}
  	return 0;
  }
  
@@@ -3804,7 -4355,7 +4404,7 @@@ static int do_check(struct bpf_verifier
  	struct bpf_verifier_state *state;
  	struct bpf_insn *insns = env->prog->insnsi;
  	struct bpf_reg_state *regs;
- 	int insn_cnt = env->prog->len;
+ 	int insn_cnt = env->prog->len, i;
  	int insn_idx, prev_insn_idx = 0;
  	int insn_processed = 0;
  	bool do_print_state = false;
@@@ -3812,9 -4363,18 +4412,18 @@@
  	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
  	if (!state)
  		return -ENOMEM;
- 	env->cur_state = state;
- 	init_reg_state(env, state->regs);
+ 	state->curframe = 0;
  	state->parent = NULL;
+ 	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
+ 	if (!state->frame[0]) {
+ 		kfree(state);
+ 		return -ENOMEM;
+ 	}
+ 	env->cur_state = state;
+ 	init_func_state(env, state->frame[0],
+ 			BPF_MAIN_FUNC /* callsite */,
+ 			0 /* frameno */,
+ 			0 /* subprogno, zero == main subprog */);
  	insn_idx = 0;
  	for (;;) {
  		struct bpf_insn *insn;
@@@ -3861,7 -4421,7 +4470,7 @@@
  			else
  				verbose(env, "\nfrom %d to %d:",
  					prev_insn_idx, insn_idx);
- 			print_verifier_state(env, state);
+ 			print_verifier_state(env, state->frame[state->curframe]);
  			do_print_state = false;
  		}
  
@@@ -3994,13 -4554,17 +4603,17 @@@
  			if (opcode == BPF_CALL) {
  				if (BPF_SRC(insn->code) != BPF_K ||
  				    insn->off != 0 ||
- 				    insn->src_reg != BPF_REG_0 ||
+ 				    (insn->src_reg != BPF_REG_0 &&
+ 				     insn->src_reg != BPF_PSEUDO_CALL) ||
  				    insn->dst_reg != BPF_REG_0) {
  					verbose(env, "BPF_CALL uses reserved fields\n");
  					return -EINVAL;
  				}
  
- 				err = check_call(env, insn->imm, insn_idx);
+ 				if (insn->src_reg == BPF_PSEUDO_CALL)
+ 					err = check_func_call(env, insn, &insn_idx);
+ 				else
+ 					err = check_helper_call(env, insn->imm, insn_idx);
  				if (err)
  					return err;
  
@@@ -4025,6 -4589,16 +4638,16 @@@
  					return -EINVAL;
  				}
  
+ 				if (state->curframe) {
+ 					/* exit from nested function */
+ 					prev_insn_idx = insn_idx;
+ 					err = prepare_func_exit(env, &insn_idx);
+ 					if (err)
+ 						return err;
+ 					do_print_state = true;
+ 					continue;
+ 				}
+ 
  				/* eBPF calling convention is such that R0 is used
  				 * to return the value from eBPF program.
  				 * Make sure that it's readable at this time
@@@ -4085,8 -4659,16 +4708,16 @@@ process_bpf_exit
  		insn_idx++;
  	}
  
- 	verbose(env, "processed %d insns, stack depth %d\n", insn_processed,
- 		env->prog->aux->stack_depth);
+ 	verbose(env, "processed %d insns, stack depth ", insn_processed);
+ 	for (i = 0; i < env->subprog_cnt + 1; i++) {
+ 		u32 depth = env->subprog_stack_depth[i];
+ 
+ 		verbose(env, "%d", depth);
+ 		if (i + 1 < env->subprog_cnt + 1)
+ 			verbose(env, "+");
+ 	}
+ 	verbose(env, "\n");
+ 	env->prog->aux->stack_depth = env->subprog_stack_depth[0];
  	return 0;
  }
  
@@@ -4272,6 -4854,19 +4903,19 @@@ static int adjust_insn_aux_data(struct 
  	return 0;
  }
  
+ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
+ {
+ 	int i;
+ 
+ 	if (len == 1)
+ 		return;
+ 	for (i = 0; i < env->subprog_cnt; i++) {
+ 		if (env->subprog_starts[i] < off)
+ 			continue;
+ 		env->subprog_starts[i] += len - 1;
+ 	}
+ }
+ 
  static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
  					    const struct bpf_insn *patch, u32 len)
  {
@@@ -4282,6 -4877,7 +4926,7 @@@
  		return NULL;
  	if (adjust_insn_aux_data(env, new_prog->len, off, len))
  		return NULL;
+ 	adjust_subprog_starts(env, off, len);
  	return new_prog;
  }
  
@@@ -4416,6 -5012,150 +5061,150 @@@ static int convert_ctx_accesses(struct 
  	return 0;
  }
  
+ static int jit_subprogs(struct bpf_verifier_env *env)
+ {
+ 	struct bpf_prog *prog = env->prog, **func, *tmp;
+ 	int i, j, subprog_start, subprog_end = 0, len, subprog;
+ 	struct bpf_insn *insn = prog->insnsi;
+ 	void *old_bpf_func;
+ 	int err = -ENOMEM;
+ 
+ 	if (env->subprog_cnt == 0)
+ 		return 0;
+ 
+ 	for (i = 0; i < prog->len; i++, insn++) {
+ 		if (insn->code != (BPF_JMP | BPF_CALL) ||
+ 		    insn->src_reg != BPF_PSEUDO_CALL)
+ 			continue;
+ 		subprog = find_subprog(env, i + insn->imm + 1);
+ 		if (subprog < 0) {
+ 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
+ 				  i + insn->imm + 1);
+ 			return -EFAULT;
+ 		}
+ 		/* temporarily remember subprog id inside insn instead of
+ 		 * aux_data, since next loop will split up all insns into funcs
+ 		 */
+ 		insn->off = subprog + 1;
+ 		/* remember original imm in case JIT fails and fallback
+ 		 * to interpreter will be needed
+ 		 */
+ 		env->insn_aux_data[i].call_imm = insn->imm;
+ 		/* point imm to __bpf_call_base+1 from JITs point of view */
+ 		insn->imm = 1;
+ 	}
+ 
+ 	func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL);
+ 	if (!func)
+ 		return -ENOMEM;
+ 
+ 	for (i = 0; i <= env->subprog_cnt; i++) {
+ 		subprog_start = subprog_end;
+ 		if (env->subprog_cnt == i)
+ 			subprog_end = prog->len;
+ 		else
+ 			subprog_end = env->subprog_starts[i];
+ 
+ 		len = subprog_end - subprog_start;
+ 		func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
+ 		if (!func[i])
+ 			goto out_free;
+ 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
+ 		       len * sizeof(struct bpf_insn));
+ 		func[i]->len = len;
+ 		func[i]->is_func = 1;
+ 		/* Use bpf_prog_F_tag to indicate functions in stack traces.
+ 		 * Long term would need debug info to populate names
+ 		 */
+ 		func[i]->aux->name[0] = 'F';
+ 		func[i]->aux->stack_depth = env->subprog_stack_depth[i];
+ 		func[i]->jit_requested = 1;
+ 		func[i] = bpf_int_jit_compile(func[i]);
+ 		if (!func[i]->jited) {
+ 			err = -ENOTSUPP;
+ 			goto out_free;
+ 		}
+ 		cond_resched();
+ 	}
+ 	/* at this point all bpf functions were successfully JITed
+ 	 * now populate all bpf_calls with correct addresses and
+ 	 * run last pass of JIT
+ 	 */
+ 	for (i = 0; i <= env->subprog_cnt; i++) {
+ 		insn = func[i]->insnsi;
+ 		for (j = 0; j < func[i]->len; j++, insn++) {
+ 			if (insn->code != (BPF_JMP | BPF_CALL) ||
+ 			    insn->src_reg != BPF_PSEUDO_CALL)
+ 				continue;
+ 			subprog = insn->off;
+ 			insn->off = 0;
+ 			insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
+ 				func[subprog]->bpf_func -
+ 				__bpf_call_base;
+ 		}
+ 	}
+ 	for (i = 0; i <= env->subprog_cnt; i++) {
+ 		old_bpf_func = func[i]->bpf_func;
+ 		tmp = bpf_int_jit_compile(func[i]);
+ 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
+ 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
+ 			err = -EFAULT;
+ 			goto out_free;
+ 		}
+ 		cond_resched();
+ 	}
+ 
+ 	/* finally lock prog and jit images for all functions and
+ 	 * populate kallsyms
+ 	 */
+ 	for (i = 0; i <= env->subprog_cnt; i++) {
+ 		bpf_prog_lock_ro(func[i]);
+ 		bpf_prog_kallsyms_add(func[i]);
+ 	}
+ 	prog->jited = 1;
+ 	prog->bpf_func = func[0]->bpf_func;
+ 	prog->aux->func = func;
+ 	prog->aux->func_cnt = env->subprog_cnt + 1;
+ 	return 0;
+ out_free:
+ 	for (i = 0; i <= env->subprog_cnt; i++)
+ 		if (func[i])
+ 			bpf_jit_free(func[i]);
+ 	kfree(func);
+ 	/* cleanup main prog to be interpreted */
+ 	prog->jit_requested = 0;
+ 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+ 		if (insn->code != (BPF_JMP | BPF_CALL) ||
+ 		    insn->src_reg != BPF_PSEUDO_CALL)
+ 			continue;
+ 		insn->off = 0;
+ 		insn->imm = env->insn_aux_data[i].call_imm;
+ 	}
+ 	return err;
+ }
+ 
+ static int fixup_call_args(struct bpf_verifier_env *env)
+ {
+ 	struct bpf_prog *prog = env->prog;
+ 	struct bpf_insn *insn = prog->insnsi;
+ 	int i, depth;
+ 
+ 	if (env->prog->jit_requested)
+ 		if (jit_subprogs(env) == 0)
+ 			return 0;
+ 
+ 	for (i = 0; i < prog->len; i++, insn++) {
+ 		if (insn->code != (BPF_JMP | BPF_CALL) ||
+ 		    insn->src_reg != BPF_PSEUDO_CALL)
+ 			continue;
+ 		depth = get_callee_stack_depth(env, insn, i);
+ 		if (depth < 0)
+ 			return depth;
+ 		bpf_patch_call_args(insn, depth);
+ 	}
+ 	return 0;
+ }
+ 
  /* fixup insn->imm field of bpf_call instructions
   * and inline eligible helpers as explicit sequence of BPF instructions
   *
@@@ -4435,11 -5175,15 +5224,15 @@@ static int fixup_bpf_calls(struct bpf_v
  	for (i = 0; i < insn_cnt; i++, insn++) {
  		if (insn->code != (BPF_JMP | BPF_CALL))
  			continue;
+ 		if (insn->src_reg == BPF_PSEUDO_CALL)
+ 			continue;
  
  		if (insn->imm == BPF_FUNC_get_route_realm)
  			prog->dst_needed = 1;
  		if (insn->imm == BPF_FUNC_get_prandom_u32)
  			bpf_user_rnd_init_once();
+ 		if (insn->imm == BPF_FUNC_override_return)
+ 			prog->kprobe_override = 1;
  		if (insn->imm == BPF_FUNC_tail_call) {
  			/* If we tail call into other programs, we
  			 * cannot make any assumptions since they can
@@@ -4462,7 -5206,7 +5255,7 @@@
  		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
  		 * handlers are currently limited to 64 bit only.
  		 */
- 		if (ebpf_jit_enabled() && BITS_PER_LONG == 64 &&
+ 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
  		    insn->imm == BPF_FUNC_map_lookup_elem) {
  			map_ptr = env->insn_aux_data[i + delta].map_ptr;
  			if (map_ptr == BPF_MAP_PTR_POISON ||
@@@ -4614,12 -5358,12 +5407,12 @@@ int bpf_check(struct bpf_prog **prog, u
  	if (!env->explored_states)
  		goto skip_full_check;
  
+ 	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
+ 
  	ret = check_cfg(env);
  	if (ret < 0)
  		goto skip_full_check;
  
- 	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
- 
  	ret = do_check(env);
  	if (env->cur_state) {
  		free_verifier_state(env->cur_state, true);
@@@ -4640,6 -5384,9 +5433,9 @@@ skip_full_check
  	if (ret == 0)
  		ret = fixup_bpf_calls(env);
  
+ 	if (ret == 0)
+ 		ret = fixup_call_args(env);
+ 
  	if (log->level && bpf_verifier_log_full(log))
  		ret = -ENOSPC;
  	if (log->level && !log->ubuf) {
diff --combined kernel/events/core.c
index 4dd0e1ea876d,878d86c513d6..812f03e99ef3
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@@ -4511,11 -4511,11 +4511,11 @@@ perf_read(struct file *file, char __use
  	return ret;
  }
  
 -static unsigned int perf_poll(struct file *file, poll_table *wait)
 +static __poll_t perf_poll(struct file *file, poll_table *wait)
  {
  	struct perf_event *event = file->private_data;
  	struct ring_buffer *rb;
 -	unsigned int events = POLLHUP;
 +	__poll_t events = POLLHUP;
  
  	poll_wait(file, &event->waitq, wait);
  
@@@ -4723,6 -4723,9 +4723,9 @@@ static long _perf_ioctl(struct perf_eve
  		rcu_read_unlock();
  		return 0;
  	}
+ 
+ 	case PERF_EVENT_IOC_QUERY_BPF:
+ 		return perf_event_query_prog_array(event, (void __user *)arg);
  	default:
  		return -ENOTTY;
  	}
@@@ -8080,6 -8083,13 +8083,13 @@@ static int perf_event_set_bpf_prog(stru
  		return -EINVAL;
  	}
  
+ 	/* Kprobe override only works for kprobes, not uprobes. */
+ 	if (prog->kprobe_override &&
+ 	    !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) {
+ 		bpf_prog_put(prog);
+ 		return -EINVAL;
+ 	}
+ 
  	if (is_tracepoint || is_syscall_tp) {
  		int off = trace_event_get_offsets(event->tp_event);
  
diff --combined kernel/module.c
index 8042b8fcbf14,bd695bfdc5c4..83075a104710
--- a/kernel/module.c
+++ b/kernel/module.c
@@@ -3118,7 -3118,11 +3118,11 @@@ static int find_module_sections(struct 
  					     sizeof(*mod->ftrace_callsites),
  					     &mod->num_ftrace_callsites);
  #endif
- 
+ #ifdef CONFIG_BPF_KPROBE_OVERRIDE
+ 	mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list",
+ 					    sizeof(*mod->kprobe_ei_funcs),
+ 					    &mod->num_kprobe_ei_funcs);
+ #endif
  	mod->extable = section_objs(info, "__ex_table",
  				    sizeof(*mod->extable), &mod->num_exentries);
  
@@@ -3938,12 -3942,6 +3942,12 @@@ static const char *get_ksymbol(struct m
  	return symname(kallsyms, best);
  }
  
 +void * __weak dereference_module_function_descriptor(struct module *mod,
 +						     void *ptr)
 +{
 +	return ptr;
 +}
 +
  /* For kallsyms to ask for address resolution.  NULL means not found.  Careful
   * not to lock to avoid deadlock on oopses, simply disable preemption. */
  const char *module_address_lookup(unsigned long addr,
diff --combined net/atm/common.c
index 8f12f1c6fa14,5763fd241dc3..6523f38c4957
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@@ -14,7 -14,7 +14,7 @@@
  #include <linux/capability.h>
  #include <linux/mm.h>
  #include <linux/sched/signal.h>
- #include <linux/time.h>		/* struct timeval */
+ #include <linux/time64.h>	/* 64-bit time for seconds */
  #include <linux/skbuff.h>
  #include <linux/bitops.h>
  #include <linux/init.h>
@@@ -648,11 -648,11 +648,11 @@@ out
  	return error;
  }
  
 -unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
 +__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
  {
  	struct sock *sk = sock->sk;
  	struct atm_vcc *vcc;
 -	unsigned int mask;
 +	__poll_t mask;
  
  	sock_poll_wait(file, sk_sleep(sk), wait);
  	mask = 0;
diff --combined net/batman-adv/icmp_socket.c
index a98e0a986cef,8041cf106c37..581375d0eed2
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@@ -1,3 -1,4 +1,4 @@@
+ // SPDX-License-Identifier: GPL-2.0
  /* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
   *
   * Marek Lindner
@@@ -26,6 -27,7 +27,7 @@@
  #include <linux/export.h>
  #include <linux/fcntl.h>
  #include <linux/fs.h>
+ #include <linux/gfp.h>
  #include <linux/if_ether.h>
  #include <linux/kernel.h>
  #include <linux/list.h>
@@@ -42,11 -44,11 +44,11 @@@
  #include <linux/string.h>
  #include <linux/uaccess.h>
  #include <linux/wait.h>
+ #include <uapi/linux/batadv_packet.h>
  
  #include "hard-interface.h"
  #include "log.h"
  #include "originator.h"
- #include "packet.h"
  #include "send.h"
  
  static struct batadv_socket_client *batadv_socket_client_hash[256];
@@@ -55,6 -57,9 +57,9 @@@ static void batadv_socket_add_packet(st
  				     struct batadv_icmp_header *icmph,
  				     size_t icmp_len);
  
+ /**
+  * batadv_socket_init() - Initialize soft interface independent socket data
+  */
  void batadv_socket_init(void)
  {
  	memset(batadv_socket_client_hash, 0, sizeof(batadv_socket_client_hash));
@@@ -292,7 -297,7 +297,7 @@@ out
  	return len;
  }
  
 -static unsigned int batadv_socket_poll(struct file *file, poll_table *wait)
 +static __poll_t batadv_socket_poll(struct file *file, poll_table *wait)
  {
  	struct batadv_socket_client *socket_client = file->private_data;
  
@@@ -314,6 -319,12 +319,12 @@@ static const struct file_operations bat
  	.llseek = no_llseek,
  };
  
+ /**
+  * batadv_socket_setup() - Create debugfs "socket" file
+  * @bat_priv: the bat priv with all the soft interface information
+  *
+  * Return: 0 on success or negative error number in case of failure
+  */
  int batadv_socket_setup(struct batadv_priv *bat_priv)
  {
  	struct dentry *d;
@@@ -333,7 -344,7 +344,7 @@@ err
  }
  
  /**
-  * batadv_socket_add_packet - schedule an icmp packet to be sent to
+  * batadv_socket_add_packet() - schedule an icmp packet to be sent to
   *  userspace on an icmp socket.
   * @socket_client: the socket this packet belongs to
   * @icmph: pointer to the header of the icmp packet
@@@ -390,7 -401,7 +401,7 @@@ static void batadv_socket_add_packet(st
  }
  
  /**
-  * batadv_socket_receive_packet - schedule an icmp packet to be received
+  * batadv_socket_receive_packet() - schedule an icmp packet to be received
   *  locally and sent to userspace.
   * @icmph: pointer to the header of the icmp packet
   * @icmp_len: total length of the icmp packet
diff --combined net/batman-adv/log.c
index 76451460c98d,da004980ab8b..9be74a44e99d
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@@ -1,3 -1,4 +1,4 @@@
+ // SPDX-License-Identifier: GPL-2.0
  /* Copyright (C) 2010-2017  B.A.T.M.A.N. contributors:
   *
   * Marek Lindner
@@@ -24,6 -25,7 +25,7 @@@
  #include <linux/export.h>
  #include <linux/fcntl.h>
  #include <linux/fs.h>
+ #include <linux/gfp.h>
  #include <linux/jiffies.h>
  #include <linux/kernel.h>
  #include <linux/module.h>
@@@ -86,6 -88,13 +88,13 @@@ static int batadv_fdebug_log(struct bat
  	return 0;
  }
  
+ /**
+  * batadv_debug_log() - Add debug log entry
+  * @bat_priv: the bat priv with all the soft interface information
+  * @fmt: format string
+  *
+  * Return: 0 on success or negative error number in case of failure
+  */
  int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
  {
  	va_list args;
@@@ -176,7 -185,7 +185,7 @@@ static ssize_t batadv_log_read(struct f
  	return error;
  }
  
 -static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
 +static __poll_t batadv_log_poll(struct file *file, poll_table *wait)
  {
  	struct batadv_priv *bat_priv = file->private_data;
  	struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
@@@ -197,6 -206,12 +206,12 @@@ static const struct file_operations bat
  	.llseek         = no_llseek,
  };
  
+ /**
+  * batadv_debug_log_setup() - Initialize debug log
+  * @bat_priv: the bat priv with all the soft interface information
+  *
+  * Return: 0 on success or negative error number in case of failure
+  */
  int batadv_debug_log_setup(struct batadv_priv *bat_priv)
  {
  	struct dentry *d;
@@@ -222,6 -237,10 +237,10 @@@ err
  	return -ENOMEM;
  }
  
+ /**
+  * batadv_debug_log_cleanup() - Destroy debug log
+  * @bat_priv: the bat priv with all the soft interface information
+  */
  void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
  {
  	kfree(bat_priv->debug_log);
diff --combined net/bluetooth/af_bluetooth.c
index 671b907ba678,f044202346c6..f897681780db
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@@ -421,7 -421,7 +421,7 @@@ out
  }
  EXPORT_SYMBOL(bt_sock_stream_recvmsg);
  
 -static inline unsigned int bt_accept_poll(struct sock *parent)
 +static inline __poll_t bt_accept_poll(struct sock *parent)
  {
  	struct bt_sock *s, *n;
  	struct sock *sk;
@@@ -437,11 -437,11 +437,11 @@@
  	return 0;
  }
  
 -unsigned int bt_sock_poll(struct file *file, struct socket *sock,
 +__poll_t bt_sock_poll(struct file *file, struct socket *sock,
  			  poll_table *wait)
  {
  	struct sock *sk = sock->sk;
 -	unsigned int mask = 0;
 +	__poll_t mask = 0;
  
  	BT_DBG("sock %p, sk %p", sock, sk);
  
@@@ -766,43 -766,39 +766,39 @@@ static int __init bt_init(void
  		return err;
  
  	err = sock_register(&bt_sock_family_ops);
- 	if (err < 0) {
- 		bt_sysfs_cleanup();
- 		return err;
- 	}
+ 	if (err)
+ 		goto cleanup_sysfs;
  
  	BT_INFO("HCI device and connection manager initialized");
  
  	err = hci_sock_init();
- 	if (err < 0)
- 		goto error;
+ 	if (err)
+ 		goto unregister_socket;
  
  	err = l2cap_init();
- 	if (err < 0)
- 		goto sock_err;
+ 	if (err)
+ 		goto cleanup_socket;
  
  	err = sco_init();
- 	if (err < 0) {
- 		l2cap_exit();
- 		goto sock_err;
- 	}
+ 	if (err)
+ 		goto cleanup_cap;
  
  	err = mgmt_init();
- 	if (err < 0) {
- 		sco_exit();
- 		l2cap_exit();
- 		goto sock_err;
- 	}
+ 	if (err)
+ 		goto cleanup_sco;
  
  	return 0;
  
- sock_err:
+ cleanup_sco:
+ 	sco_exit();
+ cleanup_cap:
+ 	l2cap_exit();
+ cleanup_socket:
  	hci_sock_cleanup();
- 
- error:
+ unregister_socket:
  	sock_unregister(PF_BLUETOOTH);
+ cleanup_sysfs:
  	bt_sysfs_cleanup();
- 
  	return err;
  }
  
diff --combined net/core/dev.c
index 01ee854454a8,c7db39926769..59ead3910ab7
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@@ -1542,6 -1542,23 +1542,23 @@@ void dev_disable_lro(struct net_device 
  }
  EXPORT_SYMBOL(dev_disable_lro);
  
+ /**
+  *	dev_disable_gro_hw - disable HW Generic Receive Offload on a device
+  *	@dev: device
+  *
+  *	Disable HW Generic Receive Offload (GRO_HW) on a net device.  Must be
+  *	called under RTNL.  This is needed if Generic XDP is installed on
+  *	the device.
+  */
+ static void dev_disable_gro_hw(struct net_device *dev)
+ {
+ 	dev->wanted_features &= ~NETIF_F_GRO_HW;
+ 	netdev_update_features(dev);
+ 
+ 	if (unlikely(dev->features & NETIF_F_GRO_HW))
+ 		netdev_WARN(dev, "failed to disable GRO_HW!\n");
+ }
+ 
  static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
  				   struct net_device *dev)
  {
@@@ -2803,7 -2820,7 +2820,7 @@@ struct sk_buff *__skb_gso_segment(struc
  
  	segs = skb_mac_gso_segment(skb, features);
  
- 	if (unlikely(skb_needs_check(skb, tx_path)))
+ 	if (unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
  		skb_warn_bad_offload(skb);
  
  	return segs;
@@@ -3162,6 -3179,21 +3179,21 @@@ static inline int __dev_xmit_skb(struc
  	int rc;
  
  	qdisc_calculate_pkt_len(skb, q);
+ 
+ 	if (q->flags & TCQ_F_NOLOCK) {
+ 		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+ 			__qdisc_drop(skb, &to_free);
+ 			rc = NET_XMIT_DROP;
+ 		} else {
+ 			rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
+ 			__qdisc_run(q);
+ 		}
+ 
+ 		if (unlikely(to_free))
+ 			kfree_skb_list(to_free);
+ 		return rc;
+ 	}
+ 
  	/*
  	 * Heuristic to force contended enqueues to serialize on a
  	 * separate lock before trying to get qdisc main lock.
@@@ -3192,9 -3224,9 +3224,9 @@@
  				contended = false;
  			}
  			__qdisc_run(q);
- 		} else
- 			qdisc_run_end(q);
+ 		}
  
+ 		qdisc_run_end(q);
  		rc = NET_XMIT_SUCCESS;
  	} else {
  		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
@@@ -3204,6 -3236,7 +3236,7 @@@
  				contended = false;
  			}
  			__qdisc_run(q);
+ 			qdisc_run_end(q);
  		}
  	}
  	spin_unlock(root_lock);
@@@ -3904,7 -3937,7 +3937,7 @@@ static u32 netif_receive_generic_xdp(st
  				     hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0,
  				     troom > 0 ? troom + 128 : 0, GFP_ATOMIC))
  			goto do_drop;
 -		if (troom > 0 && __skb_linearize(skb))
 +		if (skb_linearize(skb))
  			goto do_drop;
  	}
  
@@@ -4143,19 -4176,22 +4176,22 @@@ static __latent_entropy void net_tx_act
  
  		while (head) {
  			struct Qdisc *q = head;
- 			spinlock_t *root_lock;
+ 			spinlock_t *root_lock = NULL;
  
  			head = head->next_sched;
  
- 			root_lock = qdisc_lock(q);
- 			spin_lock(root_lock);
+ 			if (!(q->flags & TCQ_F_NOLOCK)) {
+ 				root_lock = qdisc_lock(q);
+ 				spin_lock(root_lock);
+ 			}
  			/* We need to make sure head->next_sched is read
  			 * before clearing __QDISC_STATE_SCHED
  			 */
  			smp_mb__before_atomic();
  			clear_bit(__QDISC_STATE_SCHED, &q->state);
  			qdisc_run(q);
- 			spin_unlock(root_lock);
+ 			if (root_lock)
+ 				spin_unlock(root_lock);
  		}
  	}
  }
@@@ -4545,6 -4581,7 +4581,7 @@@ static int generic_xdp_install(struct n
  		} else if (new && !old) {
  			static_key_slow_inc(&generic_xdp_needed);
  			dev_disable_lro(dev);
+ 			dev_disable_gro_hw(dev);
  		}
  		break;
  
@@@ -7073,17 -7110,21 +7110,21 @@@ int dev_change_proto_down(struct net_de
  }
  EXPORT_SYMBOL(dev_change_proto_down);
  
- u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id)
+ void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op,
+ 		     struct netdev_bpf *xdp)
  {
- 	struct netdev_bpf xdp;
- 
- 	memset(&xdp, 0, sizeof(xdp));
- 	xdp.command = XDP_QUERY_PROG;
+ 	memset(xdp, 0, sizeof(*xdp));
+ 	xdp->command = XDP_QUERY_PROG;
  
  	/* Query must always succeed. */
- 	WARN_ON(bpf_op(dev, &xdp) < 0);
- 	if (prog_id)
- 		*prog_id = xdp.prog_id;
+ 	WARN_ON(bpf_op(dev, xdp) < 0);
+ }
+ 
+ static u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op)
+ {
+ 	struct netdev_bpf xdp;
+ 
+ 	__dev_xdp_query(dev, bpf_op, &xdp);
  
  	return xdp.prog_attached;
  }
@@@ -7106,6 -7147,27 +7147,27 @@@ static int dev_xdp_install(struct net_d
  	return bpf_op(dev, &xdp);
  }
  
+ static void dev_xdp_uninstall(struct net_device *dev)
+ {
+ 	struct netdev_bpf xdp;
+ 	bpf_op_t ndo_bpf;
+ 
+ 	/* Remove generic XDP */
+ 	WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL));
+ 
+ 	/* Remove from the driver */
+ 	ndo_bpf = dev->netdev_ops->ndo_bpf;
+ 	if (!ndo_bpf)
+ 		return;
+ 
+ 	__dev_xdp_query(dev, ndo_bpf, &xdp);
+ 	if (xdp.prog_attached == XDP_ATTACHED_NONE)
+ 		return;
+ 
+ 	/* Program removal should always succeed */
+ 	WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, NULL));
+ }
+ 
  /**
   *	dev_change_xdp_fd - set or clear a bpf program for a device rx path
   *	@dev: device
@@@ -7134,10 -7196,10 +7196,10 @@@ int dev_change_xdp_fd(struct net_devic
  		bpf_chk = generic_xdp_install;
  
  	if (fd >= 0) {
- 		if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL))
+ 		if (bpf_chk && __dev_xdp_attached(dev, bpf_chk))
  			return -EEXIST;
  		if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) &&
- 		    __dev_xdp_attached(dev, bpf_op, NULL))
+ 		    __dev_xdp_attached(dev, bpf_op))
  			return -EBUSY;
  
  		prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP,
@@@ -7236,6 -7298,7 +7298,7 @@@ static void rollback_registered_many(st
  		/* Shutdown queueing discipline. */
  		dev_shutdown(dev);
  
+ 		dev_xdp_uninstall(dev);
  
  		/* Notify protocols, that we are about to destroy
  		 * this device. They should clean all the things.
@@@ -7379,6 -7442,18 +7442,18 @@@ static netdev_features_t netdev_fix_fea
  		features &= ~dev->gso_partial_features;
  	}
  
+ 	if (!(features & NETIF_F_RXCSUM)) {
+ 		/* NETIF_F_GRO_HW implies doing RXCSUM since every packet
+ 		 * successfully merged by hardware must also have the
+ 		 * checksum verified by hardware.  If the user does not
+ 		 * want to enable RXCSUM, logically, we should disable GRO_HW.
+ 		 */
+ 		if (features & NETIF_F_GRO_HW) {
+ 			netdev_dbg(dev, "Dropping NETIF_F_GRO_HW since no RXCSUM feature.\n");
+ 			features &= ~NETIF_F_GRO_HW;
+ 		}
+ 	}
+ 
  	return features;
  }
  
@@@ -8195,7 -8270,6 +8270,6 @@@ EXPORT_SYMBOL(alloc_netdev_mqs)
  void free_netdev(struct net_device *dev)
  {
  	struct napi_struct *p, *n;
- 	struct bpf_prog *prog;
  
  	might_sleep();
  	netif_free_tx_queues(dev);
@@@ -8214,12 -8288,6 +8288,6 @@@
  	free_percpu(dev->pcpu_refcnt);
  	dev->pcpu_refcnt = NULL;
  
- 	prog = rcu_dereference_protected(dev->xdp_prog, 1);
- 	if (prog) {
- 		bpf_prog_put(prog);
- 		static_key_slow_dec(&generic_xdp_needed);
- 	}
- 
  	/*  Compatibility with error handling in drivers */
  	if (dev->reg_state == NETREG_UNINITIALIZED) {
  		netdev_freemem(dev);
diff --combined net/core/sock.c
index 1211159718ad,72d14b221784..420c380bc61d
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@@ -145,6 -145,8 +145,8 @@@
  static DEFINE_MUTEX(proto_list_mutex);
  static LIST_HEAD(proto_list);
  
+ static void sock_inuse_add(struct net *net, int val);
+ 
  /**
   * sk_ns_capable - General socket capability test
   * @sk: Socket to use a capability on or through
@@@ -1531,8 -1533,11 +1533,11 @@@ struct sock *sk_alloc(struct net *net, 
  		sk->sk_kern_sock = kern;
  		sock_lock_init(sk);
  		sk->sk_net_refcnt = kern ? 0 : 1;
- 		if (likely(sk->sk_net_refcnt))
+ 		if (likely(sk->sk_net_refcnt)) {
  			get_net(net);
+ 			sock_inuse_add(net, 1);
+ 		}
+ 
  		sock_net_set(sk, net);
  		refcount_set(&sk->sk_wmem_alloc, 1);
  
@@@ -1595,6 -1600,9 +1600,9 @@@ void sk_destruct(struct sock *sk
  
  static void __sk_free(struct sock *sk)
  {
+ 	if (likely(sk->sk_net_refcnt))
+ 		sock_inuse_add(sock_net(sk), -1);
+ 
  	if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
  		sock_diag_broadcast_destroy(sk);
  	else
@@@ -1716,6 -1724,8 +1724,8 @@@ struct sock *sk_clone_lock(const struc
  		newsk->sk_priority = 0;
  		newsk->sk_incoming_cpu = raw_smp_processor_id();
  		atomic64_set(&newsk->sk_cookie, 0);
+ 		if (likely(newsk->sk_net_refcnt))
+ 			sock_inuse_add(sock_net(newsk), 1);
  
  		/*
  		 * Before updating sk_refcnt, we must commit prior changes to memory
@@@ -2496,7 -2506,7 +2506,7 @@@ int sock_no_getname(struct socket *sock
  }
  EXPORT_SYMBOL(sock_no_getname);
  
 -unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
 +__poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
  {
  	return 0;
  }
@@@ -3045,7 -3055,7 +3055,7 @@@ static DECLARE_BITMAP(proto_inuse_idx, 
  
  void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
  {
- 	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
+ 	__this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
  }
  EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
  
@@@ -3055,21 -3065,50 +3065,50 @@@ int sock_prot_inuse_get(struct net *net
  	int res = 0;
  
  	for_each_possible_cpu(cpu)
- 		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
+ 		res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
  
  	return res >= 0 ? res : 0;
  }
  EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
  
+ static void sock_inuse_add(struct net *net, int val)
+ {
+ 	this_cpu_add(*net->core.sock_inuse, val);
+ }
+ 
+ int sock_inuse_get(struct net *net)
+ {
+ 	int cpu, res = 0;
+ 
+ 	for_each_possible_cpu(cpu)
+ 		res += *per_cpu_ptr(net->core.sock_inuse, cpu);
+ 
+ 	return res;
+ }
+ 
+ EXPORT_SYMBOL_GPL(sock_inuse_get);
+ 
  static int __net_init sock_inuse_init_net(struct net *net)
  {
- 	net->core.inuse = alloc_percpu(struct prot_inuse);
- 	return net->core.inuse ? 0 : -ENOMEM;
+ 	net->core.prot_inuse = alloc_percpu(struct prot_inuse);
+ 	if (net->core.prot_inuse == NULL)
+ 		return -ENOMEM;
+ 
+ 	net->core.sock_inuse = alloc_percpu(int);
+ 	if (net->core.sock_inuse == NULL)
+ 		goto out;
+ 
+ 	return 0;
+ 
+ out:
+ 	free_percpu(net->core.prot_inuse);
+ 	return -ENOMEM;
  }
  
  static void __net_exit sock_inuse_exit_net(struct net *net)
  {
- 	free_percpu(net->core.inuse);
+ 	free_percpu(net->core.prot_inuse);
+ 	free_percpu(net->core.sock_inuse);
  }
  
  static struct pernet_operations net_inuse_ops = {
@@@ -3112,6 -3151,10 +3151,10 @@@ static inline void assign_proto_idx(str
  static inline void release_proto_idx(struct proto *prot)
  {
  }
+ 
+ static void sock_inuse_add(struct net *net, int val)
+ {
+ }
  #endif
  
  static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
diff --combined net/dccp/proto.c
index 8b8db3d481bd,7a75a1d3568b..ada84f62b6bd
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@@ -110,7 -110,7 +110,7 @@@ void dccp_set_state(struct sock *sk, co
  	/* Change state AFTER socket is unhashed to avoid closed
  	 * socket sitting in hash tables.
  	 */
- 	sk->sk_state = state;
+ 	inet_sk_set_state(sk, state);
  }
  
  EXPORT_SYMBOL_GPL(dccp_set_state);
@@@ -318,10 -318,10 +318,10 @@@ EXPORT_SYMBOL_GPL(dccp_disconnect)
   *	take care of normal races (between the test and the event) and we don't
   *	go look at any of the socket buffers directly.
   */
 -unsigned int dccp_poll(struct file *file, struct socket *sock,
 +__poll_t dccp_poll(struct file *file, struct socket *sock,
  		       poll_table *wait)
  {
 -	unsigned int mask;
 +	__poll_t mask;
  	struct sock *sk = sock->sk;
  
  	sock_poll_wait(file, sk_sleep(sk), wait);
diff --combined net/ipv4/ip_gre.c
index 45ffd3d045d2,90c912307814..78365094f56c
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@@ -114,7 -114,8 +114,8 @@@ MODULE_PARM_DESC(log_ecn_error, "Log pa
  static struct rtnl_link_ops ipgre_link_ops __read_mostly;
  static int ipgre_tunnel_init(struct net_device *dev);
  static void erspan_build_header(struct sk_buff *skb,
- 				__be32 id, u32 index, bool truncate);
+ 				__be32 id, u32 index,
+ 				bool truncate, bool is_ipv4);
  
  static unsigned int ipgre_net_id __read_mostly;
  static unsigned int gre_tap_net_id __read_mostly;
@@@ -255,34 -256,43 +256,43 @@@ static int erspan_rcv(struct sk_buff *s
  {
  	struct net *net = dev_net(skb->dev);
  	struct metadata_dst *tun_dst = NULL;
+ 	struct erspan_base_hdr *ershdr;
+ 	struct erspan_metadata *pkt_md;
  	struct ip_tunnel_net *itn;
  	struct ip_tunnel *tunnel;
- 	struct erspanhdr *ershdr;
  	const struct iphdr *iph;
- 	__be32 index;
+ 	int ver;
  	int len;
  
  	itn = net_generic(net, erspan_net_id);
  	len = gre_hdr_len + sizeof(*ershdr);
  
+ 	/* Check base hdr len */
  	if (unlikely(!pskb_may_pull(skb, len)))
  		return PACKET_REJECT;
  
  	iph = ip_hdr(skb);
- 	ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len);
+ 	ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
+ 	ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
  
  	/* The original GRE header does not have key field,
  	 * Use ERSPAN 10-bit session ID as key.
  	 */
  	tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
- 	index = ershdr->md.index;
  	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
  				  tpi->flags | TUNNEL_KEY,
  				  iph->saddr, iph->daddr, tpi->key);
  
  	if (tunnel) {
+ 		len = gre_hdr_len + erspan_hdr_len(ver);
+ 		if (unlikely(!pskb_may_pull(skb, len)))
+ 			return PACKET_REJECT;
+ 
+ 		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
+ 		pkt_md = (struct erspan_metadata *)(ershdr + 1);
+ 
  		if (__iptunnel_pull_header(skb,
- 					   gre_hdr_len + sizeof(*ershdr),
+ 					   len,
  					   htons(ETH_P_TEB),
  					   false, false) < 0)
  			goto drop;
@@@ -303,15 -313,32 +313,32 @@@
  				return PACKET_REJECT;
  
  			md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
- 			if (!md)
+ 			if (!md) {
+ 				dst_release((struct dst_entry *)tun_dst);
  				return PACKET_REJECT;
+ 			}
+ 
+ 			memcpy(md, pkt_md, sizeof(*md));
+ 			md->version = ver;
  
- 			md->index = index;
  			info = &tun_dst->u.tun_info;
  			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
  			info->options_len = sizeof(*md);
  		} else {
- 			tunnel->index = ntohl(index);
+ 			tunnel->erspan_ver = ver;
+ 			if (ver == 1) {
+ 				tunnel->index = ntohl(pkt_md->u.index);
+ 			} else {
+ 				u16 md2_flags;
+ 				u16 dir, hwid;
+ 
+ 				md2_flags = ntohs(pkt_md->u.md2.flags);
+ 				dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
+ 				hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
+ 				tunnel->dir = dir;
+ 				tunnel->hwid = hwid;
+ 			}
+ 
  		}
  
  		skb_reset_mac_header(skb);
@@@ -405,14 -432,17 +432,17 @@@ static int gre_rcv(struct sk_buff *skb
  	if (hdr_len < 0)
  		goto drop;
  
- 	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) {
+ 	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
+ 		     tpi.proto == htons(ETH_P_ERSPAN2))) {
  		if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
  			return 0;
+ 		goto out;
  	}
  
  	if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
  		return 0;
  
+ out:
  	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
  drop:
  	kfree_skb(skb);
@@@ -560,6 -590,7 +590,7 @@@ static void erspan_fb_xmit(struct sk_bu
  	bool truncate = false;
  	struct flowi4 fl;
  	int tunnel_hlen;
+ 	int version;
  	__be16 df;
  
  	tun_info = skb_tunnel_info(skb);
@@@ -568,9 -599,13 +599,13 @@@
  		goto err_free_skb;
  
  	key = &tun_info->key;
+ 	md = ip_tunnel_info_opts(tun_info);
+ 	if (!md)
+ 		goto err_free_rt;
  
  	/* ERSPAN has fixed 8 byte GRE header */
- 	tunnel_hlen = 8 + sizeof(struct erspanhdr);
+ 	version = md->version;
+ 	tunnel_hlen = 8 + erspan_hdr_len(version);
  
  	rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen);
  	if (!rt)
@@@ -584,12 -619,23 +619,23 @@@
  		truncate = true;
  	}
  
- 	md = ip_tunnel_info_opts(tun_info);
- 	if (!md)
- 		goto err_free_rt;
+ 	if (version == 1) {
+ 		erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
+ 				    ntohl(md->u.index), truncate, true);
+ 	} else if (version == 2) {
+ 		u16 md2_flags;
+ 		u8 direction;
+ 		u16 hwid;
  
- 	erspan_build_header(skb, tunnel_id_to_key32(key->tun_id),
- 			    ntohl(md->index), truncate);
+ 		md2_flags = ntohs(md->u.md2.flags);
+ 		direction = (md2_flags & DIR_MASK) >> DIR_OFFSET;
+ 		hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
+ 
+ 		erspan_build_header_v2(skb, tunnel_id_to_key32(key->tun_id),
+ 				       direction, hwid,	truncate, true);
+ 	} else {
+ 		goto err_free_rt;
+ 	}
  
  	gre_build_header(skb, 8, TUNNEL_SEQ,
  			 htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++));
@@@ -668,52 -714,6 +714,6 @@@ free_skb
  	return NETDEV_TX_OK;
  }
  
- static inline u8 tos_to_cos(u8 tos)
- {
- 	u8 dscp, cos;
- 
- 	dscp = tos >> 2;
- 	cos = dscp >> 3;
- 	return cos;
- }
- 
- static void erspan_build_header(struct sk_buff *skb,
- 				__be32 id, u32 index, bool truncate)
- {
- 	struct iphdr *iphdr = ip_hdr(skb);
- 	struct ethhdr *eth = eth_hdr(skb);
- 	enum erspan_encap_type enc_type;
- 	struct erspanhdr *ershdr;
- 	struct qtag_prefix {
- 		__be16 eth_type;
- 		__be16 tci;
- 	} *qp;
- 	u16 vlan_tci = 0;
- 
- 	enc_type = ERSPAN_ENCAP_NOVLAN;
- 
- 	/* If mirrored packet has vlan tag, extract tci and
- 	 *  perserve vlan header in the mirrored frame.
- 	 */
- 	if (eth->h_proto == htons(ETH_P_8021Q)) {
- 		qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN);
- 		vlan_tci = ntohs(qp->tci);
- 		enc_type = ERSPAN_ENCAP_INFRAME;
- 	}
- 
- 	skb_push(skb, sizeof(*ershdr));
- 	ershdr = (struct erspanhdr *)skb->data;
- 	memset(ershdr, 0, sizeof(*ershdr));
- 
- 	ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) |
- 				 (ERSPAN_VERSION << VER_OFFSET));
- 	ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) |
- 			   ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) |
- 			   (enc_type << EN_OFFSET & EN_MASK) |
- 			   ((truncate << T_OFFSET) & T_MASK));
- 	ershdr->md.index = htonl(index & INDEX_MASK);
- }
- 
  static netdev_tx_t erspan_xmit(struct sk_buff *skb,
  			       struct net_device *dev)
  {
@@@ -737,7 -737,14 +737,14 @@@
  	}
  
  	/* Push ERSPAN header */
- 	erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate);
+ 	if (tunnel->erspan_ver == 1)
+ 		erspan_build_header(skb, tunnel->parms.o_key, tunnel->index,
+ 				    truncate, true);
+ 	else
+ 		erspan_build_header_v2(skb, tunnel->parms.o_key,
+ 				       tunnel->dir, tunnel->hwid,
+ 				       truncate, true);
+ 
  	tunnel->parms.o_flags &= ~TUNNEL_KEY;
  	__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN));
  	return NETDEV_TX_OK;
@@@ -1209,13 -1216,32 +1216,32 @@@ static int ipgre_netlink_parms(struct n
  	if (data[IFLA_GRE_FWMARK])
  		*fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
  
- 	if (data[IFLA_GRE_ERSPAN_INDEX]) {
- 		t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ 	if (data[IFLA_GRE_ERSPAN_VER]) {
+ 		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
  
- 		if (t->index & ~INDEX_MASK)
+ 		if (t->erspan_ver != 1 && t->erspan_ver != 2)
  			return -EINVAL;
  	}
  
+ 	if (t->erspan_ver == 1) {
+ 		if (data[IFLA_GRE_ERSPAN_INDEX]) {
+ 			t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ 			if (t->index & ~INDEX_MASK)
+ 				return -EINVAL;
+ 		}
+ 	} else if (t->erspan_ver == 2) {
+ 		if (data[IFLA_GRE_ERSPAN_DIR]) {
+ 			t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+ 			if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
+ 				return -EINVAL;
+ 		}
+ 		if (data[IFLA_GRE_ERSPAN_HWID]) {
+ 			t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+ 			if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
+ 				return -EINVAL;
+ 		}
+ 	}
+ 
  	return 0;
  }
  
@@@ -1282,7 -1308,7 +1308,7 @@@ static int erspan_tunnel_init(struct ne
  	tunnel->tun_hlen = 8;
  	tunnel->parms.iph.protocol = IPPROTO_GRE;
  	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
- 		       sizeof(struct erspanhdr);
+ 		       erspan_hdr_len(tunnel->erspan_ver);
  	t_hlen = tunnel->hlen + sizeof(struct iphdr);
  
  	dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4;
@@@ -1310,7 -1336,6 +1336,7 @@@ static const struct net_device_ops ersp
  static void ipgre_tap_setup(struct net_device *dev)
  {
  	ether_setup(dev);
 +	dev->max_mtu = 0;
  	dev->netdev_ops	= &gre_tap_netdev_ops;
  	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
  	dev->priv_flags	|= IFF_LIVE_ADDR_CHANGE;
@@@ -1413,6 -1438,12 +1439,12 @@@ static size_t ipgre_get_size(const stru
  		nla_total_size(4) +
  		/* IFLA_GRE_ERSPAN_INDEX */
  		nla_total_size(4) +
+ 		/* IFLA_GRE_ERSPAN_VER */
+ 		nla_total_size(1) +
+ 		/* IFLA_GRE_ERSPAN_DIR */
+ 		nla_total_size(1) +
+ 		/* IFLA_GRE_ERSPAN_HWID */
+ 		nla_total_size(2) +
  		0;
  }
  
@@@ -1455,9 -1486,18 +1487,18 @@@ static int ipgre_fill_info(struct sk_bu
  			goto nla_put_failure;
  	}
  
- 	if (t->index)
+ 	if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
+ 		goto nla_put_failure;
+ 
+ 	if (t->erspan_ver == 1) {
  		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
  			goto nla_put_failure;
+ 	} else if (t->erspan_ver == 2) {
+ 		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
+ 			goto nla_put_failure;
+ 		if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
+ 			goto nla_put_failure;
+ 	}
  
  	return 0;
  
@@@ -1493,6 -1533,9 +1534,9 @@@ static const struct nla_policy ipgre_po
  	[IFLA_GRE_IGNORE_DF]	= { .type = NLA_U8 },
  	[IFLA_GRE_FWMARK]	= { .type = NLA_U32 },
  	[IFLA_GRE_ERSPAN_INDEX]	= { .type = NLA_U32 },
+ 	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
+ 	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
+ 	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
  };
  
  static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
diff --combined net/ipv4/tcp.c
index c4a7ee7f6721,67d39b79c801..ca042cdf8496
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@@ -283,8 -283,6 +283,6 @@@
  #include <asm/ioctls.h>
  #include <net/busy_poll.h>
  
- #include <trace/events/tcp.h>
- 
  struct percpu_counter tcp_orphan_count;
  EXPORT_SYMBOL_GPL(tcp_orphan_count);
  
@@@ -493,9 -491,9 +491,9 @@@ static void tcp_tx_timestamp(struct soc
   *	take care of normal races (between the test and the event) and we don't
   *	go look at any of the socket buffers directly.
   */
 -unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 +__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
  {
 -	unsigned int mask;
 +	__poll_t mask;
  	struct sock *sk = sock->sk;
  	const struct tcp_sock *tp = tcp_sk(sk);
  	int state;
@@@ -504,7 -502,7 +502,7 @@@
  
  	sock_poll_wait(file, sk_sleep(sk), wait);
  
- 	state = sk_state_load(sk);
+ 	state = inet_sk_state_load(sk);
  	if (state == TCP_LISTEN)
  		return inet_csk_listen_poll(sk);
  
@@@ -2040,8 -2038,6 +2038,6 @@@ void tcp_set_state(struct sock *sk, in
  {
  	int oldstate = sk->sk_state;
  
- 	trace_tcp_set_state(sk, oldstate, state);
- 
  	switch (state) {
  	case TCP_ESTABLISHED:
  		if (oldstate != TCP_ESTABLISHED)
@@@ -2065,7 -2061,7 +2061,7 @@@
  	/* Change state AFTER socket is unhashed to avoid closed
  	 * socket sitting in hash tables.
  	 */
- 	sk_state_store(sk, state);
+ 	inet_sk_state_store(sk, state);
  
  #ifdef STATE_TRACE
  	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
@@@ -2920,7 -2916,7 +2916,7 @@@ void tcp_get_info(struct sock *sk, stru
  	if (sk->sk_type != SOCK_STREAM)
  		return;
  
- 	info->tcpi_state = sk_state_load(sk);
+ 	info->tcpi_state = inet_sk_state_load(sk);
  
  	/* Report meaningful fields for all TCP states, including listeners */
  	rate = READ_ONCE(sk->sk_pacing_rate);
@@@ -3578,6 -3574,9 +3574,9 @@@ void __init tcp_init(void
  	percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
  	percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
  	inet_hashinfo_init(&tcp_hashinfo);
+ 	inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
+ 			    thash_entries, 21,  /* one slot per 2 MB */
+ 			    0, 64 * 1024);
  	tcp_hashinfo.bind_bucket_cachep =
  		kmem_cache_create("tcp_bind_bucket",
  				  sizeof(struct inet_bind_bucket), 0,
diff --combined net/ipv4/udp.c
index ef45adfc0edb,e9c0d1e1772e..0942a5f43ea5
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@@ -357,18 -357,12 +357,12 @@@ fail
  }
  EXPORT_SYMBOL(udp_lib_get_port);
  
- static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
- 			      unsigned int port)
- {
- 	return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
- }
- 
  int udp_v4_get_port(struct sock *sk, unsigned short snum)
  {
  	unsigned int hash2_nulladdr =
- 		udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
+ 		ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
  	unsigned int hash2_partial =
- 		udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
+ 		ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
  
  	/* precompute partial secondary hash */
  	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@@ -445,7 -439,7 +439,7 @@@ static struct sock *udp4_lib_lookup2(st
  				     struct sk_buff *skb)
  {
  	struct sock *sk, *result;
- 	int score, badness, matches = 0, reuseport = 0;
+ 	int score, badness;
  	u32 hash = 0;
  
  	result = NULL;
@@@ -454,23 -448,16 +448,16 @@@
  		score = compute_score(sk, net, saddr, sport,
  				      daddr, hnum, dif, sdif, exact_dif);
  		if (score > badness) {
- 			reuseport = sk->sk_reuseport;
- 			if (reuseport) {
+ 			if (sk->sk_reuseport) {
  				hash = udp_ehashfn(net, daddr, hnum,
  						   saddr, sport);
  				result = reuseport_select_sock(sk, hash, skb,
  							sizeof(struct udphdr));
  				if (result)
  					return result;
- 				matches = 1;
  			}
  			badness = score;
  			result = sk;
- 		} else if (score == badness && reuseport) {
- 			matches++;
- 			if (reciprocal_scale(hash, matches) == 0)
- 				result = sk;
- 			hash = next_pseudo_random32(hash);
  		}
  	}
  	return result;
@@@ -488,11 -475,11 +475,11 @@@ struct sock *__udp4_lib_lookup(struct n
  	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
  	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
  	bool exact_dif = udp_lib_exact_dif_match(net, skb);
- 	int score, badness, matches = 0, reuseport = 0;
+ 	int score, badness;
  	u32 hash = 0;
  
  	if (hslot->count > 10) {
- 		hash2 = udp4_portaddr_hash(net, daddr, hnum);
+ 		hash2 = ipv4_portaddr_hash(net, daddr, hnum);
  		slot2 = hash2 & udptable->mask;
  		hslot2 = &udptable->hash2[slot2];
  		if (hslot->count < hslot2->count)
@@@ -503,7 -490,7 +490,7 @@@
  					  exact_dif, hslot2, skb);
  		if (!result) {
  			unsigned int old_slot2 = slot2;
- 			hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
+ 			hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
  			slot2 = hash2 & udptable->mask;
  			/* avoid searching the same slot again. */
  			if (unlikely(slot2 == old_slot2))
@@@ -526,23 -513,16 +513,16 @@@ begin
  		score = compute_score(sk, net, saddr, sport,
  				      daddr, hnum, dif, sdif, exact_dif);
  		if (score > badness) {
- 			reuseport = sk->sk_reuseport;
- 			if (reuseport) {
+ 			if (sk->sk_reuseport) {
  				hash = udp_ehashfn(net, daddr, hnum,
  						   saddr, sport);
  				result = reuseport_select_sock(sk, hash, skb,
  							sizeof(struct udphdr));
  				if (result)
  					return result;
- 				matches = 1;
  			}
  			result = sk;
  			badness = score;
- 		} else if (score == badness && reuseport) {
- 			matches++;
- 			if (reciprocal_scale(hash, matches) == 0)
- 				result = sk;
- 			hash = next_pseudo_random32(hash);
  		}
  	}
  	return result;
@@@ -1775,7 -1755,7 +1755,7 @@@ EXPORT_SYMBOL(udp_lib_rehash)
  
  static void udp_v4_rehash(struct sock *sk)
  {
- 	u16 new_hash = udp4_portaddr_hash(sock_net(sk),
+ 	u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
  					  inet_sk(sk)->inet_rcv_saddr,
  					  inet_sk(sk)->inet_num);
  	udp_lib_rehash(sk, new_hash);
@@@ -1966,9 -1946,9 +1946,9 @@@ static int __udp4_lib_mcast_deliver(str
  	struct sk_buff *nskb;
  
  	if (use_hash2) {
- 		hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
+ 		hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
  			    udptable->mask;
- 		hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask;
+ 		hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
  start_lookup:
  		hslot = &udptable->hash2[hash2];
  		offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
@@@ -2200,7 -2180,7 +2180,7 @@@ static struct sock *__udp4_lib_demux_lo
  					    int dif, int sdif)
  {
  	unsigned short hnum = ntohs(loc_port);
- 	unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
+ 	unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
  	unsigned int slot2 = hash2 & udp_table.mask;
  	struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
  	INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
@@@ -2502,9 -2482,9 +2482,9 @@@ int compat_udp_getsockopt(struct sock *
   *	but then block when reading it. Add special case code
   *	to work around these arguably broken applications.
   */
 -unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 +__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
  {
 -	unsigned int mask = datagram_poll(file, sock, wait);
 +	__poll_t mask = datagram_poll(file, sock, wait);
  	struct sock *sk = sock->sk;
  
  	if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
diff --combined net/ipv6/ip6_gre.c
index 416c8913f132,8451d00b210b..97f148f15429
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@@ -55,6 -55,8 +55,8 @@@
  #include <net/ip6_route.h>
  #include <net/ip6_tunnel.h>
  #include <net/gre.h>
+ #include <net/erspan.h>
+ #include <net/dst_metadata.h>
  
  
  static bool log_ecn_error = true;
@@@ -68,11 -70,13 +70,13 @@@ static unsigned int ip6gre_net_id __rea
  struct ip6gre_net {
  	struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
  
+ 	struct ip6_tnl __rcu *collect_md_tun;
  	struct net_device *fb_tunnel_dev;
  };
  
  static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
  static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
+ static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly;
  static int ip6gre_tunnel_init(struct net_device *dev);
  static void ip6gre_tunnel_setup(struct net_device *dev);
  static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
@@@ -121,7 -125,8 +125,8 @@@ static struct ip6_tnl *ip6gre_tunnel_lo
  	unsigned int h1 = HASH_KEY(key);
  	struct ip6_tnl *t, *cand = NULL;
  	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
- 	int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
+ 	int dev_type = (gre_proto == htons(ETH_P_TEB) ||
+ 			gre_proto == htons(ETH_P_ERSPAN)) ?
  		       ARPHRD_ETHER : ARPHRD_IP6GRE;
  	int score, cand_score = 4;
  
@@@ -226,6 -231,10 +231,10 @@@
  	if (cand)
  		return cand;
  
+ 	t = rcu_dereference(ign->collect_md_tun);
+ 	if (t && t->dev->flags & IFF_UP)
+ 		return t;
+ 
  	dev = ign->fb_tunnel_dev;
  	if (dev->flags & IFF_UP)
  		return netdev_priv(dev);
@@@ -261,6 -270,9 +270,9 @@@ static void ip6gre_tunnel_link(struct i
  {
  	struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
  
+ 	if (t->parms.collect_md)
+ 		rcu_assign_pointer(ign->collect_md_tun, t);
+ 
  	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
  	rcu_assign_pointer(*tp, t);
  }
@@@ -270,6 -282,9 +282,9 @@@ static void ip6gre_tunnel_unlink(struc
  	struct ip6_tnl __rcu **tp;
  	struct ip6_tnl *iter;
  
+ 	if (t->parms.collect_md)
+ 		rcu_assign_pointer(ign->collect_md_tun, NULL);
+ 
  	for (tp = ip6gre_bucket(ign, t);
  	     (iter = rtnl_dereference(*tp)) != NULL;
  	     tp = &iter->next) {
@@@ -460,7 -475,111 +475,111 @@@ static int ip6gre_rcv(struct sk_buff *s
  				      &ipv6h->saddr, &ipv6h->daddr, tpi->key,
  				      tpi->proto);
  	if (tunnel) {
- 		ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ 		if (tunnel->parms.collect_md) {
+ 			struct metadata_dst *tun_dst;
+ 			__be64 tun_id;
+ 			__be16 flags;
+ 
+ 			flags = tpi->flags;
+ 			tun_id = key32_to_tunnel_id(tpi->key);
+ 
+ 			tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0);
+ 			if (!tun_dst)
+ 				return PACKET_REJECT;
+ 
+ 			ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+ 		} else {
+ 			ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ 		}
+ 
+ 		return PACKET_RCVD;
+ 	}
+ 
+ 	return PACKET_REJECT;
+ }
+ 
+ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
+ 			 struct tnl_ptk_info *tpi)
+ {
+ 	struct erspan_base_hdr *ershdr;
+ 	struct erspan_metadata *pkt_md;
+ 	const struct ipv6hdr *ipv6h;
+ 	struct ip6_tnl *tunnel;
+ 	u8 ver;
+ 
+ 	if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+ 		return PACKET_REJECT;
+ 
+ 	ipv6h = ipv6_hdr(skb);
+ 	ershdr = (struct erspan_base_hdr *)skb->data;
+ 	ver = (ntohs(ershdr->ver_vlan) & VER_MASK) >> VER_OFFSET;
+ 	tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK);
+ 
+ 	tunnel = ip6gre_tunnel_lookup(skb->dev,
+ 				      &ipv6h->saddr, &ipv6h->daddr, tpi->key,
+ 				      tpi->proto);
+ 	if (tunnel) {
+ 		int len = erspan_hdr_len(ver);
+ 
+ 		if (unlikely(!pskb_may_pull(skb, len)))
+ 			return PACKET_REJECT;
+ 
+ 		ershdr = (struct erspan_base_hdr *)skb->data;
+ 		pkt_md = (struct erspan_metadata *)(ershdr + 1);
+ 
+ 		if (__iptunnel_pull_header(skb, len,
+ 					   htons(ETH_P_TEB),
+ 					   false, false) < 0)
+ 			return PACKET_REJECT;
+ 
+ 		if (tunnel->parms.collect_md) {
+ 			struct metadata_dst *tun_dst;
+ 			struct ip_tunnel_info *info;
+ 			struct erspan_metadata *md;
+ 			__be64 tun_id;
+ 			__be16 flags;
+ 
+ 			tpi->flags |= TUNNEL_KEY;
+ 			flags = tpi->flags;
+ 			tun_id = key32_to_tunnel_id(tpi->key);
+ 
+ 			tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id,
+ 						  sizeof(*md));
+ 			if (!tun_dst)
+ 				return PACKET_REJECT;
+ 
+ 			info = &tun_dst->u.tun_info;
+ 			md = ip_tunnel_info_opts(info);
+ 			if (!md) {
+ 				dst_release((struct dst_entry *)tun_dst);
+ 				return PACKET_REJECT;
+ 			}
+ 
+ 			memcpy(md, pkt_md, sizeof(*md));
+ 			md->version = ver;
+ 			info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ 			info->options_len = sizeof(*md);
+ 
+ 			ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+ 
+ 		} else {
+ 			tunnel->parms.erspan_ver = ver;
+ 
+ 			if (ver == 1) {
+ 				tunnel->parms.index = ntohl(pkt_md->u.index);
+ 			} else {
+ 				u16 md2_flags;
+ 				u16 dir, hwid;
+ 
+ 				md2_flags = ntohs(pkt_md->u.md2.flags);
+ 				dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
+ 				hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
+ 				tunnel->parms.dir = dir;
+ 				tunnel->parms.hwid = hwid;
+ 			}
+ 
+ 			ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ 		}
  
  		return PACKET_RCVD;
  	}
@@@ -481,9 -600,17 +600,17 @@@ static int gre_rcv(struct sk_buff *skb
  	if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
  		goto drop;
  
+ 	if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
+ 		     tpi.proto == htons(ETH_P_ERSPAN2))) {
+ 		if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD)
+ 			return 0;
+ 		goto out;
+ 	}
+ 
  	if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
  		return 0;
  
+ out:
  	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
  drop:
  	kfree_skb(skb);
@@@ -496,6 -623,78 +623,78 @@@ static int gre_handle_offloads(struct s
  					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
  }
  
+ static void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb,
+ 				     struct net_device *dev,
+ 				     struct flowi6 *fl6, __u8 *dsfield,
+ 				     int *encap_limit)
+ {
+ 	const struct iphdr *iph = ip_hdr(skb);
+ 	struct ip6_tnl *t = netdev_priv(dev);
+ 
+ 	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ 		*encap_limit = t->parms.encap_limit;
+ 
+ 	memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
+ 
+ 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ 		*dsfield = ipv4_get_dsfield(iph);
+ 	else
+ 		*dsfield = ip6_tclass(t->parms.flowinfo);
+ 
+ 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ 		fl6->flowi6_mark = skb->mark;
+ 	else
+ 		fl6->flowi6_mark = t->parms.fwmark;
+ 
+ 	fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ }
+ 
+ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
+ 				    struct net_device *dev,
+ 				    struct flowi6 *fl6, __u8 *dsfield,
+ 				    int *encap_limit)
+ {
+ 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ 	struct ip6_tnl *t = netdev_priv(dev);
+ 	__u16 offset;
+ 
+ 	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ 	/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+ 
+ 	if (offset > 0) {
+ 		struct ipv6_tlv_tnl_enc_lim *tel;
+ 
+ 		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+ 		if (tel->encap_limit == 0) {
+ 			icmpv6_send(skb, ICMPV6_PARAMPROB,
+ 				    ICMPV6_HDR_FIELD, offset + 2);
+ 			return -1;
+ 		}
+ 		*encap_limit = tel->encap_limit - 1;
+ 	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+ 		*encap_limit = t->parms.encap_limit;
+ 	}
+ 
+ 	memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
+ 
+ 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ 		*dsfield = ipv6_get_dsfield(ipv6h);
+ 	else
+ 		*dsfield = ip6_tclass(t->parms.flowinfo);
+ 
+ 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+ 		fl6->flowlabel |= ip6_flowlabel(ipv6h);
+ 
+ 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ 		fl6->flowi6_mark = skb->mark;
+ 	else
+ 		fl6->flowi6_mark = t->parms.fwmark;
+ 
+ 	fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ 
+ 	return 0;
+ }
+ 
  static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
  			       struct net_device *dev, __u8 dsfield,
  			       struct flowi6 *fl6, int encap_limit,
@@@ -517,8 -716,38 +716,38 @@@
  
  	/* Push GRE header. */
  	protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
- 	gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
- 			 protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+ 
+ 	if (tunnel->parms.collect_md) {
+ 		struct ip_tunnel_info *tun_info;
+ 		const struct ip_tunnel_key *key;
+ 		__be16 flags;
+ 
+ 		tun_info = skb_tunnel_info(skb);
+ 		if (unlikely(!tun_info ||
+ 			     !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ 			     ip_tunnel_info_af(tun_info) != AF_INET6))
+ 			return -EINVAL;
+ 
+ 		key = &tun_info->key;
+ 		memset(fl6, 0, sizeof(*fl6));
+ 		fl6->flowi6_proto = IPPROTO_GRE;
+ 		fl6->daddr = key->u.ipv6.dst;
+ 		fl6->flowlabel = key->label;
+ 		fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ 
+ 		dsfield = key->tos;
+ 		flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ 		tunnel->tun_hlen = gre_calc_hlen(flags);
+ 
+ 		gre_build_header(skb, tunnel->tun_hlen,
+ 				 flags, protocol,
+ 				 tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ 
+ 	} else {
+ 		gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ 				 protocol, tunnel->parms.o_key,
+ 				 htonl(tunnel->o_seqno));
+ 	}
  
  	return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
  			    NEXTHDR_GRE);
@@@ -527,30 -756,17 +756,17 @@@
  static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
  {
  	struct ip6_tnl *t = netdev_priv(dev);
- 	const struct iphdr  *iph = ip_hdr(skb);
  	int encap_limit = -1;
  	struct flowi6 fl6;
- 	__u8 dsfield;
+ 	__u8 dsfield = 0;
  	__u32 mtu;
  	int err;
  
  	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
  
- 	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- 		encap_limit = t->parms.encap_limit;
- 
- 	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- 
- 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- 		dsfield = ipv4_get_dsfield(iph);
- 	else
- 		dsfield = ip6_tclass(t->parms.flowinfo);
- 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- 		fl6.flowi6_mark = skb->mark;
- 	else
- 		fl6.flowi6_mark = t->parms.fwmark;
- 
- 	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ 	if (!t->parms.collect_md)
+ 		prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
+ 					 &dsfield, &encap_limit);
  
  	err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
  	if (err)
@@@ -574,46 -790,17 +790,17 @@@ static inline int ip6gre_xmit_ipv6(stru
  	struct ip6_tnl *t = netdev_priv(dev);
  	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
  	int encap_limit = -1;
- 	__u16 offset;
  	struct flowi6 fl6;
- 	__u8 dsfield;
+ 	__u8 dsfield = 0;
  	__u32 mtu;
  	int err;
  
  	if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
  		return -1;
  
- 	offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
- 	/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
- 	ipv6h = ipv6_hdr(skb);
- 
- 	if (offset > 0) {
- 		struct ipv6_tlv_tnl_enc_lim *tel;
- 		tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
- 		if (tel->encap_limit == 0) {
- 			icmpv6_send(skb, ICMPV6_PARAMPROB,
- 				    ICMPV6_HDR_FIELD, offset + 2);
- 			return -1;
- 		}
- 		encap_limit = tel->encap_limit - 1;
- 	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- 		encap_limit = t->parms.encap_limit;
- 
- 	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
- 
- 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- 		dsfield = ipv6_get_dsfield(ipv6h);
- 	else
- 		dsfield = ip6_tclass(t->parms.flowinfo);
- 
- 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
- 		fl6.flowlabel |= ip6_flowlabel(ipv6h);
- 	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- 		fl6.flowi6_mark = skb->mark;
- 	else
- 		fl6.flowi6_mark = t->parms.fwmark;
- 
- 	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ 	if (!t->parms.collect_md &&
+ 	    prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
+ 		return -1;
  
  	if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
  		return -1;
@@@ -660,7 -847,8 +847,8 @@@ static int ip6gre_xmit_other(struct sk_
  	if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
  		encap_limit = t->parms.encap_limit;
  
- 	memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ 	if (!t->parms.collect_md)
+ 		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
  
  	err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
  	if (err)
@@@ -705,6 -893,141 +893,141 @@@ tx_err
  	return NETDEV_TX_OK;
  }
  
+ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
+ 					 struct net_device *dev)
+ {
+ 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ 	struct ip6_tnl *t = netdev_priv(dev);
+ 	struct dst_entry *dst = skb_dst(skb);
+ 	struct net_device_stats *stats;
+ 	bool truncate = false;
+ 	int encap_limit = -1;
+ 	__u8 dsfield = false;
+ 	struct flowi6 fl6;
+ 	int err = -EINVAL;
+ 	__u32 mtu;
+ 
+ 	if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
+ 		goto tx_err;
+ 
+ 	if (gre_handle_offloads(skb, false))
+ 		goto tx_err;
+ 
+ 	if (skb->len > dev->mtu + dev->hard_header_len) {
+ 		pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ 		truncate = true;
+ 	}
+ 
+ 	t->parms.o_flags &= ~TUNNEL_KEY;
+ 	IPCB(skb)->flags = 0;
+ 
+ 	/* For collect_md mode, derive fl6 from the tunnel key,
+ 	 * for native mode, call prepare_ip6gre_xmit_{ipv4,ipv6}.
+ 	 */
+ 	if (t->parms.collect_md) {
+ 		struct ip_tunnel_info *tun_info;
+ 		const struct ip_tunnel_key *key;
+ 		struct erspan_metadata *md;
+ 
+ 		tun_info = skb_tunnel_info(skb);
+ 		if (unlikely(!tun_info ||
+ 			     !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ 			     ip_tunnel_info_af(tun_info) != AF_INET6))
+ 			return -EINVAL;
+ 
+ 		key = &tun_info->key;
+ 		memset(&fl6, 0, sizeof(fl6));
+ 		fl6.flowi6_proto = IPPROTO_GRE;
+ 		fl6.daddr = key->u.ipv6.dst;
+ 		fl6.flowlabel = key->label;
+ 		fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ 
+ 		dsfield = key->tos;
+ 		md = ip_tunnel_info_opts(tun_info);
+ 		if (!md)
+ 			goto tx_err;
+ 
+ 		if (md->version == 1) {
+ 			erspan_build_header(skb,
+ 					    tunnel_id_to_key32(key->tun_id),
+ 					    ntohl(md->u.index), truncate,
+ 					    false);
+ 		} else if (md->version == 2) {
+ 			u16 md2_flags;
+ 			u16 dir, hwid;
+ 
+ 			md2_flags = ntohs(md->u.md2.flags);
+ 			dir = (md2_flags & DIR_MASK) >> DIR_OFFSET;
+ 			hwid = (md2_flags & HWID_MASK) >> HWID_OFFSET;
+ 
+ 			erspan_build_header_v2(skb,
+ 					       tunnel_id_to_key32(key->tun_id),
+ 					       dir, hwid, truncate,
+ 					       false);
+ 		}
+ 	} else {
+ 		switch (skb->protocol) {
+ 		case htons(ETH_P_IP):
+ 			memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ 			prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
+ 						 &dsfield, &encap_limit);
+ 			break;
+ 		case htons(ETH_P_IPV6):
+ 			if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
+ 				goto tx_err;
+ 			if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6,
+ 						     &dsfield, &encap_limit))
+ 				goto tx_err;
+ 			break;
+ 		default:
+ 			memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ 			break;
+ 		}
+ 
+ 		if (t->parms.erspan_ver == 1)
+ 			erspan_build_header(skb, t->parms.o_key,
+ 					    t->parms.index,
+ 					    truncate, false);
+ 		else
+ 			erspan_build_header_v2(skb, t->parms.o_key,
+ 					       t->parms.dir,
+ 					       t->parms.hwid,
+ 					       truncate, false);
+ 		fl6.daddr = t->parms.raddr;
+ 	}
+ 
+ 	/* Push GRE header. */
+ 	gre_build_header(skb, 8, TUNNEL_SEQ,
+ 			 htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++));
+ 
+ 	/* TooBig packet may have updated dst->dev's mtu */
+ 	if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+ 		dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+ 
+ 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ 			   NEXTHDR_GRE);
+ 	if (err != 0) {
+ 		/* XXX: send ICMP error even if DF is not set. */
+ 		if (err == -EMSGSIZE) {
+ 			if (skb->protocol == htons(ETH_P_IP))
+ 				icmp_send(skb, ICMP_DEST_UNREACH,
+ 					  ICMP_FRAG_NEEDED, htonl(mtu));
+ 			else
+ 				icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ 		}
+ 
+ 		goto tx_err;
+ 	}
+ 	return NETDEV_TX_OK;
+ 
+ tx_err:
+ 	stats = &t->dev->stats;
+ 	stats->tx_errors++;
+ 	stats->tx_dropped++;
+ 	kfree_skb(skb);
+ 	return NETDEV_TX_OK;
+ }
+ 
  static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
  {
  	struct net_device *dev = t->dev;
@@@ -1048,6 -1371,11 +1371,11 @@@ static int ip6gre_tunnel_init_common(st
  	if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
  		dev->mtu -= 8;
  
+ 	if (tunnel->parms.collect_md) {
+ 		dev->features |= NETIF_F_NETNS_LOCAL;
+ 		netif_keep_dst(dev);
+ 	}
+ 
  	return 0;
  }
  
@@@ -1062,6 -1390,9 +1390,9 @@@ static int ip6gre_tunnel_init(struct ne
  
  	tunnel = netdev_priv(dev);
  
+ 	if (tunnel->parms.collect_md)
+ 		return 0;
+ 
  	memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
  	memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
  
@@@ -1084,7 -1415,6 +1415,6 @@@ static void ip6gre_fb_tunnel_init(struc
  	dev_hold(dev);
  }
  
- 
  static struct inet6_protocol ip6gre_protocol __read_mostly = {
  	.handler     = gre_rcv,
  	.err_handler = ip6gre_err,
@@@ -1099,7 -1429,8 +1429,8 @@@ static void ip6gre_destroy_tunnels(stru
  
  	for_each_netdev_safe(net, dev, aux)
  		if (dev->rtnl_link_ops == &ip6gre_link_ops ||
- 		    dev->rtnl_link_ops == &ip6gre_tap_ops)
+ 		    dev->rtnl_link_ops == &ip6gre_tap_ops ||
+ 		    dev->rtnl_link_ops == &ip6erspan_tap_ops)
  			unregister_netdevice_queue(dev, head);
  
  	for (prio = 0; prio < 4; prio++) {
@@@ -1221,6 -1552,70 +1552,70 @@@ out
  	return ip6gre_tunnel_validate(tb, data, extack);
  }
  
+ static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
+ 				  struct netlink_ext_ack *extack)
+ {
+ 	__be16 flags = 0;
+ 	int ret, ver = 0;
+ 
+ 	if (!data)
+ 		return 0;
+ 
+ 	ret = ip6gre_tap_validate(tb, data, extack);
+ 	if (ret)
+ 		return ret;
+ 
+ 	/* ERSPAN should only have GRE sequence and key flag */
+ 	if (data[IFLA_GRE_OFLAGS])
+ 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ 	if (data[IFLA_GRE_IFLAGS])
+ 		flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ 	if (!data[IFLA_GRE_COLLECT_METADATA] &&
+ 	    flags != (GRE_SEQ | GRE_KEY))
+ 		return -EINVAL;
+ 
+ 	/* The ERSPAN session ID is only 10 bits. Since we reuse the
+ 	 * 32-bit key field as the ID, check its range.
+ 	 */
+ 	if (data[IFLA_GRE_IKEY] &&
+ 	    (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
+ 		return -EINVAL;
+ 
+ 	if (data[IFLA_GRE_OKEY] &&
+ 	    (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
+ 		return -EINVAL;
+ 
+ 	if (data[IFLA_GRE_ERSPAN_VER]) {
+ 		ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+ 		if (ver != 1 && ver != 2)
+ 			return -EINVAL;
+ 	}
+ 
+ 	if (ver == 1) {
+ 		if (data[IFLA_GRE_ERSPAN_INDEX]) {
+ 			u32 index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ 
+ 			if (index & ~INDEX_MASK)
+ 				return -EINVAL;
+ 		}
+ 	} else if (ver == 2) {
+ 		if (data[IFLA_GRE_ERSPAN_DIR]) {
+ 			u16 dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+ 
+ 			if (dir & ~(DIR_MASK >> DIR_OFFSET))
+ 				return -EINVAL;
+ 		}
+ 
+ 		if (data[IFLA_GRE_ERSPAN_HWID]) {
+ 			u16 hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+ 
+ 			if (hwid & ~(HWID_MASK >> HWID_OFFSET))
+ 				return -EINVAL;
+ 		}
+ 	}
+ 
+ 	return 0;
+ }
  
  static void ip6gre_netlink_parms(struct nlattr *data[],
  				struct __ip6_tnl_parm *parms)
@@@ -1267,6 -1662,22 +1662,22 @@@
  
  	if (data[IFLA_GRE_FWMARK])
  		parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
+ 
+ 	if (data[IFLA_GRE_COLLECT_METADATA])
+ 		parms->collect_md = true;
+ 
+ 	if (data[IFLA_GRE_ERSPAN_VER])
+ 		parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+ 
+ 	if (parms->erspan_ver == 1) {
+ 		if (data[IFLA_GRE_ERSPAN_INDEX])
+ 			parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ 	} else if (parms->erspan_ver == 2) {
+ 		if (data[IFLA_GRE_ERSPAN_DIR])
+ 			parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+ 		if (data[IFLA_GRE_ERSPAN_HWID])
+ 			parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+ 	}
  }
  
  static int ip6gre_tap_init(struct net_device *dev)
@@@ -1303,12 -1714,64 +1714,65 @@@ static const struct net_device_ops ip6g
  		       NETIF_F_HIGHDMA |		\
  		       NETIF_F_HW_CSUM)
  
+ static int ip6erspan_tap_init(struct net_device *dev)
+ {
+ 	struct ip6_tnl *tunnel;
+ 	int t_hlen;
+ 	int ret;
+ 
+ 	tunnel = netdev_priv(dev);
+ 
+ 	tunnel->dev = dev;
+ 	tunnel->net = dev_net(dev);
+ 	strcpy(tunnel->parms.name, dev->name);
+ 
+ 	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ 	if (!dev->tstats)
+ 		return -ENOMEM;
+ 
+ 	ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
+ 	if (ret) {
+ 		free_percpu(dev->tstats);
+ 		dev->tstats = NULL;
+ 		return ret;
+ 	}
+ 
+ 	tunnel->tun_hlen = 8;
+ 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
+ 		       erspan_hdr_len(tunnel->parms.erspan_ver);
+ 	t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+ 
+ 	dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ 	dev->mtu = ETH_DATA_LEN - t_hlen;
+ 	if (dev->type == ARPHRD_ETHER)
+ 		dev->mtu -= ETH_HLEN;
+ 	if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ 		dev->mtu -= 8;
+ 
+ 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ 	tunnel = netdev_priv(dev);
+ 	ip6gre_tnl_link_config(tunnel, 1);
+ 
+ 	return 0;
+ }
+ 
+ static const struct net_device_ops ip6erspan_netdev_ops = {
+ 	.ndo_init =		ip6erspan_tap_init,
+ 	.ndo_uninit =		ip6gre_tunnel_uninit,
+ 	.ndo_start_xmit =	ip6erspan_tunnel_xmit,
+ 	.ndo_set_mac_address =	eth_mac_addr,
+ 	.ndo_validate_addr =	eth_validate_addr,
+ 	.ndo_change_mtu =	ip6_tnl_change_mtu,
+ 	.ndo_get_stats64 =	ip_tunnel_get_stats64,
+ 	.ndo_get_iflink =	ip6_tnl_get_iflink,
+ };
+ 
  static void ip6gre_tap_setup(struct net_device *dev)
  {
  
  	ether_setup(dev);
  
 +	dev->max_mtu = 0;
  	dev->netdev_ops = &ip6gre_tap_netdev_ops;
  	dev->needs_free_netdev = true;
  	dev->priv_destructor = ip6gre_dev_free;
@@@ -1373,8 -1836,13 +1837,13 @@@ static int ip6gre_newlink(struct net *s
  
  	ip6gre_netlink_parms(data, &nt->parms);
  
- 	if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
- 		return -EEXIST;
+ 	if (nt->parms.collect_md) {
+ 		if (rtnl_dereference(ign->collect_md_tun))
+ 			return -EEXIST;
+ 	} else {
+ 		if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+ 			return -EEXIST;
+ 	}
  
  	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
  		eth_hw_addr_random(dev);
@@@ -1493,8 -1961,12 +1962,12 @@@ static size_t ip6gre_get_size(const str
  		nla_total_size(2) +
  		/* IFLA_GRE_ENCAP_DPORT */
  		nla_total_size(2) +
+ 		/* IFLA_GRE_COLLECT_METADATA */
+ 		nla_total_size(0) +
  		/* IFLA_GRE_FWMARK */
  		nla_total_size(4) +
+ 		/* IFLA_GRE_ERSPAN_INDEX */
+ 		nla_total_size(4) +
  		0;
  }
  
@@@ -1516,7 -1988,8 +1989,8 @@@ static int ip6gre_fill_info(struct sk_b
  	    nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
  	    nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
  	    nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
- 	    nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark))
+ 	    nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) ||
+ 	    nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
  		goto nla_put_failure;
  
  	if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@@ -1529,6 -2002,24 +2003,24 @@@
  			t->encap.flags))
  		goto nla_put_failure;
  
+ 	if (p->collect_md) {
+ 		if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
+ 			goto nla_put_failure;
+ 	}
+ 
+ 	if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
+ 		goto nla_put_failure;
+ 
+ 	if (p->erspan_ver == 1) {
+ 		if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
+ 			goto nla_put_failure;
+ 	} else if (p->erspan_ver == 2) {
+ 		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
+ 			goto nla_put_failure;
+ 		if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
+ 			goto nla_put_failure;
+ 	}
+ 
  	return 0;
  
  nla_put_failure:
@@@ -1551,9 -2042,28 +2043,28 @@@ static const struct nla_policy ip6gre_p
  	[IFLA_GRE_ENCAP_FLAGS]  = { .type = NLA_U16 },
  	[IFLA_GRE_ENCAP_SPORT]  = { .type = NLA_U16 },
  	[IFLA_GRE_ENCAP_DPORT]  = { .type = NLA_U16 },
+ 	[IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
  	[IFLA_GRE_FWMARK]       = { .type = NLA_U32 },
+ 	[IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
+ 	[IFLA_GRE_ERSPAN_VER]	= { .type = NLA_U8 },
+ 	[IFLA_GRE_ERSPAN_DIR]	= { .type = NLA_U8 },
+ 	[IFLA_GRE_ERSPAN_HWID]	= { .type = NLA_U16 },
  };
  
+ static void ip6erspan_tap_setup(struct net_device *dev)
+ {
+ 	ether_setup(dev);
+ 
+ 	dev->netdev_ops = &ip6erspan_netdev_ops;
+ 	dev->needs_free_netdev = true;
+ 	dev->priv_destructor = ip6gre_dev_free;
+ 
+ 	dev->features |= NETIF_F_NETNS_LOCAL;
+ 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ 	netif_keep_dst(dev);
+ }
+ 
  static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
  	.kind		= "ip6gre",
  	.maxtype	= IFLA_GRE_MAX,
@@@ -1583,6 -2093,20 +2094,20 @@@ static struct rtnl_link_ops ip6gre_tap_
  	.get_link_net	= ip6_tnl_get_link_net,
  };
  
+ static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
+ 	.kind		= "ip6erspan",
+ 	.maxtype	= IFLA_GRE_MAX,
+ 	.policy		= ip6gre_policy,
+ 	.priv_size	= sizeof(struct ip6_tnl),
+ 	.setup		= ip6erspan_tap_setup,
+ 	.validate	= ip6erspan_tap_validate,
+ 	.newlink	= ip6gre_newlink,
+ 	.changelink	= ip6gre_changelink,
+ 	.get_size	= ip6gre_get_size,
+ 	.fill_info	= ip6gre_fill_info,
+ 	.get_link_net	= ip6_tnl_get_link_net,
+ };
+ 
  /*
   *	And now the modules code and kernel interface.
   */
@@@ -1611,9 -2135,15 +2136,15 @@@ static int __init ip6gre_init(void
  	if (err < 0)
  		goto tap_ops_failed;
  
+ 	err = rtnl_link_register(&ip6erspan_tap_ops);
+ 	if (err < 0)
+ 		goto erspan_link_failed;
+ 
  out:
  	return err;
  
+ erspan_link_failed:
+ 	rtnl_link_unregister(&ip6gre_tap_ops);
  tap_ops_failed:
  	rtnl_link_unregister(&ip6gre_link_ops);
  rtnl_link_failed:
@@@ -1627,6 -2157,7 +2158,7 @@@ static void __exit ip6gre_fini(void
  {
  	rtnl_link_unregister(&ip6gre_tap_ops);
  	rtnl_link_unregister(&ip6gre_link_ops);
+ 	rtnl_link_unregister(&ip6erspan_tap_ops);
  	inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
  	unregister_pernet_device(&ip6gre_net_ops);
  }
@@@ -1638,4 -2169,5 +2170,5 @@@ MODULE_AUTHOR("D. Kozlov (xeb at mail.ru)"
  MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
  MODULE_ALIAS_RTNL_LINK("ip6gre");
  MODULE_ALIAS_RTNL_LINK("ip6gretap");
+ MODULE_ALIAS_RTNL_LINK("ip6erspan");
  MODULE_ALIAS_NETDEV("ip6gre0");
diff --combined net/ipv6/ip6_output.c
index f7dd51c42314,176d74fb3b4d..ece2781a31b2
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@@ -166,14 -166,6 +166,14 @@@ int ip6_output(struct net *net, struct 
  			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
  }
  
 +static bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
 +{
 +	if (!np->autoflowlabel_set)
 +		return ip6_default_np_autolabel(net);
 +	else
 +		return np->autoflowlabel;
 +}
 +
  /*
   * xmit an sk_buff (used by TCP, SCTP and DCCP)
   * Note : socket lock is not held for SYNACK packets, but might be modified
@@@ -238,7 -230,7 +238,7 @@@ int ip6_xmit(const struct sock *sk, str
  		hlimit = ip6_dst_hoplimit(dst);
  
  	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
 -						     np->autoflowlabel, fl6));
 +				ip6_autoflowlabel(net, np), fl6));
  
  	hdr->payload_len = htons(seg_len);
  	hdr->nexthdr = proto;
@@@ -1209,13 -1201,13 +1209,13 @@@ static int ip6_setup_cork(struct sock *
  		      rt->dst.dev->mtu : dst_mtu(&rt->dst);
  	else
  		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
- 		      rt->dst.dev->mtu : dst_mtu(rt->dst.path);
+ 		      rt->dst.dev->mtu : dst_mtu(xfrm_dst_path(&rt->dst));
  	if (np->frag_size < mtu) {
  		if (np->frag_size)
  			mtu = np->frag_size;
  	}
  	cork->base.fragsize = mtu;
- 	if (dst_allfrag(rt->dst.path))
+ 	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
  		cork->base.flags |= IPCORK_ALLFRAG;
  	cork->base.length = 0;
  
@@@ -1634,7 -1626,7 +1634,7 @@@ struct sk_buff *__ip6_make_skb(struct s
  
  	ip6_flow_hdr(hdr, v6_cork->tclass,
  		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
 -					np->autoflowlabel, fl6));
 +					ip6_autoflowlabel(net, np), fl6));
  	hdr->hop_limit = v6_cork->hop_limit;
  	hdr->nexthdr = proto;
  	hdr->saddr = fl6->saddr;
diff --combined net/ipv6/ip6_tunnel.c
index 931c38f6ff4a,6ff2f21ae3fc..8a4610e84e58
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@@ -861,7 -861,7 +861,7 @@@ int ip6_tnl_rcv(struct ip6_tnl *t, stru
  		struct metadata_dst *tun_dst,
  		bool log_ecn_err)
  {
- 	return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
+ 	return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate,
  			     log_ecn_err);
  }
  EXPORT_SYMBOL(ip6_tnl_rcv);
@@@ -979,6 -979,9 +979,9 @@@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t
  	int ret = 0;
  	struct net *net = t->net;
  
+ 	if (t->parms.collect_md)
+ 		return 1;
+ 
  	if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
  	    ((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
  	     (ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
@@@ -1123,13 -1126,8 +1126,13 @@@ route_lookup
  		max_headroom += 8;
  		mtu -= 8;
  	}
 -	if (mtu < IPV6_MIN_MTU)
 -		mtu = IPV6_MIN_MTU;
 +	if (skb->protocol == htons(ETH_P_IPV6)) {
 +		if (mtu < IPV6_MIN_MTU)
 +			mtu = IPV6_MIN_MTU;
 +	} else if (mtu < 576) {
 +		mtu = 576;
 +	}
 +
  	if (skb_dst(skb) && !t->parms.collect_md)
  		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
  	if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
diff --combined net/ipv6/route.c
index 0458b761f3c5,b3f4d19b3ca5..4efaac956f0c
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@@ -186,7 -186,7 +186,7 @@@ static void rt6_uncached_list_flush_dev
  
  static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
  {
- 	return dst_metrics_write_ptr(rt->dst.from);
+ 	return dst_metrics_write_ptr(&rt->from->dst);
  }
  
  static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
@@@ -391,7 -391,7 +391,7 @@@ static void ip6_dst_destroy(struct dst_
  {
  	struct rt6_info *rt = (struct rt6_info *)dst;
  	struct rt6_exception_bucket *bucket;
- 	struct dst_entry *from = dst->from;
+ 	struct rt6_info *from = rt->from;
  	struct inet6_dev *idev;
  
  	dst_destroy_metrics_generic(dst);
@@@ -409,8 -409,8 +409,8 @@@
  		kfree(bucket);
  	}
  
- 	dst->from = NULL;
- 	dst_release(from);
+ 	rt->from = NULL;
+ 	dst_release(&from->dst);
  }
  
  static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@@ -443,9 -443,9 +443,9 @@@ static bool rt6_check_expired(const str
  	if (rt->rt6i_flags & RTF_EXPIRES) {
  		if (time_after(jiffies, rt->dst.expires))
  			return true;
- 	} else if (rt->dst.from) {
+ 	} else if (rt->from) {
  		return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
- 		       rt6_check_expired((struct rt6_info *)rt->dst.from);
+ 			rt6_check_expired(rt->from);
  	}
  	return false;
  }
@@@ -502,7 -502,7 +502,7 @@@ static inline struct rt6_info *rt6_devi
  	if (!oif && ipv6_addr_any(saddr))
  		goto out;
  
- 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
+ 	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
  		struct net_device *dev = sprt->dst.dev;
  
  		if (oif) {
@@@ -721,7 -721,7 +721,7 @@@ static struct rt6_info *find_rr_leaf(st
  
  	match = NULL;
  	cont = NULL;
- 	for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
+ 	for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
  		if (rt->rt6i_metric != metric) {
  			cont = rt;
  			break;
@@@ -731,7 -731,7 +731,7 @@@
  	}
  
  	for (rt = leaf; rt && rt != rr_head;
- 	     rt = rcu_dereference(rt->dst.rt6_next)) {
+ 	     rt = rcu_dereference(rt->rt6_next)) {
  		if (rt->rt6i_metric != metric) {
  			cont = rt;
  			break;
@@@ -743,7 -743,7 +743,7 @@@
  	if (match || !cont)
  		return match;
  
- 	for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
+ 	for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
  		match = find_match(rt, oif, strict, &mpri, match, do_rr);
  
  	return match;
@@@ -781,7 -781,7 +781,7 @@@ static struct rt6_info *rt6_select(stru
  			     &do_rr);
  
  	if (do_rr) {
- 		struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
+ 		struct rt6_info *next = rcu_dereference(rt0->rt6_next);
  
  		/* no entries matched; do round-robin */
  		if (!next || next->rt6i_metric != rt0->rt6i_metric)
@@@ -1054,7 -1054,7 +1054,7 @@@ static struct rt6_info *ip6_rt_cache_al
  	 */
  
  	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
- 		ort = (struct rt6_info *)ort->dst.from;
+ 		ort = ort->from;
  
  	rcu_read_lock();
  	dev = ip6_rt_get_dev_rcu(ort);
@@@ -1274,7 -1274,7 +1274,7 @@@ static int rt6_insert_exception(struct 
  
  	/* ort can't be a cache or pcpu route */
  	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
- 		ort = (struct rt6_info *)ort->dst.from;
+ 		ort = ort->from;
  	WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
  
  	spin_lock_bh(&rt6_exception_lock);
@@@ -1415,8 -1415,8 +1415,8 @@@ static struct rt6_info *rt6_find_cached
  /* Remove the passed in cached rt from the hash table that contains it */
  int rt6_remove_exception_rt(struct rt6_info *rt)
  {
- 	struct rt6_info *from = (struct rt6_info *)rt->dst.from;
  	struct rt6_exception_bucket *bucket;
+ 	struct rt6_info *from = rt->from;
  	struct in6_addr *src_key = NULL;
  	struct rt6_exception *rt6_ex;
  	int err;
@@@ -1460,8 -1460,8 +1460,8 @@@
   */
  static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
  {
- 	struct rt6_info *from = (struct rt6_info *)rt->dst.from;
  	struct rt6_exception_bucket *bucket;
+ 	struct rt6_info *from = rt->from;
  	struct in6_addr *src_key = NULL;
  	struct rt6_exception *rt6_ex;
  
@@@ -1929,9 -1929,9 +1929,9 @@@ struct dst_entry *ip6_blackhole_route(s
  
  static void rt6_dst_from_metrics_check(struct rt6_info *rt)
  {
- 	if (rt->dst.from &&
- 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
- 		dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
+ 	if (rt->from &&
+ 	    dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
+ 		dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
  }
  
  static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
@@@ -1951,7 -1951,7 +1951,7 @@@ static struct dst_entry *rt6_dst_from_c
  {
  	if (!__rt6_check_expired(rt) &&
  	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
- 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
+ 	    rt6_check(rt->from, cookie))
  		return &rt->dst;
  	else
  		return NULL;
@@@ -1971,7 -1971,7 +1971,7 @@@ static struct dst_entry *ip6_dst_check(
  	rt6_dst_from_metrics_check(rt);
  
  	if (rt->rt6i_flags & RTF_PCPU ||
- 	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
+ 	    (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
  		return rt6_dst_from_check(rt, cookie);
  	else
  		return rt6_check(rt, cookie);
@@@ -2336,7 -2336,6 +2336,7 @@@ struct dst_entry *icmp6_dst_alloc(struc
  	}
  
  	rt->dst.flags |= DST_HOST;
 +	rt->dst.input = ip6_input;
  	rt->dst.output  = ip6_output;
  	rt->rt6i_gateway  = fl6->daddr;
  	rt->rt6i_dst.addr = fl6->daddr;
@@@ -3056,11 -3055,11 +3056,11 @@@ out
  
  static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
  {
- 	BUG_ON(from->dst.from);
+ 	BUG_ON(from->from);
  
  	rt->rt6i_flags &= ~RTF_EXPIRES;
  	dst_hold(&from->dst);
- 	rt->dst.from = &from->dst;
+ 	rt->from = from;
  	dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
  }
  
@@@ -4298,13 -4297,19 +4298,13 @@@ static int inet6_rtm_getroute(struct sk
  		if (!ipv6_addr_any(&fl6.saddr))
  			flags |= RT6_LOOKUP_F_HAS_SADDR;
  
 -		if (!fibmatch)
 -			dst = ip6_route_input_lookup(net, dev, &fl6, flags);
 -		else
 -			dst = ip6_route_lookup(net, &fl6, 0);
 +		dst = ip6_route_input_lookup(net, dev, &fl6, flags);
  
  		rcu_read_unlock();
  	} else {
  		fl6.flowi6_oif = oif;
  
 -		if (!fibmatch)
 -			dst = ip6_route_output(net, NULL, &fl6);
 -		else
 -			dst = ip6_route_lookup(net, &fl6, 0);
 +		dst = ip6_route_output(net, NULL, &fl6);
  	}
  
  
@@@ -4321,15 -4326,6 +4321,15 @@@
  		goto errout;
  	}
  
 +	if (fibmatch && rt->dst.from) {
 +		struct rt6_info *ort = container_of(rt->dst.from,
 +						    struct rt6_info, dst);
 +
 +		dst_hold(&ort->dst);
 +		ip6_rt_put(rt);
 +		rt = ort;
 +	}
 +
  	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
  	if (!skb) {
  		ip6_rt_put(rt);
@@@ -4600,8 -4596,6 +4600,6 @@@ static int __net_init ip6_route_net_ini
  					   GFP_KERNEL);
  	if (!net->ipv6.ip6_null_entry)
  		goto out_ip6_dst_entries;
- 	net->ipv6.ip6_null_entry->dst.path =
- 		(struct dst_entry *)net->ipv6.ip6_null_entry;
  	net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
  	dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
  			 ip6_template_metrics, true);
@@@ -4613,8 -4607,6 +4611,6 @@@
  					       GFP_KERNEL);
  	if (!net->ipv6.ip6_prohibit_entry)
  		goto out_ip6_null_entry;
- 	net->ipv6.ip6_prohibit_entry->dst.path =
- 		(struct dst_entry *)net->ipv6.ip6_prohibit_entry;
  	net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
  	dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
  			 ip6_template_metrics, true);
@@@ -4624,8 -4616,6 +4620,6 @@@
  					       GFP_KERNEL);
  	if (!net->ipv6.ip6_blk_hole_entry)
  		goto out_ip6_prohibit_entry;
- 	net->ipv6.ip6_blk_hole_entry->dst.path =
- 		(struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
  	net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
  	dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
  			 ip6_template_metrics, true);
@@@ -4782,11 -4772,20 +4776,20 @@@ int __init ip6_route_init(void
  	if (ret)
  		goto fib6_rules_init;
  
- 	ret = -ENOBUFS;
- 	if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
- 	    __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
- 	    __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
- 			    RTNL_FLAG_DOIT_UNLOCKED))
+ 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
+ 				   inet6_rtm_newroute, NULL, 0);
+ 	if (ret < 0)
+ 		goto out_register_late_subsys;
+ 
+ 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
+ 				   inet6_rtm_delroute, NULL, 0);
+ 	if (ret < 0)
+ 		goto out_register_late_subsys;
+ 
+ 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
+ 				   inet6_rtm_getroute, NULL,
+ 				   RTNL_FLAG_DOIT_UNLOCKED);
+ 	if (ret < 0)
  		goto out_register_late_subsys;
  
  	ret = register_netdevice_notifier(&ip6_route_dev_notifier);
@@@ -4804,6 -4803,7 +4807,7 @@@ out
  	return ret;
  
  out_register_late_subsys:
+ 	rtnl_unregister_all(PF_INET6);
  	unregister_pernet_subsys(&ip6_route_net_late_ops);
  fib6_rules_init:
  	fib6_rules_cleanup();
diff --combined net/openvswitch/flow.c
index f039064ce922,76d050aba7a4..56b8e7167790
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@@ -56,12 -56,12 +56,12 @@@
  
  u64 ovs_flow_used_time(unsigned long flow_jiffies)
  {
- 	struct timespec cur_ts;
+ 	struct timespec64 cur_ts;
  	u64 cur_ms, idle_ms;
  
- 	ktime_get_ts(&cur_ts);
+ 	ktime_get_ts64(&cur_ts);
  	idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
- 	cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
+ 	cur_ms = (u64)(u32)cur_ts.tv_sec * MSEC_PER_SEC +
  		 cur_ts.tv_nsec / NSEC_PER_MSEC;
  
  	return cur_ms - idle_ms;
@@@ -579,7 -579,6 +579,7 @@@ static int key_extract(struct sk_buff *
  			return -EINVAL;
  
  		skb_reset_network_header(skb);
 +		key->eth.type = skb->protocol;
  	} else {
  		eth = eth_hdr(skb);
  		ether_addr_copy(key->eth.src, eth->h_source);
@@@ -593,23 -592,15 +593,23 @@@
  		if (unlikely(parse_vlan(skb, key)))
  			return -ENOMEM;
  
 -		skb->protocol = parse_ethertype(skb);
 -		if (unlikely(skb->protocol == htons(0)))
 +		key->eth.type = parse_ethertype(skb);
 +		if (unlikely(key->eth.type == htons(0)))
  			return -ENOMEM;
  
 +		/* Multiple tagged packets need to retain TPID to satisfy
 +		 * skb_vlan_pop(), which will later shift the ethertype into
 +		 * skb->protocol.
 +		 */
 +		if (key->eth.cvlan.tci & htons(VLAN_TAG_PRESENT))
 +			skb->protocol = key->eth.cvlan.tpid;
 +		else
 +			skb->protocol = key->eth.type;
 +
  		skb_reset_network_header(skb);
  		__skb_push(skb, skb->data - skb_mac_header(skb));
  	}
  	skb_reset_mac_len(skb);
 -	key->eth.type = skb->protocol;
  
  	/* Network layer. */
  	if (key->eth.type == htons(ETH_P_IP)) {
diff --combined net/sctp/socket.c
index 03d9d24b38ba,aadcd4244d9b..f0ca32547a21
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@@ -201,6 -201,22 +201,22 @@@ static void sctp_for_each_tx_datachunk(
  		cb(chunk);
  }
  
+ static void sctp_for_each_rx_skb(struct sctp_association *asoc, struct sock *sk,
+ 				 void (*cb)(struct sk_buff *, struct sock *))
+ 
+ {
+ 	struct sk_buff *skb, *tmp;
+ 
+ 	sctp_skb_for_each(skb, &asoc->ulpq.lobby, tmp)
+ 		cb(skb, sk);
+ 
+ 	sctp_skb_for_each(skb, &asoc->ulpq.reasm, tmp)
+ 		cb(skb, sk);
+ 
+ 	sctp_skb_for_each(skb, &asoc->ulpq.reasm_uo, tmp)
+ 		cb(skb, sk);
+ }
+ 
  /* Verify that this is a valid address. */
  static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
  				   int len)
@@@ -1528,7 -1544,7 +1544,7 @@@ static void sctp_close(struct sock *sk
  
  	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
  	sk->sk_shutdown = SHUTDOWN_MASK;
- 	sk->sk_state = SCTP_SS_CLOSING;
+ 	inet_sk_set_state(sk, SCTP_SS_CLOSING);
  
  	ep = sctp_sk(sk)->ep;
  
@@@ -1554,6 -1570,7 +1570,7 @@@
  
  		if (data_was_unread || !skb_queue_empty(&asoc->ulpq.lobby) ||
  		    !skb_queue_empty(&asoc->ulpq.reasm) ||
+ 		    !skb_queue_empty(&asoc->ulpq.reasm_uo) ||
  		    (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) {
  			struct sctp_chunk *chunk;
  
@@@ -2002,7 -2019,20 +2019,20 @@@ static int sctp_sendmsg(struct sock *sk
  		if (err < 0)
  			goto out_free;
  
- 		wait_connect = true;
+ 		/* If stream interleave is enabled, wait_connect has to be
+ 		 * done earlier than data enqueue, as it needs to make data
+ 		 * or idata according to asoc->intl_enable which is set
+ 		 * after connection is done.
+ 		 */
+ 		if (sctp_sk(asoc->base.sk)->strm_interleave) {
+ 			timeo = sock_sndtimeo(sk, 0);
+ 			err = sctp_wait_for_connect(asoc, &timeo);
+ 			if (err)
+ 				goto out_unlock;
+ 		} else {
+ 			wait_connect = true;
+ 		}
+ 
  		pr_debug("%s: we associated primitively\n", __func__);
  	}
  
@@@ -2281,7 -2311,7 +2311,7 @@@ static int sctp_setsockopt_events(struc
  			if (!event)
  				return -ENOMEM;
  
- 			sctp_ulpq_tail_event(&asoc->ulpq, event);
+ 			asoc->stream.si->enqueue_event(&asoc->ulpq, event);
  		}
  	}
  
@@@ -3180,7 -3210,7 +3210,7 @@@ static int sctp_setsockopt_maxseg(struc
  		if (val == 0) {
  			val = asoc->pathmtu - sp->pf->af->net_header_len;
  			val -= sizeof(struct sctphdr) +
- 			       sizeof(struct sctp_data_chunk);
+ 			       sctp_datachk_len(&asoc->stream);
  		}
  		asoc->user_frag = val;
  		asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
@@@ -3350,7 -3380,10 +3380,10 @@@ static int sctp_setsockopt_fragment_int
  	if (get_user(val, (int __user *)optval))
  		return -EFAULT;
  
- 	sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1;
+ 	sctp_sk(sk)->frag_interleave = !!val;
+ 
+ 	if (!sctp_sk(sk)->frag_interleave)
+ 		sctp_sk(sk)->strm_interleave = 0;
  
  	return 0;
  }
@@@ -4023,6 -4056,40 +4056,40 @@@ out
  	return retval;
  }
  
+ static int sctp_setsockopt_interleaving_supported(struct sock *sk,
+ 						  char __user *optval,
+ 						  unsigned int optlen)
+ {
+ 	struct sctp_sock *sp = sctp_sk(sk);
+ 	struct net *net = sock_net(sk);
+ 	struct sctp_assoc_value params;
+ 	int retval = -EINVAL;
+ 
+ 	if (optlen < sizeof(params))
+ 		goto out;
+ 
+ 	optlen = sizeof(params);
+ 	if (copy_from_user(&params, optval, optlen)) {
+ 		retval = -EFAULT;
+ 		goto out;
+ 	}
+ 
+ 	if (params.assoc_id)
+ 		goto out;
+ 
+ 	if (!net->sctp.intl_enable || !sp->frag_interleave) {
+ 		retval = -EPERM;
+ 		goto out;
+ 	}
+ 
+ 	sp->strm_interleave = !!params.assoc_value;
+ 
+ 	retval = 0;
+ 
+ out:
+ 	return retval;
+ }
+ 
  /* API 6.2 setsockopt(), getsockopt()
   *
   * Applications use setsockopt() and getsockopt() to set or retrieve
@@@ -4210,6 -4277,10 +4277,10 @@@ static int sctp_setsockopt(struct sock 
  	case SCTP_STREAM_SCHEDULER_VALUE:
  		retval = sctp_setsockopt_scheduler_value(sk, optval, optlen);
  		break;
+ 	case SCTP_INTERLEAVING_SUPPORTED:
+ 		retval = sctp_setsockopt_interleaving_supported(sk, optval,
+ 								optlen);
+ 		break;
  	default:
  		retval = -ENOPROTOOPT;
  		break;
@@@ -4586,7 -4657,7 +4657,7 @@@ static void sctp_shutdown(struct sock *
  	if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
  		struct sctp_association *asoc;
  
- 		sk->sk_state = SCTP_SS_CLOSING;
+ 		inet_sk_set_state(sk, SCTP_SS_CLOSING);
  		asoc = list_entry(ep->asocs.next,
  				  struct sctp_association, asocs);
  		sctp_primitive_SHUTDOWN(net, asoc, NULL);
@@@ -4680,20 -4751,11 +4751,11 @@@ int sctp_get_sctp_info(struct sock *sk
  EXPORT_SYMBOL_GPL(sctp_get_sctp_info);
  
  /* use callback to avoid exporting the core structure */
- int sctp_transport_walk_start(struct rhashtable_iter *iter)
+ void sctp_transport_walk_start(struct rhashtable_iter *iter)
  {
- 	int err;
- 
  	rhltable_walk_enter(&sctp_transport_hashtable, iter);
  
- 	err = rhashtable_walk_start(iter);
- 	if (err && err != -EAGAIN) {
- 		rhashtable_walk_stop(iter);
- 		rhashtable_walk_exit(iter);
- 		return err;
- 	}
- 
- 	return 0;
+ 	rhashtable_walk_start(iter);
  }
  
  void sctp_transport_walk_stop(struct rhashtable_iter *iter)
@@@ -4784,12 -4846,10 +4846,10 @@@ int sctp_for_each_transport(int (*cb)(s
  			    struct net *net, int *pos, void *p) {
  	struct rhashtable_iter hti;
  	struct sctp_transport *tsp;
- 	int ret;
+ 	int ret = 0;
  
  again:
- 	ret = sctp_transport_walk_start(&hti);
- 	if (ret)
- 		return ret;
+ 	sctp_transport_walk_start(&hti);
  
  	tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
  	for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
@@@ -6984,6 -7044,47 +7044,47 @@@ out
  	return retval;
  }
  
+ static int sctp_getsockopt_interleaving_supported(struct sock *sk, int len,
+ 						  char __user *optval,
+ 						  int __user *optlen)
+ {
+ 	struct sctp_assoc_value params;
+ 	struct sctp_association *asoc;
+ 	int retval = -EFAULT;
+ 
+ 	if (len < sizeof(params)) {
+ 		retval = -EINVAL;
+ 		goto out;
+ 	}
+ 
+ 	len = sizeof(params);
+ 	if (copy_from_user(&params, optval, len))
+ 		goto out;
+ 
+ 	asoc = sctp_id2assoc(sk, params.assoc_id);
+ 	if (asoc) {
+ 		params.assoc_value = asoc->intl_enable;
+ 	} else if (!params.assoc_id) {
+ 		struct sctp_sock *sp = sctp_sk(sk);
+ 
+ 		params.assoc_value = sp->strm_interleave;
+ 	} else {
+ 		retval = -EINVAL;
+ 		goto out;
+ 	}
+ 
+ 	if (put_user(len, optlen))
+ 		goto out;
+ 
+ 	if (copy_to_user(optval, &params, len))
+ 		goto out;
+ 
+ 	retval = 0;
+ 
+ out:
+ 	return retval;
+ }
+ 
  static int sctp_getsockopt(struct sock *sk, int level, int optname,
  			   char __user *optval, int __user *optlen)
  {
@@@ -7174,6 -7275,10 +7275,10 @@@
  		retval = sctp_getsockopt_scheduler_value(sk, len, optval,
  							 optlen);
  		break;
+ 	case SCTP_INTERLEAVING_SUPPORTED:
+ 		retval = sctp_getsockopt_interleaving_supported(sk, len, optval,
+ 								optlen);
+ 		break;
  	default:
  		retval = -ENOPROTOOPT;
  		break;
@@@ -7408,13 -7513,13 +7513,13 @@@ static int sctp_listen_start(struct soc
  	 * sockets.
  	 *
  	 */
- 	sk->sk_state = SCTP_SS_LISTENING;
+ 	inet_sk_set_state(sk, SCTP_SS_LISTENING);
  	if (!ep->base.bind_addr.port) {
  		if (sctp_autobind(sk))
  			return -EAGAIN;
  	} else {
  		if (sctp_get_port(sk, inet_sk(sk)->inet_num)) {
- 			sk->sk_state = SCTP_SS_CLOSED;
+ 			inet_sk_set_state(sk, SCTP_SS_CLOSED);
  			return -EADDRINUSE;
  		}
  	}
@@@ -7500,11 -7605,11 +7605,11 @@@ out
   * here, again, by modeling the current TCP/UDP code.  We don't have
   * a good way to test with it yet.
   */
 -unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
 +__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
  {
  	struct sock *sk = sock->sk;
  	struct sctp_sock *sp = sctp_sk(sk);
 -	unsigned int mask;
 +	__poll_t mask;
  
  	poll_wait(file, sk_sleep(sk), wait);
  
@@@ -8411,11 -8516,7 +8516,7 @@@ static void sctp_sock_migrate(struct so
  
  	}
  
- 	sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp)
- 		sctp_skb_set_owner_r_frag(skb, newsk);
- 
- 	sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp)
- 		sctp_skb_set_owner_r_frag(skb, newsk);
+ 	sctp_for_each_rx_skb(assoc, newsk, sctp_skb_set_owner_r_frag);
  
  	/* Set the type of socket to indicate that it is peeled off from the
  	 * original UDP-style socket or created with the accept() call on a
@@@ -8441,10 -8542,10 +8542,10 @@@
  	 * is called, set RCV_SHUTDOWN flag.
  	 */
  	if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) {
- 		newsk->sk_state = SCTP_SS_CLOSED;
+ 		inet_sk_set_state(newsk, SCTP_SS_CLOSED);
  		newsk->sk_shutdown |= RCV_SHUTDOWN;
  	} else {
- 		newsk->sk_state = SCTP_SS_ESTABLISHED;
+ 		inet_sk_set_state(newsk, SCTP_SS_ESTABLISHED);
  	}
  
  	release_sock(newsk);
diff --combined net/sctp/ulpqueue.c
index e36ec5dd64c6,97fae53310e0..0b427100b0d4
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@@ -60,6 -60,7 +60,7 @@@ struct sctp_ulpq *sctp_ulpq_init(struc
  
  	ulpq->asoc = asoc;
  	skb_queue_head_init(&ulpq->reasm);
+ 	skb_queue_head_init(&ulpq->reasm_uo);
  	skb_queue_head_init(&ulpq->lobby);
  	ulpq->pd_mode  = 0;
  
@@@ -83,6 -84,10 +84,10 @@@ void sctp_ulpq_flush(struct sctp_ulpq *
  		sctp_ulpevent_free(event);
  	}
  
+ 	while ((skb = __skb_dequeue(&ulpq->reasm_uo)) != NULL) {
+ 		event = sctp_skb2event(skb);
+ 		sctp_ulpevent_free(event);
+ 	}
  }
  
  /* Dispose of a ulpqueue.  */
@@@ -104,6 -109,9 +109,9 @@@ int sctp_ulpq_tail_data(struct sctp_ulp
  	if (!event)
  		return -ENOMEM;
  
+ 	event->ssn = ntohs(chunk->subh.data_hdr->ssn);
+ 	event->ppid = chunk->subh.data_hdr->ppid;
+ 
  	/* Do reassembly if needed.  */
  	event = sctp_ulpq_reasm(ulpq, event);
  
@@@ -328,9 -336,10 +336,10 @@@ static void sctp_ulpq_store_reasm(struc
   * payload was fragmented on the way and ip had to reassemble them.
   * We add the rest of skb's to the first skb's fraglist.
   */
- static struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net,
- 	struct sk_buff_head *queue, struct sk_buff *f_frag,
- 	struct sk_buff *l_frag)
+ struct sctp_ulpevent *sctp_make_reassembled_event(struct net *net,
+ 						  struct sk_buff_head *queue,
+ 						  struct sk_buff *f_frag,
+ 						  struct sk_buff *l_frag)
  {
  	struct sk_buff *pos;
  	struct sk_buff *new = NULL;
@@@ -853,7 -862,7 +862,7 @@@ static struct sctp_ulpevent *sctp_ulpq_
  	struct sctp_stream *stream;
  
  	/* Check if this message needs ordering.  */
- 	if (SCTP_DATA_UNORDERED & event->msg_flags)
+ 	if (event->msg_flags & SCTP_DATA_UNORDERED)
  		return event;
  
  	/* Note: The stream ID must be verified before this routine.  */
@@@ -974,8 -983,8 +983,8 @@@ void sctp_ulpq_skip(struct sctp_ulpq *u
  	sctp_ulpq_reap_ordered(ulpq, sid);
  }
  
- static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq,
- 		struct sk_buff_head *list, __u16 needed)
+ __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq, struct sk_buff_head *list,
+ 			    __u16 needed)
  {
  	__u16 freed = 0;
  	__u32 tsn, last_tsn;
@@@ -1084,21 -1093,29 +1093,21 @@@ void sctp_ulpq_partial_delivery(struct 
  void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
  		      gfp_t gfp)
  {
 -	struct sctp_association *asoc;
 -	__u16 needed, freed;
 -
 -	asoc = ulpq->asoc;
 +	struct sctp_association *asoc = ulpq->asoc;
 +	__u32 freed = 0;
 +	__u16 needed;
  
 -	if (chunk) {
 -		needed = ntohs(chunk->chunk_hdr->length);
 -		needed -= sizeof(struct sctp_data_chunk);
 -	} else
 -		needed = SCTP_DEFAULT_MAXWINDOW;
 -
 -	freed = 0;
 +	needed = ntohs(chunk->chunk_hdr->length) -
 +		 sizeof(struct sctp_data_chunk);
  
  	if (skb_queue_empty(&asoc->base.sk->sk_receive_queue)) {
  		freed = sctp_ulpq_renege_order(ulpq, needed);
 -		if (freed < needed) {
 +		if (freed < needed)
  			freed += sctp_ulpq_renege_frags(ulpq, needed - freed);
 -		}
  	}
  	/* If able to free enough room, accept this chunk. */
 -	if (chunk && (freed >= needed)) {
 -		int retval;
 -		retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
 +	if (freed >= needed) {
 +		int retval = sctp_ulpq_tail_data(ulpq, chunk, gfp);
  		/*
  		 * Enter partial delivery if chunk has not been
  		 * delivered; otherwise, drain the reassembly queue.
@@@ -1132,7 -1149,7 +1141,7 @@@ void sctp_ulpq_abort_pd(struct sctp_ulp
  				       &sctp_sk(sk)->subscribe))
  		ev = sctp_ulpevent_make_pdapi(ulpq->asoc,
  					      SCTP_PARTIAL_DELIVERY_ABORTED,
- 					      gfp);
+ 					      0, 0, 0, gfp);
  	if (ev)
  		__skb_queue_tail(&sk->sk_receive_queue, sctp_event2skb(ev));
  
diff --combined net/smc/af_smc.c
index 449f62e1e270,daf8075f5a4c..b6e4e2e4fe12
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@@ -520,7 -520,7 +520,7 @@@ decline_rdma
  	smc->use_fallback = true;
  	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
  		rc = smc_clc_send_decline(smc, reason_code);
- 		if (rc < sizeof(struct smc_clc_msg_decline))
+ 		if (rc < 0)
  			goto out_err;
  	}
  	goto out_connected;
@@@ -751,14 -751,16 +751,16 @@@ static void smc_listen_work(struct work
  {
  	struct smc_sock *new_smc = container_of(work, struct smc_sock,
  						smc_listen_work);
+ 	struct smc_clc_msg_proposal_prefix *pclc_prfx;
  	struct socket *newclcsock = new_smc->clcsock;
  	struct smc_sock *lsmc = new_smc->listen_smc;
  	struct smc_clc_msg_accept_confirm cclc;
  	int local_contact = SMC_REUSE_CONTACT;
  	struct sock *newsmcsk = &new_smc->sk;
- 	struct smc_clc_msg_proposal pclc;
+ 	struct smc_clc_msg_proposal *pclc;
  	struct smc_ib_device *smcibdev;
  	struct sockaddr_in peeraddr;
+ 	u8 buf[SMC_CLC_MAX_LEN];
  	struct smc_link *link;
  	int reason_code = 0;
  	int rc = 0, len;
@@@ -775,7 -777,7 +777,7 @@@
  	/* do inband token exchange -
  	 *wait for and receive SMC Proposal CLC message
  	 */
- 	reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc),
+ 	reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf),
  				       SMC_CLC_PROPOSAL);
  	if (reason_code < 0)
  		goto out_err;
@@@ -804,8 -806,11 +806,11 @@@
  		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
  		goto decline_rdma;
  	}
- 	if ((pclc.outgoing_subnet != subnet) ||
- 	    (pclc.prefix_len != prefix_len)) {
+ 
+ 	pclc = (struct smc_clc_msg_proposal *)&buf;
+ 	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ 	if (pclc_prfx->outgoing_subnet != subnet ||
+ 	    pclc_prfx->prefix_len != prefix_len) {
  		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
  		goto decline_rdma;
  	}
@@@ -816,7 -821,7 +821,7 @@@
  	/* allocate connection / link group */
  	mutex_lock(&smc_create_lgr_pending);
  	local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
- 					smcibdev, ibport, &pclc.lcl, 0);
+ 					smcibdev, ibport, &pclc->lcl, 0);
  	if (local_contact < 0) {
  		rc = local_contact;
  		if (rc == -ENOMEM)
@@@ -879,11 -884,9 +884,9 @@@
  		}
  		/* QP confirmation over RoCE fabric */
  		reason_code = smc_serv_conf_first_link(new_smc);
- 		if (reason_code < 0) {
+ 		if (reason_code < 0)
  			/* peer is not aware of a problem */
- 			rc = reason_code;
  			goto out_err_unlock;
- 		}
  		if (reason_code > 0)
  			goto decline_rdma_unlock;
  	}
@@@ -916,8 -919,7 +919,7 @@@ decline_rdma
  	smc_conn_free(&new_smc->conn);
  	new_smc->use_fallback = true;
  	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
- 		rc = smc_clc_send_decline(new_smc, reason_code);
- 		if (rc < sizeof(struct smc_clc_msg_decline))
+ 		if (smc_clc_send_decline(new_smc, reason_code) < 0)
  			goto out_err;
  	}
  	goto out_connected;
@@@ -1107,7 -1109,7 +1109,7 @@@ out
  	return rc;
  }
  
 -static unsigned int smc_accept_poll(struct sock *parent)
 +static __poll_t smc_accept_poll(struct sock *parent)
  {
  	struct smc_sock *isk;
  	struct sock *sk;
@@@ -1126,11 -1128,11 +1128,11 @@@
  	return 0;
  }
  
 -static unsigned int smc_poll(struct file *file, struct socket *sock,
 +static __poll_t smc_poll(struct file *file, struct socket *sock,
  			     poll_table *wait)
  {
  	struct sock *sk = sock->sk;
 -	unsigned int mask = 0;
 +	__poll_t mask = 0;
  	struct smc_sock *smc;
  	int rc;
  
diff --combined net/smc/smc_clc.c
index 511548085d16,abf7ceb6690b..8ac51583a063
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@@ -22,6 -22,54 +22,54 @@@
  #include "smc_clc.h"
  #include "smc_ib.h"
  
+ /* check if received message has a correct header length and contains valid
+  * heading and trailing eyecatchers
+  */
+ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
+ {
+ 	struct smc_clc_msg_proposal_prefix *pclc_prfx;
+ 	struct smc_clc_msg_accept_confirm *clc;
+ 	struct smc_clc_msg_proposal *pclc;
+ 	struct smc_clc_msg_decline *dclc;
+ 	struct smc_clc_msg_trail *trl;
+ 
+ 	if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+ 		return false;
+ 	switch (clcm->type) {
+ 	case SMC_CLC_PROPOSAL:
+ 		pclc = (struct smc_clc_msg_proposal *)clcm;
+ 		pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ 		if (ntohs(pclc->hdr.length) !=
+ 			sizeof(*pclc) + ntohs(pclc->iparea_offset) +
+ 			sizeof(*pclc_prfx) +
+ 			pclc_prfx->ipv6_prefixes_cnt *
+ 				sizeof(struct smc_clc_ipv6_prefix) +
+ 			sizeof(*trl))
+ 			return false;
+ 		trl = (struct smc_clc_msg_trail *)
+ 			((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
+ 		break;
+ 	case SMC_CLC_ACCEPT:
+ 	case SMC_CLC_CONFIRM:
+ 		clc = (struct smc_clc_msg_accept_confirm *)clcm;
+ 		if (ntohs(clc->hdr.length) != sizeof(*clc))
+ 			return false;
+ 		trl = &clc->trl;
+ 		break;
+ 	case SMC_CLC_DECLINE:
+ 		dclc = (struct smc_clc_msg_decline *)clcm;
+ 		if (ntohs(dclc->hdr.length) != sizeof(*dclc))
+ 			return false;
+ 		trl = &dclc->trl;
+ 		break;
+ 	default:
+ 		return false;
+ 	}
+ 	if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+ 		return false;
+ 	return true;
+ }
+ 
  /* Wait for data on the tcp-socket, analyze received data
   * Returns:
   * 0 if success and it was not a decline that we received.
@@@ -35,7 -83,7 +83,7 @@@ int smc_clc_wait_msg(struct smc_sock *s
  	struct smc_clc_msg_hdr *clcm = buf;
  	struct msghdr msg = {NULL, 0};
  	int reason_code = 0;
 -	struct kvec vec;
 +	struct kvec vec = {buf, buflen};
  	int len, datlen;
  	int krflags;
  
@@@ -43,15 -91,12 +91,15 @@@
  	 * so we don't consume any subsequent CLC message or payload data
  	 * in the TCP byte stream
  	 */
 -	vec.iov_base = buf;
 -	vec.iov_len = buflen;
 +	/*
 +	 * Caller must make sure that buflen is no less than
 +	 * sizeof(struct smc_clc_msg_hdr)
 +	 */
  	krflags = MSG_PEEK | MSG_WAITALL;
  	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
 -	len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1,
 -			     sizeof(struct smc_clc_msg_hdr), krflags);
 +	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
 +			sizeof(struct smc_clc_msg_hdr));
 +	len = sock_recvmsg(smc->clcsock, &msg, krflags);
  	if (signal_pending(current)) {
  		reason_code = -EINTR;
  		clc_sk->sk_err = EINTR;
@@@ -75,9 -120,7 +123,7 @@@
  	}
  	datlen = ntohs(clcm->length);
  	if ((len < sizeof(struct smc_clc_msg_hdr)) ||
- 	    (datlen < sizeof(struct smc_clc_msg_decline)) ||
- 	    (datlen > sizeof(struct smc_clc_msg_accept_confirm)) ||
- 	    memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) ||
+ 	    (datlen > buflen) ||
  	    ((clcm->type != SMC_CLC_DECLINE) &&
  	     (clcm->type != expected_type))) {
  		smc->sk.sk_err = EPROTO;
@@@ -86,12 -129,13 +132,12 @@@
  	}
  
  	/* receive the complete CLC message */
 -	vec.iov_base = buf;
 -	vec.iov_len = buflen;
  	memset(&msg, 0, sizeof(struct msghdr));
 +	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen);
  	krflags = MSG_WAITALL;
  	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
 -	len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags);
 +	len = sock_recvmsg(smc->clcsock, &msg, krflags);
- 	if (len < datlen) {
+ 	if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
  		smc->sk.sk_err = EPROTO;
  		reason_code = -EPROTO;
  		goto out;
@@@ -135,7 -179,7 +181,7 @@@ int smc_clc_send_decline(struct smc_soc
  		smc->sk.sk_err = EPROTO;
  	if (len < 0)
  		smc->sk.sk_err = -len;
- 	return len;
+ 	return sock_error(&smc->sk);
  }
  
  /* send CLC PROPOSAL message across internal TCP socket */
@@@ -143,33 -187,43 +189,43 @@@ int smc_clc_send_proposal(struct smc_so
  			  struct smc_ib_device *smcibdev,
  			  u8 ibport)
  {
+ 	struct smc_clc_msg_proposal_prefix pclc_prfx;
  	struct smc_clc_msg_proposal pclc;
+ 	struct smc_clc_msg_trail trl;
  	int reason_code = 0;
+ 	struct kvec vec[3];
  	struct msghdr msg;
- 	struct kvec vec;
- 	int len, rc;
+ 	int len, plen, rc;
  
  	/* send SMC Proposal CLC message */
+ 	plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
  	memset(&pclc, 0, sizeof(pclc));
  	memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
  	pclc.hdr.type = SMC_CLC_PROPOSAL;
- 	pclc.hdr.length = htons(sizeof(pclc));
+ 	pclc.hdr.length = htons(plen);
  	pclc.hdr.version = SMC_CLC_V1;		/* SMC version */
  	memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
  	memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
  	memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
+ 	pclc.iparea_offset = htons(0);
  
+ 	memset(&pclc_prfx, 0, sizeof(pclc_prfx));
  	/* determine subnet and mask from internal TCP socket */
- 	rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet,
- 				  &pclc.prefix_len);
+ 	rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
+ 				  &pclc_prfx.prefix_len);
  	if (rc)
  		return SMC_CLC_DECL_CNFERR; /* configuration error */
- 	memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+ 	pclc_prfx.ipv6_prefixes_cnt = 0;
+ 	memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
  	memset(&msg, 0, sizeof(msg));
- 	vec.iov_base = &pclc;
- 	vec.iov_len = sizeof(pclc);
+ 	vec[0].iov_base = &pclc;
+ 	vec[0].iov_len = sizeof(pclc);
+ 	vec[1].iov_base = &pclc_prfx;
+ 	vec[1].iov_len = sizeof(pclc_prfx);
+ 	vec[2].iov_base = &trl;
+ 	vec[2].iov_len = sizeof(trl);
  	/* due to the few bytes needed for clc-handshake this cannot block */
- 	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc));
+ 	len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
  	if (len < sizeof(pclc)) {
  		if (len >= 0) {
  			reason_code = -ENETUNREACH;
diff --combined net/socket.c
index 092baa464afc,bbd2e9ceb692..60d05479b2c1
--- a/net/socket.c
+++ b/net/socket.c
@@@ -118,7 -118,7 +118,7 @@@ static ssize_t sock_write_iter(struct k
  static int sock_mmap(struct file *file, struct vm_area_struct *vma);
  
  static int sock_close(struct inode *inode, struct file *file);
 -static unsigned int sock_poll(struct file *file,
 +static __poll_t sock_poll(struct file *file,
  			      struct poll_table_struct *wait);
  static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
  #ifdef CONFIG_COMPAT
@@@ -163,12 -163,6 +163,6 @@@ static DEFINE_SPINLOCK(net_family_lock)
  static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
  
  /*
-  *	Statistics counters of the socket lists
-  */
- 
- static DEFINE_PER_CPU(int, sockets_in_use);
- 
- /*
   * Support routines.
   * Move socket addresses back and forth across the kernel/user
   * divide and look after the messy bits.
@@@ -578,7 -572,6 +572,6 @@@ struct socket *sock_alloc(void
  	inode->i_gid = current_fsgid();
  	inode->i_op = &sockfs_inode_ops;
  
- 	this_cpu_add(sockets_in_use, 1);
  	return sock;
  }
  EXPORT_SYMBOL(sock_alloc);
@@@ -605,7 -598,6 +598,6 @@@ void sock_release(struct socket *sock
  	if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
  		pr_err("%s: fasync list not empty!\n", __func__);
  
- 	this_cpu_sub(sockets_in_use, 1);
  	if (!sock->file) {
  		iput(SOCK_INODE(sock));
  		return;
@@@ -1095,9 -1087,9 +1087,9 @@@ out_release
  EXPORT_SYMBOL(sock_create_lite);
  
  /* No kernel lock held - perfect */
 -static unsigned int sock_poll(struct file *file, poll_table *wait)
 +static __poll_t sock_poll(struct file *file, poll_table *wait)
  {
 -	unsigned int busy_flag = 0;
 +	__poll_t busy_flag = 0;
  	struct socket *sock;
  
  	/*
@@@ -2622,17 -2614,8 +2614,8 @@@ core_initcall(sock_init);	/* early init
  #ifdef CONFIG_PROC_FS
  void socket_seq_show(struct seq_file *seq)
  {
- 	int cpu;
- 	int counter = 0;
- 
- 	for_each_possible_cpu(cpu)
- 	    counter += per_cpu(sockets_in_use, cpu);
- 
- 	/* It can be negative, by the way. 8) */
- 	if (counter < 0)
- 		counter = 0;
- 
- 	seq_printf(seq, "sockets: used %d\n", counter);
+ 	seq_printf(seq, "sockets: used %d\n",
+ 		   sock_inuse_get(seq->private));
  }
  #endif				/* CONFIG_PROC_FS */
  
diff --combined net/tipc/socket.c
index fcbd6489a8b5,0cdf5c2ad881..1a31445e1a31
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@@ -710,13 -710,13 +710,13 @@@ static int tipc_getname(struct socket *
   * imply that the operation will succeed, merely that it should be performed
   * and will not block.
   */
 -static unsigned int tipc_poll(struct file *file, struct socket *sock,
 +static __poll_t tipc_poll(struct file *file, struct socket *sock,
  			      poll_table *wait)
  {
  	struct sock *sk = sock->sk;
  	struct tipc_sock *tsk = tipc_sk(sk);
  	struct tipc_group *grp = tsk->group;
 -	u32 revents = 0;
 +	__poll_t revents = 0;
  
  	sock_poll_wait(file, sk_sleep(sk), wait);
  
@@@ -2640,9 -2640,7 +2640,7 @@@ void tipc_sk_reinit(struct net *net
  	rhashtable_walk_enter(&tn->sk_rht, &iter);
  
  	do {
- 		tsk = ERR_PTR(rhashtable_walk_start(&iter));
- 		if (IS_ERR(tsk))
- 			goto walk_stop;
+ 		rhashtable_walk_start(&iter);
  
  		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
  			spin_lock_bh(&tsk->sk.sk_lock.slock);
@@@ -2651,7 -2649,7 +2649,7 @@@
  			msg_set_orignode(msg, tn->own_addr);
  			spin_unlock_bh(&tsk->sk.sk_lock.slock);
  		}
- walk_stop:
+ 
  		rhashtable_walk_stop(&iter);
  	} while (tsk == ERR_PTR(-EAGAIN));
  }
diff --combined net/xfrm/xfrm_input.c
index 3f6f6f8c9fa5,ac277b97e0d7..26b10eb7a206
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@@ -8,29 -8,15 +8,29 @@@
   *
   */
  
 +#include <linux/bottom_half.h>
 +#include <linux/interrupt.h>
  #include <linux/slab.h>
  #include <linux/module.h>
  #include <linux/netdevice.h>
 +#include <linux/percpu.h>
  #include <net/dst.h>
  #include <net/ip.h>
  #include <net/xfrm.h>
  #include <net/ip_tunnels.h>
  #include <net/ip6_tunnel.h>
  
 +struct xfrm_trans_tasklet {
 +	struct tasklet_struct tasklet;
 +	struct sk_buff_head queue;
 +};
 +
 +struct xfrm_trans_cb {
 +	int (*finish)(struct net *net, struct sock *sk, struct sk_buff *skb);
 +};
 +
 +#define XFRM_TRANS_SKB_CB(__skb) ((struct xfrm_trans_cb *)&((__skb)->cb[0]))
 +
  static struct kmem_cache *secpath_cachep __read_mostly;
  
  static DEFINE_SPINLOCK(xfrm_input_afinfo_lock);
@@@ -39,8 -25,6 +39,8 @@@ static struct xfrm_input_afinfo const _
  static struct gro_cells gro_cells;
  static struct net_device xfrm_napi_dev;
  
 +static DEFINE_PER_CPU(struct xfrm_trans_tasklet, xfrm_trans_tasklet);
 +
  int xfrm_input_register_afinfo(const struct xfrm_input_afinfo *afinfo)
  {
  	int err = 0;
@@@ -223,7 -207,7 +223,7 @@@ int xfrm_input(struct sk_buff *skb, in
  	xfrm_address_t *daddr;
  	struct xfrm_mode *inner_mode;
  	u32 mark = skb->mark;
 -	unsigned int family;
 +	unsigned int family = AF_UNSPEC;
  	int decaps = 0;
  	int async = 0;
  	bool xfrm_gro = false;
@@@ -232,16 -216,6 +232,16 @@@
  
  	if (encap_type < 0) {
  		x = xfrm_input_state(skb);
 +
 +		if (unlikely(x->km.state != XFRM_STATE_VALID)) {
 +			if (x->km.state == XFRM_STATE_ACQ)
 +				XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
 +			else
 +				XFRM_INC_STATS(net,
 +					       LINUX_MIB_XFRMINSTATEINVALID);
 +			goto drop;
 +		}
 +
  		family = x->outer_mode->afinfo->family;
  
  		/* An encap_type of -1 indicates async resumption. */
@@@ -257,7 -231,6 +257,6 @@@
  
  		if (xo && (xo->flags & CRYPTO_DONE)) {
  			crypto_done = true;
- 			x = xfrm_input_state(skb);
  			family = XFRM_SPI_SKB_CB(skb)->family;
  
  			if (!(xo->status & CRYPTO_SUCCESS)) {
@@@ -493,41 -466,9 +492,41 @@@ int xfrm_input_resume(struct sk_buff *s
  }
  EXPORT_SYMBOL(xfrm_input_resume);
  
 +static void xfrm_trans_reinject(unsigned long data)
 +{
 +	struct xfrm_trans_tasklet *trans = (void *)data;
 +	struct sk_buff_head queue;
 +	struct sk_buff *skb;
 +
 +	__skb_queue_head_init(&queue);
 +	skb_queue_splice_init(&trans->queue, &queue);
 +
 +	while ((skb = __skb_dequeue(&queue)))
 +		XFRM_TRANS_SKB_CB(skb)->finish(dev_net(skb->dev), NULL, skb);
 +}
 +
 +int xfrm_trans_queue(struct sk_buff *skb,
 +		     int (*finish)(struct net *, struct sock *,
 +				   struct sk_buff *))
 +{
 +	struct xfrm_trans_tasklet *trans;
 +
 +	trans = this_cpu_ptr(&xfrm_trans_tasklet);
 +
 +	if (skb_queue_len(&trans->queue) >= netdev_max_backlog)
 +		return -ENOBUFS;
 +
 +	XFRM_TRANS_SKB_CB(skb)->finish = finish;
 +	skb_queue_tail(&trans->queue, skb);
 +	tasklet_schedule(&trans->tasklet);
 +	return 0;
 +}
 +EXPORT_SYMBOL(xfrm_trans_queue);
 +
  void __init xfrm_input_init(void)
  {
  	int err;
 +	int i;
  
  	init_dummy_netdev(&xfrm_napi_dev);
  	err = gro_cells_init(&gro_cells, &xfrm_napi_dev);
@@@ -538,13 -479,4 +537,13 @@@
  					   sizeof(struct sec_path),
  					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
  					   NULL);
 +
 +	for_each_possible_cpu(i) {
 +		struct xfrm_trans_tasklet *trans;
 +
 +		trans = &per_cpu(xfrm_trans_tasklet, i);
 +		__skb_queue_head_init(&trans->queue);
 +		tasklet_init(&trans->tasklet, xfrm_trans_reinject,
 +			     (unsigned long)trans);
 +	}
  }
diff --combined net/xfrm/xfrm_policy.c
index 70aa5cb0c659,e3a5aca9cdda..d8a8129b9232
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@@ -54,7 -54,7 +54,7 @@@ static struct xfrm_policy_afinfo const 
  static struct kmem_cache *xfrm_dst_cache __read_mostly;
  static __read_mostly seqcount_t xfrm_policy_hash_generation;
  
- static void xfrm_init_pmtu(struct dst_entry *dst);
+ static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr);
  static int stale_bundle(struct dst_entry *dst);
  static int xfrm_bundle_ok(struct xfrm_dst *xdst);
  static void xfrm_policy_queue_process(struct timer_list *t);
@@@ -1168,15 -1168,9 +1168,15 @@@ static struct xfrm_policy *xfrm_sk_poli
   again:
  	pol = rcu_dereference(sk->sk_policy[dir]);
  	if (pol != NULL) {
 -		bool match = xfrm_selector_match(&pol->selector, fl, family);
 +		bool match;
  		int err = 0;
  
 +		if (pol->family != family) {
 +			pol = NULL;
 +			goto out;
 +		}
 +
 +		match = xfrm_selector_match(&pol->selector, fl, family);
  		if (match) {
  			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
  				pol = NULL;
@@@ -1257,7 -1251,7 +1257,7 @@@ EXPORT_SYMBOL(xfrm_policy_delete)
  
  int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
  {
- 	struct net *net = xp_net(pol);
+ 	struct net *net = sock_net(sk);
  	struct xfrm_policy *old_pol;
  
  #ifdef CONFIG_XFRM_SUB_POLICY
@@@ -1544,7 -1538,9 +1544,9 @@@ static inline int xfrm_fill_dst(struct 
   */
  
  static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
- 					    struct xfrm_state **xfrm, int nx,
+ 					    struct xfrm_state **xfrm,
+ 					    struct xfrm_dst **bundle,
+ 					    int nx,
  					    const struct flowi *fl,
  					    struct dst_entry *dst)
  {
@@@ -1552,8 -1548,8 +1554,8 @@@
  	unsigned long now = jiffies;
  	struct net_device *dev;
  	struct xfrm_mode *inner_mode;
- 	struct dst_entry *dst_prev = NULL;
- 	struct dst_entry *dst0 = NULL;
+ 	struct xfrm_dst *xdst_prev = NULL;
+ 	struct xfrm_dst *xdst0 = NULL;
  	int i = 0;
  	int err;
  	int header_len = 0;
@@@ -1579,13 -1575,14 +1581,14 @@@
  			goto put_states;
  		}
  
- 		if (!dst_prev)
- 			dst0 = dst1;
+ 		bundle[i] = xdst;
+ 		if (!xdst_prev)
+ 			xdst0 = xdst;
  		else
  			/* Ref count is taken during xfrm_alloc_dst()
  			 * No need to do dst_clone() on dst1
  			 */
- 			dst_prev->child = dst1;
+ 			xfrm_dst_set_child(xdst_prev, &xdst->u.dst);
  
  		if (xfrm[i]->sel.family == AF_UNSPEC) {
  			inner_mode = xfrm_ip2inner_mode(xfrm[i],
@@@ -1622,8 -1619,7 +1625,7 @@@
  		dst1->input = dst_discard;
  		dst1->output = inner_mode->afinfo->output;
  
- 		dst1->next = dst_prev;
- 		dst_prev = dst1;
+ 		xdst_prev = xdst;
  
  		header_len += xfrm[i]->props.header_len;
  		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
@@@ -1631,40 -1627,39 +1633,39 @@@
  		trailer_len += xfrm[i]->props.trailer_len;
  	}
  
- 	dst_prev->child = dst;
- 	dst0->path = dst;
+ 	xfrm_dst_set_child(xdst_prev, dst);
+ 	xdst0->path = dst;
  
  	err = -ENODEV;
  	dev = dst->dev;
  	if (!dev)
  		goto free_dst;
  
- 	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
- 	xfrm_init_pmtu(dst_prev);
- 
- 	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
- 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
+ 	xfrm_init_path(xdst0, dst, nfheader_len);
+ 	xfrm_init_pmtu(bundle, nx);
  
- 		err = xfrm_fill_dst(xdst, dev, fl);
+ 	for (xdst_prev = xdst0; xdst_prev != (struct xfrm_dst *)dst;
+ 	     xdst_prev = (struct xfrm_dst *) xfrm_dst_child(&xdst_prev->u.dst)) {
+ 		err = xfrm_fill_dst(xdst_prev, dev, fl);
  		if (err)
  			goto free_dst;
  
- 		dst_prev->header_len = header_len;
- 		dst_prev->trailer_len = trailer_len;
- 		header_len -= xdst->u.dst.xfrm->props.header_len;
- 		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
+ 		xdst_prev->u.dst.header_len = header_len;
+ 		xdst_prev->u.dst.trailer_len = trailer_len;
+ 		header_len -= xdst_prev->u.dst.xfrm->props.header_len;
+ 		trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len;
  	}
  
  out:
- 	return dst0;
+ 	return &xdst0->u.dst;
  
  put_states:
  	for (; i < nx; i++)
  		xfrm_state_put(xfrm[i]);
  free_dst:
- 	if (dst0)
- 		dst_release_immediate(dst0);
- 	dst0 = ERR_PTR(err);
+ 	if (xdst0)
+ 		dst_release_immediate(&xdst0->u.dst);
+ 	xdst0 = ERR_PTR(err);
  	goto out;
  }
  
@@@ -1806,7 -1801,7 +1807,7 @@@ static bool xfrm_xdst_can_reuse(struct 
  	for (i = 0; i < num; i++) {
  		if (!dst || dst->xfrm != xfrm[i])
  			return false;
- 		dst = dst->child;
+ 		dst = xfrm_dst_child(dst);
  	}
  
  	return xfrm_bundle_ok(xdst);
@@@ -1819,6 -1814,7 +1820,7 @@@ xfrm_resolve_and_create_bundle(struct x
  {
  	struct net *net = xp_net(pols[0]);
  	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+ 	struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
  	struct xfrm_dst *xdst, *old;
  	struct dst_entry *dst;
  	int err;
@@@ -1839,7 -1835,6 +1841,7 @@@
  		   sizeof(struct xfrm_policy *) * num_pols) == 0 &&
  	    xfrm_xdst_can_reuse(xdst, xfrm, err)) {
  		dst_hold(&xdst->u.dst);
 +		xfrm_pols_put(pols, num_pols);
  		while (err > 0)
  			xfrm_state_put(xfrm[--err]);
  		return xdst;
@@@ -1847,7 -1842,7 +1849,7 @@@
  
  	old = xdst;
  
- 	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
+ 	dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig);
  	if (IS_ERR(dst)) {
  		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
  		return ERR_CAST(dst);
@@@ -1887,8 -1882,8 +1889,8 @@@ static void xfrm_policy_queue_process(s
  	xfrm_decode_session(skb, &fl, dst->ops->family);
  	spin_unlock(&pq->hold_queue.lock);
  
- 	dst_hold(dst->path);
- 	dst = xfrm_lookup(net, dst->path, &fl, sk, 0);
+ 	dst_hold(xfrm_dst_path(dst));
+ 	dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0);
  	if (IS_ERR(dst))
  		goto purge_queue;
  
@@@ -1917,8 -1912,8 +1919,8 @@@
  		skb = __skb_dequeue(&list);
  
  		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
- 		dst_hold(skb_dst(skb)->path);
- 		dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0);
+ 		dst_hold(xfrm_dst_path(skb_dst(skb)));
+ 		dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0);
  		if (IS_ERR(dst)) {
  			kfree_skb(skb);
  			continue;
@@@ -2019,8 -2014,8 +2021,8 @@@ static struct xfrm_dst *xfrm_create_dum
  	dst1->output = xdst_queue_output;
  
  	dst_hold(dst);
- 	dst1->child = dst;
- 	dst1->path = dst;
+ 	xfrm_dst_set_child(xdst, dst);
+ 	xdst->path = dst;
  
  	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);
  
@@@ -2583,7 -2578,7 +2585,7 @@@ static int stale_bundle(struct dst_entr
  
  void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
  {
- 	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
+ 	while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) {
  		dst->dev = dev_net(dev)->loopback_dev;
  		dev_hold(dst->dev);
  		dev_put(dev);
@@@ -2607,13 -2602,15 +2609,15 @@@ static struct dst_entry *xfrm_negative_
  	return dst;
  }
  
- static void xfrm_init_pmtu(struct dst_entry *dst)
+ static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
  {
- 	do {
- 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ 	while (nr--) {
+ 		struct xfrm_dst *xdst = bundle[nr];
  		u32 pmtu, route_mtu_cached;
+ 		struct dst_entry *dst;
  
- 		pmtu = dst_mtu(dst->child);
+ 		dst = &xdst->u.dst;
+ 		pmtu = dst_mtu(xfrm_dst_child(dst));
  		xdst->child_mtu_cached = pmtu;
  
  		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
@@@ -2625,7 -2622,7 +2629,7 @@@
  			pmtu = route_mtu_cached;
  
  		dst_metric_set(dst, RTAX_MTU, pmtu);
- 	} while ((dst = dst->next));
+ 	}
  }
  
  /* Check that the bundle accepts the flow and its components are
@@@ -2634,19 -2631,20 +2638,20 @@@
  
  static int xfrm_bundle_ok(struct xfrm_dst *first)
  {
+ 	struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
  	struct dst_entry *dst = &first->u.dst;
- 	struct xfrm_dst *last;
+ 	struct xfrm_dst *xdst;
+ 	int start_from, nr;
  	u32 mtu;
  
- 	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
+ 	if (!dst_check(xfrm_dst_path(dst), ((struct xfrm_dst *)dst)->path_cookie) ||
  	    (dst->dev && !netif_running(dst->dev)))
  		return 0;
  
  	if (dst->flags & DST_XFRM_QUEUE)
  		return 1;
  
- 	last = NULL;
- 
+ 	start_from = nr = 0;
  	do {
  		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
  
@@@ -2658,9 -2656,11 +2663,11 @@@
  		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
  			return 0;
  
- 		mtu = dst_mtu(dst->child);
+ 		bundle[nr++] = xdst;
+ 
+ 		mtu = dst_mtu(xfrm_dst_child(dst));
  		if (xdst->child_mtu_cached != mtu) {
- 			last = xdst;
+ 			start_from = nr;
  			xdst->child_mtu_cached = mtu;
  		}
  
@@@ -2668,30 -2668,30 +2675,30 @@@
  			return 0;
  		mtu = dst_mtu(xdst->route);
  		if (xdst->route_mtu_cached != mtu) {
- 			last = xdst;
+ 			start_from = nr;
  			xdst->route_mtu_cached = mtu;
  		}
  
- 		dst = dst->child;
+ 		dst = xfrm_dst_child(dst);
  	} while (dst->xfrm);
  
- 	if (likely(!last))
+ 	if (likely(!start_from))
  		return 1;
  
- 	mtu = last->child_mtu_cached;
- 	for (;;) {
- 		dst = &last->u.dst;
+ 	xdst = bundle[start_from - 1];
+ 	mtu = xdst->child_mtu_cached;
+ 	while (start_from--) {
+ 		dst = &xdst->u.dst;
  
  		mtu = xfrm_state_mtu(dst->xfrm, mtu);
- 		if (mtu > last->route_mtu_cached)
- 			mtu = last->route_mtu_cached;
+ 		if (mtu > xdst->route_mtu_cached)
+ 			mtu = xdst->route_mtu_cached;
  		dst_metric_set(dst, RTAX_MTU, mtu);
- 
- 		if (last == first)
+ 		if (!start_from)
  			break;
  
- 		last = (struct xfrm_dst *)last->u.dst.next;
- 		last->child_mtu_cached = mtu;
+ 		xdst = bundle[start_from - 1];
+ 		xdst->child_mtu_cached = mtu;
  	}
  
  	return 1;
@@@ -2699,22 -2699,20 +2706,20 @@@
  
  static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
  {
- 	return dst_metric_advmss(dst->path);
+ 	return dst_metric_advmss(xfrm_dst_path(dst));
  }
  
  static unsigned int xfrm_mtu(const struct dst_entry *dst)
  {
  	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
  
- 	return mtu ? : dst_mtu(dst->path);
+ 	return mtu ? : dst_mtu(xfrm_dst_path(dst));
  }
  
  static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
  					const void *daddr)
  {
- 	const struct dst_entry *path = dst->path;
- 
- 	for (; dst != path; dst = dst->child) {
+ 	while (dst->xfrm) {
  		const struct xfrm_state *xfrm = dst->xfrm;
  
  		if (xfrm->props.mode == XFRM_MODE_TRANSPORT)
@@@ -2723,6 -2721,8 +2728,8 @@@
  			daddr = xfrm->coaddr;
  		else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
  			daddr = &xfrm->id.daddr;
+ 
+ 		dst = xfrm_dst_child(dst);
  	}
  	return daddr;
  }
@@@ -2731,7 -2731,7 +2738,7 @@@ static struct neighbour *xfrm_neigh_loo
  					   struct sk_buff *skb,
  					   const void *daddr)
  {
- 	const struct dst_entry *path = dst->path;
+ 	const struct dst_entry *path = xfrm_dst_path(dst);
  
  	if (!skb)
  		daddr = xfrm_get_dst_nexthop(dst, daddr);
@@@ -2740,7 -2740,7 +2747,7 @@@
  
  static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
  {
- 	const struct dst_entry *path = dst->path;
+ 	const struct dst_entry *path = xfrm_dst_path(dst);
  
  	daddr = xfrm_get_dst_nexthop(dst, daddr);
  	path->ops->confirm_neigh(path, daddr);
diff --combined net/xfrm/xfrm_state.c
index 500b3391f474,1b7856be3eeb..cc4c519cad76
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@@ -1343,7 -1343,6 +1343,7 @@@ static struct xfrm_state *xfrm_state_cl
  
  	if (orig->aead) {
  		x->aead = xfrm_algo_aead_clone(orig->aead);
 +		x->geniv = orig->geniv;
  		if (!x->aead)
  			goto error;
  	}
@@@ -2049,6 -2048,13 +2049,13 @@@ int xfrm_user_policy(struct sock *sk, i
  	struct xfrm_mgr *km;
  	struct xfrm_policy *pol = NULL;
  
+ 	if (!optval && !optlen) {
+ 		xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
+ 		xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
+ 		__sk_dst_reset(sk);
+ 		return 0;
+ 	}
+ 
  	if (optlen <= 0 || optlen > PAGE_SIZE)
  		return -EMSGSIZE;
  
diff --combined tools/testing/selftests/bpf/Makefile
index 05fc4e2e7b3a,a1fcb0c31d02..f1fdb36269f2
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@@ -11,16 -11,18 +11,18 @@@ ifneq ($(wildcard $(GENHDR)),
  endif
  
  CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
 -LDLIBS += -lcap -lelf
 +LDLIBS += -lcap -lelf -lrt
  
  TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
  	test_align test_verifier_log test_dev_cgroup
  
  TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
  	test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o     \
- 	sockmap_verdict_prog.o dev_cgroup.o
+ 	sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
+ 	test_l4lb_noinline.o test_xdp_noinline.o
  
- TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh
+ TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \
+ 	test_offload.py
  
  include ../lib.mk
  
@@@ -49,8 -51,13 +51,13 @@@ els
    CPU ?= generic
  endif
  
+ CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
+ 	      -Wno-compare-distinct-pointer-types
+ 
+ $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
+ $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
+ 
  %.o: %.c
- 	$(CLANG) -I. -I./include/uapi -I../../../include/uapi \
- 		 -Wno-compare-distinct-pointer-types          \
+ 	$(CLANG) $(CLANG_FLAGS) \
  		 -O2 -target bpf -emit-llvm -c $< -o - |      \
  	$(LLC) -march=bpf -mcpu=$(CPU) -filetype=obj -o $@
diff --combined tools/testing/selftests/bpf/test_progs.c
index 6761be18a91f,6472ca98690e..09087ab12293
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@@ -21,8 -21,10 +21,10 @@@ typedef __u16 __sum16
  #include <linux/ipv6.h>
  #include <linux/tcp.h>
  #include <linux/filter.h>
+ #include <linux/perf_event.h>
  #include <linux/unistd.h>
  
+ #include <sys/ioctl.h>
  #include <sys/wait.h>
  #include <sys/resource.h>
  #include <sys/types.h>
@@@ -167,10 -169,9 +169,9 @@@ out
  #define NUM_ITER 100000
  #define VIP_NUM 5
  
- static void test_l4lb(void)
+ static void test_l4lb(const char *file)
  {
  	unsigned int nr_cpus = bpf_num_possible_cpus();
- 	const char *file = "./test_l4lb.o";
  	struct vip key = {.protocol = 6};
  	struct vip_meta {
  		__u32 flags;
@@@ -247,6 -248,95 +248,95 @@@ out
  	bpf_object__close(obj);
  }
  
+ static void test_l4lb_all(void)
+ {
+ 	const char *file1 = "./test_l4lb.o";
+ 	const char *file2 = "./test_l4lb_noinline.o";
+ 
+ 	test_l4lb(file1);
+ 	test_l4lb(file2);
+ }
+ 
+ static void test_xdp_noinline(void)
+ {
+ 	const char *file = "./test_xdp_noinline.o";
+ 	unsigned int nr_cpus = bpf_num_possible_cpus();
+ 	struct vip key = {.protocol = 6};
+ 	struct vip_meta {
+ 		__u32 flags;
+ 		__u32 vip_num;
+ 	} value = {.vip_num = VIP_NUM};
+ 	__u32 stats_key = VIP_NUM;
+ 	struct vip_stats {
+ 		__u64 bytes;
+ 		__u64 pkts;
+ 	} stats[nr_cpus];
+ 	struct real_definition {
+ 		union {
+ 			__be32 dst;
+ 			__be32 dstv6[4];
+ 		};
+ 		__u8 flags;
+ 	} real_def = {.dst = MAGIC_VAL};
+ 	__u32 ch_key = 11, real_num = 3;
+ 	__u32 duration, retval, size;
+ 	int err, i, prog_fd, map_fd;
+ 	__u64 bytes = 0, pkts = 0;
+ 	struct bpf_object *obj;
+ 	char buf[128];
+ 	u32 *magic = (u32 *)buf;
+ 
+ 	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ 	if (err) {
+ 		error_cnt++;
+ 		return;
+ 	}
+ 
+ 	map_fd = bpf_find_map(__func__, obj, "vip_map");
+ 	if (map_fd < 0)
+ 		goto out;
+ 	bpf_map_update_elem(map_fd, &key, &value, 0);
+ 
+ 	map_fd = bpf_find_map(__func__, obj, "ch_rings");
+ 	if (map_fd < 0)
+ 		goto out;
+ 	bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
+ 
+ 	map_fd = bpf_find_map(__func__, obj, "reals");
+ 	if (map_fd < 0)
+ 		goto out;
+ 	bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
+ 
+ 	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+ 				buf, &size, &retval, &duration);
+ 	CHECK(err || errno || retval != 1 || size != 54 ||
+ 	      *magic != MAGIC_VAL, "ipv4",
+ 	      "err %d errno %d retval %d size %d magic %x\n",
+ 	      err, errno, retval, size, *magic);
+ 
+ 	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+ 				buf, &size, &retval, &duration);
+ 	CHECK(err || errno || retval != 1 || size != 74 ||
+ 	      *magic != MAGIC_VAL, "ipv6",
+ 	      "err %d errno %d retval %d size %d magic %x\n",
+ 	      err, errno, retval, size, *magic);
+ 
+ 	map_fd = bpf_find_map(__func__, obj, "stats");
+ 	if (map_fd < 0)
+ 		goto out;
+ 	bpf_map_lookup_elem(map_fd, &stats_key, stats);
+ 	for (i = 0; i < nr_cpus; i++) {
+ 		bytes += stats[i].bytes;
+ 		pkts += stats[i].pkts;
+ 	}
+ 	if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
+ 		error_cnt++;
+ 		printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts);
+ 	}
+ out:
+ 	bpf_object__close(obj);
+ }
+ 
  static void test_tcp_estats(void)
  {
  	const char *file = "./test_tcp_estats.o";
@@@ -351,7 -441,7 +441,7 @@@ static void test_bpf_obj_id(void
  			  info_len != sizeof(struct bpf_map_info) ||
  			  strcmp((char *)map_infos[i].name, expected_map_name),
  			  "get-map-info(fd)",
 -			  "err %d errno %d type %d(%d) info_len %u(%lu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
 +			  "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
  			  err, errno,
  			  map_infos[i].type, BPF_MAP_TYPE_ARRAY,
  			  info_len, sizeof(struct bpf_map_info),
@@@ -395,7 -485,7 +485,7 @@@
  			  *(int *)prog_infos[i].map_ids != map_infos[i].id ||
  			  strcmp((char *)prog_infos[i].name, expected_prog_name),
  			  "get-prog-info(fd)",
 -			  "err %d errno %d i %d type %d(%d) info_len %u(%lu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
 +			  "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
  			  err, errno, i,
  			  prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
  			  info_len, sizeof(struct bpf_prog_info),
@@@ -463,7 -553,7 +553,7 @@@
  		      memcmp(&prog_info, &prog_infos[i], info_len) ||
  		      *(int *)prog_info.map_ids != saved_map_id,
  		      "get-prog-info(next_id->fd)",
 -		      "err %d errno %d info_len %u(%lu) memcmp %d map_id %u(%u)\n",
 +		      "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
  		      err, errno, info_len, sizeof(struct bpf_prog_info),
  		      memcmp(&prog_info, &prog_infos[i], info_len),
  		      *(int *)prog_info.map_ids, saved_map_id);
@@@ -509,7 -599,7 +599,7 @@@
  		      memcmp(&map_info, &map_infos[i], info_len) ||
  		      array_value != array_magic_value,
  		      "check get-map-info(next_id->fd)",
 -		      "err %d errno %d info_len %u(%lu) memcmp %d array_value %llu(%llu)\n",
 +		      "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
  		      err, errno, info_len, sizeof(struct bpf_map_info),
  		      memcmp(&map_info, &map_infos[i], info_len),
  		      array_value, array_magic_value);
@@@ -617,6 -707,136 +707,136 @@@ static void test_obj_name(void
  	}
  }
  
+ static void test_tp_attach_query(void)
+ {
+ 	const int num_progs = 3;
+ 	int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs];
+ 	__u32 duration = 0, info_len, saved_prog_ids[num_progs];
+ 	const char *file = "./test_tracepoint.o";
+ 	struct perf_event_query_bpf *query;
+ 	struct perf_event_attr attr = {};
+ 	struct bpf_object *obj[num_progs];
+ 	struct bpf_prog_info prog_info;
+ 	char buf[256];
+ 
+ 	snprintf(buf, sizeof(buf),
+ 		 "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+ 	efd = open(buf, O_RDONLY, 0);
+ 	if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ 		return;
+ 	bytes = read(efd, buf, sizeof(buf));
+ 	close(efd);
+ 	if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+ 		  "read", "bytes %d errno %d\n", bytes, errno))
+ 		return;
+ 
+ 	attr.config = strtol(buf, NULL, 0);
+ 	attr.type = PERF_TYPE_TRACEPOINT;
+ 	attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ 	attr.sample_period = 1;
+ 	attr.wakeup_events = 1;
+ 
+ 	query = malloc(sizeof(*query) + sizeof(__u32) * num_progs);
+ 	for (i = 0; i < num_progs; i++) {
+ 		err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i],
+ 				    &prog_fd[i]);
+ 		if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ 			goto cleanup1;
+ 
+ 		bzero(&prog_info, sizeof(prog_info));
+ 		prog_info.jited_prog_len = 0;
+ 		prog_info.xlated_prog_len = 0;
+ 		prog_info.nr_map_ids = 0;
+ 		info_len = sizeof(prog_info);
+ 		err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len);
+ 		if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n",
+ 			  err, errno))
+ 			goto cleanup1;
+ 		saved_prog_ids[i] = prog_info.id;
+ 
+ 		pmu_fd[i] = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 				    0 /* cpu 0 */, -1 /* group id */,
+ 				    0 /* flags */);
+ 		if (CHECK(pmu_fd[i] < 0, "perf_event_open", "err %d errno %d\n",
+ 			  pmu_fd[i], errno))
+ 			goto cleanup2;
+ 		err = ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
+ 		if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+ 			  err, errno))
+ 			goto cleanup3;
+ 
+ 		if (i == 0) {
+ 			/* check NULL prog array query */
+ 			query->ids_len = num_progs;
+ 			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ 			if (CHECK(err || query->prog_cnt != 0,
+ 				  "perf_event_ioc_query_bpf",
+ 				  "err %d errno %d query->prog_cnt %u\n",
+ 				  err, errno, query->prog_cnt))
+ 				goto cleanup3;
+ 		}
+ 
+ 		err = ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[i]);
+ 		if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+ 			  err, errno))
+ 			goto cleanup3;
+ 
+ 		if (i == 1) {
+ 			/* try to get # of programs only */
+ 			query->ids_len = 0;
+ 			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ 			if (CHECK(err || query->prog_cnt != 2,
+ 				  "perf_event_ioc_query_bpf",
+ 				  "err %d errno %d query->prog_cnt %u\n",
+ 				  err, errno, query->prog_cnt))
+ 				goto cleanup3;
+ 
+ 			/* try a few negative tests */
+ 			/* invalid query pointer */
+ 			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF,
+ 				    (struct perf_event_query_bpf *)0x1);
+ 			if (CHECK(!err || errno != EFAULT,
+ 				  "perf_event_ioc_query_bpf",
+ 				  "err %d errno %d\n", err, errno))
+ 				goto cleanup3;
+ 
+ 			/* no enough space */
+ 			query->ids_len = 1;
+ 			err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ 			if (CHECK(!err || errno != ENOSPC || query->prog_cnt != 2,
+ 				  "perf_event_ioc_query_bpf",
+ 				  "err %d errno %d query->prog_cnt %u\n",
+ 				  err, errno, query->prog_cnt))
+ 				goto cleanup3;
+ 		}
+ 
+ 		query->ids_len = num_progs;
+ 		err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ 		if (CHECK(err || query->prog_cnt != (i + 1),
+ 			  "perf_event_ioc_query_bpf",
+ 			  "err %d errno %d query->prog_cnt %u\n",
+ 			  err, errno, query->prog_cnt))
+ 			goto cleanup3;
+ 		for (j = 0; j < i + 1; j++)
+ 			if (CHECK(saved_prog_ids[j] != query->ids[j],
+ 				  "perf_event_ioc_query_bpf",
+ 				  "#%d saved_prog_id %x query prog_id %x\n",
+ 				  j, saved_prog_ids[j], query->ids[j]))
+ 				goto cleanup3;
+ 	}
+ 
+ 	i = num_progs - 1;
+ 	for (; i >= 0; i--) {
+  cleanup3:
+ 		ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
+  cleanup2:
+ 		close(pmu_fd[i]);
+  cleanup1:
+ 		bpf_object__close(obj[i]);
+ 	}
+ 	free(query);
+ }
+ 
  int main(void)
  {
  	struct rlimit rinf = { RLIM_INFINITY, RLIM_INFINITY };
@@@ -625,11 -845,13 +845,13 @@@
  
  	test_pkt_access();
  	test_xdp();
- 	test_l4lb();
+ 	test_l4lb_all();
+ 	test_xdp_noinline();
  	test_tcp_estats();
  	test_bpf_obj_id();
  	test_pkt_md_access();
  	test_obj_name();
+ 	test_tp_attach_query();
  
  	printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
  	return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --combined tools/testing/selftests/bpf/test_verifier.c
index b51017404c62,3bacff0d6f91..d38334abb990
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@@ -2,6 -2,7 +2,7 @@@
   * Testsuite for eBPF verifier
   *
   * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+  * Copyright (c) 2017 Facebook
   *
   * This program is free software; you can redistribute it and/or
   * modify it under the terms of version 2 of the GNU General Public
@@@ -277,7 -278,7 +278,7 @@@ static struct bpf_test tests[] = 
  		.insns = {
  			BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2),
  		},
- 		.errstr = "jump out of range",
+ 		.errstr = "not an exit",
  		.result = REJECT,
  	},
  	{
@@@ -422,7 -423,9 +423,7 @@@
  			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr_unpriv = "R1 subtraction from stack pointer",
 -		.result_unpriv = REJECT,
 -		.errstr = "R1 invalid mem access",
 +		.errstr = "R1 subtraction from stack pointer",
  		.result = REJECT,
  	},
  	{
@@@ -604,6 -607,7 +605,6 @@@
  		},
  		.errstr = "misaligned stack access",
  		.result = REJECT,
 -		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"invalid map_fd for function call",
@@@ -1794,6 -1798,7 +1795,6 @@@
  		},
  		.result = REJECT,
  		.errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
 -		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"PTR_TO_STACK store/load - bad alignment on reg",
@@@ -1806,6 -1811,7 +1807,6 @@@
  		},
  		.result = REJECT,
  		.errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
 -		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"PTR_TO_STACK store/load - out of bounds low",
@@@ -1857,8 -1863,9 +1858,8 @@@
  			BPF_MOV64_IMM(BPF_REG_0, 0),
  			BPF_EXIT_INSN(),
  		},
 -		.result = ACCEPT,
 -		.result_unpriv = REJECT,
 -		.errstr_unpriv = "R1 pointer += pointer",
 +		.result = REJECT,
 +		.errstr = "R1 pointer += pointer",
  	},
  	{
  		"unpriv: neg pointer",
@@@ -2586,8 -2593,7 +2587,8 @@@
  			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
  				    offsetof(struct __sk_buff, data)),
  			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
 -			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
 +			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
 +				    offsetof(struct __sk_buff, len)),
  			BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
  			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
  			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
@@@ -2894,7 -2900,7 +2895,7 @@@
  			BPF_MOV64_IMM(BPF_REG_0, 0),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr = "invalid access to packet",
 +		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
@@@ -3880,7 -3886,9 +3881,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3, 11 },
 -		.errstr_unpriv = "R0 pointer += pointer",
 -		.errstr = "R0 invalid mem access 'inv'",
 -		.result_unpriv = REJECT,
 +		.errstr = "R0 pointer += pointer",
  		.result = REJECT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
  	},
@@@ -3921,7 -3929,7 +3922,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 4 },
 -		.errstr = "R4 invalid mem access",
 +		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS
  	},
@@@ -3942,7 -3950,7 +3943,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 4 },
 -		.errstr = "R4 invalid mem access",
 +		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS
  	},
@@@ -3963,7 -3971,7 +3964,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 4 },
 -		.errstr = "R4 invalid mem access",
 +		.errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS
  	},
@@@ -5188,8 -5196,10 +5189,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 bitwise operator &= on pointer",
 -		.errstr = "invalid mem access 'inv'",
 +		.errstr = "R0 bitwise operator &= on pointer",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
  		"map element value illegal alu op, 2",
@@@ -5205,8 -5215,10 +5206,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 32-bit pointer arithmetic prohibited",
 -		.errstr = "invalid mem access 'inv'",
 +		.errstr = "R0 32-bit pointer arithmetic prohibited",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
  		"map element value illegal alu op, 3",
@@@ -5222,8 -5234,10 +5223,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic with /= operator",
 -		.errstr = "invalid mem access 'inv'",
 +		.errstr = "R0 pointer arithmetic with /= operator",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
  		"map element value illegal alu op, 4",
@@@ -5635,7 -5649,7 +5636,7 @@@
  		"helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
  		.insns = {
  			BPF_MOV64_IMM(BPF_REG_1, 0),
- 			BPF_MOV64_IMM(BPF_REG_2, 0),
+ 			BPF_MOV64_IMM(BPF_REG_2, 1),
  			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
  			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
  			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
@@@ -5870,7 -5884,7 +5871,7 @@@
  			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
  			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
  			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- 			BPF_MOV64_IMM(BPF_REG_2, 0),
+ 			BPF_MOV64_IMM(BPF_REG_2, 1),
  			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
  			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
  			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
@@@ -6006,7 -6020,8 +6007,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map_in_map = { 3 },
 -		.errstr = "R1 type=inv expected=map_ptr",
 -		.errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
 +		.errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
  		.result = REJECT,
  	},
  	{
@@@ -6103,30 -6118,6 +6104,30 @@@
  		.result = ACCEPT,
  	},
  	{
 +		"ld_abs: tests on r6 and skb data reload helper",
 +		.insns = {
 +			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
 +			BPF_LD_ABS(BPF_B, 0),
 +			BPF_LD_ABS(BPF_H, 0),
 +			BPF_LD_ABS(BPF_W, 0),
 +			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
 +			BPF_MOV64_IMM(BPF_REG_6, 0),
 +			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
 +			BPF_MOV64_IMM(BPF_REG_2, 1),
 +			BPF_MOV64_IMM(BPF_REG_3, 2),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_skb_vlan_push),
 +			BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
 +			BPF_LD_ABS(BPF_B, 0),
 +			BPF_LD_ABS(BPF_H, 0),
 +			BPF_LD_ABS(BPF_W, 0),
 +			BPF_MOV64_IMM(BPF_REG_0, 42),
 +			BPF_EXIT_INSN(),
 +		},
 +		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 +		.result = ACCEPT,
 +	},
 +	{
  		"ld_ind: check calling conv, r1",
  		.insns = {
  			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
@@@ -6310,7 -6301,7 +6311,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr = "R0 min value is negative",
 +		.errstr = "unbounded min value",
  		.result = REJECT,
  	},
  	{
@@@ -6334,7 -6325,7 +6335,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr = "R0 min value is negative",
 +		.errstr = "unbounded min value",
  		.result = REJECT,
  	},
  	{
@@@ -6360,7 -6351,7 +6361,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr = "R8 invalid mem access 'inv'",
 +		.errstr = "unbounded min value",
  		.result = REJECT,
  	},
  	{
@@@ -6385,7 -6376,7 +6386,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr = "R8 invalid mem access 'inv'",
 +		.errstr = "unbounded min value",
  		.result = REJECT,
  	},
  	{
@@@ -6433,7 -6424,7 +6434,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr = "R0 min value is negative",
 +		.errstr = "unbounded min value",
  		.result = REJECT,
  	},
  	{
@@@ -6504,7 -6495,7 +6505,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr = "R0 min value is negative",
 +		.errstr = "unbounded min value",
  		.result = REJECT,
  	},
  	{
@@@ -6555,7 -6546,7 +6556,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr = "R0 min value is negative",
 +		.errstr = "unbounded min value",
  		.result = REJECT,
  	},
  	{
@@@ -6582,7 -6573,7 +6583,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr = "R0 min value is negative",
 +		.errstr = "unbounded min value",
  		.result = REJECT,
  	},
  	{
@@@ -6608,7 -6599,7 +6609,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr = "R0 min value is negative",
 +		.errstr = "unbounded min value",
  		.result = REJECT,
  	},
  	{
@@@ -6637,7 -6628,7 +6638,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr = "R0 min value is negative",
 +		.errstr = "unbounded min value",
  		.result = REJECT,
  	},
  	{
@@@ -6667,7 -6658,7 +6668,7 @@@
  			BPF_JMP_IMM(BPF_JA, 0, 0, -7),
  		},
  		.fixup_map1 = { 4 },
 -		.errstr = "R0 min value is negative",
 +		.errstr = "unbounded min value",
  		.result = REJECT,
  	},
  	{
@@@ -6695,7 -6686,8 +6696,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer comparison prohibited",
 -		.errstr = "R0 min value is negative",
 +		.errstr = "unbounded min value",
  		.result = REJECT,
  		.result_unpriv = REJECT,
  	},
@@@ -6751,462 -6743,6 +6752,462 @@@
  		.result = REJECT,
  	},
  	{
 +		"bounds check based on zero-extended MOV",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
 +			/* r2 = 0x0000'0000'ffff'ffff */
 +			BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
 +			/* r2 = 0 */
 +			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
 +			/* no-op */
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
 +			/* access at offset 0 */
 +			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 +			/* exit */
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		.result = ACCEPT
 +	},
 +	{
 +		"bounds check based on sign-extended MOV. test1",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
 +			/* r2 = 0xffff'ffff'ffff'ffff */
 +			BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
 +			/* r2 = 0xffff'ffff */
 +			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
 +			/* r0 = <oob pointer> */
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
 +			/* access to OOB pointer */
 +			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 +			/* exit */
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		.errstr = "map_value pointer and 4294967295",
 +		.result = REJECT
 +	},
 +	{
 +		"bounds check based on sign-extended MOV. test2",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
 +			/* r2 = 0xffff'ffff'ffff'ffff */
 +			BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
 +			/* r2 = 0xfff'ffff */
 +			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
 +			/* r0 = <oob pointer> */
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
 +			/* access to OOB pointer */
 +			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 +			/* exit */
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		.errstr = "R0 min value is outside of the array range",
 +		.result = REJECT
 +	},
 +	{
 +		"bounds check based on reg_off + var_off + insn_off. test1",
 +		.insns = {
 +			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
 +				    offsetof(struct __sk_buff, mark)),
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
 +			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
 +			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 4 },
 +		.errstr = "value_size=8 off=1073741825",
 +		.result = REJECT,
 +		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 +	},
 +	{
 +		"bounds check based on reg_off + var_off + insn_off. test2",
 +		.insns = {
 +			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
 +				    offsetof(struct __sk_buff, mark)),
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
 +			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
 +			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 4 },
 +		.errstr = "value 1073741823",
 +		.result = REJECT,
 +		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 +	},
 +	{
 +		"bounds check after truncation of non-boundary-crossing range",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
 +			/* r1 = [0x00, 0xff] */
 +			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
 +			BPF_MOV64_IMM(BPF_REG_2, 1),
 +			/* r2 = 0x10'0000'0000 */
 +			BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
 +			/* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
 +			/* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
 +			/* r1 = [0x00, 0xff] */
 +			BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
 +			/* r1 = 0 */
 +			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
 +			/* no-op */
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
 +			/* access at offset 0 */
 +			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 +			/* exit */
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		.result = ACCEPT
 +	},
 +	{
 +		"bounds check after truncation of boundary-crossing range (1)",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
 +			/* r1 = [0x00, 0xff] */
 +			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
 +			/* r1 = [0xffff'ff80, 0x1'0000'007f] */
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
 +			/* r1 = [0xffff'ff80, 0xffff'ffff] or
 +			 *      [0x0000'0000, 0x0000'007f]
 +			 */
 +			BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
 +			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
 +			/* r1 = [0x00, 0xff] or
 +			 *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
 +			 */
 +			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
 +			/* r1 = 0 or
 +			 *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
 +			 */
 +			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
 +			/* no-op or OOB pointer computation */
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
 +			/* potentially OOB access */
 +			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 +			/* exit */
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		/* not actually fully unbounded, but the bound is very high */
 +		.errstr = "R0 unbounded memory access",
 +		.result = REJECT
 +	},
 +	{
 +		"bounds check after truncation of boundary-crossing range (2)",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
 +			/* r1 = [0x00, 0xff] */
 +			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
 +			/* r1 = [0xffff'ff80, 0x1'0000'007f] */
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
 +			/* r1 = [0xffff'ff80, 0xffff'ffff] or
 +			 *      [0x0000'0000, 0x0000'007f]
 +			 * difference to previous test: truncation via MOV32
 +			 * instead of ALU32.
 +			 */
 +			BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
 +			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
 +			/* r1 = [0x00, 0xff] or
 +			 *      [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
 +			 */
 +			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
 +			/* r1 = 0 or
 +			 *      [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
 +			 */
 +			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
 +			/* no-op or OOB pointer computation */
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
 +			/* potentially OOB access */
 +			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 +			/* exit */
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		/* not actually fully unbounded, but the bound is very high */
 +		.errstr = "R0 unbounded memory access",
 +		.result = REJECT
 +	},
 +	{
 +		"bounds check after wrapping 32-bit addition",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
 +			/* r1 = 0x7fff'ffff */
 +			BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
 +			/* r1 = 0xffff'fffe */
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
 +			/* r1 = 0 */
 +			BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
 +			/* no-op */
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
 +			/* access at offset 0 */
 +			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 +			/* exit */
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		.result = ACCEPT
 +	},
 +	{
 +		"bounds check after shift with oversized count operand",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
 +			BPF_MOV64_IMM(BPF_REG_2, 32),
 +			BPF_MOV64_IMM(BPF_REG_1, 1),
 +			/* r1 = (u32)1 << (u32)32 = ? */
 +			BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
 +			/* r1 = [0x0000, 0xffff] */
 +			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
 +			/* computes unknown pointer, potentially OOB */
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
 +			/* potentially OOB access */
 +			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 +			/* exit */
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		.errstr = "R0 max value is outside of the array range",
 +		.result = REJECT
 +	},
 +	{
 +		"bounds check after right shift of maybe-negative number",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
 +			/* r1 = [0x00, 0xff] */
 +			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
 +			/* r1 = [-0x01, 0xfe] */
 +			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
 +			/* r1 = 0 or 0xff'ffff'ffff'ffff */
 +			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
 +			/* r1 = 0 or 0xffff'ffff'ffff */
 +			BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
 +			/* computes unknown pointer, potentially OOB */
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
 +			/* potentially OOB access */
 +			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 +			/* exit */
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		.errstr = "R0 unbounded memory access",
 +		.result = REJECT
 +	},
 +	{
 +		"bounds check map access with off+size signed 32bit overflow. test1",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 +			BPF_EXIT_INSN(),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
 +			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 +			BPF_JMP_A(0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		.errstr = "map_value pointer and 2147483646",
 +		.result = REJECT
 +	},
 +	{
 +		"bounds check map access with off+size signed 32bit overflow. test2",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 +			BPF_EXIT_INSN(),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
 +			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 +			BPF_JMP_A(0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		.errstr = "pointer offset 1073741822",
 +		.result = REJECT
 +	},
 +	{
 +		"bounds check map access with off+size signed 32bit overflow. test3",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 +			BPF_EXIT_INSN(),
 +			BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
 +			BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
 +			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
 +			BPF_JMP_A(0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		.errstr = "pointer offset -1073741822",
 +		.result = REJECT
 +	},
 +	{
 +		"bounds check map access with off+size signed 32bit overflow. test4",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 +			BPF_EXIT_INSN(),
 +			BPF_MOV64_IMM(BPF_REG_1, 1000000),
 +			BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
 +			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
 +			BPF_JMP_A(0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		.errstr = "map_value pointer and 1000000000000",
 +		.result = REJECT
 +	},
 +	{
 +		"pointer/scalar confusion in state equality check (way 1)",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
 +			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 +			BPF_JMP_A(1),
 +			BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
 +			BPF_JMP_A(0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		.result = ACCEPT,
 +		.result_unpriv = REJECT,
 +		.errstr_unpriv = "R0 leaks addr as return value"
 +	},
 +	{
 +		"pointer/scalar confusion in state equality check (way 2)",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
 +			BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
 +			BPF_JMP_A(1),
 +			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
 +		.result = ACCEPT,
 +		.result_unpriv = REJECT,
 +		.errstr_unpriv = "R0 leaks addr as return value"
 +	},
 +	{
  		"variable-offset ctx access",
  		.insns = {
  			/* Get an unknown value */
@@@ -7248,71 -6784,6 +7249,71 @@@
  		.prog_type = BPF_PROG_TYPE_LWT_IN,
  	},
  	{
 +		"indirect variable-offset stack access",
 +		.insns = {
 +			/* Fill the top 8 bytes of the stack */
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			/* Get an unknown value */
 +			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
 +			/* Make it small and 4-byte aligned */
 +			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
 +			BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
 +			/* add it to fp.  We now have either fp-4 or fp-8, but
 +			 * we don't know which
 +			 */
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
 +			/* dereference it indirectly */
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 5 },
 +		.errstr = "variable stack read R2",
 +		.result = REJECT,
 +		.prog_type = BPF_PROG_TYPE_LWT_IN,
 +	},
 +	{
 +		"direct stack access with 32-bit wraparound. test1",
 +		.insns = {
 +			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
 +			BPF_MOV32_IMM(BPF_REG_0, 0),
 +			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
 +			BPF_EXIT_INSN()
 +		},
 +		.errstr = "fp pointer and 2147483647",
 +		.result = REJECT
 +	},
 +	{
 +		"direct stack access with 32-bit wraparound. test2",
 +		.insns = {
 +			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
 +			BPF_MOV32_IMM(BPF_REG_0, 0),
 +			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
 +			BPF_EXIT_INSN()
 +		},
 +		.errstr = "fp pointer and 1073741823",
 +		.result = REJECT
 +	},
 +	{
 +		"direct stack access with 32-bit wraparound. test3",
 +		.insns = {
 +			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
 +			BPF_MOV32_IMM(BPF_REG_0, 0),
 +			BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
 +			BPF_EXIT_INSN()
 +		},
 +		.errstr = "fp pointer offset 1073741822",
 +		.result = REJECT
 +	},
 +	{
  		"liveness pruning and write screening",
  		.insns = {
  			/* Get an unknown value */
@@@ -7634,19 -7105,6 +7635,19 @@@
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
  	{
 +		"pkt_end - pkt_start is allowed",
 +		.insns = {
 +			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
 +				    offsetof(struct __sk_buff, data_end)),
 +			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
 +				    offsetof(struct __sk_buff, data)),
 +			BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
 +			BPF_EXIT_INSN(),
 +		},
 +		.result = ACCEPT,
 +		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 +	},
 +	{
  		"XDP pkt read, pkt_end mangling, bad access 1",
  		.insns = {
  			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
@@@ -7661,7 -7119,7 +7662,7 @@@
  			BPF_MOV64_IMM(BPF_REG_0, 0),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr = "R1 offset is outside of the packet",
 +		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_XDP,
  	},
@@@ -7680,7 -7138,7 +7681,7 @@@
  			BPF_MOV64_IMM(BPF_REG_0, 0),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr = "R1 offset is outside of the packet",
 +		.errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_XDP,
  	},
@@@ -8640,6 -8098,1623 +8641,1623 @@@
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
  	},
+ 	{
+ 		"calls: basic sanity",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 2),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: not on unpriviledged",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 2),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ 		.result_unpriv = REJECT,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: overlapping caller/callee",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "last insn is not an exit or jmp",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: wrong recursive calls",
+ 		.insns = {
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "jump out of range",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: wrong src reg",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "BPF_CALL uses reserved fields",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: wrong off value",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 2),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "BPF_CALL uses reserved fields",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: jump back loop",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "back-edge from insn 0 to 0",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: conditional call",
+ 		.insns = {
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, mark)),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 2),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "jump out of range",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: conditional call 2",
+ 		.insns = {
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, mark)),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 2),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 3),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: conditional call 3",
+ 		.insns = {
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, mark)),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+ 			BPF_MOV64_IMM(BPF_REG_0, 3),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "back-edge from insn",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: conditional call 4",
+ 		.insns = {
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, mark)),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, -5),
+ 			BPF_MOV64_IMM(BPF_REG_0, 3),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: conditional call 5",
+ 		.insns = {
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, mark)),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+ 			BPF_MOV64_IMM(BPF_REG_0, 3),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "back-edge from insn",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: conditional call 6",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2),
+ 			BPF_EXIT_INSN(),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, mark)),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "back-edge from insn",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: using r0 returned by callee",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 2),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: using uninit r0 from callee",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "!read_ok",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: callee is using r1",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, len)),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_ACT,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: callee using args1",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.errstr_unpriv = "allowed for root only",
+ 		.result_unpriv = REJECT,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: callee using wrong args2",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "R2 !read_ok",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: callee using two args",
+ 		.insns = {
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+ 				    offsetof(struct __sk_buff, len)),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6,
+ 				    offsetof(struct __sk_buff, len)),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ 			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.errstr_unpriv = "allowed for root only",
+ 		.result_unpriv = REJECT,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: callee changing pkt pointers",
+ 		.insns = {
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ 				    offsetof(struct xdp_md, data)),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ 				    offsetof(struct xdp_md, data_end)),
+ 			BPF_MOV64_REG(BPF_REG_8, BPF_REG_6),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8),
+ 			BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			/* clear_all_pkt_pointers() has to walk all frames
+ 			 * to make sure that pkt pointers in the caller
+ 			 * are cleared when callee is calling a helper that
+ 			 * adjusts packet size
+ 			 */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ 			BPF_MOV32_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_2, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_xdp_adjust_head),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.result = REJECT,
+ 		.errstr = "R6 invalid mem access 'inv'",
+ 		.prog_type = BPF_PROG_TYPE_XDP,
+ 	},
+ 	{
+ 		"calls: two calls with args",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ 			BPF_EXIT_INSN(),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, len)),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: calls with stack arith",
+ 		.insns = {
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ 			BPF_MOV64_IMM(BPF_REG_0, 42),
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: calls with misaligned stack access",
+ 		.insns = {
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
+ 			BPF_MOV64_IMM(BPF_REG_0, 42),
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ 		.errstr = "misaligned stack access",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: calls control flow, jump test",
+ 		.insns = {
+ 			BPF_MOV64_IMM(BPF_REG_0, 42),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 43),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: calls control flow, jump test 2",
+ 		.insns = {
+ 			BPF_MOV64_IMM(BPF_REG_0, 42),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 43),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.errstr = "jump out of range from insn 1 to 4",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: two calls with bad jump",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ 			BPF_EXIT_INSN(),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, len)),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "jump out of range from insn 11 to 9",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: recursive call. test1",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "back-edge",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: recursive call. test2",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "back-edge",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: unreachable code",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "unreachable insn 6",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: invalid call",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "invalid destination",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: invalid call 2",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "invalid destination",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: jumping across function bodies. test1",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "jump out of range",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: jumping across function bodies. test2",
+ 		.insns = {
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "jump out of range",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: call without exit",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "not an exit",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: call into middle of ld_imm64",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 			BPF_LD_IMM64(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "last insn",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: call into middle of other call",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "last insn",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: ld_abs with changing ctx data in callee",
+ 		.insns = {
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_LD_ABS(BPF_B, 0),
+ 			BPF_LD_ABS(BPF_H, 0),
+ 			BPF_LD_ABS(BPF_W, 0),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ 			BPF_LD_ABS(BPF_B, 0),
+ 			BPF_LD_ABS(BPF_H, 0),
+ 			BPF_LD_ABS(BPF_W, 0),
+ 			BPF_EXIT_INSN(),
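+ 
+ 			/* subprog 1: changes ctx data via bpf_skb_vlan_push() */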
+ 			BPF_MOV64_IMM(BPF_REG_2, 1),
+ 			BPF_MOV64_IMM(BPF_REG_3, 2),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_skb_vlan_push),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: two calls with bad fallthrough",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_0),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, len)),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ 		.errstr = "not an exit",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: two calls with stack read",
+ 		.insns = {
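+ 			/* main prog */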
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
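+ 
+ 			/* subprog 1 */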
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ 			BPF_EXIT_INSN(),
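+ 
+ 			/* subprog 2 */
+ 			/* read from stack frame of main prog */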
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_XDP,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: two calls with stack write",
+ 		.insns = {
+ 			/* main prog */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7),
+ 			BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ 			BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_8),
+ 			/* write into stack frame of main prog */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 2 */
+ 			/* read from stack frame of main prog */
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_XDP,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: spill into caller stack frame",
+ 		.insns = {
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
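+ 
+ 			/* subprog 1: spill a pointer into caller's stack slot */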
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_XDP,
+ 		.errstr = "cannot spill",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: write into caller stack frame",
+ 		.insns = {
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ 			BPF_EXIT_INSN(),
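+ 
+ 			/* subprog 1: write 42 into caller's stack slot */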
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_XDP,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: write into callee stack frame",
+ 		.insns = {
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ 			BPF_EXIT_INSN(),
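+ 
+ 			/* subprog 1: return pointer to own stack frame */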
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_XDP,
+ 		.errstr = "cannot return stack pointer",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: two calls with stack write and void return",
+ 		.insns = {
+ 			/* main prog */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 2 */
+ 			/* write into stack frame of main prog */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ 			BPF_EXIT_INSN(), /* void return */
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_XDP,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: ambiguous return value",
+ 		.insns = {
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ 			BPF_EXIT_INSN(),
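+ 
+ 			/* subprog 1: may exit without setting R0 */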
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.errstr_unpriv = "allowed for root only",
+ 		.result_unpriv = REJECT,
+ 		.errstr = "R0 !read_ok",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: two calls that return map_value",
+ 		.insns = {
+ 			/* main prog */
+ 			/* pass fp-16, fp-8 into a function */
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ 
+ 			/* fetch map_value_ptr from the stack of this function */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 			/* fetch second map_value_ptr from the stack */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			/* call 3rd function twice */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ 			/* first time with fp-8 */
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ 			/* second time with fp-16 */
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 2 */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			/* lookup from map */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_map_lookup_elem),
+ 			/* write map_value_ptr into stack frame of main prog */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(), /* return 0 */
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_XDP,
+ 		.fixup_map1 = { 23 },
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: two calls that return map_value with bool condition",
+ 		.insns = {
+ 			/* main prog */
+ 			/* pass fp-16, fp-8 into a function */
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			/* call 3rd function twice */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ 			/* first time with fp-8 */
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ 			/* fetch map_value_ptr from the stack of this function */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ 			/* second time with fp-16 */
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ 			/* fetch second map_value_ptr from the stack */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 2 */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			/* lookup from map */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_map_lookup_elem),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(), /* return 0 */
+ 			/* write map_value_ptr into stack frame of main prog */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(), /* return 1 */
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_XDP,
+ 		.fixup_map1 = { 23 },
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: two calls that return map_value with incorrect bool check",
+ 		.insns = {
+ 			/* main prog */
+ 			/* pass fp-16, fp-8 into a function */
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			/* call 3rd function twice */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ 			/* first time with fp-8 */
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ 			/* fetch map_value_ptr from the stack of this function */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ 			/* second time with fp-16 */
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ 			/* fetch second map_value_ptr from the stack */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 2 */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			/* lookup from map */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_map_lookup_elem),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(), /* return 0 */
+ 			/* write map_value_ptr into stack frame of main prog */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ 			BPF_MOV64_IMM(BPF_REG_0, 1),
+ 			BPF_EXIT_INSN(), /* return 1 */
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_XDP,
+ 		.fixup_map1 = { 23 },
+ 		.result = REJECT,
+ 		.errstr = "invalid read from stack off -16+0 size 8",
+ 	},
+ 	{
+ 		"calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1",
+ 		.insns = {
+ 			/* main prog */
+ 			/* pass fp-16, fp-8 into a function */
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ 			/* 1st lookup from map */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_map_lookup_elem),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_8, 0),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ 			/* write map_value_ptr into stack frame of main prog at fp-8 */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ 			BPF_MOV64_IMM(BPF_REG_8, 1),
+ 
+ 			/* 2nd lookup from map */
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
+ 				     BPF_FUNC_map_lookup_elem),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_9, 0),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ 			/* write map_value_ptr into stack frame of main prog at fp-16 */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ 			BPF_MOV64_IMM(BPF_REG_9, 1),
+ 
+ 			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ 			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),  /* 34 */
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 2 */
+ 			/* if arg2 == 1 do *arg1 = 0 */
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ 			/* fetch map_value_ptr from the stack of this function */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 
+ 			/* if arg4 == 1 do *arg3 = 0 */
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ 			/* fetch map_value_ptr from the stack of this function */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.fixup_map1 = { 12, 22 },
+ 		.result = REJECT,
+ 		.errstr = "invalid access to map value, value_size=8 off=2 size=8",
+ 	},
+ 	{
+ 		"calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2",
+ 		.insns = {
+ 			/* main prog */
+ 			/* pass fp-16, fp-8 into a function */
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ 			/* 1st lookup from map */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_map_lookup_elem),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_8, 0),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ 			/* write map_value_ptr into stack frame of main prog at fp-8 */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ 			BPF_MOV64_IMM(BPF_REG_8, 1),
+ 
+ 			/* 2nd lookup from map */
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
+ 				     BPF_FUNC_map_lookup_elem),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_9, 0),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ 			/* write map_value_ptr into stack frame of main prog at fp-16 */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ 			BPF_MOV64_IMM(BPF_REG_9, 1),
+ 
+ 			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ 			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),  /* 34 */
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 2 */
+ 			/* if arg2 == 1 do *arg1 = 0 */
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ 			/* fetch map_value_ptr from the stack of this function */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 
+ 			/* if arg4 == 1 do *arg3 = 0 */
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ 			/* fetch map_value_ptr from the stack of this function */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.fixup_map1 = { 12, 22 },
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3",
+ 		.insns = {
+ 			/* main prog */
+ 			/* pass fp-16, fp-8 into a function */
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ 			/* 1st lookup from map */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_map_lookup_elem),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_8, 0),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ 			/* write map_value_ptr into stack frame of main prog at fp-8 */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ 			BPF_MOV64_IMM(BPF_REG_8, 1),
+ 
+ 			/* 2nd lookup from map */
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_map_lookup_elem),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_9, 0), /* 26 */
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ 			/* write map_value_ptr into stack frame of main prog at fp-16 */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ 			BPF_MOV64_IMM(BPF_REG_9, 1),
+ 
+ 			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ 			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), /* 34 */
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, -30),
+ 
+ 			/* subprog 2 */
+ 			/* if arg2 == 1 do *arg1 = 0 */
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ 			/* fetch map_value_ptr from the stack of this function */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 
+ 			/* if arg4 == 1 do *arg3 = 0 */
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ 			/* fetch map_value_ptr from the stack of this function */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, -8),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.fixup_map1 = { 12, 22 },
+ 		.result = REJECT,
+ 		.errstr = "invalid access to map value, value_size=8 off=2 size=8",
+ 	},
+ 	{
+ 		"calls: two calls that receive map_value_ptr_or_null via arg. test1",
+ 		.insns = {
+ 			/* main prog */
+ 			/* pass fp-16, fp-8 into a function */
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ 			/* 1st lookup from map */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_map_lookup_elem),
+ 			/* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_8, 0),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ 			BPF_MOV64_IMM(BPF_REG_8, 1),
+ 
+ 			/* 2nd lookup from map */
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_map_lookup_elem),
+ 			/* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_9, 0),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ 			BPF_MOV64_IMM(BPF_REG_9, 1),
+ 
+ 			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ 			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 2 */
+ 			/* if arg2 == 1 do *arg1 = 0 */
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ 			/* fetch map_value_ptr from the stack of this function */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 
+ 			/* if arg4 == 1 do *arg3 = 0 */
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ 			/* fetch map_value_ptr from the stack of this function */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.fixup_map1 = { 12, 22 },
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: two calls that receive map_value_ptr_or_null via arg. test2",
+ 		.insns = {
+ 			/* main prog */
+ 			/* pass fp-16, fp-8 into a function */
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ 			BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ 			/* 1st lookup from map */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_map_lookup_elem),
+ 			/* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_8, 0),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ 			BPF_MOV64_IMM(BPF_REG_8, 1),
+ 
+ 			/* 2nd lookup from map */
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_map_lookup_elem),
+ 			/* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ 			BPF_MOV64_IMM(BPF_REG_9, 0),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ 			BPF_MOV64_IMM(BPF_REG_9, 1),
+ 
+ 			/* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ 			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ 			BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 2 */
+ 			/* if arg2 == 1 do *arg1 = 0 */
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ 			/* fetch map_value_ptr from the stack of this function */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 
+ 			/* if arg4 == 0 do *arg3 = 0 */
+ 			BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2),
+ 			/* fetch map_value_ptr from the stack of this function */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ 			/* write into map value */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.fixup_map1 = { 12, 22 },
+ 		.result = REJECT,
+ 		.errstr = "R0 invalid mem access 'inv'",
+ 	},
+ 	{
+ 		"calls: pkt_ptr spill into caller stack",
+ 		.insns = {
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data)),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data_end)),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ 			/* spill unchecked pkt_ptr into stack of caller */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ 			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ 			/* now the pkt range is verified, read pkt_ptr from stack */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ 			/* write 4 bytes into packet */
+ 			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.result = ACCEPT,
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 	},
+ 	{
+ 		"calls: pkt_ptr spill into caller stack 2",
+ 		.insns = {
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			/* Marking is still kept, but not in all cases safe. */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ 			BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data)),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data_end)),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ 			/* spill unchecked pkt_ptr into stack of caller */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ 			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ 			/* now the pkt range is verified, read pkt_ptr from stack */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ 			/* write 4 bytes into packet */
+ 			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.errstr = "invalid access to packet",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: pkt_ptr spill into caller stack 3",
+ 		.insns = {
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ 			/* Marking is still kept and safe here. */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ 			BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data)),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data_end)),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ 			/* spill unchecked pkt_ptr into stack of caller */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ 			BPF_MOV64_IMM(BPF_REG_5, 0),
+ 			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ 			BPF_MOV64_IMM(BPF_REG_5, 1),
+ 			/* now the pkt range is verified, read pkt_ptr from stack */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ 			/* write 4 bytes into packet */
+ 			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: pkt_ptr spill into caller stack 4",
+ 		.insns = {
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ 			/* Check marking propagated. */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ 			BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data)),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data_end)),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ 			/* spill unchecked pkt_ptr into stack of caller */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ 			BPF_MOV64_IMM(BPF_REG_5, 0),
+ 			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ 			BPF_MOV64_IMM(BPF_REG_5, 1),
+ 			/* don't read back pkt_ptr from stack here */
+ 			/* write 4 bytes into packet */
+ 			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: pkt_ptr spill into caller stack 5",
+ 		.insns = {
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data)),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data_end)),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ 			BPF_MOV64_IMM(BPF_REG_5, 0),
+ 			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ 			/* spill checked pkt_ptr into stack of caller */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ 			BPF_MOV64_IMM(BPF_REG_5, 1),
+ 			/* don't read back pkt_ptr from stack here */
+ 			/* write 4 bytes into packet */
+ 			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.errstr = "same insn cannot be used with different",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: pkt_ptr spill into caller stack 6",
+ 		.insns = {
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data_end)),
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data)),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data_end)),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ 			BPF_MOV64_IMM(BPF_REG_5, 0),
+ 			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ 			/* spill checked pkt_ptr into stack of caller */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ 			BPF_MOV64_IMM(BPF_REG_5, 1),
+ 			/* don't read back pkt_ptr from stack here */
+ 			/* write 4 bytes into packet */
+ 			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.errstr = "R4 invalid mem access",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: pkt_ptr spill into caller stack 7",
+ 		.insns = {
+ 			BPF_MOV64_IMM(BPF_REG_2, 0),
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data)),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data_end)),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ 			BPF_MOV64_IMM(BPF_REG_5, 0),
+ 			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ 			/* spill checked pkt_ptr into stack of caller */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ 			BPF_MOV64_IMM(BPF_REG_5, 1),
+ 			/* don't read back pkt_ptr from stack here */
+ 			/* write 4 bytes into packet */
+ 			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.errstr = "R4 invalid mem access",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: pkt_ptr spill into caller stack 8",
+ 		.insns = {
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data)),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data_end)),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ 			BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data)),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data_end)),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ 			BPF_MOV64_IMM(BPF_REG_5, 0),
+ 			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ 			/* spill checked pkt_ptr into stack of caller */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ 			BPF_MOV64_IMM(BPF_REG_5, 1),
+ 			/* don't read back pkt_ptr from stack here */
+ 			/* write 4 bytes into packet */
+ 			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.result = ACCEPT,
+ 	},
+ 	{
+ 		"calls: pkt_ptr spill into caller stack 9",
+ 		.insns = {
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data)),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data_end)),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ 			BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+ 			BPF_EXIT_INSN(),
+ 			BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data)),
+ 			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ 				    offsetof(struct __sk_buff, data_end)),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ 			BPF_MOV64_IMM(BPF_REG_5, 0),
+ 			/* spill unchecked pkt_ptr into stack of caller */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ 			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ 			BPF_MOV64_IMM(BPF_REG_5, 1),
+ 			/* don't read back pkt_ptr from stack here */
+ 			/* write 4 bytes into packet */
+ 			BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ 			BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ 		.errstr = "invalid access to packet",
+ 		.result = REJECT,
+ 	},
+ 	{
+ 		"calls: caller stack init to zero or map_value_or_null",
+ 		.insns = {
+ 			BPF_MOV64_IMM(BPF_REG_0, 0),
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ 			/* fetch map_value_or_null or const_zero from stack */
+ 			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ 			/* store into map_value */
+ 			BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0),
+ 			BPF_EXIT_INSN(),
+ 
+ 			/* subprog 1 */
+ 			/* if (ctx == 0) return; */
+ 			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),
+ 			/* else bpf_map_lookup() and *(fp - 8) = r0 */
+ 			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_map_lookup_elem),
+ 			/* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+ 			BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.fixup_map1 = { 13 },
+ 		.result = ACCEPT,
+ 		.prog_type = BPF_PROG_TYPE_XDP,
+ 	},
+ 	{
+ 		"calls: stack init to zero and pruning",
+ 		.insns = {
+ 			/* first make allocated_stack 16 byte */
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ 			/* now fork the execution such that the false branch
+ 			 * of JGT insn will be verified second and it skips zero
+ 			 * init of fp-8 stack slot. If stack liveness marking
+ 			 * is missing live_read marks from call map_lookup
+ 			 * processing then pruning will incorrectly assume
+ 			 * that fp-8 stack slot was unused in the fall-through
+ 			 * branch and will accept the program incorrectly
+ 			 */
+ 			BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2),
+ 			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ 			BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ 			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ 			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ 			BPF_LD_MAP_FD(BPF_REG_1, 0),
+ 			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ 				     BPF_FUNC_map_lookup_elem),
+ 			BPF_EXIT_INSN(),
+ 		},
+ 		.fixup_map2 = { 6 },
+ 		.errstr = "invalid indirect read from stack off -8+0 size 8",
+ 		.result = REJECT,
+ 		.prog_type = BPF_PROG_TYPE_XDP,
+ 	},
  };
  
  static int probe_filter_length(const struct bpf_insn *fp)

-- 
LinuxNextTracking

