[linux-next] LinuxNextTracking branch, master, updated. next-20180110

batman at open-mesh.org
Thu Jan 11 00:16:10 CET 2018


The following commit has been merged into the master branch:
commit 700b0d4b681f299c3243c0d8dae8dc49b1360d79
Merge: 30d75d434feb9d2ad5c15ec1171f17a53c6447b3 61ad64080e039dce99a7f8d89b729bbea995e2f7
Author: Stephen Rothwell <sfr at canb.auug.org.au>
Date:   Wed Jan 10 10:32:58 2018 +1100

    Merge remote-tracking branch 'net-next/master'

diff --combined MAINTAINERS
index 6809623840de,e81d91f7cd4e..f8b0299bfe08
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -321,7 -321,7 +321,7 @@@ F:	drivers/acpi/apei
  
  ACPI COMPONENT ARCHITECTURE (ACPICA)
  M:	Robert Moore <robert.moore at intel.com>
 -M:	Lv Zheng <lv.zheng at intel.com>
 +M:	Erik Schmauss <erik.schmauss at intel.com>
  M:	"Rafael J. Wysocki" <rafael.j.wysocki at intel.com>
  L:	linux-acpi at vger.kernel.org
  L:	devel at acpica.org
@@@ -1255,12 -1255,6 +1255,12 @@@ L:	linux-arm-kernel at lists.infradead.or
  S:	Supported
  F:	drivers/net/ethernet/cavium/thunder/
  
 +ARM/CIRRUS LOGIC BK3 MACHINE SUPPORT
 +M:	Lukasz Majewski <lukma at denx.de>
 +L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
 +S:	Maintained
 +F:	arch/arm/mach-ep93xx/ts72xx.c
 +
  ARM/CIRRUS LOGIC CLPS711X ARM ARCHITECTURE
  M:	Alexander Shiyan <shc_work at mail.ru>
  L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
@@@ -1589,7 -1583,6 +1589,7 @@@ F:	arch/arm/boot/dts/kirkwood
  F:	arch/arm/configs/mvebu_*_defconfig
  F:	arch/arm/mach-mvebu/
  F:	arch/arm64/boot/dts/marvell/armada*
 +F:	drivers/cpufreq/armada-37xx-cpufreq.c
  F:	drivers/cpufreq/mvebu-cpufreq.c
  F:	drivers/irqchip/irq-armada-370-xp.c
  F:	drivers/irqchip/irq-mvebu-*
@@@ -1642,38 -1635,14 +1642,38 @@@ ARM/NEC MOBILEPRO 900/c MACHINE SUPPOR
  M:	Michael Petchkovsky <mkpetch at internode.on.net>
  S:	Maintained
  
 -ARM/NOMADIK ARCHITECTURE
 -M:	Alessandro Rubini <rubini at unipv.it>
 +ARM/NOMADIK/U300/Ux500 ARCHITECTURES
  M:	Linus Walleij <linus.walleij at linaro.org>
  L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
  S:	Maintained
  F:	arch/arm/mach-nomadik/
 -F:	drivers/pinctrl/nomadik/
 +F:	arch/arm/mach-u300/
 +F:	arch/arm/mach-ux500/
 +F:	arch/arm/boot/dts/ste-*
 +F:	drivers/clk/clk-nomadik.c
 +F:	drivers/clk/clk-u300.c
 +F:	drivers/clocksource/clksrc-dbx500-prcmu.c
 +F:	drivers/clocksource/timer-u300.c
 +F:	drivers/dma/coh901318*
 +F:	drivers/dma/ste_dma40*
 +F:	drivers/hwspinlock/u8500_hsem.c
  F:	drivers/i2c/busses/i2c-nomadik.c
 +F:	drivers/i2c/busses/i2c-stu300.c
 +F:	drivers/mfd/ab3100*
 +F:	drivers/mfd/ab8500*
 +F:	drivers/mfd/abx500*
 +F:	drivers/mfd/dbx500*
 +F:	drivers/mfd/db8500*
 +F:	drivers/pinctrl/nomadik/
 +F:	drivers/pinctrl/pinctrl-coh901*
 +F:	drivers/pinctrl/pinctrl-u300.c
 +F:	drivers/rtc/rtc-ab3100.c
 +F:	drivers/rtc/rtc-ab8500.c
 +F:	drivers/rtc/rtc-coh901331.c
 +F:	drivers/rtc/rtc-pl031.c
 +F:	drivers/watchdog/coh901327_wdt.c
 +F:	Documentation/devicetree/bindings/arm/ste-*
 +F:	Documentation/devicetree/bindings/arm/ux500/
  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git
  
  ARM/NUVOTON W90X900 ARM ARCHITECTURE
@@@ -1987,10 -1956,9 +1987,10 @@@ N:	stm3
  F:	drivers/clocksource/armv7m_systick.c
  
  ARM/TANGO ARCHITECTURE
 -M:	Marc Gonzalez <marc_gonzalez at sigmadesigns.com>
 +M:	Marc Gonzalez <marc.w.gonzalez at free.fr>
 +M:	Mans Rullgard <mans at mansr.com>
  L:	linux-arm-kernel at lists.infradead.org
 -S:	Maintained
 +S:	Odd Fixes
  N:	tango
  
  ARM/TECHNOLOGIC SYSTEMS TS7250 MACHINE SUPPORT
@@@ -2054,6 -2022,21 +2054,6 @@@ M:	Dmitry Eremin-Solenikov <dbaryshkov@
  M:	Dirk Opfer <dirk at opfer-online.de>
  S:	Maintained
  
 -ARM/U300 MACHINE SUPPORT
 -M:	Linus Walleij <linus.walleij at linaro.org>
 -L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
 -S:	Supported
 -F:	arch/arm/mach-u300/
 -F:	drivers/clocksource/timer-u300.c
 -F:	drivers/i2c/busses/i2c-stu300.c
 -F:	drivers/rtc/rtc-coh901331.c
 -F:	drivers/watchdog/coh901327_wdt.c
 -F:	drivers/dma/coh901318*
 -F:	drivers/mfd/ab3100*
 -F:	drivers/rtc/rtc-ab3100.c
 -F:	drivers/rtc/rtc-coh901331.c
 -T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
 -
  ARM/UNIPHIER ARCHITECTURE
  M:	Masahiro Yamada <yamada.masahiro at socionext.com>
  L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
@@@ -2075,6 -2058,24 +2075,6 @@@ F:	drivers/reset/reset-uniphier.
  F:	drivers/tty/serial/8250/8250_uniphier.c
  N:	uniphier
  
 -ARM/Ux500 ARM ARCHITECTURE
 -M:	Linus Walleij <linus.walleij at linaro.org>
 -L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
 -S:	Maintained
 -F:	arch/arm/mach-ux500/
 -F:	drivers/clocksource/clksrc-dbx500-prcmu.c
 -F:	drivers/dma/ste_dma40*
 -F:	drivers/hwspinlock/u8500_hsem.c
 -F:	drivers/mfd/abx500*
 -F:	drivers/mfd/ab8500*
 -F:	drivers/mfd/dbx500*
 -F:	drivers/mfd/db8500*
 -F:	drivers/pinctrl/nomadik/pinctrl-ab*
 -F:	drivers/pinctrl/nomadik/pinctrl-nomadik*
 -F:	drivers/rtc/rtc-ab8500.c
 -F:	drivers/rtc/rtc-pl031.c
 -T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
 -
  ARM/Ux500 CLOCK FRAMEWORK SUPPORT
  M:	Ulf Hansson <ulf.hansson at linaro.org>
  L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
@@@ -2201,6 -2202,14 +2201,6 @@@ L:	linux-leds at vger.kernel.or
  S:	Maintained
  F:	drivers/leds/leds-as3645a.c
  
 -AS3645A LED FLASH CONTROLLER DRIVER
 -M:	Laurent Pinchart <laurent.pinchart at ideasonboard.com>
 -L:	linux-media at vger.kernel.org
 -T:	git git://linuxtv.org/media_tree.git
 -S:	Maintained
 -F:	drivers/media/i2c/as3645a.c
 -F:	include/media/i2c/as3645a.h
 -
  ASAHI KASEI AK8974 DRIVER
  M:	Linus Walleij <linus.walleij at linaro.org>
  L:	linux-iio at vger.kernel.org
@@@ -2253,9 -2262,7 +2253,9 @@@ F:	include/linux/async_tx.
  AT24 EEPROM DRIVER
  M:	Bartosz Golaszewski <brgl at bgdev.pl>
  L:	linux-i2c at vger.kernel.org
 +T:	git git://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git
  S:	Maintained
 +F:	Documentation/devicetree/bindings/eeprom/at24.txt
  F:	drivers/misc/eeprom/at24.c
  F:	include/linux/platform_data/at24.h
  
@@@ -2494,8 -2501,6 +2494,8 @@@ L:	linux-arm-kernel at lists.infradead.or
  S:	Maintained
  F:	Documentation/devicetree/bindings/arm/axentia.txt
  F:	arch/arm/boot/dts/at91-linea.dtsi
 +F:	arch/arm/boot/dts/at91-natte.dtsi
 +F:	arch/arm/boot/dts/at91-nattis-2-natte-2.dts
  F:	arch/arm/boot/dts/at91-tse850-3.dts
  
  AXENTIA ASOC DRIVERS
@@@ -2559,6 -2564,7 +2559,7 @@@ S:	Maintaine
  F:	Documentation/ABI/testing/sysfs-class-net-batman-adv
  F:	Documentation/ABI/testing/sysfs-class-net-mesh
  F:	Documentation/networking/batman-adv.rst
+ F:	include/uapi/linux/batadv_packet.h
  F:	include/uapi/linux/batman_adv.h
  F:	net/batman-adv/
  
@@@ -2682,7 -2688,6 +2683,6 @@@ F:	drivers/mtd/devices/block2mtd.
  
  BLUETOOTH DRIVERS
  M:	Marcel Holtmann <marcel at holtmann.org>
- M:	Gustavo Padovan <gustavo at padovan.org>
  M:	Johan Hedberg <johan.hedberg at gmail.com>
  L:	linux-bluetooth at vger.kernel.org
  W:	http://www.bluez.org/
@@@ -2693,7 -2698,6 +2693,6 @@@ F:	drivers/bluetooth
  
  BLUETOOTH SUBSYSTEM
  M:	Marcel Holtmann <marcel at holtmann.org>
- M:	Gustavo Padovan <gustavo at padovan.org>
  M:	Johan Hedberg <johan.hedberg at gmail.com>
  L:	linux-bluetooth at vger.kernel.org
  W:	http://www.bluez.org/
@@@ -2718,12 -2722,16 +2717,16 @@@ M:	Alexei Starovoitov <ast at kernel.org
  M:	Daniel Borkmann <daniel at iogearbox.net>
  L:	netdev at vger.kernel.org
  L:	linux-kernel at vger.kernel.org
+ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
+ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
  S:	Supported
  F:	arch/x86/net/bpf_jit*
  F:	Documentation/networking/filter.txt
  F:	Documentation/bpf/
  F:	include/linux/bpf*
  F:	include/linux/filter.h
+ F:	include/trace/events/bpf.h
+ F:	include/trace/events/xdp.h
  F:	include/uapi/linux/bpf*
  F:	include/uapi/linux/filter.h
  F:	kernel/bpf/
@@@ -2827,8 -2835,6 +2830,8 @@@ S:	Maintaine
  F:	arch/arm/mach-bcm/*brcmstb*
  F:	arch/arm/boot/dts/bcm7*.dts*
  F:	drivers/bus/brcmstb_gisb.c
 +F:	arch/arm/mm/cache-b15-rac.c
 +F:	arch/arm/include/asm/hardware/cache-b15-rac.h
  N:	brcmstb
  
  BROADCOM BMIPS CPUFREQ DRIVER
@@@ -7070,14 -7076,6 +7073,14 @@@ R:	Dan Williams <dan.j.williams at intel.c
  S:	Odd fixes
  F:	drivers/dma/iop-adma.c
  
 +INTEL IPU3 CSI-2 CIO2 DRIVER
 +M:	Yong Zhi <yong.zhi at intel.com>
 +M:	Sakari Ailus <sakari.ailus at linux.intel.com>
 +L:	linux-media at vger.kernel.org
 +S:	Maintained
 +F:	drivers/media/pci/intel/ipu3/
 +F:	Documentation/media/uapi/v4l/pixfmt-srggb10-ipu3.rst
 +
  INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT
  M:	Krzysztof Halasa <khalasa at piap.pl>
  S:	Maintained
@@@ -8683,15 -8681,6 +8686,15 @@@ T:	git git://linuxtv.org/media_tree.gi
  S:	Maintained
  F:	drivers/media/dvb-frontends/stv6111*
  
 +MEDIA DRIVERS FOR NVIDIA TEGRA - VDE
 +M:	Dmitry Osipenko <digetx at gmail.com>
 +L:	linux-media at vger.kernel.org
 +L:	linux-tegra at vger.kernel.org
 +T:	git git://linuxtv.org/media_tree.git
 +S:	Maintained
 +F:	Documentation/devicetree/bindings/media/nvidia,tegra-vde.txt
 +F:	drivers/staging/media/tegra-vde/
 +
  MEDIA INPUT INFRASTRUCTURE (V4L/DVB)
  M:	Mauro Carvalho Chehab <mchehab at s-opensource.com>
  M:	Mauro Carvalho Chehab <mchehab at kernel.org>
@@@ -8735,6 -8724,13 +8738,13 @@@ L:	netdev at vger.kernel.or
  S:	Maintained
  F:	drivers/net/ethernet/mediatek/
  
+ MEDIATEK SWITCH DRIVER
+ M:	Sean Wang <sean.wang at mediatek.com>
+ L:	netdev at vger.kernel.org
+ S:	Maintained
+ F:	drivers/net/dsa/mt7530.*
+ F:	net/dsa/tag_mtk.c
+ 
  MEDIATEK JPEG DRIVER
  M:	Rick Chang <rick.chang at mediatek.com>
  M:	Bin Liu <bin.liu at mediatek.com>
@@@ -9107,7 -9103,6 +9117,7 @@@ S:	Supporte
  F:	Documentation/devicetree/bindings/mips/
  F:	Documentation/mips/
  F:	arch/mips/
 +F:	drivers/platform/mips/
  
  MIPS BOSTON DEVELOPMENT BOARD
  M:	Paul Burton <paul.burton at mips.com>
@@@ -9135,25 -9130,6 +9145,25 @@@ F:	arch/mips/include/asm/mach-loongson3
  F:	drivers/*/*loongson1*
  F:	drivers/*/*/*loongson1*
  
 +MIPS/LOONGSON2 ARCHITECTURE
 +M:	Jiaxun Yang <jiaxun.yang at flygoat.com>
 +L:	linux-mips at linux-mips.org
 +S:	Maintained
 +F:	arch/mips/loongson64/*{2e/2f}*
 +F:	arch/mips/include/asm/mach-loongson64/
 +F:	drivers/*/*loongson2*
 +F:	drivers/*/*/*loongson2*
 +
 +MIPS/LOONGSON3 ARCHITECTURE
 +M:	Huacai Chen <chenhc at lemote.com>
 +L:	linux-mips at linux-mips.org
 +S:	Maintained
 +F:	arch/mips/loongson64/
 +F:	arch/mips/include/asm/mach-loongson64/
 +F:	drivers/platform/mips/cpu_hwmon.c
 +F:	drivers/*/*loongson3*
 +F:	drivers/*/*/*loongson3*
 +
  MIPS RINT INSTRUCTION EMULATION
  M:	Aleksandar Markovic <aleksandar.markovic at mips.com>
  L:	linux-mips at linux-mips.org
@@@ -9631,6 -9607,11 +9641,11 @@@ NETWORKING [WIRELESS
  L:	linux-wireless at vger.kernel.org
  Q:	http://patchwork.kernel.org/project/linux-wireless/list/
  
+ NETDEVSIM
+ M:	Jakub Kicinski <jakub.kicinski at netronome.com>
+ S:	Maintained
+ F:	drivers/net/netdevsim/*
+ 
  NETXEN (1/10) GbE SUPPORT
  M:	Manish Chopra <manish.chopra at cavium.com>
  M:	Rahul Verma <rahul.verma at cavium.com>
@@@ -10085,14 -10066,6 +10100,14 @@@ S:	Maintaine
  F:	drivers/media/i2c/ov7670.c
  F:	Documentation/devicetree/bindings/media/i2c/ov7670.txt
  
 +OMNIVISION OV7740 SENSOR DRIVER
 +M:	Wenyou Yang <wenyou.yang at microchip.com>
 +L:	linux-media at vger.kernel.org
 +T:	git git://linuxtv.org/media_tree.git
 +S:	Maintained
 +F:	drivers/media/i2c/ov7740.c
 +F:	Documentation/devicetree/bindings/media/i2c/ov7740.txt
 +
  ONENAND FLASH DRIVER
  M:	Kyungmin Park <kyungmin.park at samsung.com>
  L:	linux-mtd at lists.infradead.org
@@@ -10591,12 -10564,8 +10606,12 @@@ T:	git git://git.kernel.org/pub/scm/lin
  S:	Supported
  F:	Documentation/devicetree/bindings/pci/
  F:	Documentation/PCI/
 +F:	drivers/acpi/pci*
  F:	drivers/pci/
 +F:	include/asm-generic/pci*
  F:	include/linux/pci*
 +F:	include/uapi/linux/pci*
 +F:	lib/pci*
  F:	arch/x86/pci/
  F:	arch/x86/kernel/quirks.c
  
@@@ -10935,7 -10904,6 +10950,7 @@@ F:	include/linux/pm.
  F:	include/linux/pm_*
  F:	include/linux/powercap.h
  F:	drivers/powercap/
 +F:	kernel/configs/nopm.config
  
  POWER STATE COORDINATION INTERFACE (PSCI)
  M:	Mark Rutland <mark.rutland at arm.com>
@@@ -12383,14 -12351,6 +12398,14 @@@ T:	git git://linuxtv.org/anttip/media_t
  S:	Maintained
  F:	drivers/media/tuners/si2157*
  
 +SI2165 MEDIA DRIVER
 +M:	Matthias Schwarzott <zzam at gentoo.org>
 +L:	linux-media at vger.kernel.org
 +W:	https://linuxtv.org
 +Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 +S:	Maintained
 +F:	drivers/media/dvb-frontends/si2165*
 +
  SI2168 MEDIA DRIVER
  M:	Antti Palosaari <crope at iki.fi>
  L:	linux-media at vger.kernel.org
@@@ -12921,6 -12881,12 +12936,6 @@@ S:	Odd Fixe
  F:	Documentation/devicetree/bindings/staging/iio/
  F:	drivers/staging/iio/
  
 -STAGING - LIRC (LINUX INFRARED REMOTE CONTROL) DRIVERS
 -M:	Jarod Wilson <jarod at wilsonet.com>
 -W:	http://www.lirc.org/
 -S:	Odd Fixes
 -F:	drivers/staging/media/lirc/
 -
  STAGING - LUSTRE PARALLEL FILESYSTEM
  M:	Oleg Drokin <oleg.drokin at intel.com>
  M:	Andreas Dilger <andreas.dilger at intel.com>
@@@ -13302,15 -13268,6 +13317,15 @@@ T:	git git://linuxtv.org/anttip/media_t
  S:	Maintained
  F:	drivers/media/tuners/tda18218*
  
 +TDA18250 MEDIA DRIVER
 +M:	Olli Salonen <olli.salonen at iki.fi>
 +L:	linux-media at vger.kernel.org
 +W:	https://linuxtv.org
 +Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 +T:	git git://linuxtv.org/media_tree.git
 +S:	Maintained
 +F:	drivers/media/tuners/tda18250*
 +
  TDA18271 MEDIA DRIVER
  M:	Michael Krufky <mkrufky at linuxtv.org>
  L:	linux-media at vger.kernel.org
diff --combined arch/arm/boot/dts/imx25.dtsi
index c43cf704b768,fcaff1c66bcb..9445f8e1473c
--- a/arch/arm/boot/dts/imx25.dtsi
+++ b/arch/arm/boot/dts/imx25.dtsi
@@@ -122,7 -122,7 +122,7 @@@
  			};
  
  			can1: can at 43f88000 {
- 				compatible = "fsl,imx25-flexcan", "fsl,p1010-flexcan";
+ 				compatible = "fsl,imx25-flexcan";
  				reg = <0x43f88000 0x4000>;
  				interrupts = <43>;
  				clocks = <&clks 75>, <&clks 75>;
@@@ -131,7 -131,7 +131,7 @@@
  			};
  
  			can2: can at 43f8c000 {
- 				compatible = "fsl,imx25-flexcan", "fsl,p1010-flexcan";
+ 				compatible = "fsl,imx25-flexcan";
  				reg = <0x43f8c000 0x4000>;
  				interrupts = <44>;
  				clocks = <&clks 76>, <&clks 76>;
@@@ -628,13 -628,11 +628,13 @@@
  		usbphy0: usb-phy at 0 {
  			reg = <0>;
  			compatible = "usb-nop-xceiv";
 +			#phy-cells = <0>;
  		};
  
  		usbphy1: usb-phy at 1 {
  			reg = <1>;
  			compatible = "usb-nop-xceiv";
 +			#phy-cells = <0>;
  		};
  	};
  };
diff --combined arch/arm/boot/dts/imx35.dtsi
index f049c692c6b0,1f0e2203b576..e08c0c193767
--- a/arch/arm/boot/dts/imx35.dtsi
+++ b/arch/arm/boot/dts/imx35.dtsi
@@@ -303,7 -303,7 +303,7 @@@
  			};
  
  			can1: can at 53fe4000 {
- 				compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan";
+ 				compatible = "fsl,imx35-flexcan";
  				reg = <0x53fe4000 0x1000>;
  				clocks = <&clks 33>, <&clks 33>;
  				clock-names = "ipg", "per";
@@@ -312,7 -312,7 +312,7 @@@
  			};
  
  			can2: can at 53fe8000 {
- 				compatible = "fsl,imx35-flexcan", "fsl,p1010-flexcan";
+ 				compatible = "fsl,imx35-flexcan";
  				reg = <0x53fe8000 0x1000>;
  				clocks = <&clks 34>, <&clks 34>;
  				clock-names = "ipg", "per";
@@@ -402,13 -402,11 +402,13 @@@
  		usbphy0: usb-phy at 0 {
  			reg = <0>;
  			compatible = "usb-nop-xceiv";
 +			#phy-cells = <0>;
  		};
  
  		usbphy1: usb-phy at 1 {
  			reg = <1>;
  			compatible = "usb-nop-xceiv";
 +			#phy-cells = <0>;
  		};
  	};
  };
diff --combined arch/arm/boot/dts/imx53.dtsi
index 38b31a37339b,85071ff8c639..1040251f2951
--- a/arch/arm/boot/dts/imx53.dtsi
+++ b/arch/arm/boot/dts/imx53.dtsi
@@@ -116,28 -116,6 +116,28 @@@
  		};
  	};
  
 +	pmu {
 +		compatible = "arm,cortex-a8-pmu";
 +		interrupt-parent = <&tzic>;
 +		interrupts = <77>;
 +	};
 +
 +	usbphy0: usbphy-0 {
 +		compatible = "usb-nop-xceiv";
 +		clocks = <&clks IMX5_CLK_USB_PHY1_GATE>;
 +		clock-names = "main_clk";
 +		#phy-cells = <0>;
 +		status = "okay";
 +	};
 +
 +	usbphy1: usbphy-1 {
 +		compatible = "usb-nop-xceiv";
 +		clocks = <&clks IMX5_CLK_USB_PHY2_GATE>;
 +		clock-names = "main_clk";
 +		#phy-cells = <0>;
 +		status = "okay";
 +	};
 +
  	soc {
  		#address-cells = <1>;
  		#size-cells = <1>;
@@@ -321,6 -299,20 +321,6 @@@
  				reg = <0x53f00000 0x60>;
  			};
  
 -			usbphy0: usbphy-0 {
 -				compatible = "usb-nop-xceiv";
 -				clocks = <&clks IMX5_CLK_USB_PHY1_GATE>;
 -				clock-names = "main_clk";
 -				status = "okay";
 -			};
 -
 -			usbphy1: usbphy-1 {
 -				compatible = "usb-nop-xceiv";
 -				clocks = <&clks IMX5_CLK_USB_PHY2_GATE>;
 -				clock-names = "main_clk";
 -				status = "okay";
 -			};
 -
  			usbotg: usb at 53f80000 {
  				compatible = "fsl,imx53-usb", "fsl,imx27-usb";
  				reg = <0x53f80000 0x0200>;
@@@ -441,13 -433,6 +441,13 @@@
  				clock-names = "ipg", "per";
  			};
  
 +			srtc: rtc at 53fa4000 {
 +				compatible = "fsl,imx53-rtc";
 +				reg = <0x53fa4000 0x4000>;
 +				interrupts = <24>;
 +				clocks = <&clks IMX5_CLK_SRTC_GATE>;
 +			};
 +
  			iomuxc: iomuxc at 53fa8000 {
  				compatible = "fsl,imx53-iomuxc";
  				reg = <0x53fa8000 0x4000>;
@@@ -551,7 -536,7 +551,7 @@@
  			};
  
  			can1: can at 53fc8000 {
- 				compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan";
+ 				compatible = "fsl,imx53-flexcan";
  				reg = <0x53fc8000 0x4000>;
  				interrupts = <82>;
  				clocks = <&clks IMX5_CLK_CAN1_IPG_GATE>,
@@@ -561,7 -546,7 +561,7 @@@
  			};
  
  			can2: can at 53fcc000 {
- 				compatible = "fsl,imx53-flexcan", "fsl,p1010-flexcan";
+ 				compatible = "fsl,imx53-flexcan";
  				reg = <0x53fcc000 0x4000>;
  				interrupts = <83>;
  				clocks = <&clks IMX5_CLK_CAN2_IPG_GATE>,
@@@ -828,5 -813,10 +828,5 @@@
  			reg = <0xf8000000 0x20000>;
  			clocks = <&clks IMX5_CLK_OCRAM>;
  		};
 -
 -		pmu {
 -			compatible = "arm,cortex-a8-pmu";
 -			interrupts = <77>;
 -		};
  	};
  };
diff --combined arch/arm/boot/dts/ls1021a-qds.dts
index bf15dc27ca53,7bb402d3e9d0..499f41a2c6f0
--- a/arch/arm/boot/dts/ls1021a-qds.dts
+++ b/arch/arm/boot/dts/ls1021a-qds.dts
@@@ -239,11 -239,6 +239,11 @@@
  		device-width = <1>;
  	};
  
 +	nand at 2,0 {
 +		compatible = "fsl,ifc-nand";
 +		reg = <0x2 0x0 0x10000>;
 +	};
 +
  	fpga: board-control at 3,0 {
  		#address-cells = <1>;
  		#size-cells = <1>;
@@@ -336,3 -331,19 +336,19 @@@
  &uart1 {
  	status = "okay";
  };
+ 
+ &can0 {
+ 	status = "okay";
+ };
+ 
+ &can1 {
+ 	status = "okay";
+ };
+ 
+ &can2 {
+ 	status = "disabled";
+ };
+ 
+ &can3 {
+ 	status = "disabled";
+ };
diff --combined arch/arm/boot/dts/ls1021a-twr.dts
index b186c370ad54,860b898141f0..f0c949d74833
--- a/arch/arm/boot/dts/ls1021a-twr.dts
+++ b/arch/arm/boot/dts/ls1021a-twr.dts
@@@ -228,10 -228,6 +228,10 @@@
  	};
  };
  
 +&esdhc {
 +        status = "okay";
 +};
 +
  &sai1 {
  	status = "okay";
  };
@@@ -247,3 -243,19 +247,19 @@@
  &uart1 {
  	status = "okay";
  };
+ 
+ &can0 {
+ 	status = "okay";
+ };
+ 
+ &can1 {
+ 	status = "okay";
+ };
+ 
+ &can2 {
+ 	status = "disabled";
+ };
+ 
+ &can3 {
+ 	status = "disabled";
+ };
diff --combined arch/arm/boot/dts/ls1021a.dtsi
index c5edfa9a68a6,7789031898b0..c31dad98f989
--- a/arch/arm/boot/dts/ls1021a.dtsi
+++ b/arch/arm/boot/dts/ls1021a.dtsi
@@@ -106,14 -106,6 +106,14 @@@
  		compatible = "arm,cortex-a7-pmu";
  		interrupts = <GIC_SPI 138 IRQ_TYPE_LEVEL_HIGH>,
  			     <GIC_SPI 139 IRQ_TYPE_LEVEL_HIGH>;
 +		interrupt-affinity = <&cpu0>, <&cpu1>;
 +	};
 +
 +	reboot {
 +		compatible = "syscon-reboot";
 +		regmap = <&dcfg>;
 +		offset = <0xb0>;
 +		mask = <0x02>;
  	};
  
  	soc {
@@@ -162,22 -154,8 +162,22 @@@
  			big-endian;
  		};
  
 +		qspi: quadspi at 1550000 {
 +			compatible = "fsl,ls1021a-qspi";
 +			#address-cells = <1>;
 +			#size-cells = <0>;
 +			reg = <0x0 0x1550000 0x0 0x10000>,
 +			      <0x0 0x40000000 0x0 0x40000000>;
 +			reg-names = "QuadSPI", "QuadSPI-memory";
 +			interrupts = <GIC_SPI 131 IRQ_TYPE_LEVEL_HIGH>;
 +			clock-names = "qspi_en", "qspi";
 +			clocks = <&clockgen 4 1>, <&clockgen 4 1>;
 +			big-endian;
 +			status = "disabled";
 +		};
 +
  		esdhc: esdhc at 1560000 {
 -			compatible = "fsl,esdhc";
 +			compatible = "fsl,ls1021a-esdhc", "fsl,esdhc";
  			reg = <0x0 0x1560000 0x0 0x10000>;
  			interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
  			clock-frequency = <0>;
@@@ -597,7 -575,7 +597,7 @@@
  			fsl,tclk-period = <5>;
  			fsl,tmr-prsc    = <2>;
  			fsl,tmr-add     = <0xaaaaaaab>;
 -			fsl,tmr-fiper1  = <999999990>;
 +			fsl,tmr-fiper1  = <999999995>;
  			fsl,tmr-fiper2  = <99990>;
  			fsl,max-adj     = <499999999>;
  		};
@@@ -690,7 -668,7 +690,7 @@@
  			};
  		};
  
 -		usb at 8600000 {
 +		usb2: usb at 8600000 {
  			compatible = "fsl-usb2-dr-v2.5", "fsl-usb2-dr";
  			reg = <0x0 0x8600000 0x0 0x1000>;
  			interrupts = <GIC_SPI 171 IRQ_TYPE_LEVEL_HIGH>;
@@@ -698,7 -676,7 +698,7 @@@
  			phy_type = "ulpi";
  		};
  
 -		usb3 at 3100000 {
 +		usb3: usb3 at 3100000 {
  			compatible = "snps,dwc3";
  			reg = <0x0 0x3100000 0x0 0x10000>;
  			interrupts = <GIC_SPI 93 IRQ_TYPE_LEVEL_HIGH>;
@@@ -752,5 -730,41 +752,41 @@@
  					<0000 0 0 3 &gic GIC_SPI 191 IRQ_TYPE_LEVEL_HIGH>,
  					<0000 0 0 4 &gic GIC_SPI 193 IRQ_TYPE_LEVEL_HIGH>;
  		};
+ 
+ 		can0: can at 2a70000 {
+ 			compatible = "fsl,ls1021ar2-flexcan";
+ 			reg = <0x0 0x2a70000 0x0 0x1000>;
+ 			interrupts = <GIC_SPI 126 IRQ_TYPE_LEVEL_HIGH>;
+ 			clocks = <&clockgen 4 1>, <&clockgen 4 1>;
+ 			clock-names = "ipg", "per";
+ 			big-endian;
+ 		};
+ 
+ 		can1: can at 2a80000 {
+ 			compatible = "fsl,ls1021ar2-flexcan";
+ 			reg = <0x0 0x2a80000 0x0 0x1000>;
+ 			interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>;
+ 			clocks = <&clockgen 4 1>, <&clockgen 4 1>;
+ 			clock-names = "ipg", "per";
+ 			big-endian;
+ 		};
+ 
+ 		can2: can at 2a90000 {
+ 			compatible = "fsl,ls1021ar2-flexcan";
+ 			reg = <0x0 0x2a90000 0x0 0x1000>;
+ 			interrupts = <GIC_SPI 128 IRQ_TYPE_LEVEL_HIGH>;
+ 			clocks = <&clockgen 4 1>, <&clockgen 4 1>;
+ 			clock-names = "ipg", "per";
+ 			big-endian;
+ 		};
+ 
+ 		can3: can at 2aa0000 {
+ 			compatible = "fsl,ls1021ar2-flexcan";
+ 			reg = <0x0 0x2aa0000 0x0 0x1000>;
+ 			interrupts = <GIC_SPI 129 IRQ_TYPE_LEVEL_HIGH>;
+ 			clocks = <&clockgen 4 1>, <&clockgen 4 1>;
+ 			clock-names = "ipg", "per";
+ 			big-endian;
+ 		};
  	};
  };
diff --combined drivers/net/ethernet/broadcom/bcmsysport.c
index 9d7a834c5f62,f15a8fc6dfc9..c2969b260aed
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@@ -1156,7 -1156,7 +1156,7 @@@ static struct sk_buff *bcm_sysport_inse
  	memset(tsb, 0, sizeof(*tsb));
  
  	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 -		ip_ver = htons(skb->protocol);
 +		ip_ver = ntohs(skb->protocol);
  		switch (ip_ver) {
  		case ETH_P_IP:
  			ip_proto = ip_hdr(skb)->protocol;
@@@ -1216,18 -1216,6 +1216,6 @@@ static netdev_tx_t bcm_sysport_xmit(str
  		goto out;
  	}
  
- 	/* The Ethernet switch we are interfaced with needs packets to be at
- 	 * least 64 bytes (including FCS) otherwise they will be discarded when
- 	 * they enter the switch port logic. When Broadcom tags are enabled, we
- 	 * need to make sure that packets are at least 68 bytes
- 	 * (including FCS and tag) because the length verification is done after
- 	 * the Broadcom tag is stripped off the ingress packet.
- 	 */
- 	if (skb_put_padto(skb, ETH_ZLEN + ENET_BRCM_TAG_LEN)) {
- 		ret = NETDEV_TX_OK;
- 		goto out;
- 	}
- 
  	/* Insert TSB and checksum infos */
  	if (priv->tsb_en) {
  		skb = bcm_sysport_insert_tsb(skb, dev);
diff --combined drivers/net/ethernet/broadcom/tg3.c
index 86ff8b49ee57,a77ee2f8fb8d..2bd77d9990f2
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@@ -3227,7 -3227,7 +3227,7 @@@ static int tg3_nvram_read_using_eeprom(
  	return 0;
  }
  
- #define NVRAM_CMD_TIMEOUT 5000
+ #define NVRAM_CMD_TIMEOUT 10000
  
  static int tg3_nvram_exec_cmd(struct tg3 *tp, u32 nvram_cmd)
  {
@@@ -3744,7 -3744,7 +3744,7 @@@ static int tg3_load_firmware_cpu(struc
  	}
  
  	do {
 -		u32 *fw_data = (u32 *)(fw_hdr + 1);
 +		__be32 *fw_data = (__be32 *)(fw_hdr + 1);
  		for (i = 0; i < tg3_fw_data_len(tp, fw_hdr); i++)
  			write_op(tp, cpu_scratch_base +
  				     (be32_to_cpu(fw_hdr->base_addr) & 0xffff) +
@@@ -14789,7 -14789,7 +14789,7 @@@ static void tg3_get_5717_nvram_info(str
  
  static void tg3_get_5720_nvram_info(struct tg3 *tp)
  {
- 	u32 nvcfg1, nvmpinstrp;
+ 	u32 nvcfg1, nvmpinstrp, nv_status;
  
  	nvcfg1 = tr32(NVRAM_CFG1);
  	nvmpinstrp = nvcfg1 & NVRAM_CFG1_5752VENDOR_MASK;
@@@ -14801,6 -14801,23 +14801,23 @@@
  		}
  
  		switch (nvmpinstrp) {
+ 		case FLASH_5762_MX25L_100:
+ 		case FLASH_5762_MX25L_200:
+ 		case FLASH_5762_MX25L_400:
+ 		case FLASH_5762_MX25L_800:
+ 		case FLASH_5762_MX25L_160_320:
+ 			tp->nvram_pagesize = 4096;
+ 			tp->nvram_jedecnum = JEDEC_MACRONIX;
+ 			tg3_flag_set(tp, NVRAM_BUFFERED);
+ 			tg3_flag_set(tp, NO_NVRAM_ADDR_TRANS);
+ 			tg3_flag_set(tp, FLASH);
+ 			nv_status = tr32(NVRAM_AUTOSENSE_STATUS);
+ 			tp->nvram_size =
+ 				(1 << (nv_status >> AUTOSENSE_DEVID &
+ 						AUTOSENSE_DEVID_MASK)
+ 					<< AUTOSENSE_SIZE_IN_MB);
+ 			return;
+ 
  		case FLASH_5762_EEPROM_HD:
  			nvmpinstrp = FLASH_5720_EEPROM_HD;
  			break;
diff --combined drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index 5bbcaf8298f6,2fd8456999f6..b419229d7457
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@@ -241,13 -241,12 +241,13 @@@ static int dwmac4_rx_check_timestamp(vo
  	u32 own, ctxt;
  	int ret = 1;
  
 -	own = p->des3 & RDES3_OWN;
 -	ctxt = ((p->des3 & RDES3_CONTEXT_DESCRIPTOR)
 +	own = le32_to_cpu(p->des3) & RDES3_OWN;
 +	ctxt = ((le32_to_cpu(p->des3) & RDES3_CONTEXT_DESCRIPTOR)
  		>> RDES3_CONTEXT_DESCRIPTOR_SHIFT);
  
  	if (likely(!own && ctxt)) {
 -		if ((p->des0 == 0xffffffff) && (p->des1 == 0xffffffff))
 +		if ((p->des0 == cpu_to_le32(0xffffffff)) &&
 +		    (p->des1 == cpu_to_le32(0xffffffff)))
  			/* Corrupted value */
  			ret = -EINVAL;
  		else
@@@ -266,7 -265,7 +266,7 @@@ static int dwmac4_wrback_get_rx_timesta
  	int ret = -EINVAL;
  
  	/* Get the status from normal w/b descriptor */
 -	if (likely(p->des3 & TDES3_RS1V)) {
 +	if (likely(p->des3 & cpu_to_le32(TDES3_RS1V))) {
  		if (likely(le32_to_cpu(p->des1) & RDES1_TIMESTAMP_AVAILABLE)) {
  			int i = 0;
  
@@@ -407,7 -406,7 +407,7 @@@ static void dwmac4_display_ring(void *h
  	pr_info("%s descriptor ring:\n", rx ? "RX" : "TX");
  
  	for (i = 0; i < size; i++) {
- 		pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
+ 		pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
  			i, (unsigned int)virt_to_phys(p),
  			le32_to_cpu(p->des0), le32_to_cpu(p->des1),
  			le32_to_cpu(p->des2), le32_to_cpu(p->des3));
diff --combined drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 323464576fc0,f99f14c35063..cf0e16d1a068
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@@ -2003,22 -2003,60 +2003,60 @@@ static void stmmac_set_dma_operation_mo
  static void stmmac_dma_interrupt(struct stmmac_priv *priv)
  {
  	u32 tx_channel_count = priv->plat->tx_queues_to_use;
- 	int status;
+ 	u32 rx_channel_count = priv->plat->rx_queues_to_use;
+ 	u32 channels_to_check = tx_channel_count > rx_channel_count ?
+ 				tx_channel_count : rx_channel_count;
  	u32 chan;
+ 	bool poll_scheduled = false;
+ 	int status[channels_to_check];
+ 
+ 	/* Each DMA channel can be used for rx and tx simultaneously, yet
+ 	 * napi_struct is embedded in struct stmmac_rx_queue rather than in a
+ 	 * stmmac_channel struct.
+ 	 * Because of this, stmmac_poll currently checks (and possibly wakes)
+ 	 * all tx queues rather than just a single tx queue.
+ 	 */
+ 	for (chan = 0; chan < channels_to_check; chan++)
+ 		status[chan] = priv->hw->dma->dma_interrupt(priv->ioaddr,
+ 							    &priv->xstats,
+ 							    chan);
  
- 	for (chan = 0; chan < tx_channel_count; chan++) {
- 		struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
+ 	for (chan = 0; chan < rx_channel_count; chan++) {
+ 		if (likely(status[chan] & handle_rx)) {
+ 			struct stmmac_rx_queue *rx_q = &priv->rx_queue[chan];
  
- 		status = priv->hw->dma->dma_interrupt(priv->ioaddr,
- 						      &priv->xstats, chan);
- 		if (likely((status & handle_rx)) || (status & handle_tx)) {
  			if (likely(napi_schedule_prep(&rx_q->napi))) {
  				stmmac_disable_dma_irq(priv, chan);
  				__napi_schedule(&rx_q->napi);
+ 				poll_scheduled = true;
  			}
  		}
+ 	}
  
- 		if (unlikely(status & tx_hard_error_bump_tc)) {
+ 	/* If we scheduled poll, we already know that tx queues will be checked.
+ 	 * If we didn't schedule poll, see if any DMA channel (used by tx) has a
+ 	 * completed transmission, if so, call stmmac_poll (once).
+ 	 */
+ 	if (!poll_scheduled) {
+ 		for (chan = 0; chan < tx_channel_count; chan++) {
+ 			if (status[chan] & handle_tx) {
+ 				/* It doesn't matter what rx queue we choose
+ 				 * here. We use 0 since it always exists.
+ 				 */
+ 				struct stmmac_rx_queue *rx_q =
+ 					&priv->rx_queue[0];
+ 
+ 				if (likely(napi_schedule_prep(&rx_q->napi))) {
+ 					stmmac_disable_dma_irq(priv, chan);
+ 					__napi_schedule(&rx_q->napi);
+ 				}
+ 				break;
+ 			}
+ 		}
+ 	}
+ 
+ 	for (chan = 0; chan < tx_channel_count; chan++) {
+ 		if (unlikely(status[chan] & tx_hard_error_bump_tc)) {
  			/* Try to bump up the dma threshold on this failure */
  			if (unlikely(priv->xstats.threshold != SF_DMA_MODE) &&
  			    (tc <= 256)) {
@@@ -2035,7 -2073,7 +2073,7 @@@
  								    chan);
  				priv->xstats.threshold = tc;
  			}
- 		} else if (unlikely(status == tx_hard_error)) {
+ 		} else if (unlikely(status[chan] == tx_hard_error)) {
  			stmmac_tx_err(priv, chan);
  		}
  	}
@@@ -2539,7 -2577,7 +2577,7 @@@ static int stmmac_hw_setup(struct net_d
  	}
  
  	if (priv->hw->pcs && priv->hw->mac->pcs_ctrl_ane)
 -		priv->hw->mac->pcs_ctrl_ane(priv->hw, 1, priv->hw->ps, 0);
 +		priv->hw->mac->pcs_ctrl_ane(priv->ioaddr, 1, priv->hw->ps, 0);
  
  	/* set TX and RX rings length */
  	stmmac_set_rings_length(priv);
@@@ -3404,9 -3442,8 +3442,8 @@@ static int stmmac_rx(struct stmmac_pri
  			if (netif_msg_rx_status(priv)) {
  				netdev_dbg(priv->dev, "\tdesc: %p [entry %d] buff=0x%x\n",
  					   p, entry, des);
- 				if (frame_len > ETH_FRAME_LEN)
- 					netdev_dbg(priv->dev, "frame size %d, COE: %d\n",
- 						   frame_len, status);
+ 				netdev_dbg(priv->dev, "frame size %d, COE: %d\n",
+ 					   frame_len, status);
  			}
  
  			/* The zero-copy is always used for all the sizes
diff --combined drivers/net/tap.c
index f39c6f876e67,7c38659b2a76..4f745eb878f3
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@@ -330,7 -330,7 +330,7 @@@ rx_handler_result_t tap_handle_frame(st
  	if (!q)
  		return RX_HANDLER_PASS;
  
- 	if (__skb_array_full(&q->skb_array))
+ 	if (__ptr_ring_full(&q->ring))
  		goto drop;
  
  	skb_push(skb, ETH_HLEN);
@@@ -348,7 -348,7 +348,7 @@@
  			goto drop;
  
  		if (!segs) {
- 			if (skb_array_produce(&q->skb_array, skb))
+ 			if (ptr_ring_produce(&q->ring, skb))
  				goto drop;
  			goto wake_up;
  		}
@@@ -358,7 -358,7 +358,7 @@@
  			struct sk_buff *nskb = segs->next;
  
  			segs->next = NULL;
- 			if (skb_array_produce(&q->skb_array, segs)) {
+ 			if (ptr_ring_produce(&q->ring, segs)) {
  				kfree_skb(segs);
  				kfree_skb_list(nskb);
  				break;
@@@ -375,7 -375,7 +375,7 @@@
  		    !(features & NETIF_F_CSUM_MASK) &&
  		    skb_checksum_help(skb))
  			goto drop;
- 		if (skb_array_produce(&q->skb_array, skb))
+ 		if (ptr_ring_produce(&q->ring, skb))
  			goto drop;
  	}
  
@@@ -497,7 -497,7 +497,7 @@@ static void tap_sock_destruct(struct so
  {
  	struct tap_queue *q = container_of(sk, struct tap_queue, sk);
  
- 	skb_array_cleanup(&q->skb_array);
+ 	ptr_ring_cleanup(&q->ring, __skb_array_destroy_skb);
  }
  
  static int tap_open(struct inode *inode, struct file *file)
@@@ -517,7 -517,7 +517,7 @@@
  					     &tap_proto, 0);
  	if (!q)
  		goto err;
- 	if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) {
+ 	if (ptr_ring_init(&q->ring, tap->dev->tx_queue_len, GFP_KERNEL)) {
  		sk_free(&q->sk);
  		goto err;
  	}
@@@ -546,7 -546,7 +546,7 @@@
  
  	err = tap_set_queue(tap, file, q);
  	if (err) {
- 		/* tap_sock_destruct() will take care of freeing skb_array */
+ 		/* tap_sock_destruct() will take care of freeing ptr_ring */
  		goto err_put;
  	}
  
@@@ -572,10 -572,10 +572,10 @@@ static int tap_release(struct inode *in
  	return 0;
  }
  
 -static unsigned int tap_poll(struct file *file, poll_table *wait)
 +static __poll_t tap_poll(struct file *file, poll_table *wait)
  {
  	struct tap_queue *q = file->private_data;
 -	unsigned int mask = POLLERR;
 +	__poll_t mask = POLLERR;
  
  	if (!q)
  		goto out;
@@@ -583,7 -583,7 +583,7 @@@
  	mask = 0;
  	poll_wait(file, &q->wq.wait, wait);
  
- 	if (!skb_array_empty(&q->skb_array))
+ 	if (!ptr_ring_empty(&q->ring))
  		mask |= POLLIN | POLLRDNORM;
  
  	if (sock_writeable(&q->sk) ||
@@@ -844,7 -844,7 +844,7 @@@ static ssize_t tap_do_read(struct tap_q
  					TASK_INTERRUPTIBLE);
  
  		/* Read frames from the queue */
- 		skb = skb_array_consume(&q->skb_array);
+ 		skb = ptr_ring_consume(&q->ring);
  		if (skb)
  			break;
  		if (noblock) {
@@@ -1176,7 -1176,7 +1176,7 @@@ static int tap_peek_len(struct socket *
  {
  	struct tap_queue *q = container_of(sock, struct tap_queue,
  					       sock);
- 	return skb_array_peek_len(&q->skb_array);
+ 	return PTR_RING_PEEK_CALL(&q->ring, __skb_array_len_with_tag);
  }
  
  /* Ops structure to mimic raw sockets with tun */
@@@ -1202,7 -1202,7 +1202,7 @@@ struct socket *tap_get_socket(struct fi
  }
  EXPORT_SYMBOL_GPL(tap_get_socket);
  
- struct skb_array *tap_get_skb_array(struct file *file)
+ struct ptr_ring *tap_get_ptr_ring(struct file *file)
  {
  	struct tap_queue *q;
  
@@@ -1211,29 -1211,30 +1211,30 @@@
  	q = file->private_data;
  	if (!q)
  		return ERR_PTR(-EBADFD);
- 	return &q->skb_array;
+ 	return &q->ring;
  }
- EXPORT_SYMBOL_GPL(tap_get_skb_array);
+ EXPORT_SYMBOL_GPL(tap_get_ptr_ring);
  
  int tap_queue_resize(struct tap_dev *tap)
  {
  	struct net_device *dev = tap->dev;
  	struct tap_queue *q;
- 	struct skb_array **arrays;
+ 	struct ptr_ring **rings;
  	int n = tap->numqueues;
  	int ret, i = 0;
  
- 	arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL);
- 	if (!arrays)
+ 	rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL);
+ 	if (!rings)
  		return -ENOMEM;
  
  	list_for_each_entry(q, &tap->queue_list, next)
- 		arrays[i++] = &q->skb_array;
+ 		rings[i++] = &q->ring;
  
- 	ret = skb_array_resize_multiple(arrays, n,
- 					dev->tx_queue_len, GFP_KERNEL);
+ 	ret = ptr_ring_resize_multiple(rings, n,
+ 				       dev->tx_queue_len, GFP_KERNEL,
+ 				       __skb_array_destroy_skb);
  
- 	kfree(arrays);
+ 	kfree(rings);
  	return ret;
  }
  EXPORT_SYMBOL_GPL(tap_queue_resize);
diff --combined drivers/net/tun.c
index 2ffe5dba7e09,2fba3be5719e..55e6fee87375
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@@ -179,7 -179,8 +179,8 @@@ struct tun_file 
  	struct mutex napi_mutex;	/* Protects access to the above napi */
  	struct list_head next;
  	struct tun_struct *detached;
- 	struct skb_array tx_array;
+ 	struct ptr_ring tx_ring;
+ 	struct xdp_rxq_info xdp_rxq;
  };
  
  struct tun_flow_entry {
@@@ -195,6 -196,11 +196,11 @@@
  
  #define TUN_NUM_FLOW_ENTRIES 1024
  
+ struct tun_steering_prog {
+ 	struct rcu_head rcu;
+ 	struct bpf_prog *prog;
+ };
+ 
  /* Since the socket were moved to tun_file, to preserve the behavior of persist
   * device, socket filter, sndbuf and vnet header size were restore when the
   * file were attached to a persist device.
@@@ -232,8 -238,27 +238,27 @@@ struct tun_struct 
  	u32 rx_batched;
  	struct tun_pcpu_stats __percpu *pcpu_stats;
  	struct bpf_prog __rcu *xdp_prog;
+ 	struct tun_steering_prog __rcu *steering_prog;
  };
  
+ bool tun_is_xdp_buff(void *ptr)
+ {
+ 	return (unsigned long)ptr & TUN_XDP_FLAG;
+ }
+ EXPORT_SYMBOL(tun_is_xdp_buff);
+ 
+ void *tun_xdp_to_ptr(void *ptr)
+ {
+ 	return (void *)((unsigned long)ptr | TUN_XDP_FLAG);
+ }
+ EXPORT_SYMBOL(tun_xdp_to_ptr);
+ 
+ void *tun_ptr_to_xdp(void *ptr)
+ {
+ 	return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
+ }
+ EXPORT_SYMBOL(tun_ptr_to_xdp);
+ 
  static int tun_napi_receive(struct napi_struct *napi, int budget)
  {
  	struct tun_file *tfile = container_of(napi, struct tun_file, napi);
@@@ -537,15 -562,12 +562,12 @@@ static inline void tun_flow_save_rps_rx
   * different rxq no. here. If we could not get rxhash, then we would
   * hope the rxq no. may help here.
   */
- static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
- 			    void *accel_priv, select_queue_fallback_t fallback)
+ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
  {
- 	struct tun_struct *tun = netdev_priv(dev);
  	struct tun_flow_entry *e;
  	u32 txq = 0;
  	u32 numqueues = 0;
  
- 	rcu_read_lock();
  	numqueues = READ_ONCE(tun->numqueues);
  
  	txq = __skb_get_hash_symmetric(skb);
@@@ -563,10 -585,37 +585,37 @@@
  			txq -= numqueues;
  	}
  
- 	rcu_read_unlock();
  	return txq;
  }
  
+ static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
+ {
+ 	struct tun_steering_prog *prog;
+ 	u16 ret = 0;
+ 
+ 	prog = rcu_dereference(tun->steering_prog);
+ 	if (prog)
+ 		ret = bpf_prog_run_clear_cb(prog->prog, skb);
+ 
+ 	return ret % tun->numqueues;
+ }
+ 
+ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
+ 			    void *accel_priv, select_queue_fallback_t fallback)
+ {
+ 	struct tun_struct *tun = netdev_priv(dev);
+ 	u16 ret;
+ 
+ 	rcu_read_lock();
+ 	if (rcu_dereference(tun->steering_prog))
+ 		ret = tun_ebpf_select_queue(tun, skb);
+ 	else
+ 		ret = tun_automq_select_queue(tun, skb);
+ 	rcu_read_unlock();
+ 
+ 	return ret;
+ }
+ 
  static inline bool tun_not_capable(struct tun_struct *tun)
  {
  	const struct cred *cred = current_cred();
@@@ -600,12 -649,25 +649,25 @@@ static struct tun_struct *tun_enable_qu
  	return tun;
  }
  
+ static void tun_ptr_free(void *ptr)
+ {
+ 	if (!ptr)
+ 		return;
+ 	if (tun_is_xdp_buff(ptr)) {
+ 		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ 
+ 		put_page(virt_to_head_page(xdp->data));
+ 	} else {
+ 		__skb_array_destroy_skb(ptr);
+ 	}
+ }
+ 
  static void tun_queue_purge(struct tun_file *tfile)
  {
- 	struct sk_buff *skb;
+ 	void *ptr;
  
- 	while ((skb = skb_array_consume(&tfile->tx_array)) != NULL)
- 		kfree_skb(skb);
+ 	while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL)
+ 		tun_ptr_free(ptr);
  
  	skb_queue_purge(&tfile->sk.sk_write_queue);
  	skb_queue_purge(&tfile->sk.sk_error_queue);
@@@ -657,8 -719,10 +719,10 @@@ static void __tun_detach(struct tun_fil
  			    tun->dev->reg_state == NETREG_REGISTERED)
  				unregister_netdevice(tun->dev);
  		}
- 		if (tun)
- 			skb_array_cleanup(&tfile->tx_array);
+ 		if (tun) {
+ 			ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
+ 			xdp_rxq_info_unreg(&tfile->xdp_rxq);
+ 		}
  		sock_put(&tfile->sk);
  	}
  }
@@@ -673,7 -737,6 +737,6 @@@ static void tun_detach(struct tun_file 
  static void tun_detach_all(struct net_device *dev)
  {
  	struct tun_struct *tun = netdev_priv(dev);
- 	struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog);
  	struct tun_file *tfile, *tmp;
  	int i, n = tun->numqueues;
  
@@@ -699,18 -762,17 +762,17 @@@
  		tun_napi_del(tun, tfile);
  		/* Drop read queue */
  		tun_queue_purge(tfile);
+ 		xdp_rxq_info_unreg(&tfile->xdp_rxq);
  		sock_put(&tfile->sk);
  	}
  	list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
  		tun_enable_queue(tfile);
  		tun_queue_purge(tfile);
+ 		xdp_rxq_info_unreg(&tfile->xdp_rxq);
  		sock_put(&tfile->sk);
  	}
  	BUG_ON(tun->numdisabled != 0);
  
- 	if (xdp_prog)
- 		bpf_prog_put(xdp_prog);
- 
  	if (tun->flags & IFF_PERSIST)
  		module_put(THIS_MODULE);
  }
@@@ -751,13 -813,29 +813,29 @@@ static int tun_attach(struct tun_struc
  	}
  
  	if (!tfile->detached &&
- 	    skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) {
+ 	    ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) {
  		err = -ENOMEM;
  		goto out;
  	}
  
  	tfile->queue_index = tun->numqueues;
  	tfile->socket.sk->sk_shutdown &= ~RCV_SHUTDOWN;
+ 
+ 	if (tfile->detached) {
+ 		/* Re-attach detached tfile, updating XDP queue_index */
+ 		WARN_ON(!xdp_rxq_info_is_reg(&tfile->xdp_rxq));
+ 
+ 		if (tfile->xdp_rxq.queue_index    != tfile->queue_index)
+ 			tfile->xdp_rxq.queue_index = tfile->queue_index;
+ 	} else {
+ 		/* Setup XDP RX-queue info, for new tfile getting attached */
+ 		err = xdp_rxq_info_reg(&tfile->xdp_rxq,
+ 				       tun->dev, tfile->queue_index);
+ 		if (err < 0)
+ 			goto out;
+ 		err = 0;
+ 	}
+ 
  	rcu_assign_pointer(tfile->tun, tun);
  	rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
  	tun->numqueues++;
@@@ -937,23 -1015,10 +1015,10 @@@ static int tun_net_close(struct net_dev
  }
  
  /* Net device start xmit */
- static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
+ static void tun_automq_xmit(struct tun_struct *tun, struct sk_buff *skb)
  {
- 	struct tun_struct *tun = netdev_priv(dev);
- 	int txq = skb->queue_mapping;
- 	struct tun_file *tfile;
- 	u32 numqueues = 0;
- 
- 	rcu_read_lock();
- 	tfile = rcu_dereference(tun->tfiles[txq]);
- 	numqueues = READ_ONCE(tun->numqueues);
- 
- 	/* Drop packet if interface is not attached */
- 	if (txq >= numqueues)
- 		goto drop;
- 
  #ifdef CONFIG_RPS
- 	if (numqueues == 1 && static_key_false(&rps_needed)) {
+ 	if (tun->numqueues == 1 && static_key_false(&rps_needed)) {
  		/* Select queue was not called for the skbuff, so we extract the
  		 * RPS hash and save it into the flow_table here.
  		 */
@@@ -969,6 -1034,24 +1034,24 @@@
  		}
  	}
  #endif
+ }
+ 
+ /* Net device start xmit */
+ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ 	struct tun_struct *tun = netdev_priv(dev);
+ 	int txq = skb->queue_mapping;
+ 	struct tun_file *tfile;
+ 
+ 	rcu_read_lock();
+ 	tfile = rcu_dereference(tun->tfiles[txq]);
+ 
+ 	/* Drop packet if interface is not attached */
+ 	if (txq >= tun->numqueues)
+ 		goto drop;
+ 
+ 	if (!rcu_dereference(tun->steering_prog))
+ 		tun_automq_xmit(tun, skb);
  
  	tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
  
@@@ -996,7 -1079,7 +1079,7 @@@
  
  	nf_reset(skb);
  
- 	if (skb_array_produce(&tfile->tx_array, skb))
+ 	if (ptr_ring_produce(&tfile->tx_ring, skb))
  		goto drop;
  
  	/* Notify and wake up reader process */
@@@ -1169,6 -1252,67 +1252,67 @@@ static const struct net_device_ops tun_
  	.ndo_get_stats64	= tun_net_get_stats64,
  };
  
+ static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+ {
+ 	struct tun_struct *tun = netdev_priv(dev);
+ 	struct xdp_buff *buff = xdp->data_hard_start;
+ 	int headroom = xdp->data - xdp->data_hard_start;
+ 	struct tun_file *tfile;
+ 	u32 numqueues;
+ 	int ret = 0;
+ 
+ 	/* Assure headroom is available and buff is properly aligned */
+ 	if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp)))
+ 		return -ENOSPC;
+ 
+ 	*buff = *xdp;
+ 
+ 	rcu_read_lock();
+ 
+ 	numqueues = READ_ONCE(tun->numqueues);
+ 	if (!numqueues) {
+ 		ret = -ENOSPC;
+ 		goto out;
+ 	}
+ 
+ 	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
+ 					    numqueues]);
+ 	/* Encode the XDP flag into lowest bit for consumer to differ
+ 	 * XDP buffer from sk_buff.
+ 	 */
+ 	if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) {
+ 		this_cpu_inc(tun->pcpu_stats->tx_dropped);
+ 		ret = -ENOSPC;
+ 	}
+ 
+ out:
+ 	rcu_read_unlock();
+ 	return ret;
+ }
+ 
+ static void tun_xdp_flush(struct net_device *dev)
+ {
+ 	struct tun_struct *tun = netdev_priv(dev);
+ 	struct tun_file *tfile;
+ 	u32 numqueues;
+ 
+ 	rcu_read_lock();
+ 
+ 	numqueues = READ_ONCE(tun->numqueues);
+ 	if (!numqueues)
+ 		goto out;
+ 
+ 	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
+ 					    numqueues]);
+ 	/* Notify and wake up reader process */
+ 	if (tfile->flags & TUN_FASYNC)
+ 		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
+ 	tfile->socket.sk->sk_data_ready(tfile->socket.sk);
+ 
+ out:
+ 	rcu_read_unlock();
+ }
+ 
  static const struct net_device_ops tap_netdev_ops = {
  	.ndo_uninit		= tun_net_uninit,
  	.ndo_open		= tun_net_open,
@@@ -1186,6 -1330,8 +1330,8 @@@
  	.ndo_set_rx_headroom	= tun_set_headroom,
  	.ndo_get_stats64	= tun_net_get_stats64,
  	.ndo_bpf		= tun_xdp,
+ 	.ndo_xdp_xmit		= tun_xdp_xmit,
+ 	.ndo_xdp_flush		= tun_xdp_flush,
  };
  
  static void tun_flow_init(struct tun_struct *tun)
@@@ -1248,12 -1394,12 +1394,12 @@@ static void tun_net_init(struct net_dev
  /* Character device part */
  
  /* Poll */
 -static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
 +static __poll_t tun_chr_poll(struct file *file, poll_table *wait)
  {
  	struct tun_file *tfile = file->private_data;
  	struct tun_struct *tun = tun_get(tfile);
  	struct sock *sk;
 -	unsigned int mask = 0;
 +	__poll_t mask = 0;
  
  	if (!tun)
  		return POLLERR;
@@@ -1264,7 -1410,7 +1410,7 @@@
  
  	poll_wait(file, sk_sleep(sk), wait);
  
- 	if (!skb_array_empty(&tfile->tx_array))
+ 	if (!ptr_ring_empty(&tfile->tx_ring))
  		mask |= POLLIN | POLLRDNORM;
  
  	if (tun->dev->flags & IFF_UP &&
@@@ -1477,6 -1623,7 +1623,7 @@@ static struct sk_buff *tun_build_skb(st
  		xdp.data = buf + pad;
  		xdp_set_data_meta_invalid(&xdp);
  		xdp.data_end = xdp.data + len;
+ 		xdp.rxq = &tfile->xdp_rxq;
  		orig_data = xdp.data;
  		act = bpf_prog_run_xdp(xdp_prog, &xdp);
  
@@@ -1551,7 -1698,7 +1698,7 @@@ static ssize_t tun_get_user(struct tun_
  	int copylen;
  	bool zerocopy = false;
  	int err;
- 	u32 rxhash;
+ 	u32 rxhash = 0;
  	int skb_xdp = 1;
  	bool frags = tun_napi_frags_enabled(tun);
  
@@@ -1739,7 -1886,10 +1886,10 @@@
  		rcu_read_unlock();
  	}
  
- 	rxhash = __skb_get_hash_symmetric(skb);
+ 	rcu_read_lock();
+ 	if (!rcu_dereference(tun->steering_prog))
+ 		rxhash = __skb_get_hash_symmetric(skb);
+ 	rcu_read_unlock();
  
  	if (frags) {
  		/* Exercise flow dissector code path. */
@@@ -1783,7 -1933,9 +1933,9 @@@
  	u64_stats_update_end(&stats->syncp);
  	put_cpu_ptr(stats);
  
- 	tun_flow_update(tun, rxhash, tfile);
+ 	if (rxhash)
+ 		tun_flow_update(tun, rxhash, tfile);
+ 
  	return total_len;
  }
  
@@@ -1804,6 -1956,40 +1956,40 @@@ static ssize_t tun_chr_write_iter(struc
  	return result;
  }
  
+ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
+ 				struct tun_file *tfile,
+ 				struct xdp_buff *xdp,
+ 				struct iov_iter *iter)
+ {
+ 	int vnet_hdr_sz = 0;
+ 	size_t size = xdp->data_end - xdp->data;
+ 	struct tun_pcpu_stats *stats;
+ 	size_t ret;
+ 
+ 	if (tun->flags & IFF_VNET_HDR) {
+ 		struct virtio_net_hdr gso = { 0 };
+ 
+ 		vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
+ 		if (unlikely(iov_iter_count(iter) < vnet_hdr_sz))
+ 			return -EINVAL;
+ 		if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) !=
+ 			     sizeof(gso)))
+ 			return -EFAULT;
+ 		iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
+ 	}
+ 
+ 	ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz;
+ 
+ 	stats = get_cpu_ptr(tun->pcpu_stats);
+ 	u64_stats_update_begin(&stats->syncp);
+ 	stats->tx_packets++;
+ 	stats->tx_bytes += ret;
+ 	u64_stats_update_end(&stats->syncp);
+ 	put_cpu_ptr(tun->pcpu_stats);
+ 
+ 	return ret;
+ }
+ 
  /* Put packet to the user space buffer */
  static ssize_t tun_put_user(struct tun_struct *tun,
  			    struct tun_file *tfile,
@@@ -1901,15 -2087,14 +2087,14 @@@ done
  	return total;
  }
  
- static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
- 				     int *err)
+ static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
  {
  	DECLARE_WAITQUEUE(wait, current);
- 	struct sk_buff *skb = NULL;
+ 	void *ptr = NULL;
  	int error = 0;
  
- 	skb = skb_array_consume(&tfile->tx_array);
- 	if (skb)
+ 	ptr = ptr_ring_consume(&tfile->tx_ring);
+ 	if (ptr)
  		goto out;
  	if (noblock) {
  		error = -EAGAIN;
@@@ -1920,8 -2105,8 +2105,8 @@@
  	current->state = TASK_INTERRUPTIBLE;
  
  	while (1) {
- 		skb = skb_array_consume(&tfile->tx_array);
- 		if (skb)
+ 		ptr = ptr_ring_consume(&tfile->tx_ring);
+ 		if (ptr)
  			break;
  		if (signal_pending(current)) {
  			error = -ERESTARTSYS;
@@@ -1940,12 -2125,12 +2125,12 @@@
  
  out:
  	*err = error;
- 	return skb;
+ 	return ptr;
  }
  
  static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
  			   struct iov_iter *to,
- 			   int noblock, struct sk_buff *skb)
+ 			   int noblock, void *ptr)
  {
  	ssize_t ret;
  	int err;
@@@ -1953,23 -2138,31 +2138,31 @@@
  	tun_debug(KERN_INFO, tun, "tun_do_read\n");
  
  	if (!iov_iter_count(to)) {
- 		if (skb)
- 			kfree_skb(skb);
+ 		tun_ptr_free(ptr);
  		return 0;
  	}
  
- 	if (!skb) {
+ 	if (!ptr) {
  		/* Read frames from ring */
- 		skb = tun_ring_recv(tfile, noblock, &err);
- 		if (!skb)
+ 		ptr = tun_ring_recv(tfile, noblock, &err);
+ 		if (!ptr)
  			return err;
  	}
  
- 	ret = tun_put_user(tun, tfile, skb, to);
- 	if (unlikely(ret < 0))
- 		kfree_skb(skb);
- 	else
- 		consume_skb(skb);
+ 	if (tun_is_xdp_buff(ptr)) {
+ 		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ 
+ 		ret = tun_put_user_xdp(tun, tfile, xdp, to);
+ 		put_page(virt_to_head_page(xdp->data));
+ 	} else {
+ 		struct sk_buff *skb = ptr;
+ 
+ 		ret = tun_put_user(tun, tfile, skb, to);
+ 		if (unlikely(ret < 0))
+ 			kfree_skb(skb);
+ 		else
+ 			consume_skb(skb);
+ 	}
  
  	return ret;
  }
@@@ -1991,6 -2184,39 +2184,39 @@@ static ssize_t tun_chr_read_iter(struc
  	return ret;
  }
  
+ static void tun_steering_prog_free(struct rcu_head *rcu)
+ {
+ 	struct tun_steering_prog *prog = container_of(rcu,
+ 					 struct tun_steering_prog, rcu);
+ 
+ 	bpf_prog_destroy(prog->prog);
+ 	kfree(prog);
+ }
+ 
+ static int __tun_set_steering_ebpf(struct tun_struct *tun,
+ 				   struct bpf_prog *prog)
+ {
+ 	struct tun_steering_prog *old, *new = NULL;
+ 
+ 	if (prog) {
+ 		new = kmalloc(sizeof(*new), GFP_KERNEL);
+ 		if (!new)
+ 			return -ENOMEM;
+ 		new->prog = prog;
+ 	}
+ 
+ 	spin_lock_bh(&tun->lock);
+ 	old = rcu_dereference_protected(tun->steering_prog,
+ 					lockdep_is_held(&tun->lock));
+ 	rcu_assign_pointer(tun->steering_prog, new);
+ 	spin_unlock_bh(&tun->lock);
+ 
+ 	if (old)
+ 		call_rcu(&old->rcu, tun_steering_prog_free);
+ 
+ 	return 0;
+ }
+ 
  static void tun_free_netdev(struct net_device *dev)
  {
  	struct tun_struct *tun = netdev_priv(dev);
@@@ -1999,6 -2225,7 +2225,7 @@@
  	free_percpu(tun->pcpu_stats);
  	tun_flow_uninit(tun);
  	security_tun_dev_free_security(tun->security);
+ 	__tun_set_steering_ebpf(tun, NULL);
  }
  
  static void tun_setup(struct net_device *dev)
@@@ -2072,12 -2299,12 +2299,12 @@@ static int tun_recvmsg(struct socket *s
  {
  	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
  	struct tun_struct *tun = tun_get(tfile);
- 	struct sk_buff *skb = m->msg_control;
+ 	void *ptr = m->msg_control;
  	int ret;
  
  	if (!tun) {
  		ret = -EBADFD;
- 		goto out_free_skb;
+ 		goto out_free;
  	}
  
  	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
@@@ -2089,7 -2316,7 +2316,7 @@@
  					 SOL_PACKET, TUN_TX_TIMESTAMP);
  		goto out;
  	}
- 	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
+ 	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr);
  	if (ret > (ssize_t)total_len) {
  		m->msg_flags |= MSG_TRUNC;
  		ret = flags & MSG_TRUNC ? ret : total_len;
@@@ -2100,12 -2327,25 +2327,25 @@@ out
  
  out_put_tun:
  	tun_put(tun);
- out_free_skb:
- 	if (skb)
- 		kfree_skb(skb);
+ out_free:
+ 	tun_ptr_free(ptr);
  	return ret;
  }
  
+ static int tun_ptr_peek_len(void *ptr)
+ {
+ 	if (likely(ptr)) {
+ 		if (tun_is_xdp_buff(ptr)) {
+ 			struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ 
+ 			return xdp->data_end - xdp->data;
+ 		}
+ 		return __skb_array_len_with_tag(ptr);
+ 	} else {
+ 		return 0;
+ 	}
+ }
+ 
  static int tun_peek_len(struct socket *sock)
  {
  	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
@@@ -2116,7 -2356,7 +2356,7 @@@
  	if (!tun)
  		return 0;
  
- 	ret = skb_array_peek_len(&tfile->tx_array);
+ 	ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len);
  	tun_put(tun);
  
  	return ret;
@@@ -2287,6 -2527,7 +2527,7 @@@ static int tun_set_iff(struct net *net
  		tun->filter_attached = false;
  		tun->sndbuf = tfile->socket.sk->sk_sndbuf;
  		tun->rx_batched = 0;
+ 		RCU_INIT_POINTER(tun->steering_prog, NULL);
  
  		tun->pcpu_stats = netdev_alloc_pcpu_stats(struct tun_pcpu_stats);
  		if (!tun->pcpu_stats) {
@@@ -2479,6 -2720,25 +2720,25 @@@ unlock
  	return ret;
  }
  
+ static int tun_set_steering_ebpf(struct tun_struct *tun, void __user *data)
+ {
+ 	struct bpf_prog *prog;
+ 	int fd;
+ 
+ 	if (copy_from_user(&fd, data, sizeof(fd)))
+ 		return -EFAULT;
+ 
+ 	if (fd == -1) {
+ 		prog = NULL;
+ 	} else {
+ 		prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
+ 		if (IS_ERR(prog))
+ 			return PTR_ERR(prog);
+ 	}
+ 
+ 	return __tun_set_steering_ebpf(tun, prog);
+ }
+ 
  static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
  			    unsigned long arg, int ifreq_len)
  {
@@@ -2755,6 -3015,10 +3015,10 @@@
  		ret = 0;
  		break;
  
+ 	case TUNSETSTEERINGEBPF:
+ 		ret = tun_set_steering_ebpf(tun, argp);
+ 		break;
+ 
  	default:
  		ret = -EINVAL;
  		break;
@@@ -2998,25 -3262,26 +3262,26 @@@ static int tun_queue_resize(struct tun_
  {
  	struct net_device *dev = tun->dev;
  	struct tun_file *tfile;
- 	struct skb_array **arrays;
+ 	struct ptr_ring **rings;
  	int n = tun->numqueues + tun->numdisabled;
  	int ret, i;
  
- 	arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL);
- 	if (!arrays)
+ 	rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL);
+ 	if (!rings)
  		return -ENOMEM;
  
  	for (i = 0; i < tun->numqueues; i++) {
  		tfile = rtnl_dereference(tun->tfiles[i]);
- 		arrays[i] = &tfile->tx_array;
+ 		rings[i] = &tfile->tx_ring;
  	}
  	list_for_each_entry(tfile, &tun->disabled, next)
- 		arrays[i++] = &tfile->tx_array;
+ 		rings[i++] = &tfile->tx_ring;
  
- 	ret = skb_array_resize_multiple(arrays, n,
- 					dev->tx_queue_len, GFP_KERNEL);
+ 	ret = ptr_ring_resize_multiple(rings, n,
+ 				       dev->tx_queue_len, GFP_KERNEL,
+ 				       tun_ptr_free);
  
- 	kfree(arrays);
+ 	kfree(rings);
  	return ret;
  }
  
@@@ -3102,7 -3367,7 +3367,7 @@@ struct socket *tun_get_socket(struct fi
  }
  EXPORT_SYMBOL_GPL(tun_get_socket);
  
- struct skb_array *tun_get_skb_array(struct file *file)
+ struct ptr_ring *tun_get_tx_ring(struct file *file)
  {
  	struct tun_file *tfile;
  
@@@ -3111,9 -3376,9 +3376,9 @@@
  	tfile = file->private_data;
  	if (!tfile)
  		return ERR_PTR(-EBADFD);
- 	return &tfile->tx_array;
+ 	return &tfile->tx_ring;
  }
- EXPORT_SYMBOL_GPL(tun_get_skb_array);
+ EXPORT_SYMBOL_GPL(tun_get_tx_ring);
  
  module_init(tun_init);
  module_exit(tun_cleanup);
diff --combined drivers/net/wireless/ath/wcn36xx/main.c
index 987f1252a3cf,5bed323f1100..ab5be6d2c691
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@@ -384,18 -384,6 +384,18 @@@ static int wcn36xx_config(struct ieee80
  		}
  	}
  
 +	if (changed & IEEE80211_CONF_CHANGE_PS) {
 +		list_for_each_entry(tmp, &wcn->vif_list, list) {
 +			vif = wcn36xx_priv_to_vif(tmp);
 +			if (hw->conf.flags & IEEE80211_CONF_PS) {
 +				if (vif->bss_conf.ps) /* ps allowed ? */
 +					wcn36xx_pmc_enter_bmps_state(wcn, vif);
 +			} else {
 +				wcn36xx_pmc_exit_bmps_state(wcn, vif);
 +			}
 +		}
 +	}
 +
  	mutex_unlock(&wcn->conf_mutex);
  
  	return 0;
@@@ -641,7 -629,6 +641,6 @@@ static int wcn36xx_hw_scan(struct ieee8
  			   struct ieee80211_scan_request *hw_req)
  {
  	struct wcn36xx *wcn = hw->priv;
- 
  	mutex_lock(&wcn->scan_lock);
  	if (wcn->scan_req) {
  		mutex_unlock(&wcn->scan_lock);
@@@ -650,11 -637,16 +649,16 @@@
  
  	wcn->scan_aborted = false;
  	wcn->scan_req = &hw_req->req;
+ 
  	mutex_unlock(&wcn->scan_lock);
  
- 	schedule_work(&wcn->scan_work);
+ 	if (!get_feat_caps(wcn->fw_feat_caps, SCAN_OFFLOAD)) {
+ 		/* legacy manual/sw scan */
+ 		schedule_work(&wcn->scan_work);
+ 		return 0;
+ 	}
  
- 	return 0;
+ 	return wcn36xx_smd_start_hw_scan(wcn, vif, &hw_req->req);
  }
  
  static void wcn36xx_cancel_hw_scan(struct ieee80211_hw *hw,
@@@ -662,6 -654,12 +666,12 @@@
  {
  	struct wcn36xx *wcn = hw->priv;
  
+ 	if (!wcn36xx_smd_stop_hw_scan(wcn)) {
+ 		struct cfg80211_scan_info scan_info = { .aborted = true };
+ 
+ 		ieee80211_scan_completed(wcn->hw, &scan_info);
+ 	}
+ 
  	mutex_lock(&wcn->scan_lock);
  	wcn->scan_aborted = true;
  	mutex_unlock(&wcn->scan_lock);
@@@ -759,6 -757,17 +769,6 @@@ static void wcn36xx_bss_info_changed(st
  		vif_priv->dtim_period = bss_conf->dtim_period;
  	}
  
 -	if (changed & BSS_CHANGED_PS) {
 -		wcn36xx_dbg(WCN36XX_DBG_MAC,
 -			    "mac bss PS set %d\n",
 -			    bss_conf->ps);
 -		if (bss_conf->ps) {
 -			wcn36xx_pmc_enter_bmps_state(wcn, vif);
 -		} else {
 -			wcn36xx_pmc_exit_bmps_state(wcn, vif);
 -		}
 -	}
 -
  	if (changed & BSS_CHANGED_BSSID) {
  		wcn36xx_dbg(WCN36XX_DBG_MAC, "mac bss changed_bssid %pM\n",
  			    bss_conf->bssid);
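
The wcn36xx hunks move powersave handling from the per-BSS bss_info_changed()
callback (BSS_CHANGED_PS) up to config() (IEEE80211_CONF_CHANGE_PS):
IEEE80211_CONF_PS is a global flag, so the driver now walks vif_list and
enters BMPS only for interfaces whose own bss_conf.ps also allows it. An
illustrative predicate, not a function in the driver, restating that combined
condition:

    #include <net/mac80211.h>

    /* illustrative only: BMPS entry needs both the global PS flag and
     * the per-vif BSS powersave setting; either one clearing exits BMPS */
    static bool want_bmps(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
    {
        return (hw->conf.flags & IEEE80211_CONF_PS) && vif->bss_conf.ps;
    }
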
diff --combined drivers/vhost/net.c
index 9524ee16878a,a5a1db647635..71163b96e9ae
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@@ -89,7 -89,7 +89,7 @@@ struct vhost_net_ubuf_ref 
  
  #define VHOST_RX_BATCH 64
  struct vhost_net_buf {
- 	struct sk_buff **queue;
+ 	void **queue;
  	int tail;
  	int head;
  };
@@@ -108,7 -108,7 +108,7 @@@ struct vhost_net_virtqueue 
  	/* Reference counting for outstanding ubufs.
  	 * Protected by vq mutex. Writers must also take device mutex. */
  	struct vhost_net_ubuf_ref *ubufs;
- 	struct skb_array *rx_array;
+ 	struct ptr_ring *rx_ring;
  	struct vhost_net_buf rxq;
  };
  
@@@ -158,7 -158,7 +158,7 @@@ static int vhost_net_buf_produce(struc
  	struct vhost_net_buf *rxq = &nvq->rxq;
  
  	rxq->head = 0;
- 	rxq->tail = skb_array_consume_batched(nvq->rx_array, rxq->queue,
+ 	rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
  					      VHOST_RX_BATCH);
  	return rxq->tail;
  }
@@@ -167,13 -167,25 +167,25 @@@ static void vhost_net_buf_unproduce(str
  {
  	struct vhost_net_buf *rxq = &nvq->rxq;
  
- 	if (nvq->rx_array && !vhost_net_buf_is_empty(rxq)) {
- 		skb_array_unconsume(nvq->rx_array, rxq->queue + rxq->head,
- 				    vhost_net_buf_get_size(rxq));
+ 	if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) {
+ 		ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head,
+ 				   vhost_net_buf_get_size(rxq),
+ 				   __skb_array_destroy_skb);
  		rxq->head = rxq->tail = 0;
  	}
  }
  
+ static int vhost_net_buf_peek_len(void *ptr)
+ {
+ 	if (tun_is_xdp_buff(ptr)) {
+ 		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+ 
+ 		return xdp->data_end - xdp->data;
+ 	}
+ 
+ 	return __skb_array_len_with_tag(ptr);
+ }
+ 
  static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
  {
  	struct vhost_net_buf *rxq = &nvq->rxq;
@@@ -185,7 -197,7 +197,7 @@@
  		return 0;
  
  out:
- 	return __skb_array_len_with_tag(vhost_net_buf_get_ptr(rxq));
+ 	return vhost_net_buf_peek_len(vhost_net_buf_get_ptr(rxq));
  }
  
  static void vhost_net_buf_init(struct vhost_net_buf *rxq)
@@@ -583,7 -595,7 +595,7 @@@ static int peek_head_len(struct vhost_n
  	int len = 0;
  	unsigned long flags;
  
- 	if (rvq->rx_array)
+ 	if (rvq->rx_ring)
  		return vhost_net_buf_peek(rvq);
  
  	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
@@@ -790,7 -802,7 +802,7 @@@ static void handle_rx(struct vhost_net 
  			 * they refilled. */
  			goto out;
  		}
- 		if (nvq->rx_array)
+ 		if (nvq->rx_ring)
  			msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
  		/* On overrun, truncate and discard */
  		if (unlikely(headcount > UIO_MAXIOV)) {
@@@ -896,7 -908,7 +908,7 @@@ static int vhost_net_open(struct inode 
  	struct vhost_net *n;
  	struct vhost_dev *dev;
  	struct vhost_virtqueue **vqs;
- 	struct sk_buff **queue;
+ 	void **queue;
  	int i;
  
  	n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
@@@ -908,7 -920,7 +920,7 @@@
  		return -ENOMEM;
  	}
  
- 	queue = kmalloc_array(VHOST_RX_BATCH, sizeof(struct sk_buff *),
+ 	queue = kmalloc_array(VHOST_RX_BATCH, sizeof(void *),
  			      GFP_KERNEL);
  	if (!queue) {
  		kfree(vqs);
@@@ -1046,23 -1058,23 +1058,23 @@@ err
  	return ERR_PTR(r);
  }
  
- static struct skb_array *get_tap_skb_array(int fd)
+ static struct ptr_ring *get_tap_ptr_ring(int fd)
  {
- 	struct skb_array *array;
+ 	struct ptr_ring *ring;
  	struct file *file = fget(fd);
  
  	if (!file)
  		return NULL;
- 	array = tun_get_skb_array(file);
- 	if (!IS_ERR(array))
+ 	ring = tun_get_tx_ring(file);
+ 	if (!IS_ERR(ring))
  		goto out;
- 	array = tap_get_skb_array(file);
- 	if (!IS_ERR(array))
+ 	ring = tap_get_ptr_ring(file);
+ 	if (!IS_ERR(ring))
  		goto out;
- 	array = NULL;
+ 	ring = NULL;
  out:
  	fput(file);
- 	return array;
+ 	return ring;
  }
  
  static struct socket *get_tap_socket(int fd)
@@@ -1143,7 -1155,7 +1155,7 @@@ static long vhost_net_set_backend(struc
  		vq->private_data = sock;
  		vhost_net_buf_unproduce(nvq);
  		if (index == VHOST_NET_VQ_RX)
- 			nvq->rx_array = get_tap_skb_array(fd);
+ 			nvq->rx_ring = get_tap_ptr_ring(fd);
  		r = vhost_vq_init_access(vq);
  		if (r)
  			goto err_used;
@@@ -1353,7 -1365,7 +1365,7 @@@ static ssize_t vhost_net_chr_write_iter
  	return vhost_chr_write_iter(dev, from);
  }
  
 -static unsigned int vhost_net_chr_poll(struct file *file, poll_table *wait)
 +static __poll_t vhost_net_chr_poll(struct file *file, poll_table *wait)
  {
  	struct vhost_net *n = file->private_data;
  	struct vhost_dev *dev = &n->dev;
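
With tun able to queue struct xdp_buff pointers alongside sk_buffs, vhost-net's
rx queue becomes a ptr_ring of void *, and vhost_net_buf_peek_len() dispatches
on the pointer type via tun_is_xdp_buff(). A sketch of the tagging scheme this
implies on the producer side; the mask name and value here are assumptions,
since the kernel side hides them behind tun_is_xdp_buff()/tun_ptr_to_xdp():

    #include <linux/types.h>

    struct xdp_buff;                /* opaque for this sketch */

    #define XDP_PTR_TAG 0x1UL       /* assumed low-bit tag on aligned pointers */

    static inline bool ptr_is_xdp(void *ptr)
    {
        return (unsigned long)ptr & XDP_PTR_TAG;
    }

    static inline void *xdp_to_ptr(struct xdp_buff *xdp)
    {
        /* producer marks an XDP buffer before queueing it */
        return (void *)((unsigned long)xdp | XDP_PTR_TAG);
    }

    static inline struct xdp_buff *ptr_to_xdp(void *ptr)
    {
        /* consumer strips the tag to recover the real pointer */
        return (struct xdp_buff *)((unsigned long)ptr & ~XDP_PTR_TAG);
    }
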
diff --combined fs/btrfs/disk-io.c
index e5a4faf9e304,5da18ebc9222..bf31663de6b7
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@@ -30,6 -30,7 +30,7 @@@
  #include <linux/ratelimit.h>
  #include <linux/uuid.h>
  #include <linux/semaphore.h>
+ #include <linux/bpf.h>
  #include <asm/unaligned.h>
  #include "ctree.h"
  #include "disk-io.h"
@@@ -220,7 -221,7 +221,7 @@@ void btrfs_set_buffer_lockdep_class(u6
   * extents on the btree inode are pretty simple, there's one extent
   * that covers the entire device
   */
 -static struct extent_map *btree_get_extent(struct btrfs_inode *inode,
 +struct extent_map *btree_get_extent(struct btrfs_inode *inode,
  		struct page *page, size_t pg_offset, u64 start, u64 len,
  		int create)
  {
@@@ -285,7 -286,7 +286,7 @@@ static int csum_tree_block(struct btrfs
  			   int verify)
  {
  	u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
 -	char *result = NULL;
 +	char result[BTRFS_CSUM_SIZE];
  	unsigned long len;
  	unsigned long cur_len;
  	unsigned long offset = BTRFS_CSUM_SIZE;
@@@ -294,6 -295,7 +295,6 @@@
  	unsigned long map_len;
  	int err;
  	u32 crc = ~(u32)0;
 -	unsigned long inline_result;
  
  	len = buf->len - offset;
  	while (len > 0) {
@@@ -307,7 -309,13 +308,7 @@@
  		len -= cur_len;
  		offset += cur_len;
  	}
 -	if (csum_size > sizeof(inline_result)) {
 -		result = kzalloc(csum_size, GFP_NOFS);
 -		if (!result)
 -			return -ENOMEM;
 -	} else {
 -		result = (char *)&inline_result;
 -	}
 +	memset(result, 0, BTRFS_CSUM_SIZE);
  
  	btrfs_csum_final(crc, result);
  
@@@ -322,12 -330,15 +323,12 @@@
  				"%s checksum verify failed on %llu wanted %X found %X level %d",
  				fs_info->sb->s_id, buf->start,
  				val, found, btrfs_header_level(buf));
 -			if (result != (char *)&inline_result)
 -				kfree(result);
  			return -EUCLEAN;
  		}
  	} else {
  		write_extent_buffer(buf, result, 0, csum_size);
  	}
 -	if (result != (char *)&inline_result)
 -		kfree(result);
 +
  	return 0;
  }
  
@@@ -381,7 -392,7 +382,7 @@@ static int verify_parent_transid(struc
  		clear_extent_buffer_uptodate(eb);
  out:
  	unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
 -			     &cached_state, GFP_NOFS);
 +			     &cached_state);
  	if (need_lock)
  		btrfs_tree_read_unlock_blocking(eb);
  	return ret;
@@@ -445,7 -456,7 +446,7 @@@ static int btree_read_extent_buffer_pag
  	io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
  	while (1) {
  		ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
 -					       btree_get_extent, mirror_num);
 +					       mirror_num);
  		if (!ret) {
  			if (!verify_parent_transid(io_tree, eb,
  						   parent_transid, 0))
@@@ -855,8 -866,6 +856,8 @@@ static blk_status_t btree_submit_bio_ho
  	int async = check_async_write(BTRFS_I(inode));
  	blk_status_t ret;
  
 +	bio_associate_blkcg(bio, blkcg_root_css);
 +
  	if (bio_op(bio) != REQ_OP_WRITE) {
  		/*
  		 * called for a read, do the setup so that checksum validation
@@@ -1004,7 -1013,7 +1005,7 @@@ void readahead_tree_block(struct btrfs_
  	if (IS_ERR(buf))
  		return;
  	read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
 -				 buf, WAIT_NONE, btree_get_extent, 0);
 +				 buf, WAIT_NONE, 0);
  	free_extent_buffer(buf);
  }
  
@@@ -1023,7 -1032,7 +1024,7 @@@ int reada_tree_block_flagged(struct btr
  	set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
  
  	ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
 -				       btree_get_extent, mirror_num);
 +				       mirror_num);
  	if (ret) {
  		free_extent_buffer(buf);
  		return ret;
@@@ -1160,7 -1169,6 +1161,7 @@@ static void __setup_root(struct btrfs_r
  	spin_lock_init(&root->accounting_lock);
  	spin_lock_init(&root->log_extents_lock[0]);
  	spin_lock_init(&root->log_extents_lock[1]);
 +	spin_lock_init(&root->qgroup_meta_rsv_lock);
  	mutex_init(&root->objectid_mutex);
  	mutex_init(&root->log_mutex);
  	mutex_init(&root->ordered_extent_mutex);
@@@ -1177,6 -1185,7 +1178,6 @@@
  	atomic_set(&root->orphan_inodes, 0);
  	refcount_set(&root->refs, 1);
  	atomic_set(&root->will_be_snapshotted, 0);
 -	atomic64_set(&root->qgroup_meta_rsv, 0);
  	root->log_transid = 0;
  	root->log_transid_committed = -1;
  	root->last_log_commit = 0;
@@@ -1235,7 -1244,7 +1236,7 @@@ struct btrfs_root *btrfs_create_tree(st
  	struct btrfs_root *root;
  	struct btrfs_key key;
  	int ret = 0;
 -	uuid_le uuid;
 +	uuid_le uuid = NULL_UUID_LE;
  
  	root = btrfs_alloc_root(fs_info, GFP_KERNEL);
  	if (!root)
@@@ -1276,8 -1285,7 +1277,8 @@@
  	btrfs_set_root_used(&root->root_item, leaf->len);
  	btrfs_set_root_last_snapshot(&root->root_item, 0);
  	btrfs_set_root_dirid(&root->root_item, 0);
 -	uuid_le_gen(&uuid);
 +	if (is_fstree(objectid))
 +		uuid_le_gen(&uuid);
  	memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE);
  	root->root_item.drop_level = 0;
  
@@@ -2868,7 -2876,7 +2869,7 @@@ retry_root_backup
  		goto fail_sysfs;
  	}
  
 -	if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info)) {
 +	if (!sb_rdonly(sb) && !btrfs_check_rw_degradable(fs_info, NULL)) {
  		btrfs_warn(fs_info,
  		"writeable mount is not allowed due to too many missing devices");
  		goto fail_sysfs;
@@@ -3116,6 -3124,7 +3117,7 @@@ recovery_tree_root
  		goto fail_block_groups;
  	goto retry_root_backup;
  }
+ BPF_ALLOW_ERROR_INJECTION(open_ctree);
  
  static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
  {
@@@ -3343,8 -3352,6 +3345,8 @@@ static void write_dev_flush(struct btrf
  		return;
  
  	bio_reset(bio);
 +	bio_associate_blkcg(bio, blkcg_root_css);
 +
  	bio->bi_end_io = btrfs_end_empty_barrier;
  	bio_set_dev(bio, device->bdev);
  	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
@@@ -3352,7 -3359,7 +3354,7 @@@
  	bio->bi_private = &device->flush_wait;
  
  	btrfsic_submit_bio(bio);
 -	device->flush_bio_sent = 1;
 +	set_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
  }
  
  /*
@@@ -3362,10 -3369,10 +3364,10 @@@ static blk_status_t wait_dev_flush(stru
  {
  	struct bio *bio = device->flush_bio;
  
 -	if (!device->flush_bio_sent)
 +	if (!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state))
  		return BLK_STS_OK;
  
 -	device->flush_bio_sent = 0;
 +	clear_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state);
  	wait_for_completion_io(&device->flush_wait);
  
  	return bio->bi_status;
@@@ -3373,7 -3380,7 +3375,7 @@@
  
  static int check_barrier_error(struct btrfs_fs_info *fs_info)
  {
 -	if (!btrfs_check_rw_degradable(fs_info))
 +	if (!btrfs_check_rw_degradable(fs_info, NULL))
  		return -EIO;
  	return 0;
  }
@@@ -3389,16 -3396,14 +3391,16 @@@ static int barrier_all_devices(struct b
  	int errors_wait = 0;
  	blk_status_t ret;
  
 +	lockdep_assert_held(&info->fs_devices->device_list_mutex);
  	/* send down all the barriers */
  	head = &info->fs_devices->devices;
 -	list_for_each_entry_rcu(dev, head, dev_list) {
 -		if (dev->missing)
 +	list_for_each_entry(dev, head, dev_list) {
 +		if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
  			continue;
  		if (!dev->bdev)
  			continue;
 -		if (!dev->in_fs_metadata || !dev->writeable)
 +		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
 +		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
  			continue;
  
  		write_dev_flush(dev);
@@@ -3406,15 -3411,14 +3408,15 @@@
  	}
  
  	/* wait for all the barriers */
 -	list_for_each_entry_rcu(dev, head, dev_list) {
 -		if (dev->missing)
 +	list_for_each_entry(dev, head, dev_list) {
 +		if (test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state))
  			continue;
  		if (!dev->bdev) {
  			errors_wait++;
  			continue;
  		}
 -		if (!dev->in_fs_metadata || !dev->writeable)
 +		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
 +		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
  			continue;
  
  		ret = wait_dev_flush(dev);
@@@ -3506,13 -3510,12 +3508,13 @@@ int write_all_supers(struct btrfs_fs_in
  		}
  	}
  
 -	list_for_each_entry_rcu(dev, head, dev_list) {
 +	list_for_each_entry(dev, head, dev_list) {
  		if (!dev->bdev) {
  			total_errors++;
  			continue;
  		}
 -		if (!dev->in_fs_metadata || !dev->writeable)
 +		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
 +		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
  			continue;
  
  		btrfs_set_stack_device_generation(dev_item, 0);
@@@ -3548,11 -3551,10 +3550,11 @@@
  	}
  
  	total_errors = 0;
 -	list_for_each_entry_rcu(dev, head, dev_list) {
 +	list_for_each_entry(dev, head, dev_list) {
  		if (!dev->bdev)
  			continue;
 -		if (!dev->in_fs_metadata || !dev->writeable)
 +		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &dev->dev_state) ||
 +		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))
  			continue;
  
  		ret = wait_dev_supers(dev, max_mirrors);
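
open_ctree() above, like io_ctl_init() in the next file, is opted in to
BPF-driven error injection: with CONFIG_BPF_KPROBE_OVERRIDE, a BPF program
attached via kprobe can override the annotated function's return value (via
the bpf_override_return() helper added alongside this series) so btrfs
failure paths can be exercised without real hardware faults. The macro itself
is defined in include/linux/bpf.h later in this diff; opting a function in is
a one-line annotation at its definition site, e.g.:

    /* hypothetical function, shown only to illustrate the annotation */
    static int my_setup_step(void)
    {
        return 0;
    }
    BPF_ALLOW_ERROR_INJECTION(my_setup_step);
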
diff --combined fs/btrfs/free-space-cache.c
index 9e8c1f046e02,fb1382893bfc..9088b0b0d10f
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@@ -22,6 -22,7 +22,7 @@@
  #include <linux/slab.h>
  #include <linux/math64.h>
  #include <linux/ratelimit.h>
+ #include <linux/bpf.h>
  #include "ctree.h"
  #include "free-space-cache.h"
  #include "transaction.h"
@@@ -332,6 -333,7 +333,7 @@@ static int io_ctl_init(struct btrfs_io_
  
  	return 0;
  }
+ BPF_ALLOW_ERROR_INJECTION(io_ctl_init);
  
  static void io_ctl_free(struct btrfs_io_ctl *io_ctl)
  {
@@@ -993,7 -995,8 +995,7 @@@ update_cache_item(struct btrfs_trans_ha
  	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
  	if (ret < 0) {
  		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
 -				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
 -				 GFP_NOFS);
 +				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
  		goto fail;
  	}
  	leaf = path->nodes[0];
@@@ -1007,7 -1010,7 +1009,7 @@@
  			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
  					 inode->i_size - 1,
  					 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0,
 -					 NULL, GFP_NOFS);
 +					 NULL);
  			btrfs_release_path(path);
  			goto fail;
  		}
@@@ -1104,7 -1107,8 +1106,7 @@@ static int flush_dirty_cache(struct ino
  	ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
  	if (ret)
  		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
 -				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL,
 -				 GFP_NOFS);
 +				 EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL);
  
  	return ret;
  }
@@@ -1125,7 -1129,8 +1127,7 @@@ cleanup_write_cache_enospc(struct inod
  {
  	io_ctl_drop_pages(io_ctl);
  	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
 -			     i_size_read(inode) - 1, cached_state,
 -			     GFP_NOFS);
 +			     i_size_read(inode) - 1, cached_state);
  }
  
  static int __btrfs_wait_cache_io(struct btrfs_root *root,
@@@ -1319,7 -1324,7 +1321,7 @@@ static int __btrfs_write_out_cache(stru
  	io_ctl_drop_pages(io_ctl);
  
  	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
 -			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
 +			     i_size_read(inode) - 1, &cached_state);
  
  	/*
  	 * at this point the pages are under IO and we're happy,
@@@ -3545,7 -3550,7 +3547,7 @@@ int btrfs_write_out_ino_cache(struct bt
  	if (ret) {
  		if (release_metadata)
  			btrfs_delalloc_release_metadata(BTRFS_I(inode),
 -					inode->i_size);
 +					inode->i_size, true);
  #ifdef DEBUG
  		btrfs_err(fs_info,
  			  "failed to write free ino cache for root %llu",
diff --combined include/linux/bpf.h
index 0b25cf87b6d6,6be837c063c3..44f26f6df8fc
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@@ -17,6 -17,7 +17,7 @@@
  #include <linux/numa.h>
  #include <linux/wait.h>
  
+ struct bpf_verifier_env;
  struct perf_event;
  struct bpf_prog;
  struct bpf_map;
@@@ -43,14 -44,7 +44,14 @@@ struct bpf_map_ops 
  };
  
  struct bpf_map {
 -	atomic_t refcnt;
 +	/* 1st cacheline with read-mostly members of which some
 +	 * are also accessed in fast-path (e.g. ops, max_entries).
 +	 */
 +	const struct bpf_map_ops *ops ____cacheline_aligned;
 +	struct bpf_map *inner_map_meta;
 +#ifdef CONFIG_SECURITY
 +	void *security;
 +#endif
  	enum bpf_map_type map_type;
  	u32 key_size;
  	u32 value_size;
@@@ -59,17 -53,15 +60,17 @@@
  	u32 pages;
  	u32 id;
  	int numa_node;
 -	struct user_struct *user;
 -	const struct bpf_map_ops *ops;
 -	struct work_struct work;
 +	bool unpriv_array;
 +	/* 7 bytes hole */
 +
 +	/* 2nd cacheline with misc members to avoid false sharing
 +	 * particularly with refcounting.
 +	 */
 +	struct user_struct *user ____cacheline_aligned;
 +	atomic_t refcnt;
  	atomic_t usercnt;
 -	struct bpf_map *inner_map_meta;
 +	struct work_struct work;
  	char name[BPF_OBJ_NAME_LEN];
 -#ifdef CONFIG_SECURITY
 -	void *security;
 -#endif
  };
  
  /* function argument constraints */
@@@ -193,14 -185,18 +194,18 @@@ struct bpf_verifier_ops 
  				  struct bpf_prog *prog, u32 *target_size);
  };
  
+ struct bpf_prog_offload_ops {
+ 	int (*insn_hook)(struct bpf_verifier_env *env,
+ 			 int insn_idx, int prev_insn_idx);
+ };
+ 
  struct bpf_dev_offload {
  	struct bpf_prog		*prog;
  	struct net_device	*netdev;
  	void			*dev_priv;
  	struct list_head	offloads;
  	bool			dev_state;
- 	bool			verifier_running;
- 	wait_queue_head_t	verifier_done;
+ 	const struct bpf_prog_offload_ops *dev_ops;
  };
  
  struct bpf_prog_aux {
@@@ -209,6 -205,10 +214,10 @@@
  	u32 max_ctx_offset;
  	u32 stack_depth;
  	u32 id;
+ 	u32 func_cnt;
+ 	bool offload_requested;
+ 	struct bpf_prog **func;
+ 	void *jit_data; /* JIT specific data. arch dependent */
  	struct latch_tree_node ksym_tnode;
  	struct list_head ksym_lnode;
  	const struct bpf_prog_ops *ops;
@@@ -230,7 -230,6 +239,7 @@@
  struct bpf_array {
  	struct bpf_map map;
  	u32 elem_size;
 +	u32 index_mask;
  	/* 'ownership' of prog_array is claimed by the first program that
  	 * is going to use this map or by the first program which FD is stored
  	 * in the map to make sure that all callers and callees have the same
@@@ -295,6 -294,9 +304,9 @@@ int bpf_prog_array_copy_to_user(struct 
  
  void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs,
  				struct bpf_prog *old_prog);
+ int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array,
+ 			     __u32 __user *prog_ids, u32 request_cnt,
+ 			     __u32 __user *prog_cnt);
  int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,
  			struct bpf_prog *exclude_prog,
  			struct bpf_prog *include_prog,
@@@ -355,6 -357,8 +367,8 @@@ void bpf_prog_put(struct bpf_prog *prog
  int __bpf_prog_charge(struct user_struct *user, u32 pages);
  void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
  
+ void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
+ 
  struct bpf_map *bpf_map_get_with_uref(u32 ufd);
  struct bpf_map *__bpf_map_get(struct fd f);
  struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
@@@ -409,6 -413,7 +423,7 @@@ static inline void bpf_long_memcpy(voi
  
  /* verify correctness of eBPF program */
  int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
+ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
  
  /* Map specifics */
  struct net_device  *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
@@@ -536,13 -541,15 +551,15 @@@ bool bpf_prog_get_ok(struct bpf_prog *
  
  int bpf_prog_offload_compile(struct bpf_prog *prog);
  void bpf_prog_offload_destroy(struct bpf_prog *prog);
+ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
+ 			       struct bpf_prog *prog);
  
  #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
  int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
  
  static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
  {
- 	return aux->offload;
+ 	return aux->offload_requested;
  }
  #else
  static inline int bpf_prog_offload_init(struct bpf_prog *prog,
@@@ -557,7 -564,7 +574,7 @@@ static inline bool bpf_prog_is_dev_boun
  }
  #endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
  
- #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL)
+ #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
  struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
  int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
  #else
@@@ -596,4 -603,15 +613,15 @@@ extern const struct bpf_func_proto bpf_
  void bpf_user_rnd_init_once(void);
  u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
  
+ #if defined(__KERNEL__) && !defined(__ASSEMBLY__)
+ #ifdef CONFIG_BPF_KPROBE_OVERRIDE
+ #define BPF_ALLOW_ERROR_INJECTION(fname)				\
+ static unsigned long __used						\
+ 	__attribute__((__section__("_kprobe_error_inject_list")))	\
+ 	_eil_addr_##fname = (unsigned long)fname;
+ #else
+ #define BPF_ALLOW_ERROR_INJECTION(fname)
+ #endif
+ #endif
+ 
  #endif /* _LINUX_BPF_H */
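
Two independent changes meet in struct bpf_map above: the read-mostly fields
(ops, max_entries) move onto their own cacheline so refcount traffic does not
bounce them, and struct bpf_array gains index_mask, used to clamp array
indices under speculative execution. A sketch of how such a mask bounds a
lookup; the value[] member and the lookup code itself are assumptions, as only
map.max_entries, elem_size and index_mask appear in the hunks above:

    static void *array_lookup_sketch(struct bpf_array *array, u32 index)
    {
        if (index >= array->map.max_entries)
            return NULL;
        /* index_mask is assumed to be roundup_pow_of_two(max_entries) - 1,
         * so the AND changes nothing architecturally but keeps a
         * mispredicted bounds check from speculatively reading out of range */
        return array->value +
               (u64)array->elem_size * (index & array->index_mask);
    }
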
diff --combined include/linux/module.h
index e6249795f9e2,548fa09fa806..0fd65481c045
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@@ -475,6 -475,11 +475,11 @@@ struct module 
  	ctor_fn_t *ctors;
  	unsigned int num_ctors;
  #endif
+ 
+ #ifdef CONFIG_BPF_KPROBE_OVERRIDE
+ 	unsigned int num_kprobe_ei_funcs;
+ 	unsigned long *kprobe_ei_funcs;
+ #endif
  } ____cacheline_aligned __randomize_layout;
  #ifndef MODULE_ARCH_INIT
  #define MODULE_ARCH_INIT {}
@@@ -606,9 -611,6 +611,9 @@@ int ref_module(struct module *a, struc
  	__mod ? __mod->name : "kernel";		\
  })
  
 +/* Dereference module function descriptor */
 +void *dereference_module_function_descriptor(struct module *mod, void *ptr);
 +
  /* For kallsyms to ask for address resolution.  namebuf should be at
   * least KSYM_NAME_LEN long: a pointer to namebuf is returned if
   * found, otherwise NULL. */
@@@ -763,13 -765,6 +768,13 @@@ static inline bool is_module_sig_enforc
  	return false;
  }
  
 +/* Dereference module function descriptor */
 +static inline
 +void *dereference_module_function_descriptor(struct module *mod, void *ptr)
 +{
 +	return ptr;
 +}
 +
  #endif /* CONFIG_MODULES */
  
  #ifdef CONFIG_SYSFS
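
dereference_module_function_descriptor() mirrors the existing
dereference_function_descriptor(): on ABIs where a function symbol names a
descriptor rather than code (e.g. ia64, ppc64 ELFv1, 64-bit parisc), callers
that need the real entry address must look through the descriptor, while the
!CONFIG_MODULES stub above simply returns the pointer unchanged. A usage
sketch:

    /* sketch: resolve a module symbol to its actual code address before
     * handing it to anything that expects a text pointer */
    static void *module_entry_address(struct module *mod, void *sym)
    {
        return dereference_module_function_descriptor(mod, sym);
    }
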
diff --combined include/linux/pci.h
index 95807535d175,0314e0716c30..66cca1c6f742
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@@ -48,17 -48,17 +48,17 @@@
   * In the interest of not exposing interfaces to user-space unnecessarily,
   * the following kernel-only defines are being added here.
   */
 -#define PCI_DEVID(bus, devfn)  ((((u16)(bus)) << 8) | (devfn))
 +#define PCI_DEVID(bus, devfn)	((((u16)(bus)) << 8) | (devfn))
  /* return bus from PCI devid = ((u16)bus_number) << 8) | devfn */
  #define PCI_BUS_NUM(x) (((x) >> 8) & 0xff)
  
  /* pci_slot represents a physical slot */
  struct pci_slot {
 -	struct pci_bus *bus;		/* The bus this slot is on */
 -	struct list_head list;		/* node in list of slots on this bus */
 -	struct hotplug_slot *hotplug;	/* Hotplug info (migrate over time) */
 -	unsigned char number;		/* PCI_SLOT(pci_dev->devfn) */
 -	struct kobject kobj;
 +	struct pci_bus		*bus;		/* Bus this slot is on */
 +	struct list_head	list;		/* Node in list of slots */
 +	struct hotplug_slot	*hotplug;	/* Hotplug info (move here) */
 +	unsigned char		number;		/* PCI_SLOT(pci_dev->devfn) */
 +	struct kobject		kobj;
  };
  
  static inline const char *pci_slot_name(const struct pci_slot *slot)
@@@ -72,7 -72,9 +72,7 @@@ enum pci_mmap_state 
  	pci_mmap_mem
  };
  
 -/*
 - *  For PCI devices, the region numbers are assigned this way:
 - */
 +/* For PCI devices, the region numbers are assigned this way: */
  enum {
  	/* #0-5: standard PCI resources */
  	PCI_STD_RESOURCES,
@@@ -81,23 -83,23 +81,23 @@@
  	/* #6: expansion ROM resource */
  	PCI_ROM_RESOURCE,
  
 -	/* device specific resources */
 +	/* Device-specific resources */
  #ifdef CONFIG_PCI_IOV
  	PCI_IOV_RESOURCES,
  	PCI_IOV_RESOURCE_END = PCI_IOV_RESOURCES + PCI_SRIOV_NUM_BARS - 1,
  #endif
  
 -	/* resources assigned to buses behind the bridge */
 +	/* Resources assigned to buses behind the bridge */
  #define PCI_BRIDGE_RESOURCE_NUM 4
  
  	PCI_BRIDGE_RESOURCES,
  	PCI_BRIDGE_RESOURCE_END = PCI_BRIDGE_RESOURCES +
  				  PCI_BRIDGE_RESOURCE_NUM - 1,
  
 -	/* total resources associated with a PCI device */
 +	/* Total resources associated with a PCI device */
  	PCI_NUM_RESOURCES,
  
 -	/* preserve this for compatibility */
 +	/* Preserve this for compatibility */
  	DEVICE_COUNT_RESOURCE = PCI_NUM_RESOURCES,
  };
  
@@@ -150,10 -152,9 +150,10 @@@ static inline const char *pci_power_nam
  #define PCI_PM_D3COLD_WAIT	100
  #define PCI_PM_BUS_WAIT		50
  
 -/** The pci_channel state describes connectivity between the CPU and
 - *  the pci device.  If some PCI bus between here and the pci device
 - *  has crashed or locked up, this info is reflected here.
 +/**
 + * The pci_channel state describes connectivity between the CPU and
 + * the PCI device.  If some PCI bus between here and the PCI device
 + * has crashed or locked up, this info is reflected here.
   */
  typedef unsigned int __bitwise pci_channel_state_t;
  
@@@ -183,7 -184,9 +183,7 @@@ enum pcie_reset_state 
  
  typedef unsigned short __bitwise pci_dev_flags_t;
  enum pci_dev_flags {
 -	/* INTX_DISABLE in PCI_COMMAND register disables MSI
 -	 * generation too.
 -	 */
 +	/* INTX_DISABLE in PCI_COMMAND register disables MSI too */
  	PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG = (__force pci_dev_flags_t) (1 << 0),
  	/* Device configuration is irrevocably lost if disabled into D3 */
  	PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) (1 << 1),
@@@ -199,7 -202,7 +199,7 @@@
  	PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7),
  	/* Get VPD from function 0 VPD */
  	PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8),
 -	/* a non-root bridge where translation occurs, stop alias search here */
 +	/* A non-root bridge where translation occurs, stop alias search here */
  	PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9),
  	/* Do not use FLR even if device advertises PCI_AF_CAP */
  	PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10),
@@@ -219,17 -222,17 +219,17 @@@ enum pci_bus_flags 
  	PCI_BUS_FLAGS_NO_AERSID	= (__force pci_bus_flags_t) 4,
  };
  
 -/* These values come from the PCI Express Spec */
 +/* Values from Link Status register, PCIe r3.1, sec 7.8.8 */
  enum pcie_link_width {
  	PCIE_LNK_WIDTH_RESRV	= 0x00,
  	PCIE_LNK_X1		= 0x01,
  	PCIE_LNK_X2		= 0x02,
  	PCIE_LNK_X4		= 0x04,
  	PCIE_LNK_X8		= 0x08,
 -	PCIE_LNK_X12		= 0x0C,
 +	PCIE_LNK_X12		= 0x0c,
  	PCIE_LNK_X16		= 0x10,
  	PCIE_LNK_X32		= 0x20,
 -	PCIE_LNK_WIDTH_UNKNOWN  = 0xFF,
 +	PCIE_LNK_WIDTH_UNKNOWN	= 0xff,
  };
  
  /* Based on the PCI Hotplug Spec, but some values are made up by us */
@@@ -260,15 -263,15 +260,15 @@@ enum pci_bus_speed 
  };
  
  struct pci_cap_saved_data {
 -	u16 cap_nr;
 -	bool cap_extended;
 -	unsigned int size;
 -	u32 data[0];
 +	u16		cap_nr;
 +	bool		cap_extended;
 +	unsigned int	size;
 +	u32		data[0];
  };
  
  struct pci_cap_saved_state {
 -	struct hlist_node next;
 -	struct pci_cap_saved_data cap;
 +	struct hlist_node		next;
 +	struct pci_cap_saved_data	cap;
  };
  
  struct irq_affinity;
@@@ -277,17 -280,19 +277,17 @@@ struct pci_vpd
  struct pci_sriov;
  struct pci_ats;
  
 -/*
 - * The pci_dev structure is used to describe PCI devices.
 - */
 +/* The pci_dev structure describes PCI devices */
  struct pci_dev {
 -	struct list_head bus_list;	/* node in per-bus list */
 -	struct pci_bus	*bus;		/* bus this device is on */
 -	struct pci_bus	*subordinate;	/* bus this device bridges to */
 +	struct list_head bus_list;	/* Node in per-bus list */
 +	struct pci_bus	*bus;		/* Bus this device is on */
 +	struct pci_bus	*subordinate;	/* Bus this device bridges to */
  
 -	void		*sysdata;	/* hook for sys-specific extension */
 -	struct proc_dir_entry *procent;	/* device entry in /proc/bus/pci */
 +	void		*sysdata;	/* Hook for sys-specific extension */
 +	struct proc_dir_entry *procent;	/* Device entry in /proc/bus/pci */
  	struct pci_slot	*slot;		/* Physical slot this device is in */
  
 -	unsigned int	devfn;		/* encoded device & function index */
 +	unsigned int	devfn;		/* Encoded device & function index */
  	unsigned short	vendor;
  	unsigned short	device;
  	unsigned short	subsystem_vendor;
@@@ -302,12 -307,12 +302,12 @@@
  	u8		msi_cap;	/* MSI capability offset */
  	u8		msix_cap;	/* MSI-X capability offset */
  	u8		pcie_mpss:3;	/* PCIe Max Payload Size Supported */
 -	u8		rom_base_reg;	/* which config register controls the ROM */
 -	u8		pin;		/* which interrupt pin this device uses */
 -	u16		pcie_flags_reg;	/* cached PCIe Capabilities Register */
 -	unsigned long	*dma_alias_mask;/* mask of enabled devfn aliases */
 +	u8		rom_base_reg;	/* Config register controlling ROM */
 +	u8		pin;		/* Interrupt pin this device uses */
 +	u16		pcie_flags_reg;	/* Cached PCIe Capabilities Register */
 +	unsigned long	*dma_alias_mask;/* Mask of enabled devfn aliases */
  
 -	struct pci_driver *driver;	/* which driver has allocated this device */
 +	struct pci_driver *driver;	/* Driver bound to this device */
  	u64		dma_mask;	/* Mask of the bits of bus address this
  					   device implements.  Normally this is
  					   0xffffffff.  You only need to change
@@@ -316,9 -321,9 +316,9 @@@
  
  	struct device_dma_parameters dma_parms;
  
 -	pci_power_t     current_state;  /* Current operating state. In ACPI-speak,
 -					   this is D0-D3, D0 being fully functional,
 -					   and D3 being off. */
 +	pci_power_t	current_state;	/* Current operating state. In ACPI,
 +					   this is D0-D3, D0 being fully
 +					   functional, and D3 being off. */
  	u8		pm_cap;		/* PM capability offset */
  	unsigned int	pme_support:5;	/* Bitmask of states from which PME#
  					   can be generated */
@@@ -329,10 -334,10 +329,10 @@@
  	unsigned int	no_d3cold:1;	/* D3cold is forbidden */
  	unsigned int	bridge_d3:1;	/* Allow D3 for bridge */
  	unsigned int	d3cold_allowed:1;	/* D3cold is allowed by user */
 -	unsigned int	mmio_always_on:1;	/* disallow turning off io/mem
 -						   decoding during bar sizing */
 +	unsigned int	mmio_always_on:1;	/* Disallow turning off io/mem
 +						   decoding during BAR sizing */
  	unsigned int	wakeup_prepared:1;
 -	unsigned int	runtime_d3cold:1;	/* whether go through runtime
 +	unsigned int	runtime_d3cold:1;	/* Whether to go through runtime
  						   D3cold, not set for devices
  						   powered on/off by the
  						   corresponding bridge */
@@@ -345,14 -350,12 +345,14 @@@
  
  #ifdef CONFIG_PCIEASPM
  	struct pcie_link_state	*link_state;	/* ASPM link state */
 +	unsigned int	ltr_path:1;	/* Latency Tolerance Reporting
 +					   supported from root to here */
  #endif
  
 -	pci_channel_state_t error_state;	/* current connectivity state */
 -	struct	device	dev;		/* Generic device interface */
 +	pci_channel_state_t error_state;	/* Current connectivity state */
 +	struct device	dev;			/* Generic device interface */
  
 -	int		cfg_size;	/* Size of configuration space */
 +	int		cfg_size;		/* Size of config space */
  
  	/*
  	 * Instead of touching interrupt line and base address registers
@@@ -361,47 -364,47 +361,47 @@@
  	unsigned int	irq;
  	struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
  
 -	bool match_driver;		/* Skip attaching driver */
 -	/* These fields are used by common fixups */
 -	unsigned int	transparent:1;	/* Subtractive decode PCI bridge */
 -	unsigned int	multifunction:1;/* Part of multi-function device */
 -	/* keep track of device state */
 +	bool		match_driver;		/* Skip attaching driver */
 +
 +	unsigned int	transparent:1;		/* Subtractive decode bridge */
 +	unsigned int	multifunction:1;	/* Multi-function device */
 +
  	unsigned int	is_added:1;
 -	unsigned int	is_busmaster:1; /* device is busmaster */
 -	unsigned int	no_msi:1;	/* device may not use msi */
 -	unsigned int	no_64bit_msi:1; /* device may only use 32-bit MSIs */
 -	unsigned int	block_cfg_access:1;	/* config space access is blocked */
 -	unsigned int	broken_parity_status:1;	/* Device generates false positive parity */
 -	unsigned int	irq_reroute_variant:2;	/* device needs IRQ rerouting variant */
 +	unsigned int	is_busmaster:1;		/* Is busmaster */
 +	unsigned int	no_msi:1;		/* May not use MSI */
 +	unsigned int	no_64bit_msi:1; 	/* May only use 32-bit MSIs */
 +	unsigned int	block_cfg_access:1;	/* Config space access blocked */
 +	unsigned int	broken_parity_status:1;	/* Generates false positive parity */
 +	unsigned int	irq_reroute_variant:2;	/* Needs IRQ rerouting variant */
  	unsigned int	msi_enabled:1;
  	unsigned int	msix_enabled:1;
 -	unsigned int	ari_enabled:1;	/* ARI forwarding */
 -	unsigned int	ats_enabled:1;	/* Address Translation Service */
 +	unsigned int	ari_enabled:1;		/* ARI forwarding */
 +	unsigned int	ats_enabled:1;		/* Address Translation Svc */
  	unsigned int	pasid_enabled:1;	/* Process Address Space ID */
  	unsigned int	pri_enabled:1;		/* Page Request Interface */
  	unsigned int	is_managed:1;
 -	unsigned int    needs_freset:1; /* Dev requires fundamental reset */
 +	unsigned int	needs_freset:1;		/* Requires fundamental reset */
  	unsigned int	state_saved:1;
  	unsigned int	is_physfn:1;
  	unsigned int	is_virtfn:1;
  	unsigned int	reset_fn:1;
 -	unsigned int    is_hotplug_bridge:1;
 -	unsigned int	is_thunderbolt:1; /* Thunderbolt controller */
 -	unsigned int    __aer_firmware_first_valid:1;
 +	unsigned int	is_hotplug_bridge:1;
 +	unsigned int	is_thunderbolt:1;	/* Thunderbolt controller */
 +	unsigned int	__aer_firmware_first_valid:1;
  	unsigned int	__aer_firmware_first:1;
 -	unsigned int	broken_intx_masking:1; /* INTx masking can't be used */
 -	unsigned int	io_window_1k:1;	/* Intel P2P bridge 1K I/O windows */
 +	unsigned int	broken_intx_masking:1;	/* INTx masking can't be used */
 +	unsigned int	io_window_1k:1;		/* Intel bridge 1K I/O windows */
  	unsigned int	irq_managed:1;
  	unsigned int	has_secondary_link:1;
 -	unsigned int	non_compliant_bars:1;	/* broken BARs; ignore them */
 -	unsigned int	is_probed:1;		/* device probing in progress */
 +	unsigned int	non_compliant_bars:1;	/* Broken BARs; ignore them */
 +	unsigned int	is_probed:1;		/* Device probing in progress */
  	pci_dev_flags_t dev_flags;
  	atomic_t	enable_cnt;	/* pci_enable_device has been called */
  
 -	u32		saved_config_space[16]; /* config space saved at suspend time */
 +	u32		saved_config_space[16]; /* Config space saved at suspend time */
  	struct hlist_head saved_cap_space;
 -	struct bin_attribute *rom_attr; /* attribute descriptor for sysfs ROM entry */
 -	int rom_attr_enabled;		/* has display of the rom attribute been enabled? */
 +	struct bin_attribute *rom_attr;		/* Attribute descriptor for sysfs ROM entry */
 +	int		rom_attr_enabled;	/* Display of ROM attribute enabled? */
  	struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
  	struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
  
@@@ -416,12 -419,12 +416,12 @@@
  	struct pci_vpd *vpd;
  #ifdef CONFIG_PCI_ATS
  	union {
 -		struct pci_sriov *sriov;	/* SR-IOV capability related */
 -		struct pci_dev *physfn;	/* the PF this VF is associated with */
 +		struct pci_sriov	*sriov;		/* PF: SR-IOV info */
 +		struct pci_dev		*physfn;	/* VF: related PF */
  	};
  	u16		ats_cap;	/* ATS Capability offset */
  	u8		ats_stu;	/* ATS Smallest Translation Unit */
 -	atomic_t	ats_ref_cnt;	/* number of VFs with ATS enabled */
 +	atomic_t	ats_ref_cnt;	/* Number of VFs with ATS enabled */
  #endif
  #ifdef CONFIG_PCI_PRI
  	u32		pri_reqs_alloc; /* Number of PRI requests allocated */
@@@ -429,11 -432,11 +429,11 @@@
  #ifdef CONFIG_PCI_PASID
  	u16		pasid_features;
  #endif
 -	phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */
 -	size_t romlen; /* Length of ROM if it's not from the BAR */
 -	char *driver_override; /* Driver name to force a match */
 +	phys_addr_t	rom;		/* Physical address if not from BAR */
 +	size_t		romlen;		/* Length if not from BAR */
 +	char		*driver_override; /* Driver name to force a match */
  
 -	unsigned long priv_flags; /* Private flags for the pci driver */
 +	unsigned long	priv_flags;	/* Private flags for the PCI driver */
  };
  
  static inline struct pci_dev *pci_physfn(struct pci_dev *dev)
@@@ -456,26 -459,26 +456,26 @@@ static inline int pci_channel_offline(s
  }
  
  struct pci_host_bridge {
 -	struct device dev;
 -	struct pci_bus *bus;		/* root bus */
 -	struct pci_ops *ops;
 -	void *sysdata;
 -	int busnr;
 +	struct device	dev;
 +	struct pci_bus	*bus;		/* Root bus */
 +	struct pci_ops	*ops;
 +	void		*sysdata;
 +	int		busnr;
  	struct list_head windows;	/* resource_entry */
 -	u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* platform IRQ swizzler */
 +	u8 (*swizzle_irq)(struct pci_dev *, u8 *); /* Platform IRQ swizzler */
  	int (*map_irq)(const struct pci_dev *, u8, u8);
  	void (*release_fn)(struct pci_host_bridge *);
 -	void *release_data;
 +	void		*release_data;
  	struct msi_controller *msi;
 -	unsigned int ignore_reset_delay:1;	/* for entire hierarchy */
 -	unsigned int no_ext_tags:1;		/* no Extended Tags */
 +	unsigned int	ignore_reset_delay:1;	/* For entire hierarchy */
 +	unsigned int	no_ext_tags:1;		/* No Extended Tags */
  	/* Resource alignment requirements */
  	resource_size_t (*align_resource)(struct pci_dev *dev,
  			const struct resource *res,
  			resource_size_t start,
  			resource_size_t size,
  			resource_size_t align);
 -	unsigned long private[0] ____cacheline_aligned;
 +	unsigned long	private[0] ____cacheline_aligned;
  };
  
  #define	to_pci_host_bridge(n) container_of(n, struct pci_host_bridge, dev)
@@@ -497,8 -500,8 +497,8 @@@ void pci_free_host_bridge(struct pci_ho
  struct pci_host_bridge *pci_find_host_bridge(struct pci_bus *bus);
  
  void pci_set_host_bridge_release(struct pci_host_bridge *bridge,
 -		     void (*release_fn)(struct pci_host_bridge *),
 -		     void *release_data);
 +				 void (*release_fn)(struct pci_host_bridge *),
 +				 void *release_data);
  
  int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge);
  
@@@ -518,32 -521,32 +518,32 @@@
  #define PCI_SUBTRACTIVE_DECODE	0x1
  
  struct pci_bus_resource {
 -	struct list_head list;
 -	struct resource *res;
 -	unsigned int flags;
 +	struct list_head	list;
 +	struct resource		*res;
 +	unsigned int		flags;
  };
  
  #define PCI_REGION_FLAG_MASK	0x0fU	/* These bits of resource flags tell us the PCI region flags */
  
  struct pci_bus {
 -	struct list_head node;		/* node in list of buses */
 -	struct pci_bus	*parent;	/* parent bus this bridge is on */
 -	struct list_head children;	/* list of child buses */
 -	struct list_head devices;	/* list of devices on this bus */
 -	struct pci_dev	*self;		/* bridge device as seen by parent */
 -	struct list_head slots;		/* list of slots on this bus;
 +	struct list_head node;		/* Node in list of buses */
 +	struct pci_bus	*parent;	/* Parent bus this bridge is on */
 +	struct list_head children;	/* List of child buses */
 +	struct list_head devices;	/* List of devices on this bus */
 +	struct pci_dev	*self;		/* Bridge device as seen by parent */
 +	struct list_head slots;		/* List of slots on this bus;
  					   protected by pci_slot_mutex */
  	struct resource *resource[PCI_BRIDGE_RESOURCE_NUM];
 -	struct list_head resources;	/* address space routed to this bus */
 -	struct resource busn_res;	/* bus numbers routed to this bus */
 +	struct list_head resources;	/* Address space routed to this bus */
 +	struct resource busn_res;	/* Bus numbers routed to this bus */
  
 -	struct pci_ops	*ops;		/* configuration access functions */
 +	struct pci_ops	*ops;		/* Configuration access functions */
  	struct msi_controller *msi;	/* MSI controller */
 -	void		*sysdata;	/* hook for sys-specific extension */
 -	struct proc_dir_entry *procdir;	/* directory entry in /proc/bus/pci */
 +	void		*sysdata;	/* Hook for sys-specific extension */
 +	struct proc_dir_entry *procdir;	/* Directory entry in /proc/bus/pci */
  
 -	unsigned char	number;		/* bus number */
 -	unsigned char	primary;	/* number of primary bridge */
 +	unsigned char	number;		/* Bus number */
 +	unsigned char	primary;	/* Number of primary bridge */
  	unsigned char	max_bus_speed;	/* enum pci_bus_speed */
  	unsigned char	cur_bus_speed;	/* enum pci_bus_speed */
  #ifdef CONFIG_PCI_DOMAINS_GENERIC
@@@ -552,12 -555,12 +552,12 @@@
  
  	char		name[48];
  
 -	unsigned short  bridge_ctl;	/* manage NO_ISA/FBB/et al behaviors */
 -	pci_bus_flags_t bus_flags;	/* inherited by child buses */
 +	unsigned short	bridge_ctl;	/* Manage NO_ISA/FBB/et al behaviors */
 +	pci_bus_flags_t bus_flags;	/* Inherited by child buses */
  	struct device		*bridge;
  	struct device		dev;
 -	struct bin_attribute	*legacy_io; /* legacy I/O for this bus */
 -	struct bin_attribute	*legacy_mem; /* legacy mem */
 +	struct bin_attribute	*legacy_io;	/* Legacy I/O for this bus */
 +	struct bin_attribute	*legacy_mem;	/* Legacy mem */
  	unsigned int		is_added:1;
  };
  
@@@ -614,7 -617,9 +614,7 @@@ static inline bool pci_dev_msi_enabled(
  static inline bool pci_dev_msi_enabled(struct pci_dev *pci_dev) { return false; }
  #endif
  
 -/*
 - * Error values that may be returned by PCI functions.
 - */
 +/* Error values that may be returned by PCI functions */
  #define PCIBIOS_SUCCESSFUL		0x00
  #define PCIBIOS_FUNC_NOT_SUPPORTED	0x81
  #define PCIBIOS_BAD_VENDOR_ID		0x83
@@@ -623,7 -628,9 +623,7 @@@
  #define PCIBIOS_SET_FAILED		0x88
  #define PCIBIOS_BUFFER_TOO_SMALL	0x89
  
 -/*
 - * Translate above to generic errno for passing back through non-PCI code.
 - */
 +/* Translate above to generic errno for passing back through non-PCI code */
  static inline int pcibios_err_to_errno(int err)
  {
  	if (err <= PCIBIOS_SUCCESSFUL)
@@@ -673,13 -680,13 +673,13 @@@ typedef u32 pci_bus_addr_t
  #endif
  
  struct pci_bus_region {
 -	pci_bus_addr_t start;
 -	pci_bus_addr_t end;
 +	pci_bus_addr_t	start;
 +	pci_bus_addr_t	end;
  };
  
  struct pci_dynids {
 -	spinlock_t lock;            /* protects list, index */
 -	struct list_head list;      /* for IDs added at runtime */
 +	spinlock_t		lock;	/* Protects list, index */
 +	struct list_head	list;	/* For IDs added at runtime */
  };
  
  
@@@ -693,13 -700,13 +693,13 @@@
  typedef unsigned int __bitwise pci_ers_result_t;
  
  enum pci_ers_result {
 -	/* no result/none/not supported in device driver */
 +	/* No result/none/not supported in device driver */
  	PCI_ERS_RESULT_NONE = (__force pci_ers_result_t) 1,
  
  	/* Device driver can recover without slot reset */
  	PCI_ERS_RESULT_CAN_RECOVER = (__force pci_ers_result_t) 2,
  
 -	/* Device driver wants slot to be reset. */
 +	/* Device driver wants slot to be reset */
  	PCI_ERS_RESULT_NEED_RESET = (__force pci_ers_result_t) 3,
  
  	/* Device has completely failed, is unrecoverable */
@@@ -735,27 -742,27 +735,27 @@@ struct pci_error_handlers 
  
  struct module;
  struct pci_driver {
 -	struct list_head node;
 -	const char *name;
 -	const struct pci_device_id *id_table;	/* must be non-NULL for probe to be called */
 -	int  (*probe)  (struct pci_dev *dev, const struct pci_device_id *id);	/* New device inserted */
 -	void (*remove) (struct pci_dev *dev);	/* Device removed (NULL if not a hot-plug capable driver) */
 -	int  (*suspend) (struct pci_dev *dev, pm_message_t state);	/* Device suspended */
 -	int  (*suspend_late) (struct pci_dev *dev, pm_message_t state);
 -	int  (*resume_early) (struct pci_dev *dev);
 -	int  (*resume) (struct pci_dev *dev);	                /* Device woken up */
 +	struct list_head	node;
 +	const char		*name;
 +	const struct pci_device_id *id_table;	/* Must be non-NULL for probe to be called */
 +	int  (*probe)(struct pci_dev *dev, const struct pci_device_id *id);	/* New device inserted */
 +	void (*remove)(struct pci_dev *dev);	/* Device removed (NULL if not a hot-plug capable driver) */
 +	int  (*suspend)(struct pci_dev *dev, pm_message_t state);	/* Device suspended */
 +	int  (*suspend_late)(struct pci_dev *dev, pm_message_t state);
 +	int  (*resume_early)(struct pci_dev *dev);
 +	int  (*resume) (struct pci_dev *dev);	/* Device woken up */
  	void (*shutdown) (struct pci_dev *dev);
 -	int (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* PF pdev */
 +	int  (*sriov_configure) (struct pci_dev *dev, int num_vfs); /* On PF */
  	const struct pci_error_handlers *err_handler;
  	const struct attribute_group **groups;
  	struct device_driver	driver;
 -	struct pci_dynids dynids;
 +	struct pci_dynids	dynids;
  };
  
  #define	to_pci_driver(drv) container_of(drv, struct pci_driver, driver)
  
  /**
 - * PCI_DEVICE - macro used to describe a specific pci device
 + * PCI_DEVICE - macro used to describe a specific PCI device
   * @vend: the 16 bit PCI Vendor ID
   * @dev: the 16 bit PCI Device ID
   *
@@@ -768,7 -775,7 +768,7 @@@
  	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
  
  /**
 - * PCI_DEVICE_SUB - macro used to describe a specific pci device with subsystem
 + * PCI_DEVICE_SUB - macro used to describe a specific PCI device with subsystem
   * @vend: the 16 bit PCI Vendor ID
   * @dev: the 16 bit PCI Device ID
   * @subvend: the 16 bit PCI Subvendor ID
@@@ -782,7 -789,7 +782,7 @@@
  	.subvendor = (subvend), .subdevice = (subdev)
  
  /**
 - * PCI_DEVICE_CLASS - macro used to describe a specific pci device class
 + * PCI_DEVICE_CLASS - macro used to describe a specific PCI device class
   * @dev_class: the class, subclass, prog-if triple for this device
   * @dev_class_mask: the class mask for this device
   *
@@@ -796,7 -803,7 +796,7 @@@
  	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID
  
  /**
 - * PCI_VDEVICE - macro used to describe a specific pci device in short form
 + * PCI_VDEVICE - macro used to describe a specific PCI device in short form
   * @vend: the vendor name
   * @dev: the 16 bit PCI Device ID
   *
@@@ -805,21 -812,22 +805,21 @@@
   * to PCI_ANY_ID. The macro allows the next field to follow as the device
   * private data.
   */
 -
  #define PCI_VDEVICE(vend, dev) \
  	.vendor = PCI_VENDOR_ID_##vend, .device = (dev), \
  	.subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID, 0, 0
  
  enum {
 -	PCI_REASSIGN_ALL_RSRC	= 0x00000001,	/* ignore firmware setup */
 -	PCI_REASSIGN_ALL_BUS	= 0x00000002,	/* reassign all bus numbers */
 -	PCI_PROBE_ONLY		= 0x00000004,	/* use existing setup */
 -	PCI_CAN_SKIP_ISA_ALIGN	= 0x00000008,	/* don't do ISA alignment */
 -	PCI_ENABLE_PROC_DOMAINS	= 0x00000010,	/* enable domains in /proc */
 +	PCI_REASSIGN_ALL_RSRC	= 0x00000001,	/* Ignore firmware setup */
 +	PCI_REASSIGN_ALL_BUS	= 0x00000002,	/* Reassign all bus numbers */
 +	PCI_PROBE_ONLY		= 0x00000004,	/* Use existing setup */
 +	PCI_CAN_SKIP_ISA_ALIGN	= 0x00000008,	/* Don't do ISA alignment */
 +	PCI_ENABLE_PROC_DOMAINS	= 0x00000010,	/* Enable domains in /proc */
  	PCI_COMPAT_DOMAIN_0	= 0x00000020,	/* ... except domain 0 */
 -	PCI_SCAN_ALL_PCIE_DEVS	= 0x00000040,	/* scan all, not just dev 0 */
 +	PCI_SCAN_ALL_PCIE_DEVS	= 0x00000040,	/* Scan all, not just dev 0 */
  };
  
 -/* these external functions are only available when PCI support is enabled */
 +/* These external functions are only available when PCI support is enabled */
  #ifdef CONFIG_PCI
  
  extern unsigned int pci_flags;
@@@ -832,11 -840,11 +832,11 @@@ static inline int pci_has_flag(int flag
  void pcie_bus_configure_settings(struct pci_bus *bus);
  
  enum pcie_bus_config_types {
 -	PCIE_BUS_TUNE_OFF,	/* don't touch MPS at all */
 -	PCIE_BUS_DEFAULT,	/* ensure MPS matches upstream bridge */
 -	PCIE_BUS_SAFE,		/* use largest MPS boot-time devices support */
 -	PCIE_BUS_PERFORMANCE,	/* use MPS and MRRS for best performance */
 -	PCIE_BUS_PEER2PEER,	/* set MPS = 128 for all devices */
 +	PCIE_BUS_TUNE_OFF,	/* Don't touch MPS at all */
 +	PCIE_BUS_DEFAULT,	/* Ensure MPS matches upstream bridge */
 +	PCIE_BUS_SAFE,		/* Use largest MPS boot-time devices support */
 +	PCIE_BUS_PERFORMANCE,	/* Use MPS and MRRS for best performance */
 +	PCIE_BUS_PEER2PEER,	/* Set MPS = 128 for all devices */
  };
  
  extern enum pcie_bus_config_types pcie_bus_config;
@@@ -845,7 -853,7 +845,7 @@@ extern struct bus_type pci_bus_type
  
  /* Do NOT directly access these two variables, unless you are arch-specific PCI
   * code, or PCI core code. */
 -extern struct list_head pci_root_buses;	/* list of all known PCI buses */
 +extern struct list_head pci_root_buses;	/* List of all known PCI buses */
  /* Some device drivers need know if PCI is initiated */
  int no_pci_devices(void);
  
@@@ -883,8 -891,8 +883,8 @@@ int pci_bus_insert_busn_res(struct pci_
  int pci_bus_update_busn_res_end(struct pci_bus *b, int busmax);
  void pci_bus_release_busn_res(struct pci_bus *b);
  struct pci_bus *pci_scan_root_bus(struct device *parent, int bus,
 -					     struct pci_ops *ops, void *sysdata,
 -					     struct list_head *resources);
 +				  struct pci_ops *ops, void *sysdata,
 +				  struct list_head *resources);
  int pci_scan_root_bus_bridge(struct pci_host_bridge *bridge);
  struct pci_bus *pci_add_new_bus(struct pci_bus *parent, struct pci_dev *dev,
  				int busnr);
@@@ -941,10 -949,10 +941,10 @@@ int pci_find_next_ht_capability(struct 
  struct pci_bus *pci_find_next_bus(const struct pci_bus *from);
  
  struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device,
 -				struct pci_dev *from);
 +			       struct pci_dev *from);
  struct pci_dev *pci_get_subsys(unsigned int vendor, unsigned int device,
 -				unsigned int ss_vendor, unsigned int ss_device,
 -				struct pci_dev *from);
 +			       unsigned int ss_vendor, unsigned int ss_device,
 +			       struct pci_dev *from);
  struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn);
  struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus,
  					    unsigned int devfn);
@@@ -1020,7 -1028,7 +1020,7 @@@ static inline int pcie_capability_clear
  	return pcie_capability_clear_and_set_dword(dev, pos, clear, 0);
  }
  
 -/* user-space driven config access */
 +/* User-space driven config access */
  int pci_user_read_config_byte(struct pci_dev *dev, int where, u8 *val);
  int pci_user_read_config_word(struct pci_dev *dev, int where, u16 *val);
  int pci_user_read_config_dword(struct pci_dev *dev, int where, u32 *val);
@@@ -1064,6 -1072,7 +1064,7 @@@ int pci_set_pcie_reset_state(struct pci
  int pci_set_cacheline_size(struct pci_dev *dev);
  #define HAVE_PCI_SET_MWI
  int __must_check pci_set_mwi(struct pci_dev *dev);
+ int __must_check pcim_set_mwi(struct pci_dev *dev);
  int pci_try_set_mwi(struct pci_dev *dev);
  void pci_clear_mwi(struct pci_dev *dev);
  void pci_intx(struct pci_dev *dev, int enable);
@@@ -1162,7 -1171,7 +1163,7 @@@ unsigned int pci_rescan_bus(struct pci_
  void pci_lock_rescan_remove(void);
  void pci_unlock_rescan_remove(void);
  
 -/* Vital product data routines */
 +/* Vital Product Data routines */
  ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
  ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
  int pci_set_vpd_size(struct pci_dev *dev, size_t len);
@@@ -1247,7 -1256,9 +1248,7 @@@ static inline pci_bus_addr_t pci_bus_ad
  int __must_check __pci_register_driver(struct pci_driver *, struct module *,
  				       const char *mod_name);
  
 -/*
 - * pci_register_driver must be a macro so that KBUILD_MODNAME can be expanded
 - */
 +/* pci_register_driver() must be a macro so KBUILD_MODNAME can be expanded */
  #define pci_register_driver(driver)		\
  	__pci_register_driver(driver, THIS_MODULE, KBUILD_MODNAME)
  
@@@ -1262,7 -1273,8 +1263,7 @@@ void pci_unregister_driver(struct pci_d
   * use this macro once, and calling it replaces module_init() and module_exit()
   */
  #define module_pci_driver(__pci_driver) \
 -	module_driver(__pci_driver, pci_register_driver, \
 -		       pci_unregister_driver)
 +	module_driver(__pci_driver, pci_register_driver, pci_unregister_driver)
  
  /**
   * builtin_pci_driver() - Helper macro for registering a PCI driver
@@@ -1301,10 -1313,10 +1302,10 @@@ resource_size_t pcibios_iov_resource_al
  int pci_set_vga_state(struct pci_dev *pdev, bool decode,
  		      unsigned int command_bits, u32 flags);
  
 -#define PCI_IRQ_LEGACY		(1 << 0) /* allow legacy interrupts */
 -#define PCI_IRQ_MSI		(1 << 1) /* allow MSI interrupts */
 -#define PCI_IRQ_MSIX		(1 << 2) /* allow MSI-X interrupts */
 -#define PCI_IRQ_AFFINITY	(1 << 3) /* auto-assign affinity */
 +#define PCI_IRQ_LEGACY		(1 << 0) /* Allow legacy interrupts */
 +#define PCI_IRQ_MSI		(1 << 1) /* Allow MSI interrupts */
 +#define PCI_IRQ_MSIX		(1 << 2) /* Allow MSI-X interrupts */
 +#define PCI_IRQ_AFFINITY	(1 << 3) /* Auto-assign affinity */
  #define PCI_IRQ_ALL_TYPES \
  	(PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX)
  
@@@ -1323,8 -1335,8 +1324,8 @@@
  #define	pci_pool_free(pool, vaddr, addr) dma_pool_free(pool, vaddr, addr)
  
  struct msix_entry {
 -	u32	vector;	/* kernel uses to write allocated vector */
 -	u16	entry;	/* driver uses to specify entry, OS writes */
 +	u32	vector;	/* Kernel uses to write allocated vector */
 +	u16	entry;	/* Driver uses to specify entry, OS writes */
  };
  
  #ifdef CONFIG_PCI_MSI
@@@ -1364,10 -1376,10 +1365,10 @@@ static inline int pci_msi_enabled(void
  static inline int pci_enable_msi(struct pci_dev *dev)
  { return -ENOSYS; }
  static inline int pci_enable_msix_range(struct pci_dev *dev,
 -		      struct msix_entry *entries, int minvec, int maxvec)
 +			struct msix_entry *entries, int minvec, int maxvec)
  { return -ENOSYS; }
  static inline int pci_enable_msix_exact(struct pci_dev *dev,
 -		      struct msix_entry *entries, int nvec)
 +			struct msix_entry *entries, int nvec)
  { return -ENOSYS; }
  
  static inline int
@@@ -1532,9 -1544,9 +1533,9 @@@ static inline int acpi_pci_bus_find_dom
  int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent);
  #endif
  
 -/* some architectures require additional setup to direct VGA traffic */
 +/* Some architectures require additional setup to direct VGA traffic */
  typedef int (*arch_set_vga_state_t)(struct pci_dev *pdev, bool decode,
 -		      unsigned int command_bits, u32 flags);
 +				    unsigned int command_bits, u32 flags);
  void pci_register_set_vga_state(arch_set_vga_state_t func);
  
  static inline int
@@@ -1573,9 -1585,10 +1574,9 @@@ static inline void pci_clear_flags(int 
  static inline int pci_has_flag(int flag) { return 0; }
  
  /*
 - *  If the system does not have PCI, clearly these return errors.  Define
 - *  these as simple inline functions to avoid hair in drivers.
 + * If the system does not have PCI, clearly these return errors.  Define
 + * these as simple inline functions to avoid hair in drivers.
   */
 -
  #define _PCI_NOP(o, s, t) \
  	static inline int pci_##o##_config_##s(struct pci_dev *dev, \
  						int where, t val) \
@@@ -1714,10 -1727,8 +1715,10 @@@ int pci_iobar_pfn(struct pci_dev *pdev
  #define pci_root_bus_fwnode(bus)	NULL
  #endif
  
 -/* these helpers provide future and backwards compatibility
 - * for accessing popular PCI BAR info */
 +/*
 + * These helpers provide future and backwards compatibility
 + * for accessing popular PCI BAR info
 + */
  #define pci_resource_start(dev, bar)	((dev)->resource[(bar)].start)
  #define pci_resource_end(dev, bar)	((dev)->resource[(bar)].end)
  #define pci_resource_flags(dev, bar)	((dev)->resource[(bar)].flags)
@@@ -1729,8 -1740,7 +1730,8 @@@
  	 (pci_resource_end((dev), (bar)) -		\
  	  pci_resource_start((dev), (bar)) + 1))
  
 -/* Similar to the helpers above, these manipulate per-pci_dev
 +/*
 + * Similar to the helpers above, these manipulate per-pci_dev
   * driver-specific data.  They are really just a wrapper around
   * the generic device structure functions of these calls.
   */
@@@ -1744,14 -1754,16 +1745,14 @@@ static inline void pci_set_drvdata(stru
  	dev_set_drvdata(&pdev->dev, data);
  }
  
 -/* If you want to know what to call your pci_dev, ask this function.
 - * Again, it's a wrapper around the generic device.
 - */
  static inline const char *pci_name(const struct pci_dev *pdev)
  {
  	return dev_name(&pdev->dev);
  }
  
  
 -/* Some archs don't want to expose struct resource to userland as-is
 +/*
 + * Some archs don't want to expose struct resource to userland as-is
   * in sysfs and /proc
   */
  #ifdef HAVE_ARCH_PCI_RESOURCE_TO_USER
@@@ -1770,16 -1782,16 +1771,16 @@@ static inline void pci_resource_to_user
  
  
  /*
 - *  The world is not perfect and supplies us with broken PCI devices.
 - *  For at least a part of these bugs we need a work-around, so both
 - *  generic (drivers/pci/quirks.c) and per-architecture code can define
 - *  fixup hooks to be called for particular buggy devices.
 + * The world is not perfect and supplies us with broken PCI devices.
 + * For at least a part of these bugs we need a work-around, so both
 + * generic (drivers/pci/quirks.c) and per-architecture code can define
 + * fixup hooks to be called for particular buggy devices.
   */
  
  struct pci_fixup {
 -	u16 vendor;		/* You can use PCI_ANY_ID here of course */
 -	u16 device;		/* You can use PCI_ANY_ID here of course */
 -	u32 class;		/* You can use PCI_ANY_ID here too */
 +	u16 vendor;			/* Or PCI_ANY_ID */
 +	u16 device;			/* Or PCI_ANY_ID */
 +	u32 class;			/* Or PCI_ANY_ID */
  	unsigned int class_shift;	/* should be 0, 8, 16 */
  	void (*hook)(struct pci_dev *dev);
  };
@@@ -1821,19 -1833,23 +1822,19 @@@ enum pci_fixup_pass 
  #define DECLARE_PCI_FIXUP_CLASS_RESUME(vendor, device, class,		\
  					 class_shift, hook)		\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume,			\
 -		resume##hook, vendor, device, class,	\
 -		class_shift, hook)
 +		resume##hook, vendor, device, class, class_shift, hook)
  #define DECLARE_PCI_FIXUP_CLASS_RESUME_EARLY(vendor, device, class,	\
  					 class_shift, hook)		\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early,		\
 -		resume_early##hook, vendor, device,	\
 -		class, class_shift, hook)
 +		resume_early##hook, vendor, device, class, class_shift, hook)
  #define DECLARE_PCI_FIXUP_CLASS_SUSPEND(vendor, device, class,		\
  					 class_shift, hook)		\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend,			\
 -		suspend##hook, vendor, device, class,	\
 -		class_shift, hook)
 +		suspend##hook, vendor, device, class, class_shift, hook)
  #define DECLARE_PCI_FIXUP_CLASS_SUSPEND_LATE(vendor, device, class,	\
  					 class_shift, hook)		\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend_late,		\
 -		suspend_late##hook, vendor, device,	\
 -		class, class_shift, hook)
 +		suspend_late##hook, vendor, device, class, class_shift, hook)
  
  #define DECLARE_PCI_FIXUP_EARLY(vendor, device, hook)			\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_early,			\
@@@ -1849,16 -1865,20 +1850,16 @@@
  		hook, vendor, device, PCI_ANY_ID, 0, hook)
  #define DECLARE_PCI_FIXUP_RESUME(vendor, device, hook)			\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume,			\
 -		resume##hook, vendor, device,		\
 -		PCI_ANY_ID, 0, hook)
 +		resume##hook, vendor, device, PCI_ANY_ID, 0, hook)
  #define DECLARE_PCI_FIXUP_RESUME_EARLY(vendor, device, hook)		\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_resume_early,		\
 -		resume_early##hook, vendor, device,	\
 -		PCI_ANY_ID, 0, hook)
 +		resume_early##hook, vendor, device, PCI_ANY_ID, 0, hook)
  #define DECLARE_PCI_FIXUP_SUSPEND(vendor, device, hook)			\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend,			\
 -		suspend##hook, vendor, device,		\
 -		PCI_ANY_ID, 0, hook)
 +		suspend##hook, vendor, device, PCI_ANY_ID, 0, hook)
  #define DECLARE_PCI_FIXUP_SUSPEND_LATE(vendor, device, hook)		\
  	DECLARE_PCI_FIXUP_SECTION(.pci_fixup_suspend_late,		\
 -		suspend_late##hook, vendor, device,	\
 -		PCI_ANY_ID, 0, hook)
 +		suspend_late##hook, vendor, device, PCI_ANY_ID, 0, hook)
  
  #ifdef CONFIG_PCI_QUIRKS
  void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
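For illustration only (the device ID below is hypothetical, not from this
diff), a quirk pairs a hook function with one of the DECLARE_PCI_FIXUP_*
macros above:

	/* hypothetical quirk, run early for one specific device */
	static void quirk_example(struct pci_dev *dev)
	{
		dev_info(&dev->dev, "example quirk applied\n");
	}
	DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x1234, quirk_example);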
@@@ -1945,7 -1965,6 +1946,7 @@@ int pci_vfs_assigned(struct pci_dev *de
  int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs);
  int pci_sriov_get_totalvfs(struct pci_dev *dev);
  resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno);
 +void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe);
  #else
  static inline int pci_iov_virtfn_bus(struct pci_dev *dev, int id)
  {
@@@ -1973,7 -1992,6 +1974,7 @@@ static inline int pci_sriov_get_totalvf
  { return 0; }
  static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
  { return 0; }
 +static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { }
  #endif
  
  #if defined(CONFIG_HOTPLUG_PCI) || defined(CONFIG_HOTPLUG_PCI_MODULE)
@@@ -2095,7 -2113,7 +2096,7 @@@ static inline u16 pci_vpd_lrdt_size(con
   */
  static inline u16 pci_vpd_lrdt_tag(const u8 *lrdt)
  {
 -    return (u16)(lrdt[0] & PCI_VPD_LRDT_TIN_MASK);
 +	return (u16)(lrdt[0] & PCI_VPD_LRDT_TIN_MASK);
  }
  
  /**
@@@ -2180,7 -2198,7 +2181,7 @@@ static inline struct device_node *pci_b
  	return bus ? bus->dev.of_node : NULL;
  }
  
 -#else /* CONFIG_OF */
 +#else	/* CONFIG_OF */
  static inline void pci_set_of_node(struct pci_dev *dev) { }
  static inline void pci_release_of_node(struct pci_dev *dev) { }
  static inline void pci_set_bus_of_node(struct pci_bus *bus) { }
@@@ -2189,7 -2207,7 +2190,7 @@@ static inline struct device_node 
  pci_device_to_OF_node(const struct pci_dev *pdev) { return NULL; }
  static inline struct irq_domain *
  pci_host_bridge_of_msi_domain(struct pci_bus *bus) { return NULL; }
 -#endif  /* CONFIG_OF */
 +#endif	/* CONFIG_OF */
  
  #ifdef CONFIG_ACPI
  struct irq_domain *pci_host_bridge_acpi_msi_domain(struct pci_bus *bus);
@@@ -2214,7 -2232,7 +2215,7 @@@ int pci_for_each_dma_alias(struct pci_d
  			   int (*fn)(struct pci_dev *pdev,
  				     u16 alias, void *data), void *data);
  
 -/* helper functions for operation of device flag */
 +/* Helper functions for operation of device flag */
  static inline void pci_set_dev_assigned(struct pci_dev *pdev)
  {
  	pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
@@@ -2261,7 -2279,7 +2262,7 @@@ static inline bool pci_is_thunderbolt_a
  	return false;
  }
  
 -/* provide the legacy pci_dma_* API */
 +/* Provide the legacy pci_dma_* API */
  #include <linux/pci-dma-compat.h>
  
  #endif /* LINUX_PCI_H */
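A minimal sketch of the pci_resource_*() helpers documented above (the
BAR number and surrounding driver code are illustrative):

	void __iomem *regs;

	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM))
		return -ENODEV;
	regs = ioremap(pci_resource_start(pdev, 0),
		       pci_resource_len(pdev, 0));
	if (!regs)
		return -ENOMEM;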
diff --combined include/linux/skbuff.h
index a87e43d16f44,b8e0da6c27d6..ac89a93b7c83
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@@ -1211,6 -1211,11 +1211,11 @@@ static inline bool skb_flow_dissect_flo
  				  data, proto, nhoff, hlen, flags);
  }
  
+ void
+ skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
+ 			     struct flow_dissector *flow_dissector,
+ 			     void *target_container);
+ 
  static inline __u32 skb_get_hash(struct sk_buff *skb)
  {
  	if (!skb->l4_hash && !skb->sw_hash)
@@@ -3241,7 -3246,7 +3246,7 @@@ struct sk_buff *__skb_recv_datagram(str
  				    int *peeked, int *off, int *err);
  struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
  				  int *err);
 -unsigned int datagram_poll(struct file *file, struct socket *sock,
 +__poll_t datagram_poll(struct file *file, struct socket *sock,
  			   struct poll_table_struct *wait);
  int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
  			   struct iov_iter *to, int size);
diff --combined include/net/inet_connection_sock.h
index ec72cdb5bc39,8e1bf9ae4a5e..6692d67e9245
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@@ -77,6 -77,7 +77,7 @@@ struct inet_connection_sock_af_ops 
   * @icsk_af_ops		   Operations which are AF_INET{4,6} specific
   * @icsk_ulp_ops	   Pluggable ULP control hook
   * @icsk_ulp_data	   ULP private data
+  * @icsk_listen_portaddr_node: hash node in the portaddr listener hashtable
   * @icsk_ca_state:	   Congestion control state
   * @icsk_retransmits:	   Number of unrecovered [RTO] timeouts
   * @icsk_pending:	   Scheduled timer event
@@@ -101,6 -102,7 +102,7 @@@ struct inet_connection_sock 
  	const struct inet_connection_sock_af_ops *icsk_af_ops;
  	const struct tcp_ulp_ops  *icsk_ulp_ops;
  	void			  *icsk_ulp_data;
+ 	struct hlist_node         icsk_listen_portaddr_node;
  	unsigned int		  (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
  	__u8			  icsk_ca_state:6,
  				  icsk_ca_setsockopt:1,
@@@ -305,7 -307,7 +307,7 @@@ void inet_csk_prepare_forced_close(stru
  /*
   * LISTEN is a special case for poll..
   */
 -static inline unsigned int inet_csk_listen_poll(const struct sock *sk)
 +static inline __poll_t inet_csk_listen_poll(const struct sock *sk)
  {
  	return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ?
  			(POLLIN | POLLRDNORM) : 0;
diff --combined include/net/sctp/sctp.h
index 608d123ef25f,20c0c1be2ca7..f7ae6b0a21d0
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@@ -107,7 -107,7 +107,7 @@@ int sctp_backlog_rcv(struct sock *sk, s
  int sctp_inet_listen(struct socket *sock, int backlog);
  void sctp_write_space(struct sock *sk);
  void sctp_data_ready(struct sock *sk);
 -unsigned int sctp_poll(struct file *file, struct socket *sock,
 +__poll_t sctp_poll(struct file *file, struct socket *sock,
  		poll_table *wait);
  void sctp_sock_rfree(struct sk_buff *skb);
  void sctp_copy_sock(struct sock *newsk, struct sock *sk,
@@@ -116,7 -116,7 +116,7 @@@ extern struct percpu_counter sctp_socke
  int sctp_asconf_mgmt(struct sctp_sock *, struct sctp_sockaddr_entry *);
  struct sk_buff *sctp_skb_recv_datagram(struct sock *, int, int, int *);
  
- int sctp_transport_walk_start(struct rhashtable_iter *iter);
+ void sctp_transport_walk_start(struct rhashtable_iter *iter);
  void sctp_transport_walk_stop(struct rhashtable_iter *iter);
  struct sctp_transport *sctp_transport_get_next(struct net *net,
  			struct rhashtable_iter *iter);
@@@ -444,13 -444,13 +444,13 @@@ static inline int sctp_frag_point(cons
  	int frag = pmtu;
  
  	frag -= sp->pf->af->net_header_len;
- 	frag -= sizeof(struct sctphdr) + sizeof(struct sctp_data_chunk);
+ 	frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream);
  
  	if (asoc->user_frag)
  		frag = min_t(int, frag, asoc->user_frag);
  
  	frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN -
- 					    sizeof(struct sctp_data_chunk)));
+ 					    sctp_datachk_len(&asoc->stream)));
  
  	return frag;
  }
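A rough worked example of sctp_frag_point() (IPv4, all numbers
illustrative): with pmtu = 1500 and classic DATA chunks, the payload
budget is 1500 - 20 (IP header) - 12 (SCTP common header) - 16 (DATA
chunk header) = 1452 bytes; with stream interleaving, sctp_datachk_len()
returns the larger I-DATA chunk header (20 bytes), giving 1448 instead.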
diff --combined include/net/sock.h
index 4fd74e0d1bbb,73b7830b0bb8..0752f034f1bf
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@@ -72,6 -72,7 +72,7 @@@
  #include <net/tcp_states.h>
  #include <linux/net_tstamp.h>
  #include <net/smc.h>
+ #include <net/l3mdev.h>
  
  /*
   * This structure really needs to be cleaned up.
@@@ -1262,6 -1263,7 +1263,7 @@@ proto_memory_pressure(struct proto *pro
  /* Called with local bh disabled */
  void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc);
  int sock_prot_inuse_get(struct net *net, struct proto *proto);
+ int sock_inuse_get(struct net *net);
  #else
  static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
  		int inc)
@@@ -1583,7 -1585,7 +1585,7 @@@ int sock_no_connect(struct socket *, st
  int sock_no_socketpair(struct socket *, struct socket *);
  int sock_no_accept(struct socket *, struct socket *, int, bool);
  int sock_no_getname(struct socket *, struct sockaddr *, int *, int);
 -unsigned int sock_no_poll(struct file *, struct socket *,
 +__poll_t sock_no_poll(struct file *, struct socket *,
  			  struct poll_table_struct *);
  int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
  int sock_no_listen(struct socket *, int);
@@@ -2337,31 -2339,6 +2339,6 @@@ static inline bool sk_listener(const st
  	return (1 << sk->sk_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
  }
  
- /**
-  * sk_state_load - read sk->sk_state for lockless contexts
-  * @sk: socket pointer
-  *
-  * Paired with sk_state_store(). Used in places we do not hold socket lock :
-  * tcp_diag_get_info(), tcp_get_info(), tcp_poll(), get_tcp4_sock() ...
-  */
- static inline int sk_state_load(const struct sock *sk)
- {
- 	return smp_load_acquire(&sk->sk_state);
- }
- 
- /**
-  * sk_state_store - update sk->sk_state
-  * @sk: socket pointer
-  * @newstate: new state
-  *
-  * Paired with sk_state_load(). Should be used in contexts where
-  * state change might impact lockless readers.
-  */
- static inline void sk_state_store(struct sock *sk, int newstate)
- {
- 	smp_store_release(&sk->sk_state, newstate);
- }
- 
  void sock_enable_timestamp(struct sock *sk, int flag);
  int sock_get_timestamp(struct sock *, struct timeval __user *);
  int sock_get_timestampns(struct sock *, struct timespec __user *);
@@@ -2412,4 -2389,34 +2389,34 @@@ static inline int sk_get_rmem0(const st
  	return *proto->sysctl_rmem;
  }
  
+ /* The default TCP Small Queues budget is ~1 ms of data (1 sec >> 10).
+  * Some wifi drivers need to tweak it to get more chunks; they can
+  * use this helper from their ndo_start_xmit().
+  */
+ static inline void sk_pacing_shift_update(struct sock *sk, int val)
+ {
+ 	if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val)
+ 		return;
+ 	sk->sk_pacing_shift = val;
+ }
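A hedged example of the helper above, as a hypothetical wifi driver
might call it from its transmit path (the shift value is illustrative):

	static netdev_tx_t example_ndo_start_xmit(struct sk_buff *skb,
						  struct net_device *dev)
	{
		/* allow ~4 ms of queued data rather than the default
		 * ~1 ms (1 sec >> 8 instead of 1 sec >> 10)
		 */
		sk_pacing_shift_update(skb->sk, 8);
		/* driver-specific transmit work elided */
		return NETDEV_TX_OK;
	}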
+ 
+ /* If a socket is bound to a device, check that the given device
+  * index is either that same device or that the socket is bound to
+  * an L3 master device and the given device index is also enslaved
+  * to that L3 master.
+  */
+ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
+ {
+ 	int mdif;
+ 
+ 	if (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)
+ 		return true;
+ 
+ 	mdif = l3mdev_master_ifindex_by_index(sock_net(sk), dif);
+ 	if (mdif && mdif == sk->sk_bound_dev_if)
+ 		return true;
+ 
+ 	return false;
+ }
+ 
  #endif	/* _SOCK_H */
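And a sketch of sk_dev_equal_l3scope() in a receive-path match (the
function below is hypothetical, not part of this diff):

	/* skip sockets whose binding does not cover the ingress device,
	 * honoring VRF (L3 master) scoping
	 */
	static struct sock *example_demux(struct sock *sk, int ingress_ifindex)
	{
		if (!sk_dev_equal_l3scope(sk, ingress_ifindex))
			return NULL;	/* bound to some other device */
		return sk;
	}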
diff --combined include/net/tcp.h
index 50b21a49d870,6939e69d3c37..26c2793846a1
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@@ -387,7 -387,7 +387,7 @@@ bool tcp_peer_is_proven(struct request_
  void tcp_close(struct sock *sk, long timeout);
  void tcp_init_sock(struct sock *sk);
  void tcp_init_transfer(struct sock *sk, int bpf_op);
 -unsigned int tcp_poll(struct file *file, struct socket *sock,
 +__poll_t tcp_poll(struct file *file, struct socket *sock,
  		      struct poll_table_struct *wait);
  int tcp_getsockopt(struct sock *sk, int level, int optname,
  		   char __user *optval, int __user *optlen);
@@@ -1507,8 -1507,7 +1507,7 @@@ int tcp_md5_hash_key(struct tcp_md5sig_
  
  /* From tcp_fastopen.c */
  void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
- 			    struct tcp_fastopen_cookie *cookie, int *syn_loss,
- 			    unsigned long *last_syn_loss);
+ 			    struct tcp_fastopen_cookie *cookie);
  void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
  			    struct tcp_fastopen_cookie *cookie, bool syn_lost,
  			    u16 try_exp);
@@@ -1546,7 -1545,7 +1545,7 @@@ extern unsigned int sysctl_tcp_fastopen
  void tcp_fastopen_active_disable(struct sock *sk);
  bool tcp_fastopen_active_should_disable(struct sock *sk);
  void tcp_fastopen_active_disable_ofo_check(struct sock *sk);
- void tcp_fastopen_active_timeout_reset(void);
+ void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired);
  
  /* Latencies incurred by various limits for a sender. They are
   * chronograph-like stats that are mutually exclusive.
@@@ -2011,10 -2010,12 +2010,12 @@@ static inline int tcp_call_bpf(struct s
  	struct bpf_sock_ops_kern sock_ops;
  	int ret;
  
- 	if (sk_fullsock(sk))
+ 	memset(&sock_ops, 0, sizeof(sock_ops));
+ 	if (sk_fullsock(sk)) {
+ 		sock_ops.is_fullsock = 1;
  		sock_owned_by_me(sk);
+ 	}
  
- 	memset(&sock_ops, 0, sizeof(sock_ops));
  	sock_ops.sk = sk;
  	sock_ops.op = op;
  
diff --combined kernel/bpf/sockmap.c
index 1712d319c2d8,3f662ee23a34..079968680bc3
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@@ -96,14 -96,6 +96,6 @@@ static inline struct smap_psock *smap_p
  	return rcu_dereference_sk_user_data(sk);
  }
  
- /* compute the linear packet data range [data, data_end) for skb when
-  * sk_skb type programs are in use.
-  */
- static inline void bpf_compute_data_end_sk_skb(struct sk_buff *skb)
- {
- 	TCP_SKB_CB(skb)->bpf.data_end = skb->data + skb_headlen(skb);
- }
- 
  enum __sk_action {
  	__SK_DROP = 0,
  	__SK_PASS,
@@@ -591,15 -583,8 +583,15 @@@ static void sock_map_free(struct bpf_ma
  
  		write_lock_bh(&sock->sk_callback_lock);
  		psock = smap_psock_sk(sock);
 -		smap_list_remove(psock, &stab->sock_map[i]);
 -		smap_release_sock(psock, sock);
 +		/* This check handles a racing sock event that can get the
 +		 * sk_callback_lock before this case but after the xchg
 +		 * happens, causing the refcnt to hit zero and the sock
 +		 * user data (psock) to be NULL and queued for garbage
 +		 * collection.
 +		 */
 +		if (likely(psock)) {
 +			smap_list_remove(psock, &stab->sock_map[i]);
 +			smap_release_sock(psock, sock);
 +		}
  		write_unlock_bh(&sock->sk_callback_lock);
  	}
  	rcu_read_unlock();
diff --combined kernel/bpf/verifier.c
index b414d6b2d470,a2b211262c25..d3b7f9725115
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@@ -20,6 -20,8 +20,8 @@@
  #include <linux/file.h>
  #include <linux/vmalloc.h>
  #include <linux/stringify.h>
+ #include <linux/bsearch.h>
+ #include <linux/sort.h>
  
  #include "disasm.h"
  
@@@ -216,23 -218,48 +218,48 @@@ static const char * const reg_type_str[
  	[PTR_TO_PACKET_END]	= "pkt_end",
  };
  
+ static void print_liveness(struct bpf_verifier_env *env,
+ 			   enum bpf_reg_liveness live)
+ {
+ 	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN))
+ 		verbose(env, "_");
+ 	if (live & REG_LIVE_READ)
+ 		verbose(env, "r");
+ 	if (live & REG_LIVE_WRITTEN)
+ 		verbose(env, "w");
+ }
+ 
+ static struct bpf_func_state *func(struct bpf_verifier_env *env,
+ 				   const struct bpf_reg_state *reg)
+ {
+ 	struct bpf_verifier_state *cur = env->cur_state;
+ 
+ 	return cur->frame[reg->frameno];
+ }
+ 
  static void print_verifier_state(struct bpf_verifier_env *env,
- 				 struct bpf_verifier_state *state)
+ 				 const struct bpf_func_state *state)
  {
- 	struct bpf_reg_state *reg;
+ 	const struct bpf_reg_state *reg;
  	enum bpf_reg_type t;
  	int i;
  
+ 	if (state->frameno)
+ 		verbose(env, " frame%d:", state->frameno);
  	for (i = 0; i < MAX_BPF_REG; i++) {
  		reg = &state->regs[i];
  		t = reg->type;
  		if (t == NOT_INIT)
  			continue;
- 		verbose(env, " R%d=%s", i, reg_type_str[t]);
+ 		verbose(env, " R%d", i);
+ 		print_liveness(env, reg->live);
+ 		verbose(env, "=%s", reg_type_str[t]);
  		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
  		    tnum_is_const(reg->var_off)) {
  			/* reg->off should be 0 for SCALAR_VALUE */
  			verbose(env, "%lld", reg->var_off.value + reg->off);
+ 			if (t == PTR_TO_STACK)
+ 				verbose(env, ",call_%d", func(env, reg)->callsite);
  		} else {
  			verbose(env, "(id=%d", reg->id);
  			if (t != SCALAR_VALUE)
@@@ -277,16 -304,21 +304,21 @@@
  		}
  	}
  	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- 		if (state->stack[i].slot_type[0] == STACK_SPILL)
- 			verbose(env, " fp%d=%s",
- 				-MAX_BPF_STACK + i * BPF_REG_SIZE,
+ 		if (state->stack[i].slot_type[0] == STACK_SPILL) {
+ 			verbose(env, " fp%d",
+ 				(-i - 1) * BPF_REG_SIZE);
+ 			print_liveness(env, state->stack[i].spilled_ptr.live);
+ 			verbose(env, "=%s",
  				reg_type_str[state->stack[i].spilled_ptr.type]);
+ 		}
+ 		if (state->stack[i].slot_type[0] == STACK_ZERO)
+ 			verbose(env, " fp%d=0", (-i - 1) * BPF_REG_SIZE);
  	}
  	verbose(env, "\n");
  }
  
- static int copy_stack_state(struct bpf_verifier_state *dst,
- 			    const struct bpf_verifier_state *src)
+ static int copy_stack_state(struct bpf_func_state *dst,
+ 			    const struct bpf_func_state *src)
  {
  	if (!src->stack)
  		return 0;
@@@ -302,13 -334,13 +334,13 @@@
  
  /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
   * make it consume minimal amount of memory. check_stack_write() access from
-  * the program calls into realloc_verifier_state() to grow the stack size.
+  * the program calls into realloc_func_state() to grow the stack size.
   * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
   * which this function copies over. It points to previous bpf_verifier_state
   * which is never reallocated
   */
- static int realloc_verifier_state(struct bpf_verifier_state *state, int size,
- 				  bool copy_old)
+ static int realloc_func_state(struct bpf_func_state *state, int size,
+ 			      bool copy_old)
  {
  	u32 old_size = state->allocated_stack;
  	struct bpf_stack_state *new_stack;
@@@ -341,10 -373,21 +373,21 @@@
  	return 0;
  }
  
+ static void free_func_state(struct bpf_func_state *state)
+ {
+ 	kfree(state->stack);
+ 	kfree(state);
+ }
+ 
  static void free_verifier_state(struct bpf_verifier_state *state,
  				bool free_self)
  {
- 	kfree(state->stack);
+ 	int i;
+ 
+ 	for (i = 0; i <= state->curframe; i++) {
+ 		free_func_state(state->frame[i]);
+ 		state->frame[i] = NULL;
+ 	}
  	if (free_self)
  		kfree(state);
  }
@@@ -352,18 -395,46 +395,46 @@@
  /* copy verifier state from src to dst growing dst stack space
   * when necessary to accommodate larger src stack
   */
- static int copy_verifier_state(struct bpf_verifier_state *dst,
- 			       const struct bpf_verifier_state *src)
+ static int copy_func_state(struct bpf_func_state *dst,
+ 			   const struct bpf_func_state *src)
  {
  	int err;
  
- 	err = realloc_verifier_state(dst, src->allocated_stack, false);
+ 	err = realloc_func_state(dst, src->allocated_stack, false);
  	if (err)
  		return err;
- 	memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack));
+ 	memcpy(dst, src, offsetof(struct bpf_func_state, allocated_stack));
  	return copy_stack_state(dst, src);
  }
  
+ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
+ 			       const struct bpf_verifier_state *src)
+ {
+ 	struct bpf_func_state *dst;
+ 	int i, err;
+ 
+ 	/* if dst has more stack frames than src, free them */
+ 	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
+ 		free_func_state(dst_state->frame[i]);
+ 		dst_state->frame[i] = NULL;
+ 	}
+ 	dst_state->curframe = src->curframe;
+ 	dst_state->parent = src->parent;
+ 	for (i = 0; i <= src->curframe; i++) {
+ 		dst = dst_state->frame[i];
+ 		if (!dst) {
+ 			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
+ 			if (!dst)
+ 				return -ENOMEM;
+ 			dst_state->frame[i] = dst;
+ 		}
+ 		err = copy_func_state(dst, src->frame[i]);
+ 		if (err)
+ 			return err;
+ 	}
+ 	return 0;
+ }
+ 
  static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
  		     int *insn_idx)
  {
@@@ -425,6 -496,10 +496,10 @@@ err
  static const int caller_saved[CALLER_SAVED_REGS] = {
  	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
  };
+ #define CALLEE_SAVED_REGS 4
+ static const int callee_saved[CALLEE_SAVED_REGS] = {
+ 	BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9
+ };
  
  static void __mark_reg_not_init(struct bpf_reg_state *reg);
  
@@@ -449,6 -524,13 +524,13 @@@ static void __mark_reg_known_zero(struc
  	__mark_reg_known(reg, 0);
  }
  
+ static void __mark_reg_const_zero(struct bpf_reg_state *reg)
+ {
+ 	__mark_reg_known(reg, 0);
+ 	reg->off = 0;
+ 	reg->type = SCALAR_VALUE;
+ }
+ 
  static void mark_reg_known_zero(struct bpf_verifier_env *env,
  				struct bpf_reg_state *regs, u32 regno)
  {
@@@ -560,6 -642,7 +642,7 @@@ static void __mark_reg_unknown(struct b
  	reg->id = 0;
  	reg->off = 0;
  	reg->var_off = tnum_unknown;
+ 	reg->frameno = 0;
  	__mark_reg_unbounded(reg);
  }
  
@@@ -568,8 -651,8 +651,8 @@@ static void mark_reg_unknown(struct bpf
  {
  	if (WARN_ON(regno >= MAX_BPF_REG)) {
  		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
- 		/* Something bad happened, let's kill all regs */
- 		for (regno = 0; regno < MAX_BPF_REG; regno++)
+ 		/* Something bad happened, let's kill all regs except FP */
+ 		for (regno = 0; regno < BPF_REG_FP; regno++)
  			__mark_reg_not_init(regs + regno);
  		return;
  	}
@@@ -587,8 -670,8 +670,8 @@@ static void mark_reg_not_init(struct bp
  {
  	if (WARN_ON(regno >= MAX_BPF_REG)) {
  		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
- 		/* Something bad happened, let's kill all regs */
- 		for (regno = 0; regno < MAX_BPF_REG; regno++)
+ 		/* Something bad happened, let's kill all regs except FP */
+ 		for (regno = 0; regno < BPF_REG_FP; regno++)
  			__mark_reg_not_init(regs + regno);
  		return;
  	}
@@@ -596,8 -679,9 +679,9 @@@
  }
  
  static void init_reg_state(struct bpf_verifier_env *env,
- 			   struct bpf_reg_state *regs)
+ 			   struct bpf_func_state *state)
  {
+ 	struct bpf_reg_state *regs = state->regs;
  	int i;
  
  	for (i = 0; i < MAX_BPF_REG; i++) {
@@@ -608,41 -692,218 +692,218 @@@
  	/* frame pointer */
  	regs[BPF_REG_FP].type = PTR_TO_STACK;
  	mark_reg_known_zero(env, regs, BPF_REG_FP);
+ 	regs[BPF_REG_FP].frameno = state->frameno;
  
  	/* 1st arg to a function */
  	regs[BPF_REG_1].type = PTR_TO_CTX;
  	mark_reg_known_zero(env, regs, BPF_REG_1);
  }
  
+ #define BPF_MAIN_FUNC (-1)
+ static void init_func_state(struct bpf_verifier_env *env,
+ 			    struct bpf_func_state *state,
+ 			    int callsite, int frameno, int subprogno)
+ {
+ 	state->callsite = callsite;
+ 	state->frameno = frameno;
+ 	state->subprogno = subprogno;
+ 	init_reg_state(env, state);
+ }
+ 
  enum reg_arg_type {
  	SRC_OP,		/* register is used as source operand */
  	DST_OP,		/* register is used as destination operand */
  	DST_OP_NO_MARK	/* same as above, check only, don't mark */
  };
  
- static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno)
+ static int cmp_subprogs(const void *a, const void *b)
  {
- 	struct bpf_verifier_state *parent = state->parent;
+ 	return *(int *)a - *(int *)b;
+ }
+ 
+ static int find_subprog(struct bpf_verifier_env *env, int off)
+ {
+ 	u32 *p;
+ 
+ 	p = bsearch(&off, env->subprog_starts, env->subprog_cnt,
+ 		    sizeof(env->subprog_starts[0]), cmp_subprogs);
+ 	if (!p)
+ 		return -ENOENT;
+ 	return p - env->subprog_starts;
+ 
+ }
+ 
+ static int add_subprog(struct bpf_verifier_env *env, int off)
+ {
+ 	int insn_cnt = env->prog->len;
+ 	int ret;
+ 
+ 	if (off >= insn_cnt || off < 0) {
+ 		verbose(env, "call to invalid destination\n");
+ 		return -EINVAL;
+ 	}
+ 	ret = find_subprog(env, off);
+ 	if (ret >= 0)
+ 		return 0;
+ 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
+ 		verbose(env, "too many subprograms\n");
+ 		return -E2BIG;
+ 	}
+ 	env->subprog_starts[env->subprog_cnt++] = off;
+ 	sort(env->subprog_starts, env->subprog_cnt,
+ 	     sizeof(env->subprog_starts[0]), cmp_subprogs, NULL);
+ 	return 0;
+ }
+ 
+ static int check_subprogs(struct bpf_verifier_env *env)
+ {
+ 	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
+ 	struct bpf_insn *insn = env->prog->insnsi;
+ 	int insn_cnt = env->prog->len;
+ 
+ 	/* determine subprog starts. The end is one before the next starts */
+ 	for (i = 0; i < insn_cnt; i++) {
+ 		if (insn[i].code != (BPF_JMP | BPF_CALL))
+ 			continue;
+ 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
+ 			continue;
+ 		if (!env->allow_ptr_leaks) {
+ 			verbose(env, "function calls to other bpf functions are allowed for root only\n");
+ 			return -EPERM;
+ 		}
+ 		if (bpf_prog_is_dev_bound(env->prog->aux)) {
+ 			verbose(env, "function calls in offloaded programs are not supported yet\n");
+ 			return -EINVAL;
+ 		}
+ 		ret = add_subprog(env, i + insn[i].imm + 1);
+ 		if (ret < 0)
+ 			return ret;
+ 	}
+ 
+ 	if (env->log.level > 1)
+ 		for (i = 0; i < env->subprog_cnt; i++)
+ 			verbose(env, "func#%d @%d\n", i, env->subprog_starts[i]);
+ 
+ 	/* now check that all jumps are within the same subprog */
+ 	subprog_start = 0;
+ 	if (env->subprog_cnt == cur_subprog)
+ 		subprog_end = insn_cnt;
+ 	else
+ 		subprog_end = env->subprog_starts[cur_subprog++];
+ 	for (i = 0; i < insn_cnt; i++) {
+ 		u8 code = insn[i].code;
+ 
+ 		if (BPF_CLASS(code) != BPF_JMP)
+ 			goto next;
+ 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
+ 			goto next;
+ 		off = i + insn[i].off + 1;
+ 		if (off < subprog_start || off >= subprog_end) {
+ 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
+ 			return -EINVAL;
+ 		}
+ next:
+ 		if (i == subprog_end - 1) {
+ 			/* to avoid fall-through from one subprog into
+ 			 * another, the last insn of the subprog should be
+ 			 * either an exit or an unconditional jump back
+ 			 */
+ 			if (code != (BPF_JMP | BPF_EXIT) &&
+ 			    code != (BPF_JMP | BPF_JA)) {
+ 				verbose(env, "last insn is not an exit or jmp\n");
+ 				return -EINVAL;
+ 			}
+ 			subprog_start = subprog_end;
+ 			if (env->subprog_cnt == cur_subprog)
+ 				subprog_end = insn_cnt;
+ 			else
+ 				subprog_end = env->subprog_starts[cur_subprog++];
+ 		}
+ 	}
+ 	return 0;
+ }
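For context, a hedged sketch in restricted C (names illustrative) of the
kind of program check_subprogs() partitions; the non-inlined function is
emitted as a separate subprog reached via BPF_PSEUDO_CALL:

	static __attribute__((noinline)) int times_two(int x)
	{
		return x * 2;	/* verified as its own subprog */
	}

	int example_prog(void *ctx)
	{
		/* BPF_JMP | BPF_CALL with src_reg == BPF_PSEUDO_CALL */
		return times_two(1);
	}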
+ 
+ static
+ struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env,
+ 				       const struct bpf_verifier_state *state,
+ 				       struct bpf_verifier_state *parent,
+ 				       u32 regno)
+ {
+ 	struct bpf_verifier_state *tmp = NULL;
+ 
+ 	/* 'parent' could be a state of caller and
+ 	 * 'state' could be a state of callee. In such case
+ 	 * parent->curframe < state->curframe
+ 	 * and it's ok for r1 - r5 registers
+ 	 *
+ 	 * 'parent' could be a callee's state after it bpf_exit-ed.
+ 	 * In such case parent->curframe > state->curframe
+ 	 * and it's ok for r0 only
+ 	 */
+ 	if (parent->curframe == state->curframe ||
+ 	    (parent->curframe < state->curframe &&
+ 	     regno >= BPF_REG_1 && regno <= BPF_REG_5) ||
+ 	    (parent->curframe > state->curframe &&
+ 	       regno == BPF_REG_0))
+ 		return parent;
+ 
+ 	if (parent->curframe > state->curframe &&
+ 	    regno >= BPF_REG_6) {
+ 		/* for callee saved regs we have to skip the whole chain
+ 		 * of states that belong to callee and mark as LIVE_READ
+ 		 * the registers before the call
+ 		 */
+ 		tmp = parent;
+ 		while (tmp && tmp->curframe != state->curframe) {
+ 			tmp = tmp->parent;
+ 		}
+ 		if (!tmp)
+ 			goto bug;
+ 		parent = tmp;
+ 	} else {
+ 		goto bug;
+ 	}
+ 	return parent;
+ bug:
+ 	verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp);
+ 	verbose(env, "regno %d parent frame %d current frame %d\n",
+ 		regno, parent->curframe, state->curframe);
+ 	return NULL;
+ }
+ 
+ static int mark_reg_read(struct bpf_verifier_env *env,
+ 			 const struct bpf_verifier_state *state,
+ 			 struct bpf_verifier_state *parent,
+ 			 u32 regno)
+ {
+ 	bool writes = parent == state->parent; /* Observe write marks */
  
  	if (regno == BPF_REG_FP)
  		/* We don't need to worry about FP liveness because it's read-only */
- 		return;
+ 		return 0;
  
  	while (parent) {
  		/* if read wasn't screened by an earlier write ... */
- 		if (state->regs[regno].live & REG_LIVE_WRITTEN)
+ 		if (writes && state->frame[state->curframe]->regs[regno].live & REG_LIVE_WRITTEN)
  			break;
+ 		parent = skip_callee(env, state, parent, regno);
+ 		if (!parent)
+ 			return -EFAULT;
  		/* ... then we depend on parent's value */
- 		parent->regs[regno].live |= REG_LIVE_READ;
+ 		parent->frame[parent->curframe]->regs[regno].live |= REG_LIVE_READ;
  		state = parent;
  		parent = state->parent;
+ 		writes = true;
  	}
+ 	return 0;
  }
  
  static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
  			 enum reg_arg_type t)
  {
- 	struct bpf_reg_state *regs = env->cur_state->regs;
+ 	struct bpf_verifier_state *vstate = env->cur_state;
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
+ 	struct bpf_reg_state *regs = state->regs;
  
  	if (regno >= MAX_BPF_REG) {
  		verbose(env, "R%d is invalid\n", regno);
@@@ -655,7 -916,7 +916,7 @@@
  			verbose(env, "R%d !read_ok\n", regno);
  			return -EACCES;
  		}
- 		mark_reg_read(env->cur_state, regno);
+ 		return mark_reg_read(env, vstate, vstate->parent, regno);
  	} else {
  		/* check whether register used as dest operand can be written to */
  		if (regno == BPF_REG_FP) {
@@@ -686,17 -947,25 +947,25 @@@ static bool is_spillable_regtype(enum b
  	}
  }
  
+ /* Does this register contain a constant zero? */
+ static bool register_is_null(struct bpf_reg_state *reg)
+ {
+ 	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
+ }
+ 
  /* check_stack_read/write functions track spill/fill of registers,
   * stack boundary and alignment are checked in check_mem_access()
   */
  static int check_stack_write(struct bpf_verifier_env *env,
- 			     struct bpf_verifier_state *state, int off,
- 			     int size, int value_regno)
+ 			     struct bpf_func_state *state, /* func where register points to */
+ 			     int off, int size, int value_regno)
  {
+ 	struct bpf_func_state *cur; /* state of the current function */
  	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
+ 	enum bpf_reg_type type;
  
- 	err = realloc_verifier_state(state, round_up(slot + 1, BPF_REG_SIZE),
- 				     true);
+ 	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
+ 				 true);
  	if (err)
  		return err;
  	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
@@@ -709,8 -978,9 +978,9 @@@
  		return -EACCES;
  	}
  
+ 	cur = env->cur_state->frame[env->cur_state->curframe];
  	if (value_regno >= 0 &&
- 	    is_spillable_regtype(state->regs[value_regno].type)) {
+ 	    is_spillable_regtype((type = cur->regs[value_regno].type))) {
  
  		/* register containing pointer is being spilled into stack */
  		if (size != BPF_REG_SIZE) {
@@@ -718,51 -988,116 +988,116 @@@
  			return -EACCES;
  		}
  
+ 		if (state != cur && type == PTR_TO_STACK) {
+ 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
+ 			return -EINVAL;
+ 		}
+ 
  		/* save register state */
- 		state->stack[spi].spilled_ptr = state->regs[value_regno];
+ 		state->stack[spi].spilled_ptr = cur->regs[value_regno];
  		state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
  
  		for (i = 0; i < BPF_REG_SIZE; i++)
  			state->stack[spi].slot_type[i] = STACK_SPILL;
  	} else {
+ 		u8 type = STACK_MISC;
+ 
  		/* regular write of data into stack */
  		state->stack[spi].spilled_ptr = (struct bpf_reg_state) {};
  
+ 		/* only mark the slot as written if all 8 bytes were written;
+ 		 * otherwise read propagation may incorrectly stop too soon
+ 		 * when stack slots are partially written.
+ 		 * This heuristic means that read propagation will be
+ 		 * conservative, since it will add reg_live_read marks
+ 		 * to stack slots all the way to the first state when a
+ 		 * program writes and reads less than 8 bytes
+ 		 */
+ 		if (size == BPF_REG_SIZE)
+ 			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
+ 
+ 		/* when we zero initialize stack slots mark them as such */
+ 		if (value_regno >= 0 &&
+ 		    register_is_null(&cur->regs[value_regno]))
+ 			type = STACK_ZERO;
+ 
  		for (i = 0; i < size; i++)
  			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
- 				STACK_MISC;
+ 				type;
  	}
  	return 0;
  }
  
- static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slot)
+ /* registers of every function are unique and mark_reg_read() propagates
+  * the liveness in the following cases:
+  * - from callee into caller for R1 - R5 that were used as arguments
+  * - from caller into callee for R0 that is used as the result of the call
+  * - from caller to the same caller skipping states of the callee for R6 - R9,
+  *   since R6 - R9 are callee saved by implicit function prologue and
+  *   caller's R6 != callee's R6, so when we propagate liveness up to
+  *   parent states we need to skip callee states for R6 - R9.
+  *
+  * stack slot marking is different, since stacks of caller and callee are
+  * accessible in both (since caller can pass a pointer to caller's stack to
+  * callee which can pass it to another function), hence mark_stack_slot_read()
+  * has to propagate the stack liveness to all parent states at a given frame number.
+  * Consider code:
+  * f1() {
+  *   ptr = fp - 8;
+  *   *ptr = ctx;
+  *   call f2 {
+  *      .. = *ptr;
+  *   }
+  *   .. = *ptr;
+  * }
+  * First *ptr is reading from f1's stack and mark_stack_slot_read() has
+  * to mark liveness at the f1's frame and not f2's frame.
+  * Second *ptr is also reading from f1's stack and mark_stack_slot_read() has
+  * to propagate liveness to f2 states at f1's frame level and further into
+  * f1 states at f1's frame level until write into that stack slot
+  */
+ static void mark_stack_slot_read(struct bpf_verifier_env *env,
+ 				 const struct bpf_verifier_state *state,
+ 				 struct bpf_verifier_state *parent,
+ 				 int slot, int frameno)
  {
- 	struct bpf_verifier_state *parent = state->parent;
+ 	bool writes = parent == state->parent; /* Observe write marks */
  
  	while (parent) {
+ 		if (parent->frame[frameno]->allocated_stack <= slot * BPF_REG_SIZE)
+ 			/* since the LIVE_WRITTEN mark is only done for a full
+ 			 * 8-byte write, the read marks are conservative and
+ 			 * the parent state may not even have the stack
+ 			 * allocated. In that case end the propagation, since
+ 			 * the loop reached the beginning of the function
+ 			 */
+ 			break;
  		/* if read wasn't screened by an earlier write ... */
- 		if (state->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
+ 		if (writes && state->frame[frameno]->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN)
  			break;
  		/* ... then we depend on parent's value */
- 		parent->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
+ 		parent->frame[frameno]->stack[slot].spilled_ptr.live |= REG_LIVE_READ;
  		state = parent;
  		parent = state->parent;
+ 		writes = true;
  	}
  }
  
  static int check_stack_read(struct bpf_verifier_env *env,
- 			    struct bpf_verifier_state *state, int off, int size,
- 			    int value_regno)
+ 			    struct bpf_func_state *reg_state /* func where register points to */,
+ 			    int off, int size, int value_regno)
  {
+ 	struct bpf_verifier_state *vstate = env->cur_state;
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
  	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
  	u8 *stype;
  
- 	if (state->allocated_stack <= slot) {
+ 	if (reg_state->allocated_stack <= slot) {
  		verbose(env, "invalid read from stack off %d+0 size %d\n",
  			off, size);
  		return -EACCES;
  	}
- 	stype = state->stack[spi].slot_type;
+ 	stype = reg_state->stack[spi].slot_type;
  
  	if (stype[0] == STACK_SPILL) {
  		if (size != BPF_REG_SIZE) {
@@@ -778,21 -1113,44 +1113,44 @@@
  
  		if (value_regno >= 0) {
  			/* restore register state from stack */
- 			state->regs[value_regno] = state->stack[spi].spilled_ptr;
- 			mark_stack_slot_read(state, spi);
+ 			state->regs[value_regno] = reg_state->stack[spi].spilled_ptr;
+ 			/* mark reg as written since spilled pointer state likely
+ 			 * has its liveness marks cleared by is_state_visited()
+ 			 * which resets stack/reg liveness for state transitions
+ 			 */
+ 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
  		}
+ 		mark_stack_slot_read(env, vstate, vstate->parent, spi,
+ 				     reg_state->frameno);
  		return 0;
  	} else {
+ 		int zeros = 0;
+ 
  		for (i = 0; i < size; i++) {
- 			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_MISC) {
- 				verbose(env, "invalid read from stack off %d+%d size %d\n",
- 					off, i, size);
- 				return -EACCES;
+ 			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_MISC)
+ 				continue;
+ 			if (stype[(slot - i) % BPF_REG_SIZE] == STACK_ZERO) {
+ 				zeros++;
+ 				continue;
  			}
+ 			verbose(env, "invalid read from stack off %d+%d size %d\n",
+ 				off, i, size);
+ 			return -EACCES;
+ 		}
+ 		mark_stack_slot_read(env, vstate, vstate->parent, spi,
+ 				     reg_state->frameno);
+ 		if (value_regno >= 0) {
+ 			if (zeros == size) {
+ 				/* any size read into register is zero extended,
+ 				 * so the whole register == const_zero
+ 				 */
+ 				__mark_reg_const_zero(&state->regs[value_regno]);
+ 			} else {
+ 				/* have read misc data from the stack */
+ 				mark_reg_unknown(env, state->regs, value_regno);
+ 			}
+ 			state->regs[value_regno].live |= REG_LIVE_WRITTEN;
  		}
- 		if (value_regno >= 0)
- 			/* have read misc data from the stack */
- 			mark_reg_unknown(env, state->regs, value_regno);
  		return 0;
  	}
  }
@@@ -817,7 -1175,8 +1175,8 @@@ static int __check_map_access(struct bp
  static int check_map_access(struct bpf_verifier_env *env, u32 regno,
  			    int off, int size, bool zero_size_allowed)
  {
- 	struct bpf_verifier_state *state = env->cur_state;
+ 	struct bpf_verifier_state *vstate = env->cur_state;
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
  	struct bpf_reg_state *reg = &state->regs[regno];
  	int err;
  
@@@ -1072,6 -1431,101 +1431,101 @@@ static int check_ptr_alignment(struct b
  					   strict);
  }
  
+ static int update_stack_depth(struct bpf_verifier_env *env,
+ 			      const struct bpf_func_state *func,
+ 			      int off)
+ {
+ 	u16 stack = env->subprog_stack_depth[func->subprogno];
+ 
+ 	if (stack >= -off)
+ 		return 0;
+ 
+ 	/* update known max for given subprogram */
+ 	env->subprog_stack_depth[func->subprogno] = -off;
+ 	return 0;
+ }
+ 
+ /* starting from main bpf function walk all instructions of the function
+  * and recursively walk all callees that given function can call.
+  * Ignore jump and exit insns.
+  * Since recursion is prevented by check_cfg() this algorithm
+  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
+  */
+ static int check_max_stack_depth(struct bpf_verifier_env *env)
+ {
+ 	int depth = 0, frame = 0, subprog = 0, i = 0, subprog_end;
+ 	struct bpf_insn *insn = env->prog->insnsi;
+ 	int insn_cnt = env->prog->len;
+ 	int ret_insn[MAX_CALL_FRAMES];
+ 	int ret_prog[MAX_CALL_FRAMES];
+ 
+ process_func:
+ 	/* round up to 32 bytes, since this is the granularity
+ 	 * of the interpreter stack size
+ 	 */
+ 	depth += round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
+ 	if (depth > MAX_BPF_STACK) {
+ 		verbose(env, "combined stack size of %d calls is %d. Too large\n",
+ 			frame + 1, depth);
+ 		return -EACCES;
+ 	}
+ continue_func:
+ 	if (env->subprog_cnt == subprog)
+ 		subprog_end = insn_cnt;
+ 	else
+ 		subprog_end = env->subprog_starts[subprog];
+ 	for (; i < subprog_end; i++) {
+ 		if (insn[i].code != (BPF_JMP | BPF_CALL))
+ 			continue;
+ 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
+ 			continue;
+ 		/* remember insn and function to return to */
+ 		ret_insn[frame] = i + 1;
+ 		ret_prog[frame] = subprog;
+ 
+ 		/* find the callee */
+ 		i = i + insn[i].imm + 1;
+ 		subprog = find_subprog(env, i);
+ 		if (subprog < 0) {
+ 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
+ 				  i);
+ 			return -EFAULT;
+ 		}
+ 		subprog++;
+ 		frame++;
+ 		if (frame >= MAX_CALL_FRAMES) {
+ 			WARN_ONCE(1, "verifier bug. Call stack is too deep\n");
+ 			return -EFAULT;
+ 		}
+ 		goto process_func;
+ 	}
+ 	/* end of for() loop means the last insn of the 'subprog'
+ 	 * was reached. Doesn't matter whether it was JA or EXIT
+ 	 */
+ 	if (frame == 0)
+ 		return 0;
+ 	depth -= round_up(max_t(u32, env->subprog_stack_depth[subprog], 1), 32);
+ 	frame--;
+ 	i = ret_insn[frame];
+ 	subprog = ret_prog[frame];
+ 	goto continue_func;
+ }
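A worked example of the check above (stack sizes hypothetical): if main
uses 64 bytes and calls f1 (256 bytes), which in turn calls f2 (128
bytes), the deepest chain costs 64 + 256 + 128 = 448 bytes, within
MAX_BPF_STACK (512); growing f2 to 224 bytes would raise the total to
544 and trigger the "combined stack size ... Too large" error above.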
+ 
+ static int get_callee_stack_depth(struct bpf_verifier_env *env,
+ 				  const struct bpf_insn *insn, int idx)
+ {
+ 	int start = idx + insn->imm + 1, subprog;
+ 
+ 	subprog = find_subprog(env, start);
+ 	if (subprog < 0) {
+ 		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
+ 			  start);
+ 		return -EFAULT;
+ 	}
+ 	subprog++;
+ 	return env->subprog_stack_depth[subprog];
+ }
+ 
  /* truncate register to smaller size (in bytes)
   * must be called with size < BPF_REG_SIZE
   */
@@@ -1105,9 -1559,9 +1559,9 @@@ static int check_mem_access(struct bpf_
  			    int bpf_size, enum bpf_access_type t,
  			    int value_regno)
  {
- 	struct bpf_verifier_state *state = env->cur_state;
  	struct bpf_reg_state *regs = cur_regs(env);
  	struct bpf_reg_state *reg = regs + regno;
+ 	struct bpf_func_state *state;
  	int size, err = 0;
  
  	size = bpf_size_to_bytes(bpf_size);
@@@ -1196,8 -1650,10 +1650,10 @@@
  			return -EACCES;
  		}
  
- 		if (env->prog->aux->stack_depth < -off)
- 			env->prog->aux->stack_depth = -off;
+ 		state = func(env, reg);
+ 		err = update_stack_depth(env, state, off);
+ 		if (err)
+ 			return err;
  
  		if (t == BPF_WRITE)
  			err = check_stack_write(env, state, off, size,
@@@ -1269,12 -1725,6 +1725,6 @@@ static int check_xadd(struct bpf_verifi
  				BPF_SIZE(insn->code), BPF_WRITE, -1);
  }
  
- /* Does this register contain a constant zero? */
- static bool register_is_null(struct bpf_reg_state reg)
- {
- 	return reg.type == SCALAR_VALUE && tnum_equals_const(reg.var_off, 0);
- }
- 
  /* when register 'regno' is passed into function that will read 'access_size'
   * bytes from that pointer, make sure that it's within stack boundary
   * and all elements of stack are initialized.
@@@ -1285,32 -1735,32 +1735,32 @@@ static int check_stack_boundary(struct 
  				int access_size, bool zero_size_allowed,
  				struct bpf_call_arg_meta *meta)
  {
- 	struct bpf_verifier_state *state = env->cur_state;
- 	struct bpf_reg_state *regs = state->regs;
+ 	struct bpf_reg_state *reg = cur_regs(env) + regno;
+ 	struct bpf_func_state *state = func(env, reg);
  	int off, i, slot, spi;
  
- 	if (regs[regno].type != PTR_TO_STACK) {
+ 	if (reg->type != PTR_TO_STACK) {
  		/* Allow zero-byte read from NULL, regardless of pointer type */
  		if (zero_size_allowed && access_size == 0 &&
- 		    register_is_null(regs[regno]))
+ 		    register_is_null(reg))
  			return 0;
  
  		verbose(env, "R%d type=%s expected=%s\n", regno,
- 			reg_type_str[regs[regno].type],
+ 			reg_type_str[reg->type],
  			reg_type_str[PTR_TO_STACK]);
  		return -EACCES;
  	}
  
  	/* Only allow fixed-offset stack reads */
- 	if (!tnum_is_const(regs[regno].var_off)) {
+ 	if (!tnum_is_const(reg->var_off)) {
  		char tn_buf[48];
  
- 		tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off);
+ 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
  		verbose(env, "invalid variable stack read R%d var_off=%s\n",
  			regno, tn_buf);
  		return -EACCES;
  	}
- 	off = regs[regno].off + regs[regno].var_off.value;
+ 	off = reg->off + reg->var_off.value;
  	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
  	    access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
  		verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
@@@ -1318,9 -1768,6 +1768,6 @@@
  		return -EACCES;
  	}
  
- 	if (env->prog->aux->stack_depth < -off)
- 		env->prog->aux->stack_depth = -off;
- 
  	if (meta && meta->raw_mode) {
  		meta->access_size = access_size;
  		meta->regno = regno;
@@@ -1328,17 -1775,32 +1775,32 @@@
  	}
  
  	for (i = 0; i < access_size; i++) {
+ 		u8 *stype;
+ 
  		slot = -(off + i) - 1;
  		spi = slot / BPF_REG_SIZE;
- 		if (state->allocated_stack <= slot ||
- 		    state->stack[spi].slot_type[slot % BPF_REG_SIZE] !=
- 			STACK_MISC) {
- 			verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
- 				off, i, access_size);
- 			return -EACCES;
+ 		if (state->allocated_stack <= slot)
+ 			goto err;
+ 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
+ 		if (*stype == STACK_MISC)
+ 			goto mark;
+ 		if (*stype == STACK_ZERO) {
+ 			/* helper can write anything into the stack */
+ 			*stype = STACK_MISC;
+ 			goto mark;
  		}
+ err:
+ 		verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
+ 			off, i, access_size);
+ 		return -EACCES;
+ mark:
+ 		/* reading any byte out of an 8-byte 'spill_slot' will
+ 		 * cause the whole slot to be marked as 'read'
+ 		 */
+ 		mark_stack_slot_read(env, env->cur_state, env->cur_state->parent,
+ 				     spi, state->frameno);
  	}
- 	return 0;
+ 	return update_stack_depth(env, state, off);
  }
  
  static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
@@@ -1418,7 -1880,7 +1880,7 @@@ static int check_func_arg(struct bpf_ve
  		 * passed in as argument, it's a SCALAR_VALUE type. Final test
  		 * happens during stack boundary checking.
  		 */
- 		if (register_is_null(*reg) &&
+ 		if (register_is_null(reg) &&
  		    arg_type == ARG_PTR_TO_MEM_OR_NULL)
  			/* final test in check_stack_boundary() */;
  		else if (!type_is_pkt_pointer(type) &&
@@@ -1591,6 -2053,10 +2053,10 @@@ static int check_map_func_compatibility
  	case BPF_FUNC_tail_call:
  		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
  			goto error;
+ 		if (env->subprog_cnt) {
+ 			verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
+ 			return -EINVAL;
+ 		}
  		break;
  	case BPF_FUNC_perf_event_read:
  	case BPF_FUNC_perf_event_output:
@@@ -1652,9 -2118,9 +2118,9 @@@ static int check_raw_mode(const struct 
  /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
   * are now invalid, so turn them into unknown SCALAR_VALUE.
   */
- static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
+ static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
+ 				     struct bpf_func_state *state)
  {
- 	struct bpf_verifier_state *state = env->cur_state;
  	struct bpf_reg_state *regs = state->regs, *reg;
  	int i;
  
@@@ -1671,7 -2137,121 +2137,121 @@@
  	}
  }
  
- static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
+ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
+ {
+ 	struct bpf_verifier_state *vstate = env->cur_state;
+ 	int i;
+ 
+ 	for (i = 0; i <= vstate->curframe; i++)
+ 		__clear_all_pkt_pointers(env, vstate->frame[i]);
+ }
+ 
+ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
+ 			   int *insn_idx)
+ {
+ 	struct bpf_verifier_state *state = env->cur_state;
+ 	struct bpf_func_state *caller, *callee;
+ 	int i, subprog, target_insn;
+ 
+ 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
+ 		verbose(env, "the call stack of %d frames is too deep\n",
+ 			state->curframe + 2);
+ 		return -E2BIG;
+ 	}
+ 
+ 	target_insn = *insn_idx + insn->imm;
+ 	subprog = find_subprog(env, target_insn + 1);
+ 	if (subprog < 0) {
+ 		verbose(env, "verifier bug. No program starts at insn %d\n",
+ 			target_insn + 1);
+ 		return -EFAULT;
+ 	}
+ 
+ 	caller = state->frame[state->curframe];
+ 	if (state->frame[state->curframe + 1]) {
+ 		verbose(env, "verifier bug. Frame %d already allocated\n",
+ 			state->curframe + 1);
+ 		return -EFAULT;
+ 	}
+ 
+ 	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
+ 	if (!callee)
+ 		return -ENOMEM;
+ 	state->frame[state->curframe + 1] = callee;
+ 
+ 	/* callee cannot access r0, r6 - r9 for reading and has to write
+ 	 * into its own stack before reading from it.
+ 	 * callee can read/write into caller's stack
+ 	 */
+ 	init_func_state(env, callee,
+ 			/* remember the callsite, it will be used by bpf_exit */
+ 			*insn_idx /* callsite */,
+ 			state->curframe + 1 /* frameno within this callchain */,
+ 			subprog + 1 /* subprog number within this prog */);
+ 
+ 	/* copy r1 - r5 args that callee can access */
+ 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
+ 		callee->regs[i] = caller->regs[i];
+ 
+ 	/* after the call, registers r0 - r5 were scratched */
+ 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
+ 		mark_reg_not_init(env, caller->regs, caller_saved[i]);
+ 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
+ 	}
+ 
+ 	/* only increment it after check_reg_arg() finished */
+ 	state->curframe++;
+ 
+ 	/* and go analyze first insn of the callee */
+ 	*insn_idx = target_insn;
+ 
+ 	if (env->log.level) {
+ 		verbose(env, "caller:\n");
+ 		print_verifier_state(env, caller);
+ 		verbose(env, "callee:\n");
+ 		print_verifier_state(env, callee);
+ 	}
+ 	return 0;
+ }
+ 
+ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
+ {
+ 	struct bpf_verifier_state *state = env->cur_state;
+ 	struct bpf_func_state *caller, *callee;
+ 	struct bpf_reg_state *r0;
+ 
+ 	callee = state->frame[state->curframe];
+ 	r0 = &callee->regs[BPF_REG_0];
+ 	if (r0->type == PTR_TO_STACK) {
+ 		/* technically it's ok to return caller's stack pointer
+ 		 * (or caller's caller's pointer) back to the caller,
+ 		 * since these pointers are valid. Only the current stack
+ 		 * pointer will become invalid as soon as the function
+ 		 * exits, but let's be conservative
+ 		 */
+ 		verbose(env, "cannot return stack pointer to the caller\n");
+ 		return -EINVAL;
+ 	}
+ 
+ 	state->curframe--;
+ 	caller = state->frame[state->curframe];
+ 	/* return to the caller whatever r0 had in the callee */
+ 	caller->regs[BPF_REG_0] = *r0;
+ 
+ 	*insn_idx = callee->callsite + 1;
+ 	if (env->log.level) {
+ 		verbose(env, "returning from callee:\n");
+ 		print_verifier_state(env, callee);
+ 		verbose(env, "to caller at %d:\n", *insn_idx);
+ 		print_verifier_state(env, caller);
+ 	}
+ 	/* clear everything in the callee */
+ 	free_func_state(callee);
+ 	state->frame[state->curframe + 1] = NULL;
+ 	return 0;
+ }
+ 
+ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
  {
  	const struct bpf_func_proto *fn = NULL;
  	struct bpf_reg_state *regs;
@@@ -1729,13 -2309,6 +2309,13 @@@
  	err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
  	if (err)
  		return err;
 +	if (func_id == BPF_FUNC_tail_call) {
 +		if (meta.map_ptr == NULL) {
 +			verbose(env, "verifier bug\n");
 +			return -EINVAL;
 +		}
 +		env->insn_aux_data[insn_idx].map_ptr = meta.map_ptr;
 +	}
  	err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
  	if (err)
  		return err;
@@@ -1871,7 -2444,9 +2451,9 @@@ static int adjust_ptr_min_max_vals(stru
  				   const struct bpf_reg_state *ptr_reg,
  				   const struct bpf_reg_state *off_reg)
  {
- 	struct bpf_reg_state *regs = cur_regs(env), *dst_reg;
+ 	struct bpf_verifier_state *vstate = env->cur_state;
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
+ 	struct bpf_reg_state *regs = state->regs, *dst_reg;
  	bool known = tnum_is_const(off_reg->var_off);
  	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
  	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
@@@ -1883,13 -2458,13 +2465,13 @@@
  	dst_reg = &regs[dst];
  
  	if (WARN_ON_ONCE(known && (smin_val != smax_val))) {
- 		print_verifier_state(env, env->cur_state);
+ 		print_verifier_state(env, state);
  		verbose(env,
  			"verifier internal error: known but bad sbounds\n");
  		return -EINVAL;
  	}
  	if (WARN_ON_ONCE(known && (umin_val != umax_val))) {
- 		print_verifier_state(env, env->cur_state);
+ 		print_verifier_state(env, state);
  		verbose(env,
  			"verifier internal error: known but bad ubounds\n");
  		return -EINVAL;
@@@ -2301,7 -2876,9 +2883,9 @@@ static int adjust_scalar_min_max_vals(s
  static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
  				   struct bpf_insn *insn)
  {
- 	struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg;
+ 	struct bpf_verifier_state *vstate = env->cur_state;
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
+ 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
  	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
  	u8 opcode = BPF_OP(insn->code);
  
@@@ -2352,12 -2929,12 +2936,12 @@@
  
  	/* Got here implies adding two SCALAR_VALUEs */
  	if (WARN_ON_ONCE(ptr_reg)) {
- 		print_verifier_state(env, env->cur_state);
+ 		print_verifier_state(env, state);
  		verbose(env, "verifier internal error: unexpected ptr_reg\n");
  		return -EINVAL;
  	}
  	if (WARN_ON(!src_reg)) {
- 		print_verifier_state(env, env->cur_state);
+ 		print_verifier_state(env, state);
  		verbose(env, "verifier internal error: no src_reg\n");
  		return -EINVAL;
  	}
@@@ -2514,14 -3091,15 +3098,15 @@@ static int check_alu_op(struct bpf_veri
  	return 0;
  }
  
- static void find_good_pkt_pointers(struct bpf_verifier_state *state,
+ static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
  				   struct bpf_reg_state *dst_reg,
  				   enum bpf_reg_type type,
  				   bool range_right_open)
  {
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
  	struct bpf_reg_state *regs = state->regs, *reg;
  	u16 new_range;
- 	int i;
+ 	int i, j;
  
  	if (dst_reg->off < 0 ||
  	    (dst_reg->off == 0 && range_right_open))
@@@ -2591,12 -3169,15 +3176,15 @@@
  			/* keep the maximum range already checked */
  			regs[i].range = max(regs[i].range, new_range);
  
- 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- 		if (state->stack[i].slot_type[0] != STACK_SPILL)
- 			continue;
- 		reg = &state->stack[i].spilled_ptr;
- 		if (reg->type == type && reg->id == dst_reg->id)
- 			reg->range = max(reg->range, new_range);
+ 	for (j = 0; j <= vstate->curframe; j++) {
+ 		state = vstate->frame[j];
+ 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ 			if (state->stack[i].slot_type[0] != STACK_SPILL)
+ 				continue;
+ 			reg = &state->stack[i].spilled_ptr;
+ 			if (reg->type == type && reg->id == dst_reg->id)
+ 				reg->range = max(reg->range, new_range);
+ 		}
  	}
  }
  
@@@ -2834,20 -3415,24 +3422,24 @@@ static void mark_map_reg(struct bpf_reg
  /* The logic is similar to find_good_pkt_pointers(); both could eventually
   * be folded together at some point.
   */
- static void mark_map_regs(struct bpf_verifier_state *state, u32 regno,
+ static void mark_map_regs(struct bpf_verifier_state *vstate, u32 regno,
  			  bool is_null)
  {
+ 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
  	struct bpf_reg_state *regs = state->regs;
  	u32 id = regs[regno].id;
- 	int i;
+ 	int i, j;
  
  	for (i = 0; i < MAX_BPF_REG; i++)
  		mark_map_reg(regs, i, id, is_null);
  
- 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
- 		if (state->stack[i].slot_type[0] != STACK_SPILL)
- 			continue;
- 		mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
+ 	for (j = 0; j <= vstate->curframe; j++) {
+ 		state = vstate->frame[j];
+ 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+ 			if (state->stack[i].slot_type[0] != STACK_SPILL)
+ 				continue;
+ 			mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null);
+ 		}
  	}
  }
  
@@@ -2947,8 -3532,10 +3539,10 @@@ static bool try_match_pkt_pointers(cons
  static int check_cond_jmp_op(struct bpf_verifier_env *env,
  			     struct bpf_insn *insn, int *insn_idx)
  {
- 	struct bpf_verifier_state *other_branch, *this_branch = env->cur_state;
- 	struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
+ 	struct bpf_verifier_state *this_branch = env->cur_state;
+ 	struct bpf_verifier_state *other_branch;
+ 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
+ 	struct bpf_reg_state *dst_reg, *other_branch_regs;
  	u8 opcode = BPF_OP(insn->code);
  	int err;
  
@@@ -2991,8 -3578,9 +3585,9 @@@
  	if (BPF_SRC(insn->code) == BPF_K &&
  	    (opcode == BPF_JEQ || opcode == BPF_JNE) &&
  	    dst_reg->type == SCALAR_VALUE &&
- 	    tnum_equals_const(dst_reg->var_off, insn->imm)) {
- 		if (opcode == BPF_JEQ) {
+ 	    tnum_is_const(dst_reg->var_off)) {
+ 		if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) ||
+ 		    (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) {
  			/* if (imm == imm) goto pc+off;
  			 * only follow the goto, ignore fall-through
  			 */
@@@ -3010,6 -3598,7 +3605,7 @@@
  	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
  	if (!other_branch)
  		return -EFAULT;
+ 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
  
  	/* detect if we are comparing against a constant value so we can adjust
  	 * our min/max values for our dst register.
@@@ -3022,22 -3611,22 +3618,22 @@@
  		if (dst_reg->type == SCALAR_VALUE &&
  		    regs[insn->src_reg].type == SCALAR_VALUE) {
  			if (tnum_is_const(regs[insn->src_reg].var_off))
- 				reg_set_min_max(&other_branch->regs[insn->dst_reg],
+ 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
  						dst_reg, regs[insn->src_reg].var_off.value,
  						opcode);
  			else if (tnum_is_const(dst_reg->var_off))
- 				reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
+ 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
  						    &regs[insn->src_reg],
  						    dst_reg->var_off.value, opcode);
  			else if (opcode == BPF_JEQ || opcode == BPF_JNE)
  				/* Comparing for equality, we can combine knowledge */
- 				reg_combine_min_max(&other_branch->regs[insn->src_reg],
- 						    &other_branch->regs[insn->dst_reg],
+ 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
+ 						    &other_branch_regs[insn->dst_reg],
  						    &regs[insn->src_reg],
  						    &regs[insn->dst_reg], opcode);
  		}
  	} else if (dst_reg->type == SCALAR_VALUE) {
- 		reg_set_min_max(&other_branch->regs[insn->dst_reg],
+ 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
  					dst_reg, insn->imm, opcode);
  	}
  
@@@ -3058,7 -3647,7 +3654,7 @@@
  		return -EACCES;
  	}
  	if (env->log.level)
- 		print_verifier_state(env, this_branch);
+ 		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
  	return 0;
  }
  
@@@ -3143,6 -3732,18 +3739,18 @@@ static int check_ld_abs(struct bpf_veri
  		return -EINVAL;
  	}
  
+ 	if (env->subprog_cnt) {
+ 		/* when a program has an LD_ABS insn, JITs and the interpreter
+ 		 * assume that r1 == ctx == skb, which is not the case for
+ 		 * callees, since those can have arbitrary arguments. It's
+ 		 * problematic for the main prog as well, since JITs would need
+ 		 * to analyze all functions in order to make proper register
+ 		 * save/restore decisions in the main prog. Hence disallow
+ 		 * LD_ABS together with calls.
+ 		 */
+ 		verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
+ 		return -EINVAL;
+ 	}
+ 
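
For illustration, a minimal program that the check above rejects, assuming the
usual insn macros from include/linux/filter.h; the packet-load shortcut and the
pseudo call cannot coexist:

	BPF_LD_ABS(BPF_B, 0),                                        /* needs r1 == ctx == skb */
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, 0, 1),  /* call pc+1 */
	BPF_EXIT_INSN(),
	BPF_MOV64_IMM(BPF_REG_0, 0),                                 /* callee */
	BPF_EXIT_INSN(),
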
  	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
  	    BPF_SIZE(insn->code) == BPF_DW ||
  	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
@@@ -3319,6 -3920,10 +3927,10 @@@ static int check_cfg(struct bpf_verifie
  	int ret = 0;
  	int i, t;
  
+ 	ret = check_subprogs(env);
+ 	if (ret < 0)
+ 		return ret;
+ 
  	insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
  	if (!insn_state)
  		return -ENOMEM;
@@@ -3351,6 -3956,14 +3963,14 @@@ peek_stack
  				goto err_free;
  			if (t + 1 < insn_cnt)
  				env->explored_states[t + 1] = STATE_LIST_MARK;
+ 			if (insns[t].src_reg == BPF_PSEUDO_CALL) {
+ 				env->explored_states[t] = STATE_LIST_MARK;
+ 				ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
+ 				if (ret == 1)
+ 					goto peek_stack;
+ 				else if (ret < 0)
+ 					goto err_free;
+ 			}
  		} else if (opcode == BPF_JA) {
  			if (BPF_SRC(insns[t].code) != BPF_K) {
  				ret = -EINVAL;
@@@ -3469,11 -4082,21 +4089,21 @@@ static bool check_ids(u32 old_id, u32 c
  static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
  		    struct idpair *idmap)
  {
+ 	bool equal;
+ 
  	if (!(rold->live & REG_LIVE_READ))
  		/* explored state didn't use this */
  		return true;
  
- 	if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, live)) == 0)
+ 	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, frameno)) == 0;
+ 
+ 	if (rold->type == PTR_TO_STACK)
+ 		/* two stack pointers are equal only if they're pointing to
+ 		 * the same stack frame, since fp-8 in foo != fp-8 in bar
+ 		 */
+ 		return equal && rold->frameno == rcur->frameno;
+ 
+ 	if (equal)
  		return true;
  
  	if (rold->type == NOT_INIT)
@@@ -3545,7 -4168,6 +4175,6 @@@
  		       tnum_in(rold->var_off, rcur->var_off);
  	case PTR_TO_CTX:
  	case CONST_PTR_TO_MAP:
- 	case PTR_TO_STACK:
  	case PTR_TO_PACKET_END:
  		/* Only valid matches are exact, which memcmp() above
  		 * would have accepted
@@@ -3560,8 -4182,8 +4189,8 @@@
  	return false;
  }
  
- static bool stacksafe(struct bpf_verifier_state *old,
- 		      struct bpf_verifier_state *cur,
+ static bool stacksafe(struct bpf_func_state *old,
+ 		      struct bpf_func_state *cur,
  		      struct idpair *idmap)
  {
  	int i, spi;
@@@ -3579,8 -4201,19 +4208,19 @@@
  	for (i = 0; i < old->allocated_stack; i++) {
  		spi = i / BPF_REG_SIZE;
  
+ 		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ))
+ 			/* explored state didn't use this */
+ 			continue;
+ 
  		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
  			continue;
+ 		/* if the old state was safe with misc data in the stack,
+ 		 * it will be safe with a zero-initialized stack.
+ 		 * The opposite is not true.
+ 		 */
+ 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
+ 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
+ 			continue;
  		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
  		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
  			/* Ex: old explored (safe) state has STACK_SPILL in
@@@ -3637,9 -4270,8 +4277,8 @@@
   * whereas register type in current state is meaningful, it means that
   * the current state will reach 'bpf_exit' instruction safely
   */
- static bool states_equal(struct bpf_verifier_env *env,
- 			 struct bpf_verifier_state *old,
- 			 struct bpf_verifier_state *cur)
+ static bool func_states_equal(struct bpf_func_state *old,
+ 			      struct bpf_func_state *cur)
  {
  	struct idpair *idmap;
  	bool ret = false;
@@@ -3663,71 -4295,72 +4302,72 @@@ out_free
  	return ret;
  }
  
+ static bool states_equal(struct bpf_verifier_env *env,
+ 			 struct bpf_verifier_state *old,
+ 			 struct bpf_verifier_state *cur)
+ {
+ 	int i;
+ 
+ 	if (old->curframe != cur->curframe)
+ 		return false;
+ 
+ 	/* for states to be equal, callsites have to be the same
+ 	 * and all frame states need to be equivalent
+ 	 */
+ 	for (i = 0; i <= old->curframe; i++) {
+ 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
+ 			return false;
+ 		if (!func_states_equal(old->frame[i], cur->frame[i]))
+ 			return false;
+ 	}
+ 	return true;
+ }
+ 
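
For example, if one path reached the current insn through a call at insn 5 and
another through a call at insn 9, frame[1]->callsite differs, so the two states
are never treated as equivalent for pruning, even if every register and stack
slot happens to match.
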
  /* A write screens off any subsequent reads; but write marks come from the
-  * straight-line code between a state and its parent.  When we arrive at a
-  * jump target (in the first iteration of the propagate_liveness() loop),
-  * we didn't arrive by the straight-line code, so read marks in state must
-  * propagate to parent regardless of state's write marks.
+  * straight-line code between a state and its parent.  When we arrive at an
+  * equivalent state (a jump target, for example) we didn't arrive by the
+  * straight-line code, so read marks in the state must propagate to the
+  * parent regardless of the state's write marks. That's what the
+  * 'parent == state->parent' comparison in mark_reg_read() and
+  * mark_stack_slot_read() is for.
   */
- static bool do_propagate_liveness(const struct bpf_verifier_state *state,
- 				  struct bpf_verifier_state *parent)
+ static int propagate_liveness(struct bpf_verifier_env *env,
+ 			      const struct bpf_verifier_state *vstate,
+ 			      struct bpf_verifier_state *vparent)
  {
- 	bool writes = parent == state->parent; /* Observe write marks */
- 	bool touched = false; /* any changes made? */
- 	int i;
+ 	int i, frame, err = 0;
+ 	struct bpf_func_state *state, *parent;
  
- 	if (!parent)
- 		return touched;
+ 	if (vparent->curframe != vstate->curframe) {
+ 		WARN(1, "propagate_live: parent frame %d current frame %d\n",
+ 		     vparent->curframe, vstate->curframe);
+ 		return -EFAULT;
+ 	}
  	/* Propagate read liveness of registers... */
  	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
  	/* We don't need to worry about FP liveness because it's read-only */
  	for (i = 0; i < BPF_REG_FP; i++) {
- 		if (parent->regs[i].live & REG_LIVE_READ)
- 			continue;
- 		if (writes && (state->regs[i].live & REG_LIVE_WRITTEN))
+ 		if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ)
  			continue;
- 		if (state->regs[i].live & REG_LIVE_READ) {
- 			parent->regs[i].live |= REG_LIVE_READ;
- 			touched = true;
+ 		if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) {
+ 			err = mark_reg_read(env, vstate, vparent, i);
+ 			if (err)
+ 				return err;
  		}
  	}
+ 
  	/* ... and stack slots */
- 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
- 		    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
- 		if (parent->stack[i].slot_type[0] != STACK_SPILL)
- 			continue;
- 		if (state->stack[i].slot_type[0] != STACK_SPILL)
- 			continue;
- 		if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
- 			continue;
- 		if (writes &&
- 		    (state->stack[i].spilled_ptr.live & REG_LIVE_WRITTEN))
- 			continue;
- 		if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) {
- 			parent->stack[i].spilled_ptr.live |= REG_LIVE_READ;
- 			touched = true;
+ 	for (frame = 0; frame <= vstate->curframe; frame++) {
+ 		state = vstate->frame[frame];
+ 		parent = vparent->frame[frame];
+ 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
+ 			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
+ 			if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
+ 				continue;
+ 			if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
+ 				mark_stack_slot_read(env, vstate, vparent, i, frame);
  		}
  	}
- 	return touched;
- }
- 
- /* "parent" is "a state from which we reach the current state", but initially
-  * it is not the state->parent (i.e. "the state whose straight-line code leads
-  * to the current state"), instead it is the state that happened to arrive at
-  * a (prunable) equivalent of the current state.  See comment above
-  * do_propagate_liveness() for consequences of this.
-  * This function is just a more efficient way of calling mark_reg_read() or
-  * mark_stack_slot_read() on each reg in "parent" that is read in "state",
-  * though it requires that parent != state->parent in the call arguments.
-  */
- static void propagate_liveness(const struct bpf_verifier_state *state,
- 			       struct bpf_verifier_state *parent)
- {
- 	while (do_propagate_liveness(state, parent)) {
- 		/* Something changed, so we need to feed those changes onward */
- 		state = parent;
- 		parent = state->parent;
- 	}
+ 	return err;
  }
  
  static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
@@@ -3735,7 -4368,7 +4375,7 @@@
  	struct bpf_verifier_state_list *new_sl;
  	struct bpf_verifier_state_list *sl;
  	struct bpf_verifier_state *cur = env->cur_state;
- 	int i, err;
+ 	int i, j, err;
  
  	sl = env->explored_states[insn_idx];
  	if (!sl)
@@@ -3756,7 -4389,9 +4396,9 @@@
  			 * they'll be immediately forgotten as we're pruning
  			 * this state and will pop a new one.
  			 */
- 			propagate_liveness(&sl->state, cur);
+ 			err = propagate_liveness(env, &sl->state, cur);
+ 			if (err)
+ 				return err;
  			return 1;
  		}
  		sl = sl->next;
@@@ -3764,9 -4399,10 +4406,10 @@@
  
  	/* there were no equivalent states, so remember the current one.
  	 * technically the current state is not proven to be safe yet,
- 	 * but it will either reach bpf_exit (which means it's safe) or
- 	 * it will be rejected. Since there are no loops, we won't be
- 	 * seeing this 'insn_idx' instruction again on the way to bpf_exit
+ 	 * but it will either reach the outermost bpf_exit (which means it's safe)
+ 	 * or it will be rejected. Since there are no loops, we won't be
+ 	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
+ 	 * again on the way to bpf_exit
  	 */
  	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
  	if (!new_sl)
@@@ -3790,19 -4426,15 +4433,15 @@@
  	 * explored_states can get read marks.)
  	 */
  	for (i = 0; i < BPF_REG_FP; i++)
- 		cur->regs[i].live = REG_LIVE_NONE;
- 	for (i = 0; i < cur->allocated_stack / BPF_REG_SIZE; i++)
- 		if (cur->stack[i].slot_type[0] == STACK_SPILL)
- 			cur->stack[i].spilled_ptr.live = REG_LIVE_NONE;
- 	return 0;
- }
+ 		cur->frame[cur->curframe]->regs[i].live = REG_LIVE_NONE;
  
- static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
- 				  int insn_idx, int prev_insn_idx)
- {
- 	if (env->dev_ops && env->dev_ops->insn_hook)
- 		return env->dev_ops->insn_hook(env, insn_idx, prev_insn_idx);
+ 	/* all stack frames are accessible from callee, clear them all */
+ 	for (j = 0; j <= cur->curframe; j++) {
+ 		struct bpf_func_state *frame = cur->frame[j];
  
+ 		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++)
+ 			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
+ 	}
  	return 0;
  }
  
@@@ -3811,7 -4443,7 +4450,7 @@@ static int do_check(struct bpf_verifier
  	struct bpf_verifier_state *state;
  	struct bpf_insn *insns = env->prog->insnsi;
  	struct bpf_reg_state *regs;
- 	int insn_cnt = env->prog->len;
+ 	int insn_cnt = env->prog->len, i;
  	int insn_idx, prev_insn_idx = 0;
  	int insn_processed = 0;
  	bool do_print_state = false;
@@@ -3819,9 -4451,18 +4458,18 @@@
  	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
  	if (!state)
  		return -ENOMEM;
- 	env->cur_state = state;
- 	init_reg_state(env, state->regs);
+ 	state->curframe = 0;
  	state->parent = NULL;
+ 	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
+ 	if (!state->frame[0]) {
+ 		kfree(state);
+ 		return -ENOMEM;
+ 	}
+ 	env->cur_state = state;
+ 	init_func_state(env, state->frame[0],
+ 			BPF_MAIN_FUNC /* callsite */,
+ 			0 /* frameno */,
+ 			0 /* subprogno, zero == main subprog */);
  	insn_idx = 0;
  	for (;;) {
  		struct bpf_insn *insn;
@@@ -3868,19 -4509,25 +4516,25 @@@
  			else
  				verbose(env, "\nfrom %d to %d:",
  					prev_insn_idx, insn_idx);
- 			print_verifier_state(env, state);
+ 			print_verifier_state(env, state->frame[state->curframe]);
  			do_print_state = false;
  		}
  
  		if (env->log.level) {
+ 			const struct bpf_insn_cbs cbs = {
+ 				.cb_print	= verbose,
+ 			};
+ 
  			verbose(env, "%d: ", insn_idx);
- 			print_bpf_insn(verbose, env, insn,
- 				       env->allow_ptr_leaks);
+ 			print_bpf_insn(&cbs, env, insn, env->allow_ptr_leaks);
  		}
  
- 		err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
- 		if (err)
- 			return err;
+ 		if (bpf_prog_is_dev_bound(env->prog->aux)) {
+ 			err = bpf_prog_offload_verify_insn(env, insn_idx,
+ 							   prev_insn_idx);
+ 			if (err)
+ 				return err;
+ 		}
  
  		regs = cur_regs(env);
  		env->insn_aux_data[insn_idx].seen = true;
@@@ -4001,13 -4648,17 +4655,17 @@@
  			if (opcode == BPF_CALL) {
  				if (BPF_SRC(insn->code) != BPF_K ||
  				    insn->off != 0 ||
- 				    insn->src_reg != BPF_REG_0 ||
+ 				    (insn->src_reg != BPF_REG_0 &&
+ 				     insn->src_reg != BPF_PSEUDO_CALL) ||
  				    insn->dst_reg != BPF_REG_0) {
  					verbose(env, "BPF_CALL uses reserved fields\n");
  					return -EINVAL;
  				}
  
- 				err = check_call(env, insn->imm, insn_idx);
+ 				if (insn->src_reg == BPF_PSEUDO_CALL)
+ 					err = check_func_call(env, insn, &insn_idx);
+ 				else
+ 					err = check_helper_call(env, insn->imm, insn_idx);
  				if (err)
  					return err;
  
@@@ -4032,6 -4683,16 +4690,16 @@@
  					return -EINVAL;
  				}
  
+ 				if (state->curframe) {
+ 					/* exit from nested function */
+ 					prev_insn_idx = insn_idx;
+ 					err = prepare_func_exit(env, &insn_idx);
+ 					if (err)
+ 						return err;
+ 					do_print_state = true;
+ 					continue;
+ 				}
+ 
  				/* eBPF calling convention is such that R0 is used
  				 * to return the value from the eBPF program.
  				 * Make sure that it's readable at this time
@@@ -4092,8 -4753,16 +4760,16 @@@ process_bpf_exit
  		insn_idx++;
  	}
  
- 	verbose(env, "processed %d insns, stack depth %d\n", insn_processed,
- 		env->prog->aux->stack_depth);
+ 	verbose(env, "processed %d insns, stack depth ", insn_processed);
+ 	for (i = 0; i < env->subprog_cnt + 1; i++) {
+ 		u32 depth = env->subprog_stack_depth[i];
+ 
+ 		verbose(env, "%d", depth);
+ 		if (i + 1 < env->subprog_cnt + 1)
+ 			verbose(env, "+");
+ 	}
+ 	verbose(env, "\n");
+ 	env->prog->aux->stack_depth = env->subprog_stack_depth[0];
  	return 0;
  }
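
With one callee the summary line now reads, for instance (hypothetical depths),
"processed 25 insns, stack depth 64+8": one depth per function, joined by '+'.
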
  
@@@ -4279,6 -4948,19 +4955,19 @@@ static int adjust_insn_aux_data(struct 
  	return 0;
  }
  
+ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
+ {
+ 	int i;
+ 
+ 	if (len == 1)
+ 		return;
+ 	for (i = 0; i < env->subprog_cnt; i++) {
+ 		if (env->subprog_starts[i] < off)
+ 			continue;
+ 		env->subprog_starts[i] += len - 1;
+ 	}
+ }
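
For instance, patching the single insn at off == 10 into a 3-insn sequence
(len == 3) shifts every recorded subprog start at or beyond 10 by
len - 1 == 2, keeping subprog_starts[] in sync with the grown insn array.
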
+ 
  static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
  					    const struct bpf_insn *patch, u32 len)
  {
@@@ -4289,6 -4971,7 +4978,7 @@@
  		return NULL;
  	if (adjust_insn_aux_data(env, new_prog->len, off, len))
  		return NULL;
+ 	adjust_subprog_starts(env, off, len);
  	return new_prog;
  }
  
@@@ -4423,6 -5106,172 +5113,172 @@@ static int convert_ctx_accesses(struct 
  	return 0;
  }
  
+ static int jit_subprogs(struct bpf_verifier_env *env)
+ {
+ 	struct bpf_prog *prog = env->prog, **func, *tmp;
+ 	int i, j, subprog_start, subprog_end = 0, len, subprog;
+ 	struct bpf_insn *insn;
+ 	void *old_bpf_func;
+ 	int err = -ENOMEM;
+ 
+ 	if (env->subprog_cnt == 0)
+ 		return 0;
+ 
+ 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+ 		if (insn->code != (BPF_JMP | BPF_CALL) ||
+ 		    insn->src_reg != BPF_PSEUDO_CALL)
+ 			continue;
+ 		subprog = find_subprog(env, i + insn->imm + 1);
+ 		if (subprog < 0) {
+ 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
+ 				  i + insn->imm + 1);
+ 			return -EFAULT;
+ 		}
+ 		/* temporarily remember subprog id inside insn instead of
+ 		 * aux_data, since next loop will split up all insns into funcs
+ 		 */
+ 		insn->off = subprog + 1;
+ 		/* remember original imm in case JIT fails and fallback
+ 		 * to interpreter will be needed
+ 		 */
+ 		env->insn_aux_data[i].call_imm = insn->imm;
+ 		/* point imm to __bpf_call_base+1 from JITs point of view */
+ 		insn->imm = 1;
+ 	}
+ 
+ 	func = kzalloc(sizeof(prog) * (env->subprog_cnt + 1), GFP_KERNEL);
+ 	if (!func)
+ 		return -ENOMEM;
+ 
+ 	for (i = 0; i <= env->subprog_cnt; i++) {
+ 		subprog_start = subprog_end;
+ 		if (env->subprog_cnt == i)
+ 			subprog_end = prog->len;
+ 		else
+ 			subprog_end = env->subprog_starts[i];
+ 
+ 		len = subprog_end - subprog_start;
+ 		func[i] = bpf_prog_alloc(bpf_prog_size(len), GFP_USER);
+ 		if (!func[i])
+ 			goto out_free;
+ 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
+ 		       len * sizeof(struct bpf_insn));
+ 		func[i]->type = prog->type;
+ 		func[i]->len = len;
+ 		if (bpf_prog_calc_tag(func[i]))
+ 			goto out_free;
+ 		func[i]->is_func = 1;
+ 		/* Use bpf_prog_F_tag to indicate functions in stack traces.
+ 		 * Long term we would need debug info to populate names
+ 		 */
+ 		func[i]->aux->name[0] = 'F';
+ 		func[i]->aux->stack_depth = env->subprog_stack_depth[i];
+ 		func[i]->jit_requested = 1;
+ 		func[i] = bpf_int_jit_compile(func[i]);
+ 		if (!func[i]->jited) {
+ 			err = -ENOTSUPP;
+ 			goto out_free;
+ 		}
+ 		cond_resched();
+ 	}
+ 	/* at this point all bpf functions were successfully JITed;
+ 	 * now populate all bpf_calls with correct addresses and
+ 	 * run the last pass of the JIT
+ 	 */
+ 	for (i = 0; i <= env->subprog_cnt; i++) {
+ 		insn = func[i]->insnsi;
+ 		for (j = 0; j < func[i]->len; j++, insn++) {
+ 			if (insn->code != (BPF_JMP | BPF_CALL) ||
+ 			    insn->src_reg != BPF_PSEUDO_CALL)
+ 				continue;
+ 			subprog = insn->off;
+ 			insn->off = 0;
+ 			insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
+ 				func[subprog]->bpf_func -
+ 				__bpf_call_base;
+ 		}
+ 	}
+ 	for (i = 0; i <= env->subprog_cnt; i++) {
+ 		old_bpf_func = func[i]->bpf_func;
+ 		tmp = bpf_int_jit_compile(func[i]);
+ 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
+ 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
+ 			err = -EFAULT;
+ 			goto out_free;
+ 		}
+ 		cond_resched();
+ 	}
+ 
+ 	/* finally lock prog and jit images for all functions and
+ 	 * populate kallsyms
+ 	 */
+ 	for (i = 0; i <= env->subprog_cnt; i++) {
+ 		bpf_prog_lock_ro(func[i]);
+ 		bpf_prog_kallsyms_add(func[i]);
+ 	}
+ 
+ 	/* Last step: make the now-unused interpreter insns from the main
+ 	 * prog consistent for later dump requests, so they look the same
+ 	 * as if the prog had only been interpreted.
+ 	 */
+ 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+ 		unsigned long addr;
+ 
+ 		if (insn->code != (BPF_JMP | BPF_CALL) ||
+ 		    insn->src_reg != BPF_PSEUDO_CALL)
+ 			continue;
+ 		insn->off = env->insn_aux_data[i].call_imm;
+ 		subprog = find_subprog(env, i + insn->off + 1);
+ 		addr  = (unsigned long)func[subprog + 1]->bpf_func;
+ 		addr &= PAGE_MASK;
+ 		insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
+ 			    addr - __bpf_call_base;
+ 	}
+ 
+ 	prog->jited = 1;
+ 	prog->bpf_func = func[0]->bpf_func;
+ 	prog->aux->func = func;
+ 	prog->aux->func_cnt = env->subprog_cnt + 1;
+ 	return 0;
+ out_free:
+ 	for (i = 0; i <= env->subprog_cnt; i++)
+ 		if (func[i])
+ 			bpf_jit_free(func[i]);
+ 	kfree(func);
+ 	/* cleanup main prog to be interpreted */
+ 	prog->jit_requested = 0;
+ 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
+ 		if (insn->code != (BPF_JMP | BPF_CALL) ||
+ 		    insn->src_reg != BPF_PSEUDO_CALL)
+ 			continue;
+ 		insn->off = 0;
+ 		insn->imm = env->insn_aux_data[i].call_imm;
+ 	}
+ 	return err;
+ }
+ 
+ static int fixup_call_args(struct bpf_verifier_env *env)
+ {
+ 	struct bpf_prog *prog = env->prog;
+ 	struct bpf_insn *insn = prog->insnsi;
+ 	int i, depth;
+ 
+ 	if (env->prog->jit_requested)
+ 		if (jit_subprogs(env) == 0)
+ 			return 0;
+ 
+ 	for (i = 0; i < prog->len; i++, insn++) {
+ 		if (insn->code != (BPF_JMP | BPF_CALL) ||
+ 		    insn->src_reg != BPF_PSEUDO_CALL)
+ 			continue;
+ 		depth = get_callee_stack_depth(env, insn, i);
+ 		if (depth < 0)
+ 			return depth;
+ 		bpf_patch_call_args(insn, depth);
+ 	}
+ 	return 0;
+ }
+ 
  /* fixup insn->imm field of bpf_call instructions
   * and inline eligible helpers as explicit sequence of BPF instructions
   *
@@@ -4442,11 -5291,15 +5298,15 @@@ static int fixup_bpf_calls(struct bpf_v
  	for (i = 0; i < insn_cnt; i++, insn++) {
  		if (insn->code != (BPF_JMP | BPF_CALL))
  			continue;
+ 		if (insn->src_reg == BPF_PSEUDO_CALL)
+ 			continue;
  
  		if (insn->imm == BPF_FUNC_get_route_realm)
  			prog->dst_needed = 1;
  		if (insn->imm == BPF_FUNC_get_prandom_u32)
  			bpf_user_rnd_init_once();
+ 		if (insn->imm == BPF_FUNC_override_return)
+ 			prog->kprobe_override = 1;
  		if (insn->imm == BPF_FUNC_tail_call) {
  			/* If we tail call into other programs, we
  			 * cannot make any assumptions since they can
@@@ -4463,42 -5316,13 +5323,42 @@@
  			 */
  			insn->imm = 0;
  			insn->code = BPF_JMP | BPF_TAIL_CALL;
 +
 +			/* instead of changing every JIT dealing with tail_call
 +			 * emit two extra insns:
 +			 * if (index >= max_entries) goto out;
 +			 * index &= array->index_mask;
 +			 * to avoid out-of-bounds cpu speculation
 +			 */
 +			map_ptr = env->insn_aux_data[i + delta].map_ptr;
 +			if (map_ptr == BPF_MAP_PTR_POISON) {
 +				verbose(env, "tail_call abusing map_ptr\n");
 +				return -EINVAL;
 +			}
 +			if (!map_ptr->unpriv_array)
 +				continue;
 +			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
 +						  map_ptr->max_entries, 2);
 +			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
 +						    container_of(map_ptr,
 +								 struct bpf_array,
 +								 map)->index_mask);
 +			insn_buf[2] = *insn;
 +			cnt = 3;
 +			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
 +			if (!new_prog)
 +				return -ENOMEM;
 +
 +			delta    += cnt - 1;
 +			env->prog = prog = new_prog;
 +			insn      = new_prog->insnsi + i + delta;
  			continue;
  		}
  
  		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
  		 * handlers are currently limited to 64 bit only.
  		 */
- 		if (ebpf_jit_enabled() && BITS_PER_LONG == 64 &&
+ 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
  		    insn->imm == BPF_FUNC_map_lookup_elem) {
  			map_ptr = env->insn_aux_data[i + delta].map_ptr;
  			if (map_ptr == BPF_MAP_PTR_POISON ||
@@@ -4633,7 -5457,7 +5493,7 @@@ int bpf_check(struct bpf_prog **prog, u
  	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
  		env->strict_alignment = true;
  
- 	if (env->prog->aux->offload) {
+ 	if (bpf_prog_is_dev_bound(env->prog->aux)) {
  		ret = bpf_prog_offload_verifier_prep(env);
  		if (ret)
  			goto err_unlock;
@@@ -4650,12 -5474,12 +5510,12 @@@
  	if (!env->explored_states)
  		goto skip_full_check;
  
+ 	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
+ 
  	ret = check_cfg(env);
  	if (ret < 0)
  		goto skip_full_check;
  
- 	env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
- 
  	ret = do_check(env);
  	if (env->cur_state) {
  		free_verifier_state(env->cur_state, true);
@@@ -4670,12 -5494,18 +5530,18 @@@ skip_full_check
  		sanitize_dead_code(env);
  
  	if (ret == 0)
+ 		ret = check_max_stack_depth(env);
+ 
+ 	if (ret == 0)
  		/* program is valid, convert *(u32*)(ctx + off) accesses */
  		ret = convert_ctx_accesses(env);
  
  	if (ret == 0)
  		ret = fixup_bpf_calls(env);
  
+ 	if (ret == 0)
+ 		ret = fixup_call_args(env);
+ 
  	if (log->level && bpf_verifier_log_full(log))
  		ret = -ENOSPC;
  	if (log->level && !log->ubuf) {
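
To illustrate the bpf-to-bpf calls this verifier rework enables, here is a
minimal two-function program sketch, assuming the insn macros from
include/linux/filter.h; the imm of a pseudo call is the pc-relative offset to
the callee (target = pc + imm + 1):

	struct bpf_insn prog[] = {
		/* main: r0 = f(2); exit */
		BPF_MOV64_IMM(BPF_REG_1, 2),
		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, 0, 1),
		BPF_EXIT_INSN(),
		/* f: return r1 + 1 */
		BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
		BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
		BPF_EXIT_INSN(),
	};

check_subprogs() records insn 3 as a subprogram start, do_check() verifies the
callee in its own frame, and jit_subprogs() emits it as a separate function
when the JIT is enabled.
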
diff --combined kernel/events/core.c
index 56d2b99de409,878d86c513d6..0f2fe78c2fa2
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@@ -4511,11 -4511,11 +4511,11 @@@ perf_read(struct file *file, char __use
  	return ret;
  }
  
 -static unsigned int perf_poll(struct file *file, poll_table *wait)
 +static __poll_t perf_poll(struct file *file, poll_table *wait)
  {
  	struct perf_event *event = file->private_data;
  	struct ring_buffer *rb;
 -	unsigned int events = POLLHUP;
 +	__poll_t events = POLLHUP;
  
  	poll_wait(file, &event->waitq, wait);
  
@@@ -4723,6 -4723,9 +4723,9 @@@ static long _perf_ioctl(struct perf_eve
  		rcu_read_unlock();
  		return 0;
  	}
+ 
+ 	case PERF_EVENT_IOC_QUERY_BPF:
+ 		return perf_event_query_prog_array(event, (void __user *)arg);
  	default:
  		return -ENOTTY;
  	}
@@@ -4904,7 -4907,6 +4907,7 @@@ void perf_event_update_userpage(struct 
  unlock:
  	rcu_read_unlock();
  }
 +EXPORT_SYMBOL_GPL(perf_event_update_userpage);
  
  static int perf_mmap_fault(struct vm_fault *vmf)
  {
@@@ -8081,6 -8083,13 +8084,13 @@@ static int perf_event_set_bpf_prog(stru
  		return -EINVAL;
  	}
  
+ 	/* Kprobe override only works for kprobes, not uprobes. */
+ 	if (prog->kprobe_override &&
+ 	    !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) {
+ 		bpf_prog_put(prog);
+ 		return -EINVAL;
+ 	}
+ 
  	if (is_tracepoint || is_syscall_tp) {
  		int off = trace_event_get_offsets(event->tp_event);
  
diff --combined kernel/module.c
index 8042b8fcbf14,bd695bfdc5c4..83075a104710
--- a/kernel/module.c
+++ b/kernel/module.c
@@@ -3118,7 -3118,11 +3118,11 @@@ static int find_module_sections(struct 
  					     sizeof(*mod->ftrace_callsites),
  					     &mod->num_ftrace_callsites);
  #endif
- 
+ #ifdef CONFIG_BPF_KPROBE_OVERRIDE
+ 	mod->kprobe_ei_funcs = section_objs(info, "_kprobe_error_inject_list",
+ 					    sizeof(*mod->kprobe_ei_funcs),
+ 					    &mod->num_kprobe_ei_funcs);
+ #endif
  	mod->extable = section_objs(info, "__ex_table",
  				    sizeof(*mod->extable), &mod->num_exentries);
  
@@@ -3938,12 -3942,6 +3942,12 @@@ static const char *get_ksymbol(struct m
  	return symname(kallsyms, best);
  }
  
 +void * __weak dereference_module_function_descriptor(struct module *mod,
 +						     void *ptr)
 +{
 +	return ptr;
 +}
 +
  /* For kallsyms to ask for address resolution.  NULL means not found.  Careful
   * not to lock to avoid deadlock on oopses, simply disable preemption. */
  const char *module_address_lookup(unsigned long addr,
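
For context, a sketch of how a function ends up in that section, assuming the
BPF_ALLOW_ERROR_INJECTION() helper added alongside this module loader change;
my_init_step is a made-up example:

	static int my_init_step(void)
	{
		return 0;
	}
	BPF_ALLOW_ERROR_INJECTION(my_init_step);

The macro records the function's address in "_kprobe_error_inject_list", which
find_module_sections() above gathers into mod->kprobe_ei_funcs, so that
bpf_override_return() can be restricted to functions that opted in.
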
diff --combined net/atm/common.c
index 8f12f1c6fa14,5763fd241dc3..6523f38c4957
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@@ -14,7 -14,7 +14,7 @@@
  #include <linux/capability.h>
  #include <linux/mm.h>
  #include <linux/sched/signal.h>
- #include <linux/time.h>		/* struct timeval */
+ #include <linux/time64.h>	/* 64-bit time for seconds */
  #include <linux/skbuff.h>
  #include <linux/bitops.h>
  #include <linux/init.h>
@@@ -648,11 -648,11 +648,11 @@@ out
  	return error;
  }
  
 -unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
 +__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
  {
  	struct sock *sk = sock->sk;
  	struct atm_vcc *vcc;
 -	unsigned int mask;
 +	__poll_t mask;
  
  	sock_poll_wait(file, sk_sleep(sk), wait);
  	mask = 0;
diff --combined net/batman-adv/icmp_socket.c
index a98e0a986cef,8041cf106c37..581375d0eed2
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@@ -1,3 -1,4 +1,4 @@@
+ // SPDX-License-Identifier: GPL-2.0
  /* Copyright (C) 2007-2017  B.A.T.M.A.N. contributors:
   *
   * Marek Lindner
@@@ -26,6 -27,7 +27,7 @@@
  #include <linux/export.h>
  #include <linux/fcntl.h>
  #include <linux/fs.h>
+ #include <linux/gfp.h>
  #include <linux/if_ether.h>
  #include <linux/kernel.h>
  #include <linux/list.h>
@@@ -42,11 -44,11 +44,11 @@@
  #include <linux/string.h>
  #include <linux/uaccess.h>
  #include <linux/wait.h>
+ #include <uapi/linux/batadv_packet.h>
  
  #include "hard-interface.h"
  #include "log.h"
  #include "originator.h"
- #include "packet.h"
  #include "send.h"
  
  static struct batadv_socket_client *batadv_socket_client_hash[256];
@@@ -55,6 -57,9 +57,9 @@@ static void batadv_socket_add_packet(st
  				     struct batadv_icmp_header *icmph,
  				     size_t icmp_len);
  
+ /**
+  * batadv_socket_init() - Initialize soft interface independent socket data
+  */
  void batadv_socket_init(void)
  {
  	memset(batadv_socket_client_hash, 0, sizeof(batadv_socket_client_hash));
@@@ -292,7 -297,7 +297,7 @@@ out
  	return len;
  }
  
 -static unsigned int batadv_socket_poll(struct file *file, poll_table *wait)
 +static __poll_t batadv_socket_poll(struct file *file, poll_table *wait)
  {
  	struct batadv_socket_client *socket_client = file->private_data;
  
@@@ -314,6 -319,12 +319,12 @@@ static const struct file_operations bat
  	.llseek = no_llseek,
  };
  
+ /**
+  * batadv_socket_setup() - Create debugfs "socket" file
+  * @bat_priv: the bat priv with all the soft interface information
+  *
+  * Return: 0 on success or negative error number in case of failure
+  */
  int batadv_socket_setup(struct batadv_priv *bat_priv)
  {
  	struct dentry *d;
@@@ -333,7 -344,7 +344,7 @@@ err
  }
  
  /**
-  * batadv_socket_add_packet - schedule an icmp packet to be sent to
+  * batadv_socket_add_packet() - schedule an icmp packet to be sent to
   *  userspace on an icmp socket.
   * @socket_client: the socket this packet belongs to
   * @icmph: pointer to the header of the icmp packet
@@@ -390,7 -401,7 +401,7 @@@ static void batadv_socket_add_packet(st
  }
  
  /**
-  * batadv_socket_receive_packet - schedule an icmp packet to be received
+  * batadv_socket_receive_packet() - schedule an icmp packet to be received
   *  locally and sent to userspace.
   * @icmph: pointer to the header of the icmp packet
   * @icmp_len: total length of the icmp packet
diff --combined net/batman-adv/log.c
index 76451460c98d,da004980ab8b..9be74a44e99d
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@@ -1,3 -1,4 +1,4 @@@
+ // SPDX-License-Identifier: GPL-2.0
  /* Copyright (C) 2010-2017  B.A.T.M.A.N. contributors:
   *
   * Marek Lindner
@@@ -24,6 -25,7 +25,7 @@@
  #include <linux/export.h>
  #include <linux/fcntl.h>
  #include <linux/fs.h>
+ #include <linux/gfp.h>
  #include <linux/jiffies.h>
  #include <linux/kernel.h>
  #include <linux/module.h>
@@@ -86,6 -88,13 +88,13 @@@ static int batadv_fdebug_log(struct bat
  	return 0;
  }
  
+ /**
+  * batadv_debug_log() - Add debug log entry
+  * @bat_priv: the bat priv with all the soft interface information
+  * @fmt: format string
+  *
+  * Return: 0 on success or negative error number in case of failure
+  */
  int batadv_debug_log(struct batadv_priv *bat_priv, const char *fmt, ...)
  {
  	va_list args;
@@@ -176,7 -185,7 +185,7 @@@ static ssize_t batadv_log_read(struct f
  	return error;
  }
  
 -static unsigned int batadv_log_poll(struct file *file, poll_table *wait)
 +static __poll_t batadv_log_poll(struct file *file, poll_table *wait)
  {
  	struct batadv_priv *bat_priv = file->private_data;
  	struct batadv_priv_debug_log *debug_log = bat_priv->debug_log;
@@@ -197,6 -206,12 +206,12 @@@ static const struct file_operations bat
  	.llseek         = no_llseek,
  };
  
+ /**
+  * batadv_debug_log_setup() - Initialize debug log
+  * @bat_priv: the bat priv with all the soft interface information
+  *
+  * Return: 0 on success or negative error number in case of failure
+  */
  int batadv_debug_log_setup(struct batadv_priv *bat_priv)
  {
  	struct dentry *d;
@@@ -222,6 -237,10 +237,10 @@@ err
  	return -ENOMEM;
  }
  
+ /**
+  * batadv_debug_log_cleanup() - Destroy debug log
+  * @bat_priv: the bat priv with all the soft interface information
+  */
  void batadv_debug_log_cleanup(struct batadv_priv *bat_priv)
  {
  	kfree(bat_priv->debug_log);
diff --combined net/bluetooth/af_bluetooth.c
index 671b907ba678,f044202346c6..f897681780db
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@@ -421,7 -421,7 +421,7 @@@ out
  }
  EXPORT_SYMBOL(bt_sock_stream_recvmsg);
  
 -static inline unsigned int bt_accept_poll(struct sock *parent)
 +static inline __poll_t bt_accept_poll(struct sock *parent)
  {
  	struct bt_sock *s, *n;
  	struct sock *sk;
@@@ -437,11 -437,11 +437,11 @@@
  	return 0;
  }
  
 -unsigned int bt_sock_poll(struct file *file, struct socket *sock,
 +__poll_t bt_sock_poll(struct file *file, struct socket *sock,
  			  poll_table *wait)
  {
  	struct sock *sk = sock->sk;
 -	unsigned int mask = 0;
 +	__poll_t mask = 0;
  
  	BT_DBG("sock %p, sk %p", sock, sk);
  
@@@ -766,43 -766,39 +766,39 @@@ static int __init bt_init(void
  		return err;
  
  	err = sock_register(&bt_sock_family_ops);
- 	if (err < 0) {
- 		bt_sysfs_cleanup();
- 		return err;
- 	}
+ 	if (err)
+ 		goto cleanup_sysfs;
  
  	BT_INFO("HCI device and connection manager initialized");
  
  	err = hci_sock_init();
- 	if (err < 0)
- 		goto error;
+ 	if (err)
+ 		goto unregister_socket;
  
  	err = l2cap_init();
- 	if (err < 0)
- 		goto sock_err;
+ 	if (err)
+ 		goto cleanup_socket;
  
  	err = sco_init();
- 	if (err < 0) {
- 		l2cap_exit();
- 		goto sock_err;
- 	}
+ 	if (err)
+ 		goto cleanup_cap;
  
  	err = mgmt_init();
- 	if (err < 0) {
- 		sco_exit();
- 		l2cap_exit();
- 		goto sock_err;
- 	}
+ 	if (err)
+ 		goto cleanup_sco;
  
  	return 0;
  
- sock_err:
+ cleanup_sco:
+ 	sco_exit();
+ cleanup_cap:
+ 	l2cap_exit();
+ cleanup_socket:
  	hci_sock_cleanup();
- 
- error:
+ unregister_socket:
  	sock_unregister(PF_BLUETOOTH);
+ cleanup_sysfs:
  	bt_sysfs_cleanup();
- 
  	return err;
  }
  
diff --combined net/core/sock.c
index 1211159718ad,72d14b221784..420c380bc61d
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@@ -145,6 -145,8 +145,8 @@@
  static DEFINE_MUTEX(proto_list_mutex);
  static LIST_HEAD(proto_list);
  
+ static void sock_inuse_add(struct net *net, int val);
+ 
  /**
   * sk_ns_capable - General socket capability test
   * @sk: Socket to use a capability on or through
@@@ -1531,8 -1533,11 +1533,11 @@@ struct sock *sk_alloc(struct net *net, 
  		sk->sk_kern_sock = kern;
  		sock_lock_init(sk);
  		sk->sk_net_refcnt = kern ? 0 : 1;
- 		if (likely(sk->sk_net_refcnt))
+ 		if (likely(sk->sk_net_refcnt)) {
  			get_net(net);
+ 			sock_inuse_add(net, 1);
+ 		}
+ 
  		sock_net_set(sk, net);
  		refcount_set(&sk->sk_wmem_alloc, 1);
  
@@@ -1595,6 -1600,9 +1600,9 @@@ void sk_destruct(struct sock *sk
  
  static void __sk_free(struct sock *sk)
  {
+ 	if (likely(sk->sk_net_refcnt))
+ 		sock_inuse_add(sock_net(sk), -1);
+ 
  	if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
  		sock_diag_broadcast_destroy(sk);
  	else
@@@ -1716,6 -1724,8 +1724,8 @@@ struct sock *sk_clone_lock(const struc
  		newsk->sk_priority = 0;
  		newsk->sk_incoming_cpu = raw_smp_processor_id();
  		atomic64_set(&newsk->sk_cookie, 0);
+ 		if (likely(newsk->sk_net_refcnt))
+ 			sock_inuse_add(sock_net(newsk), 1);
  
  		/*
  		 * Before updating sk_refcnt, we must commit prior changes to memory
@@@ -2496,7 -2506,7 +2506,7 @@@ int sock_no_getname(struct socket *sock
  }
  EXPORT_SYMBOL(sock_no_getname);
  
 -unsigned int sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
 +__poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
  {
  	return 0;
  }
@@@ -3045,7 -3055,7 +3055,7 @@@ static DECLARE_BITMAP(proto_inuse_idx, 
  
  void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
  {
- 	__this_cpu_add(net->core.inuse->val[prot->inuse_idx], val);
+ 	__this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
  }
  EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
  
@@@ -3055,21 -3065,50 +3065,50 @@@ int sock_prot_inuse_get(struct net *net
  	int res = 0;
  
  	for_each_possible_cpu(cpu)
- 		res += per_cpu_ptr(net->core.inuse, cpu)->val[idx];
+ 		res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
  
  	return res >= 0 ? res : 0;
  }
  EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
  
+ static void sock_inuse_add(struct net *net, int val)
+ {
+ 	this_cpu_add(*net->core.sock_inuse, val);
+ }
+ 
+ int sock_inuse_get(struct net *net)
+ {
+ 	int cpu, res = 0;
+ 
+ 	for_each_possible_cpu(cpu)
+ 		res += *per_cpu_ptr(net->core.sock_inuse, cpu);
+ 
+ 	return res;
+ }
+ EXPORT_SYMBOL_GPL(sock_inuse_get);
+ 
  static int __net_init sock_inuse_init_net(struct net *net)
  {
- 	net->core.inuse = alloc_percpu(struct prot_inuse);
- 	return net->core.inuse ? 0 : -ENOMEM;
+ 	net->core.prot_inuse = alloc_percpu(struct prot_inuse);
+ 	if (net->core.prot_inuse == NULL)
+ 		return -ENOMEM;
+ 
+ 	net->core.sock_inuse = alloc_percpu(int);
+ 	if (net->core.sock_inuse == NULL)
+ 		goto out;
+ 
+ 	return 0;
+ 
+ out:
+ 	free_percpu(net->core.prot_inuse);
+ 	return -ENOMEM;
  }
  
  static void __net_exit sock_inuse_exit_net(struct net *net)
  {
- 	free_percpu(net->core.inuse);
+ 	free_percpu(net->core.prot_inuse);
+ 	free_percpu(net->core.sock_inuse);
  }
  
  static struct pernet_operations net_inuse_ops = {
@@@ -3112,6 -3151,10 +3151,10 @@@ static inline void assign_proto_idx(str
  static inline void release_proto_idx(struct proto *prot)
  {
  }
+ 
+ static void sock_inuse_add(struct net *net, int val)
+ {
+ }
  #endif
  
  static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
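
A short sketch of the intended consumer, modeled on the /proc/net/sockstat
handler (simplified; the real handler prints more counters):

	static int sockstat_seq_show(struct seq_file *seq, void *v)
	{
		struct net *net = seq_file_net(seq);

		seq_printf(seq, "sockets: used %d\n", sock_inuse_get(net));
		return 0;
	}

Unlike the old global sockets_in_use counter, this sums the per-cpu,
per-netns sock_inuse, so each namespace reports only its own sockets.
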
diff --combined net/dccp/proto.c
index 8b8db3d481bd,fa7e92e08920..74685fecfdb9
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@@ -38,6 -38,9 +38,9 @@@
  #include "dccp.h"
  #include "feat.h"
  
+ #define CREATE_TRACE_POINTS
+ #include "trace.h"
+ 
  DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
  
  EXPORT_SYMBOL_GPL(dccp_statistics);
@@@ -110,7 -113,7 +113,7 @@@ void dccp_set_state(struct sock *sk, co
  	/* Change state AFTER socket is unhashed to avoid closed
  	 * socket sitting in hash tables.
  	 */
- 	sk->sk_state = state;
+ 	inet_sk_set_state(sk, state);
  }
  
  EXPORT_SYMBOL_GPL(dccp_set_state);
@@@ -318,10 -321,10 +321,10 @@@ EXPORT_SYMBOL_GPL(dccp_disconnect)
   *	take care of normal races (between the test and the event) and we don't
   *	go look at any of the socket buffers directly.
   */
 -unsigned int dccp_poll(struct file *file, struct socket *sock,
 +__poll_t dccp_poll(struct file *file, struct socket *sock,
  		       poll_table *wait)
  {
 -	unsigned int mask;
 +	__poll_t mask;
  	struct sock *sk = sock->sk;
  
  	sock_poll_wait(file, sk_sleep(sk), wait);
@@@ -761,6 -764,8 +764,8 @@@ int dccp_sendmsg(struct sock *sk, struc
  	int rc, size;
  	long timeo;
  
+ 	trace_dccp_probe(sk, len);
+ 
  	if (len > dp->dccps_mss_cache)
  		return -EMSGSIZE;
  
diff --combined net/ipv4/esp4.c
index 61fe6e4d23fc,6f00e43120a8..296d0b956bfe
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@@ -121,14 -121,32 +121,32 @@@ static void esp_ssg_unref(struct xfrm_s
  static void esp_output_done(struct crypto_async_request *base, int err)
  {
  	struct sk_buff *skb = base->data;
+ 	struct xfrm_offload *xo = xfrm_offload(skb);
  	void *tmp;
- 	struct dst_entry *dst = skb_dst(skb);
- 	struct xfrm_state *x = dst->xfrm;
+ 	struct xfrm_state *x;
+ 
+ 	if (xo && (xo->flags & XFRM_DEV_RESUME))
+ 		x = skb->sp->xvec[skb->sp->len - 1];
+ 	else
+ 		x = skb_dst(skb)->xfrm;
  
  	tmp = ESP_SKB_CB(skb)->tmp;
  	esp_ssg_unref(x, tmp);
  	kfree(tmp);
- 	xfrm_output_resume(skb, err);
+ 
+ 	if (xo && (xo->flags & XFRM_DEV_RESUME)) {
+ 		if (err) {
+ 			XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+ 			kfree_skb(skb);
+ 			return;
+ 		}
+ 
+ 		skb_push(skb, skb->data - skb_mac_header(skb));
+ 		secpath_reset(skb);
+ 		xfrm_dev_resume(skb);
+ 	} else {
+ 		xfrm_output_resume(skb, err);
+ 	}
  }
  
  /* Move ESP header back into place. */
@@@ -825,17 -843,13 +843,13 @@@ static int esp_init_aead(struct xfrm_st
  	char aead_name[CRYPTO_MAX_ALG_NAME];
  	struct crypto_aead *aead;
  	int err;
- 	u32 mask = 0;
  
  	err = -ENAMETOOLONG;
  	if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
  		     x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
  		goto error;
  
- 	if (x->xso.offload_handle)
- 		mask |= CRYPTO_ALG_ASYNC;
- 
- 	aead = crypto_alloc_aead(aead_name, 0, mask);
+ 	aead = crypto_alloc_aead(aead_name, 0, 0);
  	err = PTR_ERR(aead);
  	if (IS_ERR(aead))
  		goto error;
@@@ -865,7 -879,6 +879,6 @@@ static int esp_init_authenc(struct xfrm
  	char authenc_name[CRYPTO_MAX_ALG_NAME];
  	unsigned int keylen;
  	int err;
- 	u32 mask = 0;
  
  	err = -EINVAL;
  	if (!x->ealg)
@@@ -891,10 -904,7 +904,7 @@@
  			goto error;
  	}
  
- 	if (x->xso.offload_handle)
- 		mask |= CRYPTO_ALG_ASYNC;
- 
- 	aead = crypto_alloc_aead(authenc_name, 0, mask);
+ 	aead = crypto_alloc_aead(authenc_name, 0, 0);
  	err = PTR_ERR(aead);
  	if (IS_ERR(aead))
  		goto error;
@@@ -981,7 -991,6 +991,7 @@@ static int esp_init_state(struct xfrm_s
  
  		switch (encap->encap_type) {
  		default:
 +			err = -EINVAL;
  			goto error;
  		case UDP_ENCAP_ESPINUDP:
  			x->props.header_len += sizeof(struct udphdr);
diff --combined net/ipv4/tcp.c
index c4a7ee7f6721,f68cb33d50d1..d58285b54813
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@@ -283,8 -283,6 +283,6 @@@
  #include <asm/ioctls.h>
  #include <net/busy_poll.h>
  
- #include <trace/events/tcp.h>
- 
  struct percpu_counter tcp_orphan_count;
  EXPORT_SYMBOL_GPL(tcp_orphan_count);
  
@@@ -493,18 -491,16 +491,16 @@@ static void tcp_tx_timestamp(struct soc
   *	take care of normal races (between the test and the event) and we don't
   *	go look at any of the socket buffers directly.
   */
 -unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 +__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
  {
 -	unsigned int mask;
 +	__poll_t mask;
  	struct sock *sk = sock->sk;
  	const struct tcp_sock *tp = tcp_sk(sk);
  	int state;
  
- 	sock_rps_record_flow(sk);
- 
  	sock_poll_wait(file, sk_sleep(sk), wait);
  
- 	state = sk_state_load(sk);
+ 	state = inet_sk_state_load(sk);
  	if (state == TCP_LISTEN)
  		return inet_csk_listen_poll(sk);
  
@@@ -1106,12 -1102,15 +1102,15 @@@ static int linear_payload_sz(bool first
  	return 0;
  }
  
- static int select_size(const struct sock *sk, bool sg, bool first_skb)
+ static int select_size(const struct sock *sk, bool sg, bool first_skb, bool zc)
  {
  	const struct tcp_sock *tp = tcp_sk(sk);
  	int tmp = tp->mss_cache;
  
  	if (sg) {
+ 		if (zc)
+ 			return 0;
+ 
  		if (sk_can_gso(sk)) {
  			tmp = linear_payload_sz(first_skb);
  		} else {
@@@ -1188,7 -1187,7 +1187,7 @@@ int tcp_sendmsg_locked(struct sock *sk
  	int flags, err, copied = 0;
  	int mss_now = 0, size_goal, copied_syn = 0;
  	bool process_backlog = false;
- 	bool sg;
+ 	bool sg, zc = false;
  	long timeo;
  
  	flags = msg->msg_flags;
@@@ -1206,7 -1205,8 +1205,8 @@@
  			goto out_err;
  		}
  
- 		if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG))
+ 		zc = sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG;
+ 		if (!zc)
  			uarg->zerocopy = 0;
  	}
  
@@@ -1283,6 -1283,7 +1283,7 @@@ restart
  
  		if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) {
  			bool first_skb;
+ 			int linear;
  
  new_segment:
  			/* Allocate new segment. If the interface is SG,
@@@ -1296,9 -1297,8 +1297,8 @@@
  				goto restart;
  			}
  			first_skb = tcp_rtx_and_write_queues_empty(sk);
- 			skb = sk_stream_alloc_skb(sk,
- 						  select_size(sk, sg, first_skb),
- 						  sk->sk_allocation,
+ 			linear = select_size(sk, sg, first_skb, zc);
+ 			skb = sk_stream_alloc_skb(sk, linear, sk->sk_allocation,
  						  first_skb);
  			if (!skb)
  				goto wait_for_memory;
@@@ -1327,13 -1327,13 +1327,13 @@@
  			copy = msg_data_left(msg);
  
  		/* Where to copy to? */
- 		if (skb_availroom(skb) > 0) {
+ 		if (skb_availroom(skb) > 0 && !zc) {
  			/* We have some space in skb head. Superb! */
  			copy = min_t(int, copy, skb_availroom(skb));
  			err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy);
  			if (err)
  				goto do_fault;
- 		} else if (!uarg || !uarg->zerocopy) {
+ 		} else if (!zc) {
  			bool merge = true;
  			int i = skb_shinfo(skb)->nr_frags;
  			struct page_frag *pfrag = sk_page_frag(sk);
@@@ -1373,8 -1373,10 +1373,10 @@@
  			pfrag->offset += copy;
  		} else {
  			err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg);
- 			if (err == -EMSGSIZE || err == -EEXIST)
+ 			if (err == -EMSGSIZE || err == -EEXIST) {
+ 				tcp_mark_push(tp, skb);
  				goto new_segment;
+ 			}
  			if (err < 0)
  				goto do_error;
  			copy = err;
@@@ -2040,8 -2042,6 +2042,6 @@@ void tcp_set_state(struct sock *sk, in
  {
  	int oldstate = sk->sk_state;
  
- 	trace_tcp_set_state(sk, oldstate, state);
- 
  	switch (state) {
  	case TCP_ESTABLISHED:
  		if (oldstate != TCP_ESTABLISHED)
@@@ -2065,7 -2065,7 +2065,7 @@@
  	/* Change state AFTER socket is unhashed to avoid closed
  	 * socket sitting in hash tables.
  	 */
- 	sk_state_store(sk, state);
+ 	inet_sk_state_store(sk, state);
  
  #ifdef STATE_TRACE
  	SOCK_DEBUG(sk, "TCP sk=%p, State %s -> %s\n", sk, statename[oldstate], statename[state]);
@@@ -2920,7 -2920,7 +2920,7 @@@ void tcp_get_info(struct sock *sk, stru
  	if (sk->sk_type != SOCK_STREAM)
  		return;
  
- 	info->tcpi_state = sk_state_load(sk);
+ 	info->tcpi_state = inet_sk_state_load(sk);
  
  	/* Report meaningful fields for all TCP states, including listeners */
  	rate = READ_ONCE(sk->sk_pacing_rate);
@@@ -3578,6 -3578,9 +3578,9 @@@ void __init tcp_init(void
  	percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
  	percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
  	inet_hashinfo_init(&tcp_hashinfo);
+ 	inet_hashinfo2_init(&tcp_hashinfo, "tcp_listen_portaddr_hash",
+ 			    thash_entries, 21,  /* one slot per 2 MB */
+ 			    0, 64 * 1024);
  	tcp_hashinfo.bind_bucket_cachep =
  		kmem_cache_create("tcp_bind_bucket",
  				  sizeof(struct inet_bind_bucket), 0,
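
The new zc flag above selects an skb with no linear payload so the data can be
attached as zerocopy frags; a minimal userspace sketch of the existing
MSG_ZEROCOPY path it feeds, assuming fd is a connected TCP socket:

	int one = 1;

	setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &one, sizeof(one));
	if (send(fd, buf, len, MSG_ZEROCOPY) < 0)
		perror("send");
	/* completion notifications arrive later on the socket error queue */
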
diff --combined net/ipv4/udp.c
index ef45adfc0edb,db72619e07e4..6eddd0602813
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@@ -357,18 -357,12 +357,12 @@@ fail
  }
  EXPORT_SYMBOL(udp_lib_get_port);
  
- static u32 udp4_portaddr_hash(const struct net *net, __be32 saddr,
- 			      unsigned int port)
- {
- 	return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
- }
- 
  int udp_v4_get_port(struct sock *sk, unsigned short snum)
  {
  	unsigned int hash2_nulladdr =
- 		udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
+ 		ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
  	unsigned int hash2_partial =
- 		udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
+ 		ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
  
  	/* precompute partial secondary hash */
  	udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@@ -445,7 -439,7 +439,7 @@@ static struct sock *udp4_lib_lookup2(st
  				     struct sk_buff *skb)
  {
  	struct sock *sk, *result;
- 	int score, badness, matches = 0, reuseport = 0;
+ 	int score, badness;
  	u32 hash = 0;
  
  	result = NULL;
@@@ -454,23 -448,16 +448,16 @@@
  		score = compute_score(sk, net, saddr, sport,
  				      daddr, hnum, dif, sdif, exact_dif);
  		if (score > badness) {
- 			reuseport = sk->sk_reuseport;
- 			if (reuseport) {
+ 			if (sk->sk_reuseport) {
  				hash = udp_ehashfn(net, daddr, hnum,
  						   saddr, sport);
  				result = reuseport_select_sock(sk, hash, skb,
  							sizeof(struct udphdr));
  				if (result)
  					return result;
- 				matches = 1;
  			}
  			badness = score;
  			result = sk;
- 		} else if (score == badness && reuseport) {
- 			matches++;
- 			if (reciprocal_scale(hash, matches) == 0)
- 				result = sk;
- 			hash = next_pseudo_random32(hash);
  		}
  	}
  	return result;
@@@ -488,11 -475,11 +475,11 @@@ struct sock *__udp4_lib_lookup(struct n
  	unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
  	struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
  	bool exact_dif = udp_lib_exact_dif_match(net, skb);
- 	int score, badness, matches = 0, reuseport = 0;
+ 	int score, badness;
  	u32 hash = 0;
  
  	if (hslot->count > 10) {
- 		hash2 = udp4_portaddr_hash(net, daddr, hnum);
+ 		hash2 = ipv4_portaddr_hash(net, daddr, hnum);
  		slot2 = hash2 & udptable->mask;
  		hslot2 = &udptable->hash2[slot2];
  		if (hslot->count < hslot2->count)
@@@ -503,7 -490,7 +490,7 @@@
  					  exact_dif, hslot2, skb);
  		if (!result) {
  			unsigned int old_slot2 = slot2;
- 			hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
+ 			hash2 = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
  			slot2 = hash2 & udptable->mask;
  			/* avoid searching the same slot again. */
  			if (unlikely(slot2 == old_slot2))
@@@ -526,23 -513,16 +513,16 @@@ begin
  		score = compute_score(sk, net, saddr, sport,
  				      daddr, hnum, dif, sdif, exact_dif);
  		if (score > badness) {
- 			reuseport = sk->sk_reuseport;
- 			if (reuseport) {
+ 			if (sk->sk_reuseport) {
  				hash = udp_ehashfn(net, daddr, hnum,
  						   saddr, sport);
  				result = reuseport_select_sock(sk, hash, skb,
  							sizeof(struct udphdr));
  				if (result)
  					return result;
- 				matches = 1;
  			}
  			result = sk;
  			badness = score;
- 		} else if (score == badness && reuseport) {
- 			matches++;
- 			if (reciprocal_scale(hash, matches) == 0)
- 				result = sk;
- 			hash = next_pseudo_random32(hash);
  		}
  	}
  	return result;
@@@ -1775,7 -1755,7 +1755,7 @@@ EXPORT_SYMBOL(udp_lib_rehash)
  
  static void udp_v4_rehash(struct sock *sk)
  {
- 	u16 new_hash = udp4_portaddr_hash(sock_net(sk),
+ 	u16 new_hash = ipv4_portaddr_hash(sock_net(sk),
  					  inet_sk(sk)->inet_rcv_saddr,
  					  inet_sk(sk)->inet_num);
  	udp_lib_rehash(sk, new_hash);
@@@ -1966,9 -1946,9 +1946,9 @@@ static int __udp4_lib_mcast_deliver(str
  	struct sk_buff *nskb;
  
  	if (use_hash2) {
- 		hash2_any = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
+ 		hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) &
  			    udptable->mask;
- 		hash2 = udp4_portaddr_hash(net, daddr, hnum) & udptable->mask;
+ 		hash2 = ipv4_portaddr_hash(net, daddr, hnum) & udptable->mask;
  start_lookup:
  		hslot = &udptable->hash2[hash2];
  		offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
@@@ -2200,7 -2180,7 +2180,7 @@@ static struct sock *__udp4_lib_demux_lo
  					    int dif, int sdif)
  {
  	unsigned short hnum = ntohs(loc_port);
- 	unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum);
+ 	unsigned int hash2 = ipv4_portaddr_hash(net, loc_addr, hnum);
  	unsigned int slot2 = hash2 & udp_table.mask;
  	struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
  	INET_ADDR_COOKIE(acookie, rmt_addr, loc_addr);
@@@ -2502,16 -2482,14 +2482,14 @@@ int compat_udp_getsockopt(struct sock *
   *	but then block when reading it. Add special case code
   *	to work around these arguably broken applications.
   */
 -unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
 +__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
  {
 -	unsigned int mask = datagram_poll(file, sock, wait);
 +	__poll_t mask = datagram_poll(file, sock, wait);
  	struct sock *sk = sock->sk;
  
  	if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
  		mask |= POLLIN | POLLRDNORM;
  
- 	sock_rps_record_flow(sk);
- 
  	/* Check for false positives due to checksum errors */
  	if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
  	    !(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
diff --combined net/ipv6/esp6.c
index 1a7f00cd4803,7c888c6e53a9..97513f35bcc5
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@@ -141,14 -141,32 +141,32 @@@ static void esp_ssg_unref(struct xfrm_s
  static void esp_output_done(struct crypto_async_request *base, int err)
  {
  	struct sk_buff *skb = base->data;
+ 	struct xfrm_offload *xo = xfrm_offload(skb);
  	void *tmp;
- 	struct dst_entry *dst = skb_dst(skb);
- 	struct xfrm_state *x = dst->xfrm;
+ 	struct xfrm_state *x;
+ 
+ 	if (xo && (xo->flags & XFRM_DEV_RESUME))
+ 		x = skb->sp->xvec[skb->sp->len - 1];
+ 	else
+ 		x = skb_dst(skb)->xfrm;
  
  	tmp = ESP_SKB_CB(skb)->tmp;
  	esp_ssg_unref(x, tmp);
  	kfree(tmp);
- 	xfrm_output_resume(skb, err);
+ 
+ 	if (xo && (xo->flags & XFRM_DEV_RESUME)) {
+ 		if (err) {
+ 			XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+ 			kfree_skb(skb);
+ 			return;
+ 		}
+ 
+ 		skb_push(skb, skb->data - skb_mac_header(skb));
+ 		secpath_reset(skb);
+ 		xfrm_dev_resume(skb);
+ 	} else {
+ 		xfrm_output_resume(skb, err);
+ 	}
  }
  
  /* Move ESP header back into place. */
@@@ -734,17 -752,13 +752,13 @@@ static int esp_init_aead(struct xfrm_st
  	char aead_name[CRYPTO_MAX_ALG_NAME];
  	struct crypto_aead *aead;
  	int err;
- 	u32 mask = 0;
  
  	err = -ENAMETOOLONG;
  	if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
  		     x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
  		goto error;
  
- 	if (x->xso.offload_handle)
- 		mask |= CRYPTO_ALG_ASYNC;
- 
- 	aead = crypto_alloc_aead(aead_name, 0, mask);
+ 	aead = crypto_alloc_aead(aead_name, 0, 0);
  	err = PTR_ERR(aead);
  	if (IS_ERR(aead))
  		goto error;
@@@ -774,7 -788,6 +788,6 @@@ static int esp_init_authenc(struct xfrm
  	char authenc_name[CRYPTO_MAX_ALG_NAME];
  	unsigned int keylen;
  	int err;
- 	u32 mask = 0;
  
  	err = -EINVAL;
  	if (!x->ealg)
@@@ -800,10 -813,7 +813,7 @@@
  			goto error;
  	}
  
- 	if (x->xso.offload_handle)
- 		mask |= CRYPTO_ALG_ASYNC;
- 
- 	aead = crypto_alloc_aead(authenc_name, 0, mask);
+ 	aead = crypto_alloc_aead(authenc_name, 0, 0);
  	err = PTR_ERR(aead);
  	if (IS_ERR(aead))
  		goto error;
@@@ -890,12 -900,13 +900,12 @@@ static int esp6_init_state(struct xfrm_
  			x->props.header_len += IPV4_BEET_PHMAXLEN +
  					       (sizeof(struct ipv6hdr) - sizeof(struct iphdr));
  		break;
 +	default:
  	case XFRM_MODE_TRANSPORT:
  		break;
  	case XFRM_MODE_TUNNEL:
  		x->props.header_len += sizeof(struct ipv6hdr);
  		break;
 -	default:
 -		goto error;
  	}
  
  	align = ALIGN(crypto_aead_blocksize(aead), 4);
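
With the async resume path above in place, esp no longer needs to mask out
asynchronous AEAD implementations when an offload handle exists, so both
allocation sites collapse to crypto_alloc_aead(name, 0, 0). A hedged sketch
of the resulting pattern; "gcm(aes)" is only an example algorithm name:

	struct crypto_aead *aead;

	/* type = 0, mask = 0: accept any implementation, including an
	 * async one; completions are funneled through esp_output_done()
	 */
	aead = crypto_alloc_aead("gcm(aes)", 0, 0);
	if (IS_ERR(aead))
		return PTR_ERR(aead);
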
diff --combined net/ipv6/ip6_fib.c
index 9dcc3924a975,b5f19703fca6..2451cfda525a
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@@ -107,16 -107,13 +107,13 @@@ enum 
  
  void fib6_update_sernum(struct rt6_info *rt)
  {
- 	struct fib6_table *table = rt->rt6i_table;
  	struct net *net = dev_net(rt->dst.dev);
  	struct fib6_node *fn;
  
- 	spin_lock_bh(&table->tb6_lock);
  	fn = rcu_dereference_protected(rt->rt6i_node,
- 			lockdep_is_held(&table->tb6_lock));
+ 			lockdep_is_held(&rt->rt6i_table->tb6_lock));
  	if (fn)
  		fn->fn_sernum = fib6_new_sernum(net);
- 	spin_unlock_bh(&table->tb6_lock);
  }
  
  /*
@@@ -640,11 -637,6 +637,11 @@@ static struct fib6_node *fib6_add_1(str
  			if (!(fn->fn_flags & RTN_RTINFO)) {
  				RCU_INIT_POINTER(fn->leaf, NULL);
  				rt6_release(leaf);
 +			/* remove null_entry in the root node */
 +			} else if (fn->fn_flags & RTN_TL_ROOT &&
 +				   rcu_access_pointer(fn->leaf) ==
 +				   net->ipv6.ip6_null_entry) {
 +				RCU_INIT_POINTER(fn->leaf, NULL);
  			}
  
  			return fn;
@@@ -898,7 -890,7 +895,7 @@@ static int fib6_add_rt2node(struct fib6
  	ins = &fn->leaf;
  
  	for (iter = leaf; iter;
- 	     iter = rcu_dereference_protected(iter->dst.rt6_next,
+ 	     iter = rcu_dereference_protected(iter->rt6_next,
  				lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
  		/*
  		 *	Search for duplicates
@@@ -955,7 -947,7 +952,7 @@@
  			break;
  
  next_iter:
- 		ins = &iter->dst.rt6_next;
+ 		ins = &iter->rt6_next;
  	}
  
  	if (fallback_ins && !found) {
@@@ -984,7 -976,7 +981,7 @@@
  					      &sibling->rt6i_siblings);
  				break;
  			}
- 			sibling = rcu_dereference_protected(sibling->dst.rt6_next,
+ 			sibling = rcu_dereference_protected(sibling->rt6_next,
  				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
  		}
  		/* For each sibling in the list, increment the counter of
@@@ -1014,7 -1006,7 +1011,7 @@@ add
  		if (err)
  			return err;
  
- 		rcu_assign_pointer(rt->dst.rt6_next, iter);
+ 		rcu_assign_pointer(rt->rt6_next, iter);
  		atomic_inc(&rt->rt6i_ref);
  		rcu_assign_pointer(rt->rt6i_node, fn);
  		rcu_assign_pointer(*ins, rt);
@@@ -1045,7 -1037,7 +1042,7 @@@
  
  		atomic_inc(&rt->rt6i_ref);
  		rcu_assign_pointer(rt->rt6i_node, fn);
- 		rt->dst.rt6_next = iter->dst.rt6_next;
+ 		rt->rt6_next = iter->rt6_next;
  		rcu_assign_pointer(*ins, rt);
  		call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
  					  rt, extack);
@@@ -1064,14 -1056,14 +1061,14 @@@
  
  		if (nsiblings) {
  			/* Replacing an ECMP route, remove all siblings */
- 			ins = &rt->dst.rt6_next;
+ 			ins = &rt->rt6_next;
  			iter = rcu_dereference_protected(*ins,
  				    lockdep_is_held(&rt->rt6i_table->tb6_lock));
  			while (iter) {
  				if (iter->rt6i_metric > rt->rt6i_metric)
  					break;
  				if (rt6_qualify_for_ecmp(iter)) {
- 					*ins = iter->dst.rt6_next;
+ 					*ins = iter->rt6_next;
  					iter->rt6i_node = NULL;
  					fib6_purge_rt(iter, fn, info->nl_net);
  					if (rcu_access_pointer(fn->rr_ptr) == iter)
@@@ -1080,7 -1072,7 +1077,7 @@@
  					nsiblings--;
  					info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
  				} else {
- 					ins = &iter->dst.rt6_next;
+ 					ins = &iter->rt6_next;
  				}
  				iter = rcu_dereference_protected(*ins,
  					lockdep_is_held(&rt->rt6i_table->tb6_lock));
@@@ -1107,8 -1099,8 +1104,8 @@@ void fib6_force_start_gc(struct net *ne
  			  jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
  }
  
- static void fib6_update_sernum_upto_root(struct rt6_info *rt,
- 					 int sernum)
+ static void __fib6_update_sernum_upto_root(struct rt6_info *rt,
+ 					   int sernum)
  {
  	struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
  				lockdep_is_held(&rt->rt6i_table->tb6_lock));
@@@ -1122,6 -1114,11 +1119,11 @@@
  	}
  }
  
+ void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
+ {
+ 	__fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
+ }
+ 
  /*
   *	Add routing information to the routing tree.
   *	<destination addr>/<source addr>
@@@ -1235,7 -1232,7 +1237,7 @@@ int fib6_add(struct fib6_node *root, st
  
  	err = fib6_add_rt2node(fn, rt, info, mxc, extack);
  	if (!err) {
- 		fib6_update_sernum_upto_root(rt, sernum);
+ 		__fib6_update_sernum_upto_root(rt, sernum);
  		fib6_start_gc(info->nl_net, rt);
  	}
  
@@@ -1275,17 -1272,13 +1277,17 @@@ out
  	return err;
  
  failure:
 -	/* fn->leaf could be NULL if fn is an intermediate node and we
 -	 * failed to add the new route to it in both subtree creation
 -	 * failure and fib6_add_rt2node() failure case.
 -	 * In both cases, fib6_repair_tree() should be called to fix
 -	 * fn->leaf.
 +	/* fn->leaf could be NULL and fib6_repair_tree() needs to be called if:
 +	 * 1. fn is an intermediate node and we failed to add the new
 +	 * route to it, both in the subtree creation failure case and in
 +	 * the fib6_add_rt2node() failure case.
 +	 * 2. fn is the root node of the table and we failed to add the
 +	 * first default route to it.
 +	 */
 -	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
 +	if (fn &&
 +	    (!(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)) ||
 +	     (fn->fn_flags & RTN_TL_ROOT &&
 +	      !rcu_access_pointer(fn->leaf))))
  		fib6_repair_tree(info->nl_net, table, fn);
  	/* Always release dst as dst->__refcnt is guaranteed
  	 * to be taken before entering this function
@@@ -1540,12 -1533,6 +1542,12 @@@ static struct fib6_node *fib6_repair_tr
  	struct fib6_walker *w;
  	int iter = 0;
  
 +	/* Set fn->leaf to null_entry for root node. */
 +	if (fn->fn_flags & RTN_TL_ROOT) {
 +		rcu_assign_pointer(fn->leaf, net->ipv6.ip6_null_entry);
 +		return fn;
 +	}
 +
  	for (;;) {
  		struct fib6_node *fn_r = rcu_dereference_protected(fn->right,
  					    lockdep_is_held(&table->tb6_lock));
@@@ -1664,7 -1651,7 +1666,7 @@@ static void fib6_del_route(struct fib6_
  	WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
  
  	/* Unlink it */
- 	*rtp = rt->dst.rt6_next;
+ 	*rtp = rt->rt6_next;
  	rt->rt6i_node = NULL;
  	net->ipv6.rt6_stats->fib_rt_entries--;
  	net->ipv6.rt6_stats->fib_discarded_routes++;
@@@ -1692,7 -1679,7 +1694,7 @@@
  	FOR_WALKERS(net, w) {
  		if (w->state == FWS_C && w->leaf == rt) {
  			RT6_TRACE("walker %p adjusted by delroute\n", w);
- 			w->leaf = rcu_dereference_protected(rt->dst.rt6_next,
+ 			w->leaf = rcu_dereference_protected(rt->rt6_next,
  					    lockdep_is_held(&table->tb6_lock));
  			if (!w->leaf)
  				w->state = FWS_U;
@@@ -1700,15 -1687,10 +1702,15 @@@
  	}
  	read_unlock(&net->ipv6.fib6_walker_lock);
  
 -	/* If it was last route, expunge its radix tree node */
 +	/* If it was the last route, call fib6_repair_tree() to:
 +	 * 1. For the root node, put back null_entry, as when the table
 +	 *    was first created.
 +	 * 2. For other nodes, expunge the now-empty radix tree node.
 +	 */
  	if (!rcu_access_pointer(fn->leaf)) {
 -		fn->fn_flags &= ~RTN_RTINFO;
 -		net->ipv6.rt6_stats->fib_route_nodes--;
 +		if (!(fn->fn_flags & RTN_TL_ROOT)) {
 +			fn->fn_flags &= ~RTN_RTINFO;
 +			net->ipv6.rt6_stats->fib_route_nodes--;
 +		}
  		fn = fib6_repair_tree(net, table, fn);
  	}
  
@@@ -1756,7 -1738,7 +1758,7 @@@ int fib6_del(struct rt6_info *rt, struc
  			fib6_del_route(table, fn, rtp, info);
  			return 0;
  		}
- 		rtp_next = &cur->dst.rt6_next;
+ 		rtp_next = &cur->rt6_next;
  	}
  	return -ENOENT;
  }
@@@ -1912,7 -1894,7 +1914,7 @@@ static int fib6_clean_node(struct fib6_
  
  	for_each_fib6_walker_rt(w) {
  		res = c->func(rt, c->arg);
- 		if (res < 0) {
+ 		if (res == -1) {
  			w->leaf = rt;
  			res = fib6_del(rt, &info);
  			if (res) {
@@@ -1925,6 -1907,12 +1927,12 @@@
  				continue;
  			}
  			return 0;
+ 		} else if (res == -2) {
+ 			if (WARN_ON(!rt->rt6i_nsiblings))
+ 				continue;
+ 			rt = list_last_entry(&rt->rt6i_siblings,
+ 					     struct rt6_info, rt6i_siblings);
+ 			continue;
  		}
  		WARN_ON(res != 0);
  	}
@@@ -1936,7 -1924,8 +1944,8 @@@
   *	Convenient frontend to tree walker.
   *
   *	func is called on each route.
-  *		It may return -1 -> delete this route.
+  *		It may return -2 -> skip multipath route.
+  *			      -1 -> delete this route.
   *		              0  -> continue walking
   */
  
@@@ -2128,7 -2117,6 +2137,6 @@@ static void fib6_net_exit(struct net *n
  {
  	unsigned int i;
  
- 	rt6_ifdown(net, NULL);
  	del_timer_sync(&net->ipv6.ip6_fib_timer);
  
  	for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
@@@ -2167,8 -2155,8 +2175,8 @@@ int __init fib6_init(void
  	if (ret)
  		goto out_kmem_cache_create;
  
- 	ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
- 			      0);
+ 	ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
+ 				   inet6_dump_fib, 0);
  	if (ret)
  		goto out_unregister_subsys;
  
@@@ -2233,7 -2221,7 +2241,7 @@@ static int ipv6_route_yield(struct fib6
  
  	do {
  		iter->w.leaf = rcu_dereference_protected(
- 				iter->w.leaf->dst.rt6_next,
+ 				iter->w.leaf->rt6_next,
  				lockdep_is_held(&iter->tbl->tb6_lock));
  		iter->skip--;
  		if (!iter->skip && iter->w.leaf)
@@@ -2299,7 -2287,7 +2307,7 @@@ static void *ipv6_route_seq_next(struc
  	if (!v)
  		goto iter_table;
  
- 	n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next);
+ 	n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next);
  	if (n) {
  		++*pos;
  		return n;
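
The updated walker comment above spells out the new contract: a clean
callback may now return -2 to skip the remaining siblings of a multipath
route, -1 to delete the route, or 0 to continue. A minimal sketch of a
conforming callback; the function name and match condition are hypothetical:

	static int example_clean_cb(struct rt6_info *rt, void *arg)
	{
		struct net_device *dev = arg;

		if (rt->dst.dev != dev)
			return 0;	/* keep walking */
		if (rt->rt6i_nsiblings)
			return -2;	/* skip rest of this multipath route */
		return -1;		/* delete this route */
	}
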
diff --combined net/netfilter/x_tables.c
index e02a21549c99,10c19a3f4cbd..ecbdea0431d4
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@@ -39,6 -39,7 +39,6 @@@ MODULE_LICENSE("GPL")
  MODULE_AUTHOR("Harald Welte <laforge at netfilter.org>");
  MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module");
  
 -#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
  #define XT_PCPU_BLOCK_SIZE 4096
  
  struct compat_delta {
@@@ -999,7 -1000,7 +999,7 @@@ struct xt_table_info *xt_alloc_table_in
  		return NULL;
  
  	/* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
 -	if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
 +	if ((size >> PAGE_SHIFT) + 2 > totalram_pages)
  		return NULL;
  
  	info = kvmalloc(sz, GFP_KERNEL);
@@@ -1026,7 -1027,7 +1026,7 @@@ void xt_free_table_info(struct xt_table
  }
  EXPORT_SYMBOL(xt_free_table_info);
  
- /* Find table by name, grabs mutex & ref.  Returns NULL on error. */
+ /* Find table by name, grabs mutex & ref.  Returns ERR_PTR on error. */
  struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
  				    const char *name)
  {
@@@ -1042,17 -1043,17 +1042,17 @@@
  
  	/* Table doesn't exist in this netns, re-try init */
  	list_for_each_entry(t, &init_net.xt.tables[af], list) {
+ 		int err;
+ 
  		if (strcmp(t->name, name))
  			continue;
- 		if (!try_module_get(t->me)) {
- 			mutex_unlock(&xt[af].mutex);
- 			return NULL;
- 		}
- 
+ 		if (!try_module_get(t->me))
+ 			goto out;
  		mutex_unlock(&xt[af].mutex);
- 		if (t->table_init(net) != 0) {
+ 		err = t->table_init(net);
+ 		if (err < 0) {
  			module_put(t->me);
- 			return NULL;
+ 			return ERR_PTR(err);
  		}
  
  		found = t;
@@@ -1072,10 -1073,28 +1072,28 @@@
  	module_put(found->me);
   out:
  	mutex_unlock(&xt[af].mutex);
- 	return NULL;
+ 	return ERR_PTR(-ENOENT);
  }
  EXPORT_SYMBOL_GPL(xt_find_table_lock);
  
+ struct xt_table *xt_request_find_table_lock(struct net *net, u_int8_t af,
+ 					    const char *name)
+ {
+ 	struct xt_table *t = xt_find_table_lock(net, af, name);
+ 
+ #ifdef CONFIG_MODULES
+ 	if (IS_ERR(t)) {
+ 		int err = request_module("%stable_%s", xt_prefix[af], name);
+ 		if (err)
+ 			return ERR_PTR(err);
+ 		t = xt_find_table_lock(net, af, name);
+ 	}
+ #endif
+ 
+ 	return t;
+ }
+ EXPORT_SYMBOL_GPL(xt_request_find_table_lock);
+ 
  void xt_table_unlock(struct xt_table *table)
  {
  	mutex_unlock(&xt[table->af].mutex);
@@@ -1396,7 -1415,7 +1414,7 @@@ static void *xt_mttg_seq_next(struct se
  		trav->curr = trav->curr->next;
  		if (trav->curr != trav->head)
  			break;
- 		/* fallthru, _stop will unlock */
+ 		/* fall through */
  	default:
  		return NULL;
  	}
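
Because xt_find_table_lock() and the new xt_request_find_table_lock() now
return ERR_PTR() values instead of NULL, callers must switch to
IS_ERR()/PTR_ERR(). A sketch of the adjusted caller pattern, assuming a
caller that previously checked for NULL:

	struct xt_table *t;

	t = xt_request_find_table_lock(net, AF_INET, name);
	if (IS_ERR(t))
		return PTR_ERR(t);	/* e.g. -ENOENT or a module load error */

	/* ... operate on the table under the af mutex ... */
	xt_table_unlock(t);
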
diff --combined net/packet/af_packet.c
index 3b4d6a3cf190,ee7aa0ba3a67..2a80f19f0913
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@@ -247,12 -247,13 +247,13 @@@ static int packet_direct_xmit(struct sk
  	struct sk_buff *orig_skb = skb;
  	struct netdev_queue *txq;
  	int ret = NETDEV_TX_BUSY;
+ 	bool again = false;
  
  	if (unlikely(!netif_running(dev) ||
  		     !netif_carrier_ok(dev)))
  		goto drop;
  
- 	skb = validate_xmit_skb_list(skb, dev);
+ 	skb = validate_xmit_skb_list(skb, dev, &again);
  	if (skb != orig_skb)
  		goto drop;
  
@@@ -4073,12 -4074,12 +4074,12 @@@ static int packet_ioctl(struct socket *
  	return 0;
  }
  
 -static unsigned int packet_poll(struct file *file, struct socket *sock,
 +static __poll_t packet_poll(struct file *file, struct socket *sock,
  				poll_table *wait)
  {
  	struct sock *sk = sock->sk;
  	struct packet_sock *po = pkt_sk(sk);
 -	unsigned int mask = datagram_poll(file, sock, wait);
 +	__poll_t mask = datagram_poll(file, sock, wait);
  
  	spin_lock_bh(&sk->sk_receive_queue.lock);
  	if (po->rx_ring.pg_vec) {
diff --combined net/sctp/socket.c
index 8f7536de5f41,a5e2150ab013..eb68ae261054
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@@ -201,6 -201,22 +201,22 @@@ static void sctp_for_each_tx_datachunk(
  		cb(chunk);
  }
  
+ static void sctp_for_each_rx_skb(struct sctp_association *asoc, struct sock *sk,
+ 				 void (*cb)(struct sk_buff *, struct sock *))
+ 
+ {
+ 	struct sk_buff *skb, *tmp;
+ 
+ 	sctp_skb_for_each(skb, &asoc->ulpq.lobby, tmp)
+ 		cb(skb, sk);
+ 
+ 	sctp_skb_for_each(skb, &asoc->ulpq.reasm, tmp)
+ 		cb(skb, sk);
+ 
+ 	sctp_skb_for_each(skb, &asoc->ulpq.reasm_uo, tmp)
+ 		cb(skb, sk);
+ }
+ 
  /* Verify that this is a valid address. */
  static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
  				   int len)
@@@ -1528,7 -1544,7 +1544,7 @@@ static void sctp_close(struct sock *sk
  
  	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
  	sk->sk_shutdown = SHUTDOWN_MASK;
- 	sk->sk_state = SCTP_SS_CLOSING;
+ 	inet_sk_set_state(sk, SCTP_SS_CLOSING);
  
  	ep = sctp_sk(sk)->ep;
  
@@@ -1554,6 -1570,7 +1570,7 @@@
  
  		if (data_was_unread || !skb_queue_empty(&asoc->ulpq.lobby) ||
  		    !skb_queue_empty(&asoc->ulpq.reasm) ||
+ 		    !skb_queue_empty(&asoc->ulpq.reasm_uo) ||
  		    (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime)) {
  			struct sctp_chunk *chunk;
  
@@@ -2002,7 -2019,20 +2019,20 @@@ static int sctp_sendmsg(struct sock *sk
  		if (err < 0)
  			goto out_free;
  
- 		wait_connect = true;
+ 		/* If stream interleaving is enabled, wait_connect has to be
+ 		 * done before the data is enqueued, because DATA or I-DATA
+ 		 * chunks are built according to asoc->intl_enable, which is
+ 		 * only set once the connection is established.
+ 		 */
+ 		if (sctp_sk(asoc->base.sk)->strm_interleave) {
+ 			timeo = sock_sndtimeo(sk, 0);
+ 			err = sctp_wait_for_connect(asoc, &timeo);
+ 			if (err)
+ 				goto out_unlock;
+ 		} else {
+ 			wait_connect = true;
+ 		}
+ 
  		pr_debug("%s: we associated primitively\n", __func__);
  	}
  
@@@ -2281,7 -2311,7 +2311,7 @@@ static int sctp_setsockopt_events(struc
  			if (!event)
  				return -ENOMEM;
  
- 			sctp_ulpq_tail_event(&asoc->ulpq, event);
+ 			asoc->stream.si->enqueue_event(&asoc->ulpq, event);
  		}
  	}
  
@@@ -3180,7 -3210,7 +3210,7 @@@ static int sctp_setsockopt_maxseg(struc
  		if (val == 0) {
  			val = asoc->pathmtu - sp->pf->af->net_header_len;
  			val -= sizeof(struct sctphdr) +
- 			       sizeof(struct sctp_data_chunk);
+ 			       sctp_datachk_len(&asoc->stream);
  		}
  		asoc->user_frag = val;
  		asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu);
@@@ -3350,7 -3380,10 +3380,10 @@@ static int sctp_setsockopt_fragment_int
  	if (get_user(val, (int __user *)optval))
  		return -EFAULT;
  
- 	sctp_sk(sk)->frag_interleave = (val == 0) ? 0 : 1;
+ 	sctp_sk(sk)->frag_interleave = !!val;
+ 
+ 	if (!sctp_sk(sk)->frag_interleave)
+ 		sctp_sk(sk)->strm_interleave = 0;
  
  	return 0;
  }
@@@ -4023,6 -4056,40 +4056,40 @@@ out
  	return retval;
  }
  
+ static int sctp_setsockopt_interleaving_supported(struct sock *sk,
+ 						  char __user *optval,
+ 						  unsigned int optlen)
+ {
+ 	struct sctp_sock *sp = sctp_sk(sk);
+ 	struct net *net = sock_net(sk);
+ 	struct sctp_assoc_value params;
+ 	int retval = -EINVAL;
+ 
+ 	if (optlen < sizeof(params))
+ 		goto out;
+ 
+ 	optlen = sizeof(params);
+ 	if (copy_from_user(&params, optval, optlen)) {
+ 		retval = -EFAULT;
+ 		goto out;
+ 	}
+ 
+ 	if (params.assoc_id)
+ 		goto out;
+ 
+ 	if (!net->sctp.intl_enable || !sp->frag_interleave) {
+ 		retval = -EPERM;
+ 		goto out;
+ 	}
+ 
+ 	sp->strm_interleave = !!params.assoc_value;
+ 
+ 	retval = 0;
+ 
+ out:
+ 	return retval;
+ }
+ 
  /* API 6.2 setsockopt(), getsockopt()
   *
   * Applications use setsockopt() and getsockopt() to set or retrieve
@@@ -4210,6 -4277,10 +4277,10 @@@ static int sctp_setsockopt(struct sock 
  	case SCTP_STREAM_SCHEDULER_VALUE:
  		retval = sctp_setsockopt_scheduler_value(sk, optval, optlen);
  		break;
+ 	case SCTP_INTERLEAVING_SUPPORTED:
+ 		retval = sctp_setsockopt_interleaving_supported(sk, optval,
+ 								optlen);
+ 		break;
  	default:
  		retval = -ENOPROTOOPT;
  		break;
@@@ -4586,7 -4657,7 +4657,7 @@@ static void sctp_shutdown(struct sock *
  	if (how & SEND_SHUTDOWN && !list_empty(&ep->asocs)) {
  		struct sctp_association *asoc;
  
- 		sk->sk_state = SCTP_SS_CLOSING;
+ 		inet_sk_set_state(sk, SCTP_SS_CLOSING);
  		asoc = list_entry(ep->asocs.next,
  				  struct sctp_association, asocs);
  		sctp_primitive_SHUTDOWN(net, asoc, NULL);
@@@ -4680,20 -4751,11 +4751,11 @@@ int sctp_get_sctp_info(struct sock *sk
  EXPORT_SYMBOL_GPL(sctp_get_sctp_info);
  
  /* use callback to avoid exporting the core structure */
- int sctp_transport_walk_start(struct rhashtable_iter *iter)
+ void sctp_transport_walk_start(struct rhashtable_iter *iter)
  {
- 	int err;
- 
  	rhltable_walk_enter(&sctp_transport_hashtable, iter);
  
- 	err = rhashtable_walk_start(iter);
- 	if (err && err != -EAGAIN) {
- 		rhashtable_walk_stop(iter);
- 		rhashtable_walk_exit(iter);
- 		return err;
- 	}
- 
- 	return 0;
+ 	rhashtable_walk_start(iter);
  }
  
  void sctp_transport_walk_stop(struct rhashtable_iter *iter)
@@@ -4784,12 -4846,10 +4846,10 @@@ int sctp_for_each_transport(int (*cb)(s
  			    struct net *net, int *pos, void *p) {
  	struct rhashtable_iter hti;
  	struct sctp_transport *tsp;
- 	int ret;
+ 	int ret = 0;
  
  again:
- 	ret = sctp_transport_walk_start(&hti);
- 	if (ret)
- 		return ret;
+ 	sctp_transport_walk_start(&hti);
  
  	tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
  	for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
@@@ -6984,6 -7044,47 +7044,47 @@@ out
  	return retval;
  }
  
+ static int sctp_getsockopt_interleaving_supported(struct sock *sk, int len,
+ 						  char __user *optval,
+ 						  int __user *optlen)
+ {
+ 	struct sctp_assoc_value params;
+ 	struct sctp_association *asoc;
+ 	int retval = -EFAULT;
+ 
+ 	if (len < sizeof(params)) {
+ 		retval = -EINVAL;
+ 		goto out;
+ 	}
+ 
+ 	len = sizeof(params);
+ 	if (copy_from_user(&params, optval, len))
+ 		goto out;
+ 
+ 	asoc = sctp_id2assoc(sk, params.assoc_id);
+ 	if (asoc) {
+ 		params.assoc_value = asoc->intl_enable;
+ 	} else if (!params.assoc_id) {
+ 		struct sctp_sock *sp = sctp_sk(sk);
+ 
+ 		params.assoc_value = sp->strm_interleave;
+ 	} else {
+ 		retval = -EINVAL;
+ 		goto out;
+ 	}
+ 
+ 	if (put_user(len, optlen))
+ 		goto out;
+ 
+ 	if (copy_to_user(optval, &params, len))
+ 		goto out;
+ 
+ 	retval = 0;
+ 
+ out:
+ 	return retval;
+ }
+ 
  static int sctp_getsockopt(struct sock *sk, int level, int optname,
  			   char __user *optval, int __user *optlen)
  {
@@@ -7174,6 -7275,10 +7275,10 @@@
  		retval = sctp_getsockopt_scheduler_value(sk, len, optval,
  							 optlen);
  		break;
+ 	case SCTP_INTERLEAVING_SUPPORTED:
+ 		retval = sctp_getsockopt_interleaving_supported(sk, len, optval,
+ 								optlen);
+ 		break;
  	default:
  		retval = -ENOPROTOOPT;
  		break;
@@@ -7408,13 -7513,13 +7513,13 @@@ static int sctp_listen_start(struct soc
  	 * sockets.
  	 *
  	 */
- 	sk->sk_state = SCTP_SS_LISTENING;
+ 	inet_sk_set_state(sk, SCTP_SS_LISTENING);
  	if (!ep->base.bind_addr.port) {
  		if (sctp_autobind(sk))
  			return -EAGAIN;
  	} else {
  		if (sctp_get_port(sk, inet_sk(sk)->inet_num)) {
- 			sk->sk_state = SCTP_SS_CLOSED;
+ 			inet_sk_set_state(sk, SCTP_SS_CLOSED);
  			return -EADDRINUSE;
  		}
  	}
@@@ -7500,11 -7605,11 +7605,11 @@@ out
   * here, again, by modeling the current TCP/UDP code.  We don't have
   * a good way to test with it yet.
   */
 -unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
 +__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
  {
  	struct sock *sk = sock->sk;
  	struct sctp_sock *sp = sctp_sk(sk);
 -	unsigned int mask;
 +	__poll_t mask;
  
  	poll_wait(file, sk_sleep(sk), wait);
  
@@@ -8411,11 -8516,7 +8516,7 @@@ static void sctp_sock_migrate(struct so
  
  	}
  
- 	sctp_skb_for_each(skb, &assoc->ulpq.reasm, tmp)
- 		sctp_skb_set_owner_r_frag(skb, newsk);
- 
- 	sctp_skb_for_each(skb, &assoc->ulpq.lobby, tmp)
- 		sctp_skb_set_owner_r_frag(skb, newsk);
+ 	sctp_for_each_rx_skb(assoc, newsk, sctp_skb_set_owner_r_frag);
  
  	/* Set the type of socket to indicate that it is peeled off from the
  	 * original UDP-style socket or created with the accept() call on a
@@@ -8441,10 -8542,10 +8542,10 @@@
  	 * is called, set RCV_SHUTDOWN flag.
  	 */
  	if (sctp_state(assoc, CLOSED) && sctp_style(newsk, TCP)) {
- 		newsk->sk_state = SCTP_SS_CLOSED;
+ 		inet_sk_set_state(newsk, SCTP_SS_CLOSED);
  		newsk->sk_shutdown |= RCV_SHUTDOWN;
  	} else {
- 		newsk->sk_state = SCTP_SS_ESTABLISHED;
+ 		inet_sk_set_state(newsk, SCTP_SS_ESTABLISHED);
  	}
  
  	release_sock(newsk);
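
The new SCTP_INTERLEAVING_SUPPORTED option is gated twice: the
net.sctp.intl_enable sysctl must be on and the socket must already have
fragment interleaving enabled, otherwise the setsockopt path above returns
-EPERM. A hedged userspace sketch, assuming headers that carry the new
constant (error handling elided):

	#include <netinet/sctp.h>

	struct sctp_assoc_value av = {
		.assoc_id    = 0,	/* socket-wide; per-assoc ids are rejected */
		.assoc_value = 1,	/* request I-DATA chunk support */
	};
	int frag = 1;

	setsockopt(fd, IPPROTO_SCTP, SCTP_FRAGMENT_INTERLEAVE,
		   &frag, sizeof(frag));
	setsockopt(fd, IPPROTO_SCTP, SCTP_INTERLEAVING_SUPPORTED,
		   &av, sizeof(av));
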
diff --combined net/smc/af_smc.c
index 449f62e1e270,daf8075f5a4c..b6e4e2e4fe12
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@@ -520,7 -520,7 +520,7 @@@ decline_rdma
  	smc->use_fallback = true;
  	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
  		rc = smc_clc_send_decline(smc, reason_code);
- 		if (rc < sizeof(struct smc_clc_msg_decline))
+ 		if (rc < 0)
  			goto out_err;
  	}
  	goto out_connected;
@@@ -751,14 -751,16 +751,16 @@@ static void smc_listen_work(struct work
  {
  	struct smc_sock *new_smc = container_of(work, struct smc_sock,
  						smc_listen_work);
+ 	struct smc_clc_msg_proposal_prefix *pclc_prfx;
  	struct socket *newclcsock = new_smc->clcsock;
  	struct smc_sock *lsmc = new_smc->listen_smc;
  	struct smc_clc_msg_accept_confirm cclc;
  	int local_contact = SMC_REUSE_CONTACT;
  	struct sock *newsmcsk = &new_smc->sk;
- 	struct smc_clc_msg_proposal pclc;
+ 	struct smc_clc_msg_proposal *pclc;
  	struct smc_ib_device *smcibdev;
  	struct sockaddr_in peeraddr;
+ 	u8 buf[SMC_CLC_MAX_LEN];
  	struct smc_link *link;
  	int reason_code = 0;
  	int rc = 0, len;
@@@ -775,7 -777,7 +777,7 @@@
  	/* do inband token exchange -
  	 *wait for and receive SMC Proposal CLC message
  	 */
- 	reason_code = smc_clc_wait_msg(new_smc, &pclc, sizeof(pclc),
+ 	reason_code = smc_clc_wait_msg(new_smc, &buf, sizeof(buf),
  				       SMC_CLC_PROPOSAL);
  	if (reason_code < 0)
  		goto out_err;
@@@ -804,8 -806,11 +806,11 @@@
  		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
  		goto decline_rdma;
  	}
- 	if ((pclc.outgoing_subnet != subnet) ||
- 	    (pclc.prefix_len != prefix_len)) {
+ 
+ 	pclc = (struct smc_clc_msg_proposal *)&buf;
+ 	pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ 	if (pclc_prfx->outgoing_subnet != subnet ||
+ 	    pclc_prfx->prefix_len != prefix_len) {
  		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
  		goto decline_rdma;
  	}
@@@ -816,7 -821,7 +821,7 @@@
  	/* allocate connection / link group */
  	mutex_lock(&smc_create_lgr_pending);
  	local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr,
- 					smcibdev, ibport, &pclc.lcl, 0);
+ 					smcibdev, ibport, &pclc->lcl, 0);
  	if (local_contact < 0) {
  		rc = local_contact;
  		if (rc == -ENOMEM)
@@@ -879,11 -884,9 +884,9 @@@
  		}
  		/* QP confirmation over RoCE fabric */
  		reason_code = smc_serv_conf_first_link(new_smc);
- 		if (reason_code < 0) {
+ 		if (reason_code < 0)
  			/* peer is not aware of a problem */
- 			rc = reason_code;
  			goto out_err_unlock;
- 		}
  		if (reason_code > 0)
  			goto decline_rdma_unlock;
  	}
@@@ -916,8 -919,7 +919,7 @@@ decline_rdma
  	smc_conn_free(&new_smc->conn);
  	new_smc->use_fallback = true;
  	if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) {
- 		rc = smc_clc_send_decline(new_smc, reason_code);
- 		if (rc < sizeof(struct smc_clc_msg_decline))
+ 		if (smc_clc_send_decline(new_smc, reason_code) < 0)
  			goto out_err;
  	}
  	goto out_connected;
@@@ -1107,7 -1109,7 +1109,7 @@@ out
  	return rc;
  }
  
 -static unsigned int smc_accept_poll(struct sock *parent)
 +static __poll_t smc_accept_poll(struct sock *parent)
  {
  	struct smc_sock *isk;
  	struct sock *sk;
@@@ -1126,11 -1128,11 +1128,11 @@@
  	return 0;
  }
  
 -static unsigned int smc_poll(struct file *file, struct socket *sock,
 +static __poll_t smc_poll(struct file *file, struct socket *sock,
  			     poll_table *wait)
  {
  	struct sock *sk = sock->sk;
 -	unsigned int mask = 0;
 +	__poll_t mask = 0;
  	struct smc_sock *smc;
  	int rc;
  
diff --combined net/smc/smc_clc.c
index 511548085d16,abf7ceb6690b..8ac51583a063
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@@ -22,6 -22,54 +22,54 @@@
  #include "smc_clc.h"
  #include "smc_ib.h"
  
+ /* check if received message has a correct header length and contains valid
+  * heading and trailing eyecatchers
+  */
+ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm)
+ {
+ 	struct smc_clc_msg_proposal_prefix *pclc_prfx;
+ 	struct smc_clc_msg_accept_confirm *clc;
+ 	struct smc_clc_msg_proposal *pclc;
+ 	struct smc_clc_msg_decline *dclc;
+ 	struct smc_clc_msg_trail *trl;
+ 
+ 	if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+ 		return false;
+ 	switch (clcm->type) {
+ 	case SMC_CLC_PROPOSAL:
+ 		pclc = (struct smc_clc_msg_proposal *)clcm;
+ 		pclc_prfx = smc_clc_proposal_get_prefix(pclc);
+ 		if (ntohs(pclc->hdr.length) !=
+ 			sizeof(*pclc) + ntohs(pclc->iparea_offset) +
+ 			sizeof(*pclc_prfx) +
+ 			pclc_prfx->ipv6_prefixes_cnt *
+ 				sizeof(struct smc_clc_ipv6_prefix) +
+ 			sizeof(*trl))
+ 			return false;
+ 		trl = (struct smc_clc_msg_trail *)
+ 			((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl));
+ 		break;
+ 	case SMC_CLC_ACCEPT:
+ 	case SMC_CLC_CONFIRM:
+ 		clc = (struct smc_clc_msg_accept_confirm *)clcm;
+ 		if (ntohs(clc->hdr.length) != sizeof(*clc))
+ 			return false;
+ 		trl = &clc->trl;
+ 		break;
+ 	case SMC_CLC_DECLINE:
+ 		dclc = (struct smc_clc_msg_decline *)clcm;
+ 		if (ntohs(dclc->hdr.length) != sizeof(*dclc))
+ 			return false;
+ 		trl = &dclc->trl;
+ 		break;
+ 	default:
+ 		return false;
+ 	}
+ 	if (memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)))
+ 		return false;
+ 	return true;
+ }
+ 
  /* Wait for data on the tcp-socket, analyze received data
   * Returns:
   * 0 if success and it was not a decline that we received.
@@@ -35,7 -83,7 +83,7 @@@ int smc_clc_wait_msg(struct smc_sock *s
  	struct smc_clc_msg_hdr *clcm = buf;
  	struct msghdr msg = {NULL, 0};
  	int reason_code = 0;
 -	struct kvec vec;
 +	struct kvec vec = {buf, buflen};
  	int len, datlen;
  	int krflags;
  
@@@ -43,15 -91,12 +91,15 @@@
  	 * so we don't consume any subsequent CLC message or payload data
  	 * in the TCP byte stream
  	 */
 -	vec.iov_base = buf;
 -	vec.iov_len = buflen;
 +	/*
 +	 * Caller must make sure that buflen is no less than
 +	 * sizeof(struct smc_clc_msg_hdr)
 +	 */
  	krflags = MSG_PEEK | MSG_WAITALL;
  	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
 -	len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1,
 -			     sizeof(struct smc_clc_msg_hdr), krflags);
 +	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
 +			sizeof(struct smc_clc_msg_hdr));
 +	len = sock_recvmsg(smc->clcsock, &msg, krflags);
  	if (signal_pending(current)) {
  		reason_code = -EINTR;
  		clc_sk->sk_err = EINTR;
@@@ -75,9 -120,7 +123,7 @@@
  	}
  	datlen = ntohs(clcm->length);
  	if ((len < sizeof(struct smc_clc_msg_hdr)) ||
- 	    (datlen < sizeof(struct smc_clc_msg_decline)) ||
- 	    (datlen > sizeof(struct smc_clc_msg_accept_confirm)) ||
- 	    memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) ||
+ 	    (datlen > buflen) ||
  	    ((clcm->type != SMC_CLC_DECLINE) &&
  	     (clcm->type != expected_type))) {
  		smc->sk.sk_err = EPROTO;
@@@ -86,12 -129,13 +132,12 @@@
  	}
  
  	/* receive the complete CLC message */
 -	vec.iov_base = buf;
 -	vec.iov_len = buflen;
  	memset(&msg, 0, sizeof(struct msghdr));
 +	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen);
  	krflags = MSG_WAITALL;
  	smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
 -	len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags);
 +	len = sock_recvmsg(smc->clcsock, &msg, krflags);
- 	if (len < datlen) {
+ 	if (len < datlen || !smc_clc_msg_hdr_valid(clcm)) {
  		smc->sk.sk_err = EPROTO;
  		reason_code = -EPROTO;
  		goto out;
@@@ -135,7 -179,7 +181,7 @@@ int smc_clc_send_decline(struct smc_soc
  		smc->sk.sk_err = EPROTO;
  	if (len < 0)
  		smc->sk.sk_err = -len;
- 	return len;
+ 	return sock_error(&smc->sk);
  }
  
  /* send CLC PROPOSAL message across internal TCP socket */
@@@ -143,33 -187,43 +189,43 @@@ int smc_clc_send_proposal(struct smc_so
  			  struct smc_ib_device *smcibdev,
  			  u8 ibport)
  {
+ 	struct smc_clc_msg_proposal_prefix pclc_prfx;
  	struct smc_clc_msg_proposal pclc;
+ 	struct smc_clc_msg_trail trl;
  	int reason_code = 0;
+ 	struct kvec vec[3];
  	struct msghdr msg;
- 	struct kvec vec;
- 	int len, rc;
+ 	int len, plen, rc;
  
  	/* send SMC Proposal CLC message */
+ 	plen = sizeof(pclc) + sizeof(pclc_prfx) + sizeof(trl);
  	memset(&pclc, 0, sizeof(pclc));
  	memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
  	pclc.hdr.type = SMC_CLC_PROPOSAL;
- 	pclc.hdr.length = htons(sizeof(pclc));
+ 	pclc.hdr.length = htons(plen);
  	pclc.hdr.version = SMC_CLC_V1;		/* SMC version */
  	memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
  	memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
  	memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
+ 	pclc.iparea_offset = htons(0);
  
+ 	memset(&pclc_prfx, 0, sizeof(pclc_prfx));
  	/* determine subnet and mask from internal TCP socket */
- 	rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet,
- 				  &pclc.prefix_len);
+ 	rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc_prfx.outgoing_subnet,
+ 				  &pclc_prfx.prefix_len);
  	if (rc)
  		return SMC_CLC_DECL_CNFERR; /* configuration error */
- 	memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
+ 	pclc_prfx.ipv6_prefixes_cnt = 0;
+ 	memcpy(trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
  	memset(&msg, 0, sizeof(msg));
- 	vec.iov_base = &pclc;
- 	vec.iov_len = sizeof(pclc);
+ 	vec[0].iov_base = &pclc;
+ 	vec[0].iov_len = sizeof(pclc);
+ 	vec[1].iov_base = &pclc_prfx;
+ 	vec[1].iov_len = sizeof(pclc_prfx);
+ 	vec[2].iov_base = &trl;
+ 	vec[2].iov_len = sizeof(trl);
  	/* due to the few bytes needed for clc-handshake this cannot block */
- 	len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc));
+ 	len = kernel_sendmsg(smc->clcsock, &msg, vec, 3, plen);
  	if (len < sizeof(pclc)) {
  		if (len >= 0) {
  			reason_code = -ENETUNREACH;
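
The receive path above peeks at the fixed-size header first and only then
consumes the full message, now via iov_iter_kvec() plus sock_recvmsg()
instead of kernel_recvmsg(). The two-step pattern, condensed (sock, buf and
buflen stand in for the real arguments; error checks elided):

	struct kvec vec = { buf, buflen };
	struct msghdr msg = { NULL, 0 };
	int len;

	/* 1) peek at the header only; nothing is consumed from the stream */
	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1,
		      sizeof(struct smc_clc_msg_hdr));
	len = sock_recvmsg(sock, &msg, MSG_PEEK | MSG_WAITALL);

	/* 2) length is now known; read the complete CLC message */
	memset(&msg, 0, sizeof(msg));
	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &vec, 1, buflen);
	len = sock_recvmsg(sock, &msg, MSG_WAITALL);
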
diff --combined net/socket.c
index 092baa464afc,bbd2e9ceb692..60d05479b2c1
--- a/net/socket.c
+++ b/net/socket.c
@@@ -118,7 -118,7 +118,7 @@@ static ssize_t sock_write_iter(struct k
  static int sock_mmap(struct file *file, struct vm_area_struct *vma);
  
  static int sock_close(struct inode *inode, struct file *file);
 -static unsigned int sock_poll(struct file *file,
 +static __poll_t sock_poll(struct file *file,
  			      struct poll_table_struct *wait);
  static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
  #ifdef CONFIG_COMPAT
@@@ -163,12 -163,6 +163,6 @@@ static DEFINE_SPINLOCK(net_family_lock)
  static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
  
  /*
-  *	Statistics counters of the socket lists
-  */
- 
- static DEFINE_PER_CPU(int, sockets_in_use);
- 
- /*
   * Support routines.
   * Move socket addresses back and forth across the kernel/user
   * divide and look after the messy bits.
@@@ -578,7 -572,6 +572,6 @@@ struct socket *sock_alloc(void
  	inode->i_gid = current_fsgid();
  	inode->i_op = &sockfs_inode_ops;
  
- 	this_cpu_add(sockets_in_use, 1);
  	return sock;
  }
  EXPORT_SYMBOL(sock_alloc);
@@@ -605,7 -598,6 +598,6 @@@ void sock_release(struct socket *sock
  	if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
  		pr_err("%s: fasync list not empty!\n", __func__);
  
- 	this_cpu_sub(sockets_in_use, 1);
  	if (!sock->file) {
  		iput(SOCK_INODE(sock));
  		return;
@@@ -1095,9 -1087,9 +1087,9 @@@ out_release
  EXPORT_SYMBOL(sock_create_lite);
  
  /* No kernel lock held - perfect */
 -static unsigned int sock_poll(struct file *file, poll_table *wait)
 +static __poll_t sock_poll(struct file *file, poll_table *wait)
  {
 -	unsigned int busy_flag = 0;
 +	__poll_t busy_flag = 0;
  	struct socket *sock;
  
  	/*
@@@ -2622,17 -2614,8 +2614,8 @@@ core_initcall(sock_init);	/* early init
  #ifdef CONFIG_PROC_FS
  void socket_seq_show(struct seq_file *seq)
  {
- 	int cpu;
- 	int counter = 0;
- 
- 	for_each_possible_cpu(cpu)
- 	    counter += per_cpu(sockets_in_use, cpu);
- 
- 	/* It can be negative, by the way. 8) */
- 	if (counter < 0)
- 		counter = 0;
- 
- 	seq_printf(seq, "sockets: used %d\n", counter);
+ 	seq_printf(seq, "sockets: used %d\n",
+ 		   sock_inuse_get(seq->private));
  }
  #endif				/* CONFIG_PROC_FS */
  
diff --combined net/tipc/server.c
index 78a292a84afc,8ee5e86b7870..125da165e523
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@@ -264,8 -264,8 +264,8 @@@ static int tipc_receive_from_sock(struc
  	iov.iov_base = buf;
  	iov.iov_len = s->max_rcvbuf_size;
  	msg.msg_name = &addr;
 -	ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
 -			     MSG_DONTWAIT);
 +	iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len);
 +	ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT);
  	if (ret <= 0) {
  		kmem_cache_free(s->rcvbuf_cache, buf);
  		goto out_close;
@@@ -489,8 -489,8 +489,8 @@@ void tipc_conn_terminate(struct tipc_se
  	}
  }
  
- bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type,
- 			     u32 lower, u32 upper, int *conid)
+ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower,
+ 			     u32 upper, u32 filter, int *conid)
  {
  	struct tipc_subscriber *scbr;
  	struct tipc_subscr sub;
@@@ -501,7 -501,7 +501,7 @@@
  	sub.seq.lower = lower;
  	sub.seq.upper = upper;
  	sub.timeout = TIPC_WAIT_FOREVER;
- 	sub.filter = TIPC_SUB_PORTS;
+ 	sub.filter = filter;
  	*(u32 *)&sub.usr_handle = port;
  
  	con = tipc_alloc_conn(tipc_topsrv(net));
diff --combined net/tipc/socket.c
index 2aa46e8cd8fe,1f236271766c..cbabc44cd19a
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@@ -710,13 -710,13 +710,13 @@@ static int tipc_getname(struct socket *
   * imply that the operation will succeed, merely that it should be performed
   * and will not block.
   */
 -static unsigned int tipc_poll(struct file *file, struct socket *sock,
 +static __poll_t tipc_poll(struct file *file, struct socket *sock,
  			      poll_table *wait)
  {
  	struct sock *sk = sock->sk;
  	struct tipc_sock *tsk = tipc_sk(sk);
- 	struct tipc_group *grp = tsk->group;
+ 	struct tipc_group *grp;
 -	u32 revents = 0;
 +	__poll_t revents = 0;
  
  	sock_poll_wait(file, sk_sleep(sk), wait);
  
@@@ -736,9 -736,9 +736,9 @@@
  			revents |= POLLIN | POLLRDNORM;
  		break;
  	case TIPC_OPEN:
- 		if (!grp || tipc_group_size(grp))
- 			if (!tsk->cong_link_cnt)
- 				revents |= POLLOUT;
+ 		grp = tsk->group;
+ 		if ((!grp || tipc_group_is_open(grp)) && !tsk->cong_link_cnt)
+ 			revents |= POLLOUT;
  		if (!tipc_sk_type_connectionless(sk))
  			break;
  		if (skb_queue_empty(&sk->sk_receive_queue))
@@@ -928,21 -928,22 +928,22 @@@ static int tipc_send_group_anycast(stru
  	struct list_head *cong_links = &tsk->cong_links;
  	int blks = tsk_blocks(GROUP_H_SIZE + dlen);
  	struct tipc_group *grp = tsk->group;
+ 	struct tipc_msg *hdr = &tsk->phdr;
  	struct tipc_member *first = NULL;
  	struct tipc_member *mbr = NULL;
  	struct net *net = sock_net(sk);
  	u32 node, port, exclude;
- 	u32 type, inst, domain;
  	struct list_head dsts;
+ 	u32 type, inst, scope;
  	int lookups = 0;
  	int dstcnt, rc;
  	bool cong;
  
  	INIT_LIST_HEAD(&dsts);
  
- 	type = dest->addr.name.name.type;
+ 	type = msg_nametype(hdr);
  	inst = dest->addr.name.name.instance;
- 	domain = addr_domain(net, dest->scope);
+ 	scope = msg_lookup_scope(hdr);
  	exclude = tipc_group_exclude(grp);
  
  	while (++lookups < 4) {
@@@ -950,7 -951,7 +951,7 @@@
  
  		/* Look for a non-congested destination member, if any */
  		while (1) {
- 			if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts,
+ 			if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
  						 &dstcnt, exclude, false))
  				return -EHOSTUNREACH;
  			tipc_dest_pop(&dsts, &node, &port);
@@@ -1079,22 -1080,23 +1080,23 @@@ static int tipc_send_group_mcast(struc
  {
  	struct sock *sk = sock->sk;
  	DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
- 	struct tipc_name_seq *seq = &dest->addr.nameseq;
  	struct tipc_sock *tsk = tipc_sk(sk);
  	struct tipc_group *grp = tsk->group;
+ 	struct tipc_msg *hdr = &tsk->phdr;
  	struct net *net = sock_net(sk);
- 	u32 domain, exclude, dstcnt;
+ 	u32 type, inst, scope, exclude;
  	struct list_head dsts;
+ 	u32 dstcnt;
  
  	INIT_LIST_HEAD(&dsts);
  
- 	if (seq->lower != seq->upper)
- 		return -ENOTSUPP;
- 
- 	domain = addr_domain(net, dest->scope);
+ 	type = msg_nametype(hdr);
+ 	inst = dest->addr.name.name.instance;
+ 	scope = msg_lookup_scope(hdr);
  	exclude = tipc_group_exclude(grp);
- 	if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain,
- 				 &dsts, &dstcnt, exclude, true))
+ 
+ 	if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts,
+ 				 &dstcnt, exclude, true))
  		return -EHOSTUNREACH;
  
  	if (dstcnt == 1) {
@@@ -1116,24 -1118,29 +1118,29 @@@
  void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
  		       struct sk_buff_head *inputq)
  {
- 	u32 scope = TIPC_CLUSTER_SCOPE;
  	u32 self = tipc_own_addr(net);
+ 	u32 type, lower, upper, scope;
  	struct sk_buff *skb, *_skb;
- 	u32 lower = 0, upper = ~0;
- 	struct sk_buff_head tmpq;
  	u32 portid, oport, onode;
+ 	struct sk_buff_head tmpq;
  	struct list_head dports;
- 	struct tipc_msg *msg;
- 	int user, mtyp, hsz;
+ 	struct tipc_msg *hdr;
+ 	int user, mtyp, hlen;
+ 	bool exact;
  
  	__skb_queue_head_init(&tmpq);
  	INIT_LIST_HEAD(&dports);
  
  	skb = tipc_skb_peek(arrvq, &inputq->lock);
  	for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) {
- 		msg = buf_msg(skb);
- 		user = msg_user(msg);
- 		mtyp = msg_type(msg);
+ 		hdr = buf_msg(skb);
+ 		user = msg_user(hdr);
+ 		mtyp = msg_type(hdr);
+ 		hlen = skb_headroom(skb) + msg_hdr_sz(hdr);
+ 		oport = msg_origport(hdr);
+ 		onode = msg_orignode(hdr);
+ 		type = msg_nametype(hdr);
+ 
  		if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) {
  			spin_lock_bh(&inputq->lock);
  			if (skb_peek(arrvq) == skb) {
@@@ -1144,21 -1151,31 +1151,31 @@@
  			spin_unlock_bh(&inputq->lock);
  			continue;
  		}
- 		hsz = skb_headroom(skb) + msg_hdr_sz(msg);
- 		oport = msg_origport(msg);
- 		onode = msg_orignode(msg);
- 		if (onode == self)
- 			scope = TIPC_NODE_SCOPE;
- 
- 		/* Create destination port list and message clones: */
- 		if (!msg_in_group(msg)) {
- 			lower = msg_namelower(msg);
- 			upper = msg_nameupper(msg);
+ 
+ 		/* Group messages require exact scope match */
+ 		if (msg_in_group(hdr)) {
+ 			lower = 0;
+ 			upper = ~0;
+ 			scope = msg_lookup_scope(hdr);
+ 			exact = true;
+ 		} else {
+ 			/* TIPC_NODE_SCOPE means "any scope" in this context */
+ 			if (onode == self)
+ 				scope = TIPC_NODE_SCOPE;
+ 			else
+ 				scope = TIPC_CLUSTER_SCOPE;
+ 			exact = false;
+ 			lower = msg_namelower(hdr);
+ 			upper = msg_nameupper(hdr);
  		}
- 		tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper,
- 					  scope, &dports);
+ 
+ 		/* Create destination port list: */
+ 		tipc_nametbl_mc_lookup(net, type, lower, upper,
+ 				       scope, exact, &dports);
+ 
+ 		/* Clone message per destination */
  		while (tipc_dest_pop(&dports, NULL, &portid)) {
- 			_skb = __pskb_copy(skb, hsz, GFP_ATOMIC);
+ 			_skb = __pskb_copy(skb, hlen, GFP_ATOMIC);
  			if (_skb) {
  				msg_set_destport(buf_msg(_skb), portid);
  				__skb_queue_tail(&tmpq, _skb);
@@@ -1933,8 -1950,7 +1950,7 @@@ static void tipc_sk_proto_rcv(struct so
  		break;
  	case TOP_SRV:
  		tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf,
- 				      skb, inputq, xmitq);
- 		skb = NULL;
+ 				      hdr, inputq, xmitq);
  		break;
  	default:
  		break;
@@@ -2640,9 -2656,7 +2656,7 @@@ void tipc_sk_reinit(struct net *net
  	rhashtable_walk_enter(&tn->sk_rht, &iter);
  
  	do {
- 		tsk = ERR_PTR(rhashtable_walk_start(&iter));
- 		if (IS_ERR(tsk))
- 			goto walk_stop;
+ 		rhashtable_walk_start(&iter);
  
  		while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
  			spin_lock_bh(&tsk->sk.sk_lock.slock);
@@@ -2651,7 -2665,7 +2665,7 @@@
  			msg_set_orignode(msg, tn->own_addr);
  			spin_unlock_bh(&tsk->sk.sk_lock.slock);
  		}
- walk_stop:
+ 
  		rhashtable_walk_stop(&iter);
  	} while (tsk == ERR_PTR(-EAGAIN));
  }
@@@ -2734,7 -2748,6 +2748,6 @@@ void tipc_sk_rht_destroy(struct net *ne
  static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq)
  {
  	struct net *net = sock_net(&tsk->sk);
- 	u32 domain = addr_domain(net, mreq->scope);
  	struct tipc_group *grp = tsk->group;
  	struct tipc_msg *hdr = &tsk->phdr;
  	struct tipc_name_seq seq;
@@@ -2742,6 -2755,8 +2755,8 @@@
  
  	if (mreq->type < TIPC_RESERVED_TYPES)
  		return -EACCES;
+ 	if (mreq->scope > TIPC_NODE_SCOPE)
+ 		return -EINVAL;
  	if (grp)
  		return -EACCES;
  	grp = tipc_group_create(net, tsk->portid, mreq);
@@@ -2754,16 -2769,16 +2769,16 @@@
  	seq.type = mreq->type;
  	seq.lower = mreq->instance;
  	seq.upper = seq.lower;
- 	tipc_nametbl_build_group(net, grp, mreq->type, domain);
+ 	tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope);
  	rc = tipc_sk_publish(tsk, mreq->scope, &seq);
  	if (rc) {
  		tipc_group_delete(net, grp);
  		tsk->group = NULL;
  	}
- 
- 	/* Eliminate any risk that a broadcast overtakes the sent JOIN */
+ 	/* Eliminate any risk that a broadcast overtakes sent JOINs */
  	tsk->mc_method.rcast = true;
  	tsk->mc_method.mandatory = true;
+ 	tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf);
  	return rc;
  }
  
diff --combined net/xfrm/xfrm_input.c
index 5b2409746ae0,26b10eb7a206..1472c0857975
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@@ -257,7 -257,6 +257,6 @@@ int xfrm_input(struct sk_buff *skb, in
  
  		if (xo && (xo->flags & CRYPTO_DONE)) {
  			crypto_done = true;
- 			x = xfrm_input_state(skb);
  			family = XFRM_SPI_SKB_CB(skb)->family;
  
  			if (!(xo->status & CRYPTO_SUCCESS)) {
@@@ -518,7 -517,7 +517,7 @@@ int xfrm_trans_queue(struct sk_buff *sk
  		return -ENOBUFS;
  
  	XFRM_TRANS_SKB_CB(skb)->finish = finish;
 -	skb_queue_tail(&trans->queue, skb);
 +	__skb_queue_tail(&trans->queue, skb);
  	tasklet_schedule(&trans->tasklet);
  	return 0;
  }
diff --combined net/xfrm/xfrm_policy.c
index bc5eae12fb09,d8a8129b9232..073a5f8948e5
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@@ -54,7 -54,7 +54,7 @@@ static struct xfrm_policy_afinfo const 
  static struct kmem_cache *xfrm_dst_cache __read_mostly;
  static __read_mostly seqcount_t xfrm_policy_hash_generation;
  
- static void xfrm_init_pmtu(struct dst_entry *dst);
+ static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr);
  static int stale_bundle(struct dst_entry *dst);
  static int xfrm_bundle_ok(struct xfrm_dst *xdst);
  static void xfrm_policy_queue_process(struct timer_list *t);
@@@ -609,8 -609,7 +609,8 @@@ static void xfrm_hash_rebuild(struct wo
  
  	/* re-insert all policies by order of creation */
  	list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
 -		if (xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
 +		if (policy->walk.dead ||
 +		    xfrm_policy_id2dir(policy->index) >= XFRM_POLICY_MAX) {
  			/* skip socket policies */
  			continue;
  		}
@@@ -975,6 -974,8 +975,6 @@@ int xfrm_policy_flush(struct net *net, 
  	}
  	if (!cnt)
  		err = -ESRCH;
 -	else
 -		xfrm_policy_cache_flush();
  out:
  	spin_unlock_bh(&net->xfrm.xfrm_policy_lock);
  	return err;
@@@ -1256,7 -1257,7 +1256,7 @@@ EXPORT_SYMBOL(xfrm_policy_delete)
  
  int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
  {
- 	struct net *net = xp_net(pol);
+ 	struct net *net = sock_net(sk);
  	struct xfrm_policy *old_pol;
  
  #ifdef CONFIG_XFRM_SUB_POLICY
@@@ -1543,7 -1544,9 +1543,9 @@@ static inline int xfrm_fill_dst(struct 
   */
  
  static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
- 					    struct xfrm_state **xfrm, int nx,
+ 					    struct xfrm_state **xfrm,
+ 					    struct xfrm_dst **bundle,
+ 					    int nx,
  					    const struct flowi *fl,
  					    struct dst_entry *dst)
  {
@@@ -1551,8 -1554,8 +1553,8 @@@
  	unsigned long now = jiffies;
  	struct net_device *dev;
  	struct xfrm_mode *inner_mode;
- 	struct dst_entry *dst_prev = NULL;
- 	struct dst_entry *dst0 = NULL;
+ 	struct xfrm_dst *xdst_prev = NULL;
+ 	struct xfrm_dst *xdst0 = NULL;
  	int i = 0;
  	int err;
  	int header_len = 0;
@@@ -1578,13 -1581,14 +1580,14 @@@
  			goto put_states;
  		}
  
- 		if (!dst_prev)
- 			dst0 = dst1;
+ 		bundle[i] = xdst;
+ 		if (!xdst_prev)
+ 			xdst0 = xdst;
  		else
  			/* Ref count is taken during xfrm_alloc_dst()
  			 * No need to do dst_clone() on dst1
  			 */
- 			dst_prev->child = dst1;
+ 			xfrm_dst_set_child(xdst_prev, &xdst->u.dst);
  
  		if (xfrm[i]->sel.family == AF_UNSPEC) {
  			inner_mode = xfrm_ip2inner_mode(xfrm[i],
@@@ -1621,8 -1625,7 +1624,7 @@@
  		dst1->input = dst_discard;
  		dst1->output = inner_mode->afinfo->output;
  
- 		dst1->next = dst_prev;
- 		dst_prev = dst1;
+ 		xdst_prev = xdst;
  
  		header_len += xfrm[i]->props.header_len;
  		if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
@@@ -1630,40 -1633,39 +1632,39 @@@
  		trailer_len += xfrm[i]->props.trailer_len;
  	}
  
- 	dst_prev->child = dst;
- 	dst0->path = dst;
+ 	xfrm_dst_set_child(xdst_prev, dst);
+ 	xdst0->path = dst;
  
  	err = -ENODEV;
  	dev = dst->dev;
  	if (!dev)
  		goto free_dst;
  
- 	xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
- 	xfrm_init_pmtu(dst_prev);
- 
- 	for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
- 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
+ 	xfrm_init_path(xdst0, dst, nfheader_len);
+ 	xfrm_init_pmtu(bundle, nx);
  
- 		err = xfrm_fill_dst(xdst, dev, fl);
+ 	for (xdst_prev = xdst0; xdst_prev != (struct xfrm_dst *)dst;
+ 	     xdst_prev = (struct xfrm_dst *) xfrm_dst_child(&xdst_prev->u.dst)) {
+ 		err = xfrm_fill_dst(xdst_prev, dev, fl);
  		if (err)
  			goto free_dst;
  
- 		dst_prev->header_len = header_len;
- 		dst_prev->trailer_len = trailer_len;
- 		header_len -= xdst->u.dst.xfrm->props.header_len;
- 		trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
+ 		xdst_prev->u.dst.header_len = header_len;
+ 		xdst_prev->u.dst.trailer_len = trailer_len;
+ 		header_len -= xdst_prev->u.dst.xfrm->props.header_len;
+ 		trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len;
  	}
  
  out:
- 	return dst0;
+ 	return &xdst0->u.dst;
  
  put_states:
  	for (; i < nx; i++)
  		xfrm_state_put(xfrm[i]);
  free_dst:
- 	if (dst0)
- 		dst_release_immediate(dst0);
- 	dst0 = ERR_PTR(err);
+ 	if (xdst0)
+ 		dst_release_immediate(&xdst0->u.dst);
+ 	xdst0 = ERR_PTR(err);
  	goto out;
  }
  
@@@ -1742,8 -1744,6 +1743,8 @@@ void xfrm_policy_cache_flush(void
  	bool found = 0;
  	int cpu;
  
 +	might_sleep();
 +
  	local_bh_disable();
  	rcu_read_lock();
  	for_each_possible_cpu(cpu) {
@@@ -1807,7 -1807,7 +1808,7 @@@ static bool xfrm_xdst_can_reuse(struct 
  	for (i = 0; i < num; i++) {
  		if (!dst || dst->xfrm != xfrm[i])
  			return false;
- 		dst = dst->child;
+ 		dst = xfrm_dst_child(dst);
  	}
  
  	return xfrm_bundle_ok(xdst);
@@@ -1820,6 -1820,7 +1821,7 @@@ xfrm_resolve_and_create_bundle(struct x
  {
  	struct net *net = xp_net(pols[0]);
  	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+ 	struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
  	struct xfrm_dst *xdst, *old;
  	struct dst_entry *dst;
  	int err;
@@@ -1848,7 -1849,7 +1850,7 @@@
  
  	old = xdst;
  
- 	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
+ 	dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig);
  	if (IS_ERR(dst)) {
  		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
  		return ERR_CAST(dst);
@@@ -1888,8 -1889,8 +1890,8 @@@ static void xfrm_policy_queue_process(s
  	xfrm_decode_session(skb, &fl, dst->ops->family);
  	spin_unlock(&pq->hold_queue.lock);
  
- 	dst_hold(dst->path);
- 	dst = xfrm_lookup(net, dst->path, &fl, sk, 0);
+ 	dst_hold(xfrm_dst_path(dst));
+ 	dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, 0);
  	if (IS_ERR(dst))
  		goto purge_queue;
  
@@@ -1918,8 -1919,8 +1920,8 @@@
  		skb = __skb_dequeue(&list);
  
  		xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
- 		dst_hold(skb_dst(skb)->path);
- 		dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0);
+ 		dst_hold(xfrm_dst_path(skb_dst(skb)));
+ 		dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0);
  		if (IS_ERR(dst)) {
  			kfree_skb(skb);
  			continue;
@@@ -2020,8 -2021,8 +2022,8 @@@ static struct xfrm_dst *xfrm_create_dum
  	dst1->output = xdst_queue_output;
  
  	dst_hold(dst);
- 	dst1->child = dst;
- 	dst1->path = dst;
+ 	xfrm_dst_set_child(xdst, dst);
+ 	xdst->path = dst;
  
  	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);
  
@@@ -2584,7 -2585,7 +2586,7 @@@ static int stale_bundle(struct dst_entr
  
  void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
  {
- 	while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
+ 	while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) {
  		dst->dev = dev_net(dev)->loopback_dev;
  		dev_hold(dst->dev);
  		dev_put(dev);
@@@ -2608,13 -2609,15 +2610,15 @@@ static struct dst_entry *xfrm_negative_
  	return dst;
  }
  
- static void xfrm_init_pmtu(struct dst_entry *dst)
+ static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr)
  {
- 	do {
- 		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+ 	while (nr--) {
+ 		struct xfrm_dst *xdst = bundle[nr];
  		u32 pmtu, route_mtu_cached;
+ 		struct dst_entry *dst;
  
- 		pmtu = dst_mtu(dst->child);
+ 		dst = &xdst->u.dst;
+ 		pmtu = dst_mtu(xfrm_dst_child(dst));
  		xdst->child_mtu_cached = pmtu;
  
  		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
@@@ -2626,7 -2629,7 +2630,7 @@@
  			pmtu = route_mtu_cached;
  
  		dst_metric_set(dst, RTAX_MTU, pmtu);
- 	} while ((dst = dst->next));
+ 	}
  }
  
  /* Check that the bundle accepts the flow and its components are
@@@ -2635,19 -2638,20 +2639,20 @@@
  
  static int xfrm_bundle_ok(struct xfrm_dst *first)
  {
+ 	struct xfrm_dst *bundle[XFRM_MAX_DEPTH];
  	struct dst_entry *dst = &first->u.dst;
- 	struct xfrm_dst *last;
+ 	struct xfrm_dst *xdst;
+ 	int start_from, nr;
  	u32 mtu;
  
- 	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
+ 	if (!dst_check(xfrm_dst_path(dst), ((struct xfrm_dst *)dst)->path_cookie) ||
  	    (dst->dev && !netif_running(dst->dev)))
  		return 0;
  
  	if (dst->flags & DST_XFRM_QUEUE)
  		return 1;
  
- 	last = NULL;
- 
+ 	start_from = nr = 0;
  	do {
  		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
  
@@@ -2659,9 -2663,11 +2664,11 @@@
  		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
  			return 0;
  
- 		mtu = dst_mtu(dst->child);
+ 		bundle[nr++] = xdst;
+ 
+ 		mtu = dst_mtu(xfrm_dst_child(dst));
  		if (xdst->child_mtu_cached != mtu) {
- 			last = xdst;
+ 			start_from = nr;
  			xdst->child_mtu_cached = mtu;
  		}
  
@@@ -2669,30 -2675,30 +2676,30 @@@
  			return 0;
  		mtu = dst_mtu(xdst->route);
  		if (xdst->route_mtu_cached != mtu) {
- 			last = xdst;
+ 			start_from = nr;
  			xdst->route_mtu_cached = mtu;
  		}
  
- 		dst = dst->child;
+ 		dst = xfrm_dst_child(dst);
  	} while (dst->xfrm);
  
- 	if (likely(!last))
+ 	if (likely(!start_from))
  		return 1;
  
- 	mtu = last->child_mtu_cached;
- 	for (;;) {
- 		dst = &last->u.dst;
+ 	xdst = bundle[start_from - 1];
+ 	mtu = xdst->child_mtu_cached;
+ 	while (start_from--) {
+ 		dst = &xdst->u.dst;
  
  		mtu = xfrm_state_mtu(dst->xfrm, mtu);
- 		if (mtu > last->route_mtu_cached)
- 			mtu = last->route_mtu_cached;
+ 		if (mtu > xdst->route_mtu_cached)
+ 			mtu = xdst->route_mtu_cached;
  		dst_metric_set(dst, RTAX_MTU, mtu);
- 
- 		if (last == first)
+ 		if (!start_from)
  			break;
  
- 		last = (struct xfrm_dst *)last->u.dst.next;
- 		last->child_mtu_cached = mtu;
+ 		xdst = bundle[start_from - 1];
+ 		xdst->child_mtu_cached = mtu;
  	}
  
  	return 1;
@@@ -2700,22 -2706,20 +2707,20 @@@
  
  static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
  {
- 	return dst_metric_advmss(dst->path);
+ 	return dst_metric_advmss(xfrm_dst_path(dst));
  }
  
  static unsigned int xfrm_mtu(const struct dst_entry *dst)
  {
  	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
  
- 	return mtu ? : dst_mtu(dst->path);
+ 	return mtu ? : dst_mtu(xfrm_dst_path(dst));
  }
  
  static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst,
  					const void *daddr)
  {
- 	const struct dst_entry *path = dst->path;
- 
- 	for (; dst != path; dst = dst->child) {
+ 	while (dst->xfrm) {
  		const struct xfrm_state *xfrm = dst->xfrm;
  
  		if (xfrm->props.mode == XFRM_MODE_TRANSPORT)
@@@ -2724,6 -2728,8 +2729,8 @@@
  			daddr = xfrm->coaddr;
  		else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR))
  			daddr = &xfrm->id.daddr;
+ 
+ 		dst = xfrm_dst_child(dst);
  	}
  	return daddr;
  }
@@@ -2732,7 -2738,7 +2739,7 @@@ static struct neighbour *xfrm_neigh_loo
  					   struct sk_buff *skb,
  					   const void *daddr)
  {
- 	const struct dst_entry *path = dst->path;
+ 	const struct dst_entry *path = xfrm_dst_path(dst);
  
  	if (!skb)
  		daddr = xfrm_get_dst_nexthop(dst, daddr);
@@@ -2741,7 -2747,7 +2748,7 @@@
  
  static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr)
  {
- 	const struct dst_entry *path = dst->path;
+ 	const struct dst_entry *path = xfrm_dst_path(dst);
  
  	daddr = xfrm_get_dst_nexthop(dst, daddr);
  	path->ops->confirm_neigh(path, daddr);
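
Across the xfrm_policy.c hunks, direct dst->child and dst->path
dereferences give way to xfrm_dst_child() and xfrm_dst_path() accessors,
preparing for the removal of both pointers from struct dst_entry. The child
accessor presumably reduces to something like this simplified sketch:

	static inline struct dst_entry *xfrm_dst_child(const struct dst_entry *dst)
	{
	#ifdef CONFIG_XFRM
		if (dst->xfrm) {
			const struct xfrm_dst *xdst = (const struct xfrm_dst *)dst;

			return xdst->child;
		}
	#endif
		return NULL;
	}
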
diff --combined net/xfrm/xfrm_state.c
index 429957412633,cc4c519cad76..20b1e414dbee
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@@ -313,14 -313,13 +313,14 @@@ retry
  	if ((type && !try_module_get(type->owner)))
  		type = NULL;
  
 +	rcu_read_unlock();
 +
  	if (!type && try_load) {
  		request_module("xfrm-offload-%d-%d", family, proto);
  		try_load = 0;
  		goto retry;
  	}
  
 -	rcu_read_unlock();
  	return type;
  }
  
@@@ -1535,12 -1534,8 +1535,12 @@@ out
  	err = -EINVAL;
  	spin_lock_bh(&x1->lock);
  	if (likely(x1->km.state == XFRM_STATE_VALID)) {
 -		if (x->encap && x1->encap)
 +		if (x->encap && x1->encap &&
 +		    x->encap->encap_type == x1->encap->encap_type)
  			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
 +		else if (x->encap || x1->encap)
 +			goto fail;
 +
  		if (x->coaddr && x1->coaddr) {
  			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
  		}
@@@ -1557,8 -1552,6 +1557,8 @@@
  		x->km.state = XFRM_STATE_DEAD;
  		__xfrm_state_put(x);
  	}
 +
 +fail:
  	spin_unlock_bh(&x1->lock);
  
  	xfrm_state_put(x1);
@@@ -2056,6 -2049,13 +2056,13 @@@ int xfrm_user_policy(struct sock *sk, i
  	struct xfrm_mgr *km;
  	struct xfrm_policy *pol = NULL;
  
+ 	if (!optval && !optlen) {
+ 		xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
+ 		xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
+ 		__sk_dst_reset(sk);
+ 		return 0;
+ 	}
+ 
  	if (optlen <= 0 || optlen > PAGE_SIZE)
  		return -EMSGSIZE;
  
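The new !optval && !optlen early return in xfrm_user_policy() gives
userspace a way to drop any per-socket policy by passing a NULL
buffer of zero length.  A minimal usage sketch from the userspace
side (IP_XFRM_POLICY is the existing IPv4 sockopt that is routed to
xfrm_user_policy(); clear_sk_xfrm_policy() is a made-up helper name,
and as with any socket policy update the caller needs CAP_NET_ADMIN):

#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>

#ifndef IP_XFRM_POLICY
#define IP_XFRM_POLICY 17	/* from linux/in.h */
#endif

/* Clear any per-socket IPsec policy on an IPv4 socket; relies on a
 * kernel that accepts the NULL optval handled in the hunk above.
 */
static int clear_sk_xfrm_policy(int fd)
{
	if (setsockopt(fd, IPPROTO_IP, IP_XFRM_POLICY, NULL, 0) < 0) {
		perror("setsockopt(IP_XFRM_POLICY)");
		return -1;
	}
	return 0;
}
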
diff --combined tools/testing/selftests/bpf/test_align.c
index 471bbbdb94db,fe916d29e166..e19b410125eb
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@@ -64,11 -64,11 +64,11 @@@ static struct bpf_align_test tests[] = 
  		.matches = {
  			{1, "R1=ctx(id=0,off=0,imm=0)"},
  			{1, "R10=fp0"},
- 			{1, "R3=inv2"},
- 			{2, "R3=inv4"},
- 			{3, "R3=inv8"},
- 			{4, "R3=inv16"},
- 			{5, "R3=inv32"},
+ 			{1, "R3_w=inv2"},
+ 			{2, "R3_w=inv4"},
+ 			{3, "R3_w=inv8"},
+ 			{4, "R3_w=inv16"},
+ 			{5, "R3_w=inv32"},
  		},
  	},
  	{
@@@ -92,17 -92,17 +92,17 @@@
  		.matches = {
  			{1, "R1=ctx(id=0,off=0,imm=0)"},
  			{1, "R10=fp0"},
- 			{1, "R3=inv1"},
- 			{2, "R3=inv2"},
- 			{3, "R3=inv4"},
- 			{4, "R3=inv8"},
- 			{5, "R3=inv16"},
- 			{6, "R3=inv1"},
- 			{7, "R4=inv32"},
- 			{8, "R4=inv16"},
- 			{9, "R4=inv8"},
- 			{10, "R4=inv4"},
- 			{11, "R4=inv2"},
+ 			{1, "R3_w=inv1"},
+ 			{2, "R3_w=inv2"},
+ 			{3, "R3_w=inv4"},
+ 			{4, "R3_w=inv8"},
+ 			{5, "R3_w=inv16"},
+ 			{6, "R3_w=inv1"},
+ 			{7, "R4_w=inv32"},
+ 			{8, "R4_w=inv16"},
+ 			{9, "R4_w=inv8"},
+ 			{10, "R4_w=inv4"},
+ 			{11, "R4_w=inv2"},
  		},
  	},
  	{
@@@ -121,12 -121,12 +121,12 @@@
  		.matches = {
  			{1, "R1=ctx(id=0,off=0,imm=0)"},
  			{1, "R10=fp0"},
- 			{1, "R3=inv4"},
- 			{2, "R3=inv8"},
- 			{3, "R3=inv10"},
- 			{4, "R4=inv8"},
- 			{5, "R4=inv12"},
- 			{6, "R4=inv14"},
+ 			{1, "R3_w=inv4"},
+ 			{2, "R3_w=inv8"},
+ 			{3, "R3_w=inv10"},
+ 			{4, "R4_w=inv8"},
+ 			{5, "R4_w=inv12"},
+ 			{6, "R4_w=inv14"},
  		},
  	},
  	{
@@@ -143,10 -143,10 +143,10 @@@
  		.matches = {
  			{1, "R1=ctx(id=0,off=0,imm=0)"},
  			{1, "R10=fp0"},
- 			{1, "R3=inv7"},
- 			{2, "R3=inv7"},
- 			{3, "R3=inv14"},
- 			{4, "R3=inv56"},
+ 			{1, "R3_w=inv7"},
+ 			{2, "R3_w=inv7"},
+ 			{3, "R3_w=inv14"},
+ 			{4, "R3_w=inv56"},
  		},
  	},
  
@@@ -185,18 -185,18 +185,18 @@@
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  		.matches = {
  			{7, "R0=pkt(id=0,off=8,r=8,imm=0)"},
- 			{7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- 			{8, "R3=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
- 			{9, "R3=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- 			{10, "R3=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- 			{11, "R3=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ 			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ 			{8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ 			{9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ 			{10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ 			{11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
  			{18, "R3=pkt_end(id=0,off=0,imm=0)"},
- 			{18, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- 			{19, "R4=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
- 			{20, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
- 			{21, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- 			{22, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- 			{23, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ 			{18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ 			{19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
+ 			{20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ 			{21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ 			{22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ 			{23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
  		},
  	},
  	{
@@@ -217,16 -217,16 +217,16 @@@
  		},
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  		.matches = {
- 			{7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- 			{8, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- 			{9, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- 			{10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- 			{11, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
- 			{12, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- 			{13, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- 			{14, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- 			{15, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- 			{16, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ 			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ 			{8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ 			{9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ 			{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ 			{11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ 			{12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ 			{13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ 			{14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ 			{15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ 			{16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
  		},
  	},
  	{
@@@ -257,14 -257,14 +257,14 @@@
  		},
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  		.matches = {
- 			{4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
- 			{5, "R5=pkt(id=0,off=14,r=0,imm=0)"},
- 			{6, "R4=pkt(id=0,off=14,r=0,imm=0)"},
+ 			{4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
+ 			{5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"},
+ 			{6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"},
  			{10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
  			{10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
- 			{10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- 			{14, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
- 			{15, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+ 			{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ 			{14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+ 			{15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
  		},
  	},
  	{
@@@ -320,11 -320,11 +320,11 @@@
  			 * alignment of 4.
  			 */
  			{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
- 			{8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ 			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
  			/* Offset is added to packet pointer R5, resulting in
  			 * known fixed offset, and variable offset from R6.
  			 */
- 			{11, "R5=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ 			{11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
  			/* At the time the word size load is performed from R5,
  			 * its total offset is NET_IP_ALIGN + reg->off (0) +
  			 * reg->aux_off (14) which is 16.  Then the variable
@@@ -336,11 -336,11 +336,11 @@@
  			/* Variable offset is added to R5 packet pointer,
  			 * resulting in auxiliary alignment of 4.
  			 */
- 			{18, "R5=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ 			{18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
  			/* Constant offset is added to R5, resulting in
  			 * reg->off of 14.
  			 */
- 			{19, "R5=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ 			{19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
  			/* At the time the word size load is performed from R5,
  			 * its total fixed offset is NET_IP_ALIGN + reg->off
  			 * (14) which is 16.  Then the variable offset is 4-byte
@@@ -352,18 -352,18 +352,18 @@@
  			/* Constant offset is added to R5 packet pointer,
  			 * resulting in reg->off value of 14.
  			 */
- 			{26, "R5=pkt(id=0,off=14,r=8"},
+ 			{26, "R5_w=pkt(id=0,off=14,r=8"},
  			/* Variable offset is added to R5, resulting in a
  			 * variable offset of (4n).
  			 */
- 			{27, "R5=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ 			{27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
  			/* Constant is added to R5 again, setting reg->off to 18. */
- 			{28, "R5=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ 			{28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
  			/* And once more we add a variable; resulting var_off
  			 * is still (4n), fixed offset is not changed.
  			 * Also, we create a new reg->id.
  			 */
- 			{29, "R5=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ 			{29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
  			/* At the time the word size load is performed from R5,
  			 * its total fixed offset is NET_IP_ALIGN + reg->off (18)
  			 * which is 20.  Then the variable offset is (4n), so
@@@ -410,11 -410,11 +410,11 @@@
  			 * alignment of 4.
  			 */
  			{8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
- 			{8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ 			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
  			/* Adding 14 makes R6 be (4n+2) */
- 			{9, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ 			{9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
  			/* Packet pointer has (4n+2) offset */
- 			{11, "R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ 			{11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
  			{13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
  			/* At the time the word size load is performed from R5,
  			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@@ -426,11 -426,11 +426,11 @@@
  			/* Newly read value in R6 was shifted left by 2, so has
  			 * known alignment of 4.
  			 */
- 			{18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ 			{18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
  			/* Added (4n) to packet pointer's (4n+2) var_off, giving
  			 * another (4n+2).
  			 */
- 			{19, "R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ 			{19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
  			{21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
  			/* At the time the word size load is performed from R5,
  			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@@ -473,8 -473,28 +473,8 @@@
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  		.result = REJECT,
  		.matches = {
- 			{4, "R5=pkt(id=0,off=0,r=0,imm=0)"},
+ 			{4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
 -			/* ptr & 0x40 == either 0 or 0x40 */
 -			{5, "R5_w=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"},
 -			/* ptr << 2 == unknown, (4n) */
 -			{7, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
 -			/* (4n) + 14 == (4n+2).  We blow our bounds, because
 -			 * the add could overflow.
 -			 */
 -			{8, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
 -			/* Checked s>=0 */
 -			{10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 -			/* packet pointer + nonnegative (4n+2) */
 -			{12, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 -			{14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 -			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
 -			 * We checked the bounds, but it might have been able
 -			 * to overflow if the packet pointer started in the
 -			 * upper half of the address space.
 -			 * So we did not get a 'range' on R6, and the access
 -			 * attempt will fail.
 -			 */
 -			{16, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
 +			/* R5 bitwise operator &= on pointer prohibited */
  		}
  	},
  	{
@@@ -510,11 -530,11 +510,11 @@@
  			 * alignment of 4.
  			 */
  			{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
- 			{9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ 			{9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
  			/* Adding 14 makes R6 be (4n+2) */
- 			{10, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ 			{10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
  			/* New unknown value in R7 is (4n) */
- 			{11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ 			{11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
  			/* Subtracting it from R6 blows our unsigned bounds */
  			{12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"},
  			/* Checked s>= 0 */
@@@ -563,15 -583,15 +563,15 @@@
  			 * alignment of 4.
  			 */
  			{7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
- 			{10, "R6=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
+ 			{10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
  			/* Adding 14 makes R6 be (4n+2) */
- 			{11, "R6=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
+ 			{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
  			/* Subtracting from packet pointer overflows ubounds */
- 			{13, "R5=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
+ 			{13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
  			/* New unknown value in R7 is (4n), >= 76 */
- 			{15, "R7=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
+ 			{15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
  			/* Adding it to packet pointer gives nice bounds again */
- 			{16, "R5=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+ 			{16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
  			/* At the time the word size load is performed from R5,
  			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
  			 * which is 2.  Then the variable offset is (4n+2), so

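Every expected-output change in the test_align.c hunks is the same
mechanical update: the verifier now appends a _w suffix to registers
that were written in the state being printed (liveness tracking), so
the match strings gain _w while the var_off=(value; mask) contents
are unchanged.  For reading those pairs, a small self-contained
sketch (illustrative only, not the kernel's tnum code, and
tnum_min_alignment() is a made-up name): bits set in mask are
unknown, all other bits are fixed to the matching bits of value, so
the guaranteed alignment is the lowest bit that could possibly be
set.

#include <stdint.h>
#include <stdio.h>

/* Lowest possibly-set bit of a (value; mask) pair.  Returns 0 for
 * the all-zero pair, which describes the constant 0 (aligned to
 * everything).
 */
static uint64_t tnum_min_alignment(uint64_t value, uint64_t mask)
{
	uint64_t maybe_set = value | mask;	/* bits that may be 1 */

	return maybe_set & -maybe_set;		/* lowest such bit */
}

int main(void)
{
	/* (0x0; 0x3fc): low two bits known zero, 4-byte alignment */
	printf("%llu\n", (unsigned long long)tnum_min_alignment(0x0, 0x3fc));
	/* (0x2; 0x7fc): a (4n+2) value, only 2-byte alignment */
	printf("%llu\n", (unsigned long long)tnum_min_alignment(0x2, 0x7fc));
	return 0;
}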