[linux-next] LinuxNextTracking branch, master, updated. next-20160923

batman at open-mesh.org batman at open-mesh.org
Sat Sep 24 00:15:50 CEST 2016


The following commit has been merged in the master branch:
commit 2b4d58ea31a161221c5fe57a22d30c88847a3cad
Merge: 9ebb580225aea0027850bdafb7bda9f15cacbdcc cdd0766d7da19085e88df86d1e5e21d9fe3d374f
Author: Stephen Rothwell <sfr at canb.auug.org.au>
Date:   Fri Sep 23 10:57:04 2016 +1000

    Merge remote-tracking branch 'net-next/master'

diff --combined MAINTAINERS
index 153545d,ce80b36..a1cee6e
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -636,6 -636,15 +636,15 @@@ F:	drivers/tty/serial/altera_jtaguart.
  F:	include/linux/altera_uart.h
  F:	include/linux/altera_jtaguart.h
  
+ AMAZON ETHERNET DRIVERS
+ M:	Netanel Belgazal <netanel at annapurnalabs.com>
+ R:	Saeed Bishara <saeed at annapurnalabs.com>
+ R:	Zorik Machulsky <zorik at annapurnalabs.com>
+ L:	netdev at vger.kernel.org
+ S:	Supported
+ F:	Documentation/networking/ena.txt
+ F:	drivers/net/ethernet/amazon/
+ 
  AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER
  M:	Tom Lendacky <thomas.lendacky at amd.com>
  M:	Gary Hook <gary.hook at amd.com>
@@@ -857,13 -866,6 +866,13 @@@ F:	drivers/net/phy/mdio-xgene.
  F:	Documentation/devicetree/bindings/net/apm-xgene-enet.txt
  F:	Documentation/devicetree/bindings/net/apm-xgene-mdio.txt
  
 +APPLIED MICRO (APM) X-GENE SOC PMU
 +M:	Tai Nguyen <ttnguyen at apm.com>
 +S:	Supported
 +F:	drivers/perf/xgene_pmu.c
 +F:	Documentation/perf/xgene-pmu.txt
 +F:	Documentation/devicetree/bindings/perf/apm-xgene-pmu.txt
 +
  APTINA CAMERA SENSOR PLL
  M:	Laurent Pinchart <Laurent.pinchart at ideasonboard.com>
  L:	linux-media at vger.kernel.org
@@@ -920,17 -922,15 +929,17 @@@ F:	arch/arm/include/asm/floppy.
  
  ARM PMU PROFILING AND DEBUGGING
  M:	Will Deacon <will.deacon at arm.com>
 -R:	Mark Rutland <mark.rutland at arm.com>
 +M:	Mark Rutland <mark.rutland at arm.com>
  S:	Maintained
 +L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
  F:	arch/arm*/kernel/perf_*
  F:	arch/arm/oprofile/common.c
  F:	arch/arm*/kernel/hw_breakpoint.c
  F:	arch/arm*/include/asm/hw_breakpoint.h
  F:	arch/arm*/include/asm/perf_event.h
 -F:	drivers/perf/arm_pmu.c
 +F:	drivers/perf/*
  F:	include/linux/perf/arm_pmu.h
 +F:	Documentation/devicetree/bindings/arm/pmu.txt
  
  ARM PORT
  M:	Russell King <linux at armlinux.org.uk>
@@@ -1001,7 -1001,6 +1010,7 @@@ M:	Chen-Yu Tsai <wens at csie.org
  L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
  S:	Maintained
  N:	sun[x456789]i
 +F:	arch/arm/boot/dts/ntc-gr8*
  
  ARM/Allwinner SoC Clock Support
  M:	Emilio López <emilio at elopez.com.ar>
@@@ -1453,7 -1452,6 +1462,7 @@@ F:	arch/arm/mach-orion5x/ts78xx-
  ARM/OXNAS platform support
  M:	Neil Armstrong <narmstrong at baylibre.com>
  L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
 +L:	linux-oxnas at lists.tuxfamily.org (moderated for non-subscribers)
  S:	Maintained
  F:	arch/arm/mach-oxnas/
  F:	arch/arm/boot/dts/oxnas*
@@@ -1636,7 -1634,6 +1645,7 @@@ N:	rockchi
  ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
  M:	Kukjin Kim <kgene at kernel.org>
  M:	Krzysztof Kozlowski <krzk at kernel.org>
 +R:	Javier Martinez Canillas <javier at osg.samsung.com>
  L:	linux-arm-kernel at lists.infradead.org (moderated for non-subscribers)
  L:	linux-samsung-soc at vger.kernel.org (moderated for non-subscribers)
  S:	Maintained
@@@ -1688,6 -1685,14 +1697,6 @@@ S:	Maintaine
  F:	arch/arm/plat-samsung/s5p-dev-mfc.c
  F:	drivers/media/platform/s5p-mfc/
  
 -ARM/SAMSUNG S5P SERIES TV SUBSYSTEM SUPPORT
 -M:	Kyungmin Park <kyungmin.park at samsung.com>
 -M:	Tomasz Stanislawski <t.stanislaws at samsung.com>
 -L:	linux-arm-kernel at lists.infradead.org
 -L:	linux-media at vger.kernel.org
 -S:	Maintained
 -F:	drivers/media/platform/s5p-tv/
 -
  ARM/SAMSUNG S5P SERIES HDMI CEC SUBSYSTEM SUPPORT
  M:	Kyungmin Park <kyungmin.park at samsung.com>
  L:	linux-arm-kernel at lists.infradead.org
@@@ -1845,7 -1850,6 +1854,7 @@@ F:	arch/arm64/boot/dts/socionext
  F:	drivers/bus/uniphier-system-bus.c
  F:	drivers/i2c/busses/i2c-uniphier*
  F:	drivers/pinctrl/uniphier/
 +F:	drivers/reset/reset-uniphier.c
  F:	drivers/tty/serial/8250/8250_uniphier.c
  N:	uniphier
  
@@@ -2227,9 -2231,9 +2236,9 @@@ S:	Maintaine
  F:	drivers/net/wireless/atmel/atmel*
  
  ATMEL MAXTOUCH DRIVER
 -M:	Nick Dyer <nick.dyer at itdev.co.uk>
 -T:	git git://github.com/atmel-maxtouch/linux.git
 -S:	Supported
 +M:	Nick Dyer <nick at shmanahar.org>
 +T:	git git://github.com/ndyer/linux.git
 +S:	Maintained
  F:	Documentation/devicetree/bindings/input/atmel,maxtouch.txt
  F:	drivers/input/touchscreen/atmel_mxt_ts.c
  F:	include/linux/platform_data/atmel_mxt_ts.h
@@@ -2505,7 -2509,7 +2514,7 @@@ S:	Supporte
  F:	kernel/bpf/
  
  BROADCOM B44 10/100 ETHERNET DRIVER
 -M:	Gary Zambrano <zambrano at broadcom.com>
 +M:	Michael Chan <michael.chan at broadcom.com>
  L:	netdev at vger.kernel.org
  S:	Supported
  F:	drivers/net/ethernet/broadcom/b44.*
@@@ -2580,13 -2584,6 +2589,13 @@@ F:	arch/arm/mach-bcm/bcm_5301x.
  F:	arch/arm/boot/dts/bcm5301x*.dtsi
  F:	arch/arm/boot/dts/bcm470*
  
 +BROADCOM BCM53573 ARM ARCHITECTURE
 +M:	Rafał Miłecki <rafal at milecki.pl>
 +L:	linux-arm-kernel at lists.infradead.org
 +S:	Maintained
 +F:	arch/arm/boot/dts/bcm53573*
 +F:	arch/arm/boot/dts/bcm47189*
 +
  BROADCOM BCM63XX ARM ARCHITECTURE
  M:	Florian Fainelli <f.fainelli at gmail.com>
  M:	bcm-kernel-feedback-list at broadcom.com
@@@ -2786,7 -2783,7 +2795,7 @@@ L:	linux-media at vger.kernel.or
  W:	https://linuxtv.org
  T:	git git://linuxtv.org/media_tree.git
  S:	Odd fixes
 -F:	Documentation/video4linux/bttv/
 +F:	Documentation/media/v4l-drivers/bttv*
  F:	drivers/media/pci/bt8xx/bttv*
  
  BUSLOGIC SCSI DRIVER
@@@ -2831,7 -2828,7 +2840,7 @@@ M:	Jonathan Corbet <corbet at lwn.net
  L:	linux-media at vger.kernel.org
  T:	git git://linuxtv.org/media_tree.git
  S:	Maintained
 -F:	Documentation/video4linux/cafe_ccic
 +F:	Documentation/media/v4l-drivers/cafe_ccic*
  F:	drivers/media/platform/marvell-ccic/
  
  CAIF NETWORK LAYER
@@@ -2900,14 -2897,6 +2909,14 @@@ S:	Maintaine
  F:	drivers/iio/light/cm*
  F:	Documentation/devicetree/bindings/i2c/trivial-devices.txt
  
 +CAVIUM I2C DRIVER
 +M:	Jan Glauber <jglauber at cavium.com>
 +M:	David Daney <david.daney at cavium.com>
 +W:	http://www.cavium.com
 +S:	Supported
 +F:	drivers/i2c/busses/i2c-octeon*
 +F:	drivers/i2c/busses/i2c-thunderx*
 +
  CAVIUM LIQUIDIO NETWORK DRIVER
  M:     Derek Chickles <derek.chickles at caviumnetworks.com>
  M:     Satanand Burla <satananda.burla at caviumnetworks.com>
@@@ -2933,7 -2922,7 +2942,7 @@@ T:	git git://linuxtv.org/media_tree.gi
  W:	http://linuxtv.org
  S:	Supported
  F:	Documentation/cec.txt
 -F:	Documentation/DocBook/media/v4l/cec*
 +F:	Documentation/media/uapi/cec
  F:	drivers/staging/media/cec/
  F:	drivers/media/cec-edid.c
  F:	drivers/media/rc/keymaps/rc-cec.c
@@@ -3155,7 -3144,7 +3164,7 @@@ L:	cocci at systeme.lip6.fr (moderated fo
  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/mmarek/kbuild.git misc
  W:	http://coccinelle.lip6.fr/
  S:	Supported
 -F:	Documentation/coccinelle.txt
 +F:	Documentation/dev-tools/coccinelle.rst
  F:	scripts/coccinelle/
  F:	scripts/coccicheck
  
@@@ -3301,7 -3290,6 +3310,7 @@@ L:	linux-pm at vger.kernel.or
  S:	Maintained
  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
  T:	git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates)
 +F:	Documentation/cpu-freq/
  F:	drivers/cpufreq/
  F:	include/linux/cpufreq.h
  
@@@ -3420,7 -3408,7 +3429,7 @@@ T:	git git://linuxtv.org/media_tree.gi
  W:	https://linuxtv.org
  W:	http://www.ivtvdriver.org/index.php/Cx18
  S:	Maintained
 -F:	Documentation/video4linux/cx18.txt
 +F:	Documentation/media/v4l-drivers/cx18*
  F:	drivers/media/pci/cx18/
  F:	include/uapi/linux/ivtv*
  
@@@ -3449,7 -3437,7 +3458,7 @@@ L:	linux-media at vger.kernel.or
  W:	https://linuxtv.org
  T:	git git://linuxtv.org/media_tree.git
  S:	Odd fixes
 -F:	Documentation/video4linux/cx88/
 +F:	Documentation/media/v4l-drivers/cx88*
  F:	drivers/media/pci/cx88/
  
  CXD2820R MEDIA DRIVER
@@@ -3513,14 -3501,14 +3522,14 @@@ F:	drivers/net/ethernet/chelsio/cxgb4vf
  
  CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER
  M:	Ian Munsie <imunsie at au1.ibm.com>
 -M:	Michael Neuling <mikey at neuling.org>
 +M:	Frederic Barrat <fbarrat at linux.vnet.ibm.com>
  L:	linuxppc-dev at lists.ozlabs.org
  S:	Supported
 +F:	arch/powerpc/platforms/powernv/pci-cxl.c
  F:	drivers/misc/cxl/
  F:	include/misc/cxl*
  F:	include/uapi/misc/cxl.h
  F:	Documentation/powerpc/cxl.txt
 -F:	Documentation/powerpc/cxl.txt
  F:	Documentation/ABI/testing/sysfs-class-cxl
  
  CXLFLASH (IBM Coherent Accelerator Processor Interface CAPI Flash) SCSI DRIVER
@@@ -3922,7 -3910,7 +3931,7 @@@ X:	Documentation/devicetree
  X:	Documentation/acpi
  X:	Documentation/power
  X:	Documentation/spi
 -X:	Documentation/DocBook/media
 +X:	Documentation/media
  T:	git git://git.lwn.net/linux.git docs-next
  
  DOUBLETALK DRIVER
@@@ -4623,7 -4611,6 +4632,7 @@@ W:	https://linuxtv.or
  T:	git git://linuxtv.org/media_tree.git
  S:	Maintained
  F:	drivers/media/usb/em28xx/
 +F:	Documentation/media/v4l-drivers/em28xx*
  
  EMBEDDED LINUX
  M:	Paul Gortmaker <paul.gortmaker at windriver.com>
@@@ -5092,9 -5079,10 +5101,9 @@@ F:	include/linux/fscrypto.
  
  F2FS FILE SYSTEM
  M:	Jaegeuk Kim <jaegeuk at kernel.org>
 -M:	Changman Lee <cm224.lee at samsung.com>
 -R:	Chao Yu <yuchao0 at huawei.com>
 +M:	Chao Yu <yuchao0 at huawei.com>
  L:	linux-f2fs-devel at lists.sourceforge.net
 -W:	http://en.wikipedia.org/wiki/F2FS
 +W:	https://f2fs.wiki.kernel.org/
  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
  S:	Maintained
  F:	Documentation/filesystems/f2fs.txt
@@@ -5156,7 -5144,7 +5165,7 @@@ GCOV BASED KERNEL PROFILIN
  M:	Peter Oberparleiter <oberpar at linux.vnet.ibm.com>
  S:	Maintained
  F:	kernel/gcov/
 -F:	Documentation/gcov.txt
 +F:	Documentation/dev-tools/gcov.rst
  
  GDT SCSI DISK ARRAY CONTROLLER DRIVER
  M:	Achim Leubner <achim_leubner at adaptec.com>
@@@ -5304,13 -5292,6 +5313,13 @@@ L:	netdev at vger.kernel.or
  S:	Maintained
  F:	drivers/net/ethernet/aeroflex/
  
 +GS1662 VIDEO SERIALIZER
 +M:	Charles-Antoine Couret <charles-antoine.couret at nexvision.fr>
 +L:	linux-media at vger.kernel.org
 +T:	git git://linuxtv.org/media_tree.git
 +S:	Maintained
 +F:	drivers/media/spi/gs1662.c
 +
  GSPCA FINEPIX SUBDRIVER
  M:	Frank Zago <frank at zago.net>
  L:	linux-media at vger.kernel.org
@@@ -5651,7 -5632,7 +5660,7 @@@ M:	Sebastian Reichel <sre at kernel.org
  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/sre/linux-hsi.git
  S:	Maintained
  F:	Documentation/ABI/testing/sysfs-bus-hsi
 -F:	Documentation/hsi.txt
 +F:	Documentation/device-drivers/serial-interfaces.rst
  F:	drivers/hsi/
  F:	include/linux/hsi/
  F:	include/uapi/linux/hsi/
@@@ -5679,14 -5660,6 +5688,14 @@@ M:	Nadia Yvette Chambers <nyc at holomorph
  S:	Maintained
  F:	fs/hugetlbfs/
  
 +HVA ST MEDIA DRIVER
 +M:	Jean-Christophe Trotin <jean-christophe.trotin at st.com>
 +L:	linux-media at vger.kernel.org
 +T:	git git://linuxtv.org/media_tree.git
 +W:	https://linuxtv.org
 +S:	Supported
 +F:	drivers/media/platform/sti/hva/
 +
  Hyper-V CORE AND DRIVERS
  M:	"K. Y. Srinivasan" <kys at microsoft.com>
  M:	Haiyang Zhang <haiyangz at microsoft.com>
@@@ -5713,8 -5686,6 +5722,8 @@@ S:	Maintaine
  F:	Documentation/i2c/i2c-topology
  F:	Documentation/i2c/muxes/
  F:	Documentation/devicetree/bindings/i2c/i2c-mux*
 +F:	Documentation/devicetree/bindings/i2c/i2c-arb*
 +F:	Documentation/devicetree/bindings/i2c/i2c-gate*
  F:	drivers/i2c/i2c-mux.c
  F:	drivers/i2c/muxes/
  F:	include/linux/i2c-mux.h
@@@ -6132,13 -6103,6 +6141,13 @@@ T:	git git://git.kernel.org/pub/scm/lin
  S:	Supported
  F:	drivers/idle/intel_idle.c
  
 +INTEL INTEGRATED SENSOR HUB DRIVER
 +M:	Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
 +M:	Jiri Kosina <jikos at kernel.org>
 +L:	linux-input at vger.kernel.org
 +S:	Maintained
 +F:	drivers/hid/intel-ish-hid/
 +
  INTEL PSTATE DRIVER
  M:	Srinivas Pandruvada <srinivas.pandruvada at linux.intel.com>
  M:	Len Brown <lenb at kernel.org>
@@@ -6147,7 -6111,7 +6156,7 @@@ S:	Supporte
  F:	drivers/cpufreq/intel_pstate.c
  
  INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
 -M:	Maik Broemme <mbroemme at plusserver.de>
 +M:	Maik Broemme <mbroemme at libmpq.org>
  L:	linux-fbdev at vger.kernel.org
  S:	Maintained
  F:	Documentation/fb/intelfb.txt
@@@ -6565,7 -6529,7 +6574,7 @@@ L:	linux-media at vger.kernel.or
  T:	git git://linuxtv.org/media_tree.git
  W:	http://www.ivtvdriver.org
  S:	Maintained
 -F:	Documentation/video4linux/*.ivtv
 +F:	Documentation/media/v4l-drivers/ivtv*
  F:	drivers/media/pci/ivtv/
  F:	include/uapi/linux/ivtv*
  
@@@ -6649,7 -6613,7 +6658,7 @@@ L:	kasan-dev at googlegroups.co
  S:	Maintained
  F:	arch/*/include/asm/kasan.h
  F:	arch/*/mm/kasan_init*
 -F:	Documentation/kasan.txt
 +F:	Documentation/dev-tools/kasan.rst
  F:	include/linux/kasan*.h
  F:	lib/test_kasan.c
  F:	mm/kasan/
@@@ -6865,7 -6829,7 +6874,7 @@@ KMEMCHEC
  M:	Vegard Nossum <vegardno at ifi.uio.no>
  M:	Pekka Enberg <penberg at kernel.org>
  S:	Maintained
 -F:	Documentation/kmemcheck.txt
 +F:	Documentation/dev-tools/kmemcheck.rst
  F:	arch/x86/include/asm/kmemcheck.h
  F:	arch/x86/mm/kmemcheck/
  F:	include/linux/kmemcheck.h
@@@ -6874,7 -6838,7 +6883,7 @@@ F:	mm/kmemcheck.
  KMEMLEAK
  M:	Catalin Marinas <catalin.marinas at arm.com>
  S:	Maintained
 -F:	Documentation/kmemleak.txt
 +F:	Documentation/dev-tools/kmemleak.rst
  F:	include/linux/kmemleak.h
  F:	mm/kmemleak.c
  F:	mm/kmemleak-test.c
@@@ -7570,15 -7534,6 +7579,15 @@@ F:	Documentation/devicetree/bindings/me
  F:	drivers/media/platform/rcar-fcp.c
  F:	include/media/rcar-fcp.h
  
 +MEDIA DRIVERS FOR RENESAS - VIN
 +M:	Niklas Söderlund <niklas.soderlund at ragnatech.se>
 +L:	linux-media at vger.kernel.org
 +L:	linux-renesas-soc at vger.kernel.org
 +T:	git git://linuxtv.org/media_tree.git
 +S:	Supported
 +F:	Documentation/devicetree/bindings/media/rcar_vin.txt
 +F:	drivers/media/platform/rcar-vin/
 +
  MEDIA DRIVERS FOR RENESAS - VSP1
  M:	Laurent Pinchart <laurent.pinchart at ideasonboard.com>
  L:	linux-media at vger.kernel.org
@@@ -7656,7 -7611,9 +7665,7 @@@ W:	https://linuxtv.or
  Q:	http://patchwork.kernel.org/project/linux-media/list/
  T:	git git://linuxtv.org/media_tree.git
  S:	Maintained
 -F:	Documentation/dvb/
 -F:	Documentation/video4linux/
 -F:	Documentation/DocBook/media/
 +F:	Documentation/media/
  F:	drivers/media/
  F:	drivers/staging/media/
  F:	include/linux/platform_data/media/
@@@ -7804,14 -7761,6 +7813,14 @@@ T:	git git://git.monstr.eu/linux-2.6-mi
  S:	Supported
  F:	arch/microblaze/
  
 +MICROCHIP / ATMEL ISC DRIVER
 +M:	Songjun Wu <songjun.wu at microchip.com>
 +L:	linux-media at vger.kernel.org
 +S:	Supported
 +F:	drivers/media/platform/atmel/atmel-isc.c
 +F:	drivers/media/platform/atmel/atmel-isc-regs.h
 +F:	Documentation/devicetree/bindings/media/atmel-isc.txt
 +
  MICROSOFT SURFACE PRO 3 BUTTON DRIVER
  M:	Chen Yu <yu.c.chen at intel.com>
  L:	platform-driver-x86 at vger.kernel.org
@@@ -7925,7 -7874,7 +7934,7 @@@ F:	kernel/module.
  MOTION EYE VAIO PICTUREBOOK CAMERA DRIVER
  W:	http://popies.net/meye/
  S:	Orphan
 -F:	Documentation/video4linux/meye.txt
 +F:	Documentation/media/v4l-drivers/meye*
  F:	drivers/media/pci/meye/
  F:	include/uapi/linux/meye.h
  
@@@ -8220,15 -8169,6 +8229,15 @@@ S:	Maintaine
  W:	https://fedorahosted.org/dropwatch/
  F:	net/core/drop_monitor.c
  
 +NETWORKING [DSA]
 +M:	Andrew Lunn <andrew at lunn.ch>
 +M:	Vivien Didelot <vivien.didelot at savoirfairelinux.com>
 +M:	Florian Fainelli <f.fainelli at gmail.com>
 +S:	Maintained
 +F:	net/dsa/
 +F:	include/net/dsa.h
 +F:	drivers/net/dsa/
 +
  NETWORKING [GENERAL]
  M:	"David S. Miller" <davem at davemloft.net>
  L:	netdev at vger.kernel.org
@@@ -9171,15 -9111,6 +9180,15 @@@ S:	Maintaine
  F:	Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
  F:	drivers/pci/host/pcie-hisi.c
  
 +PCIE DRIVER FOR ROCKCHIP
 +M:	Shawn Lin <shawn.lin at rock-chips.com>
 +M:	Wenrui Li <wenrui.li at rock-chips.com>
 +L:	linux-pci at vger.kernel.org
 +L:	linux-rockchip at lists.infradead.org
 +S:	Maintained
 +F:	Documentation/devicetree/bindings/pci/rockchip-pcie.txt
 +F:	drivers/pci/host/pcie-rockchip.c
 +
  PCIE DRIVER FOR QUALCOMM MSM
  M:     Stanimir Varbanov <svarbanov at mm-sol.com>
  L:     linux-pci at vger.kernel.org
@@@ -9333,8 -9264,6 +9342,8 @@@ L:	linux-arm-kernel at lists.infradead.or
  L:	linux-samsung-soc at vger.kernel.org (moderated for non-subscribers)
  S:	Maintained
  F:	drivers/pinctrl/samsung/
 +F:	include/dt-bindings/pinctrl/samsung.h
 +F:	Documentation/devicetree/bindings/pinctrl/samsung-pinctrl.txt
  
  PIN CONTROLLER - SINGLE
  M:	Tony Lindgren <tony at atomide.com>
@@@ -9589,7 -9518,7 +9598,7 @@@ L:	linux-media at vger.kernel.or
  W:	http://www.isely.net/pvrusb2/
  T:	git git://linuxtv.org/media_tree.git
  S:	Maintained
 -F:	Documentation/video4linux/README.pvrusb2
 +F:	Documentation/media/v4l-drivers/pvrusb2*
  F:	drivers/media/usb/pvrusb2/
  
  PWC WEBCAM DRIVER
@@@ -9770,6 -9699,12 +9779,12 @@@ T:	git git://git.kernel.org/pub/scm/lin
  S:	Supported
  F:	drivers/net/wireless/ath/ath10k/
  
+ QUALCOMM EMAC GIGABIT ETHERNET DRIVER
+ M:	Timur Tabi <timur at codeaurora.org>
+ L:	netdev at vger.kernel.org
+ S:	Supported
+ F:	drivers/net/ethernet/qualcomm/emac/
+ 
  QUALCOMM HEXAGON ARCHITECTURE
  M:	Richard Kuo <rkuo at codeaurora.org>
  L:	linux-hexagon at vger.kernel.org
@@@ -10025,6 -9960,7 +10040,7 @@@ F:	net/rfkill
  
  RHASHTABLE
  M:	Thomas Graf <tgraf at suug.ch>
+ M:	Herbert Xu <herbert at gondor.apana.org.au>
  L:	netdev at vger.kernel.org
  S:	Maintained
  F:	lib/rhashtable.c
@@@ -10243,7 -10179,7 +10259,7 @@@ L:	linux-media at vger.kernel.or
  W:	https://linuxtv.org
  T:	git git://linuxtv.org/media_tree.git
  S:	Odd fixes
 -F:	Documentation/video4linux/*.saa7134
 +F:	Documentation/media/v4l-drivers/saa7134*
  F:	drivers/media/pci/saa7134/
  
  SAA7146 VIDEO4LINUX-2 DRIVER
@@@ -10385,13 -10321,6 +10401,13 @@@ S:	Maintaine
  F:	Documentation/devicetree/bindings/serial/
  F:	drivers/tty/serial/
  
 +STI CEC DRIVER
 +M:	Benjamin Gaignard <benjamin.gaignard at linaro.org>
 +L:	kernel at stlinux.com
 +S:	Maintained
 +F:	drivers/staging/media/st-cec/
 +F:	Documentation/devicetree/bindings/media/stih-cec.txt
 +
  SYNOPSYS DESIGNWARE DMAC DRIVER
  M:	Viresh Kumar <vireshk at kernel.org>
  M:	Andy Shevchenko <andriy.shevchenko at linux.intel.com>
@@@ -11952,15 -11881,6 +11968,15 @@@ W:	https://linuxtv.or
  T:	git git://linuxtv.org/media_tree.git
  S:	Odd fixes
  F:	drivers/media/usb/tm6000/
 +F:	Documentation/media/v4l-drivers/tm6000*
 +
 +TW5864 VIDEO4LINUX DRIVER
 +M:	Bluecherry Maintainers <maintainers at bluecherrydvr.com>
 +M:	Andrey Utkin <andrey.utkin at corp.bluecherry.net>
 +M:	Andrey Utkin <andrey_utkin at fastmail.com>
 +L:	linux-media at vger.kernel.org
 +S:	Supported
 +F:	drivers/media/pci/tw5864/
  
  TW68 VIDEO4LINUX DRIVER
  M:	Hans Verkuil <hverkuil at xs4all.nl>
@@@ -12376,6 -12296,7 +12392,7 @@@ F:	drivers/net/usb/smsc75xx.
  
  USB SMSC95XX ETHERNET DRIVER
  M:	Steve Glendinning <steve.glendinning at shawell.net>
+ M:	Microchip Linux Driver Support <UNGLinuxDriver at microchip.com>
  L:	netdev at vger.kernel.org
  S:	Maintained
  F:	drivers/net/usb/smsc95xx.*
@@@ -12457,7 -12378,7 +12474,7 @@@ L:	linux-media at vger.kernel.or
  T:	git git://linuxtv.org/media_tree.git
  W:	http://royale.zerezo.com/zr364xx/
  S:	Maintained
 -F:	Documentation/video4linux/zr364xx.txt
 +F:	Documentation/media/v4l-drivers/zr364xx*
  F:	drivers/media/usb/zr364xx/
  
  ULPI BUS
@@@ -12664,7 -12585,7 +12681,7 @@@ F:	include/linux/if_*vlan.
  F:	net/8021q/
  
  VLYNQ BUS
 -M:	Florian Fainelli <florian at openwrt.org>
 +M:	Florian Fainelli <f.fainelli at gmail.com>
  L:	openwrt-devel at lists.openwrt.org (subscribers-only)
  S:	Maintained
  F:	drivers/vlynq/vlynq.c
@@@ -13037,10 -12958,11 +13054,10 @@@ F:	arch/x86/xen/*swiotlb
  F:	drivers/xen/*swiotlb*
  
  XFS FILESYSTEM
 -P:	Silicon Graphics Inc
  M:	Dave Chinner <david at fromorbit.com>
 -M:	xfs at oss.sgi.com
 -L:	xfs at oss.sgi.com
 -W:	http://oss.sgi.com/projects/xfs
 +M:	linux-xfs at vger.kernel.org
 +L:	linux-xfs at vger.kernel.org
 +W:	http://xfs.org/
  T:	git git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs.git
  S:	Supported
  F:	Documentation/filesystems/xfs.txt
diff --combined arch/arm64/boot/dts/apm/apm-storm.dtsi
index 954ea6a,d5c3435..63be8e5
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@@ -110,10 -110,10 +110,10 @@@
  
  	timer {
  		compatible = "arm,armv8-timer";
 -		interrupts = <1 0 0xff01>,	/* Secure Phys IRQ */
 -			     <1 13 0xff01>,	/* Non-secure Phys IRQ */
 -			     <1 14 0xff01>,	/* Virt IRQ */
 -			     <1 15 0xff01>;	/* Hyp IRQ */
 +		interrupts = <1 0 0xff08>,	/* Secure Phys IRQ */
 +			     <1 13 0xff08>,	/* Non-secure Phys IRQ */
 +			     <1 14 0xff08>,	/* Virt IRQ */
 +			     <1 15 0xff08>;	/* Hyp IRQ */
  		clock-frequency = <50000000>;
  	};
  
@@@ -553,64 -553,6 +553,64 @@@
  			};
  		};
  
 +		pmu: pmu at 78810000 {
 +			compatible = "apm,xgene-pmu-v2";
 +			#address-cells = <2>;
 +			#size-cells = <2>;
 +			ranges;
 +			regmap-csw = <&csw>;
 +			regmap-mcba = <&mcba>;
 +			regmap-mcbb = <&mcbb>;
 +			reg = <0x0 0x78810000 0x0 0x1000>;
 +			interrupts = <0x0 0x22 0x4>;
 +
 +			pmul3c at 7e610000 {
 +				compatible = "apm,xgene-pmu-l3c";
 +				reg = <0x0 0x7e610000 0x0 0x1000>;
 +			};
 +
 +			pmuiob at 7e940000 {
 +				compatible = "apm,xgene-pmu-iob";
 +				reg = <0x0 0x7e940000 0x0 0x1000>;
 +			};
 +
 +			pmucmcb at 7e710000 {
 +				compatible = "apm,xgene-pmu-mcb";
 +				reg = <0x0 0x7e710000 0x0 0x1000>;
 +				enable-bit-index = <0>;
 +			};
 +
 +			pmucmcb at 7e730000 {
 +				compatible = "apm,xgene-pmu-mcb";
 +				reg = <0x0 0x7e730000 0x0 0x1000>;
 +				enable-bit-index = <1>;
 +			};
 +
 +			pmucmc at 7e810000 {
 +				compatible = "apm,xgene-pmu-mc";
 +				reg = <0x0 0x7e810000 0x0 0x1000>;
 +				enable-bit-index = <0>;
 +			};
 +
 +			pmucmc at 7e850000 {
 +				compatible = "apm,xgene-pmu-mc";
 +				reg = <0x0 0x7e850000 0x0 0x1000>;
 +				enable-bit-index = <1>;
 +			};
 +
 +			pmucmc at 7e890000 {
 +				compatible = "apm,xgene-pmu-mc";
 +				reg = <0x0 0x7e890000 0x0 0x1000>;
 +				enable-bit-index = <2>;
 +			};
 +
 +			pmucmc at 7e8d0000 {
 +				compatible = "apm,xgene-pmu-mc";
 +				reg = <0x0 0x7e8d0000 0x0 0x1000>;
 +				enable-bit-index = <3>;
 +			};
 +		};
 +
  		pcie0: pcie at 1f2b0000 {
  			status = "disabled";
  			device_type = "pci";
@@@ -627,10 -569,10 +627,10 @@@
  			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
  				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
  			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
 -			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xc2 0x1
 -					 0x0 0x0 0x0 0x2 &gic 0x0 0xc3 0x1
 -					 0x0 0x0 0x0 0x3 &gic 0x0 0xc4 0x1
 -					 0x0 0x0 0x0 0x4 &gic 0x0 0xc5 0x1>;
 +			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xc2 0x4
 +					 0x0 0x0 0x0 0x2 &gic 0x0 0xc3 0x4
 +					 0x0 0x0 0x0 0x3 &gic 0x0 0xc4 0x4
 +					 0x0 0x0 0x0 0x4 &gic 0x0 0xc5 0x4>;
  			dma-coherent;
  			clocks = <&pcie0clk 0>;
  			msi-parent = <&msi>;
@@@ -652,10 -594,10 +652,10 @@@
  			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
  				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
  			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
 -			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xc8 0x1
 -					 0x0 0x0 0x0 0x2 &gic 0x0 0xc9 0x1
 -					 0x0 0x0 0x0 0x3 &gic 0x0 0xca 0x1
 -					 0x0 0x0 0x0 0x4 &gic 0x0 0xcb 0x1>;
 +			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xc8 0x4
 +					 0x0 0x0 0x0 0x2 &gic 0x0 0xc9 0x4
 +					 0x0 0x0 0x0 0x3 &gic 0x0 0xca 0x4
 +					 0x0 0x0 0x0 0x4 &gic 0x0 0xcb 0x4>;
  			dma-coherent;
  			clocks = <&pcie1clk 0>;
  			msi-parent = <&msi>;
@@@ -677,10 -619,10 +677,10 @@@
  			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
  				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
  			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
 -			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xce 0x1
 -					 0x0 0x0 0x0 0x2 &gic 0x0 0xcf 0x1
 -					 0x0 0x0 0x0 0x3 &gic 0x0 0xd0 0x1
 -					 0x0 0x0 0x0 0x4 &gic 0x0 0xd1 0x1>;
 +			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xce 0x4
 +					 0x0 0x0 0x0 0x2 &gic 0x0 0xcf 0x4
 +					 0x0 0x0 0x0 0x3 &gic 0x0 0xd0 0x4
 +					 0x0 0x0 0x0 0x4 &gic 0x0 0xd1 0x4>;
  			dma-coherent;
  			clocks = <&pcie2clk 0>;
  			msi-parent = <&msi>;
@@@ -702,10 -644,10 +702,10 @@@
  			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
  				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
  			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
 -			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xd4 0x1
 -					 0x0 0x0 0x0 0x2 &gic 0x0 0xd5 0x1
 -					 0x0 0x0 0x0 0x3 &gic 0x0 0xd6 0x1
 -					 0x0 0x0 0x0 0x4 &gic 0x0 0xd7 0x1>;
 +			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xd4 0x4
 +					 0x0 0x0 0x0 0x2 &gic 0x0 0xd5 0x4
 +					 0x0 0x0 0x0 0x3 &gic 0x0 0xd6 0x4
 +					 0x0 0x0 0x0 0x4 &gic 0x0 0xd7 0x4>;
  			dma-coherent;
  			clocks = <&pcie3clk 0>;
  			msi-parent = <&msi>;
@@@ -727,10 -669,10 +727,10 @@@
  			dma-ranges = <0x42000000 0x80 0x00000000 0x80 0x00000000 0x00 0x80000000
  				      0x42000000 0x00 0x00000000 0x00 0x00000000 0x80 0x00000000>;
  			interrupt-map-mask = <0x0 0x0 0x0 0x7>;
 -			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xda 0x1
 -					 0x0 0x0 0x0 0x2 &gic 0x0 0xdb 0x1
 -					 0x0 0x0 0x0 0x3 &gic 0x0 0xdc 0x1
 -					 0x0 0x0 0x0 0x4 &gic 0x0 0xdd 0x1>;
 +			interrupt-map = <0x0 0x0 0x0 0x1 &gic 0x0 0xda 0x4
 +					 0x0 0x0 0x0 0x2 &gic 0x0 0xdb 0x4
 +					 0x0 0x0 0x0 0x3 &gic 0x0 0xdc 0x4
 +					 0x0 0x0 0x0 0x4 &gic 0x0 0xdd 0x4>;
  			dma-coherent;
  			clocks = <&pcie4clk 0>;
  			msi-parent = <&msi>;
@@@ -755,11 -697,6 +755,11 @@@
  			mboxes = <&mailbox 0>;
  		};
  
 +		hwmonslimpro {
 +			compatible = "apm,xgene-slimpro-hwmon";
 +			mboxes = <&mailbox 7>;
 +		};
 +
  		serial0: serial at 1c020000 {
  			status = "disabled";
  			device_type = "serial";
@@@ -986,7 -923,7 +986,7 @@@
  			/* mac address will be overwritten by the bootloader */
  			local-mac-address = [00 00 00 00 00 00];
  			phy-connection-type = "rgmii";
- 			phy-handle = <&menet0phy>,<&menetphy>;
+ 			phy-handle = <&menetphy>,<&menet0phy>;
  			mdio {
  				compatible = "apm,xgene-mdio";
  				#address-cells = <1>;
diff --combined drivers/infiniband/hw/cxgb4/cm.c
index 80f9889,3cbbfbe..71c8867
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@@ -49,6 -49,7 +49,7 @@@
  
  #include <rdma/ib_addr.h>
  
+ #include <libcxgb_cm.h>
  #include "iw_cxgb4.h"
  #include "clip_tbl.h"
  
@@@ -239,15 -240,13 +240,13 @@@ int c4iw_ofld_send(struct c4iw_rdev *rd
  
  static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
  {
- 	struct cpl_tid_release *req;
+ 	u32 len = roundup(sizeof(struct cpl_tid_release), 16);
  
- 	skb = get_skb(skb, sizeof *req, GFP_KERNEL);
+ 	skb = get_skb(skb, len, GFP_KERNEL);
  	if (!skb)
  		return;
- 	req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req));
- 	INIT_TP_WR(req, hwtid);
- 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid));
- 	set_wr_txq(skb, CPL_PRIORITY_SETUP, 0);
+ 
+ 	cxgb_mk_tid_release(skb, len, hwtid, 0);
  	c4iw_ofld_send(rdev, skb);
  	return;
  }
@@@ -333,8 -332,6 +332,8 @@@ static void remove_ep_tid(struct c4iw_e
  
  	spin_lock_irqsave(&ep->com.dev->lock, flags);
  	_remove_handle(ep->com.dev, &ep->com.dev->hwtid_idr, ep->hwtid, 0);
 +	if (idr_is_empty(&ep->com.dev->hwtid_idr))
 +		wake_up(&ep->com.dev->wait);
  	spin_unlock_irqrestore(&ep->com.dev->lock, flags);
  }
  
@@@ -466,72 -463,6 +465,6 @@@ static struct net_device *get_real_dev(
  	return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
  }
  
- static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev)
- {
- 	int i;
- 
- 	egress_dev = get_real_dev(egress_dev);
- 	for (i = 0; i < dev->rdev.lldi.nports; i++)
- 		if (dev->rdev.lldi.ports[i] == egress_dev)
- 			return 1;
- 	return 0;
- }
- 
- static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip,
- 				     __u8 *peer_ip, __be16 local_port,
- 				     __be16 peer_port, u8 tos,
- 				     __u32 sin6_scope_id)
- {
- 	struct dst_entry *dst = NULL;
- 
- 	if (IS_ENABLED(CONFIG_IPV6)) {
- 		struct flowi6 fl6;
- 
- 		memset(&fl6, 0, sizeof(fl6));
- 		memcpy(&fl6.daddr, peer_ip, 16);
- 		memcpy(&fl6.saddr, local_ip, 16);
- 		if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
- 			fl6.flowi6_oif = sin6_scope_id;
- 		dst = ip6_route_output(&init_net, NULL, &fl6);
- 		if (!dst)
- 			goto out;
- 		if (!our_interface(dev, ip6_dst_idev(dst)->dev) &&
- 		    !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) {
- 			dst_release(dst);
- 			dst = NULL;
- 		}
- 	}
- 
- out:
- 	return dst;
- }
- 
- static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip,
- 				 __be32 peer_ip, __be16 local_port,
- 				 __be16 peer_port, u8 tos)
- {
- 	struct rtable *rt;
- 	struct flowi4 fl4;
- 	struct neighbour *n;
- 
- 	rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip,
- 				   peer_port, local_port, IPPROTO_TCP,
- 				   tos, 0);
- 	if (IS_ERR(rt))
- 		return NULL;
- 	n = dst_neigh_lookup(&rt->dst, &peer_ip);
- 	if (!n)
- 		return NULL;
- 	if (!our_interface(dev, n->dev) &&
- 	    !(n->dev->flags & IFF_LOOPBACK)) {
- 		neigh_release(n);
- 		dst_release(&rt->dst);
- 		return NULL;
- 	}
- 	neigh_release(n);
- 	return &rt->dst;
- }
- 
  static void arp_failure_discard(void *handle, struct sk_buff *skb)
  {
  	pr_err(MOD "ARP failure\n");
@@@ -706,56 -637,32 +639,32 @@@ static int send_flowc(struct c4iw_ep *e
  
  static int send_halfclose(struct c4iw_ep *ep)
  {
- 	struct cpl_close_con_req *req;
  	struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
- 	int wrlen = roundup(sizeof *req, 16);
+ 	u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
  
  	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
  	if (WARN_ON(!skb))
  		return -ENOMEM;
  
- 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- 	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
- 	req = (struct cpl_close_con_req *) skb_put(skb, wrlen);
- 	memset(req, 0, wrlen);
- 	INIT_TP_WR(req, ep->hwtid);
- 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ,
- 						    ep->hwtid));
+ 	cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx,
+ 			      NULL, arp_failure_discard);
+ 
  	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
  }
  
  static int send_abort(struct c4iw_ep *ep)
  {
- 	struct cpl_abort_req *req;
- 	int wrlen = roundup(sizeof *req, 16);
+ 	u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
  	struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
  
  	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
  	if (WARN_ON(!req_skb))
  		return -ENOMEM;
  
- 	set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx);
- 	t4_set_arp_err_handler(req_skb, ep, abort_arp_failure);
- 	req = (struct cpl_abort_req *)skb_put(req_skb, wrlen);
- 	memset(req, 0, wrlen);
- 	INIT_TP_WR(req, ep->hwtid);
- 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid));
- 	req->cmd = CPL_ABORT_SEND_RST;
- 	return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
- }
+ 	cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx,
+ 			  ep, abort_arp_failure);
  
- static void best_mtu(const unsigned short *mtus, unsigned short mtu,
- 		     unsigned int *idx, int use_ts, int ipv6)
- {
- 	unsigned short hdr_size = (ipv6 ?
- 				   sizeof(struct ipv6hdr) :
- 				   sizeof(struct iphdr)) +
- 				  sizeof(struct tcphdr) +
- 				  (use_ts ?
- 				   round_up(TCPOLEN_TIMESTAMP, 4) : 0);
- 	unsigned short data_size = mtu - hdr_size;
- 
- 	cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx);
+ 	return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
  }
  
  static int send_connect(struct c4iw_ep *ep)
@@@ -770,7 -677,7 +679,7 @@@
  	u64 opt0;
  	u32 opt2;
  	unsigned int mtu_idx;
- 	int wscale;
+ 	u32 wscale;
  	int win, sizev4, sizev6, wrlen;
  	struct sockaddr_in *la = (struct sockaddr_in *)
  				 &ep->com.local_addr;
@@@ -817,10 -724,10 +726,10 @@@
  	}
  	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
  
- 	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- 		 enable_tcp_timestamps,
- 		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
- 	wscale = compute_wscale(rcv_win);
+ 	cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ 		      enable_tcp_timestamps,
+ 		      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+ 	wscale = cxgb_compute_wscale(rcv_win);
  
  	/*
  	 * Specify the largest window that will fit in opt0. The
@@@ -1447,9 -1354,9 +1356,9 @@@ static void established_upcall(struct c
  
  static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
  {
- 	struct cpl_rx_data_ack *req;
  	struct sk_buff *skb;
- 	int wrlen = roundup(sizeof *req, 16);
+ 	u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
+ 	u32 credit_dack;
  
  	PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits);
  	skb = get_skb(NULL, wrlen, GFP_KERNEL);
@@@ -1466,15 -1373,12 +1375,12 @@@
  	if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
  		credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
  
- 	req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen);
- 	memset(req, 0, wrlen);
- 	INIT_TP_WR(req, ep->hwtid);
- 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
- 						    ep->hwtid));
- 	req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F |
- 				       RX_DACK_CHANGE_F |
- 				       RX_DACK_MODE_V(dack_mode));
- 	set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx);
+ 	credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F |
+ 		      RX_DACK_MODE_V(dack_mode);
+ 
+ 	cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx,
+ 			    credit_dack);
+ 
  	c4iw_ofld_send(&ep->com.dev->rdev, skb);
  	return credits;
  }
@@@ -1972,7 -1876,7 +1878,7 @@@ static int send_fw_act_open_req(struct 
  	struct sk_buff *skb;
  	struct fw_ofld_connection_wr *req;
  	unsigned int mtu_idx;
- 	int wscale;
+ 	u32 wscale;
  	struct sockaddr_in *sin;
  	int win;
  
@@@ -1997,10 -1901,10 +1903,10 @@@
  			htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
  	req->tcb.tx_max = (__force __be32) jiffies;
  	req->tcb.rcv_adv = htons(1);
- 	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- 		 enable_tcp_timestamps,
- 		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
- 	wscale = compute_wscale(rcv_win);
+ 	cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ 		      enable_tcp_timestamps,
+ 		      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+ 	wscale = cxgb_compute_wscale(rcv_win);
  
  	/*
  	 * Specify the largest window that will fit in opt0. The
@@@ -2054,15 -1958,6 +1960,6 @@@ static inline int act_open_has_tid(int 
  		status != CPL_ERR_CONN_EXIST);
  }
  
- /* Returns whether a CPL status conveys negative advice.
-  */
- static int is_neg_adv(unsigned int status)
- {
- 	return status == CPL_ERR_RTX_NEG_ADVICE ||
- 	       status == CPL_ERR_PERSIST_NEG_ADVICE ||
- 	       status == CPL_ERR_KEEPALV_NEG_ADVICE;
- }
- 
  static char *neg_adv_str(unsigned int status)
  {
  	switch (status) {
@@@ -2119,10 -2014,8 +2016,10 @@@ static int import_ep(struct c4iw_ep *ep
  		}
  		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
  					n, pdev, rt_tos2priority(tos));
 -		if (!ep->l2t)
 +		if (!ep->l2t) {
 +			dev_put(pdev);
  			goto out;
 +		}
  		ep->mtu = pdev->mtu;
  		ep->tx_chan = cxgb4_port_chan(pdev);
  		ep->smac_idx = cxgb4_tp_smt_idx(adapter_type,
@@@ -2218,16 -2111,21 +2115,21 @@@ static int c4iw_reconnect(struct c4iw_e
  
  	/* find a route */
  	if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
- 		ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
- 				     raddr->sin_addr.s_addr, laddr->sin_port,
- 				     raddr->sin_port, ep->com.cm_id->tos);
+ 		ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev,
+ 					  laddr->sin_addr.s_addr,
+ 					  raddr->sin_addr.s_addr,
+ 					  laddr->sin_port,
+ 					  raddr->sin_port, ep->com.cm_id->tos);
  		iptype = 4;
  		ra = (__u8 *)&raddr->sin_addr;
  	} else {
- 		ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr,
- 				      raddr6->sin6_addr.s6_addr,
- 				      laddr6->sin6_port, raddr6->sin6_port, 0,
- 				      raddr6->sin6_scope_id);
+ 		ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi,
+ 					   get_real_dev,
+ 					   laddr6->sin6_addr.s6_addr,
+ 					   raddr6->sin6_addr.s6_addr,
+ 					   laddr6->sin6_port,
+ 					   raddr6->sin6_port, 0,
+ 					   raddr6->sin6_scope_id);
  		iptype = 6;
  		ra = (__u8 *)&raddr6->sin6_addr;
  	}
@@@ -2299,7 -2197,7 +2201,7 @@@ static int act_open_rpl(struct c4iw_de
  	PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
  	     status, status2errno(status));
  
- 	if (is_neg_adv(status)) {
+ 	if (cxgb_is_neg_adv(status)) {
  		PDBG("%s Connection problems for atid %u status %u (%s)\n",
  		     __func__, atid, status, neg_adv_str(status));
  		ep->stats.connect_neg_adv++;
@@@ -2426,7 -2324,7 +2328,7 @@@ static int accept_cr(struct c4iw_ep *ep
  	unsigned int mtu_idx;
  	u64 opt0;
  	u32 opt2;
- 	int wscale;
+ 	u32 wscale;
  	struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
  	int win;
  	enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
@@@ -2447,10 -2345,10 +2349,10 @@@
  	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
  						    ep->hwtid));
  
- 	best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
- 		 enable_tcp_timestamps && req->tcpopt.tstamp,
- 		 (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1);
- 	wscale = compute_wscale(rcv_win);
+ 	cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
+ 		      enable_tcp_timestamps && req->tcpopt.tstamp,
+ 		      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
+ 	wscale = cxgb_compute_wscale(rcv_win);
  
  	/*
  	 * Specify the largest window that will fit in opt0. The
@@@ -2522,42 -2420,6 +2424,6 @@@ static void reject_cr(struct c4iw_dev *
  	return;
  }
  
- static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type,
- 		       int *iptype, __u8 *local_ip, __u8 *peer_ip,
- 		       __be16 *local_port, __be16 *peer_port)
- {
- 	int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
- 		      ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
- 		      T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len));
- 	int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ?
- 		     IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) :
- 		     T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len));
- 	struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len);
- 	struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len);
- 	struct tcphdr *tcp = (struct tcphdr *)
- 			     ((u8 *)(req + 1) + eth_len + ip_len);
- 
- 	if (ip->version == 4) {
- 		PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__,
- 		     ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source),
- 		     ntohs(tcp->dest));
- 		*iptype = 4;
- 		memcpy(peer_ip, &ip->saddr, 4);
- 		memcpy(local_ip, &ip->daddr, 4);
- 	} else {
- 		PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__,
- 		     ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source),
- 		     ntohs(tcp->dest));
- 		*iptype = 6;
- 		memcpy(peer_ip, ip6->saddr.s6_addr, 16);
- 		memcpy(local_ip, ip6->daddr.s6_addr, 16);
- 	}
- 	*peer_port = tcp->source;
- 	*local_port = tcp->dest;
- 
- 	return;
- }
- 
  static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
  {
  	struct c4iw_ep *child_ep = NULL, *parent_ep;
@@@ -2586,8 -2448,8 +2452,8 @@@
  		goto reject;
  	}
  
- 	get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype,
- 		   local_ip, peer_ip, &local_port, &peer_port);
+ 	cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
+ 			&iptype, local_ip, peer_ip, &local_port, &peer_port);
  
  	/* Find output route */
  	if (iptype == 4)  {
@@@ -2595,18 -2457,19 +2461,19 @@@
  		     , __func__, parent_ep, hwtid,
  		     local_ip, peer_ip, ntohs(local_port),
  		     ntohs(peer_port), peer_mss);
- 		dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
- 				 local_port, peer_port,
- 				 tos);
+ 		dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+ 				      *(__be32 *)local_ip, *(__be32 *)peer_ip,
+ 				      local_port, peer_port, tos);
  	} else {
  		PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
  		     , __func__, parent_ep, hwtid,
  		     local_ip, peer_ip, ntohs(local_port),
  		     ntohs(peer_port), peer_mss);
- 		dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port,
- 				  PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
- 				  ((struct sockaddr_in6 *)
- 				  &parent_ep->com.local_addr)->sin6_scope_id);
+ 		dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
+ 				local_ip, peer_ip, local_port, peer_port,
+ 				PASS_OPEN_TOS_G(ntohl(req->tos_stid)),
+ 				((struct sockaddr_in6 *)
+ 				 &parent_ep->com.local_addr)->sin6_scope_id);
  	}
  	if (!dst) {
  		printk(KERN_ERR MOD "%s - failed to find dst entry!\n",
@@@ -2839,18 -2702,18 +2706,18 @@@ static int peer_abort(struct c4iw_dev *
  {
  	struct cpl_abort_req_rss *req = cplhdr(skb);
  	struct c4iw_ep *ep;
- 	struct cpl_abort_rpl *rpl;
  	struct sk_buff *rpl_skb;
  	struct c4iw_qp_attributes attrs;
  	int ret;
  	int release = 0;
  	unsigned int tid = GET_TID(req);
+ 	u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
  
  	ep = get_ep_from_tid(dev, tid);
  	if (!ep)
  		return 0;
  
- 	if (is_neg_adv(req->status)) {
+ 	if (cxgb_is_neg_adv(req->status)) {
  		PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
  		     __func__, ep->hwtid, req->status,
  		     neg_adv_str(req->status));
@@@ -2943,11 -2806,9 +2810,9 @@@
  		release = 1;
  		goto out;
  	}
- 	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
- 	rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl));
- 	INIT_TP_WR(rpl, ep->hwtid);
- 	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid));
- 	rpl->cmd = CPL_ABORT_NO_RST;
+ 
+ 	cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx);
+ 
  	c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
  out:
  	if (release)
@@@ -3379,9 -3240,11 +3244,11 @@@ int c4iw_connect(struct iw_cm_id *cm_id
  		PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
  		     __func__, &laddr->sin_addr, ntohs(laddr->sin_port),
  		     ra, ntohs(raddr->sin_port));
- 		ep->dst = find_route(dev, laddr->sin_addr.s_addr,
- 				     raddr->sin_addr.s_addr, laddr->sin_port,
- 				     raddr->sin_port, cm_id->tos);
+ 		ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+ 					  laddr->sin_addr.s_addr,
+ 					  raddr->sin_addr.s_addr,
+ 					  laddr->sin_port,
+ 					  raddr->sin_port, cm_id->tos);
  	} else {
  		iptype = 6;
  		ra = (__u8 *)&raddr6->sin6_addr;
@@@ -3400,10 -3263,12 +3267,12 @@@
  		     __func__, laddr6->sin6_addr.s6_addr,
  		     ntohs(laddr6->sin6_port),
  		     raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
- 		ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr,
- 				      raddr6->sin6_addr.s6_addr,
- 				      laddr6->sin6_port, raddr6->sin6_port, 0,
- 				      raddr6->sin6_scope_id);
+ 		ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
+ 					   laddr6->sin6_addr.s6_addr,
+ 					   raddr6->sin6_addr.s6_addr,
+ 					   laddr6->sin6_port,
+ 					   raddr6->sin6_port, 0,
+ 					   raddr6->sin6_scope_id);
  	}
  	if (!ep->dst) {
  		printk(KERN_ERR MOD "%s - cannot find route.\n", __func__);
@@@ -4045,8 -3910,9 +3914,9 @@@ static int rx_pkt(struct c4iw_dev *dev
  	     ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
  	     ntohs(tcph->source), iph->tos);
  
- 	dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source,
- 			 iph->tos);
+ 	dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
+ 			      iph->daddr, iph->saddr, tcph->dest,
+ 			      tcph->source, iph->tos);
  	if (!dst) {
  		pr_err("%s - failed to find dst entry!\n",
  		       __func__);
@@@ -4321,7 -4187,7 +4191,7 @@@ static int peer_abort_intr(struct c4iw_
  		kfree_skb(skb);
  		return 0;
  	}
- 	if (is_neg_adv(req->status)) {
+ 	if (cxgb_is_neg_adv(req->status)) {
  		PDBG("%s Negative advice on abort- tid %u status %d (%s)\n",
  		     __func__, ep->hwtid, req->status,
  		     neg_adv_str(req->status));
diff --combined drivers/infiniband/hw/cxgb4/device.c
index 3c4b212,f170b63..93e3d27
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c
@@@ -872,13 -872,9 +872,13 @@@ static void c4iw_rdev_close(struct c4iw
  static void c4iw_dealloc(struct uld_ctx *ctx)
  {
  	c4iw_rdev_close(&ctx->dev->rdev);
 +	WARN_ON_ONCE(!idr_is_empty(&ctx->dev->cqidr));
  	idr_destroy(&ctx->dev->cqidr);
 +	WARN_ON_ONCE(!idr_is_empty(&ctx->dev->qpidr));
  	idr_destroy(&ctx->dev->qpidr);
 +	WARN_ON_ONCE(!idr_is_empty(&ctx->dev->mmidr));
  	idr_destroy(&ctx->dev->mmidr);
 +	wait_event(ctx->dev->wait, idr_is_empty(&ctx->dev->hwtid_idr));
  	idr_destroy(&ctx->dev->hwtid_idr);
  	idr_destroy(&ctx->dev->stid_idr);
  	idr_destroy(&ctx->dev->atid_idr);
@@@ -996,7 -992,6 +996,7 @@@ static struct c4iw_dev *c4iw_alloc(cons
  	mutex_init(&devp->rdev.stats.lock);
  	mutex_init(&devp->db_mutex);
  	INIT_LIST_HEAD(&devp->db_fc_list);
 +	init_waitqueue_head(&devp->wait);
  	devp->avail_ird = devp->rdev.lldi.max_ird_adapter;
  
  	if (c4iw_debugfs_root) {
@@@ -1480,6 -1475,10 +1480,10 @@@ static int c4iw_uld_control(void *handl
  
  static struct cxgb4_uld_info c4iw_uld_info = {
  	.name = DRV_NAME,
+ 	.nrxq = MAX_ULD_QSETS,
+ 	.rxq_size = 511,
+ 	.ciq = true,
+ 	.lro = false,
  	.add = c4iw_uld_add,
  	.rx_handler = c4iw_uld_rx_handler,
  	.state_change = c4iw_uld_state_change,
diff --combined drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 4b83b84,6a9bef1f..cdcf3ee
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@@ -263,7 -263,6 +263,7 @@@ struct c4iw_dev 
  	struct idr stid_idr;
  	struct list_head db_fc_list;
  	u32 avail_ird;
 +	wait_queue_head_t wait;
  };
  
  static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
@@@ -882,15 -881,6 +882,6 @@@ static inline struct c4iw_listen_ep *to
  	return cm_id->provider_data;
  }
  
- static inline int compute_wscale(int win)
- {
- 	int wscale = 0;
- 
- 	while (wscale < 14 && (65535<<wscale) < win)
- 		wscale++;
- 	return wscale;
- }
- 
  static inline int ocqp_supported(const struct cxgb4_lld_info *infop)
  {
  #if defined(__i386__) || defined(__x86_64__) || defined(CONFIG_PPC64)
diff --combined drivers/infiniband/hw/mlx5/main.c
index e19537c,e4aecbf..551aa0e
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@@ -232,23 -232,19 +232,19 @@@ static int set_roce_addr(struct ib_devi
  			 const union ib_gid *gid,
  			 const struct ib_gid_attr *attr)
  {
- 	struct mlx5_ib_dev *dev	= to_mdev(device);
- 	u32  in[MLX5_ST_SZ_DW(set_roce_address_in)];
- 	u32 out[MLX5_ST_SZ_DW(set_roce_address_out)];
+ 	struct mlx5_ib_dev *dev = to_mdev(device);
+ 	u32  in[MLX5_ST_SZ_DW(set_roce_address_in)]  = {0};
+ 	u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
  	void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
  	enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num);
  
  	if (ll != IB_LINK_LAYER_ETHERNET)
  		return -EINVAL;
  
- 	memset(in, 0, sizeof(in));
- 
  	ib_gid_to_mlx5_roce_addr(gid, attr, in_addr);
  
  	MLX5_SET(set_roce_address_in, in, roce_address_index, index);
  	MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
- 
- 	memset(out, 0, sizeof(out));
  	return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
  }
  
@@@ -288,9 -284,7 +284,9 @@@ __be16 mlx5_get_roce_udp_sport(struct m
  
  static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
  {
 -	return !MLX5_CAP_GEN(dev->mdev, ib_virt);
 +	if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
 +		return !MLX5_CAP_GEN(dev->mdev, ib_virt);
 +	return 0;
  }
  
  enum {
@@@ -753,8 -747,7 +749,7 @@@ static int mlx5_query_hca_port(struct i
  				     &props->active_width);
  	if (err)
  		goto out;
- 	err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB,
- 					 port);
+ 	err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
  	if (err)
  		goto out;
  
@@@ -1430,13 -1423,6 +1425,13 @@@ static int parse_flow_attr(u32 *match_c
  					     dmac_47_16),
  				ib_spec->eth.val.dst_mac);
  
 +		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c,
 +					     smac_47_16),
 +				ib_spec->eth.mask.src_mac);
 +		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v,
 +					     smac_47_16),
 +				ib_spec->eth.val.src_mac);
 +
  		if (ib_spec->eth.mask.vlan_tag) {
  			MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c,
  				 vlan_tag, 1);
diff --combined drivers/net/ethernet/broadcom/bnx2.c
index 505ceaf,ecd357d..27f11a5
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@@ -50,7 -50,7 +50,7 @@@
  #include <linux/log2.h>
  #include <linux/aer.h>
  
- #if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE)
+ #if IS_ENABLED(CONFIG_CNIC)
  #define BCM_CNIC 1
  #include "cnic_if.h"
  #endif
@@@ -6356,6 -6356,10 +6356,6 @@@ bnx2_open(struct net_device *dev
  	struct bnx2 *bp = netdev_priv(dev);
  	int rc;
  
 -	rc = bnx2_request_firmware(bp);
 -	if (rc < 0)
 -		goto out;
 -
  	netif_carrier_off(dev);
  
  	bnx2_disable_int(bp);
@@@ -6424,6 -6428,7 +6424,6 @@@ open_err
  	bnx2_free_irq(bp);
  	bnx2_free_mem(bp);
  	bnx2_del_napi(bp);
 -	bnx2_release_firmware(bp);
  	goto out;
  }
  
@@@ -8570,12 -8575,6 +8570,12 @@@ bnx2_init_one(struct pci_dev *pdev, con
  
  	pci_set_drvdata(pdev, dev);
  
 +	rc = bnx2_request_firmware(bp);
 +	if (rc < 0)
 +		goto error;
 +
 +
 +	bnx2_reset_chip(bp, BNX2_DRV_MSG_CODE_RESET);
  	memcpy(dev->dev_addr, bp->mac_addr, ETH_ALEN);
  
  	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_SG |
@@@ -8608,7 -8607,6 +8608,7 @@@
  	return 0;
  
  error:
 +	bnx2_release_firmware(bp);
  	pci_iounmap(pdev, bp->regview);
  	pci_release_regions(pdev);
  	pci_disable_device(pdev);
diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index edd2338,ea0d1f1..28e653e
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@@ -1,7 -1,7 +1,7 @@@
  /*
   * This file is part of the Chelsio T4 Ethernet driver for Linux.
   *
-  * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+  * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@@ -53,6 -53,8 +53,8 @@@
  #include "cxgb4_uld.h"
  
  #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__)
+ extern struct list_head adapter_list;
+ extern struct mutex uld_mutex;
  
  enum {
  	MAX_NPORTS	= 4,     /* max # of ports */
@@@ -338,12 -340,14 +340,14 @@@ struct adapter_params 
  	enum chip_type chip;               /* chip code */
  	struct arch_specific_params arch;  /* chip specific params */
  	unsigned char offload;
+ 	unsigned char crypto;		/* HW capability for crypto */
  
  	unsigned char bypass;
  
  	unsigned int ofldq_wr_cred;
  	bool ulptx_memwrite_dsgl;          /* use of T5 DSGL allowed */
  
+ 	unsigned int nsched_cls;          /* number of traffic classes */
  	unsigned int max_ordird_qp;       /* Max read depth per RDMA QP */
  	unsigned int max_ird_adapter;     /* Max read depth per adapter */
  };
@@@ -403,7 -407,6 +407,6 @@@ struct fw_info 
  	struct fw_hdr fw_hdr;
  };
  
- 
  struct trace_params {
  	u32 data[TRACE_LEN / 4];
  	u32 mask[TRACE_LEN / 4];
@@@ -419,8 -422,8 +422,8 @@@ struct link_config 
  	unsigned short supported;        /* link capabilities */
  	unsigned short advertising;      /* advertised capabilities */
  	unsigned short lp_advertising;   /* peer advertised capabilities */
 -	unsigned short requested_speed;  /* speed user has requested */
 -	unsigned short speed;            /* actual link speed */
 +	unsigned int   requested_speed;  /* speed user has requested */
 +	unsigned int   speed;            /* actual link speed */
  	unsigned char  requested_fc;     /* flow control user has requested */
  	unsigned char  fc;               /* actual link flow control */
  	unsigned char  autoneg;          /* autonegotiating? */
@@@ -434,11 -437,6 +437,6 @@@ enum 
  	MAX_ETH_QSETS = 32,           /* # of Ethernet Tx/Rx queue sets */
  	MAX_OFLD_QSETS = 16,          /* # of offload Tx, iscsi Rx queue sets */
  	MAX_CTRL_QUEUES = NCHAN,      /* # of control Tx queues */
- 	MAX_RDMA_QUEUES = NCHAN,      /* # of streaming RDMA Rx queues */
- 	MAX_RDMA_CIQS = 32,        /* # of  RDMA concentrator IQs */
- 
- 	/* # of streaming iSCSIT Rx queues */
- 	MAX_ISCSIT_QUEUES = MAX_OFLD_QSETS,
  };
  
  enum {
@@@ -455,8 -453,7 +453,7 @@@
  enum {
  	INGQ_EXTRAS = 2,        /* firmware event queue and */
  				/*   forwarded interrupts */
- 	MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES +
- 		   MAX_RDMA_CIQS + MAX_ISCSIT_QUEUES + INGQ_EXTRAS,
+ 	MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS,
  };
  
  struct adapter;
@@@ -493,6 -490,7 +490,7 @@@ struct port_info 
  #endif /* CONFIG_CHELSIO_T4_FCOE */
  	bool rxtstamp;  /* Enable TS */
  	struct hwtstamp_config tstamp_config;
+ 	struct sched_table *sched_tbl;
  };
  
  struct dentry;
@@@ -510,6 -508,10 +508,10 @@@ enum {                                 
  	FW_OFLD_CONN       = (1 << 9),
  };
  
+ enum {
+ 	ULP_CRYPTO_LOOKASIDE = 1 << 0,
+ };
+ 
  struct rx_sw_desc;
  
  struct sge_fl {                     /* SGE free-buffer queue state */
@@@ -680,17 -682,24 +682,24 @@@ struct sge_ctrl_txq {               /* 
  	u8 full;                    /* the Tx ring is full */
  } ____cacheline_aligned_in_smp;
  
+ struct sge_uld_rxq_info {
+ 	char name[IFNAMSIZ];	/* name of ULD driver */
+ 	struct sge_ofld_rxq *uldrxq; /* Rxq's for ULD */
+ 	u16 *msix_tbl;		/* msix_tbl for uld */
+ 	u16 *rspq_id;		/* response queue id's of rxq */
+ 	u16 nrxq;		/* # of ingress uld queues */
+ 	u16 nciq;		/* # of completion queues */
+ 	u8 uld;			/* uld type */
+ };
+ 
  struct sge {
  	struct sge_eth_txq ethtxq[MAX_ETH_QSETS];
  	struct sge_ofld_txq ofldtxq[MAX_OFLD_QSETS];
  	struct sge_ctrl_txq ctrlq[MAX_CTRL_QUEUES];
  
  	struct sge_eth_rxq ethrxq[MAX_ETH_QSETS];
- 	struct sge_ofld_rxq iscsirxq[MAX_OFLD_QSETS];
- 	struct sge_ofld_rxq iscsitrxq[MAX_ISCSIT_QUEUES];
- 	struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES];
- 	struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS];
  	struct sge_rspq fw_evtq ____cacheline_aligned_in_smp;
+ 	struct sge_uld_rxq_info **uld_rxq_info;
  
  	struct sge_rspq intrq ____cacheline_aligned_in_smp;
  	spinlock_t intrq_lock;
@@@ -698,14 -707,8 +707,8 @@@
  	u16 max_ethqsets;           /* # of available Ethernet queue sets */
  	u16 ethqsets;               /* # of active Ethernet queue sets */
  	u16 ethtxq_rover;           /* Tx queue to clean up next */
- 	u16 iscsiqsets;              /* # of active iSCSI queue sets */
- 	u16 niscsitq;               /* # of available iSCST Rx queues */
- 	u16 rdmaqs;                 /* # of available RDMA Rx queues */
- 	u16 rdmaciqs;               /* # of available RDMA concentrator IQs */
- 	u16 iscsi_rxq[MAX_OFLD_QSETS];
- 	u16 iscsit_rxq[MAX_ISCSIT_QUEUES];
- 	u16 rdma_rxq[MAX_RDMA_QUEUES];
- 	u16 rdma_ciq[MAX_RDMA_CIQS];
+ 	u16 ofldqsets;              /* # of active ofld queue sets */
+ 	u16 nqs_per_uld;	    /* # of Rx queues per ULD */
  	u16 timer_val[SGE_NTIMERS];
  	u8 counter_val[SGE_NCOUNTERS];
  	u32 fl_pg_order;            /* large page allocation size */
@@@ -729,10 -732,7 +732,7 @@@
  };
  
  #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++)
- #define for_each_iscsirxq(sge, i) for (i = 0; i < (sge)->iscsiqsets; i++)
- #define for_each_iscsitrxq(sge, i) for (i = 0; i < (sge)->niscsitq; i++)
- #define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++)
- #define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++)
+ #define for_each_ofldtxq(sge, i) for (i = 0; i < (sge)->ofldqsets; i++)
  
  struct l2t_data;
  
@@@ -757,6 -757,23 +757,23 @@@ struct hash_mac_addr 
  	u8 addr[ETH_ALEN];
  };
  
+ struct uld_msix_bmap {
+ 	unsigned long *msix_bmap;
+ 	unsigned int mapsize;
+ 	spinlock_t lock; /* lock for acquiring bitmap */
+ };
+ 
+ struct uld_msix_info {
+ 	unsigned short vec;
+ 	char desc[IFNAMSIZ + 10];
+ 	unsigned int idx;
+ };
+ 
+ struct vf_info {
+ 	unsigned char vf_mac_addr[ETH_ALEN];
+ 	bool pf_set_mac;
+ };
+ 
  struct adapter {
  	void __iomem *regs;
  	void __iomem *bar2;
@@@ -767,6 -784,7 +784,7 @@@
  	unsigned int mbox;
  	unsigned int pf;
  	unsigned int flags;
+ 	unsigned int adap_idx;
  	enum chip_type chip;
  
  	int msg_enable;
@@@ -779,6 -797,9 +797,9 @@@
  		unsigned short vec;
  		char desc[IFNAMSIZ + 10];
  	} msix_info[MAX_INGQ + 1];
+ 	struct uld_msix_info *msix_info_ulds; /* msix info for uld's */
+ 	struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */
+ 	int msi_idx;
  
  	struct doorbell_stats db_stats;
  	struct sge sge;
@@@ -786,6 -807,9 +807,9 @@@
  	struct net_device *port[MAX_NPORTS];
  	u8 chan_map[NCHAN];                   /* channel -> port map */
  
+ 	struct vf_info *vfinfo;
+ 	u8 num_vfs;
+ 
  	u32 filter_mode;
  	unsigned int l2t_start;
  	unsigned int l2t_end;
@@@ -793,7 -817,10 +817,10 @@@
  	unsigned int clipt_start;
  	unsigned int clipt_end;
  	struct clip_tbl *clipt;
+ 	struct cxgb4_uld_info *uld;
  	void *uld_handle[CXGB4_ULD_MAX];
+ 	unsigned int num_uld;
+ 	unsigned int num_ofld_uld;
  	struct list_head list_node;
  	struct list_head rcu_node;
  	struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */
@@@ -813,6 -840,8 +840,8 @@@
  #define T4_OS_LOG_MBOX_CMDS 256
  	struct mbox_cmd_log *mbox_log;
  
+ 	struct mutex uld_mutex;
+ 
  	struct dentry *debugfs_root;
  	bool use_bd;     /* Use SGE Back Door intfc for reading SGE Contexts */
  	bool trace_rss;	/* 1 implies that different RSS flit per filter is
@@@ -822,6 -851,58 +851,58 @@@
  
  	spinlock_t stats_lock;
  	spinlock_t win0_lock ____cacheline_aligned_in_smp;
+ 
+ 	/* TC u32 offload */
+ 	struct cxgb4_tc_u32_table *tc_u32;
+ };
+ 
+ /* Support for "sched-class" command to allow a TX Scheduling Class to be
+  * programmed with various parameters.
+  */
+ struct ch_sched_params {
+ 	s8   type;                     /* packet or flow */
+ 	union {
+ 		struct {
+ 			s8   level;    /* scheduler hierarchy level */
+ 			s8   mode;     /* per-class or per-flow */
+ 			s8   rateunit; /* bit or packet rate */
+ 			s8   ratemode; /* %port relative or kbps absolute */
+ 			s8   channel;  /* scheduler channel [0..N] */
+ 			s8   class;    /* scheduler class [0..N] */
+ 			s32  minrate;  /* minimum rate */
+ 			s32  maxrate;  /* maximum rate */
+ 			s16  weight;   /* percent weight */
+ 			s16  pktsize;  /* average packet size */
+ 		} params;
+ 	} u;
+ };
+ 
+ enum {
+ 	SCHED_CLASS_TYPE_PACKET = 0,    /* class type */
+ };
+ 
+ enum {
+ 	SCHED_CLASS_LEVEL_CL_RL = 0,    /* class rate limiter */
+ };
+ 
+ enum {
+ 	SCHED_CLASS_MODE_CLASS = 0,     /* per-class scheduling */
+ };
+ 
+ enum {
+ 	SCHED_CLASS_RATEUNIT_BITS = 0,  /* bit rate scheduling */
+ };
+ 
+ enum {
+ 	SCHED_CLASS_RATEMODE_ABS = 1,   /* Kb/s */
+ };
+ 
+ /* Support for "sched_queue" command to allow one or more NIC TX Queues
+  * to be bound to a TX Scheduling Class.
+  */
+ struct ch_sched_queue {
+ 	s8   queue;    /* queue index */
+ 	s8   class;    /* class index */
  };
  
  /* Defined bit width of user definable filter tuples
@@@ -947,11 -1028,47 +1028,47 @@@ enum 
  	VLAN_REWRITE
  };
  
+ /* Host shadow copy of ingress filter entry.  This is in host native format
+  * and doesn't match the ordering or bit order, etc. of the hardware of the
+  * firmware command.  The use of bit-field structure elements is purely to
+  * remind ourselves of the field size limitations and save memory in the case
+  * where the filter table is large.
+  */
+ struct filter_entry {
+ 	/* Administrative fields for filter. */
+ 	u32 valid:1;            /* filter allocated and valid */
+ 	u32 locked:1;           /* filter is administratively locked */
+ 
+ 	u32 pending:1;          /* filter action is pending firmware reply */
+ 	u32 smtidx:8;           /* Source MAC Table index for smac */
+ 	struct filter_ctx *ctx; /* Caller's completion hook */
+ 	struct l2t_entry *l2t;  /* Layer Two Table entry for dmac */
+ 	struct net_device *dev; /* Associated net device */
+ 	u32 tid;                /* This will store the actual tid */
+ 
+ 	/* The filter itself.  Most of this is a straight copy of information
+ 	 * provided by the extended ioctl().  Some fields are translated to
+ 	 * internal forms -- for instance the Ingress Queue ID passed in from
+ 	 * the ioctl() is translated into the Absolute Ingress Queue ID.
+ 	 */
+ 	struct ch_filter_specification fs;
+ };
+ 
  static inline int is_offload(const struct adapter *adap)
  {
  	return adap->params.offload;
  }
  
+ static inline int is_pci_uld(const struct adapter *adap)
+ {
+ 	return adap->params.crypto;
+ }
+ 
+ static inline int is_uld(const struct adapter *adap)
+ {
+ 	return (adap->params.offload || adap->params.crypto);
+ }
+ 
  static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr)
  {
  	return readl(adap->regs + reg_addr);
@@@ -1178,6 -1295,8 +1295,8 @@@ int t4_sge_alloc_eth_txq(struct adapte
  int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
  			  struct net_device *dev, unsigned int iqid,
  			  unsigned int cmplqid);
+ int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
+ 			unsigned int cmplqid);
  int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
  			  struct net_device *dev, unsigned int iqid);
  irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
@@@ -1185,8 -1304,6 +1304,6 @@@ int t4_sge_init(struct adapter *adap)
  void t4_sge_start(struct adapter *adap);
  void t4_sge_stop(struct adapter *adap);
  int cxgb_busy_poll(struct napi_struct *napi);
- int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us,
- 			       unsigned int cnt);
  void cxgb4_set_ethtool_ops(struct net_device *netdev);
  int cxgb4_write_rss(const struct port_info *pi, const u16 *queues);
  extern int dbfifo_int_thresh;
@@@ -1289,6 -1406,18 +1406,18 @@@ static inline int hash_mac_addr(const u
  	return a & 0x3f;
  }
  
+ int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us,
+ 			       unsigned int cnt);
+ static inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
+ 			     unsigned int us, unsigned int cnt,
+ 			     unsigned int size, unsigned int iqe_size)
+ {
+ 	q->adap = adap;
+ 	cxgb4_set_rspq_intr_params(q, us, cnt);
+ 	q->iqe_len = iqe_size;
+ 	q->size = size;
+ }
+ 
  void t4_write_indirect(struct adapter *adap, unsigned int addr_reg,
  		       unsigned int data_reg, const u32 *vals,
  		       unsigned int nregs, unsigned int start_idx);
@@@ -1514,6 -1643,9 +1643,9 @@@ void t4_get_trace_filter(struct adapte
  			 int filter_index, int *enabled);
  int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox,
  			 u32 addr, u32 val);
+ int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
+ 		    int rateunit, int ratemode, int channel, int class,
+ 		    int minrate, int maxrate, int weight, int pktsize);
  void t4_sge_decode_idma_state(struct adapter *adapter, int state);
  void t4_free_mem(void *addr);
  void t4_idma_monitor_init(struct adapter *adapter,
@@@ -1521,4 -1653,11 +1653,11 @@@
  void t4_idma_monitor(struct adapter *adapter,
  		     struct sge_idma_monitor_state *idma,
  		     int hz, int ticks);
+ int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
+ 		      unsigned int naddr, u8 *addr);
+ void t4_uld_mem_free(struct adapter *adap);
+ int t4_uld_mem_alloc(struct adapter *adap);
+ void t4_uld_clean_up(struct adapter *adap);
+ void t4_register_netevent_notifier(void);
+ void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
  #endif /* __CXGB4_H__ */
diff --combined drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 3ceafb55,1be4d23..eaa7fa9
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@@ -1,7 -1,7 +1,7 @@@
  /*
   * This file is part of the Chelsio T4 Ethernet driver for Linux.
   *
-  * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+  * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@@ -67,6 -67,7 +67,7 @@@
  #include <linux/crash_dump.h>
  
  #include "cxgb4.h"
+ #include "cxgb4_filter.h"
  #include "t4_regs.h"
  #include "t4_values.h"
  #include "t4_msg.h"
@@@ -76,6 -77,8 +77,8 @@@
  #include "cxgb4_debugfs.h"
  #include "clip_tbl.h"
  #include "l2t.h"
+ #include "sched.h"
+ #include "cxgb4_tc_u32.h"
  
  char cxgb4_driver_name[] = KBUILD_MODNAME;
  
@@@ -86,30 -89,6 +89,6 @@@
  const char cxgb4_driver_version[] = DRV_VERSION;
  #define DRV_DESC "Chelsio T4/T5/T6 Network Driver"
  
- /* Host shadow copy of ingress filter entry.  This is in host native format
-  * and doesn't match the ordering or bit order, etc. of the hardware of the
-  * firmware command.  The use of bit-field structure elements is purely to
-  * remind ourselves of the field size limitations and save memory in the case
-  * where the filter table is large.
-  */
- struct filter_entry {
- 	/* Administrative fields for filter.
- 	 */
- 	u32 valid:1;            /* filter allocated and valid */
- 	u32 locked:1;           /* filter is administratively locked */
- 
- 	u32 pending:1;          /* filter action is pending firmware reply */
- 	u32 smtidx:8;           /* Source MAC Table index for smac */
- 	struct l2t_entry *l2t;  /* Layer Two Table entry for dmac */
- 
- 	/* The filter itself.  Most of this is a straight copy of information
- 	 * provided by the extended ioctl().  Some fields are translated to
- 	 * internal forms -- for instance the Ingress Queue ID passed in from
- 	 * the ioctl() is translated into the Absolute Ingress Queue ID.
- 	 */
- 	struct ch_filter_specification fs;
- };
- 
  #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \
  			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
  			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
@@@ -223,13 -202,8 +202,8 @@@ MODULE_PARM_DESC(select_queue
  
  static struct dentry *cxgb4_debugfs_root;
  
- static LIST_HEAD(adapter_list);
- static DEFINE_MUTEX(uld_mutex);
- /* Adapter list to be accessed from atomic context */
- static LIST_HEAD(adap_rcu_list);
- static DEFINE_SPINLOCK(adap_rcu_lock);
- static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX];
- static const char *const uld_str[] = { "RDMA", "iSCSI", "iSCSIT" };
+ LIST_HEAD(adapter_list);
+ DEFINE_MUTEX(uld_mutex);
  
  static void link_report(struct net_device *dev)
  {
@@@ -531,66 -505,6 +505,6 @@@ static void dcb_rpl(struct adapter *ada
  }
  #endif /* CONFIG_CHELSIO_T4_DCB */
  
- /* Clear a filter and release any of its resources that we own.  This also
-  * clears the filter's "pending" status.
-  */
- static void clear_filter(struct adapter *adap, struct filter_entry *f)
- {
- 	/* If the new or old filter have loopback rewriteing rules then we'll
- 	 * need to free any existing Layer Two Table (L2T) entries of the old
- 	 * filter rule.  The firmware will handle freeing up any Source MAC
- 	 * Table (SMT) entries used for rewriting Source MAC Addresses in
- 	 * loopback rules.
- 	 */
- 	if (f->l2t)
- 		cxgb4_l2t_release(f->l2t);
- 
- 	/* The zeroing of the filter rule below clears the filter valid,
- 	 * pending, locked flags, l2t pointer, etc. so it's all we need for
- 	 * this operation.
- 	 */
- 	memset(f, 0, sizeof(*f));
- }
- 
- /* Handle a filter write/deletion reply.
-  */
- static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
- {
- 	unsigned int idx = GET_TID(rpl);
- 	unsigned int nidx = idx - adap->tids.ftid_base;
- 	unsigned int ret;
- 	struct filter_entry *f;
- 
- 	if (idx >= adap->tids.ftid_base && nidx <
- 	   (adap->tids.nftids + adap->tids.nsftids)) {
- 		idx = nidx;
- 		ret = TCB_COOKIE_G(rpl->cookie);
- 		f = &adap->tids.ftid_tab[idx];
- 
- 		if (ret == FW_FILTER_WR_FLT_DELETED) {
- 			/* Clear the filter when we get confirmation from the
- 			 * hardware that the filter has been deleted.
- 			 */
- 			clear_filter(adap, f);
- 		} else if (ret == FW_FILTER_WR_SMT_TBL_FULL) {
- 			dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n",
- 				idx);
- 			clear_filter(adap, f);
- 		} else if (ret == FW_FILTER_WR_FLT_ADDED) {
- 			f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff;
- 			f->pending = 0;  /* asynchronous setup completed */
- 			f->valid = 1;
- 		} else {
- 			/* Something went wrong.  Issue a warning about the
- 			 * problem and clear everything out.
- 			 */
- 			dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n",
- 				idx, ret);
- 			clear_filter(adap, f);
- 		}
- 	}
- }
- 
  /* Response queue handler for the FW event queue.
   */
  static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
@@@ -677,56 -591,6 +591,6 @@@ out
  	return 0;
  }
  
- /* Flush the aggregated lro sessions */
- static void uldrx_flush_handler(struct sge_rspq *q)
- {
- 	if (ulds[q->uld].lro_flush)
- 		ulds[q->uld].lro_flush(&q->lro_mgr);
- }
- 
- /**
-  *	uldrx_handler - response queue handler for ULD queues
-  *	@q: the response queue that received the packet
-  *	@rsp: the response queue descriptor holding the offload message
-  *	@gl: the gather list of packet fragments
-  *
-  *	Deliver an ingress offload packet to a ULD.  All processing is done by
-  *	the ULD, we just maintain statistics.
-  */
- static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp,
- 			 const struct pkt_gl *gl)
- {
- 	struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq);
- 	int ret;
- 
- 	/* FW can send CPLs encapsulated in a CPL_FW4_MSG.
- 	 */
- 	if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG &&
- 	    ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL)
- 		rsp += 2;
- 
- 	if (q->flush_handler)
- 		ret = ulds[q->uld].lro_rx_handler(q->adap->uld_handle[q->uld],
- 						  rsp, gl, &q->lro_mgr,
- 						  &q->napi);
- 	else
- 		ret = ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld],
- 					      rsp, gl);
- 
- 	if (ret) {
- 		rxq->stats.nomem++;
- 		return -1;
- 	}
- 
- 	if (gl == NULL)
- 		rxq->stats.imm++;
- 	else if (gl == CXGB4_MSG_AN)
- 		rxq->stats.an++;
- 	else
- 		rxq->stats.pkts++;
- 	return 0;
- }
- 
  static void disable_msi(struct adapter *adapter)
  {
  	if (adapter->flags & USING_MSIX) {
@@@ -778,30 -642,12 +642,12 @@@ static void name_msix_vecs(struct adapt
  			snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d",
  				 d->name, i);
  	}
- 
- 	/* offload queues */
- 	for_each_iscsirxq(&adap->sge, i)
- 		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iscsi%d",
- 			 adap->port[0]->name, i);
- 
- 	for_each_iscsitrxq(&adap->sge, i)
- 		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iSCSIT%d",
- 			 adap->port[0]->name, i);
- 
- 	for_each_rdmarxq(&adap->sge, i)
- 		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d",
- 			 adap->port[0]->name, i);
- 
- 	for_each_rdmaciq(&adap->sge, i)
- 		snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d",
- 			 adap->port[0]->name, i);
  }
  
  static int request_msix_queue_irqs(struct adapter *adap)
  {
  	struct sge *s = &adap->sge;
- 	int err, ethqidx, iscsiqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0;
- 	int iscsitqidx = 0;
+ 	int err, ethqidx;
  	int msi_index = 2;
  
  	err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0,
@@@ -818,57 -664,9 +664,9 @@@
  			goto unwind;
  		msi_index++;
  	}
- 	for_each_iscsirxq(s, iscsiqidx) {
- 		err = request_irq(adap->msix_info[msi_index].vec,
- 				  t4_sge_intr_msix, 0,
- 				  adap->msix_info[msi_index].desc,
- 				  &s->iscsirxq[iscsiqidx].rspq);
- 		if (err)
- 			goto unwind;
- 		msi_index++;
- 	}
- 	for_each_iscsitrxq(s, iscsitqidx) {
- 		err = request_irq(adap->msix_info[msi_index].vec,
- 				  t4_sge_intr_msix, 0,
- 				  adap->msix_info[msi_index].desc,
- 				  &s->iscsitrxq[iscsitqidx].rspq);
- 		if (err)
- 			goto unwind;
- 		msi_index++;
- 	}
- 	for_each_rdmarxq(s, rdmaqidx) {
- 		err = request_irq(adap->msix_info[msi_index].vec,
- 				  t4_sge_intr_msix, 0,
- 				  adap->msix_info[msi_index].desc,
- 				  &s->rdmarxq[rdmaqidx].rspq);
- 		if (err)
- 			goto unwind;
- 		msi_index++;
- 	}
- 	for_each_rdmaciq(s, rdmaciqqidx) {
- 		err = request_irq(adap->msix_info[msi_index].vec,
- 				  t4_sge_intr_msix, 0,
- 				  adap->msix_info[msi_index].desc,
- 				  &s->rdmaciq[rdmaciqqidx].rspq);
- 		if (err)
- 			goto unwind;
- 		msi_index++;
- 	}
  	return 0;
  
  unwind:
- 	while (--rdmaciqqidx >= 0)
- 		free_irq(adap->msix_info[--msi_index].vec,
- 			 &s->rdmaciq[rdmaciqqidx].rspq);
- 	while (--rdmaqidx >= 0)
- 		free_irq(adap->msix_info[--msi_index].vec,
- 			 &s->rdmarxq[rdmaqidx].rspq);
- 	while (--iscsitqidx >= 0)
- 		free_irq(adap->msix_info[--msi_index].vec,
- 			 &s->iscsitrxq[iscsitqidx].rspq);
- 	while (--iscsiqidx >= 0)
- 		free_irq(adap->msix_info[--msi_index].vec,
- 			 &s->iscsirxq[iscsiqidx].rspq);
  	while (--ethqidx >= 0)
  		free_irq(adap->msix_info[--msi_index].vec,
  			 &s->ethrxq[ethqidx].rspq);
@@@ -884,16 -682,6 +682,6 @@@ static void free_msix_queue_irqs(struc
  	free_irq(adap->msix_info[1].vec, &s->fw_evtq);
  	for_each_ethrxq(s, i)
  		free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq);
- 	for_each_iscsirxq(s, i)
- 		free_irq(adap->msix_info[msi_index++].vec,
- 			 &s->iscsirxq[i].rspq);
- 	for_each_iscsitrxq(s, i)
- 		free_irq(adap->msix_info[msi_index++].vec,
- 			 &s->iscsitrxq[i].rspq);
- 	for_each_rdmarxq(s, i)
- 		free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq);
- 	for_each_rdmaciq(s, i)
- 		free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq);
  }
  
  /**
@@@ -1032,28 -820,30 +820,30 @@@ static void enable_rx(struct adapter *a
  	}
  }
  
- static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q,
- 			   unsigned int nq, unsigned int per_chan, int msi_idx,
- 			   u16 *ids, bool lro)
+ 
+ /* Set up the SGE queues that the remaining queue setup depends on.
+  *
+  * Clears the starving_fl and txq_maperr bitmaps, then chooses adap->msi_idx:
+  * 1 when MSI-X is in use (vector 0 is for non-queue interrupts); otherwise
+  * the interrupt queue s->intrq is allocated first and msi_idx becomes
+  * -(intrq.abs_id + 1).  Finally the firmware event queue s->fw_evtq is
+  * allocated with fwevtq_handler and that msi_idx.
+  *
+  * Returns 0 on success or the error from t4_sge_alloc_rxq().  When the
+  * firmware event queue allocation fails, t4_free_sge_resources() is called
+  * before returning; when the intrq allocation fails the error is returned
+  * directly without that cleanup.
+  * NOTE(review): the negative msi_idx looks like an encoding of the intrq's
+  * absolute id for t4_sge_alloc_rxq() -- confirm in that function.
+  */
+ static int setup_fw_sge_queues(struct adapter *adap)
  {
- 	int i, err;
+ 	struct sge *s = &adap->sge;
+ 	int err = 0;
+ 
+ 	bitmap_zero(s->starving_fl, s->egr_sz);
+ 	bitmap_zero(s->txq_maperr, s->egr_sz);
  
- 	for (i = 0; i < nq; i++, q++) {
- 		if (msi_idx > 0)
- 			msi_idx++;
- 		err = t4_sge_alloc_rxq(adap, &q->rspq, false,
- 				       adap->port[i / per_chan],
- 				       msi_idx, q->fl.size ? &q->fl : NULL,
- 				       uldrx_handler,
- 				       lro ? uldrx_flush_handler : NULL,
- 				       0);
+ 	if (adap->flags & USING_MSIX)
+ 		adap->msi_idx = 1;         /* vector 0 is for non-queue interrupts */
+ 	else {
+ 		err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
+ 				       NULL, NULL, NULL, -1);
  		if (err)
  			return err;
- 		memset(&q->stats, 0, sizeof(q->stats));
- 		if (ids)
- 			ids[i] = q->rspq.abs_id;
+ 		adap->msi_idx = -((int)s->intrq.abs_id + 1);
  	}
- 	return 0;
+ 
+ 	err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
+ 			       adap->msi_idx, NULL, fwevtq_handler, NULL, -1);
+ 	if (err)
+ 		t4_free_sge_resources(adap);
+ 	return err;
  }
  
  /**
@@@ -1066,41 -856,10 +856,10 @@@
   */
  static int setup_sge_queues(struct adapter *adap)
  {
- 	int err, msi_idx, i, j;
+ 	int err, i, j;
  	struct sge *s = &adap->sge;
- 
- 	bitmap_zero(s->starving_fl, s->egr_sz);
- 	bitmap_zero(s->txq_maperr, s->egr_sz);
- 
- 	if (adap->flags & USING_MSIX)
- 		msi_idx = 1;         /* vector 0 is for non-queue interrupts */
- 	else {
- 		err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0,
- 				       NULL, NULL, NULL, -1);
- 		if (err)
- 			return err;
- 		msi_idx = -((int)s->intrq.abs_id + 1);
- 	}
- 
- 	/* NOTE: If you add/delete any Ingress/Egress Queue allocations in here,
- 	 * don't forget to update the following which need to be
- 	 * synchronized to and changes here.
- 	 *
- 	 * 1. The calculations of MAX_INGQ in cxgb4.h.
- 	 *
- 	 * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs
- 	 *    to accommodate any new/deleted Ingress Queues
- 	 *    which need MSI-X Vectors.
- 	 *
- 	 * 3. Update sge_qinfo_show() to include information on the
- 	 *    new/deleted queues.
- 	 */
- 	err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0],
- 			       msi_idx, NULL, fwevtq_handler, NULL, -1);
- 	if (err) {
- freeout:	t4_free_sge_resources(adap);
- 		return err;
- 	}
+ 	struct sge_uld_rxq_info *rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA];
+ 	unsigned int cmplqid = 0;
  
  	for_each_port(adap, i) {
  		struct net_device *dev = adap->port[i];
@@@ -1109,10 -868,10 +868,10 @@@
  		struct sge_eth_txq *t = &s->ethtxq[pi->first_qset];
  
  		for (j = 0; j < pi->nqsets; j++, q++) {
- 			if (msi_idx > 0)
- 				msi_idx++;
+ 			if (adap->msi_idx > 0)
+ 				adap->msi_idx++;
  			err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev,
- 					       msi_idx, &q->fl,
+ 					       adap->msi_idx, &q->fl,
  					       t4_ethrx_handler,
  					       NULL,
  					       t4_get_mps_bg_map(adap,
@@@ -1131,8 -890,8 +890,8 @@@
  		}
  	}
  
- 	j = s->iscsiqsets / adap->params.nports; /* iscsi queues per channel */
- 	for_each_iscsirxq(s, i) {
+ 	j = s->ofldqsets / adap->params.nports; /* offload queues per channel */
+ 	for_each_ofldtxq(s, i) {
  		err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i],
  					    adap->port[i / j],
  					    s->fw_evtq.cntxt_id);
@@@ -1140,30 -899,15 +899,15 @@@
  			goto freeout;
  	}
  
- #define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \
- 	err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, msi_idx, ids, lro); \
- 	if (err) \
- 		goto freeout; \
- 	if (msi_idx > 0) \
- 		msi_idx += nq; \
- } while (0)
- 
- 	ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false);
- 	ALLOC_OFLD_RXQS(s->iscsitrxq, s->niscsitq, j, s->iscsit_rxq, true);
- 	ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq, false);
- 	j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */
- 	ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq, false);
- 
- #undef ALLOC_OFLD_RXQS
- 
  	for_each_port(adap, i) {
- 		/*
- 		 * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't
+ 		/* Note that cmplqid below is 0 if we don't
  		 * have RDMA queues, and that's the right value.
  		 */
+ 		if (rxq_info)
+ 			cmplqid	= rxq_info->uldrxq[i].rspq.cntxt_id;
+ 
  		err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i],
- 					    s->fw_evtq.cntxt_id,
- 					    s->rdmarxq[i].rspq.cntxt_id);
+ 					    s->fw_evtq.cntxt_id, cmplqid);
  		if (err)
  			goto freeout;
  	}
@@@ -1174,6 -918,9 +918,9 @@@
  		     RSSCONTROL_V(netdev2pinfo(adap->port[0])->tx_chan) |
  		     QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id));
  	return 0;
+ freeout:
+ 	t4_free_sge_resources(adap);
+ 	return err;
  }
  
  /*
@@@ -1197,151 -944,6 +944,6 @@@ void t4_free_mem(void *addr
  	kvfree(addr);
  }
  
- /* Send a Work Request to write the filter at a specified index.  We construct
-  * a Firmware Filter Work Request to have the work done and put the indicated
-  * filter into "pending" mode which will prevent any further actions against
-  * it till we get a reply from the firmware on the completion status of the
-  * request.
-  */
- static int set_filter_wr(struct adapter *adapter, int fidx)
- {
- 	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
- 	struct sk_buff *skb;
- 	struct fw_filter_wr *fwr;
- 	unsigned int ftid;
- 
- 	skb = alloc_skb(sizeof(*fwr), GFP_KERNEL);
- 	if (!skb)
- 		return -ENOMEM;
- 
- 	/* If the new filter requires loopback Destination MAC and/or VLAN
- 	 * rewriting then we need to allocate a Layer 2 Table (L2T) entry for
- 	 * the filter.
- 	 */
- 	if (f->fs.newdmac || f->fs.newvlan) {
- 		/* allocate L2T entry for new filter */
- 		f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan,
- 						f->fs.eport, f->fs.dmac);
- 		if (f->l2t == NULL) {
- 			kfree_skb(skb);
- 			return -ENOMEM;
- 		}
- 	}
- 
- 	ftid = adapter->tids.ftid_base + fidx;
- 
- 	fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr));
- 	memset(fwr, 0, sizeof(*fwr));
- 
- 	/* It would be nice to put most of the following in t4_hw.c but most
- 	 * of the work is translating the cxgbtool ch_filter_specification
- 	 * into the Work Request and the definition of that structure is
- 	 * currently in cxgbtool.h which isn't appropriate to pull into the
- 	 * common code.  We may eventually try to come up with a more neutral
- 	 * filter specification structure but for now it's easiest to simply
- 	 * put this fairly direct code in line ...
- 	 */
- 	fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR));
- 	fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16));
- 	fwr->tid_to_iq =
- 		htonl(FW_FILTER_WR_TID_V(ftid) |
- 		      FW_FILTER_WR_RQTYPE_V(f->fs.type) |
- 		      FW_FILTER_WR_NOREPLY_V(0) |
- 		      FW_FILTER_WR_IQ_V(f->fs.iq));
- 	fwr->del_filter_to_l2tix =
- 		htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) |
- 		      FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) |
- 		      FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) |
- 		      FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) |
- 		      FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) |
- 		      FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) |
- 		      FW_FILTER_WR_DMAC_V(f->fs.newdmac) |
- 		      FW_FILTER_WR_SMAC_V(f->fs.newsmac) |
- 		      FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT ||
- 					     f->fs.newvlan == VLAN_REWRITE) |
- 		      FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE ||
- 					    f->fs.newvlan == VLAN_REWRITE) |
- 		      FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) |
- 		      FW_FILTER_WR_TXCHAN_V(f->fs.eport) |
- 		      FW_FILTER_WR_PRIO_V(f->fs.prio) |
- 		      FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0));
- 	fwr->ethtype = htons(f->fs.val.ethtype);
- 	fwr->ethtypem = htons(f->fs.mask.ethtype);
- 	fwr->frag_to_ovlan_vldm =
- 		(FW_FILTER_WR_FRAG_V(f->fs.val.frag) |
- 		 FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) |
- 		 FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) |
- 		 FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) |
- 		 FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) |
- 		 FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld));
- 	fwr->smac_sel = 0;
- 	fwr->rx_chan_rx_rpl_iq =
- 		htons(FW_FILTER_WR_RX_CHAN_V(0) |
- 		      FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id));
- 	fwr->maci_to_matchtypem =
- 		htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) |
- 		      FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) |
- 		      FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) |
- 		      FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) |
- 		      FW_FILTER_WR_PORT_V(f->fs.val.iport) |
- 		      FW_FILTER_WR_PORTM_V(f->fs.mask.iport) |
- 		      FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) |
- 		      FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype));
- 	fwr->ptcl = f->fs.val.proto;
- 	fwr->ptclm = f->fs.mask.proto;
- 	fwr->ttyp = f->fs.val.tos;
- 	fwr->ttypm = f->fs.mask.tos;
- 	fwr->ivlan = htons(f->fs.val.ivlan);
- 	fwr->ivlanm = htons(f->fs.mask.ivlan);
- 	fwr->ovlan = htons(f->fs.val.ovlan);
- 	fwr->ovlanm = htons(f->fs.mask.ovlan);
- 	memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip));
- 	memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm));
- 	memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip));
- 	memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm));
- 	fwr->lp = htons(f->fs.val.lport);
- 	fwr->lpm = htons(f->fs.mask.lport);
- 	fwr->fp = htons(f->fs.val.fport);
- 	fwr->fpm = htons(f->fs.mask.fport);
- 	if (f->fs.newsmac)
- 		memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma));
- 
- 	/* Mark the filter as "pending" and ship off the Filter Work Request.
- 	 * When we get the Work Request Reply we'll clear the pending status.
- 	 */
- 	f->pending = 1;
- 	set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3);
- 	t4_ofld_send(adapter, skb);
- 	return 0;
- }
- 
- /* Delete the filter at a specified index.
-  */
- static int del_filter_wr(struct adapter *adapter, int fidx)
- {
- 	struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
- 	struct sk_buff *skb;
- 	struct fw_filter_wr *fwr;
- 	unsigned int len, ftid;
- 
- 	len = sizeof(*fwr);
- 	ftid = adapter->tids.ftid_base + fidx;
- 
- 	skb = alloc_skb(len, GFP_KERNEL);
- 	if (!skb)
- 		return -ENOMEM;
- 
- 	fwr = (struct fw_filter_wr *)__skb_put(skb, len);
- 	t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id);
- 
- 	/* Mark the filter as "pending" and ship off the Filter Work Request.
- 	 * When we get the Work Request Reply we'll clear the pending status.
- 	 */
- 	f->pending = 1;
- 	t4_mgmt_tx(adapter, skb);
- 	return 0;
- }
- 
  static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
  			     void *accel_priv, select_queue_fallback_t fallback)
  {
@@@ -1723,19 -1325,22 +1325,22 @@@ EXPORT_SYMBOL(cxgb4_remove_tid)
   */
  static int tid_init(struct tid_info *t)
  {
- 	size_t size;
- 	unsigned int stid_bmap_size;
- 	unsigned int natids = t->natids;
  	struct adapter *adap = container_of(t, struct adapter, tids);
+ 	unsigned int max_ftids = t->nftids + t->nsftids;
+ 	unsigned int natids = t->natids;
+ 	unsigned int stid_bmap_size;
+ 	unsigned int ftid_bmap_size;
+ 	size_t size;
  
  	stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids);
+ 	ftid_bmap_size = BITS_TO_LONGS(t->nftids);
  	size = t->ntids * sizeof(*t->tid_tab) +
  	       natids * sizeof(*t->atid_tab) +
  	       t->nstids * sizeof(*t->stid_tab) +
  	       t->nsftids * sizeof(*t->stid_tab) +
  	       stid_bmap_size * sizeof(long) +
- 	       t->nftids * sizeof(*t->ftid_tab) +
- 	       t->nsftids * sizeof(*t->ftid_tab);
+ 	       max_ftids * sizeof(*t->ftid_tab) +
+ 	       ftid_bmap_size * sizeof(long);
  
  	t->tid_tab = t4_alloc_mem(size);
  	if (!t->tid_tab)
@@@ -1745,8 -1350,10 +1350,10 @@@
  	t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
  	t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids];
  	t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
+ 	t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids];
  	spin_lock_init(&t->stid_lock);
  	spin_lock_init(&t->atid_lock);
+ 	spin_lock_init(&t->ftid_lock);
  
  	t->stids_in_use = 0;
  	t->sftids_in_use = 0;
@@@ -1761,12 -1368,16 +1368,16 @@@
  			t->atid_tab[natids - 1].next = &t->atid_tab[natids];
  		t->afree = t->atid_tab;
  	}
- 	bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
- 	/* Reserve stid 0 for T4/T5 adapters */
- 	if (!t->stid_base &&
- 	    (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5))
- 		__set_bit(0, t->stid_bmap);
  
+ 	if (is_offload(adap)) {
+ 		bitmap_zero(t->stid_bmap, t->nstids + t->nsftids);
+ 		/* Reserve stid 0 for T4/T5 adapters */
+ 		if (!t->stid_base &&
+ 		    CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)
+ 			__set_bit(0, t->stid_bmap);
+ 	}
+ 
+ 	bitmap_zero(t->ftid_bmap, t->nftids);
  	return 0;
  }
  
@@@ -2316,7 -1927,7 +1927,7 @@@ static void disable_dbs(struct adapter 
  
  	for_each_ethrxq(&adap->sge, i)
  		disable_txq_db(&adap->sge.ethtxq[i].q);
- 	for_each_iscsirxq(&adap->sge, i)
+ 	for_each_ofldtxq(&adap->sge, i)
  		disable_txq_db(&adap->sge.ofldtxq[i].q);
  	for_each_port(adap, i)
  		disable_txq_db(&adap->sge.ctrlq[i].q);
@@@ -2328,7 -1939,7 +1939,7 @@@ static void enable_dbs(struct adapter *
  
  	for_each_ethrxq(&adap->sge, i)
  		enable_txq_db(adap, &adap->sge.ethtxq[i].q);
- 	for_each_iscsirxq(&adap->sge, i)
+ 	for_each_ofldtxq(&adap->sge, i)
  		enable_txq_db(adap, &adap->sge.ofldtxq[i].q);
  	for_each_port(adap, i)
  		enable_txq_db(adap, &adap->sge.ctrlq[i].q);
@@@ -2336,9 -1947,10 +1947,10 @@@
  
+ /* Forward control command @cmd to the RDMA upper-layer driver of @adap.
+  * Does nothing when adap->uld is NULL or the RDMA entry has no handle.
+  */
  static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd)
  {
- 	if (adap->uld_handle[CXGB4_ULD_RDMA])
- 		ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA],
- 				cmd);
+ 	enum cxgb4_uld type = CXGB4_ULD_RDMA;
+ 
+ 	if (adap->uld && adap->uld[type].handle)
+ 		adap->uld[type].control(adap->uld[type].handle, cmd);
  }
  
  static void process_db_full(struct work_struct *work)
@@@ -2392,13 -2004,14 +2004,14 @@@ out
  	if (ret)
  		CH_WARN(adap, "DB drop recovery failed.\n");
  }
+ 
  static void recover_all_queues(struct adapter *adap)
  {
  	int i;
  
  	for_each_ethrxq(&adap->sge, i)
  		sync_txq_pidx(adap, &adap->sge.ethtxq[i].q);
- 	for_each_iscsirxq(&adap->sge, i)
+ 	for_each_ofldtxq(&adap->sge, i)
  		sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q);
  	for_each_port(adap, i)
  		sync_txq_pidx(adap, &adap->sge.ctrlq[i].q);
@@@ -2463,94 -2076,12 +2076,12 @@@ void t4_db_dropped(struct adapter *adap
  	queue_work(adap->workq, &adap->db_drop_task);
  }
  
- static void uld_attach(struct adapter *adap, unsigned int uld)
- {
- 	void *handle;
- 	struct cxgb4_lld_info lli;
- 	unsigned short i;
- 
- 	lli.pdev = adap->pdev;
- 	lli.pf = adap->pf;
- 	lli.l2t = adap->l2t;
- 	lli.tids = &adap->tids;
- 	lli.ports = adap->port;
- 	lli.vr = &adap->vres;
- 	lli.mtus = adap->params.mtus;
- 	if (uld == CXGB4_ULD_RDMA) {
- 		lli.rxq_ids = adap->sge.rdma_rxq;
- 		lli.ciq_ids = adap->sge.rdma_ciq;
- 		lli.nrxq = adap->sge.rdmaqs;
- 		lli.nciq = adap->sge.rdmaciqs;
- 	} else if (uld == CXGB4_ULD_ISCSI) {
- 		lli.rxq_ids = adap->sge.iscsi_rxq;
- 		lli.nrxq = adap->sge.iscsiqsets;
- 	} else if (uld == CXGB4_ULD_ISCSIT) {
- 		lli.rxq_ids = adap->sge.iscsit_rxq;
- 		lli.nrxq = adap->sge.niscsitq;
- 	}
- 	lli.ntxq = adap->sge.iscsiqsets;
- 	lli.nchan = adap->params.nports;
- 	lli.nports = adap->params.nports;
- 	lli.wr_cred = adap->params.ofldq_wr_cred;
- 	lli.adapter_type = adap->params.chip;
- 	lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
- 	lli.iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
- 	lli.iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
- 	lli.iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A);
- 	lli.iscsi_ppm = &adap->iscsi_ppm;
- 	lli.cclk_ps = 1000000000 / adap->params.vpd.cclk;
- 	lli.udb_density = 1 << adap->params.sge.eq_qpp;
- 	lli.ucq_density = 1 << adap->params.sge.iq_qpp;
- 	lli.filt_mode = adap->params.tp.vlan_pri_map;
- 	/* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */
- 	for (i = 0; i < NCHAN; i++)
- 		lli.tx_modq[i] = i;
- 	lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A);
- 	lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A);
- 	lli.fw_vers = adap->params.fw_vers;
- 	lli.dbfifo_int_thresh = dbfifo_int_thresh;
- 	lli.sge_ingpadboundary = adap->sge.fl_align;
- 	lli.sge_egrstatuspagesize = adap->sge.stat_len;
- 	lli.sge_pktshift = adap->sge.pktshift;
- 	lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN;
- 	lli.max_ordird_qp = adap->params.max_ordird_qp;
- 	lli.max_ird_adapter = adap->params.max_ird_adapter;
- 	lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl;
- 	lli.nodeid = dev_to_node(adap->pdev_dev);
- 
- 	handle = ulds[uld].add(&lli);
- 	if (IS_ERR(handle)) {
- 		dev_warn(adap->pdev_dev,
- 			 "could not attach to the %s driver, error %ld\n",
- 			 uld_str[uld], PTR_ERR(handle));
- 		return;
- 	}
- 
- 	adap->uld_handle[uld] = handle;
- 
+ /* Register cxgb4_netevent_nb with the netevent notifier chain, at most once.
+  * netevent_registered records that registration has happened, so repeated
+  * calls are no-ops until detach_ulds() unregisters and clears the flag.
+  * NOTE(review): the flag is tested and set here without taking a lock,
+  * whereas detach_ulds() clears it under uld_mutex; callers presumably hold
+  * uld_mutex -- confirm at the call sites.
+  */
+ void t4_register_netevent_notifier(void)
+ {
  	if (!netevent_registered) {
  		register_netevent_notifier(&cxgb4_netevent_nb);
  		netevent_registered = true;
  	}
- 
- 	if (adap->flags & FULL_INIT_DONE)
- 		ulds[uld].state_change(handle, CXGB4_STATE_UP);
- }
- 
- static void attach_ulds(struct adapter *adap)
- {
- 	unsigned int i;
- 
- 	spin_lock(&adap_rcu_lock);
- 	list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list);
- 	spin_unlock(&adap_rcu_lock);
- 
- 	mutex_lock(&uld_mutex);
- 	list_add_tail(&adap->list_node, &adapter_list);
- 	for (i = 0; i < CXGB4_ULD_MAX; i++)
- 		if (ulds[i].add)
- 			uld_attach(adap, i);
- 	mutex_unlock(&uld_mutex);
  }
  
  static void detach_ulds(struct adapter *adap)
@@@ -2560,20 -2091,16 +2091,16 @@@
  	mutex_lock(&uld_mutex);
  	list_del(&adap->list_node);
  	for (i = 0; i < CXGB4_ULD_MAX; i++)
- 		if (adap->uld_handle[i]) {
- 			ulds[i].state_change(adap->uld_handle[i],
+ 		if (adap->uld && adap->uld[i].handle) {
+ 			adap->uld[i].state_change(adap->uld[i].handle,
  					     CXGB4_STATE_DETACH);
- 			adap->uld_handle[i] = NULL;
+ 			adap->uld[i].handle = NULL;
  		}
  	if (netevent_registered && list_empty(&adapter_list)) {
  		unregister_netevent_notifier(&cxgb4_netevent_nb);
  		netevent_registered = false;
  	}
  	mutex_unlock(&uld_mutex);
- 
- 	spin_lock(&adap_rcu_lock);
- 	list_del_rcu(&adap->rcu_node);
- 	spin_unlock(&adap_rcu_lock);
  }
  
  static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state)
@@@ -2582,61 -2109,12 +2109,12 @@@
  
  	mutex_lock(&uld_mutex);
  	for (i = 0; i < CXGB4_ULD_MAX; i++)
- 		if (adap->uld_handle[i])
- 			ulds[i].state_change(adap->uld_handle[i], new_state);
+ 		if (adap->uld && adap->uld[i].handle)
+ 			adap->uld[i].state_change(adap->uld[i].handle,
+ 						  new_state);
  	mutex_unlock(&uld_mutex);
  }
  
- /**
-  *	cxgb4_register_uld - register an upper-layer driver
-  *	@type: the ULD type
-  *	@p: the ULD methods
-  *
-  *	Registers an upper-layer driver with this driver and notifies the ULD
-  *	about any presently available devices that support its type.  Returns
-  *	%-EBUSY if a ULD of the same type is already registered.
-  */
- int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p)
- {
- 	int ret = 0;
- 	struct adapter *adap;
- 
- 	if (type >= CXGB4_ULD_MAX)
- 		return -EINVAL;
- 	mutex_lock(&uld_mutex);
- 	if (ulds[type].add) {
- 		ret = -EBUSY;
- 		goto out;
- 	}
- 	ulds[type] = *p;
- 	list_for_each_entry(adap, &adapter_list, list_node)
- 		uld_attach(adap, type);
- out:	mutex_unlock(&uld_mutex);
- 	return ret;
- }
- EXPORT_SYMBOL(cxgb4_register_uld);
- 
- /**
-  *	cxgb4_unregister_uld - unregister an upper-layer driver
-  *	@type: the ULD type
-  *
-  *	Unregisters an existing upper-layer driver.
-  */
- int cxgb4_unregister_uld(enum cxgb4_uld type)
- {
- 	struct adapter *adap;
- 
- 	if (type >= CXGB4_ULD_MAX)
- 		return -EINVAL;
- 	mutex_lock(&uld_mutex);
- 	list_for_each_entry(adap, &adapter_list, list_node)
- 		adap->uld_handle[type] = NULL;
- 	ulds[type].add = NULL;
- 	mutex_unlock(&uld_mutex);
- 	return 0;
- }
- EXPORT_SYMBOL(cxgb4_unregister_uld);
- 
  #if IS_ENABLED(CONFIG_IPV6)
  static int cxgb4_inet6addr_handler(struct notifier_block *this,
  				   unsigned long event, void *data)
@@@ -2741,7 -2219,6 +2219,6 @@@ static int cxgb_up(struct adapter *adap
  				  adap->msix_info[0].desc, adap);
  		if (err)
  			goto irq_err;
- 
  		err = request_msix_queue_irqs(adap);
  		if (err) {
  			free_irq(adap->msix_info[0].vec, adap);
@@@ -2819,40 -2296,6 +2296,6 @@@ static int cxgb_close(struct net_devic
  	return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false);
  }
  
- /* Return an error number if the indicated filter isn't writable ...
-  */
- static int writable_filter(struct filter_entry *f)
- {
- 	if (f->locked)
- 		return -EPERM;
- 	if (f->pending)
- 		return -EBUSY;
- 
- 	return 0;
- }
- 
- /* Delete the filter at the specified index (if valid).  The checks for all
-  * the common problems with doing this like the filter being locked, currently
-  * pending in another operation, etc.
-  */
- static int delete_filter(struct adapter *adapter, unsigned int fidx)
- {
- 	struct filter_entry *f;
- 	int ret;
- 
- 	if (fidx >= adapter->tids.nftids + adapter->tids.nsftids)
- 		return -EINVAL;
- 
- 	f = &adapter->tids.ftid_tab[fidx];
- 	ret = writable_filter(f);
- 	if (ret)
- 		return ret;
- 	if (f->valid)
- 		return del_filter_wr(adapter, fidx);
- 
- 	return 0;
- }
- 
  int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid,
  		__be32 sip, __be16 sport, __be16 vlan,
  		unsigned int queue, unsigned char port, unsigned char mask)
@@@ -2922,7 -2365,6 +2365,6 @@@ EXPORT_SYMBOL(cxgb4_create_server_filte
  int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid,
  		unsigned int queue, bool ipv6)
  {
- 	int ret;
  	struct filter_entry *f;
  	struct adapter *adap;
  
@@@ -2936,11 -2378,7 +2378,7 @@@
  	/* Unlock the filter */
  	f->locked = 0;
  
- 	ret = delete_filter(adap, stid);
- 	if (ret)
- 		return ret;
- 
- 	return 0;
+ 	return delete_filter(adap, stid);
  }
  EXPORT_SYMBOL(cxgb4_remove_server_filter);
  
@@@ -3078,6 -2516,85 +2516,85 @@@ static int cxgb_change_mtu(struct net_d
  	return ret;
  }
  
+ #ifdef CONFIG_PCI_IOV
+ static int dummy_open(struct net_device *dev)
+ {
+ 	/* Turn carrier off since we don't have to transmit anything on this
+ 	 * interface.
+ 	 */
+ 	netif_carrier_off(dev);
+ 	return 0;
+ }
+ 
+ /* Fill MAC address that will be assigned by the FW */
+ static void fill_vf_station_mac_addr(struct adapter *adap)
+ {
+ 	unsigned int i;
+ 	u8 hw_addr[ETH_ALEN], macaddr[ETH_ALEN];
+ 	int err;
+ 	u8 *na;
+ 	u16 a, b;
+ 
+ 	err = t4_get_raw_vpd_params(adap, &adap->params.vpd);
+ 	if (!err) {
+ 		na = adap->params.vpd.na;
+ 		for (i = 0; i < ETH_ALEN; i++)
+ 			hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 +
+ 				      hex2val(na[2 * i + 1]));
+ 		a = (hw_addr[0] << 8) | hw_addr[1];
+ 		b = (hw_addr[1] << 8) | hw_addr[2];
+ 		a ^= b;
+ 		a |= 0x0200;    /* locally assigned Ethernet MAC address */
+ 		a &= ~0x0100;   /* not a multicast Ethernet MAC address */
+ 		macaddr[0] = a >> 8;
+ 		macaddr[1] = a & 0xff;
+ 
+ 		for (i = 2; i < 5; i++)
+ 			macaddr[i] = hw_addr[i + 1];
+ 
+ 		for (i = 0; i < adap->num_vfs; i++) {
+ 			macaddr[5] = adap->pf * 16 + i;
+ 			ether_addr_copy(adap->vfinfo[i].vf_mac_addr, macaddr);
+ 		}
+ 	}
+ }
+ 
+ static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
+ {
+ 	struct port_info *pi = netdev_priv(dev);
+ 	struct adapter *adap = pi->adapter;
+ 	int ret;
+ 
+ 	/* verify MAC addr is valid */
+ 	if (!is_valid_ether_addr(mac)) {
+ 		dev_err(pi->adapter->pdev_dev,
+ 			"Invalid Ethernet address %pM for VF %d\n",
+ 			mac, vf);
+ 		return -EINVAL;
+ 	}
+ 
+ 	dev_info(pi->adapter->pdev_dev,
+ 		 "Setting MAC %pM on VF %d\n", mac, vf);
+ 	ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac);
+ 	if (!ret)
+ 		ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac);
+ 	return ret;
+ }
+ 
+ static int cxgb_get_vf_config(struct net_device *dev,
+ 			      int vf, struct ifla_vf_info *ivi)
+ {
+ 	struct port_info *pi = netdev_priv(dev);
+ 	struct adapter *adap = pi->adapter;
+ 
+ 	if (vf >= adap->num_vfs)
+ 		return -EINVAL;
+ 	ivi->vf = vf;
+ 	ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr);
+ 	return 0;
+ }
+ #endif
+ 
  static int cxgb_set_mac_addr(struct net_device *dev, void *p)
  {
  	int ret;
@@@ -3114,6 -2631,116 +2631,116 @@@ static void cxgb_netpoll(struct net_dev
  }
  #endif
  
+ static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate)
+ {
+ 	struct port_info *pi = netdev_priv(dev);
+ 	struct adapter *adap = pi->adapter;
+ 	struct sched_class *e;
+ 	struct ch_sched_params p;
+ 	struct ch_sched_queue qe;
+ 	u32 req_rate;
+ 	int err = 0;
+ 
+ 	if (!can_sched(dev))
+ 		return -ENOTSUPP;
+ 
+ 	if (index < 0 || index > pi->nqsets - 1)
+ 		return -EINVAL;
+ 
+ 	if (!(adap->flags & FULL_INIT_DONE)) {
+ 		dev_err(adap->pdev_dev,
+ 			"Failed to rate limit on queue %d. Link Down?\n",
+ 			index);
+ 		return -EINVAL;
+ 	}
+ 
+ 	/* Convert from Mbps to Kbps */
+ 	req_rate = rate << 10;
+ 
+ 	/* Max rate is 10 Gbps */
+ 	if (req_rate >= SCHED_MAX_RATE_KBPS) {
+ 		dev_err(adap->pdev_dev,
+ 			"Invalid rate %u Mbps, Max rate is %u Gbps\n",
+ 			rate, SCHED_MAX_RATE_KBPS);
+ 		return -ERANGE;
+ 	}
+ 
+ 	/* First unbind the queue from any existing class */
+ 	memset(&qe, 0, sizeof(qe));
+ 	qe.queue = index;
+ 	qe.class = SCHED_CLS_NONE;
+ 
+ 	err = cxgb4_sched_class_unbind(dev, (void *)(&qe), SCHED_QUEUE);
+ 	if (err) {
+ 		dev_err(adap->pdev_dev,
+ 			"Unbinding Queue %d on port %d fail. Err: %d\n",
+ 			index, pi->port_id, err);
+ 		return err;
+ 	}
+ 
+ 	/* Queue already unbound */
+ 	if (!req_rate)
+ 		return 0;
+ 
+ 	/* Fetch any available unused or matching scheduling class */
+ 	memset(&p, 0, sizeof(p));
+ 	p.type = SCHED_CLASS_TYPE_PACKET;
+ 	p.u.params.level    = SCHED_CLASS_LEVEL_CL_RL;
+ 	p.u.params.mode     = SCHED_CLASS_MODE_CLASS;
+ 	p.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS;
+ 	p.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS;
+ 	p.u.params.channel  = pi->tx_chan;
+ 	p.u.params.class    = SCHED_CLS_NONE;
+ 	p.u.params.minrate  = 0;
+ 	p.u.params.maxrate  = req_rate;
+ 	p.u.params.weight   = 0;
+ 	p.u.params.pktsize  = dev->mtu;
+ 
+ 	e = cxgb4_sched_class_alloc(dev, &p);
+ 	if (!e)
+ 		return -ENOMEM;
+ 
+ 	/* Bind the queue to a scheduling class */
+ 	memset(&qe, 0, sizeof(qe));
+ 	qe.queue = index;
+ 	qe.class = e->idx;
+ 
+ 	err = cxgb4_sched_class_bind(dev, (void *)(&qe), SCHED_QUEUE);
+ 	if (err)
+ 		dev_err(adap->pdev_dev,
+ 			"Queue rate limiting failed. Err: %d\n", err);
+ 	return err;
+ }
+ 
+ int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
+ 		  struct tc_to_netdev *tc)
+ {
+ 	struct port_info *pi = netdev2pinfo(dev);
+ 	struct adapter *adap = netdev2adap(dev);
+ 
+ 	if (!(adap->flags & FULL_INIT_DONE)) {
+ 		dev_err(adap->pdev_dev,
+ 			"Failed to setup tc on port %d. Link Down?\n",
+ 			pi->port_id);
+ 		return -EINVAL;
+ 	}
+ 
+ 	if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) &&
+ 	    tc->type == TC_SETUP_CLSU32) {
+ 		switch (tc->cls_u32->command) {
+ 		case TC_CLSU32_NEW_KNODE:
+ 		case TC_CLSU32_REPLACE_KNODE:
+ 			return cxgb4_config_knode(dev, proto, tc->cls_u32);
+ 		case TC_CLSU32_DELETE_KNODE:
+ 			return cxgb4_delete_knode(dev, proto, tc->cls_u32);
+ 		default:
+ 			return -EOPNOTSUPP;
+ 		}
+ 	}
+ 
+ 	return -EOPNOTSUPP;
+ }
+ 
  static const struct net_device_ops cxgb4_netdev_ops = {
  	.ndo_open             = cxgb_open,
  	.ndo_stop             = cxgb_close,
@@@ -3136,7 -2763,31 +2763,31 @@@
  #ifdef CONFIG_NET_RX_BUSY_POLL
  	.ndo_busy_poll        = cxgb_busy_poll,
  #endif
+ 	.ndo_set_tx_maxrate   = cxgb_set_tx_maxrate,
+ 	.ndo_setup_tc         = cxgb_setup_tc,
+ };
  
+ #ifdef CONFIG_PCI_IOV
+ static const struct net_device_ops cxgb4_mgmt_netdev_ops = {
+ 	.ndo_open             = dummy_open,
+ 	.ndo_set_vf_mac       = cxgb_set_vf_mac,
+ 	.ndo_get_vf_config    = cxgb_get_vf_config,
+ };
+ #endif
+ 
+ static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+ {
+ 	struct adapter *adapter = netdev2adap(dev);
+ 
+ 	strlcpy(info->driver, cxgb4_driver_name, sizeof(info->driver));
+ 	strlcpy(info->version, cxgb4_driver_version,
+ 		sizeof(info->version));
+ 	strlcpy(info->bus_info, pci_name(adapter->pdev),
+ 		sizeof(info->bus_info));
+ }
+ 
+ static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = {
+ 	.get_drvinfo       = get_drvinfo,
  };
  
  void t4_fatal_err(struct adapter *adap)
@@@ -3979,6 -3630,12 +3630,12 @@@ static int adap_init0(struct adapter *a
  	adap->clipt_start = val[0];
  	adap->clipt_end = val[1];
  
+ 	/* We don't yet have a PARAMs call to retrieve the number of Traffic
+ 	 * Classes supported by the hardware/firmware so we hard code it here
+ 	 * for now.
+ 	 */
+ 	adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16;
+ 
  	/* query params related to active filter region */
  	params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START);
  	params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END);
@@@ -4067,6 -3724,7 +3724,7 @@@
  		adap->params.ofldq_wr_cred = val[5];
  
  		adap->params.offload = 1;
+ 		adap->num_ofld_uld += 1;
  	}
  	if (caps_cmd.rdmacaps) {
  		params[0] = FW_PARAM_PFVF(STAG_START);
@@@ -4119,6 -3777,7 +3777,7 @@@
  			 "max_ordird_qp %d max_ird_adapter %d\n",
  			 adap->params.max_ordird_qp,
  			 adap->params.max_ird_adapter);
+ 		adap->num_ofld_uld += 2;
  	}
  	if (caps_cmd.iscsicaps) {
  		params[0] = FW_PARAM_PFVF(ISCSI_START);
@@@ -4129,6 -3788,13 +3788,13 @@@
  			goto bye;
  		adap->vres.iscsi.start = val[0];
  		adap->vres.iscsi.size = val[1] - val[0] + 1;
+ 		/* LIO target and cxgb4i initiator */
+ 		adap->num_ofld_uld += 2;
+ 	}
+ 	if (caps_cmd.cryptocaps) {
+ 		/* Should query params here...TODO */
+ 		adap->params.crypto |= ULP_CRYPTO_LOOKASIDE;
+ 		adap->num_uld += 1;
  	}
  #undef FW_PARAM_PFVF
  #undef FW_PARAM_DEV
@@@ -4305,29 -3971,12 +3971,19 @@@ static const struct pci_error_handlers 
  	.resume         = eeh_resume,
  };
  
 +/* Return true if the Link Configuration supports "High Speeds" (those greater
 + * than 1Gb/s).
 + */
  static inline bool is_x_10g_port(const struct link_config *lc)
  {
 -	return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
 -	       (lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
 +	unsigned int speeds, high_speeds;
 +
 +	speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported));
 +	high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G);
 +
 +	return high_speeds != 0;
  }
  
- static inline void init_rspq(struct adapter *adap, struct sge_rspq *q,
- 			     unsigned int us, unsigned int cnt,
- 			     unsigned int size, unsigned int iqe_size)
- {
- 	q->adap = adap;
- 	cxgb4_set_rspq_intr_params(q, us, cnt);
- 	q->iqe_len = iqe_size;
- 	q->size = size;
- }
- 
  /*
   * Perform default configuration of DMA queues depending on the number and type
   * of ports we found and the number of available CPUs.  Most settings can be
@@@ -4340,12 -3989,16 +3996,16 @@@ static void cfg_queues(struct adapter *
  #ifndef CONFIG_CHELSIO_T4_DCB
  	int q10g = 0;
  #endif
- 	int ciq_size;
  
  	/* Reduce memory usage in kdump environment, disable all offload.
  	 */
- 	if (is_kdump_kernel())
+ 	if (is_kdump_kernel()) {
+ 		adap->params.offload = 0;
+ 		adap->params.crypto = 0;
+ 	} else if (is_uld(adap) && t4_uld_mem_alloc(adap)) {
  		adap->params.offload = 0;
+ 		adap->params.crypto = 0;
+ 	}
  
  	for_each_port(adap, i)
  		n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
@@@ -4389,33 -4042,18 +4049,18 @@@
  	s->ethqsets = qidx;
  	s->max_ethqsets = qidx;   /* MSI-X may lower it later */
  
- 	if (is_offload(adap)) {
+ 	if (is_uld(adap)) {
  		/*
  		 * For offload we use 1 queue/channel if all ports are up to 1G,
  		 * otherwise we divide all available queues amongst the channels
  		 * capped by the number of available cores.
  		 */
  		if (n10g) {
- 			i = min_t(int, ARRAY_SIZE(s->iscsirxq),
- 				  num_online_cpus());
- 			s->iscsiqsets = roundup(i, adap->params.nports);
- 		} else
- 			s->iscsiqsets = adap->params.nports;
- 		/* For RDMA one Rx queue per channel suffices */
- 		s->rdmaqs = adap->params.nports;
- 		/* Try and allow at least 1 CIQ per cpu rounding down
- 		 * to the number of ports, with a minimum of 1 per port.
- 		 * A 2 port card in a 6 cpu system: 6 CIQs, 3 / port.
- 		 * A 4 port card in a 6 cpu system: 4 CIQs, 1 / port.
- 		 * A 4 port card in a 2 cpu system: 4 CIQs, 1 / port.
- 		 */
- 		s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus());
- 		s->rdmaciqs = (s->rdmaciqs / adap->params.nports) *
- 				adap->params.nports;
- 		s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports);
- 
- 		if (!is_t4(adap->params.chip))
- 			s->niscsitq = s->iscsiqsets;
+ 			i = num_online_cpus();
+ 			s->ofldqsets = roundup(i, adap->params.nports);
+ 		} else {
+ 			s->ofldqsets = adap->params.nports;
+ 		}
  	}
  
  	for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) {
@@@ -4434,47 -4072,8 +4079,8 @@@
  	for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
  		s->ofldtxq[i].q.size = 1024;
  
- 	for (i = 0; i < ARRAY_SIZE(s->iscsirxq); i++) {
- 		struct sge_ofld_rxq *r = &s->iscsirxq[i];
- 
- 		init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
- 		r->rspq.uld = CXGB4_ULD_ISCSI;
- 		r->fl.size = 72;
- 	}
- 
- 	if (!is_t4(adap->params.chip)) {
- 		for (i = 0; i < ARRAY_SIZE(s->iscsitrxq); i++) {
- 			struct sge_ofld_rxq *r = &s->iscsitrxq[i];
- 
- 			init_rspq(adap, &r->rspq, 5, 1, 1024, 64);
- 			r->rspq.uld = CXGB4_ULD_ISCSIT;
- 			r->fl.size = 72;
- 		}
- 	}
- 
- 	for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) {
- 		struct sge_ofld_rxq *r = &s->rdmarxq[i];
- 
- 		init_rspq(adap, &r->rspq, 5, 1, 511, 64);
- 		r->rspq.uld = CXGB4_ULD_RDMA;
- 		r->fl.size = 72;
- 	}
- 
- 	ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids;
- 	if (ciq_size > SGE_MAX_IQ_SIZE) {
- 		CH_WARN(adap, "CIQ size too small for available IQs\n");
- 		ciq_size = SGE_MAX_IQ_SIZE;
- 	}
- 
- 	for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) {
- 		struct sge_ofld_rxq *r = &s->rdmaciq[i];
- 
- 		init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64);
- 		r->rspq.uld = CXGB4_ULD_RDMA;
- 	}
- 
  	init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64);
- 	init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64);
+ 	init_rspq(adap, &s->intrq, 0, 1, 512, 64);
  }
  
  /*
@@@ -4505,42 -4104,90 +4111,90 @@@ static void reduce_ethqs(struct adapte
  	}
  }
  
+ static int get_msix_info(struct adapter *adap)
+ {
+ 	struct uld_msix_info *msix_info;
+ 	unsigned int max_ingq = 0;
+ 
+ 	if (is_offload(adap))
+ 		max_ingq += MAX_OFLD_QSETS * adap->num_ofld_uld;
+ 	if (is_pci_uld(adap))
+ 		max_ingq += MAX_OFLD_QSETS * adap->num_uld;
+ 
+ 	if (!max_ingq)
+ 		goto out;
+ 
+ 	msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL);
+ 	if (!msix_info)
+ 		return -ENOMEM;
+ 
+ 	adap->msix_bmap_ulds.msix_bmap = kcalloc(BITS_TO_LONGS(max_ingq),
+ 						 sizeof(long), GFP_KERNEL);
+ 	if (!adap->msix_bmap_ulds.msix_bmap) {
+ 		kfree(msix_info);
+ 		return -ENOMEM;
+ 	}
+ 	spin_lock_init(&adap->msix_bmap_ulds.lock);
+ 	adap->msix_info_ulds = msix_info;
+ out:
+ 	return 0;
+ }
+ 
+ static void free_msix_info(struct adapter *adap)
+ {
+ 	if (!(adap->num_uld && adap->num_ofld_uld))
+ 		return;
+ 
+ 	kfree(adap->msix_info_ulds);
+ 	kfree(adap->msix_bmap_ulds.msix_bmap);
+ }
+ 
  /* 2 MSI-X vectors needed for the FW queue and non-data interrupts */
  #define EXTRA_VECS 2
  
  static int enable_msix(struct adapter *adap)
  {
- 	int ofld_need = 0;
- 	int i, want, need, allocated;
+ 	int ofld_need = 0, uld_need = 0;
+ 	int i, j, want, need, allocated;
  	struct sge *s = &adap->sge;
  	unsigned int nchan = adap->params.nports;
  	struct msix_entry *entries;
+ 	int max_ingq = MAX_INGQ;
  
- 	entries = kmalloc(sizeof(*entries) * (MAX_INGQ + 1),
+ 	if (is_pci_uld(adap))
+ 		max_ingq += (MAX_OFLD_QSETS * adap->num_uld);
+ 	if (is_offload(adap))
+ 		max_ingq += (MAX_OFLD_QSETS * adap->num_ofld_uld);
+ 	entries = kmalloc(sizeof(*entries) * (max_ingq + 1),
  			  GFP_KERNEL);
  	if (!entries)
  		return -ENOMEM;
  
- 	for (i = 0; i < MAX_INGQ + 1; ++i)
+ 	/* map for msix */
+ 	if (get_msix_info(adap)) {
+ 		adap->params.offload = 0;
+ 		adap->params.crypto = 0;
+ 	}
+ 
+ 	for (i = 0; i < max_ingq + 1; ++i)
  		entries[i].entry = i;
  
  	want = s->max_ethqsets + EXTRA_VECS;
  	if (is_offload(adap)) {
- 		want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets	+
- 			s->niscsitq;
- 		/* need nchan for each possible ULD */
- 		if (is_t4(adap->params.chip))
- 			ofld_need = 3 * nchan;
- 		else
- 			ofld_need = 4 * nchan;
+ 		want += adap->num_ofld_uld * s->ofldqsets;
+ 		ofld_need = adap->num_ofld_uld * nchan;
+ 	}
+ 	if (is_pci_uld(adap)) {
+ 		want += adap->num_uld * s->ofldqsets;
+ 		uld_need = adap->num_uld * nchan;
  	}
  #ifdef CONFIG_CHELSIO_T4_DCB
  	/* For Data Center Bridging we need 8 Ethernet TX Priority Queues for
  	 * each port.
  	 */
- 	need = 8 * adap->params.nports + EXTRA_VECS + ofld_need;
+ 	need = 8 * adap->params.nports + EXTRA_VECS + ofld_need + uld_need;
  #else
- 	need = adap->params.nports + EXTRA_VECS + ofld_need;
+ 	need = adap->params.nports + EXTRA_VECS + ofld_need + uld_need;
  #endif
  	allocated = pci_enable_msix_range(adap->pdev, entries, need, want);
  	if (allocated < 0) {
@@@ -4554,33 -4201,31 +4208,31 @@@
  	 * Every group gets its minimum requirement and NIC gets top
  	 * priority for leftovers.
  	 */
- 	i = allocated - EXTRA_VECS - ofld_need;
+ 	i = allocated - EXTRA_VECS - ofld_need - uld_need;
  	if (i < s->max_ethqsets) {
  		s->max_ethqsets = i;
  		if (i < s->ethqsets)
  			reduce_ethqs(adap, i);
  	}
- 	if (is_offload(adap)) {
- 		if (allocated < want) {
- 			s->rdmaqs = nchan;
- 			s->rdmaciqs = nchan;
+ 	if (is_uld(adap)) {
+ 		if (allocated < want)
+ 			s->nqs_per_uld = nchan;
+ 		else
+ 			s->nqs_per_uld = s->ofldqsets;
+ 	}
  
- 			if (!is_t4(adap->params.chip))
- 				s->niscsitq = nchan;
+ 	for (i = 0; i < (s->max_ethqsets + EXTRA_VECS); ++i)
+ 		adap->msix_info[i].vec = entries[i].vector;
+ 	if (is_uld(adap)) {
+ 		for (j = 0 ; i < allocated; ++i, j++) {
+ 			adap->msix_info_ulds[j].vec = entries[i].vector;
+ 			adap->msix_info_ulds[j].idx = i;
  		}
- 
- 		/* leftovers go to OFLD */
- 		i = allocated - EXTRA_VECS - s->max_ethqsets -
- 		    s->rdmaqs - s->rdmaciqs - s->niscsitq;
- 		s->iscsiqsets = (i / nchan) * nchan;  /* round down */
- 
+ 		adap->msix_bmap_ulds.mapsize = j;
  	}
- 	for (i = 0; i < allocated; ++i)
- 		adap->msix_info[i].vec = entries[i].vector;
  	dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, "
- 		 "nic %d iscsi %d rdma cpl %d rdma ciq %d\n",
- 		 allocated, s->max_ethqsets, s->iscsiqsets, s->rdmaqs,
- 		 s->rdmaciqs);
+ 		 "nic %d per uld %d\n",
+ 		 allocated, s->max_ethqsets, s->nqs_per_uld);
  
  	kfree(entries);
  	return 0;
@@@ -4763,12 -4408,8 +4415,12 @@@ static void print_port_info(const struc
  		bufp += sprintf(bufp, "1000/");
  	if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_10G)
  		bufp += sprintf(bufp, "10G/");
 +	if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_25G)
 +		bufp += sprintf(bufp, "25G/");
  	if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_40G)
  		bufp += sprintf(bufp, "40G/");
 +	if (pi->link_cfg.supported & FW_PORT_CAP_SPEED_100G)
 +		bufp += sprintf(bufp, "100G/");
  	if (bufp != buf)
  		--bufp;
  	sprintf(bufp, "BASE-%s", t4_get_port_type_description(pi->port_type));
@@@ -4794,7 -4435,9 +4446,9 @@@ static void free_some_resources(struct 
  	unsigned int i;
  
  	t4_free_mem(adapter->l2t);
+ 	t4_cleanup_sched(adapter);
  	t4_free_mem(adapter->tids.tid_tab);
+ 	cxgb4_cleanup_tc_u32(adapter);
  	kfree(adapter->sge.egr_map);
  	kfree(adapter->sge.ingr_map);
  	kfree(adapter->sge.starving_fl);
@@@ -4845,21 -4488,59 +4499,59 @@@ static int get_chip_type(struct pci_de
  }
  
  #ifdef CONFIG_PCI_IOV
+ static void dummy_setup(struct net_device *dev)
+ {
+ 	dev->type = ARPHRD_NONE;
+ 	dev->mtu = 0;
+ 	dev->hard_header_len = 0;
+ 	dev->addr_len = 0;
+ 	dev->tx_queue_len = 0;
+ 	dev->flags |= IFF_NOARP;
+ 	dev->priv_flags |= IFF_NO_QUEUE;
+ 
+ 	/* Initialize the device structure. */
+ 	dev->netdev_ops = &cxgb4_mgmt_netdev_ops;
+ 	dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops;
+ 	dev->destructor = free_netdev;
+ }
+ 
+ static int config_mgmt_dev(struct pci_dev *pdev)
+ {
+ 	struct adapter *adap = pci_get_drvdata(pdev);
+ 	struct net_device *netdev;
+ 	struct port_info *pi;
+ 	char name[IFNAMSIZ];
+ 	int err;
+ 
+ 	snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap->adap_idx, adap->pf);
+ 	netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, dummy_setup);
+ 	if (!netdev)
+ 		return -ENOMEM;
+ 
+ 	pi = netdev_priv(netdev);
+ 	pi->adapter = adap;
+ 	SET_NETDEV_DEV(netdev, &pdev->dev);
+ 
+ 	adap->port[0] = netdev;
+ 
+ 	err = register_netdev(adap->port[0]);
+ 	if (err) {
+ 		pr_info("Unable to register VF mgmt netdev %s\n", name);
+ 		free_netdev(adap->port[0]);
+ 		adap->port[0] = NULL;
+ 		return err;
+ 	}
+ 	return 0;
+ }
+ 
  static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
  {
+ 	struct adapter *adap = pci_get_drvdata(pdev);
  	int err = 0;
  	int current_vfs = pci_num_vf(pdev);
  	u32 pcie_fw;
- 	void __iomem *regs;
- 
- 	regs = pci_ioremap_bar(pdev, 0);
- 	if (!regs) {
- 		dev_err(&pdev->dev, "cannot map device registers\n");
- 		return -ENOMEM;
- 	}
  
- 	pcie_fw = readl(regs + PCIE_FW_A);
- 	iounmap(regs);
+ 	pcie_fw = readl(adap->regs + PCIE_FW_A);
  	/* Check if cxgb4 is the MASTER and fw is initialized */
  	if (!(pcie_fw & PCIE_FW_INIT_F) ||
  	    !(pcie_fw & PCIE_FW_MASTER_VLD_F) ||
@@@ -4886,6 -4567,14 +4578,14 @@@
  	 */
  	if (!num_vfs) {
  		pci_disable_sriov(pdev);
+ 		if (adap->port[0]) {
+ 			unregister_netdev(adap->port[0]);
+ 			adap->port[0] = NULL;
+ 		}
+ 		/* free VF resources */
+ 		kfree(adap->vfinfo);
+ 		adap->vfinfo = NULL;
+ 		adap->num_vfs = 0;
  		return num_vfs;
  	}
  
@@@ -4893,7 -4582,17 +4593,17 @@@
  		err = pci_enable_sriov(pdev, num_vfs);
  		if (err)
  			return err;
+ 
+ 		adap->num_vfs = num_vfs;
+ 		err = config_mgmt_dev(pdev);
+ 		if (err)
+ 			return err;
  	}
+ 
+ 	adap->vfinfo = kcalloc(adap->num_vfs,
+ 			       sizeof(struct vf_info), GFP_KERNEL);
+ 	if (adap->vfinfo)
+ 		fill_vf_station_mac_addr(adap);
  	return num_vfs;
  }
  #endif
@@@ -4904,9 -4603,11 +4614,11 @@@ static int init_one(struct pci_dev *pde
  	struct port_info *pi;
  	bool highdma = false;
  	struct adapter *adapter = NULL;
+ 	struct net_device *netdev;
  	void __iomem *regs;
  	u32 whoami, pl_rev;
  	enum chip_type chip;
+ 	static int adap_idx = 1;
  
  	printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
  
@@@ -4941,7 -4642,9 +4653,9 @@@
  	func = CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5 ?
  		SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
  	if (func != ent->driver_data) {
+ #ifndef CONFIG_PCI_IOV
  		iounmap(regs);
+ #endif
  		pci_disable_device(pdev);
  		pci_save_state(pdev);        /* to restore SR-IOV later */
  		goto sriov;
@@@ -4973,6 -4676,7 +4687,7 @@@
  		err = -ENOMEM;
  		goto out_unmap_bar0;
  	}
+ 	adap_idx++;
  
  	adapter->workq = create_singlethread_workqueue("cxgb4");
  	if (!adapter->workq) {
@@@ -5059,8 -4763,6 +4774,6 @@@
  			      T6_STATMODE_V(0)));
  
  	for_each_port(adapter, i) {
- 		struct net_device *netdev;
- 
  		netdev = alloc_etherdev_mq(sizeof(struct port_info),
  					   MAX_ETH_QSETS);
  		if (!netdev) {
@@@ -5080,7 -4782,8 +4793,8 @@@
  		netdev->hw_features = NETIF_F_SG | TSO_FLAGS |
  			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
  			NETIF_F_RXCSUM | NETIF_F_RXHASH |
- 			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+ 			NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
+ 			NETIF_F_HW_TC;
  		if (highdma)
  			netdev->hw_features |= NETIF_F_HIGHDMA;
  		netdev->features |= netdev->hw_features;
@@@ -5154,10 -4857,26 +4868,26 @@@
  		}
  	}
  #endif
- 	if (is_offload(adapter) && tid_init(&adapter->tids) < 0) {
+ 
+ 	for_each_port(adapter, i) {
+ 		pi = adap2pinfo(adapter, i);
+ 		pi->sched_tbl = t4_init_sched(adapter->params.nsched_cls);
+ 		if (!pi->sched_tbl)
+ 			dev_warn(&pdev->dev,
+ 				 "could not activate scheduling on port %d\n",
+ 				 i);
+ 	}
+ 
+ 	if (tid_init(&adapter->tids) < 0) {
  		dev_warn(&pdev->dev, "could not allocate TID table, "
  			 "continuing\n");
  		adapter->params.offload = 0;
+ 	} else {
+ 		adapter->tc_u32 = cxgb4_init_tc_u32(adapter,
+ 						    CXGB4_MAX_LINK_HANDLE);
+ 		if (!adapter->tc_u32)
+ 			dev_warn(&pdev->dev,
+ 				 "could not offload tc u32, continuing\n");
  	}
  
  	if (is_offload(adapter)) {
@@@ -5179,8 -4898,11 +4909,11 @@@
  	/* See what interrupts we'll be using */
  	if (msi > 1 && enable_msix(adapter) == 0)
  		adapter->flags |= USING_MSIX;
- 	else if (msi > 0 && pci_enable_msi(pdev) == 0)
+ 	else if (msi > 0 && pci_enable_msi(pdev) == 0) {
  		adapter->flags |= USING_MSI;
+ 		if (msi > 1)
+ 			free_msix_info(adapter);
+ 	}
  
  	/* check for PCI Express bandwidth capabiltites */
  	cxgb4_check_pcie_caps(adapter);
@@@ -5224,10 -4946,15 +4957,15 @@@
  	/* PCIe EEH recovery on powerpc platforms needs fundamental reset */
  	pdev->needs_freset = 1;
  
- 	if (is_offload(adapter))
- 		attach_ulds(adapter);
+ 	if (is_uld(adapter)) {
+ 		mutex_lock(&uld_mutex);
+ 		list_add_tail(&adapter->list_node, &adapter_list);
+ 		mutex_unlock(&uld_mutex);
+ 	}
  
  	print_adapter_info(adapter);
+ 	setup_fw_sge_queues(adapter);
+ 	return 0;
  
  sriov:
  #ifdef CONFIG_PCI_IOV
@@@ -5241,11 -4968,48 +4979,48 @@@
  				 "instantiated %u virtual functions\n",
  				 num_vf[func]);
  	}
- #endif
+ 
+ 	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
+ 	if (!adapter) {
+ 		err = -ENOMEM;
+ 		goto free_pci_region;
+ 	}
+ 
+ 	adapter->pdev = pdev;
+ 	adapter->pdev_dev = &pdev->dev;
+ 	adapter->name = pci_name(pdev);
+ 	adapter->mbox = func;
+ 	adapter->pf = func;
+ 	adapter->regs = regs;
+ 	adapter->adap_idx = adap_idx;
+ 	adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) +
+ 				    (sizeof(struct mbox_cmd) *
+ 				     T4_OS_LOG_MBOX_CMDS),
+ 				    GFP_KERNEL);
+ 	if (!adapter->mbox_log) {
+ 		err = -ENOMEM;
+ 		goto free_adapter;
+ 	}
+ 	pci_set_drvdata(pdev, adapter);
+ 	return 0;
+ 
+  free_adapter:
+ 	kfree(adapter);
+  free_pci_region:
+ 	iounmap(regs);
+ 	pci_disable_sriov(pdev);
+ 	pci_release_regions(pdev);
+ 	return err;
+ #else
  	return 0;
+ #endif
  
   out_free_dev:
  	free_some_resources(adapter);
+ 	if (adapter->flags & USING_MSIX)
+ 		free_msix_info(adapter);
+ 	if (adapter->num_uld || adapter->num_ofld_uld)
+ 		t4_uld_mem_free(adapter);
   out_unmap_bar:
  	if (!is_t4(adapter->params.chip))
  		iounmap(adapter->bar2);
@@@ -5269,12 -5033,12 +5044,12 @@@ static void remove_one(struct pci_dev *
  {
  	struct adapter *adapter = pci_get_drvdata(pdev);
  
- #ifdef CONFIG_PCI_IOV
- 	pci_disable_sriov(pdev);
- 
- #endif
+ 	if (!adapter) {
+ 		pci_release_regions(pdev);
+ 		return;
+ 	}
  
- 	if (adapter) {
+ 	if (adapter->pf == 4) {
  		int i;
  
  		/* Tear down per-adapter Work Queue first since it can contain
@@@ -5282,7 -5046,7 +5057,7 @@@
  		 */
  		destroy_workqueue(adapter->workq);
  
- 		if (is_offload(adapter))
+ 		if (is_uld(adapter))
  			detach_ulds(adapter);
  
  		disable_interrupts(adapter);
@@@ -5296,17 -5060,15 +5071,15 @@@
  		/* If we allocated filters, free up state associated with any
  		 * valid filters ...
  		 */
- 		if (adapter->tids.ftid_tab) {
- 			struct filter_entry *f = &adapter->tids.ftid_tab[0];
- 			for (i = 0; i < (adapter->tids.nftids +
- 					adapter->tids.nsftids); i++, f++)
- 				if (f->valid)
- 					clear_filter(adapter, f);
- 		}
+ 		clear_all_filters(adapter);
  
  		if (adapter->flags & FULL_INIT_DONE)
  			cxgb_down(adapter);
  
+ 		if (adapter->flags & USING_MSIX)
+ 			free_msix_info(adapter);
+ 		if (adapter->num_uld || adapter->num_ofld_uld)
+ 			t4_uld_mem_free(adapter);
  		free_some_resources(adapter);
  #if IS_ENABLED(CONFIG_IPV6)
  		t4_cleanup_clip_tbl(adapter);
@@@ -5323,8 -5085,64 +5096,64 @@@
  		kfree(adapter->mbox_log);
  		synchronize_rcu();
  		kfree(adapter);
- 	} else
+ 	}
+ #ifdef CONFIG_PCI_IOV
+ 	else {
+ 		if (adapter->port[0])
+ 			unregister_netdev(adapter->port[0]);
+ 		iounmap(adapter->regs);
+ 		kfree(adapter->vfinfo);
+ 		kfree(adapter);
+ 		pci_disable_sriov(pdev);
+ 		pci_release_regions(pdev);
+ 	}
+ #endif
+ }
+ 
+ /* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt
+  * delivery.  This is essentially a stripped down version of the PCI remove()
+  * function where we do the minimal amount of work necessary to shutdown any
+  * further activity.
+  */
+ static void shutdown_one(struct pci_dev *pdev)
+ {
+ 	struct adapter *adapter = pci_get_drvdata(pdev);
+ 
+ 	/* As with remove_one() above (see extended comment), we only want to
+ 	 * do cleanup on PCI Devices which went all the way through init_one()
+ 	 * ...
+ 	 */
+ 	if (!adapter) {
  		pci_release_regions(pdev);
+ 		return;
+ 	}
+ 
+ 	if (adapter->pf == 4) {
+ 		int i;
+ 
+ 		for_each_port(adapter, i)
+ 			if (adapter->port[i]->reg_state == NETREG_REGISTERED)
+ 				cxgb_close(adapter->port[i]);
+ 
+ 		t4_uld_clean_up(adapter);
+ 		disable_interrupts(adapter);
+ 		disable_msi(adapter);
+ 
+ 		t4_sge_stop(adapter);
+ 		if (adapter->flags & FW_OK)
+ 			t4_fw_bye(adapter, adapter->mbox);
+ 	}
+ #ifdef CONFIG_PCI_IOV
+ 	else {
+ 		if (adapter->port[0])
+ 			unregister_netdev(adapter->port[0]);
+ 		iounmap(adapter->regs);
+ 		kfree(adapter->vfinfo);
+ 		kfree(adapter);
+ 		pci_disable_sriov(pdev);
+ 		pci_release_regions(pdev);
+ 	}
+ #endif
  }
  
  static struct pci_driver cxgb4_driver = {
@@@ -5332,7 -5150,7 +5161,7 @@@
  	.id_table = cxgb4_pci_tbl,
  	.probe    = init_one,
  	.remove   = remove_one,
- 	.shutdown = remove_one,
+ 	.shutdown = shutdown_one,
  #ifdef CONFIG_PCI_IOV
  	.sriov_configure = cxgb4_iov_configure,
  #endif
diff --combined drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 660204b,15be5432..20dec85
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@@ -1,7 -1,7 +1,7 @@@
  /*
   * This file is part of the Chelsio T4 Ethernet driver for Linux.
   *
-  * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved.
+  * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@@ -2729,7 -2729,7 +2729,7 @@@ int t4_get_raw_vpd_params(struct adapte
  
  out:
  	vfree(vpd);
- 	return ret;
+ 	return ret < 0 ? ret : 0;
  }
  
  /**
@@@ -3627,8 -3627,7 +3627,8 @@@ void t4_ulprx_read_la(struct adapter *a
  }
  
  #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
 -		     FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \
 +		     FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \
 +		     FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
  		     FW_PORT_CAP_ANEG)
  
  /**
@@@ -7197,12 -7196,8 +7197,12 @@@ void t4_handle_get_port_info(struct por
  		speed = 1000;
  	else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
  		speed = 10000;
 +	else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
 +		speed = 25000;
  	else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
  		speed = 40000;
 +	else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
 +		speed = 100000;
  
  	lc = &pi->link_cfg;
  
@@@ -8269,3 -8264,73 +8269,73 @@@ void t4_idma_monitor(struct adapter *ad
  		t4_sge_decode_idma_state(adapter, idma->idma_state[i]);
  	}
  }
+ 
+ /**
+  *	t4_set_vf_mac_acl - Set MAC address for the specified VF
+  *	@adapter: The adapter
+  *	@vf: one of the VFs instantiated by the specified PF
+  *	@naddr: the number of MAC addresses
+  *	@addr: the MAC address(es) to be set to the specified VF
+  */
+ int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf,
+ 		      unsigned int naddr, u8 *addr)
+ {
+ 	struct fw_acl_mac_cmd cmd;
+ 
+ 	memset(&cmd, 0, sizeof(cmd));
+ 	cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) |
+ 				    FW_CMD_REQUEST_F |
+ 				    FW_CMD_WRITE_F |
+ 				    FW_ACL_MAC_CMD_PFN_V(adapter->pf) |
+ 				    FW_ACL_MAC_CMD_VFN_V(vf));
+ 
+ 	/* Note: Do not enable the ACL */
+ 	cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd));
+ 	cmd.nmac = naddr;
+ 
+ 	switch (adapter->pf) {
+ 	case 3:
+ 		memcpy(cmd.macaddr3, addr, sizeof(cmd.macaddr3));
+ 		break;
+ 	case 2:
+ 		memcpy(cmd.macaddr2, addr, sizeof(cmd.macaddr2));
+ 		break;
+ 	case 1:
+ 		memcpy(cmd.macaddr1, addr, sizeof(cmd.macaddr1));
+ 		break;
+ 	case 0:
+ 		memcpy(cmd.macaddr0, addr, sizeof(cmd.macaddr0));
+ 		break;
+ 	}
+ 
+ 	return t4_wr_mbox(adapter, adapter->mbox, &cmd, sizeof(cmd), &cmd);
+ }
+ 
+ int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
+ 		    int rateunit, int ratemode, int channel, int class,
+ 		    int minrate, int maxrate, int weight, int pktsize)
+ {
+ 	struct fw_sched_cmd cmd;
+ 
+ 	memset(&cmd, 0, sizeof(cmd));
+ 	cmd.op_to_write = cpu_to_be32(FW_CMD_OP_V(FW_SCHED_CMD) |
+ 				      FW_CMD_REQUEST_F |
+ 				      FW_CMD_WRITE_F);
+ 	cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd));
+ 
+ 	cmd.u.params.sc = FW_SCHED_SC_PARAMS;
+ 	cmd.u.params.type = type;
+ 	cmd.u.params.level = level;
+ 	cmd.u.params.mode = mode;
+ 	cmd.u.params.ch = channel;
+ 	cmd.u.params.cl = class;
+ 	cmd.u.params.unit = rateunit;
+ 	cmd.u.params.rate = ratemode;
+ 	cmd.u.params.min = cpu_to_be32(minrate);
+ 	cmd.u.params.max = cpu_to_be32(maxrate);
+ 	cmd.u.params.weight = cpu_to_be16(weight);
+ 	cmd.u.params.pktsize = cpu_to_be16(pktsize);
+ 
+ 	return t4_wr_mbox_meat(adapter, adapter->mbox, &cmd, sizeof(cmd),
+ 			       NULL, 1);
+ }
diff --combined drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 30507d4,ffe4bf4..4b58b32
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@@ -1,7 -1,7 +1,7 @@@
  /*
   * This file is part of the Chelsio T4 Ethernet driver for Linux.
   *
-  * Copyright (c) 2009-2014 Chelsio Communications, Inc. All rights reserved.
+  * Copyright (c) 2009-2016 Chelsio Communications, Inc. All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
@@@ -102,6 -102,7 +102,7 @@@ enum fw_wr_opcodes 
  	FW_RI_FR_NSMR_WR               = 0x19,
  	FW_RI_INV_LSTAG_WR             = 0x1a,
  	FW_ISCSI_TX_DATA_WR	       = 0x45,
+ 	FW_CRYPTO_LOOKASIDE_WR         = 0X6d,
  	FW_LASTC2E_WR                  = 0x70
  };
  
@@@ -680,6 -681,7 +681,7 @@@ enum fw_cmd_opcodes 
  	FW_RSS_IND_TBL_CMD             = 0x20,
  	FW_RSS_GLB_CONFIG_CMD          = 0x22,
  	FW_RSS_VI_CONFIG_CMD           = 0x23,
+ 	FW_SCHED_CMD                   = 0x24,
  	FW_DEVLOG_CMD                  = 0x25,
  	FW_CLIP_CMD                    = 0x28,
  	FW_LASTC2E_CMD                 = 0x40,
@@@ -1060,7 -1062,7 +1062,7 @@@ struct fw_caps_config_cmd 
  	__be16 niccaps;
  	__be16 ofldcaps;
  	__be16 rdmacaps;
- 	__be16 r4;
+ 	__be16 cryptocaps;
  	__be16 iscsicaps;
  	__be16 fcoecaps;
  	__be32 cfcsum;
@@@ -2265,12 -2267,6 +2267,12 @@@ enum fw_port_cap 
  	FW_PORT_CAP_802_3_ASM_DIR	= 0x8000,
  };
  
 +#define FW_PORT_CAP_SPEED_S     0
 +#define FW_PORT_CAP_SPEED_M     0x3f
 +#define FW_PORT_CAP_SPEED_V(x)  ((x) << FW_PORT_CAP_SPEED_S)
 +#define FW_PORT_CAP_SPEED_G(x) \
 +	(((x) >> FW_PORT_CAP_SPEED_S) & FW_PORT_CAP_SPEED_M)
 +
  enum fw_port_mdi {
  	FW_PORT_CAP_MDI_UNCHANGED,
  	FW_PORT_CAP_MDI_AUTO,
@@@ -2967,6 -2963,41 +2969,41 @@@ struct fw_rss_vi_config_cmd 
  #define FW_RSS_VI_CONFIG_CMD_UDPEN_V(x)	((x) << FW_RSS_VI_CONFIG_CMD_UDPEN_S)
  #define FW_RSS_VI_CONFIG_CMD_UDPEN_F	FW_RSS_VI_CONFIG_CMD_UDPEN_V(1U)
  
+ enum fw_sched_sc {
+ 	FW_SCHED_SC_PARAMS		= 1,
+ };
+ 
+ struct fw_sched_cmd {
+ 	__be32 op_to_write;
+ 	__be32 retval_len16;
+ 	union fw_sched {
+ 		struct fw_sched_config {
+ 			__u8   sc;
+ 			__u8   type;
+ 			__u8   minmaxen;
+ 			__u8   r3[5];
+ 			__u8   nclasses[4];
+ 			__be32 r4;
+ 		} config;
+ 		struct fw_sched_params {
+ 			__u8   sc;
+ 			__u8   type;
+ 			__u8   level;
+ 			__u8   mode;
+ 			__u8   unit;
+ 			__u8   rate;
+ 			__u8   ch;
+ 			__u8   cl;
+ 			__be32 min;
+ 			__be32 max;
+ 			__be16 weight;
+ 			__be16 pktsize;
+ 			__be16 burstsize;
+ 			__be16 r4;
+ 		} params;
+ 	} u;
+ };
+ 
  struct fw_clip_cmd {
  	__be32 op_to_write;
  	__be32 alloc_to_len16;
@@@ -3255,4 -3286,127 +3292,127 @@@ struct fw_devlog_cmd 
  #define PCIE_FW_PF_DEVLOG_MEMTYPE_G(x) \
  	(((x) >> PCIE_FW_PF_DEVLOG_MEMTYPE_S) & PCIE_FW_PF_DEVLOG_MEMTYPE_M)
  
+ #define MAX_IMM_OFLD_TX_DATA_WR_LEN (0xff + sizeof(struct fw_ofld_tx_data_wr))
+ 
+ struct fw_crypto_lookaside_wr {
+ 	__be32 op_to_cctx_size;
+ 	__be32 len16_pkd;
+ 	__be32 session_id;
+ 	__be32 rx_chid_to_rx_q_id;
+ 	__be32 key_addr;
+ 	__be32 pld_size_hash_size;
+ 	__be64 cookie;
+ };
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_OPCODE_S 24
+ #define FW_CRYPTO_LOOKASIDE_WR_OPCODE_M 0xff
+ #define FW_CRYPTO_LOOKASIDE_WR_OPCODE_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_OPCODE_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_OPCODE_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_OPCODE_S) & \
+ 	 FW_CRYPTO_LOOKASIDE_WR_OPCODE_M)
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_COMPL_S 23
+ #define FW_CRYPTO_LOOKASIDE_WR_COMPL_M 0x1
+ #define FW_CRYPTO_LOOKASIDE_WR_COMPL_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_COMPL_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_COMPL_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_COMPL_S) & \
+ 	 FW_CRYPTO_LOOKASIDE_WR_COMPL_M)
+ #define FW_CRYPTO_LOOKASIDE_WR_COMPL_F FW_CRYPTO_LOOKASIDE_WR_COMPL_V(1U)
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S 15
+ #define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_M 0xff
+ #define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S) & \
+ 	 FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_M)
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S 5
+ #define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_M 0x3
+ #define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S) & \
+ 	 FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_M)
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S 0
+ #define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_M 0x1f
+ #define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S) & \
+ 	 FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_M)
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_LEN16_S 0
+ #define FW_CRYPTO_LOOKASIDE_WR_LEN16_M 0xff
+ #define FW_CRYPTO_LOOKASIDE_WR_LEN16_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_LEN16_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_LEN16_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_LEN16_S) & \
+ 	 FW_CRYPTO_LOOKASIDE_WR_LEN16_M)
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S 29
+ #define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_M 0x3
+ #define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S) & \
+ 	 FW_CRYPTO_LOOKASIDE_WR_RX_CHID_M)
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_LCB_S  27
+ #define FW_CRYPTO_LOOKASIDE_WR_LCB_M  0x3
+ #define FW_CRYPTO_LOOKASIDE_WR_LCB_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_LCB_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_LCB_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_LCB_S) & FW_CRYPTO_LOOKASIDE_WR_LCB_M)
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_PHASH_S 25
+ #define FW_CRYPTO_LOOKASIDE_WR_PHASH_M 0x3
+ #define FW_CRYPTO_LOOKASIDE_WR_PHASH_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_PHASH_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_PHASH_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_PHASH_S) & \
+ 	 FW_CRYPTO_LOOKASIDE_WR_PHASH_M)
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_IV_S   23
+ #define FW_CRYPTO_LOOKASIDE_WR_IV_M   0x3
+ #define FW_CRYPTO_LOOKASIDE_WR_IV_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_IV_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_IV_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_IV_S) & FW_CRYPTO_LOOKASIDE_WR_IV_M)
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_TX_CH_S 10
+ #define FW_CRYPTO_LOOKASIDE_WR_TX_CH_M 0x3
+ #define FW_CRYPTO_LOOKASIDE_WR_TX_CH_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_TX_CH_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_TX_CH_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_TX_CH_S) & \
+ 	 FW_CRYPTO_LOOKASIDE_WR_TX_CH_M)
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S 0
+ #define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_M 0x3ff
+ #define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S) & \
+ 	 FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_M)
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S 24
+ #define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_M 0xff
+ #define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S) & \
+ 	 FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_M)
+ 
+ #define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S 17
+ #define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_M 0x7f
+ #define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(x) \
+ 	((x) << FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S)
+ #define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_G(x) \
+ 	(((x) >> FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S) & \
+ 	 FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_M)
+ 
  #endif /* _T4FW_INTERFACE_H_ */
diff --combined drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
index 17a2bbc,8067424..b3903fe
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
@@@ -108,8 -108,8 +108,8 @@@ struct link_config 
  	unsigned int   supported;        /* link capabilities */
  	unsigned int   advertising;      /* advertised capabilities */
  	unsigned short lp_advertising;   /* peer advertised capabilities */
 -	unsigned short requested_speed;  /* speed user has requested */
 -	unsigned short speed;            /* actual link speed */
 +	unsigned int   requested_speed;  /* speed user has requested */
 +	unsigned int   speed;            /* actual link speed */
  	unsigned char  requested_fc;     /* flow control user has requested */
  	unsigned char  fc;               /* actual link flow control */
  	unsigned char  autoneg;          /* autonegotiating? */
@@@ -271,17 -271,10 +271,17 @@@ static inline bool is_10g_port(const st
  	return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0;
  }
  
 +/* Return true if the Link Configuration supports "High Speeds" (those greater
 + * than 1Gb/s).
 + */
  static inline bool is_x_10g_port(const struct link_config *lc)
  {
 -	return (lc->supported & FW_PORT_CAP_SPEED_10G) != 0 ||
 -		(lc->supported & FW_PORT_CAP_SPEED_40G) != 0;
 +	unsigned int speeds, high_speeds;
 +
 +	speeds = FW_PORT_CAP_SPEED_V(FW_PORT_CAP_SPEED_G(lc->supported));
 +	high_speeds = speeds & ~(FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G);
 +
 +	return high_speeds != 0;
  }
  
  static inline unsigned int core_ticks_per_usec(const struct adapter *adapter)
@@@ -354,6 -347,7 +354,7 @@@ int t4vf_bar2_sge_qregs(struct adapter 
  			u64 *pbar2_qoffset,
  			unsigned int *pbar2_qid);
  
+ unsigned int t4vf_get_pf_from_vf(struct adapter *);
  int t4vf_get_sge_params(struct adapter *);
  int t4vf_get_vpd_params(struct adapter *);
  int t4vf_get_dev_params(struct adapter *);
@@@ -388,5 -382,7 +389,7 @@@ int t4vf_eth_eq_free(struct adapter *, 
  
  int t4vf_handle_fw_rpl(struct adapter *, const __be64 *);
  int t4vf_prep_adapter(struct adapter *);
+ int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
+ 			unsigned int *naddr, u8 *addr);
  
  #endif /* __T4VF_COMMON_H__ */
diff --combined drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index b5622b1,879f4c5..e98248f
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@@ -314,9 -314,8 +314,9 @@@ int t4vf_wr_mbox_core(struct adapter *a
  }
  
  #define ADVERT_MASK (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
 -		     FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_40G | \
 -		     FW_PORT_CAP_SPEED_100G | FW_PORT_CAP_ANEG)
 +		     FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_SPEED_25G | \
 +		     FW_PORT_CAP_SPEED_40G | FW_PORT_CAP_SPEED_100G | \
 +		     FW_PORT_CAP_ANEG)
  
  /**
   *	init_link_config - initialize a link's SW state
@@@ -640,6 -639,15 +640,15 @@@ int t4vf_bar2_sge_qregs(struct adapter 
  	return 0;
  }
  
+ unsigned int t4vf_get_pf_from_vf(struct adapter *adapter)
+ {
+ 	u32 whoami;
+ 
+ 	whoami = t4_read_reg(adapter, T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A);
+ 	return (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
+ 			SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami));
+ }
+ 
  /**
   *	t4vf_get_sge_params - retrieve adapter Scatter gather Engine parameters
   *	@adapter: the adapter
@@@ -717,7 -725,6 +726,6 @@@ int t4vf_get_sge_params(struct adapter 
  	 * read.
  	 */
  	if (!is_t4(adapter->params.chip)) {
- 		u32 whoami;
  		unsigned int pf, s_hps, s_qpp;
  
  		params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_REG) |
@@@ -741,11 -748,7 +749,7 @@@
  		 * register we just read. Do it once here so other code in
  		 * the driver can just use it.
  		 */
- 		whoami = t4_read_reg(adapter,
- 				     T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A);
- 		pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ?
- 			SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami);
- 
+ 		pf = t4vf_get_pf_from_vf(adapter);
  		s_hps = (HOSTPAGESIZEPF0_S +
  			 (HOSTPAGESIZEPF1_S - HOSTPAGESIZEPF0_S) * pf);
  		sge_params->sge_vf_hps =
@@@ -1713,12 -1716,8 +1717,12 @@@ int t4vf_handle_fw_rpl(struct adapter *
  			speed = 1000;
  		else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_10G))
  			speed = 10000;
 +		else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_25G))
 +			speed = 25000;
  		else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_40G))
  			speed = 40000;
 +		else if (stat & FW_PORT_CMD_LSPEED_V(FW_PORT_CAP_SPEED_100G))
 +			speed = 100000;
  
  		/*
  		 * Scan all of our "ports" (Virtual Interfaces) looking for
@@@ -1812,3 -1811,50 +1816,50 @@@ int t4vf_prep_adapter(struct adapter *a
  
  	return 0;
  }
+ 
+ /**
+  *	t4vf_get_vf_mac_acl - Get the MAC address to be set to
+  *			      the VI of this VF.
+  *	@adapter: The adapter
+  *	@pf: The pf associated with vf
+  *	@naddr: the number of ACL MAC addresses returned in addr
+  *	@addr: Placeholder for MAC addresses
+  *
+  *	Find the MAC address to be set to the VF's VI. The requested MAC address
+  *	is from the host OS via callback in the PF driver.
+  */
+ int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf,
+ 			unsigned int *naddr, u8 *addr)
+ {
+ 	struct fw_acl_mac_cmd cmd;
+ 	int ret;
+ 
+ 	memset(&cmd, 0, sizeof(cmd));
+ 	cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) |
+ 				    FW_CMD_REQUEST_F |
+ 				    FW_CMD_READ_F);
+ 	cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd));
+ 	ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &cmd);
+ 	if (ret)
+ 		return ret;
+ 
+ 	if (cmd.nmac < *naddr)
+ 		*naddr = cmd.nmac;
+ 
+ 	switch (pf) {
+ 	case 3:
+ 		memcpy(addr, cmd.macaddr3, sizeof(cmd.macaddr3));
+ 		break;
+ 	case 2:
+ 		memcpy(addr, cmd.macaddr2, sizeof(cmd.macaddr2));
+ 		break;
+ 	case 1:
+ 		memcpy(addr, cmd.macaddr1, sizeof(cmd.macaddr1));
+ 		break;
+ 	case 0:
+ 		memcpy(addr, cmd.macaddr0, sizeof(cmd.macaddr0));
+ 		break;
+ 	}
+ 
+ 	return ret;
+ }
diff --combined drivers/net/ethernet/ibm/emac/core.c
index 7af09cb,ec4d0f3..8f13919
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@@ -977,37 -977,7 +977,37 @@@ static void emac_set_multicast_list(str
  		dev->mcast_pending = 1;
  		return;
  	}
 +
 +	mutex_lock(&dev->link_lock);
  	__emac_set_multicast_list(dev);
 +	mutex_unlock(&dev->link_lock);
 +}
 +
 +static int emac_set_mac_address(struct net_device *ndev, void *sa)
 +{
 +	struct emac_instance *dev = netdev_priv(ndev);
 +	struct sockaddr *addr = sa;
 +	struct emac_regs __iomem *p = dev->emacp;
 +
 +	if (!is_valid_ether_addr(addr->sa_data))
 +	       return -EADDRNOTAVAIL;
 +
 +	mutex_lock(&dev->link_lock);
 +
 +	memcpy(ndev->dev_addr, addr->sa_data, ndev->addr_len);
 +
 +	emac_rx_disable(dev);
 +	emac_tx_disable(dev);
 +	out_be32(&p->iahr, (ndev->dev_addr[0] << 8) | ndev->dev_addr[1]);
 +	out_be32(&p->ialr, (ndev->dev_addr[2] << 24) |
 +		(ndev->dev_addr[3] << 16) | (ndev->dev_addr[4] << 8) |
 +		ndev->dev_addr[5]);
 +	emac_tx_enable(dev);
 +	emac_rx_enable(dev);
 +
 +	mutex_unlock(&dev->link_lock);
 +
 +	return 0;
  }
  
  static int emac_resize_rx_ring(struct emac_instance *dev, int new_mtu)
@@@ -2716,7 -2686,7 +2716,7 @@@ static const struct net_device_ops emac
  	.ndo_do_ioctl		= emac_ioctl,
  	.ndo_tx_timeout		= emac_tx_timeout,
  	.ndo_validate_addr	= eth_validate_addr,
 -	.ndo_set_mac_address	= eth_mac_addr,
 +	.ndo_set_mac_address	= emac_set_mac_address,
  	.ndo_start_xmit		= emac_start_xmit,
  	.ndo_change_mtu		= eth_change_mtu,
  };
@@@ -2729,7 -2699,7 +2729,7 @@@ static const struct net_device_ops emac
  	.ndo_do_ioctl		= emac_ioctl,
  	.ndo_tx_timeout		= emac_tx_timeout,
  	.ndo_validate_addr	= eth_validate_addr,
 -	.ndo_set_mac_address	= eth_mac_addr,
 +	.ndo_set_mac_address	= emac_set_mac_address,
  	.ndo_start_xmit		= emac_start_xmit_sg,
  	.ndo_change_mtu		= emac_change_mtu,
  };
@@@ -2780,7 -2750,7 +2780,7 @@@ static int emac_probe(struct platform_d
  	/* Get interrupts. EMAC irq is mandatory, WOL irq is optional */
  	dev->emac_irq = irq_of_parse_and_map(np, 0);
  	dev->wol_irq = irq_of_parse_and_map(np, 1);
- 	if (dev->emac_irq == NO_IRQ) {
+ 	if (!dev->emac_irq) {
  		printk(KERN_ERR "%s: Can't map main interrupt\n", np->full_name);
  		goto err_free;
  	}
@@@ -2943,9 -2913,9 +2943,9 @@@
   err_reg_unmap:
  	iounmap(dev->emacp);
   err_irq_unmap:
- 	if (dev->wol_irq != NO_IRQ)
+ 	if (dev->wol_irq)
  		irq_dispose_mapping(dev->wol_irq);
- 	if (dev->emac_irq != NO_IRQ)
+ 	if (dev->emac_irq)
  		irq_dispose_mapping(dev->emac_irq);
   err_free:
  	free_netdev(ndev);
@@@ -2987,9 -2957,9 +2987,9 @@@ static int emac_remove(struct platform_
  	emac_dbg_unregister(dev);
  	iounmap(dev->emacp);
  
- 	if (dev->wol_irq != NO_IRQ)
+ 	if (dev->wol_irq)
  		irq_dispose_mapping(dev->wol_irq);
- 	if (dev->emac_irq != NO_IRQ)
+ 	if (dev->emac_irq)
  		irq_dispose_mapping(dev->emac_irq);
  
  	free_netdev(dev->ndev);
diff --combined drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 3743af8,2909372..e000ccf
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@@ -18,6 -18,7 +18,7 @@@
  #include <linux/mfd/syscon.h>
  #include <linux/regmap.h>
  #include <linux/clk.h>
+ #include <linux/pm_runtime.h>
  #include <linux/if_vlan.h>
  #include <linux/reset.h>
  #include <linux/tcp.h>
@@@ -144,6 -145,9 +145,9 @@@ static void mtk_phy_link_adjust(struct 
  		  MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN |
  		  MAC_MCR_BACKPR_EN;
  
+ 	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+ 		return;
+ 
  	switch (mac->phy_dev->speed) {
  	case SPEED_1000:
  		mcr |= MAC_MCR_SPEED_1000;
@@@ -230,7 -234,7 +234,7 @@@ static int mtk_phy_connect(struct mtk_m
  {
  	struct mtk_eth *eth = mac->hw;
  	struct device_node *np;
- 	u32 val, ge_mode;
+ 	u32 val;
  
  	np = of_parse_phandle(mac->of_node, "phy-handle", 0);
  	if (!np && of_phy_is_fixed_link(mac->of_node))
@@@ -244,18 -248,18 +248,18 @@@
  	case PHY_INTERFACE_MODE_RGMII_RXID:
  	case PHY_INTERFACE_MODE_RGMII_ID:
  	case PHY_INTERFACE_MODE_RGMII:
- 		ge_mode = 0;
+ 		mac->ge_mode = 0;
  		break;
  	case PHY_INTERFACE_MODE_MII:
- 		ge_mode = 1;
+ 		mac->ge_mode = 1;
  		break;
  	case PHY_INTERFACE_MODE_REVMII:
- 		ge_mode = 2;
+ 		mac->ge_mode = 2;
  		break;
  	case PHY_INTERFACE_MODE_RMII:
  		if (!mac->id)
  			goto err_phy;
- 		ge_mode = 3;
+ 		mac->ge_mode = 3;
  		break;
  	default:
  		goto err_phy;
@@@ -264,7 -268,7 +268,7 @@@
  	/* put the gmac into the right mode */
  	regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
  	val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, mac->id);
- 	val |= SYSCFG0_GE_MODE(ge_mode, mac->id);
+ 	val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id);
  	regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
  
  	mtk_phy_connect_node(eth, mac, np);
@@@ -336,25 -340,27 +340,27 @@@ static void mtk_mdio_cleanup(struct mtk
  	mdiobus_unregister(eth->mii_bus);
  }
  
- static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask)
+ static inline void mtk_irq_disable(struct mtk_eth *eth,
+ 				   unsigned reg, u32 mask)
  {
  	unsigned long flags;
  	u32 val;
  
  	spin_lock_irqsave(&eth->irq_lock, flags);
- 	val = mtk_r32(eth, MTK_QDMA_INT_MASK);
- 	mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK);
+ 	val = mtk_r32(eth, reg);
+ 	mtk_w32(eth, val & ~mask, reg);
  	spin_unlock_irqrestore(&eth->irq_lock, flags);
  }
  
- static inline void mtk_irq_enable(struct mtk_eth *eth, u32 mask)
+ static inline void mtk_irq_enable(struct mtk_eth *eth,
+ 				  unsigned reg, u32 mask)
  {
  	unsigned long flags;
  	u32 val;
  
  	spin_lock_irqsave(&eth->irq_lock, flags);
- 	val = mtk_r32(eth, MTK_QDMA_INT_MASK);
- 	mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK);
+ 	val = mtk_r32(eth, reg);
+ 	mtk_w32(eth, val | mask, reg);
  	spin_unlock_irqrestore(&eth->irq_lock, flags);
  }
  
@@@ -363,18 -369,20 +369,20 @@@ static int mtk_set_mac_address(struct n
  	int ret = eth_mac_addr(dev, p);
  	struct mtk_mac *mac = netdev_priv(dev);
  	const char *macaddr = dev->dev_addr;
- 	unsigned long flags;
  
  	if (ret)
  		return ret;
  
- 	spin_lock_irqsave(&mac->hw->page_lock, flags);
+ 	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+ 		return -EBUSY;
+ 
+ 	spin_lock_bh(&mac->hw->page_lock);
  	mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1],
  		MTK_GDMA_MAC_ADRH(mac->id));
  	mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) |
  		(macaddr[4] << 8) | macaddr[5],
  		MTK_GDMA_MAC_ADRL(mac->id));
- 	spin_unlock_irqrestore(&mac->hw->page_lock, flags);
+ 	spin_unlock_bh(&mac->hw->page_lock);
  
  	return 0;
  }
@@@ -759,7 -767,6 +767,6 @@@ static int mtk_start_xmit(struct sk_buf
  	struct mtk_eth *eth = mac->hw;
  	struct mtk_tx_ring *ring = &eth->tx_ring;
  	struct net_device_stats *stats = &dev->stats;
- 	unsigned long flags;
  	bool gso = false;
  	int tx_num;
  
@@@ -767,14 -774,17 +774,17 @@@
  	 * however we have 2 queues running on the same ring so we need to lock
  	 * the ring access
  	 */
- 	spin_lock_irqsave(&eth->page_lock, flags);
+ 	spin_lock(&eth->page_lock);
+ 
+ 	if (unlikely(test_bit(MTK_RESETTING, &eth->state)))
+ 		goto drop;
  
  	tx_num = mtk_cal_txd_req(skb);
  	if (unlikely(atomic_read(&ring->free_count) <= tx_num)) {
  		mtk_stop_queue(eth);
  		netif_err(eth, tx_queued, dev,
  			  "Tx Ring full when queue awake!\n");
- 		spin_unlock_irqrestore(&eth->page_lock, flags);
+ 		spin_unlock(&eth->page_lock);
  		return NETDEV_TX_BUSY;
  	}
  
@@@ -799,22 -809,62 +809,62 @@@
  	if (unlikely(atomic_read(&ring->free_count) <= ring->thresh))
  		mtk_stop_queue(eth);
  
- 	spin_unlock_irqrestore(&eth->page_lock, flags);
+ 	spin_unlock(&eth->page_lock);
  
  	return NETDEV_TX_OK;
  
  drop:
- 	spin_unlock_irqrestore(&eth->page_lock, flags);
+ 	spin_unlock(&eth->page_lock);
  	stats->tx_dropped++;
  	dev_kfree_skb(skb);
  	return NETDEV_TX_OK;
  }
  
+ static struct mtk_rx_ring *mtk_get_rx_ring(struct mtk_eth *eth)
+ {
+ 	int i;
+ 	struct mtk_rx_ring *ring;
+ 	int idx;
+ 
+ 	if (!eth->hwlro)
+ 		return &eth->rx_ring[0];
+ 
+ 	for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) {
+ 		ring = &eth->rx_ring[i];
+ 		idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size);
+ 		if (ring->dma[idx].rxd2 & RX_DMA_DONE) {
+ 			ring->calc_idx_update = true;
+ 			return ring;
+ 		}
+ 	}
+ 
+ 	return NULL;
+ }
+ 
+ static void mtk_update_rx_cpu_idx(struct mtk_eth *eth)
+ {
+ 	struct mtk_rx_ring *ring;
+ 	int i;
+ 
+ 	if (!eth->hwlro) {
+ 		ring = &eth->rx_ring[0];
+ 		mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+ 	} else {
+ 		for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) {
+ 			ring = &eth->rx_ring[i];
+ 			if (ring->calc_idx_update) {
+ 				ring->calc_idx_update = false;
+ 				mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+ 			}
+ 		}
+ 	}
+ }
+ 
  static int mtk_poll_rx(struct napi_struct *napi, int budget,
  		       struct mtk_eth *eth)
  {
- 	struct mtk_rx_ring *ring = &eth->rx_ring;
- 	int idx = ring->calc_idx;
+ 	struct mtk_rx_ring *ring;
+ 	int idx;
  	struct sk_buff *skb;
  	u8 *data, *new_data;
  	struct mtk_rx_dma *rxd, trxd;
@@@ -826,7 -876,11 +876,11 @@@
  		dma_addr_t dma_addr;
  		int mac = 0;
  
- 		idx = NEXT_RX_DESP_IDX(idx);
+ 		ring = mtk_get_rx_ring(eth);
+ 		if (unlikely(!ring))
+ 			goto rx_done;
+ 
+ 		idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size);
  		rxd = &ring->dma[idx];
  		data = ring->data[idx];
  
@@@ -841,6 -895,9 +895,9 @@@
  
  		netdev = eth->netdev[mac];
  
+ 		if (unlikely(test_bit(MTK_RESETTING, &eth->state)))
+ 			goto release_desc;
+ 
  		/* alloc new buffer */
  		new_data = napi_alloc_frag(ring->frag_size);
  		if (unlikely(!new_data)) {
@@@ -890,17 -947,19 +947,19 @@@ release_desc
  		rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size);
  
  		ring->calc_idx = idx;
+ 
+ 		done++;
+ 	}
+ 
+ rx_done:
+ 	if (done) {
  		/* make sure that all changes to the dma ring are flushed before
  		 * we continue
  		 */
  		wmb();
- 		mtk_w32(eth, ring->calc_idx, MTK_QRX_CRX_IDX0);
- 		done++;
+ 		mtk_update_rx_cpu_idx(eth);
  	}
  
- 	if (done < budget)
- 		mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS);
- 
  	return done;
  }
  
@@@ -1009,7 -1068,7 +1068,7 @@@ static int mtk_napi_tx(struct napi_stru
  		return budget;
  
  	napi_complete(napi);
- 	mtk_irq_enable(eth, MTK_TX_DONE_INT);
+ 	mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
  
  	return tx_done;
  }
@@@ -1019,30 -1078,33 +1078,33 @@@ static int mtk_napi_rx(struct napi_stru
  	struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi);
  	u32 status, mask;
  	int rx_done = 0;
+ 	int remain_budget = budget;
  
  	mtk_handle_status_irq(eth);
- 	mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS);
- 	rx_done = mtk_poll_rx(napi, budget, eth);
+ 
+ poll_again:
+ 	mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS);
+ 	rx_done = mtk_poll_rx(napi, remain_budget, eth);
  
  	if (unlikely(netif_msg_intr(eth))) {
- 		status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
- 		mask = mtk_r32(eth, MTK_QDMA_INT_MASK);
+ 		status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
+ 		mask = mtk_r32(eth, MTK_PDMA_INT_MASK);
  		dev_info(eth->dev,
  			 "done rx %d, intr 0x%08x/0x%x\n",
  			 rx_done, status, mask);
  	}
- 
- 	if (rx_done == budget)
- 		return budget;
- 
- 	status = mtk_r32(eth, MTK_QMTK_INT_STATUS);
- 	if (status & MTK_RX_DONE_INT)
+ 	if (rx_done == remain_budget)
  		return budget;
  
+ 	status = mtk_r32(eth, MTK_PDMA_INT_STATUS);
+ 	if (status & MTK_RX_DONE_INT) {
+ 		remain_budget -= rx_done;
+ 		goto poll_again;
+ 	}
  	napi_complete(napi);
- 	mtk_irq_enable(eth, MTK_RX_DONE_INT);
+ 	mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
  
- 	return rx_done;
+ 	return rx_done + budget - remain_budget;
  }
  
  static int mtk_tx_alloc(struct mtk_eth *eth)
@@@ -1089,6 -1151,7 +1151,7 @@@
  	mtk_w32(eth,
  		ring->phys + ((MTK_DMA_SIZE - 1) * sz),
  		MTK_QTX_DRX_PTR);
+ 	mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
  
  	return 0;
  
@@@ -1117,32 -1180,41 +1180,41 @@@ static void mtk_tx_clean(struct mtk_et
  	}
  }
  
- static int mtk_rx_alloc(struct mtk_eth *eth)
+ static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag)
  {
- 	struct mtk_rx_ring *ring = &eth->rx_ring;
+ 	struct mtk_rx_ring *ring = &eth->rx_ring[ring_no];
+ 	int rx_data_len, rx_dma_size;
  	int i;
  
- 	ring->frag_size = mtk_max_frag_size(ETH_DATA_LEN);
+ 	if (rx_flag == MTK_RX_FLAGS_HWLRO) {
+ 		rx_data_len = MTK_MAX_LRO_RX_LENGTH;
+ 		rx_dma_size = MTK_HW_LRO_DMA_SIZE;
+ 	} else {
+ 		rx_data_len = ETH_DATA_LEN;
+ 		rx_dma_size = MTK_DMA_SIZE;
+ 	}
+ 
+ 	ring->frag_size = mtk_max_frag_size(rx_data_len);
  	ring->buf_size = mtk_max_buf_size(ring->frag_size);
- 	ring->data = kcalloc(MTK_DMA_SIZE, sizeof(*ring->data),
+ 	ring->data = kcalloc(rx_dma_size, sizeof(*ring->data),
  			     GFP_KERNEL);
  	if (!ring->data)
  		return -ENOMEM;
  
- 	for (i = 0; i < MTK_DMA_SIZE; i++) {
+ 	for (i = 0; i < rx_dma_size; i++) {
  		ring->data[i] = netdev_alloc_frag(ring->frag_size);
  		if (!ring->data[i])
  			return -ENOMEM;
  	}
  
  	ring->dma = dma_alloc_coherent(eth->dev,
- 				       MTK_DMA_SIZE * sizeof(*ring->dma),
+ 				       rx_dma_size * sizeof(*ring->dma),
  				       &ring->phys,
  				       GFP_ATOMIC | __GFP_ZERO);
  	if (!ring->dma)
  		return -ENOMEM;
  
- 	for (i = 0; i < MTK_DMA_SIZE; i++) {
+ 	for (i = 0; i < rx_dma_size; i++) {
  		dma_addr_t dma_addr = dma_map_single(eth->dev,
  				ring->data[i] + NET_SKB_PAD,
  				ring->buf_size,
@@@ -1153,28 -1225,30 +1225,30 @@@
  
  		ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size);
  	}
- 	ring->calc_idx = MTK_DMA_SIZE - 1;
+ 	ring->dma_size = rx_dma_size;
+ 	ring->calc_idx_update = false;
+ 	ring->calc_idx = rx_dma_size - 1;
+ 	ring->crx_idx_reg = MTK_PRX_CRX_IDX_CFG(ring_no);
  	/* make sure that all changes to the dma ring are flushed before we
  	 * continue
  	 */
  	wmb();
  
- 	mtk_w32(eth, eth->rx_ring.phys, MTK_QRX_BASE_PTR0);
- 	mtk_w32(eth, MTK_DMA_SIZE, MTK_QRX_MAX_CNT0);
- 	mtk_w32(eth, eth->rx_ring.calc_idx, MTK_QRX_CRX_IDX0);
- 	mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_QDMA_RST_IDX);
- 	mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0));
+ 	mtk_w32(eth, ring->phys, MTK_PRX_BASE_PTR_CFG(ring_no));
+ 	mtk_w32(eth, rx_dma_size, MTK_PRX_MAX_CNT_CFG(ring_no));
+ 	mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg);
+ 	mtk_w32(eth, MTK_PST_DRX_IDX_CFG(ring_no), MTK_PDMA_RST_IDX);
  
  	return 0;
  }
  
- static void mtk_rx_clean(struct mtk_eth *eth)
+ static void mtk_rx_clean(struct mtk_eth *eth, int ring_no)
  {
- 	struct mtk_rx_ring *ring = &eth->rx_ring;
+ 	struct mtk_rx_ring *ring = &eth->rx_ring[ring_no];
  	int i;
  
  	if (ring->data && ring->dma) {
- 		for (i = 0; i < MTK_DMA_SIZE; i++) {
+ 		for (i = 0; i < ring->dma_size; i++) {
  			if (!ring->data[i])
  				continue;
  			if (!ring->dma[i].rxd1)
@@@ -1191,13 -1265,274 +1265,274 @@@
  
  	if (ring->dma) {
  		dma_free_coherent(eth->dev,
- 				  MTK_DMA_SIZE * sizeof(*ring->dma),
+ 				  ring->dma_size * sizeof(*ring->dma),
  				  ring->dma,
  				  ring->phys);
  		ring->dma = NULL;
  	}
  }
  
+ /* Program the PDMA hardware-LRO engine: configure LRO RX rings
+  * 1 .. MTK_MAX_RX_RING_NUM - 1, set the global LRO thresholds and timers,
+  * then write the control words that enable HW LRO. Always returns 0.
+  */
+ static int mtk_hwlro_rx_init(struct mtk_eth *eth)
+ {
+ 	/* per-ring word 1: low part of the AGE timer (unit: 20us) */
+ 	u32 ring_dw1 = MTK_RING_AGE_TIME_L;
+ 	/* per-ring word 2: auto-learn mode, ring valid, high part of the AGE
+ 	 * timer, max AGG timer (unit: 20us), low part of the max AGG count
+ 	 */
+ 	u32 ring_dw2 = MTK_RING_AUTO_LERAN_MODE | MTK_RING_VLD |
+ 		       MTK_RING_AGE_TIME_H | MTK_RING_MAX_AGG_TIME |
+ 		       MTK_RING_MAX_AGG_CNT_L;
+ 	/* per-ring word 3: high part of the max LRO AGG count */
+ 	u32 ring_dw3 = MTK_RING_MAX_AGG_CNT_H;
+ 	/* global word 0: IPv4 checksum update, packet-count priority
+ 	 * comparison mode, and the HW LRO enable bit
+ 	 */
+ 	u32 glo_dw0 = MTK_L3_CKS_UPD_EN | MTK_LRO_ALT_PKT_CNT_MODE |
+ 		      MTK_LRO_EN;
+ 	/* global word 3: HW LRO mode, max aggregation count for rx packets
+ 	 * and the minimal remaining room of SDL0 in RXD for aggregation
+ 	 */
+ 	u32 glo_dw3 = MTK_ADMA_MODE | (MTK_HW_LRO_MAX_AGG_CNT & 0xff) |
+ 		      MTK_LRO_MIN_RXD_SDL;
+ 	int ring;
+ 
+ 	for (ring = 1; ring < MTK_MAX_RX_RING_NUM; ring++) {
+ 		mtk_w32(eth, ring_dw1, MTK_LRO_CTRL_DW1_CFG(ring));
+ 		mtk_w32(eth, ring_dw2, MTK_LRO_CTRL_DW2_CFG(ring));
+ 		mtk_w32(eth, ring_dw3, MTK_LRO_CTRL_DW3_CFG(ring));
+ 	}
+ 
+ 	/* bandwidth threshold setting */
+ 	mtk_w32(eth, MTK_HW_LRO_BW_THRE, MTK_PDMA_LRO_CTRL_DW2);
+ 
+ 	/* auto-learn score delta setting */
+ 	mtk_w32(eth, MTK_HW_LRO_REPLACE_DELTA, MTK_PDMA_LRO_ALT_SCORE_DELTA);
+ 
+ 	/* set refresh timer for altering flows to 1 sec. (unit: 20us) */
+ 	mtk_w32(eth, (MTK_HW_LRO_TIMER_UNIT << 16) | MTK_HW_LRO_REFRESH_TIME,
+ 		MTK_PDMA_LRO_ALT_REFRESH_TIMER);
+ 
+ 	/* word 3 is written before word 0, which carries the enable bit */
+ 	mtk_w32(eth, glo_dw3, MTK_PDMA_LRO_CTRL_DW3);
+ 	mtk_w32(eth, glo_dw0, MTK_PDMA_LRO_CTRL_DW0);
+ 
+ 	return 0;
+ }
+ 
+ /* Tear down hardware LRO: ask the engine to relinquish its rings (flushing
+  * aggregated packets), poll the relinquish status, invalidate LRO rings
+  * 1 .. MTK_MAX_RX_RING_NUM - 1 and finally clear the LRO control word.
+  */
+ static void mtk_hwlro_rx_uninit(struct mtk_eth *eth)
+ {
+ 	int i;
+ 	u32 val;
+ 
+ 	/* relinquish lro rings, flush aggregated packets */
+ 	mtk_w32(eth, MTK_LRO_RING_RELINQUISH_REQ, MTK_PDMA_LRO_CTRL_DW0);
+ 
+ 	/* wait for relinquishments done: poll at most 10 times, sleeping
+ 	 * 20ms whenever the status bit is still set, and leave the loop as
+ 	 * soon as it reads back clear instead of spinning through the
+ 	 * remaining iterations
+ 	 */
+ 	for (i = 0; i < 10; i++) {
+ 		val = mtk_r32(eth, MTK_PDMA_LRO_CTRL_DW0);
+ 		if (!(val & MTK_LRO_RING_RELINQUISH_DONE))
+ 			break;
+ 		msleep(20);
+ 	}
+ 
+ 	/* invalidate lro rings */
+ 	for (i = 1; i < MTK_MAX_RX_RING_NUM; i++)
+ 		mtk_w32(eth, 0, MTK_LRO_CTRL_DW2_CFG(i));
+ 
+ 	/* disable HW LRO */
+ 	mtk_w32(eth, 0, MTK_PDMA_LRO_CTRL_DW0);
+ }
+ 
+ /* Store @ip in the LRO destination-IP register of slot @idx and mark the
+  * slot's IP setting valid.
+  */
+ static void mtk_hwlro_val_ipaddr(struct mtk_eth *eth, int idx, __be32 ip)
+ {
+ 	u32 ctrl = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx));
+ 
+ 	/* clear the valid flag first, before the address is rewritten */
+ 	mtk_w32(eth, ctrl & ~MTK_RING_MYIP_VLD, MTK_LRO_CTRL_DW2_CFG(idx));
+ 
+ 	mtk_w32(eth, ip, MTK_LRO_DIP_DW0_CFG(idx));
+ 
+ 	/* then flag the newly written address as valid */
+ 	mtk_w32(eth, ctrl | MTK_RING_MYIP_VLD, MTK_LRO_CTRL_DW2_CFG(idx));
+ }
+ 
+ /* Mark the IP setting of LRO slot @idx invalid and zero its destination-IP
+  * register.
+  */
+ static void mtk_hwlro_inval_ipaddr(struct mtk_eth *eth, int idx)
+ {
+ 	u32 ctrl = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx));
+ 
+ 	/* clear the valid flag, then wipe the stored address */
+ 	mtk_w32(eth, ctrl & ~MTK_RING_MYIP_VLD, MTK_LRO_CTRL_DW2_CFG(idx));
+ 	mtk_w32(eth, 0, MTK_LRO_DIP_DW0_CFG(idx));
+ }
+ 
+ /* Return how many of the MAC's MTK_MAX_LRO_IP_CNT LRO address slots hold a
+  * non-zero address.
+  */
+ static int mtk_hwlro_get_ip_cnt(struct mtk_mac *mac)
+ {
+ 	int used = 0;
+ 	int slot = MTK_MAX_LRO_IP_CNT;
+ 
+ 	while (slot-- > 0)
+ 		used += mac->hwlro_ip[slot] ? 1 : 0;
+ 
+ 	return used;
+ }
+ 
+ /* ethtool rule insert: record the rule's IPv4 destination address in the
+  * MAC's LRO slot fsp->location and program it into the hardware.
+  *
+  * Returns 0 on success, or -EINVAL when the rule is not a TCP/IPv4 flow, has
+  * a zero destination address, or names a slot outside the
+  * MTK_MAX_LRO_IP_CNT slots each MAC owns.
+  */
+ static int mtk_hwlro_add_ipaddr(struct net_device *dev,
+ 				struct ethtool_rxnfc *cmd)
+ {
+ 	struct ethtool_rx_flow_spec *fsp =
+ 		(struct ethtool_rx_flow_spec *)&cmd->fs;
+ 	struct mtk_mac *mac = netdev_priv(dev);
+ 	struct mtk_eth *eth = mac->hw;
+ 	int hwlro_idx;
+ 
+ 	/* bound the slot by the same constant that sizes the per-MAC table
+ 	 * and the hardware index stride below, not by a magic number
+ 	 */
+ 	if ((fsp->flow_type != TCP_V4_FLOW) ||
+ 	    (!fsp->h_u.tcp_ip4_spec.ip4dst) ||
+ 	    (fsp->location >= MTK_MAX_LRO_IP_CNT))
+ 		return -EINVAL;
+ 
+ 	mac->hwlro_ip[fsp->location] = htonl(fsp->h_u.tcp_ip4_spec.ip4dst);
+ 	hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location;
+ 
+ 	mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac);
+ 
+ 	mtk_hwlro_val_ipaddr(eth, hwlro_idx, mac->hwlro_ip[fsp->location]);
+ 
+ 	return 0;
+ }
+ 
+ /* ethtool rule delete: clear the MAC's LRO slot fsp->location and
+  * invalidate the matching hardware entry.
+  *
+  * Returns 0 on success, or -EINVAL when the slot lies outside the
+  * MTK_MAX_LRO_IP_CNT slots each MAC owns.
+  */
+ static int mtk_hwlro_del_ipaddr(struct net_device *dev,
+ 				struct ethtool_rxnfc *cmd)
+ {
+ 	struct ethtool_rx_flow_spec *fsp =
+ 		(struct ethtool_rx_flow_spec *)&cmd->fs;
+ 	struct mtk_mac *mac = netdev_priv(dev);
+ 	struct mtk_eth *eth = mac->hw;
+ 	int hwlro_idx;
+ 
+ 	/* bound the slot by the constant that sizes the per-MAC table */
+ 	if (fsp->location >= MTK_MAX_LRO_IP_CNT)
+ 		return -EINVAL;
+ 
+ 	mac->hwlro_ip[fsp->location] = 0;
+ 	hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location;
+ 
+ 	mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac);
+ 
+ 	mtk_hwlro_inval_ipaddr(eth, hwlro_idx);
+ 
+ 	return 0;
+ }
+ 
+ /* Forget every LRO address recorded for this net device: zero each software
+  * slot, invalidate the matching hardware entry and reset the slot counter.
+  */
+ static void mtk_hwlro_netdev_disable(struct net_device *dev)
+ {
+ 	struct mtk_mac *mac = netdev_priv(dev);
+ 	struct mtk_eth *eth = mac->hw;
+ 	/* first hardware slot belonging to this MAC */
+ 	int first_idx = mac->id * MTK_MAX_LRO_IP_CNT;
+ 	int slot;
+ 
+ 	for (slot = 0; slot < MTK_MAX_LRO_IP_CNT; slot++) {
+ 		mac->hwlro_ip[slot] = 0;
+ 		mtk_hwlro_inval_ipaddr(eth, first_idx + slot);
+ 	}
+ 
+ 	mac->hwlro_ip_cnt = 0;
+ }
+ 
+ /* ethtool rule query: fill cmd->fs with the rule stored in LRO slot
+  * fsp->location. Only the TCP/IPv4 destination address is reported; the
+  * other fields are returned zeroed with their mask bits set.
+  *
+  * Returns 0 on success, or -EINVAL when the requested slot lies outside the
+  * MTK_MAX_LRO_IP_CNT slots each MAC owns.
+  */
+ static int mtk_hwlro_get_fdir_entry(struct net_device *dev,
+ 				    struct ethtool_rxnfc *cmd)
+ {
+ 	struct mtk_mac *mac = netdev_priv(dev);
+ 	struct ethtool_rx_flow_spec *fsp =
+ 		(struct ethtool_rx_flow_spec *)&cmd->fs;
+ 
+ 	/* location comes from the ethtool request; reject it before it is
+ 	 * used to index hwlro_ip[]
+ 	 */
+ 	if (fsp->location >= MTK_MAX_LRO_IP_CNT)
+ 		return -EINVAL;
+ 
+ 	/* only tcp dst ipv4 is meaningful, others are meaningless */
+ 	fsp->flow_type = TCP_V4_FLOW;
+ 	fsp->h_u.tcp_ip4_spec.ip4dst = ntohl(mac->hwlro_ip[fsp->location]);
+ 	fsp->m_u.tcp_ip4_spec.ip4dst = 0;
+ 
+ 	fsp->h_u.tcp_ip4_spec.ip4src = 0;
+ 	fsp->m_u.tcp_ip4_spec.ip4src = 0xffffffff;
+ 	fsp->h_u.tcp_ip4_spec.psrc = 0;
+ 	fsp->m_u.tcp_ip4_spec.psrc = 0xffff;
+ 	fsp->h_u.tcp_ip4_spec.pdst = 0;
+ 	fsp->m_u.tcp_ip4_spec.pdst = 0xffff;
+ 	fsp->h_u.tcp_ip4_spec.tos = 0;
+ 	fsp->m_u.tcp_ip4_spec.tos = 0xff;
+ 
+ 	return 0;
+ }
+ 
+ /* ethtool rule listing: write the index of every occupied LRO slot into
+  * @rule_locs and store the number of entries written in cmd->rule_cnt.
+  *
+  * On entry cmd->rule_cnt is treated as the capacity of @rule_locs.
+  * Returns 0 on success, or -EMSGSIZE when there are more occupied slots than
+  * the array can hold.
+  */
+ static int mtk_hwlro_get_fdir_all(struct net_device *dev,
+ 				  struct ethtool_rxnfc *cmd,
+ 				  u32 *rule_locs)
+ {
+ 	struct mtk_mac *mac = netdev_priv(dev);
+ 	int cnt = 0;
+ 	int i;
+ 
+ 	for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) {
+ 		if (!mac->hwlro_ip[i])
+ 			continue;
+ 
+ 		/* never write past the array supplied by the caller */
+ 		if ((u32)cnt >= cmd->rule_cnt)
+ 			return -EMSGSIZE;
+ 
+ 		rule_locs[cnt] = i;
+ 		cnt++;
+ 	}
+ 
+ 	cmd->rule_cnt = cnt;
+ 
+ 	return 0;
+ }
+ 
+ /* ndo_fix_features hook: when the requested feature set drops NETIF_F_LRO
+  * while LRO addresses are still recorded for this device, log a notice and
+  * put the flag back. Returns the (possibly adjusted) feature set.
+  */
+ static netdev_features_t mtk_fix_features(struct net_device *dev,
+ 					  netdev_features_t features)
+ {
+ 	struct mtk_mac *mac;
+ 
+ 	/* LRO is staying on: nothing to adjust */
+ 	if (features & NETIF_F_LRO)
+ 		return features;
+ 
+ 	mac = netdev_priv(dev);
+ 	if (!mtk_hwlro_get_ip_cnt(mac))
+ 		return features;
+ 
+ 	netdev_info(dev, "RX flow is programmed, LRO should keep on\n");
+ 
+ 	return features | NETIF_F_LRO;
+ }
+ 
+ /* ndo_set_features hook: when NETIF_F_LRO is being switched off, drop all
+  * LRO addresses programmed for the device. Always returns 0.
+  */
+ static int mtk_set_features(struct net_device *dev, netdev_features_t features)
+ {
+ 	netdev_features_t changed = dev->features ^ features;
+ 
+ 	/* act only on an on -> off transition of the LRO flag */
+ 	if ((changed & NETIF_F_LRO) && !(features & NETIF_F_LRO))
+ 		mtk_hwlro_netdev_disable(dev);
+ 
+ 	return 0;
+ }
+ 
  /* wait for DMA to finish whatever it is doing before we start using it again */
  static int mtk_dma_busy_wait(struct mtk_eth *eth)
  {
@@@ -1218,6 -1553,7 +1553,7 @@@
  static int mtk_dma_init(struct mtk_eth *eth)
  {
  	int err;
+ 	u32 i;
  
  	if (mtk_dma_busy_wait(eth))
  		return -EBUSY;
@@@ -1233,10 -1569,21 +1569,21 @@@
  	if (err)
  		return err;
  
- 	err = mtk_rx_alloc(eth);
+ 	err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_NORMAL);
  	if (err)
  		return err;
  
+ 	if (eth->hwlro) {
+ 		for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) {
+ 			err = mtk_rx_alloc(eth, i, MTK_RX_FLAGS_HWLRO);
+ 			if (err)
+ 				return err;
+ 		}
+ 		err = mtk_hwlro_rx_init(eth);
+ 		if (err)
+ 			return err;
+ 	}
+ 
  	/* Enable random early drop and set drop threshold automatically */
  	mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN | FC_THRES_MIN,
  		MTK_QDMA_FC_THRES);
@@@ -1261,7 -1608,14 +1608,14 @@@ static void mtk_dma_free(struct mtk_et
  		eth->phy_scratch_ring = 0;
  	}
  	mtk_tx_clean(eth);
- 	mtk_rx_clean(eth);
+ 	mtk_rx_clean(eth, 0);
+ 
+ 	if (eth->hwlro) {
+ 		mtk_hwlro_rx_uninit(eth);
+ 		for (i = 1; i < MTK_MAX_RX_RING_NUM; i++)
+ 			mtk_rx_clean(eth, i);
+ 	}
+ 
  	kfree(eth->scratch_head);
  }
  
@@@ -1282,7 -1636,7 +1636,7 @@@ static irqreturn_t mtk_handle_irq_rx(in
  
  	if (likely(napi_schedule_prep(&eth->rx_napi))) {
  		__napi_schedule(&eth->rx_napi);
- 		mtk_irq_disable(eth, MTK_RX_DONE_INT);
+ 		mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
  	}
  
  	return IRQ_HANDLED;
@@@ -1294,7 -1648,7 +1648,7 @@@ static irqreturn_t mtk_handle_irq_tx(in
  
  	if (likely(napi_schedule_prep(&eth->tx_napi))) {
  		__napi_schedule(&eth->tx_napi);
- 		mtk_irq_disable(eth, MTK_TX_DONE_INT);
+ 		mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
  	}
  
  	return IRQ_HANDLED;
@@@ -1305,11 -1659,12 +1659,12 @@@ static void mtk_poll_controller(struct 
  {
  	struct mtk_mac *mac = netdev_priv(dev);
  	struct mtk_eth *eth = mac->hw;
- 	u32 int_mask = MTK_TX_DONE_INT | MTK_RX_DONE_INT;
  
- 	mtk_irq_disable(eth, int_mask);
+ 	mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+ 	mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
  	mtk_handle_irq_rx(eth->irq[2], dev);
- 	mtk_irq_enable(eth, int_mask);
+ 	mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+ 	mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
  }
  #endif
  
@@@ -1324,11 -1679,15 +1679,15 @@@ static int mtk_start_dma(struct mtk_et
  	}
  
  	mtk_w32(eth,
- 		MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN |
- 		MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS |
- 		MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO,
+ 		MTK_TX_WB_DDONE | MTK_TX_DMA_EN |
+ 		MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO,
  		MTK_QDMA_GLO_CFG);
  
+ 	mtk_w32(eth,
+ 		MTK_RX_DMA_EN | MTK_RX_2B_OFFSET |
+ 		MTK_RX_BT_32DWORDS | MTK_MULTI_EN,
+ 		MTK_PDMA_GLO_CFG);
+ 
  	return 0;
  }
  
@@@ -1346,7 -1705,8 +1705,8 @@@ static int mtk_open(struct net_device *
  
  		napi_enable(&eth->tx_napi);
  		napi_enable(&eth->rx_napi);
- 		mtk_irq_enable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+ 		mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+ 		mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
  	}
  	atomic_inc(&eth->dma_refcnt);
  
@@@ -1358,16 -1718,15 +1718,15 @@@
  
  static void mtk_stop_dma(struct mtk_eth *eth, u32 glo_cfg)
  {
- 	unsigned long flags;
  	u32 val;
  	int i;
  
  	/* stop the dma engine */
- 	spin_lock_irqsave(&eth->page_lock, flags);
+ 	spin_lock_bh(&eth->page_lock);
  	val = mtk_r32(eth, glo_cfg);
  	mtk_w32(eth, val & ~(MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN),
  		glo_cfg);
- 	spin_unlock_irqrestore(&eth->page_lock, flags);
+ 	spin_unlock_bh(&eth->page_lock);
  
  	/* wait for dma stop */
  	for (i = 0; i < 10; i++) {
@@@ -1392,7 -1751,8 +1751,8 @@@ static int mtk_stop(struct net_device *
  	if (!atomic_dec_and_test(&eth->dma_refcnt))
  		return 0;
  
- 	mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+ 	mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT);
+ 	mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT);
  	napi_disable(&eth->tx_napi);
  	napi_disable(&eth->rx_napi);
  
@@@ -1403,15 -1763,44 +1763,44 @@@
  	return 0;
  }
  
- static int __init mtk_hw_init(struct mtk_eth *eth)
+ static void ethsys_reset(struct mtk_eth *eth, u32 reset_bits)
  {
- 	int err, i;
+ 	regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL,
+ 			   reset_bits,
+ 			   reset_bits);
+ 
+ 	usleep_range(1000, 1100);
+ 	regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL,
+ 			   reset_bits,
+ 			   ~reset_bits);
+ 	mdelay(10);
+ }
+ 
+ static int mtk_hw_init(struct mtk_eth *eth)
+ {
+ 	int i, val;
+ 
+ 	if (test_and_set_bit(MTK_HW_INIT, &eth->state))
+ 		return 0;
  
- 	/* reset the frame engine */
- 	reset_control_assert(eth->rstc);
- 	usleep_range(10, 20);
- 	reset_control_deassert(eth->rstc);
- 	usleep_range(10, 20);
+ 	pm_runtime_enable(eth->dev);
+ 	pm_runtime_get_sync(eth->dev);
+ 
+ 	clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]);
+ 	clk_prepare_enable(eth->clks[MTK_CLK_ESW]);
+ 	clk_prepare_enable(eth->clks[MTK_CLK_GP1]);
+ 	clk_prepare_enable(eth->clks[MTK_CLK_GP2]);
+ 	ethsys_reset(eth, RSTCTRL_FE);
+ 	ethsys_reset(eth, RSTCTRL_PPE);
+ 
+ 	regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val);
+ 	for (i = 0; i < MTK_MAC_COUNT; i++) {
+ 		if (!eth->mac[i])
+ 			continue;
+ 		val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, eth->mac[i]->id);
+ 		val |= SYSCFG0_GE_MODE(eth->mac[i]->ge_mode, eth->mac[i]->id);
+ 	}
+ 	regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val);
  
  	/* Set GE2 driving and slew rate */
  	regmap_write(eth->pctl, GPIO_DRV_SEL10, 0xa00);
@@@ -1431,22 -1820,11 +1820,11 @@@
  	/* Enable RX VLan Offloading */
  	mtk_w32(eth, 1, MTK_CDMP_EG_CTRL);
  
- 	err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0,
- 			       dev_name(eth->dev), eth);
- 	if (err)
- 		return err;
- 	err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0,
- 			       dev_name(eth->dev), eth);
- 	if (err)
- 		return err;
- 
- 	err = mtk_mdio_init(eth);
- 	if (err)
- 		return err;
- 
  	/* disable delay and normal interrupt */
  	mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
- 	mtk_irq_disable(eth, ~0);
+ 	mtk_w32(eth, 0, MTK_PDMA_DELAY_INT);
+ 	mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
+ 	mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
  	mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
  	mtk_w32(eth, 0, MTK_RST_GL);
  
@@@ -1460,9 -1838,8 +1838,8 @@@
  	for (i = 0; i < 2; i++) {
  		u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i));
  
- 		/* setup the forward port to send frame to QDMA */
+ 		/* setup the forward port to send frame to PDMA */
  		val &= ~0xffff;
- 		val |= 0x5555;
  
  		/* Enable RX checksum */
  		val |= MTK_GDMA_ICS_EN | MTK_GDMA_TCS_EN | MTK_GDMA_UCS_EN;
@@@ -1474,6 -1851,22 +1851,22 @@@
  	return 0;
  }
  
+ /* Undo mtk_hw_init(): if the MTK_HW_INIT state bit was set, clear it,
+  * disable and unprepare the GP2, GP1, ESW and ETHIF clocks in that order,
+  * then drop the runtime-PM reference and disable runtime PM.
+  * Does nothing when the bit was not set. Always returns 0.
+  */
+ static int mtk_hw_deinit(struct mtk_eth *eth)
+ {
+ 	if (test_and_clear_bit(MTK_HW_INIT, &eth->state)) {
+ 		clk_disable_unprepare(eth->clks[MTK_CLK_GP2]);
+ 		clk_disable_unprepare(eth->clks[MTK_CLK_GP1]);
+ 		clk_disable_unprepare(eth->clks[MTK_CLK_ESW]);
+ 		clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]);
+ 
+ 		pm_runtime_put_sync(eth->dev);
+ 		pm_runtime_disable(eth->dev);
+ 	}
+ 
+ 	return 0;
+ }
+ 
  static int __init mtk_init(struct net_device *dev)
  {
  	struct mtk_mac *mac = netdev_priv(dev);
@@@ -1501,7 -1894,8 +1894,8 @@@ static void mtk_uninit(struct net_devic
  	struct mtk_eth *eth = mac->hw;
  
  	phy_disconnect(mac->phy_dev);
- 	mtk_irq_disable(eth, ~0);
+ 	mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
+ 	mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
  }
  
  static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
@@@ -1528,6 -1922,12 +1922,12 @@@ static void mtk_pending_work(struct wor
  
  	rtnl_lock();
  
+ 	dev_dbg(eth->dev, "[%s][%d] reset\n", __func__, __LINE__);
+ 
+ 	while (test_and_set_bit_lock(MTK_RESETTING, &eth->state))
+ 		cpu_relax();
+ 
+ 	dev_dbg(eth->dev, "[%s][%d] mtk_stop starts\n", __func__, __LINE__);
  	/* stop all devices to make sure that dma is properly shut down */
  	for (i = 0; i < MTK_MAC_COUNT; i++) {
  		if (!eth->netdev[i])
@@@ -1535,6 -1935,27 +1935,27 @@@
  		mtk_stop(eth->netdev[i]);
  		__set_bit(i, &restart);
  	}
+ 	dev_dbg(eth->dev, "[%s][%d] mtk_stop ends\n", __func__, __LINE__);
+ 
+ 	/* restart underlying hardware such as power, clock, pin mux
+ 	 * and the connected phy
+ 	 */
+ 	mtk_hw_deinit(eth);
+ 
+ 	if (eth->dev->pins)
+ 		pinctrl_select_state(eth->dev->pins->p,
+ 				     eth->dev->pins->default_state);
+ 	mtk_hw_init(eth);
+ 
+ 	for (i = 0; i < MTK_MAC_COUNT; i++) {
+ 		if (!eth->mac[i] ||
+ 		    of_phy_is_fixed_link(eth->mac[i]->of_node))
+ 			continue;
+ 		err = phy_init_hw(eth->mac[i]->phy_dev);
+ 		if (err)
+ 			dev_err(eth->dev, "%s: PHY init failed.\n",
+ 				eth->netdev[i]->name);
+ 	}
  
  	/* restart DMA and enable IRQs */
  	for (i = 0; i < MTK_MAC_COUNT; i++) {
@@@ -1547,20 -1968,44 +1968,44 @@@
  			dev_close(eth->netdev[i]);
  		}
  	}
+ 
+ 	dev_dbg(eth->dev, "[%s][%d] reset done\n", __func__, __LINE__);
+ 
+ 	clear_bit_unlock(MTK_RESETTING, &eth->state);
+ 
  	rtnl_unlock();
  }
  
- static int mtk_cleanup(struct mtk_eth *eth)
+ static int mtk_free_dev(struct mtk_eth *eth)
  {
  	int i;
  
  	for (i = 0; i < MTK_MAC_COUNT; i++) {
  		if (!eth->netdev[i])
  			continue;
+ 		free_netdev(eth->netdev[i]);
+ 	}
+ 
+ 	return 0;
+ }
  
+ static int mtk_unreg_dev(struct mtk_eth *eth)
+ {
+ 	int i;
+ 
+ 	for (i = 0; i < MTK_MAC_COUNT; i++) {
+ 		if (!eth->netdev[i])
+ 			continue;
  		unregister_netdev(eth->netdev[i]);
- 		free_netdev(eth->netdev[i]);
  	}
+ 
+ 	return 0;
+ }
+ 
+ static int mtk_cleanup(struct mtk_eth *eth)
+ {
+ 	mtk_unreg_dev(eth);
+ 	mtk_free_dev(eth);
  	cancel_work_sync(&eth->pending_work);
  
  	return 0;
@@@ -1572,6 -2017,9 +2017,9 @@@ static int mtk_get_settings(struct net_
  	struct mtk_mac *mac = netdev_priv(dev);
  	int err;
  
+ 	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+ 		return -EBUSY;
+ 
  	err = phy_read_status(mac->phy_dev);
  	if (err)
  		return -ENODEV;
@@@ -1622,6 -2070,9 +2070,9 @@@ static int mtk_nway_reset(struct net_de
  {
  	struct mtk_mac *mac = netdev_priv(dev);
  
+ 	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+ 		return -EBUSY;
+ 
  	return genphy_restart_aneg(mac->phy_dev);
  }
  
@@@ -1630,6 -2081,9 +2081,9 @@@ static u32 mtk_get_link(struct net_devi
  	struct mtk_mac *mac = netdev_priv(dev);
  	int err;
  
+ 	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+ 		return -EBUSY;
+ 
  	err = genphy_update_link(mac->phy_dev);
  	if (err)
  		return ethtool_op_get_link(dev);
@@@ -1670,6 -2124,9 +2124,9 @@@ static void mtk_get_ethtool_stats(struc
  	unsigned int start;
  	int i;
  
+ 	if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state)))
+ 		return;
+ 
  	if (netif_running(dev) && netif_device_present(dev)) {
  		if (spin_trylock(&hwstats->stats_lock)) {
  			mtk_stats_update_mac(mac);
@@@ -1677,8 -2134,9 +2134,9 @@@
  		}
  	}
  
+ 	data_src = (u64 *)hwstats;
+ 
  	do {
- 		data_src = (u64*)hwstats;
  		data_dst = data;
  		start = u64_stats_fetch_begin_irq(&hwstats->syncp);
  
@@@ -1687,7 -2145,63 +2145,63 @@@
  	} while (u64_stats_fetch_retry_irq(&hwstats->syncp, start));
  }
  
- static struct ethtool_ops mtk_ethtool_ops = {
+ /* ethtool get_rxnfc hook: answer RX ring count, rule count, single-rule and
+  * all-rules queries. Returns -EOPNOTSUPP for any other command and for every
+  * command while NETIF_F_LRO is not set on the device; otherwise the result
+  * of the selected query.
+  */
+ static int mtk_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+ 			 u32 *rule_locs)
+ {
+ 	struct mtk_mac *mac;
+ 
+ 	/* every query handled here requires LRO to be enabled */
+ 	if (!(dev->features & NETIF_F_LRO))
+ 		return -EOPNOTSUPP;
+ 
+ 	switch (cmd->cmd) {
+ 	case ETHTOOL_GRXRINGS:
+ 		cmd->data = MTK_MAX_RX_RING_NUM;
+ 		return 0;
+ 	case ETHTOOL_GRXCLSRLCNT:
+ 		mac = netdev_priv(dev);
+ 		cmd->rule_cnt = mac->hwlro_ip_cnt;
+ 		return 0;
+ 	case ETHTOOL_GRXCLSRULE:
+ 		return mtk_hwlro_get_fdir_entry(dev, cmd);
+ 	case ETHTOOL_GRXCLSRLALL:
+ 		return mtk_hwlro_get_fdir_all(dev, cmd, rule_locs);
+ 	default:
+ 		return -EOPNOTSUPP;
+ 	}
+ }
+ 
+ /* ethtool set_rxnfc hook: insert or delete an LRO address rule. Returns
+  * -EOPNOTSUPP for any other command and for every command while NETIF_F_LRO
+  * is not set on the device; otherwise the result of the insert/delete.
+  */
+ static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+ {
+ 	/* rules can only be changed while LRO is enabled */
+ 	if (!(dev->features & NETIF_F_LRO))
+ 		return -EOPNOTSUPP;
+ 
+ 	switch (cmd->cmd) {
+ 	case ETHTOOL_SRXCLSRLINS:
+ 		return mtk_hwlro_add_ipaddr(dev, cmd);
+ 	case ETHTOOL_SRXCLSRLDEL:
+ 		return mtk_hwlro_del_ipaddr(dev, cmd);
+ 	default:
+ 		return -EOPNOTSUPP;
+ 	}
+ }
+ 
+ static const struct ethtool_ops mtk_ethtool_ops = {
  	.get_settings		= mtk_get_settings,
  	.set_settings		= mtk_set_settings,
  	.get_drvinfo		= mtk_get_drvinfo,
@@@ -1698,6 -2212,8 +2212,8 @@@
  	.get_strings		= mtk_get_strings,
  	.get_sset_count		= mtk_get_sset_count,
  	.get_ethtool_stats	= mtk_get_ethtool_stats,
+ 	.get_rxnfc		= mtk_get_rxnfc,
+ 	.set_rxnfc              = mtk_set_rxnfc,
  };
  
  static const struct net_device_ops mtk_netdev_ops = {
@@@ -1712,6 -2228,8 +2228,8 @@@
  	.ndo_change_mtu		= eth_change_mtu,
  	.ndo_tx_timeout		= mtk_tx_timeout,
  	.ndo_get_stats64        = mtk_get_stats64,
+ 	.ndo_fix_features	= mtk_fix_features,
+ 	.ndo_set_features	= mtk_set_features,
  #ifdef CONFIG_NET_POLL_CONTROLLER
  	.ndo_poll_controller	= mtk_poll_controller,
  #endif
@@@ -1750,6 -2268,9 +2268,9 @@@ static int mtk_add_mac(struct mtk_eth *
  	mac->hw = eth;
  	mac->of_node = np;
  
+ 	memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip));
+ 	mac->hwlro_ip_cnt = 0;
+ 
  	mac->hw_stats = devm_kzalloc(eth->dev,
  				     sizeof(*mac->hw_stats),
  				     GFP_KERNEL);
@@@ -1766,21 -2287,17 +2287,17 @@@
  	eth->netdev[id]->watchdog_timeo = 5 * HZ;
  	eth->netdev[id]->netdev_ops = &mtk_netdev_ops;
  	eth->netdev[id]->base_addr = (unsigned long)eth->base;
+ 
+ 	eth->netdev[id]->hw_features = MTK_HW_FEATURES;
+ 	if (eth->hwlro)
+ 		eth->netdev[id]->hw_features |= NETIF_F_LRO;
+ 
  	eth->netdev[id]->vlan_features = MTK_HW_FEATURES &
  		~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX);
  	eth->netdev[id]->features |= MTK_HW_FEATURES;
  	eth->netdev[id]->ethtool_ops = &mtk_ethtool_ops;
  
- 	err = register_netdev(eth->netdev[id]);
- 	if (err) {
- 		dev_err(eth->dev, "error bringing up device\n");
- 		goto free_netdev;
- 	}
  	eth->netdev[id]->irq = eth->irq[0];
- 	netif_info(eth, probe, eth->netdev[id],
- 		   "mediatek frame engine at 0x%08lx, irq %d\n",
- 		   eth->netdev[id]->base_addr, eth->irq[0]);
- 
  	return 0;
  
  free_netdev:
@@@ -1827,11 -2344,7 +2344,7 @@@ static int mtk_probe(struct platform_de
  		return PTR_ERR(eth->pctl);
  	}
  
- 	eth->rstc = devm_reset_control_get(&pdev->dev, "eth");
- 	if (IS_ERR(eth->rstc)) {
- 		dev_err(&pdev->dev, "no eth reset found\n");
- 		return PTR_ERR(eth->rstc);
- 	}
+ 	eth->hwlro = of_property_read_bool(pdev->dev.of_node, "mediatek,hwlro");
  
  	for (i = 0; i < 3; i++) {
  		eth->irq[i] = platform_get_irq(pdev, i);
@@@ -1850,11 -2363,6 +2363,6 @@@
  		}
  	}
  
- 	clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]);
- 	clk_prepare_enable(eth->clks[MTK_CLK_ESW]);
- 	clk_prepare_enable(eth->clks[MTK_CLK_GP1]);
- 	clk_prepare_enable(eth->clks[MTK_CLK_GP2]);
- 
  	eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE);
  	INIT_WORK(&eth->pending_work, mtk_pending_work);
  
@@@ -1872,7 -2380,35 +2380,35 @@@
  
  		err = mtk_add_mac(eth, mac_np);
  		if (err)
- 			goto err_free_dev;
+ 			goto err_deinit_hw;
+ 	}
+ 
+ 	err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0,
+ 			       dev_name(eth->dev), eth);
+ 	if (err)
+ 		goto err_free_dev;
+ 
+ 	err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0,
+ 			       dev_name(eth->dev), eth);
+ 	if (err)
+ 		goto err_free_dev;
+ 
+ 	err = mtk_mdio_init(eth);
+ 	if (err)
+ 		goto err_free_dev;
+ 
+ 	for (i = 0; i < MTK_MAX_DEVS; i++) {
+ 		if (!eth->netdev[i])
+ 			continue;
+ 
+ 		err = register_netdev(eth->netdev[i]);
+ 		if (err) {
+ 			dev_err(eth->dev, "error bringing up device\n");
+ 			goto err_deinit_mdio;
+ 		} else
+ 			netif_info(eth, probe, eth->netdev[i],
+ 				   "mediatek frame engine at 0x%08lx, irq %d\n",
+ 				   eth->netdev[i]->base_addr, eth->irq[0]);
  	}
  
  	/* we run 2 devices on the same DMA ring so we need a dummy device
@@@ -1888,8 -2424,13 +2424,13 @@@
  
  	return 0;
  
+ err_deinit_mdio:
+ 	mtk_mdio_cleanup(eth);
  err_free_dev:
- 	mtk_cleanup(eth);
+ 	mtk_free_dev(eth);
+ err_deinit_hw:
+ 	mtk_hw_deinit(eth);
+ 
  	return err;
  }
  
@@@ -1905,16 -2446,12 +2446,12 @@@ static int mtk_remove(struct platform_d
  		mtk_stop(eth->netdev[i]);
  	}
  
- 	clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]);
- 	clk_disable_unprepare(eth->clks[MTK_CLK_ESW]);
- 	clk_disable_unprepare(eth->clks[MTK_CLK_GP1]);
- 	clk_disable_unprepare(eth->clks[MTK_CLK_GP2]);
+ 	mtk_hw_deinit(eth);
  
  	netif_napi_del(&eth->tx_napi);
  	netif_napi_del(&eth->rx_napi);
  	mtk_cleanup(eth);
  	mtk_mdio_cleanup(eth);
- 	platform_set_drvdata(pdev, NULL);
  
  	return 0;
  }
@@@ -1923,7 -2460,6 +2460,7 @@@ const struct of_device_id of_mtk_match[
  	{ .compatible = "mediatek,mt7623-eth" },
  	{},
  };
 +MODULE_DEVICE_TABLE(of, of_mtk_match);
  
  static struct platform_driver mtk_driver = {
  	.probe = mtk_probe,
diff --combined drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index b247949,4927494..f75f864
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@@ -81,19 -81,12 +81,12 @@@ enum 
  			    MC_ADDR_CHANGE | \
  			    PROMISC_CHANGE)
  
- int esw_offloads_init(struct mlx5_eswitch *esw, int nvports);
- void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports);
- 
  static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
  					u32 events_mask)
  {
- 	int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)];
- 	int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+ 	int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]   = {0};
+ 	int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
  	void *nic_vport_ctx;
- 	int err;
- 
- 	memset(out, 0, sizeof(out));
- 	memset(in, 0, sizeof(in));
  
  	MLX5_SET(modify_nic_vport_context_in, in,
  		 opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
@@@ -116,99 -109,27 +109,27 @@@
  		MLX5_SET(nic_vport_context, nic_vport_ctx,
  			 event_on_promisc_change, 1);
  
- 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
- 	if (err)
- 		goto ex;
- 	err = mlx5_cmd_status_to_err_v2(out);
- 	if (err)
- 		goto ex;
- 	return 0;
- ex:
- 	return err;
+ 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
  }
  
  /* E-Switch vport context HW commands */
- static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport,
- 				       u32 *out, int outlen)
- {
- 	u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)];
- 
- 	memset(in, 0, sizeof(in));
- 
- 	MLX5_SET(query_nic_vport_context_in, in, opcode,
- 		 MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
- 
- 	MLX5_SET(query_esw_vport_context_in, in, vport_number, vport);
- 	if (vport)
- 		MLX5_SET(query_esw_vport_context_in, in, other_vport, 1);
- 
- 	return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen);
- }
- 
- static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
- 				 u16 *vlan, u8 *qos)
- {
- 	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)];
- 	int err;
- 	bool cvlan_strip;
- 	bool cvlan_insert;
- 
- 	memset(out, 0, sizeof(out));
- 
- 	*vlan = 0;
- 	*qos = 0;
- 
- 	if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
- 	    !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
- 		return -ENOTSUPP;
- 
- 	err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out));
- 	if (err)
- 		goto out;
- 
- 	cvlan_strip = MLX5_GET(query_esw_vport_context_out, out,
- 			       esw_vport_context.vport_cvlan_strip);
- 
- 	cvlan_insert = MLX5_GET(query_esw_vport_context_out, out,
- 				esw_vport_context.vport_cvlan_insert);
- 
- 	if (cvlan_strip || cvlan_insert) {
- 		*vlan = MLX5_GET(query_esw_vport_context_out, out,
- 				 esw_vport_context.cvlan_id);
- 		*qos = MLX5_GET(query_esw_vport_context_out, out,
- 				esw_vport_context.cvlan_pcp);
- 	}
- 
- 	esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n",
- 		  vport, *vlan, *qos);
- out:
- 	return err;
- }
- 
  static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
  					void *in, int inlen)
  {
- 	u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)];
- 
- 	memset(out, 0, sizeof(out));
+ 	u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0};
  
+ 	MLX5_SET(modify_esw_vport_context_in, in, opcode,
+ 		 MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
  	MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
  	if (vport)
  		MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);
- 
- 	MLX5_SET(modify_esw_vport_context_in, in, opcode,
- 		 MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
- 
- 	return mlx5_cmd_exec_check_status(dev, in, inlen,
- 					  out, sizeof(out));
+ 	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
  }
  
  static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
  				  u16 vlan, u8 qos, bool set)
  {
- 	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)];
- 
- 	memset(in, 0, sizeof(in));
+ 	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
  
  	if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
  	    !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
@@@ -216,7 -137,6 +137,6 @@@
  
  	esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n",
  		  vport, vlan, qos, set);
- 
  	if (set) {
  		MLX5_SET(modify_esw_vport_context_in, in,
  			 esw_vport_context.vport_cvlan_strip, 1);
@@@ -241,13 -161,10 +161,10 @@@
  static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index,
  				  u8 *mac, u8 vlan_valid, u16 vlan)
  {
- 	u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)];
- 	u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)];
+ 	u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]   = {0};
+ 	u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0};
  	u8 *in_mac_addr;
  
- 	memset(in, 0, sizeof(in));
- 	memset(out, 0, sizeof(out));
- 
  	MLX5_SET(set_l2_table_entry_in, in, opcode,
  		 MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
  	MLX5_SET(set_l2_table_entry_in, in, table_index, index);
@@@ -257,23 -174,18 +174,18 @@@
  	in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
  	ether_addr_copy(&in_mac_addr[2], mac);
  
- 	return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
- 					  out, sizeof(out));
+ 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
  }
  
  static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
  {
- 	u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)];
- 	u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)];
- 
- 	memset(in, 0, sizeof(in));
- 	memset(out, 0, sizeof(out));
+ 	u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]   = {0};
+ 	u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0};
  
  	MLX5_SET(delete_l2_table_entry_in, in, opcode,
  		 MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
  	MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
- 	return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
- 					  out, sizeof(out));
+ 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
  }
  
  static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix)
@@@ -340,7 -252,7 +252,7 @@@ __esw_fdb_set_vport_rule(struct mlx5_es
  
  	spec = mlx5_vzalloc(sizeof(*spec));
  	if (!spec) {
- 		pr_warn("FDB: Failed to alloc match parameters\n");
+ 		esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n");
  		return NULL;
  	}
  	dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
@@@ -374,8 -286,8 +286,8 @@@
  				   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
  				   0, &dest);
  	if (IS_ERR(flow_rule)) {
- 		pr_warn(
- 			"FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
+ 		esw_warn(esw->dev,
+ 			 "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
  			 dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
  		flow_rule = NULL;
  	}
@@@ -955,7 -867,7 +867,7 @@@ static void esw_update_vport_rx_mode(st
  	esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n",
  		  vport_num, promisc_all, promisc_mc);
  
- 	if (!vport->trusted || !vport->enabled) {
+ 	if (!vport->info.trusted || !vport->enabled) {
  		promisc_uc = 0;
  		promisc_mc = 0;
  		promisc_all = 0;
@@@ -1291,30 -1203,20 +1203,20 @@@ static int esw_vport_ingress_config(str
  				    struct mlx5_vport *vport)
  {
  	struct mlx5_flow_spec *spec;
- 	u8 smac[ETH_ALEN];
  	int err = 0;
  	u8 *smac_v;
  
- 	if (vport->spoofchk) {
- 		err = mlx5_query_nic_vport_mac_address(esw->dev, vport->vport, smac);
- 		if (err) {
- 			esw_warn(esw->dev,
- 				 "vport[%d] configure ingress rules failed, query smac failed, err(%d)\n",
- 				 vport->vport, err);
- 			return err;
- 		}
+ 	if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) {
+ 		mlx5_core_warn(esw->dev,
+ 			       "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
+ 			       vport->vport);
+ 		return -EPERM;
  
- 		if (!is_valid_ether_addr(smac)) {
- 			mlx5_core_warn(esw->dev,
- 				       "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",
- 				       vport->vport);
- 			return -EPERM;
- 		}
  	}
  
  	esw_vport_cleanup_ingress_rules(esw, vport);
  
- 	if (!vport->vlan && !vport->qos && !vport->spoofchk) {
+ 	if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
  		esw_vport_disable_ingress_acl(esw, vport);
  		return 0;
  	}
@@@ -1323,7 -1225,7 +1225,7 @@@
  
  	esw_debug(esw->dev,
  		  "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
- 		  vport->vport, vport->vlan, vport->qos);
+ 		  vport->vport, vport->info.vlan, vport->info.qos);
  
  	spec = mlx5_vzalloc(sizeof(*spec));
  	if (!spec) {
@@@ -1333,16 -1235,16 +1235,16 @@@
  		goto out;
  	}
  
- 	if (vport->vlan || vport->qos)
+ 	if (vport->info.vlan || vport->info.qos)
  		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag);
  
- 	if (vport->spoofchk) {
+ 	if (vport->info.spoofchk) {
  		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
  		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
  		smac_v = MLX5_ADDR_OF(fte_match_param,
  				      spec->match_value,
  				      outer_headers.smac_47_16);
- 		ether_addr_copy(smac_v, smac);
+ 		ether_addr_copy(smac_v, vport->info.mac);
  	}
  
  	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
@@@ -1352,8 -1254,9 +1254,9 @@@
  				   0, NULL);
  	if (IS_ERR(vport->ingress.allow_rule)) {
  		err = PTR_ERR(vport->ingress.allow_rule);
- 		pr_warn("vport[%d] configure ingress allow rule, err(%d)\n",
- 			vport->vport, err);
+ 		esw_warn(esw->dev,
+ 			 "vport[%d] configure ingress allow rule, err(%d)\n",
+ 			 vport->vport, err);
  		vport->ingress.allow_rule = NULL;
  		goto out;
  	}
@@@ -1365,8 -1268,9 +1268,9 @@@
  				   0, NULL);
  	if (IS_ERR(vport->ingress.drop_rule)) {
  		err = PTR_ERR(vport->ingress.drop_rule);
- 		pr_warn("vport[%d] configure ingress drop rule, err(%d)\n",
- 			vport->vport, err);
+ 		esw_warn(esw->dev,
+ 			 "vport[%d] configure ingress drop rule, err(%d)\n",
+ 			 vport->vport, err);
  		vport->ingress.drop_rule = NULL;
  		goto out;
  	}
@@@ -1386,7 -1290,7 +1290,7 @@@ static int esw_vport_egress_config(stru
  
  	esw_vport_cleanup_egress_rules(esw, vport);
  
- 	if (!vport->vlan && !vport->qos) {
+ 	if (!vport->info.vlan && !vport->info.qos) {
  		esw_vport_disable_egress_acl(esw, vport);
  		return 0;
  	}
@@@ -1395,7 -1299,7 +1299,7 @@@
  
  	esw_debug(esw->dev,
  		  "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
- 		  vport->vport, vport->vlan, vport->qos);
+ 		  vport->vport, vport->info.vlan, vport->info.qos);
  
  	spec = mlx5_vzalloc(sizeof(*spec));
  	if (!spec) {
@@@ -1409,7 -1313,7 +1313,7 @@@
  	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag);
  	MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag);
  	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
- 	MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->vlan);
+ 	MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan);
  
  	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
  	vport->egress.allowed_vlan =
@@@ -1418,8 -1322,9 +1322,9 @@@
  				   0, NULL);
  	if (IS_ERR(vport->egress.allowed_vlan)) {
  		err = PTR_ERR(vport->egress.allowed_vlan);
- 		pr_warn("vport[%d] configure egress allowed vlan rule failed, err(%d)\n",
- 			vport->vport, err);
+ 		esw_warn(esw->dev,
+ 			 "vport[%d] configure egress allowed vlan rule failed, err(%d)\n",
+ 			 vport->vport, err);
  		vport->egress.allowed_vlan = NULL;
  		goto out;
  	}
@@@ -1432,8 -1337,9 +1337,9 @@@
  				   0, NULL);
  	if (IS_ERR(vport->egress.drop_rule)) {
  		err = PTR_ERR(vport->egress.drop_rule);
- 		pr_warn("vport[%d] configure egress drop rule failed, err(%d)\n",
- 			vport->vport, err);
+ 		esw_warn(esw->dev,
+ 			 "vport[%d] configure egress drop rule failed, err(%d)\n",
+ 			 vport->vport, err);
  		vport->egress.drop_rule = NULL;
  	}
  out:
@@@ -1441,6 -1347,41 +1347,41 @@@
  	return err;
  }
  
+ static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
+ {
+ 	((u8 *)node_guid)[7] = mac[0];
+ 	((u8 *)node_guid)[6] = mac[1];
+ 	((u8 *)node_guid)[5] = mac[2];
+ 	((u8 *)node_guid)[4] = 0xff;
+ 	((u8 *)node_guid)[3] = 0xfe;
+ 	((u8 *)node_guid)[2] = mac[3];
+ 	((u8 *)node_guid)[1] = mac[4];
+ 	((u8 *)node_guid)[0] = mac[5];
+ }
+ 
+ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
+ 				 struct mlx5_vport *vport)
+ {
+ 	int vport_num = vport->vport;
+ 
+ 	if (!vport_num)
+ 		return;
+ 
+ 	mlx5_modify_vport_admin_state(esw->dev,
+ 				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+ 				      vport_num,
+ 				      vport->info.link_state);
+ 	mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac);
+ 	mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid);
+ 	modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos,
+ 			       (vport->info.vlan || vport->info.qos));
+ 
+ 	/* Only legacy mode needs ACLs */
+ 	if (esw->mode == SRIOV_LEGACY) {
+ 		esw_vport_ingress_config(esw, vport);
+ 		esw_vport_egress_config(esw, vport);
+ 	}
+ }
  static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
  			     int enable_events)
  {
@@@ -1451,23 -1392,17 +1392,17 @@@
  
  	esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);
  
- 	/* Only VFs need ACLs for VST and spoofchk filtering */
- 	if (vport_num && esw->mode == SRIOV_LEGACY) {
- 		esw_vport_ingress_config(esw, vport);
- 		esw_vport_egress_config(esw, vport);
- 	}
- 
- 	mlx5_modify_vport_admin_state(esw->dev,
- 				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
- 				      vport_num,
- 				      MLX5_ESW_VPORT_ADMIN_STATE_AUTO);
+ 	/* Restore old vport configuration */
+ 	esw_apply_vport_conf(esw, vport);
  
  	/* Sync with current vport context */
  	vport->enabled_events = enable_events;
  	vport->enabled = true;
  
  	/* only PF is trusted by default */
- 	vport->trusted = (vport_num) ? false : true;
+ 	if (!vport_num)
+ 		vport->info.trusted = true;
+ 
  	esw_vport_change_handle_locked(vport);
  
  	esw->enabled_vports++;
@@@ -1487,11 -1422,6 +1422,6 @@@ static void esw_disable_vport(struct ml
  	vport->enabled = false;
  
  	synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC));
- 
- 	mlx5_modify_vport_admin_state(esw->dev,
- 				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
- 				      vport_num,
- 				      MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
  	/* Wait for current already scheduled events to complete */
  	flush_workqueue(esw->work_queue);
  	/* Disable events from this vport */
@@@ -1503,7 -1433,12 +1433,12 @@@
  	 */
  	esw_vport_change_handle_locked(vport);
  	vport->enabled_events = 0;
+ 
  	if (vport_num && esw->mode == SRIOV_LEGACY) {
+ 		mlx5_modify_vport_admin_state(esw->dev,
+ 					      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+ 					      vport_num,
+ 					      MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
  		esw_vport_disable_egress_acl(esw, vport);
  		esw_vport_disable_ingress_acl(esw, vport);
  	}
@@@ -1554,7 -1489,6 +1489,7 @@@ int mlx5_eswitch_enable_sriov(struct ml
  
  abort:
  	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
 +	esw->mode = SRIOV_NONE;
  	return err;
  }
  
@@@ -1590,6 -1524,25 +1525,25 @@@ void mlx5_eswitch_disable_sriov(struct 
  	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
  }
  
+ void mlx5_eswitch_attach(struct mlx5_eswitch *esw)
+ {
+ 	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+ 	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ 		return;
+ 
+ 	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
+ 	/* VF Vports will be enabled when SRIOV is enabled */
+ }
+ 
+ void mlx5_eswitch_detach(struct mlx5_eswitch *esw)
+ {
+ 	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
+ 	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+ 		return;
+ 
+ 	esw_disable_vport(esw, 0);
+ }
+ 
  int mlx5_eswitch_init(struct mlx5_core_dev *dev)
  {
  	int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
@@@ -1657,6 -1610,7 +1611,7 @@@
  		struct mlx5_vport *vport = &esw->vports[vport_num];
  
  		vport->vport = vport_num;
+ 		vport->info.link_state = MLX5_ESW_VPORT_ADMIN_STATE_AUTO;
  		vport->dev = dev;
  		INIT_WORK(&vport->vport_change_handler,
  			  esw_vport_change_handler);
@@@ -1667,8 -1621,6 +1622,6 @@@
  	esw->mode = SRIOV_NONE;
  
  	dev->priv.eswitch = esw;
- 	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
- 	/* VF Vports will be enabled when SRIOV is enabled */
  	return 0;
  abort:
  	if (esw->work_queue)
@@@ -1687,7 -1639,6 +1640,6 @@@ void mlx5_eswitch_cleanup(struct mlx5_e
  		return;
  
  	esw_info(esw->dev, "cleanup\n");
- 	esw_disable_vport(esw, 0);
  
  	esw->dev->priv.eswitch = NULL;
  	destroy_workqueue(esw->work_queue);
@@@ -1720,18 -1671,6 +1672,6 @@@ void mlx5_eswitch_vport_event(struct ml
  	(esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev))
  #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
  
- static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
- {
- 	((u8 *)node_guid)[7] = mac[0];
- 	((u8 *)node_guid)[6] = mac[1];
- 	((u8 *)node_guid)[5] = mac[2];
- 	((u8 *)node_guid)[4] = 0xff;
- 	((u8 *)node_guid)[3] = 0xfe;
- 	((u8 *)node_guid)[2] = mac[3];
- 	((u8 *)node_guid)[1] = mac[4];
- 	((u8 *)node_guid)[0] = mac[5];
- }
- 
  int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
  			       int vport, u8 mac[ETH_ALEN])
  {
@@@ -1744,13 -1683,15 +1684,15 @@@
  	if (!LEGAL_VPORT(esw, vport))
  		return -EINVAL;
  
+ 	mutex_lock(&esw->state_lock);
  	evport = &esw->vports[vport];
  
- 	if (evport->spoofchk && !is_valid_ether_addr(mac)) {
+ 	if (evport->info.spoofchk && !is_valid_ether_addr(mac)) {
  		mlx5_core_warn(esw->dev,
  			       "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n",
  			       vport);
- 		return -EPERM;
+ 		err = -EPERM;
+ 		goto unlock;
  	}
  
  	err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
@@@ -1758,7 -1699,7 +1700,7 @@@
  		mlx5_core_warn(esw->dev,
  			       "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n",
  			       vport, err);
- 		return err;
+ 		goto unlock;
  	}
  
  	node_guid_gen_from_mac(&node_guid, mac);
@@@ -1768,9 -1709,12 +1710,12 @@@
  			       "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n",
  			       vport, err);
  
- 	mutex_lock(&esw->state_lock);
+ 	ether_addr_copy(evport->info.mac, mac);
+ 	evport->info.node_guid = node_guid;
  	if (evport->enabled && esw->mode == SRIOV_LEGACY)
  		err = esw_vport_ingress_config(esw, evport);
+ 
+ unlock:
  	mutex_unlock(&esw->state_lock);
  	return err;
  }
@@@ -1778,22 -1722,38 +1723,38 @@@
  int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
  				 int vport, int link_state)
  {
+ 	struct mlx5_vport *evport;
+ 	int err = 0;
+ 
  	if (!ESW_ALLOWED(esw))
  		return -EPERM;
  	if (!LEGAL_VPORT(esw, vport))
  		return -EINVAL;
  
- 	return mlx5_modify_vport_admin_state(esw->dev,
- 					     MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
- 					     vport, link_state);
+ 	mutex_lock(&esw->state_lock);
+ 	evport = &esw->vports[vport];
+ 
+ 	err = mlx5_modify_vport_admin_state(esw->dev,
+ 					    MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
+ 					    vport, link_state);
+ 	if (err) {
+ 		mlx5_core_warn(esw->dev,
+ 			       "Failed to set vport %d link state, err = %d",
+ 			       vport, err);
+ 		goto unlock;
+ 	}
+ 
+ 	evport->info.link_state = link_state;
+ 
+ unlock:
+ 	mutex_unlock(&esw->state_lock);
+ 	return 0;
  }
  
  int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
  				  int vport, struct ifla_vf_info *ivi)
  {
  	struct mlx5_vport *evport;
- 	u16 vlan;
- 	u8 qos;
  
  	if (!ESW_ALLOWED(esw))
  		return -EPERM;
@@@ -1805,14 -1765,14 +1766,14 @@@
  	memset(ivi, 0, sizeof(*ivi));
  	ivi->vf = vport - 1;
  
- 	mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac);
- 	ivi->linkstate = mlx5_query_vport_admin_state(esw->dev,
- 						      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
- 						      vport);
- 	query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos);
- 	ivi->vlan = vlan;
- 	ivi->qos = qos;
- 	ivi->spoofchk = evport->spoofchk;
+ 	mutex_lock(&esw->state_lock);
+ 	ether_addr_copy(ivi->mac, evport->info.mac);
+ 	ivi->linkstate = evport->info.link_state;
+ 	ivi->vlan = evport->info.vlan;
+ 	ivi->qos = evport->info.qos;
+ 	ivi->spoofchk = evport->info.spoofchk;
+ 	ivi->trusted = evport->info.trusted;
+ 	mutex_unlock(&esw->state_lock);
  
  	return 0;
  }
@@@ -1832,23 -1792,23 +1793,23 @@@ int mlx5_eswitch_set_vport_vlan(struct 
  	if (vlan || qos)
  		set = 1;
  
+ 	mutex_lock(&esw->state_lock);
  	evport = &esw->vports[vport];
  
  	err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set);
  	if (err)
- 		return err;
+ 		goto unlock;
  
- 	mutex_lock(&esw->state_lock);
- 	evport->vlan = vlan;
- 	evport->qos = qos;
+ 	evport->info.vlan = vlan;
+ 	evport->info.qos = qos;
  	if (evport->enabled && esw->mode == SRIOV_LEGACY) {
  		err = esw_vport_ingress_config(esw, evport);
  		if (err)
- 			goto out;
+ 			goto unlock;
  		err = esw_vport_egress_config(esw, evport);
  	}
  
- out:
+ unlock:
  	mutex_unlock(&esw->state_lock);
  	return err;
  }
@@@ -1865,16 -1825,14 +1826,14 @@@ int mlx5_eswitch_set_vport_spoofchk(str
  	if (!LEGAL_VPORT(esw, vport))
  		return -EINVAL;
  
- 	evport = &esw->vports[vport];
- 
  	mutex_lock(&esw->state_lock);
- 	pschk = evport->spoofchk;
- 	evport->spoofchk = spoofchk;
- 	if (evport->enabled && esw->mode == SRIOV_LEGACY) {
+ 	evport = &esw->vports[vport];
+ 	pschk = evport->info.spoofchk;
+ 	evport->info.spoofchk = spoofchk;
+ 	if (evport->enabled && esw->mode == SRIOV_LEGACY)
  		err = esw_vport_ingress_config(esw, evport);
- 		if (err)
- 			evport->spoofchk = pschk;
- 	}
+ 	if (err)
+ 		evport->info.spoofchk = pschk;
  	mutex_unlock(&esw->state_lock);
  
  	return err;
@@@ -1890,10 -1848,9 +1849,9 @@@ int mlx5_eswitch_set_vport_trust(struc
  	if (!LEGAL_VPORT(esw, vport))
  		return -EINVAL;
  
- 	evport = &esw->vports[vport];
- 
  	mutex_lock(&esw->state_lock);
- 	evport->trusted = setting;
+ 	evport = &esw->vports[vport];
+ 	evport->info.trusted = setting;
  	if (evport->enabled)
  		esw_vport_change_handle_locked(evport);
  	mutex_unlock(&esw->state_lock);
@@@ -1906,7 -1863,7 +1864,7 @@@ int mlx5_eswitch_get_vport_stats(struc
  				 struct ifla_vf_stats *vf_stats)
  {
  	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
- 	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)];
+ 	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
  	int err = 0;
  	u32 *out;
  
@@@ -1919,8 -1876,6 +1877,6 @@@
  	if (!out)
  		return -ENOMEM;
  
- 	memset(in, 0, sizeof(in));
- 
  	MLX5_SET(query_vport_counter_in, in, opcode,
  		 MLX5_CMD_OP_QUERY_VPORT_COUNTER);
  	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
diff --combined drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 287ade1,7a0415e..113c323
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@@ -41,10 -41,8 +41,8 @@@
  int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
  			    struct mlx5_flow_table *ft)
  {
- 	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)];
- 	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)];
- 
- 	memset(in, 0, sizeof(in));
+ 	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {0};
+ 	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
  
  	MLX5_SET(set_flow_table_root_in, in, opcode,
  		 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
@@@ -55,30 -53,23 +53,23 @@@
  		MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
  	}
  
- 	memset(out, 0, sizeof(out));
- 	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- 					  sizeof(out));
+ 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
  }
  
  int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
  			       u16 vport,
+ 			       enum fs_flow_table_op_mod op_mod,
  			       enum fs_flow_table_type type, unsigned int level,
  			       unsigned int log_size, struct mlx5_flow_table
  			       *next_ft, unsigned int *table_id)
  {
- 	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)];
- 	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)];
+ 	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
+ 	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]   = {0};
  	int err;
  
- 	memset(in, 0, sizeof(in));
- 
  	MLX5_SET(create_flow_table_in, in, opcode,
  		 MLX5_CMD_OP_CREATE_FLOW_TABLE);
  
- 	if (next_ft) {
- 		MLX5_SET(create_flow_table_in, in, table_miss_mode, 1);
- 		MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id);
- 	}
  	MLX5_SET(create_flow_table_in, in, table_type, type);
  	MLX5_SET(create_flow_table_in, in, level, level);
  	MLX5_SET(create_flow_table_in, in, log_size, log_size);
@@@ -87,10 -78,23 +78,23 @@@
  		MLX5_SET(create_flow_table_in, in, other_vport, 1);
  	}
  
- 	memset(out, 0, sizeof(out));
- 	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- 					 sizeof(out));
+ 	switch (op_mod) {
+ 	case FS_FT_OP_MOD_NORMAL:
+ 		if (next_ft) {
+ 			MLX5_SET(create_flow_table_in, in, table_miss_mode, 1);
+ 			MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id);
+ 		}
+ 		break;
+ 
+ 	case FS_FT_OP_MOD_LAG_DEMUX:
+ 		MLX5_SET(create_flow_table_in, in, op_mod, 0x1);
+ 		if (next_ft)
+ 			MLX5_SET(create_flow_table_in, in, lag_master_next_table_id,
+ 				 next_ft->id);
+ 		break;
+ 	}
  
+ 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
  	if (!err)
  		*table_id = MLX5_GET(create_flow_table_out, out,
  				     table_id);
@@@ -100,11 -104,8 +104,8 @@@
  int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
  				struct mlx5_flow_table *ft)
  {
- 	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)];
- 	u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)];
- 
- 	memset(in, 0, sizeof(in));
- 	memset(out, 0, sizeof(out));
+ 	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]   = {0};
+ 	u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
  
  	MLX5_SET(destroy_flow_table_in, in, opcode,
  		 MLX5_CMD_OP_DESTROY_FLOW_TABLE);
@@@ -115,39 -116,49 +116,49 @@@
  		MLX5_SET(destroy_flow_table_in, in, other_vport, 1);
  	}
  
- 	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- 					  sizeof(out));
+ 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
  }
  
  int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev,
  			       struct mlx5_flow_table *ft,
  			       struct mlx5_flow_table *next_ft)
  {
- 	u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)];
- 	u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)];
- 
- 	memset(in, 0, sizeof(in));
- 	memset(out, 0, sizeof(out));
+ 	u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)]   = {0};
+ 	u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
  
  	MLX5_SET(modify_flow_table_in, in, opcode,
  		 MLX5_CMD_OP_MODIFY_FLOW_TABLE);
  	MLX5_SET(modify_flow_table_in, in, table_type, ft->type);
  	MLX5_SET(modify_flow_table_in, in, table_id, ft->id);
- 	if (ft->vport) {
- 		MLX5_SET(modify_flow_table_in, in, vport_number, ft->vport);
- 		MLX5_SET(modify_flow_table_in, in, other_vport, 1);
- 	}
- 	MLX5_SET(modify_flow_table_in, in, modify_field_select,
- 		 MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
- 	if (next_ft) {
- 		MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1);
- 		MLX5_SET(modify_flow_table_in, in, table_miss_id, next_ft->id);
+ 
+ 	if (ft->op_mod == FS_FT_OP_MOD_LAG_DEMUX) {
+ 		MLX5_SET(modify_flow_table_in, in, modify_field_select,
+ 			 MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID);
+ 		if (next_ft) {
+ 			MLX5_SET(modify_flow_table_in, in,
+ 				 lag_master_next_table_id, next_ft->id);
+ 		} else {
+ 			MLX5_SET(modify_flow_table_in, in,
+ 				 lag_master_next_table_id, 0);
+ 		}
  	} else {
- 		MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0);
+ 		if (ft->vport) {
+ 			MLX5_SET(modify_flow_table_in, in, vport_number,
+ 				 ft->vport);
+ 			MLX5_SET(modify_flow_table_in, in, other_vport, 1);
+ 		}
+ 		MLX5_SET(modify_flow_table_in, in, modify_field_select,
+ 			 MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID);
+ 		if (next_ft) {
+ 			MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1);
+ 			MLX5_SET(modify_flow_table_in, in, table_miss_id,
+ 				 next_ft->id);
+ 		} else {
+ 			MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0);
+ 		}
  	}
  
- 	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- 					  sizeof(out));
+ 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
  }
  
  int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev,
@@@ -155,12 -166,10 +166,10 @@@
  			       u32 *in,
  			       unsigned int *group_id)
  {
+ 	u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
  	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
- 	u32 out[MLX5_ST_SZ_DW(create_flow_group_out)];
  	int err;
  
- 	memset(out, 0, sizeof(out));
- 
  	MLX5_SET(create_flow_group_in, in, opcode,
  		 MLX5_CMD_OP_CREATE_FLOW_GROUP);
  	MLX5_SET(create_flow_group_in, in, table_type, ft->type);
@@@ -170,13 -179,10 +179,10 @@@
  		MLX5_SET(create_flow_group_in, in, other_vport, 1);
  	}
  
- 	err = mlx5_cmd_exec_check_status(dev, in,
- 					 inlen, out,
- 					 sizeof(out));
+ 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
  	if (!err)
  		*group_id = MLX5_GET(create_flow_group_out, out,
  				     group_id);
- 
  	return err;
  }
  
@@@ -184,11 -190,8 +190,8 @@@ int mlx5_cmd_destroy_flow_group(struct 
  				struct mlx5_flow_table *ft,
  				unsigned int group_id)
  {
- 	u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)];
- 	u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)];
- 
- 	memset(in, 0, sizeof(in));
- 	memset(out, 0, sizeof(out));
+ 	u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
+ 	u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]   = {0};
  
  	MLX5_SET(destroy_flow_group_in, in, opcode,
  		 MLX5_CMD_OP_DESTROY_FLOW_GROUP);
@@@ -200,8 -203,7 +203,7 @@@
  		MLX5_SET(destroy_flow_group_in, in, other_vport, 1);
  	}
  
- 	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- 					  sizeof(out));
+ 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
  }
  
  static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
@@@ -212,7 -214,7 +214,7 @@@
  {
  	unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) +
  		fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct);
- 	u32 out[MLX5_ST_SZ_DW(set_fte_out)];
+ 	u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0};
  	struct mlx5_flow_rule *dst;
  	void *in_flow_context;
  	void *in_match_value;
@@@ -290,11 -292,8 +292,8 @@@
  			 list_size);
  	}
  
- 	memset(out, 0, sizeof(out));
- 	err = mlx5_cmd_exec_check_status(dev, in, inlen, out,
- 					 sizeof(out));
+ 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
  	kvfree(in);
- 
  	return err;
  }
  
@@@ -303,7 -302,7 +302,7 @@@ int mlx5_cmd_create_fte(struct mlx5_cor
  			unsigned group_id,
  			struct fs_fte *fte)
  {
- 	return	mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
+ 	return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte);
  }
  
  int mlx5_cmd_update_fte(struct mlx5_core_dev *dev,
@@@ -327,12 -326,8 +326,8 @@@ int mlx5_cmd_delete_fte(struct mlx5_cor
  			struct mlx5_flow_table *ft,
  			unsigned int index)
  {
- 	u32 out[MLX5_ST_SZ_DW(delete_fte_out)];
- 	u32 in[MLX5_ST_SZ_DW(delete_fte_in)];
- 	int err;
- 
- 	memset(in, 0, sizeof(in));
- 	memset(out, 0, sizeof(out));
+ 	u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
+ 	u32 in[MLX5_ST_SZ_DW(delete_fte_in)]   = {0};
  
  	MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
  	MLX5_SET(delete_fte_in, in, table_type, ft->type);
@@@ -343,74 -338,55 +338,55 @@@
  		MLX5_SET(delete_fte_in, in, other_vport, 1);
  	}
  
- 	err =  mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
- 
- 	return err;
+ 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
  }
  
  int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id)
  {
- 	u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)];
- 	u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)];
+ 	u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]   = {0};
+ 	u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
  	int err;
  
- 	memset(in, 0, sizeof(in));
- 	memset(out, 0, sizeof(out));
- 
  	MLX5_SET(alloc_flow_counter_in, in, opcode,
  		 MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
  
- 	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- 					 sizeof(out));
- 	if (err)
- 		return err;
- 
- 	*id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
- 
- 	return 0;
+ 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ 	if (!err)
+ 		*id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
+ 	return err;
  }
  
  int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id)
  {
- 	u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)];
- 	u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)];
- 
- 	memset(in, 0, sizeof(in));
- 	memset(out, 0, sizeof(out));
+ 	u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)]   = {0};
+ 	u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0};
  
  	MLX5_SET(dealloc_flow_counter_in, in, opcode,
  		 MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
  	MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id);
- 
- 	return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out,
- 					  sizeof(out));
+ 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
  }
  
  int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id,
  		      u64 *packets, u64 *bytes)
  {
  	u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
- 		MLX5_ST_SZ_BYTES(traffic_counter)];
- 	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)];
+ 		MLX5_ST_SZ_BYTES(traffic_counter)]   = {0};
+ 	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
  	void *stats;
  	int err = 0;
  
- 	memset(in, 0, sizeof(in));
- 	memset(out, 0, sizeof(out));
- 
  	MLX5_SET(query_flow_counter_in, in, opcode,
  		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
  	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
  	MLX5_SET(query_flow_counter_in, in, flow_counter_id, id);
- 
- 	err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out));
+ 	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
  	if (err)
  		return err;
  
  	stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics);
  	*packets = MLX5_GET64(traffic_counter, stats, packets);
  	*bytes = MLX5_GET64(traffic_counter, stats, octets);
- 
  	return 0;
  }
  
@@@ -425,11 -401,11 +401,11 @@@ struct mlx5_cmd_fc_bulk 
  mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev, u16 id, int num)
  {
  	struct mlx5_cmd_fc_bulk *b;
 -	int outlen = sizeof(*b) +
 +	int outlen =
  		MLX5_ST_SZ_BYTES(query_flow_counter_out) +
  		MLX5_ST_SZ_BYTES(traffic_counter) * num;
  
 -	b = kzalloc(outlen, GFP_KERNEL);
 +	b = kzalloc(sizeof(*b) + outlen, GFP_KERNEL);
  	if (!b)
  		return NULL;
  
@@@ -448,18 -424,14 +424,14 @@@ void mlx5_cmd_fc_bulk_free(struct mlx5_
  int
  mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b)
  {
- 	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)];
- 
- 	memset(in, 0, sizeof(in));
+ 	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
  
  	MLX5_SET(query_flow_counter_in, in, opcode,
  		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
  	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
  	MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id);
  	MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num);
- 
- 	return mlx5_cmd_exec_check_status(dev, in, sizeof(in),
- 					  b->out, b->outlen);
+ 	return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen);
  }
  
  void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
@@@ -480,3 -452,51 +452,51 @@@
  	*packets = MLX5_GET64(traffic_counter, stats, packets);
  	*bytes = MLX5_GET64(traffic_counter, stats, octets);
  }
+ 
+ #define MAX_ENCAP_SIZE (128)
+ 
+ int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
+ 			 int header_type,
+ 			 size_t size,
+ 			 void *encap_header,
+ 			 u32 *encap_id)
+ {
+ 	u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
+ 	u32 in[MLX5_ST_SZ_DW(alloc_encap_header_in) +
+ 	      (MAX_ENCAP_SIZE / sizeof(u32))];
+ 	void *encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in,
+ 					     encap_header);
+ 	void *header = MLX5_ADDR_OF(encap_header_in, encap_header_in,
+ 				    encap_header);
+ 	int inlen = header - (void *)in  + size;
+ 	int err;
+ 
+ 	if (size > MAX_ENCAP_SIZE)
+ 		return -EINVAL;
+ 
+ 	memset(in, 0, inlen);
+ 	MLX5_SET(alloc_encap_header_in, in, opcode,
+ 		 MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
+ 	MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size);
+ 	MLX5_SET(encap_header_in, encap_header_in, header_type, header_type);
+ 	memcpy(header, encap_header, size);
+ 
+ 	memset(out, 0, sizeof(out));
+ 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+ 
+ 	*encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+ 	return err;
+ }
+ 
+ void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id)
+ {
+ 	u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
+ 	u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
+ 
+ 	memset(in, 0, sizeof(in));
+ 	MLX5_SET(dealloc_encap_header_in, in, opcode,
+ 		 MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
+ 	MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id);
+ 
+ 	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ }
diff --combined drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 39dadfc,415691e..aee3fd2
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@@ -60,6 -60,7 +60,7 @@@
  
  #include <linux/ktime.h>
  
+ #include <net/pkt_cls.h>
  #include <net/vxlan.h>
  
  #include "nfp_net_ctrl.h"
@@@ -1292,38 -1293,72 +1293,72 @@@ static void nfp_net_rx_csum(struct nfp_
  	}
  }
  
- /**
-  * nfp_net_set_hash() - Set SKB hash data
-  * @netdev: adapter's net_device structure
-  * @skb:   SKB to set the hash data on
-  * @rxd:   RX descriptor
-  *
-  * The RSS hash and hash-type are pre-pended to the packet data.
-  * Extract and decode it and set the skb fields.
-  */
  static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb,
- 			     struct nfp_net_rx_desc *rxd)
+ 			     unsigned int type, __be32 *hash)
  {
- 	struct nfp_net_rx_hash *rx_hash;
- 
- 	if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) ||
- 	    !(netdev->features & NETIF_F_RXHASH))
+ 	if (!(netdev->features & NETIF_F_RXHASH))
  		return;
  
- 	rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
- 
- 	switch (be32_to_cpu(rx_hash->hash_type)) {
+ 	switch (type) {
  	case NFP_NET_RSS_IPV4:
  	case NFP_NET_RSS_IPV6:
  	case NFP_NET_RSS_IPV6_EX:
- 		skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3);
+ 		skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3);
  		break;
  	default:
- 		skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4);
+ 		skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4);
  		break;
  	}
  }
  
+ static void
+ nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb,
+ 		      struct nfp_net_rx_desc *rxd)
+ {
+ 	struct nfp_net_rx_hash *rx_hash;
+ 
+ 	if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS))
+ 		return;
+ 
+ 	rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash));
+ 
+ 	nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type),
+ 			 &rx_hash->hash);
+ }
+ 
+ static void *
+ nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
+ 		   int meta_len)
+ {
+ 	u8 *data = skb->data - meta_len;
+ 	u32 meta_info;
+ 
+ 	meta_info = get_unaligned_be32(data);
+ 	data += 4;
+ 
+ 	while (meta_info) {
+ 		switch (meta_info & NFP_NET_META_FIELD_MASK) {
+ 		case NFP_NET_META_HASH:
+ 			meta_info >>= NFP_NET_META_FIELD_SIZE;
+ 			nfp_net_set_hash(netdev, skb,
+ 					 meta_info & NFP_NET_META_FIELD_MASK,
+ 					 (__be32 *)data);
+ 			data += 4;
+ 			break;
+ 		case NFP_NET_META_MARK:
+ 			skb->mark = get_unaligned_be32(data);
+ 			data += 4;
+ 			break;
+ 		default:
+ 			return NULL;
+ 		}
+ 
+ 		meta_info >>= NFP_NET_META_FIELD_SIZE;
+ 	}
+ 
+ 	return data;
+ }
+ 
  /**
   * nfp_net_rx() - receive up to @budget packets on @rx_ring
   * @rx_ring:   RX ring to receive from
@@@ -1438,14 -1473,29 +1473,29 @@@ static int nfp_net_rx(struct nfp_net_rx
  			skb_reserve(skb, nn->rx_offset);
  		skb_put(skb, data_len - meta_len);
  
- 		nfp_net_set_hash(nn->netdev, skb, rxd);
- 
  		/* Stats update */
  		u64_stats_update_begin(&r_vec->rx_sync);
  		r_vec->rx_pkts++;
  		r_vec->rx_bytes += skb->len;
  		u64_stats_update_end(&r_vec->rx_sync);
  
+ 		if (nn->fw_ver.major <= 3) {
+ 			nfp_net_set_hash_desc(nn->netdev, skb, rxd);
+ 		} else if (meta_len) {
+ 			void *end;
+ 
+ 			end = nfp_net_parse_meta(nn->netdev, skb, meta_len);
+ 			if (unlikely(end != skb->data)) {
+ 				u64_stats_update_begin(&r_vec->rx_sync);
+ 				r_vec->rx_drops++;
+ 				u64_stats_update_end(&r_vec->rx_sync);
+ 
+ 				dev_kfree_skb_any(skb);
+ 				nn_warn_ratelimit(nn, "invalid RX packet metadata\n");
+ 				continue;
+ 			}
+ 		}
+ 
  		skb_record_rx_queue(skb, rx_ring->idx);
  		skb->protocol = eth_type_trans(skb, nn->netdev);
  
@@@ -2044,16 -2094,12 +2094,16 @@@ static int nfp_net_netdev_open(struct n
  
  	nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings),
  			       GFP_KERNEL);
 -	if (!nn->rx_rings)
 +	if (!nn->rx_rings) {
 +		err = -ENOMEM;
  		goto err_free_lsc;
 +	}
  	nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings),
  			       GFP_KERNEL);
 -	if (!nn->tx_rings)
 +	if (!nn->tx_rings) {
 +		err = -ENOMEM;
  		goto err_free_rx_rings;
 +	}
  
  	for (r = 0; r < nn->num_r_vecs; r++) {
  		err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
@@@ -2386,6 -2432,31 +2436,31 @@@ static struct rtnl_link_stats64 *nfp_ne
  	return stats;
  }
  
+ static bool nfp_net_ebpf_capable(struct nfp_net *nn)
+ {
+ 	if (nn->cap & NFP_NET_CFG_CTRL_BPF &&
+ 	    nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI)
+ 		return true;
+ 	return false;
+ }
+ 
+ static int
+ nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
+ 		 struct tc_to_netdev *tc)
+ {
+ 	struct nfp_net *nn = netdev_priv(netdev);
+ 
+ 	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
+ 		return -ENOTSUPP;
+ 	if (proto != htons(ETH_P_ALL))
+ 		return -ENOTSUPP;
+ 
+ 	if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn))
+ 		return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf);
+ 
+ 	return -EINVAL;
+ }
+ 
  static int nfp_net_set_features(struct net_device *netdev,
  				netdev_features_t features)
  {
@@@ -2440,6 -2511,11 +2515,11 @@@
  			new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER;
  	}
  
+ 	if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
+ 		nn_err(nn, "Cannot disable HW TC offload while in use\n");
+ 		return -EBUSY;
+ 	}
+ 
  	nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n",
  	       netdev->features, features, changed);
  
@@@ -2589,6 -2665,7 +2669,7 @@@ static const struct net_device_ops nfp_
  	.ndo_stop		= nfp_net_netdev_close,
  	.ndo_start_xmit		= nfp_net_tx,
  	.ndo_get_stats64	= nfp_net_stat64,
+ 	.ndo_setup_tc		= nfp_net_setup_tc,
  	.ndo_tx_timeout		= nfp_net_tx_timeout,
  	.ndo_set_rx_mode	= nfp_net_set_rx_mode,
  	.ndo_change_mtu		= nfp_net_change_mtu,
@@@ -2614,7 -2691,7 +2695,7 @@@ void nfp_net_info(struct nfp_net *nn
  		nn->fw_ver.resv, nn->fw_ver.class,
  		nn->fw_ver.major, nn->fw_ver.minor,
  		nn->max_mtu);
- 	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
+ 	nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
  		nn->cap,
  		nn->cap & NFP_NET_CFG_CTRL_PROMISC  ? "PROMISC "  : "",
  		nn->cap & NFP_NET_CFG_CTRL_L2BC     ? "L2BCFILT " : "",
@@@ -2631,7 -2708,8 +2712,8 @@@
  		nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "",
  		nn->cap & NFP_NET_CFG_CTRL_IRQMOD   ? "IRQMOD "   : "",
  		nn->cap & NFP_NET_CFG_CTRL_VXLAN    ? "VXLAN "    : "",
- 		nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "	  : "");
+ 		nn->cap & NFP_NET_CFG_CTRL_NVGRE    ? "NVGRE "	  : "",
+ 		nfp_net_ebpf_capable(nn)            ? "BPF "	  : "");
  }
  
  /**
@@@ -2674,10 -2752,13 +2756,13 @@@ struct nfp_net *nfp_net_netdev_alloc(st
  	nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
  
  	spin_lock_init(&nn->reconfig_lock);
+ 	spin_lock_init(&nn->rx_filter_lock);
  	spin_lock_init(&nn->link_status_lock);
  
  	setup_timer(&nn->reconfig_timer,
  		    nfp_net_reconfig_timer, (unsigned long)nn);
+ 	setup_timer(&nn->rx_filter_stats_timer,
+ 		    nfp_net_filter_stats_timer, (unsigned long)nn);
  
  	return nn;
  }
@@@ -2799,6 -2880,9 +2884,9 @@@ int nfp_net_netdev_init(struct net_devi
  
  	netdev->features = netdev->hw_features;
  
+ 	if (nfp_net_ebpf_capable(nn))
+ 		netdev->hw_features |= NETIF_F_HW_TC;
+ 
  	/* Advertise but disable TSO by default. */
  	netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6);
  
diff --combined drivers/net/ethernet/qlogic/qed/qed_mcp.c
index f776a77,7d39cb9..bdc9ba9
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@@ -54,8 -54,7 +54,7 @@@ bool qed_mcp_is_init(struct qed_hwfn *p
  	return true;
  }
  
- void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn,
- 			   struct qed_ptt *p_ptt)
+ void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
  {
  	u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
  					PUBLIC_PORT);
@@@ -68,8 -67,7 +67,7 @@@
  		   p_hwfn->mcp_info->port_addr, MFW_PORT(p_hwfn));
  }
  
- void qed_mcp_read_mb(struct qed_hwfn *p_hwfn,
- 		     struct qed_ptt *p_ptt)
+ void qed_mcp_read_mb(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
  {
  	u32 length = MFW_DRV_MSG_MAX_DWORDS(p_hwfn->mcp_info->mfw_mb_length);
  	u32 tmp, i;
@@@ -99,8 -97,7 +97,7 @@@ int qed_mcp_free(struct qed_hwfn *p_hwf
  	return 0;
  }
  
- static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn,
- 				struct qed_ptt *p_ptt)
+ static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
  {
  	struct qed_mcp_info *p_info = p_hwfn->mcp_info;
  	u32 drv_mb_offsize, mfw_mb_offsize;
@@@ -143,8 -140,7 +140,7 @@@
  	return 0;
  }
  
- int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn,
- 		     struct qed_ptt *p_ptt)
+ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
  {
  	struct qed_mcp_info *p_info;
  	u32 size;
@@@ -165,9 -161,7 +161,7 @@@
  
  	size = MFW_DRV_MSG_MAX_DWORDS(p_info->mfw_mb_length) * sizeof(u32);
  	p_info->mfw_mb_cur = kzalloc(size, GFP_KERNEL);
- 	p_info->mfw_mb_shadow =
- 		kzalloc(sizeof(u32) * MFW_DRV_MSG_MAX_DWORDS(
- 				p_info->mfw_mb_length), GFP_KERNEL);
+ 	p_info->mfw_mb_shadow = kzalloc(size, GFP_KERNEL);
  	if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr)
  		goto err;
  
@@@ -177,7 -171,6 +171,6 @@@
  	return 0;
  
  err:
- 	DP_NOTICE(p_hwfn, "Failed to allocate mcp memory\n");
  	qed_mcp_free(p_hwfn);
  	return -ENOMEM;
  }
@@@ -189,8 -182,7 +182,7 @@@
   * access is achieved by setting a blocking flag, which will fail other
   * competing contexts to send their mailboxes.
   */
- static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn,
- 			   u32 cmd)
+ static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn, u32 cmd)
  {
  	spin_lock_bh(&p_hwfn->mcp_info->lock);
  
@@@ -221,15 -213,13 +213,13 @@@
  	return 0;
  }
  
- static void qed_mcp_mb_unlock(struct qed_hwfn	*p_hwfn,
- 			      u32		cmd)
+ static void qed_mcp_mb_unlock(struct qed_hwfn *p_hwfn, u32 cmd)
  {
  	if (cmd != DRV_MSG_CODE_LOAD_REQ && cmd != DRV_MSG_CODE_UNLOAD_REQ)
  		spin_unlock_bh(&p_hwfn->mcp_info->lock);
  }
  
- int qed_mcp_reset(struct qed_hwfn *p_hwfn,
- 		  struct qed_ptt *p_ptt)
+ int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
  {
  	u32 seq = ++p_hwfn->mcp_info->drv_mb_seq;
  	u8 delay = CHIP_MCP_RESP_ITER_US;
@@@ -326,7 -316,8 +316,8 @@@ static int qed_do_mcp_cmd(struct qed_hw
  		*o_mcp_param = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_param);
  	} else {
  		/* FW BUG! */
- 		DP_ERR(p_hwfn, "MFW failed to respond!\n");
+ 		DP_ERR(p_hwfn, "MFW failed to respond [cmd 0x%x param 0x%x]\n",
+ 		       cmd, param);
  		*o_mcp_resp = 0;
  		rc = -EAGAIN;
  	}
@@@ -342,7 -333,7 +333,7 @@@ static int qed_mcp_cmd_and_union(struc
  
  	/* MCP not initialized */
  	if (!qed_mcp_is_init(p_hwfn)) {
- 		DP_NOTICE(p_hwfn, "MFW is not initialized !\n");
+ 		DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
  		return -EBUSY;
  	}
  
@@@ -398,9 -389,36 +389,36 @@@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn
  	return 0;
  }
  
+ int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn,
+ 		       struct qed_ptt *p_ptt,
+ 		       u32 cmd,
+ 		       u32 param,
+ 		       u32 *o_mcp_resp,
+ 		       u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf)
+ {
+ 	struct qed_mcp_mb_params mb_params;
+ 	union drv_union_data union_data;
+ 	int rc;
+ 
+ 	memset(&mb_params, 0, sizeof(mb_params));
+ 	mb_params.cmd = cmd;
+ 	mb_params.param = param;
+ 	mb_params.p_data_dst = &union_data;
+ 	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+ 	if (rc)
+ 		return rc;
+ 
+ 	*o_mcp_resp = mb_params.mcp_resp;
+ 	*o_mcp_param = mb_params.mcp_param;
+ 
+ 	*o_txn_size = *o_mcp_param;
+ 	memcpy(o_buf, &union_data.raw_data, *o_txn_size);
+ 
+ 	return 0;
+ }
+ 
  int qed_mcp_load_req(struct qed_hwfn *p_hwfn,
- 		     struct qed_ptt *p_ptt,
- 		     u32 *p_load_code)
+ 		     struct qed_ptt *p_ptt, u32 *p_load_code)
  {
  	struct qed_dev *cdev = p_hwfn->cdev;
  	struct qed_mcp_mb_params mb_params;
@@@ -527,8 -545,7 +545,7 @@@ static void qed_mcp_handle_transceiver_
  		   "Received transceiver state update [0x%08x] from mfw [Addr 0x%x]\n",
  		   transceiver_state,
  		   (u32)(p_hwfn->mcp_info->port_addr +
- 			 offsetof(struct public_port,
- 				  transceiver_data)));
+ 			  offsetof(struct public_port, transceiver_data)));
  
  	transceiver_state = GET_FIELD(transceiver_state,
  				      ETH_TRANSCEIVER_STATE);
@@@ -540,8 -557,7 +557,7 @@@
  }
  
  static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
- 				       struct qed_ptt *p_ptt,
- 				       bool b_reset)
+ 				       struct qed_ptt *p_ptt, bool b_reset)
  {
  	struct qed_mcp_link_state *p_link;
  	u8 max_bw, min_bw;
@@@ -557,8 -573,7 +573,7 @@@
  			   "Received link update [0x%08x] from mfw [Addr 0x%x]\n",
  			   status,
  			   (u32)(p_hwfn->mcp_info->port_addr +
- 				 offsetof(struct public_port,
- 					  link_status)));
+ 				 offsetof(struct public_port, link_status)));
  	} else {
  		DP_VERBOSE(p_hwfn, NETIF_MSG_LINK,
  			   "Resetting link indications\n");
@@@ -635,6 -650,9 +650,9 @@@
  		(status & LINK_STATUS_LINK_PARTNER_20G_CAPABLE) ?
  		QED_LINK_PARTNER_SPEED_20G : 0;
  	p_link->partner_adv_speed |=
+ 		(status & LINK_STATUS_LINK_PARTNER_25G_CAPABLE) ?
+ 		QED_LINK_PARTNER_SPEED_25G : 0;
+ 	p_link->partner_adv_speed |=
  		(status & LINK_STATUS_LINK_PARTNER_40G_CAPABLE) ?
  		QED_LINK_PARTNER_SPEED_40G : 0;
  	p_link->partner_adv_speed |=
@@@ -722,6 -740,48 +740,48 @@@ int qed_mcp_set_link(struct qed_hwfn *p
  	return 0;
  }
  
+ static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn,
+ 					struct qed_ptt *p_ptt,
+ 					enum MFW_DRV_MSG_TYPE type)
+ {
+ 	enum qed_mcp_protocol_type stats_type;
+ 	union qed_mcp_protocol_stats stats;
+ 	struct qed_mcp_mb_params mb_params;
+ 	union drv_union_data union_data;
+ 	u32 hsi_param;
+ 
+ 	switch (type) {
+ 	case MFW_DRV_MSG_GET_LAN_STATS:
+ 		stats_type = QED_MCP_LAN_STATS;
+ 		hsi_param = DRV_MSG_CODE_STATS_TYPE_LAN;
+ 		break;
+ 	case MFW_DRV_MSG_GET_FCOE_STATS:
+ 		stats_type = QED_MCP_FCOE_STATS;
+ 		hsi_param = DRV_MSG_CODE_STATS_TYPE_FCOE;
+ 		break;
+ 	case MFW_DRV_MSG_GET_ISCSI_STATS:
+ 		stats_type = QED_MCP_ISCSI_STATS;
+ 		hsi_param = DRV_MSG_CODE_STATS_TYPE_ISCSI;
+ 		break;
+ 	case MFW_DRV_MSG_GET_RDMA_STATS:
+ 		stats_type = QED_MCP_RDMA_STATS;
+ 		hsi_param = DRV_MSG_CODE_STATS_TYPE_RDMA;
+ 		break;
+ 	default:
+ 		DP_NOTICE(p_hwfn, "Invalid protocol type %d\n", type);
+ 		return;
+ 	}
+ 
+ 	qed_get_protocol_stats(p_hwfn->cdev, stats_type, &stats);
+ 
+ 	memset(&mb_params, 0, sizeof(mb_params));
+ 	mb_params.cmd = DRV_MSG_CODE_GET_STATS;
+ 	mb_params.param = hsi_param;
+ 	memcpy(&union_data, &stats, sizeof(stats));
+ 	mb_params.p_data_src = &union_data;
+ 	qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+ }
+ 
  static void qed_read_pf_bandwidth(struct qed_hwfn *p_hwfn,
  				  struct public_func *p_shmem_info)
  {
@@@ -752,8 -812,7 +812,7 @@@
  
  static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn,
  				  struct qed_ptt *p_ptt,
- 				  struct public_func *p_data,
- 				  int pfid)
+ 				  struct public_func *p_data, int pfid)
  {
  	u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base,
  					PUBLIC_FUNC);
@@@ -763,51 -822,20 +822,20 @@@
  
  	memset(p_data, 0, sizeof(*p_data));
  
- 	size = min_t(u32, sizeof(*p_data),
- 		     QED_SECTION_SIZE(mfw_path_offsize));
+ 	size = min_t(u32, sizeof(*p_data), QED_SECTION_SIZE(mfw_path_offsize));
  	for (i = 0; i < size / sizeof(u32); i++)
  		((u32 *)p_data)[i] = qed_rd(p_hwfn, p_ptt,
  					    func_addr + (i << 2));
  	return size;
  }
  
- int qed_hw_init_first_eth(struct qed_hwfn *p_hwfn,
- 			  struct qed_ptt *p_ptt, u8 *p_pf)
- {
- 	struct public_func shmem_info;
- 	int i;
- 
- 	/* Find first Ethernet interface in port */
- 	for (i = 0; i < NUM_OF_ENG_PFS(p_hwfn->cdev);
- 	     i += p_hwfn->cdev->num_ports_in_engines) {
- 		qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
- 				       MCP_PF_ID_BY_REL(p_hwfn, i));
- 
- 		if (shmem_info.config & FUNC_MF_CFG_FUNC_HIDE)
- 			continue;
- 
- 		if ((shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK) ==
- 		    FUNC_MF_CFG_PROTOCOL_ETHERNET) {
- 			*p_pf = (u8)i;
- 			return 0;
- 		}
- 	}
- 
- 	DP_NOTICE(p_hwfn,
- 		  "Failed to find on port an ethernet interface in MF_SI mode\n");
- 
- 	return -EINVAL;
- }
- 
- static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn,
- 			      struct qed_ptt *p_ptt)
+ static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
  {
  	struct qed_mcp_function_info *p_info;
  	struct public_func shmem_info;
  	u32 resp = 0, param = 0;
  
- 	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
- 			       MCP_PF_ID(p_hwfn));
+ 	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn));
  
  	qed_read_pf_bandwidth(p_hwfn, &shmem_info);
  
@@@ -867,6 -895,12 +895,12 @@@ int qed_mcp_handle_events(struct qed_hw
  		case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE:
  			qed_mcp_handle_transceiver_change(p_hwfn, p_ptt);
  			break;
+ 		case MFW_DRV_MSG_GET_LAN_STATS:
+ 		case MFW_DRV_MSG_GET_FCOE_STATS:
+ 		case MFW_DRV_MSG_GET_ISCSI_STATS:
+ 		case MFW_DRV_MSG_GET_RDMA_STATS:
+ 			qed_mcp_send_protocol_stats(p_hwfn, p_ptt, i);
+ 			break;
  		case MFW_DRV_MSG_BW_UPDATE:
  			qed_mcp_update_bw(p_hwfn, p_ptt);
  			break;
@@@ -940,8 -974,7 +974,7 @@@ int qed_mcp_get_mfw_ver(struct qed_hwf
  	return 0;
  }
  
- int qed_mcp_get_media_type(struct qed_dev *cdev,
- 			   u32 *p_media_type)
+ int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type)
  {
  	struct qed_hwfn *p_hwfn = &cdev->hwfns[0];
  	struct qed_ptt  *p_ptt;
@@@ -950,7 -983,7 +983,7 @@@
  		return -EINVAL;
  
  	if (!qed_mcp_is_init(p_hwfn)) {
- 		DP_NOTICE(p_hwfn, "MFW is not initialized !\n");
+ 		DP_NOTICE(p_hwfn, "MFW is not initialized!\n");
  		return -EBUSY;
  	}
  
@@@ -1003,15 -1036,13 +1036,13 @@@ int qed_mcp_fill_shmem_func_info(struc
  	struct qed_mcp_function_info *info;
  	struct public_func shmem_info;
  
- 	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info,
- 			       MCP_PF_ID(p_hwfn));
+ 	qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn));
  	info = &p_hwfn->mcp_info->func_info;
  
  	info->pause_on_host = (shmem_info.config &
  			       FUNC_MF_CFG_PAUSE_ON_HOST_RING) ? 1 : 0;
  
- 	if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info,
- 				    &info->protocol)) {
+ 	if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, &info->protocol)) {
  		DP_ERR(p_hwfn, "Unknown personality %08x\n",
  		       (u32)(shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK));
  		return -EINVAL;
@@@ -1072,15 -1103,13 +1103,13 @@@ struct qed_mcp_link_capabilitie
  	return &p_hwfn->mcp_info->link_capabilities;
  }
  
- int qed_mcp_drain(struct qed_hwfn *p_hwfn,
- 		  struct qed_ptt *p_ptt)
+ int qed_mcp_drain(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
  {
  	u32 resp = 0, param = 0;
  	int rc;
  
  	rc = qed_mcp_cmd(p_hwfn, p_ptt,
- 			 DRV_MSG_CODE_NIG_DRAIN, 1000,
- 			 &resp, &param);
+ 			 DRV_MSG_CODE_NIG_DRAIN, 1000, &resp, &param);
  
  	/* Wait for the drain to complete before returning */
  	msleep(1020);
@@@ -1089,8 -1118,7 +1118,7 @@@
  }
  
  int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn,
- 			   struct qed_ptt *p_ptt,
- 			   u32 *p_flash_size)
+ 			   struct qed_ptt *p_ptt, u32 *p_flash_size)
  {
  	u32 flash_size;
  
@@@ -1153,8 -1181,8 +1181,8 @@@ qed_mcp_send_drv_version(struct qed_hwf
  	p_drv_version = &union_data.drv_version;
  	p_drv_version->version = p_ver->version;
  
 -	for (i = 0; i < MCP_DRV_VER_STR_SIZE - 1; i += 4) {
 -		val = cpu_to_be32(p_ver->name[i]);
 +	for (i = 0; i < (MCP_DRV_VER_STR_SIZE - 4) / sizeof(u32); i++) {
 +		val = cpu_to_be32(*((u32 *)&p_ver->name[i * sizeof(u32)]));
  		*(__be32 *)&p_drv_version->name[i * sizeof(u32)] = val;
  	}
  
@@@ -1168,8 -1196,35 +1196,35 @@@
  	return rc;
  }
  
- int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
- 		    enum qed_led_mode mode)
+ int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+ {
+ 	u32 resp = 0, param = 0;
+ 	int rc;
+ 
+ 	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp,
+ 			 &param);
+ 	if (rc)
+ 		DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+ 
+ 	return rc;
+ }
+ 
+ int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+ {
+ 	u32 value, cpu_mode;
+ 
+ 	qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff);
+ 
+ 	value = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+ 	value &= ~MCP_REG_CPU_MODE_SOFT_HALT;
+ 	qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value);
+ 	cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE);
+ 
+ 	return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0;
+ }
+ 
+ int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
+ 		    struct qed_ptt *p_ptt, enum qed_led_mode mode)
  {
  	u32 resp = 0, param = 0, drv_mb_param;
  	int rc;
@@@ -1195,6 -1250,27 +1250,27 @@@
  	return rc;
  }
  
+ int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
+ 			  struct qed_ptt *p_ptt, u32 mask_parities)
+ {
+ 	u32 resp = 0, param = 0;
+ 	int rc;
+ 
+ 	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MASK_PARITIES,
+ 			 mask_parities, &resp, &param);
+ 
+ 	if (rc) {
+ 		DP_ERR(p_hwfn,
+ 		       "MCP response failure for mask parities, aborting\n");
+ 	} else if (resp != FW_MSG_CODE_OK) {
+ 		DP_ERR(p_hwfn,
+ 		       "MCP did not acknowledge mask parity request. Old MFW?\n");
+ 		rc = -EINVAL;
+ 	}
+ 
+ 	return rc;
+ }
+ 
  int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
  {
  	u32 drv_mb_param = 0, rsp, param;
diff --combined drivers/net/ethernet/stmicro/stmmac/Kconfig
index 54de175,c732b8c..3818c5e
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@@ -61,13 -61,13 +61,13 @@@ config DWMAC_LPC18X
  config DWMAC_MESON
  	tristate "Amlogic Meson dwmac support"
  	default ARCH_MESON
 -	depends on OF && (ARCH_MESON || COMPILE_TEST)
 +	depends on OF && COMMON_CLK && (ARCH_MESON || COMPILE_TEST)
  	help
  	  Support for Ethernet controller on Amlogic Meson SoCs.
  
  	  This selects the Amlogic Meson SoC glue layer support for
 -	  the stmmac device driver. This driver is used for Meson6 and
 -	  Meson8 SoCs.
 +	  the stmmac device driver. This driver is used for Meson6,
 +	  Meson8, Meson8b and GXBB SoCs.
  
  config DWMAC_ROCKCHIP
  	tristate "Rockchip dwmac support"
@@@ -104,6 -104,18 +104,18 @@@ config DWMAC_ST
  	  device driver. This driver is used on for the STi series
  	  SOCs GMAC ethernet controller.
  
+ config DWMAC_STM32
+ 	tristate "STM32 DWMAC support"
+ 	default ARCH_STM32
+ 	depends on OF && HAS_IOMEM
+ 	select MFD_SYSCON
+ 	---help---
+ 	  Support for ethernet controller on STM32 SOCs.
+ 
+ 	  This selects STM32 SoC glue layer support for the stmmac
+ 	  device driver. This driver is used on for the STM32 series
+ 	  SOCs GMAC ethernet controller.
+ 
  config DWMAC_SUNXI
  	tristate "Allwinner GMAC support"
  	default ARCH_SUNXI
diff --combined drivers/net/ethernet/stmicro/stmmac/Makefile
index f77edb9,f0c9396..5d6ece5
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@@ -9,10 -9,11 +9,11 @@@ stmmac-objs:= stmmac_main.o stmmac_etht
  obj-$(CONFIG_STMMAC_PLATFORM)	+= stmmac-platform.o
  obj-$(CONFIG_DWMAC_IPQ806X)	+= dwmac-ipq806x.o
  obj-$(CONFIG_DWMAC_LPC18XX)	+= dwmac-lpc18xx.o
 -obj-$(CONFIG_DWMAC_MESON)	+= dwmac-meson.o
 +obj-$(CONFIG_DWMAC_MESON)	+= dwmac-meson.o dwmac-meson8b.o
  obj-$(CONFIG_DWMAC_ROCKCHIP)	+= dwmac-rk.o
  obj-$(CONFIG_DWMAC_SOCFPGA)	+= dwmac-altr-socfpga.o
  obj-$(CONFIG_DWMAC_STI)		+= dwmac-sti.o
+ obj-$(CONFIG_DWMAC_STM32)	+= dwmac-stm32.o
  obj-$(CONFIG_DWMAC_SUNXI)	+= dwmac-sunxi.o
  obj-$(CONFIG_DWMAC_GENERIC)	+= dwmac-generic.o
  stmmac-platform-objs:= stmmac_platform.o
diff --combined drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 885a5e6,6f6bbc5..7df4ff1
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@@ -145,7 -145,7 +145,7 @@@ static void dwmac1000_set_mchash(void _
  		numhashregs = 8;
  		break;
  	default:
- 		pr_debug("STMMAC: err in setting mulitcast filter\n");
+ 		pr_debug("STMMAC: err in setting multicast filter\n");
  		return;
  		break;
  	}
@@@ -261,7 -261,7 +261,7 @@@ static void dwmac1000_pmt(struct mac_de
  	}
  	if (mode & WAKE_UCAST) {
  		pr_debug("GMAC: WOL on global unicast\n");
 -		pmt |= global_unicast;
 +		pmt |= power_down | global_unicast | wake_up_frame_en;
  	}
  
  	writel(pmt, ioaddr + GMAC_PMT);
diff --combined drivers/net/usb/r8152.c
index c254248,9338f58..44d439f
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@@ -32,7 -32,7 +32,7 @@@
  #define NETNEXT_VERSION		"08"
  
  /* Information for net */
 -#define NET_VERSION		"5"
 +#define NET_VERSION		"6"
  
  #define DRIVER_VERSION		"v1." NETNEXT_VERSION "." NET_VERSION
  #define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd at realtek.com>"
@@@ -1076,8 -1076,7 +1076,7 @@@ static int vendor_mac_passthru_addr_rea
  		return -ENODEV;
  	if (obj->type != ACPI_TYPE_BUFFER || obj->string.length != 0x17) {
  		netif_warn(tp, probe, tp->netdev,
- 			   "Invalid buffer when reading pass-thru MAC addr: "
- 			   "(%d, %d)\n",
+ 			   "Invalid buffer for pass-thru MAC addr: (%d, %d)\n",
  			   obj->type, obj->string.length);
  		goto amacout;
  	}
@@@ -1090,8 -1089,8 +1089,8 @@@
  	ret = hex2bin(buf, obj->string.pointer + 9, 6);
  	if (!(ret == 0 && is_valid_ether_addr(buf))) {
  		netif_warn(tp, probe, tp->netdev,
- 			   "Invalid MAC when reading pass-thru MAC addr: "
- 			   "%d, %pM\n", ret, buf);
+ 			   "Invalid MAC for pass-thru MAC addr: %d, %pM\n",
+ 			   ret, buf);
  		ret = -EINVAL;
  		goto amacout;
  	}
@@@ -1111,9 -1110,9 +1110,9 @@@ static int set_ethernet_addr(struct r81
  	struct sockaddr sa;
  	int ret;
  
- 	if (tp->version == RTL_VER_01)
+ 	if (tp->version == RTL_VER_01) {
  		ret = pla_ocp_read(tp, PLA_IDR, 8, sa.sa_data);
- 	else {
+ 	} else {
  		/* if this is not an RTL8153-AD, no eFuse mac pass thru set,
  		 * or system doesn't provide valid _SB.AMAC this will be
  		 * be expected to non-zero
@@@ -2552,77 -2551,6 +2551,77 @@@ static void r8152_aldps_en(struct r815
  	}
  }
  
 +static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg)
 +{
 +	ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev);
 +	ocp_reg_write(tp, OCP_EEE_DATA, reg);
 +	ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev);
 +}
 +
 +static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg)
 +{
 +	u16 data;
 +
 +	r8152_mmd_indirect(tp, dev, reg);
 +	data = ocp_reg_read(tp, OCP_EEE_DATA);
 +	ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
 +
 +	return data;
 +}
 +
 +static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data)
 +{
 +	r8152_mmd_indirect(tp, dev, reg);
 +	ocp_reg_write(tp, OCP_EEE_DATA, data);
 +	ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
 +}
 +
 +static void r8152_eee_en(struct r8152 *tp, bool enable)
 +{
 +	u16 config1, config2, config3;
 +	u32 ocp_data;
 +
 +	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
 +	config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask;
 +	config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2);
 +	config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask;
 +
 +	if (enable) {
 +		ocp_data |= EEE_RX_EN | EEE_TX_EN;
 +		config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN;
 +		config1 |= sd_rise_time(1);
 +		config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN;
 +		config3 |= fast_snr(42);
 +	} else {
 +		ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
 +		config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN |
 +			     RX_QUIET_EN);
 +		config1 |= sd_rise_time(7);
 +		config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN);
 +		config3 |= fast_snr(511);
 +	}
 +
 +	ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
 +	ocp_reg_write(tp, OCP_EEE_CONFIG1, config1);
 +	ocp_reg_write(tp, OCP_EEE_CONFIG2, config2);
 +	ocp_reg_write(tp, OCP_EEE_CONFIG3, config3);
 +}
 +
 +static void r8152b_enable_eee(struct r8152 *tp)
 +{
 +	r8152_eee_en(tp, true);
 +	r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX);
 +}
 +
 +static void r8152b_enable_fc(struct r8152 *tp)
 +{
 +	u16 anar;
 +
 +	anar = r8152_mdio_read(tp, MII_ADVERTISE);
 +	anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
 +	r8152_mdio_write(tp, MII_ADVERTISE, anar);
 +}
 +
  static void rtl8152_disable(struct r8152 *tp)
  {
  	r8152_aldps_en(tp, false);
@@@ -2632,9 -2560,13 +2631,9 @@@
  
  static void r8152b_hw_phy_cfg(struct r8152 *tp)
  {
 -	u16 data;
 -
 -	data = r8152_mdio_read(tp, MII_BMCR);
 -	if (data & BMCR_PDOWN) {
 -		data &= ~BMCR_PDOWN;
 -		r8152_mdio_write(tp, MII_BMCR, data);
 -	}
 +	r8152b_enable_eee(tp);
 +	r8152_aldps_en(tp, true);
 +	r8152b_enable_fc(tp);
  
  	set_bit(PHY_RESET, &tp->flags);
  }
@@@ -2768,52 -2700,20 +2767,52 @@@ static void r8152b_enter_oob(struct r81
  	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
  }
  
 +static void r8153_aldps_en(struct r8152 *tp, bool enable)
 +{
 +	u16 data;
 +
 +	data = ocp_reg_read(tp, OCP_POWER_CFG);
 +	if (enable) {
 +		data |= EN_ALDPS;
 +		ocp_reg_write(tp, OCP_POWER_CFG, data);
 +	} else {
 +		data &= ~EN_ALDPS;
 +		ocp_reg_write(tp, OCP_POWER_CFG, data);
 +		msleep(20);
 +	}
 +}
 +
 +static void r8153_eee_en(struct r8152 *tp, bool enable)
 +{
 +	u32 ocp_data;
 +	u16 config;
 +
 +	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
 +	config = ocp_reg_read(tp, OCP_EEE_CFG);
 +
 +	if (enable) {
 +		ocp_data |= EEE_RX_EN | EEE_TX_EN;
 +		config |= EEE10_EN;
 +	} else {
 +		ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
 +		config &= ~EEE10_EN;
 +	}
 +
 +	ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
 +	ocp_reg_write(tp, OCP_EEE_CFG, config);
 +}
 +
  static void r8153_hw_phy_cfg(struct r8152 *tp)
  {
  	u32 ocp_data;
  	u16 data;
  
 -	if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 ||
 -	    tp->version == RTL_VER_05)
 -		ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L);
 +	/* disable ALDPS before updating the PHY parameters */
 +	r8153_aldps_en(tp, false);
  
 -	data = r8152_mdio_read(tp, MII_BMCR);
 -	if (data & BMCR_PDOWN) {
 -		data &= ~BMCR_PDOWN;
 -		r8152_mdio_write(tp, MII_BMCR, data);
 -	}
 +	/* disable EEE before updating the PHY parameters */
 +	r8153_eee_en(tp, false);
 +	ocp_reg_write(tp, OCP_EEE_ADV, 0);
  
  	if (tp->version == RTL_VER_03) {
  		data = ocp_reg_read(tp, OCP_EEE_CFG);
@@@ -2844,12 -2744,6 +2843,12 @@@
  	sram_write(tp, SRAM_10M_AMP1, 0x00af);
  	sram_write(tp, SRAM_10M_AMP2, 0x0208);
  
 +	r8153_eee_en(tp, true);
 +	ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX);
 +
 +	r8153_aldps_en(tp, true);
 +	r8152b_enable_fc(tp);
 +
  	set_bit(PHY_RESET, &tp->flags);
  }
  
@@@ -2971,6 -2865,21 +2970,6 @@@ static void r8153_enter_oob(struct r815
  	ocp_write_dword(tp, MCU_TYPE_PLA, PLA_RCR, ocp_data);
  }
  
 -static void r8153_aldps_en(struct r8152 *tp, bool enable)
 -{
 -	u16 data;
 -
 -	data = ocp_reg_read(tp, OCP_POWER_CFG);
 -	if (enable) {
 -		data |= EN_ALDPS;
 -		ocp_reg_write(tp, OCP_POWER_CFG, data);
 -	} else {
 -		data &= ~EN_ALDPS;
 -		ocp_reg_write(tp, OCP_POWER_CFG, data);
 -		msleep(20);
 -	}
 -}
 -
  static void rtl8153_disable(struct r8152 *tp)
  {
  	r8153_aldps_en(tp, false);
@@@ -3336,6 -3245,103 +3335,6 @@@ static int rtl8152_close(struct net_dev
  	return res;
  }
  
 -static inline void r8152_mmd_indirect(struct r8152 *tp, u16 dev, u16 reg)
 -{
 -	ocp_reg_write(tp, OCP_EEE_AR, FUN_ADDR | dev);
 -	ocp_reg_write(tp, OCP_EEE_DATA, reg);
 -	ocp_reg_write(tp, OCP_EEE_AR, FUN_DATA | dev);
 -}
 -
 -static u16 r8152_mmd_read(struct r8152 *tp, u16 dev, u16 reg)
 -{
 -	u16 data;
 -
 -	r8152_mmd_indirect(tp, dev, reg);
 -	data = ocp_reg_read(tp, OCP_EEE_DATA);
 -	ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
 -
 -	return data;
 -}
 -
 -static void r8152_mmd_write(struct r8152 *tp, u16 dev, u16 reg, u16 data)
 -{
 -	r8152_mmd_indirect(tp, dev, reg);
 -	ocp_reg_write(tp, OCP_EEE_DATA, data);
 -	ocp_reg_write(tp, OCP_EEE_AR, 0x0000);
 -}
 -
 -static void r8152_eee_en(struct r8152 *tp, bool enable)
 -{
 -	u16 config1, config2, config3;
 -	u32 ocp_data;
 -
 -	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
 -	config1 = ocp_reg_read(tp, OCP_EEE_CONFIG1) & ~sd_rise_time_mask;
 -	config2 = ocp_reg_read(tp, OCP_EEE_CONFIG2);
 -	config3 = ocp_reg_read(tp, OCP_EEE_CONFIG3) & ~fast_snr_mask;
 -
 -	if (enable) {
 -		ocp_data |= EEE_RX_EN | EEE_TX_EN;
 -		config1 |= EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN | RX_QUIET_EN;
 -		config1 |= sd_rise_time(1);
 -		config2 |= RG_DACQUIET_EN | RG_LDVQUIET_EN;
 -		config3 |= fast_snr(42);
 -	} else {
 -		ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
 -		config1 &= ~(EEE_10_CAP | EEE_NWAY_EN | TX_QUIET_EN |
 -			     RX_QUIET_EN);
 -		config1 |= sd_rise_time(7);
 -		config2 &= ~(RG_DACQUIET_EN | RG_LDVQUIET_EN);
 -		config3 |= fast_snr(511);
 -	}
 -
 -	ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
 -	ocp_reg_write(tp, OCP_EEE_CONFIG1, config1);
 -	ocp_reg_write(tp, OCP_EEE_CONFIG2, config2);
 -	ocp_reg_write(tp, OCP_EEE_CONFIG3, config3);
 -}
 -
 -static void r8152b_enable_eee(struct r8152 *tp)
 -{
 -	r8152_eee_en(tp, true);
 -	r8152_mmd_write(tp, MDIO_MMD_AN, MDIO_AN_EEE_ADV, MDIO_EEE_100TX);
 -}
 -
 -static void r8153_eee_en(struct r8152 *tp, bool enable)
 -{
 -	u32 ocp_data;
 -	u16 config;
 -
 -	ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEE_CR);
 -	config = ocp_reg_read(tp, OCP_EEE_CFG);
 -
 -	if (enable) {
 -		ocp_data |= EEE_RX_EN | EEE_TX_EN;
 -		config |= EEE10_EN;
 -	} else {
 -		ocp_data &= ~(EEE_RX_EN | EEE_TX_EN);
 -		config &= ~EEE10_EN;
 -	}
 -
 -	ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEE_CR, ocp_data);
 -	ocp_reg_write(tp, OCP_EEE_CFG, config);
 -}
 -
 -static void r8153_enable_eee(struct r8152 *tp)
 -{
 -	r8153_eee_en(tp, true);
 -	ocp_reg_write(tp, OCP_EEE_ADV, MDIO_EEE_1000T | MDIO_EEE_100TX);
 -}
 -
 -static void r8152b_enable_fc(struct r8152 *tp)
 -{
 -	u16 anar;
 -
 -	anar = r8152_mdio_read(tp, MII_ADVERTISE);
 -	anar |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
 -	r8152_mdio_write(tp, MII_ADVERTISE, anar);
 -}
 -
  static void rtl_tally_reset(struct r8152 *tp)
  {
  	u32 ocp_data;
@@@ -3348,17 -3354,10 +3347,17 @@@
  static void r8152b_init(struct r8152 *tp)
  {
  	u32 ocp_data;
 +	u16 data;
  
  	if (test_bit(RTL8152_UNPLUG, &tp->flags))
  		return;
  
 +	data = r8152_mdio_read(tp, MII_BMCR);
 +	if (data & BMCR_PDOWN) {
 +		data &= ~BMCR_PDOWN;
 +		r8152_mdio_write(tp, MII_BMCR, data);
 +	}
 +
  	r8152_aldps_en(tp, false);
  
  	if (tp->version == RTL_VER_01) {
@@@ -3380,6 -3379,9 +3379,6 @@@
  		   SPDWN_RXDV_MSK | SPDWN_LINKCHG_MSK;
  	ocp_write_word(tp, MCU_TYPE_PLA, PLA_GPHY_INTR_IMR, ocp_data);
  
 -	r8152b_enable_eee(tp);
 -	r8152_aldps_en(tp, true);
 -	r8152b_enable_fc(tp);
  	rtl_tally_reset(tp);
  
  	/* enable rx aggregation */
@@@ -3391,12 -3393,12 +3390,12 @@@
  static void r8153_init(struct r8152 *tp)
  {
  	u32 ocp_data;
 +	u16 data;
  	int i;
  
  	if (test_bit(RTL8152_UNPLUG, &tp->flags))
  		return;
  
 -	r8153_aldps_en(tp, false);
  	r8153_u1u2en(tp, false);
  
  	for (i = 0; i < 500; i++) {
@@@ -3413,23 -3415,6 +3412,23 @@@
  		msleep(20);
  	}
  
 +	if (tp->version == RTL_VER_03 || tp->version == RTL_VER_04 ||
 +	    tp->version == RTL_VER_05)
 +		ocp_reg_write(tp, OCP_ADC_CFG, CKADSEL_L | ADC_EN | EN_EMI_L);
 +
 +	data = r8152_mdio_read(tp, MII_BMCR);
 +	if (data & BMCR_PDOWN) {
 +		data &= ~BMCR_PDOWN;
 +		r8152_mdio_write(tp, MII_BMCR, data);
 +	}
 +
 +	for (i = 0; i < 500; i++) {
 +		ocp_data = ocp_reg_read(tp, OCP_PHY_STATUS) & PHY_STAT_MASK;
 +		if (ocp_data == PHY_STAT_LAN_ON)
 +			break;
 +		msleep(20);
 +	}
 +
  	usb_disable_lpm(tp->udev);
  	r8153_u2p3en(tp, false);
  
@@@ -3497,6 -3482,9 +3496,6 @@@
  	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0);
  	ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0);
  
 -	r8153_enable_eee(tp);
 -	r8153_aldps_en(tp, true);
 -	r8152b_enable_fc(tp);
  	rtl_tally_reset(tp);
  	r8153_u2p3en(tp, true);
  }
@@@ -4043,7 -4031,7 +4042,7 @@@ static int rtl8152_set_coalesce(struct 
  	return ret;
  }
  
- static struct ethtool_ops ops = {
+ static const struct ethtool_ops ops = {
  	.get_drvinfo = rtl8152_get_drvinfo,
  	.get_settings = rtl8152_get_settings,
  	.set_settings = rtl8152_set_settings,
diff --combined drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index b3a87a3,8b91544..f915024
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@@ -513,15 -513,6 +513,15 @@@ int iwl_mvm_tx_skb_non_sta(struct iwl_m
  	int hdrlen = ieee80211_hdrlen(hdr->frame_control);
  	int queue;
  
 +	/* IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used
 +	 * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel
 +	 * queue. STATION (HS2.0) uses the auxiliary context of the FW,
 +	 * and hence needs to be sent on the aux queue
 +	 */
 +	if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
 +	    skb_info->control.vif->type == NL80211_IFTYPE_STATION)
 +		IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue;
 +
  	memcpy(&info, skb->cb, sizeof(info));
  
  	if (WARN_ON_ONCE(info.flags & IEEE80211_TX_CTL_AMPDU))
@@@ -535,6 -526,16 +535,6 @@@
  	/* This holds the amsdu headers length */
  	skb_info->driver_data[0] = (void *)(uintptr_t)0;
  
 -	/*
 -	 * IWL_MVM_OFFCHANNEL_QUEUE is used for ROC packets that can be used
 -	 * in 2 different types of vifs, P2P & STATION. P2P uses the offchannel
 -	 * queue. STATION (HS2.0) uses the auxiliary context of the FW,
 -	 * and hence needs to be sent on the aux queue
 -	 */
 -	if (IEEE80211_SKB_CB(skb)->hw_queue == IWL_MVM_OFFCHANNEL_QUEUE &&
 -	    info.control.vif->type == NL80211_IFTYPE_STATION)
 -		IEEE80211_SKB_CB(skb)->hw_queue = mvm->aux_queue;
 -
  	queue = info.hw_queue;
  
  	/*
@@@ -837,6 -838,22 +837,22 @@@ static void iwl_mvm_tx_add_stream(struc
  	}
  }
  
+ /* Check if there are any timed-out TIDs on a given shared TXQ */
+ static bool iwl_mvm_txq_should_update(struct iwl_mvm *mvm, int txq_id)
+ {
+ 	unsigned long queue_tid_bitmap = mvm->queue_info[txq_id].tid_bitmap;
+ 	unsigned long now = jiffies;
+ 	int tid;
+ 
+ 	for_each_set_bit(tid, &queue_tid_bitmap, IWL_MAX_TID_COUNT + 1) {
+ 		if (time_before(mvm->queue_info[txq_id].last_frame_time[tid] +
+ 				IWL_MVM_DQA_QUEUE_TIMEOUT, now))
+ 			return true;
+ 	}
+ 
+ 	return false;
+ }
+ 
  /*
   * Sets the fields in the Tx cmd that are crypto related
   */
@@@ -939,7 -956,6 +955,6 @@@ static int iwl_mvm_tx_mpdu(struct iwl_m
  			iwl_trans_free_tx_cmd(mvm->trans, dev_cmd);
  			spin_unlock(&mvmsta->lock);
  			return 0;
- 
  		}
  
  		/* If we are here - TXQ exists and needs to be re-activated */
@@@ -952,8 -968,25 +967,25 @@@
  				    txq_id);
  	}
  
- 	/* Keep track of the time of the last frame for this RA/TID */
- 	mvm->queue_info[txq_id].last_frame_time[tid] = jiffies;
+ 	if (iwl_mvm_is_dqa_supported(mvm)) {
+ 		/* Keep track of the time of the last frame for this RA/TID */
+ 		mvm->queue_info[txq_id].last_frame_time[tid] = jiffies;
+ 
+ 		/*
+ 		 * If we have timed-out TIDs - schedule the worker that will
+ 		 * reconfig the queues and update them
+ 		 *
+ 		 * Note that the mvm->queue_info_lock isn't being taken here in
+ 		 * order to not serialize the TX flow. This isn't dangerous
+ 		 * because scheduling mvm->add_stream_wk can't ruin the state,
+ 		 * and if we DON'T schedule it due to some race condition then
+ 		 * the next TX that gets here will.
+ 		 */
+ 		if (unlikely(mvm->queue_info[txq_id].status ==
+ 			     IWL_MVM_QUEUE_SHARED &&
+ 			     iwl_mvm_txq_should_update(mvm, txq_id)))
+ 			schedule_work(&mvm->add_stream_wk);
+ 	}
  
  	IWL_DEBUG_TX(mvm, "TX to [%d|%d] Q:%d - seq: 0x%x\n", mvmsta->sta_id,
  		     tid, txq_id, IEEE80211_SEQ_TO_SN(seq_number));
diff --combined drivers/net/xen-netback/xenbus.c
index daf4c78,bacf6e0..9911b4e
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@@ -165,7 -165,7 +165,7 @@@ xenvif_write_io_ring(struct file *filp
  	return count;
  }
  
- static int xenvif_dump_open(struct inode *inode, struct file *filp)
+ static int xenvif_io_ring_open(struct inode *inode, struct file *filp)
  {
  	int ret;
  	void *queue = NULL;
@@@ -179,13 -179,35 +179,35 @@@
  
  static const struct file_operations xenvif_dbg_io_ring_ops_fops = {
  	.owner = THIS_MODULE,
- 	.open = xenvif_dump_open,
+ 	.open = xenvif_io_ring_open,
  	.read = seq_read,
  	.llseek = seq_lseek,
  	.release = single_release,
  	.write = xenvif_write_io_ring,
  };
  
+ static int xenvif_read_ctrl(struct seq_file *m, void *v)
+ {
+ 	struct xenvif *vif = m->private;
+ 
+ 	xenvif_dump_hash_info(vif, m);
+ 
+ 	return 0;
+ }
+ 
+ static int xenvif_ctrl_open(struct inode *inode, struct file *filp)
+ {
+ 	return single_open(filp, xenvif_read_ctrl, inode->i_private);
+ }
+ 
+ static const struct file_operations xenvif_dbg_ctrl_ops_fops = {
+ 	.owner = THIS_MODULE,
+ 	.open = xenvif_ctrl_open,
+ 	.read = seq_read,
+ 	.llseek = seq_lseek,
+ 	.release = single_release,
+ };
+ 
  static void xenvif_debugfs_addif(struct xenvif *vif)
  {
  	struct dentry *pfile;
@@@ -210,6 -232,17 +232,17 @@@
  				pr_warn("Creation of io_ring file returned %ld!\n",
  					PTR_ERR(pfile));
  		}
+ 
+ 		if (vif->ctrl_task) {
+ 			pfile = debugfs_create_file("ctrl",
+ 						    S_IRUSR,
+ 						    vif->xenvif_dbg_root,
+ 						    vif,
+ 						    &xenvif_dbg_ctrl_ops_fops);
+ 			if (IS_ERR_OR_NULL(pfile))
+ 				pr_warn("Creation of ctrl file returned %ld!\n",
+ 					PTR_ERR(pfile));
+ 		}
  	} else
  		netdev_warn(vif->dev,
  			    "Creation of vif debugfs dir returned %ld!\n",
@@@ -271,11 -304,6 +304,11 @@@ static int netback_probe(struct xenbus_
  	be->dev = dev;
  	dev_set_drvdata(&dev->dev, be);
  
 +	be->state = XenbusStateInitialising;
 +	err = xenbus_switch_state(dev, XenbusStateInitialising);
 +	if (err)
 +		goto fail;
 +
  	sg = 1;
  
  	do {
@@@ -388,6 -416,11 +421,6 @@@
  
  	be->hotplug_script = script;
  
 -	err = xenbus_switch_state(dev, XenbusStateInitWait);
 -	if (err)
 -		goto fail;
 -
 -	be->state = XenbusStateInitWait;
  
  	/* This kicks hotplug scripts, so do it immediately. */
  	err = backend_create_xenvif(be);
@@@ -492,20 -525,20 +525,20 @@@ static inline void backend_switch_state
  
  /* Handle backend state transitions:
   *
 - * The backend state starts in InitWait and the following transitions are
 + * The backend state starts in Initialising and the following transitions are
   * allowed.
   *
 - * InitWait -> Connected
 - *
 - *    ^    \         |
 - *    |     \        |
 - *    |      \       |
 - *    |       \      |
 - *    |        \     |
 - *    |         \    |
 - *    |          V   V
 + * Initialising -> InitWait -> Connected
 + *          \
 + *           \        ^    \         |
 + *            \       |     \        |
 + *             \      |      \       |
 + *              \     |       \      |
 + *               \    |        \     |
 + *                \   |         \    |
 + *                 V  |          V   V
   *
 - *  Closed  <-> Closing
 + *                  Closed  <-> Closing
   *
   * The state argument specifies the eventual state of the backend and the
   * function transitions to that state via the shortest path.
@@@ -515,20 -548,6 +548,20 @@@ static void set_backend_state(struct ba
  {
  	while (be->state != state) {
  		switch (be->state) {
 +		case XenbusStateInitialising:
 +			switch (state) {
 +			case XenbusStateInitWait:
 +			case XenbusStateConnected:
 +			case XenbusStateClosing:
 +				backend_switch_state(be, XenbusStateInitWait);
 +				break;
 +			case XenbusStateClosed:
 +				backend_switch_state(be, XenbusStateClosed);
 +				break;
 +			default:
 +				BUG();
 +			}
 +			break;
  		case XenbusStateClosed:
  			switch (state) {
  			case XenbusStateInitWait:
diff --combined include/net/sock.h
index 8741988,c797c57..ebf75db
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@@ -1020,7 -1020,6 +1020,6 @@@ struct proto 
  	void			(*unhash)(struct sock *sk);
  	void			(*rehash)(struct sock *sk);
  	int			(*get_port)(struct sock *sk, unsigned short snum);
- 	void			(*clear_sk)(struct sock *sk, int size);
  
  	/* Keeping track of sockets in use */
  #ifdef CONFIG_PROC_FS
@@@ -1114,6 -1113,16 +1113,16 @@@ static inline bool sk_stream_is_writeab
  	       sk_stream_memory_free(sk);
  }
  
+ static inline int sk_under_cgroup_hierarchy(struct sock *sk,
+ 					    struct cgroup *ancestor)
+ {
+ #ifdef CONFIG_SOCK_CGROUP_DATA
+ 	return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data),
+ 				    ancestor);
+ #else
+ 	return -ENOTSUPP;
+ #endif
+ }
  
  static inline bool sk_has_memory_pressure(const struct sock *sk)
  {
@@@ -1232,8 -1241,6 +1241,6 @@@ static inline int __sk_prot_rehash(stru
  	return sk->sk_prot->hash(sk);
  }
  
- void sk_prot_clear_portaddr_nulls(struct sock *sk, int size);
- 
  /* About 10 seconds */
  #define SOCK_DESTROY_TIME (10*HZ)
  
@@@ -1332,16 -1339,6 +1339,16 @@@ static inline void sk_mem_uncharge(stru
  	if (!sk_has_account(sk))
  		return;
  	sk->sk_forward_alloc += size;
 +
 +	/* Avoid a possible overflow.
 +	 * TCP send queues can make this happen, if sk_mem_reclaim()
 +	 * is not called and more than 2 GBytes are released at once.
 +	 *
 +	 * If we reach 2 MBytes, reclaim 1 MByte right now; there is
 +	 * no need to hold that much forward allocation anyway.
 +	 */
 +	if (unlikely(sk->sk_forward_alloc >= 1 << 21))
 +		__sk_mem_reclaim(sk, 1 << 20);
  }
  
  static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
diff --combined include/net/xfrm.h
index 1793431,d2fdd6d..31947b9
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@@ -187,7 -187,7 +187,7 @@@ struct xfrm_state 
  	struct xfrm_replay_state_esn *preplay_esn;
  
  	/* The functions for replay detection. */
- 	struct xfrm_replay	*repl;
+ 	const struct xfrm_replay *repl;
  
  	/* internal flag that only holds state for delayed aevent at the
  	 * moment
@@@ -1540,10 -1540,8 +1540,10 @@@ int xfrm4_tunnel_deregister(struct xfrm
  void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
  int xfrm6_extract_header(struct sk_buff *skb);
  int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
 -int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi);
 +int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
 +		  struct ip6_tnl *t);
  int xfrm6_transport_finish(struct sk_buff *skb, int async);
 +int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t);
  int xfrm6_rcv(struct sk_buff *skb);
  int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
  		     xfrm_address_t *saddr, u8 proto);
diff --combined kernel/events/core.c
index a54f2c2,a7b8c1c..9fc3be0
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@@ -2496,11 -2496,11 +2496,11 @@@ static int __perf_event_stop(void *info
  	return 0;
  }
  
 -static int perf_event_restart(struct perf_event *event)
 +static int perf_event_stop(struct perf_event *event, int restart)
  {
  	struct stop_event_data sd = {
  		.event		= event,
 -		.restart	= 1,
 +		.restart	= restart,
  	};
  	int ret = 0;
  
@@@ -3549,18 -3549,10 +3549,18 @@@ static int perf_event_read(struct perf_
  			.group = group,
  			.ret = 0,
  		};
 -		ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
 -		/* The event must have been read from an online CPU: */
 -		WARN_ON_ONCE(ret);
 -		ret = ret ? : data.ret;
 +		/*
 +		 * Purposely ignore the smp_call_function_single() return
 +		 * value.
 +		 *
 +		 * If event->oncpu isn't a valid CPU it means the event got
 +		 * scheduled out and that will have updated the event count.
 +		 *
 +		 * Therefore, either way, we'll have an up-to-date event count
 +		 * after this.
 +		 */
 +		(void)smp_call_function_single(event->oncpu, __perf_event_read, &data, 1);
 +		ret = data.ret;
  	} else if (event->state == PERF_EVENT_STATE_INACTIVE) {
  		struct perf_event_context *ctx = event->ctx;
  		unsigned long flags;
@@@ -4845,19 -4837,6 +4845,19 @@@ static void ring_buffer_attach(struct p
  		spin_unlock_irqrestore(&rb->event_lock, flags);
  	}
  
 +	/*
 +	 * Avoid racing with perf_mmap_close(AUX): stop the event
 +	 * before swizzling the event::rb pointer; if it's getting
 +	 * unmapped, its aux_mmap_count will be 0 and it won't
 +	 * restart. See the comment in __perf_pmu_output_stop().
 +	 *
 +	 * Data will inevitably be lost when set_output is done in
 +	 * mid-air, but then again, whoever does it like this is
 +	 * not in for the data anyway.
 +	 */
 +	if (has_aux(event))
 +		perf_event_stop(event, 0);
 +
  	rcu_assign_pointer(event->rb, rb);
  
  	if (old_rb) {
@@@ -6133,7 -6112,7 +6133,7 @@@ static void perf_event_addr_filters_exe
  	raw_spin_unlock_irqrestore(&ifh->lock, flags);
  
  	if (restart)
 -		perf_event_restart(event);
 +		perf_event_stop(event, 1);
  }
  
  void perf_event_exec(void)
@@@ -6177,13 -6156,7 +6177,13 @@@ static void __perf_event_output_stop(st
  
  	/*
  	 * In case of inheritance, it will be the parent that links to the
 -	 * ring-buffer, but it will be the child that's actually using it:
 +	 * ring-buffer, but it will be the child that's actually using it.
 +	 *
 +	 * We are using event::rb to determine if the event should be stopped,
 +	 * however this may race with ring_buffer_attach() (through set_output),
 +	 * which will make us skip the event that actually needs to be stopped.
 +	 * So ring_buffer_attach() has to stop an aux event before re-assigning
 +	 * its rb pointer.
  	 */
  	if (rcu_dereference(parent->rb) == rb)
  		ro->err = __perf_event_stop(&sd);
@@@ -6697,7 -6670,7 +6697,7 @@@ static void __perf_addr_filters_adjust(
  	raw_spin_unlock_irqrestore(&ifh->lock, flags);
  
  	if (restart)
 -		perf_event_restart(event);
 +		perf_event_stop(event, 1);
  }
  
  /*
@@@ -7049,7 -7022,7 +7049,7 @@@ static int __perf_event_overflow(struc
  		irq_work_queue(&event->pending);
  	}
  
- 	event->overflow_handler(event, data, regs);
+ 	READ_ONCE(event->overflow_handler)(event, data, regs);
  
  	if (*perf_event_fasync(event) && event->pending_kill) {
  		event->pending_wakeup = 1;
@@@ -7664,11 -7637,83 +7664,83 @@@ static void perf_event_free_filter(stru
  	ftrace_profile_free_filter(event);
  }
  
+ #ifdef CONFIG_BPF_SYSCALL
+ static void bpf_overflow_handler(struct perf_event *event,
+ 				 struct perf_sample_data *data,
+ 				 struct pt_regs *regs)
+ {
+ 	struct bpf_perf_event_data_kern ctx = {
+ 		.data = data,
+ 		.regs = regs,
+ 	};
+ 	int ret = 0;
+ 
+ 	preempt_disable();
+ 	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1))
+ 		goto out;
+ 	rcu_read_lock();
+ 	ret = BPF_PROG_RUN(event->prog, (void *)&ctx);
+ 	rcu_read_unlock();
+ out:
+ 	__this_cpu_dec(bpf_prog_active);
+ 	preempt_enable();
+ 	if (!ret)
+ 		return;
+ 
+ 	event->orig_overflow_handler(event, data, regs);
+ }
+ 
+ static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+ {
+ 	struct bpf_prog *prog;
+ 
+ 	if (event->overflow_handler_context)
+ 		/* hw breakpoint or kernel counter */
+ 		return -EINVAL;
+ 
+ 	if (event->prog)
+ 		return -EEXIST;
+ 
+ 	prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT);
+ 	if (IS_ERR(prog))
+ 		return PTR_ERR(prog);
+ 
+ 	event->prog = prog;
+ 	event->orig_overflow_handler = READ_ONCE(event->overflow_handler);
+ 	WRITE_ONCE(event->overflow_handler, bpf_overflow_handler);
+ 	return 0;
+ }
+ 
+ static void perf_event_free_bpf_handler(struct perf_event *event)
+ {
+ 	struct bpf_prog *prog = event->prog;
+ 
+ 	if (!prog)
+ 		return;
+ 
+ 	WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler);
+ 	event->prog = NULL;
+ 	bpf_prog_put(prog);
+ }
+ #else
+ static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd)
+ {
+ 	return -EOPNOTSUPP;
+ }
+ static void perf_event_free_bpf_handler(struct perf_event *event)
+ {
+ }
+ #endif
+ 
  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
  {
  	bool is_kprobe, is_tracepoint;
  	struct bpf_prog *prog;
  
+ 	if (event->attr.type == PERF_TYPE_HARDWARE ||
+ 	    event->attr.type == PERF_TYPE_SOFTWARE)
+ 		return perf_event_set_bpf_handler(event, prog_fd);
+ 
  	if (event->attr.type != PERF_TYPE_TRACEPOINT)
  		return -EINVAL;
  
@@@ -7709,6 -7754,8 +7781,8 @@@ static void perf_event_free_bpf_prog(st
  {
  	struct bpf_prog *prog;
  
+ 	perf_event_free_bpf_handler(event);
+ 
  	if (!event->tp_event)
  		return;
  
@@@ -7886,7 -7933,7 +7960,7 @@@ static void perf_event_addr_filters_app
  	mmput(mm);
  
  restart:
 -	perf_event_restart(event);
 +	perf_event_stop(event, 1);
  }
  
  /*
@@@ -9025,6 -9072,19 +9099,19 @@@ perf_event_alloc(struct perf_event_att
  	if (!overflow_handler && parent_event) {
  		overflow_handler = parent_event->overflow_handler;
  		context = parent_event->overflow_handler_context;
+ #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
+ 		if (overflow_handler == bpf_overflow_handler) {
+ 			struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
+ 
+ 			if (IS_ERR(prog)) {
+ 				err = PTR_ERR(prog);
+ 				goto err_ns;
+ 			}
+ 			event->prog = prog;
+ 			event->orig_overflow_handler =
+ 				parent_event->orig_overflow_handler;
+ 		}
+ #endif
  	}
  
  	if (overflow_handler) {
diff --combined net/batman-adv/routing.c
index 3d19947,610f2c4..7e8dc64
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@@ -74,11 -74,23 +74,23 @@@ static void _batadv_update_route(struc
  	if (!orig_ifinfo)
  		return;
  
- 	rcu_read_lock();
- 	curr_router = rcu_dereference(orig_ifinfo->router);
- 	if (curr_router && !kref_get_unless_zero(&curr_router->refcount))
- 		curr_router = NULL;
- 	rcu_read_unlock();
+ 	spin_lock_bh(&orig_node->neigh_list_lock);
+ 	/* curr_router used earlier may not be the current orig_ifinfo->router
+ 	 * anymore because it was dereferenced outside of the neigh_list_lock
+ 	 * protected region. After the new best neighbor has replace the current
+ 	 * best neighbor the reference counter needs to decrease. Consequently,
+ 	 * the code needs to ensure the curr_router variable contains a pointer
+ 	 * to the replaced best neighbor.
+ 	 */
+ 	curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
+ 
+ 	/* increase refcount of new best neighbor */
+ 	if (neigh_node)
+ 		kref_get(&neigh_node->refcount);
+ 
+ 	rcu_assign_pointer(orig_ifinfo->router, neigh_node);
+ 	spin_unlock_bh(&orig_node->neigh_list_lock);
+ 	batadv_orig_ifinfo_put(orig_ifinfo);
  
  	/* route deleted */
  	if ((curr_router) && (!neigh_node)) {
@@@ -100,27 -112,6 +112,6 @@@
  			   curr_router->addr);
  	}
  
- 	if (curr_router)
- 		batadv_neigh_node_put(curr_router);
- 
- 	spin_lock_bh(&orig_node->neigh_list_lock);
- 	/* curr_router used earlier may not be the current orig_ifinfo->router
- 	 * anymore because it was dereferenced outside of the neigh_list_lock
- 	 * protected region. After the new best neighbor has replace the current
- 	 * best neighbor the reference counter needs to decrease. Consequently,
- 	 * the code needs to ensure the curr_router variable contains a pointer
- 	 * to the replaced best neighbor.
- 	 */
- 	curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
- 
- 	/* increase refcount of new best neighbor */
- 	if (neigh_node)
- 		kref_get(&neigh_node->refcount);
- 
- 	rcu_assign_pointer(orig_ifinfo->router, neigh_node);
- 	spin_unlock_bh(&orig_node->neigh_list_lock);
- 	batadv_orig_ifinfo_put(orig_ifinfo);
- 
  	/* decrease refcount of previous best neighbor */
  	if (curr_router)
  		batadv_neigh_node_put(curr_router);
@@@ -470,29 -461,6 +461,29 @@@ static int batadv_check_unicast_packet(
  }
  
  /**
 + * batadv_last_bonding_get - Get last_bonding_candidate of orig_node
 + * @orig_node: originator node whose last bonding candidate should be retrieved
 + *
 + * Return: last bonding candidate of router or NULL if not found
 + *
 + * The object is returned with refcounter increased by 1.
 + */
 +static struct batadv_orig_ifinfo *
 +batadv_last_bonding_get(struct batadv_orig_node *orig_node)
 +{
 +	struct batadv_orig_ifinfo *last_bonding_candidate;
 +
 +	spin_lock_bh(&orig_node->neigh_list_lock);
 +	last_bonding_candidate = orig_node->last_bonding_candidate;
 +
 +	if (last_bonding_candidate)
 +		kref_get(&last_bonding_candidate->refcount);
 +	spin_unlock_bh(&orig_node->neigh_list_lock);
 +
 +	return last_bonding_candidate;
 +}
 +
 +/**
   * batadv_last_bonding_replace - Replace last_bonding_candidate of orig_node
   * @orig_node: originator node whose bonding candidates should be replaced
   * @new_candidate: new bonding candidate or NULL
@@@ -562,7 -530,7 +553,7 @@@ batadv_find_router(struct batadv_priv *
  	 * router - obviously there are no other candidates.
  	 */
  	rcu_read_lock();
 -	last_candidate = orig_node->last_bonding_candidate;
 +	last_candidate = batadv_last_bonding_get(orig_node);
  	if (last_candidate)
  		last_cand_router = rcu_dereference(last_candidate->router);
  
@@@ -654,9 -622,6 +645,9 @@@ next
  		batadv_orig_ifinfo_put(next_candidate);
  	}
  
 +	if (last_candidate)
 +		batadv_orig_ifinfo_put(last_candidate);
 +
  	return router;
  }
  
diff --combined net/core/sock.c
index fd7b41e,51a7304..038e660
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@@ -1315,24 -1315,6 +1315,6 @@@ static void sock_copy(struct sock *nsk
  #endif
  }
  
- void sk_prot_clear_portaddr_nulls(struct sock *sk, int size)
- {
- 	unsigned long nulls1, nulls2;
- 
- 	nulls1 = offsetof(struct sock, __sk_common.skc_node.next);
- 	nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next);
- 	if (nulls1 > nulls2)
- 		swap(nulls1, nulls2);
- 
- 	if (nulls1 != 0)
- 		memset((char *)sk, 0, nulls1);
- 	memset((char *)sk + nulls1 + sizeof(void *), 0,
- 	       nulls2 - nulls1 - sizeof(void *));
- 	memset((char *)sk + nulls2 + sizeof(void *), 0,
- 	       size - nulls2 - sizeof(void *));
- }
- EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls);
- 
  static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
  		int family)
  {
@@@ -1344,12 -1326,8 +1326,8 @@@
  		sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
  		if (!sk)
  			return sk;
- 		if (priority & __GFP_ZERO) {
- 			if (prot->clear_sk)
- 				prot->clear_sk(sk, prot->obj_size);
- 			else
- 				sk_prot_clear_nulls(sk, prot->obj_size);
- 		}
+ 		if (priority & __GFP_ZERO)
+ 			sk_prot_clear_nulls(sk, prot->obj_size);
  	} else
  		sk = kmalloc(prot->obj_size, priority);
  
@@@ -1362,6 -1340,7 +1340,6 @@@
  		if (!try_module_get(prot->owner))
  			goto out_free_sec;
  		sk_tx_queue_clear(sk);
 -		cgroup_sk_alloc(&sk->sk_cgrp_data);
  	}
  
  	return sk;
@@@ -1421,7 -1400,6 +1399,7 @@@ struct sock *sk_alloc(struct net *net, 
  		sock_net_set(sk, net);
  		atomic_set(&sk->sk_wmem_alloc, 1);
  
 +		cgroup_sk_alloc(&sk->sk_cgrp_data);
  		sock_update_classid(&sk->sk_cgrp_data);
  		sock_update_netprioidx(&sk->sk_cgrp_data);
  	}
@@@ -1566,9 -1544,6 +1544,9 @@@ struct sock *sk_clone_lock(const struc
  		newsk->sk_priority = 0;
  		newsk->sk_incoming_cpu = raw_smp_processor_id();
  		atomic64_set(&newsk->sk_cookie, 0);
 +
 +		cgroup_sk_alloc(&newsk->sk_cgrp_data);
 +
  		/*
  		 * Before updating sk_refcnt, we must commit prior changes to memory
  		 * (Documentation/RCU/rculist_nulls.txt for details)
diff --combined net/ipv4/route.c
index b5b47a2,b52496f..654a9af
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@@ -476,18 -476,12 +476,18 @@@ u32 ip_idents_reserve(u32 hash, int seg
  	atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ;
  	u32 old = ACCESS_ONCE(*p_tstamp);
  	u32 now = (u32)jiffies;
 -	u32 delta = 0;
 +	u32 new, delta = 0;
  
  	if (old != now && cmpxchg(p_tstamp, old, now) == old)
  		delta = prandom_u32_max(now - old);
  
 -	return atomic_add_return(segs + delta, p_id) - segs;
 +	/* Do not use atomic_add_return() as it makes UBSAN unhappy */
 +	do {
 +		old = (u32)atomic_read(p_id);
 +		new = old + delta + segs;
 +	} while (atomic_cmpxchg(p_id, old, new) != old);
 +
 +	return new - segs;
  }
  EXPORT_SYMBOL(ip_idents_reserve);
  
@@@ -1252,7 -1246,9 +1252,9 @@@ static unsigned int ipv4_mtu(const stru
  			mtu = 576;
  	}
  
- 	return min_t(unsigned int, mtu, IP_MAX_MTU);
+ 	mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
+ 
+ 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
  }
  
  static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
@@@ -1835,7 -1831,7 +1837,7 @@@ static int ip_route_input_slow(struct s
  	 *	Now we are ready to route packet.
  	 */
  	fl4.flowi4_oif = 0;
- 	fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev);
+ 	fl4.flowi4_iif = dev->ifindex;
  	fl4.flowi4_mark = skb->mark;
  	fl4.flowi4_tos = tos;
  	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
@@@ -2022,7 -2018,9 +2024,9 @@@ static struct rtable *__mkroute_output(
  		return ERR_PTR(-EINVAL);
  
  	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
- 		if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
+ 		if (ipv4_is_loopback(fl4->saddr) &&
+ 		    !(dev_out->flags & IFF_LOOPBACK) &&
+ 		    !netif_is_l3_master(dev_out))
  			return ERR_PTR(-EINVAL);
  
  	if (ipv4_is_lbcast(fl4->daddr))
@@@ -2152,7 -2150,6 +2156,6 @@@ struct rtable *__ip_route_output_key_ha
  	unsigned int flags = 0;
  	struct fib_result res;
  	struct rtable *rth;
- 	int master_idx;
  	int orig_oif;
  	int err = -ENETUNREACH;
  
@@@ -2162,9 -2159,6 +2165,6 @@@
  
  	orig_oif = fl4->flowi4_oif;
  
- 	master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif);
- 	if (master_idx)
- 		fl4->flowi4_oif = master_idx;
  	fl4->flowi4_iif = LOOPBACK_IFINDEX;
  	fl4->flowi4_tos = tos & IPTOS_RT_MASK;
  	fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
@@@ -2248,10 -2242,6 +2248,6 @@@
  				fl4->saddr = inet_select_addr(dev_out, 0,
  							      RT_SCOPE_HOST);
  		}
- 
- 		rth = l3mdev_get_rtable(dev_out, fl4);
- 		if (rth)
- 			goto out;
  	}
  
  	if (!fl4->daddr) {
@@@ -2269,8 -2259,7 +2265,7 @@@
  	if (err) {
  		res.fi = NULL;
  		res.table = NULL;
- 		if (fl4->flowi4_oif &&
- 		    !netif_index_is_l3_master(net, fl4->flowi4_oif)) {
+ 		if (fl4->flowi4_oif) {
  			/* Apparently, routing tables are wrong. Assume,
  			   that the destination is on link.
  
@@@ -2306,7 -2295,9 +2301,9 @@@
  			else
  				fl4->saddr = fl4->daddr;
  		}
- 		dev_out = net->loopback_dev;
+ 
+ 		/* L3 master device is the loopback for that domain */
+ 		dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev;
  		fl4->flowi4_oif = dev_out->ifindex;
  		flags |= RTCF_LOCAL;
  		goto make_route;
@@@ -2581,9 -2572,6 +2578,6 @@@ static int inet_rtm_getroute(struct sk_
  	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
  	fl4.flowi4_mark = mark;
  
- 	if (netif_index_is_l3_master(net, fl4.flowi4_oif))
- 		fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF;
- 
  	if (iif) {
  		struct net_device *dev;
  
diff --combined net/ipv4/tcp_input.c
index 08323bd,980a83e..4062ed2
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@@ -289,6 -289,7 +289,7 @@@ static bool tcp_ecn_rcv_ecn_echo(const 
  static void tcp_sndbuf_expand(struct sock *sk)
  {
  	const struct tcp_sock *tp = tcp_sk(sk);
+ 	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
  	int sndmem, per_mss;
  	u32 nr_segs;
  
@@@ -309,7 -310,8 +310,8 @@@
  	 * Cubic needs 1.7 factor, rounded to 2 to include
  	 * extra cushion (application might react slowly to POLLOUT)
  	 */
- 	sndmem = 2 * nr_segs * per_mss;
+ 	sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2;
+ 	sndmem *= nr_segs * per_mss;
  
  	if (sk->sk_sndbuf < sndmem)
  		sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
@@@ -899,12 -901,29 +901,29 @@@ static void tcp_verify_retransmit_hint(
  		tp->retransmit_high = TCP_SKB_CB(skb)->end_seq;
  }
  
+ /* Sum the number of packets on the wire we have marked as lost.
+  * There are two cases we care about here:
+  * a) Packet hasn't been marked lost (nor retransmitted),
+  *    and this is the first loss.
+  * b) Packet has been marked both lost and retransmitted,
+  *    and this means we think it was lost again.
+  */
+ static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb)
+ {
+ 	__u8 sacked = TCP_SKB_CB(skb)->sacked;
+ 
+ 	if (!(sacked & TCPCB_LOST) ||
+ 	    ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS)))
+ 		tp->lost += tcp_skb_pcount(skb);
+ }
+ 
  static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb)
  {
  	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
  		tcp_verify_retransmit_hint(tp, skb);
  
  		tp->lost_out += tcp_skb_pcount(skb);
+ 		tcp_sum_lost(tp, skb);
  		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
  	}
  }
@@@ -913,6 -932,7 +932,7 @@@ void tcp_skb_mark_lost_uncond_verify(st
  {
  	tcp_verify_retransmit_hint(tp, skb);
  
+ 	tcp_sum_lost(tp, skb);
  	if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
  		tp->lost_out += tcp_skb_pcount(skb);
  		TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
@@@ -1094,6 -1114,7 +1114,7 @@@ struct tcp_sacktag_state 
  	 */
  	struct skb_mstamp first_sackt;
  	struct skb_mstamp last_sackt;
+ 	struct rate_sample *rate;
  	int	flag;
  };
  
@@@ -1261,6 -1282,7 +1282,7 @@@ static bool tcp_shifted_skb(struct soc
  	tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked,
  			start_seq, end_seq, dup_sack, pcount,
  			&skb->skb_mstamp);
+ 	tcp_rate_skb_delivered(sk, skb, state->rate);
  
  	if (skb == tp->lost_skb_hint)
  		tp->lost_cnt_hint += pcount;
@@@ -1311,6 -1333,9 +1333,9 @@@
  		tcp_advance_highest_sack(sk, skb);
  
  	tcp_skb_collapse_tstamp(prev, skb);
+ 	if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64))
+ 		TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0;
+ 
  	tcp_unlink_write_queue(skb, sk);
  	sk_wmem_free_skb(sk, skb);
  
@@@ -1540,6 -1565,7 +1565,7 @@@ static struct sk_buff *tcp_sacktag_walk
  						dup_sack,
  						tcp_skb_pcount(skb),
  						&skb->skb_mstamp);
+ 			tcp_rate_skb_delivered(sk, skb, state->rate);
  
  			if (!before(TCP_SKB_CB(skb)->seq,
  				    tcp_highest_sack_seq(tp)))
@@@ -1622,8 -1648,10 +1648,10 @@@ tcp_sacktag_write_queue(struct sock *sk
  
  	found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire,
  					 num_sacks, prior_snd_una);
- 	if (found_dup_sack)
+ 	if (found_dup_sack) {
  		state->flag |= FLAG_DSACKING_ACK;
+ 		tp->delivered++; /* A spurious retransmission is delivered */
+ 	}
  
  	/* Eliminate too old ACKs, but take into
  	 * account more or less fresh ones, they can
@@@ -1890,6 -1918,7 +1918,7 @@@ void tcp_enter_loss(struct sock *sk
  	struct sk_buff *skb;
  	bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
  	bool is_reneg;			/* is receiver reneging on SACKs? */
+ 	bool mark_lost;
  
  	/* Reduce ssthresh if it has not yet been made inside this window. */
  	if (icsk->icsk_ca_state <= TCP_CA_Disorder ||
@@@ -1923,8 -1952,12 +1952,12 @@@
  		if (skb == tcp_send_head(sk))
  			break;
  
+ 		mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
+ 			     is_reneg);
+ 		if (mark_lost)
+ 			tcp_sum_lost(tp, skb);
  		TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
- 		if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) {
+ 		if (mark_lost) {
  			TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
  			TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
  			tp->lost_out += tcp_skb_pcount(skb);
@@@ -2503,6 -2536,9 +2536,9 @@@ static inline void tcp_end_cwnd_reducti
  {
  	struct tcp_sock *tp = tcp_sk(sk);
  
+ 	if (inet_csk(sk)->icsk_ca_ops->cong_control)
+ 		return;
+ 
  	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
  	if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
  	    (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
@@@ -2879,67 -2915,13 +2915,13 @@@ static void tcp_fastretrans_alert(struc
  	*rexmit = REXMIT_LOST;
  }
  
- /* Kathleen Nichols' algorithm for tracking the minimum value of
-  * a data stream over some fixed time interval. (E.g., the minimum
-  * RTT over the past five minutes.) It uses constant space and constant
-  * time per update yet almost always delivers the same minimum as an
-  * implementation that has to keep all the data in the window.
-  *
-  * The algorithm keeps track of the best, 2nd best & 3rd best min
-  * values, maintaining an invariant that the measurement time of the
-  * n'th best >= n-1'th best. It also makes sure that the three values
-  * are widely separated in the time window since that bounds the worse
-  * case error when that data is monotonically increasing over the window.
-  *
-  * Upon getting a new min, we can forget everything earlier because it
-  * has no value - the new min is <= everything else in the window by
-  * definition and it's the most recent. So we restart fresh on every new min
-  * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd
-  * best.
-  */
  static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us)
  {
- 	const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ;
- 	struct rtt_meas *m = tcp_sk(sk)->rtt_min;
- 	struct rtt_meas rttm = {
- 		.rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1),
- 		.ts = now,
- 	};
- 	u32 elapsed;
- 
- 	/* Check if the new measurement updates the 1st, 2nd, or 3rd choices */
- 	if (unlikely(rttm.rtt <= m[0].rtt))
- 		m[0] = m[1] = m[2] = rttm;
- 	else if (rttm.rtt <= m[1].rtt)
- 		m[1] = m[2] = rttm;
- 	else if (rttm.rtt <= m[2].rtt)
- 		m[2] = rttm;
- 
- 	elapsed = now - m[0].ts;
- 	if (unlikely(elapsed > wlen)) {
- 		/* Passed entire window without a new min so make 2nd choice
- 		 * the new min & 3rd choice the new 2nd. So forth and so on.
- 		 */
- 		m[0] = m[1];
- 		m[1] = m[2];
- 		m[2] = rttm;
- 		if (now - m[0].ts > wlen) {
- 			m[0] = m[1];
- 			m[1] = rttm;
- 			if (now - m[0].ts > wlen)
- 				m[0] = rttm;
- 		}
- 	} else if (m[1].ts == m[0].ts && elapsed > wlen / 4) {
- 		/* Passed a quarter of the window without a new min so
- 		 * take 2nd choice from the 2nd quarter of the window.
- 		 */
- 		m[2] = m[1] = rttm;
- 	} else if (m[2].ts == m[1].ts && elapsed > wlen / 2) {
- 		/* Passed half the window without a new min so take the 3rd
- 		 * choice from the last half of the window.
- 		 */
- 		m[2] = rttm;
- 	}
+ 	struct tcp_sock *tp = tcp_sk(sk);
+ 	u32 wlen = sysctl_tcp_min_rtt_wlen * HZ;
+ 
+ 	minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp,
+ 			   rtt_us ? : jiffies_to_usecs(1));
  }
  
  static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag,
@@@ -3102,10 -3084,11 +3084,11 @@@ static void tcp_ack_tstamp(struct sock 
   */
  static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
  			       u32 prior_snd_una, int *acked,
- 			       struct tcp_sacktag_state *sack)
+ 			       struct tcp_sacktag_state *sack,
+ 			       struct skb_mstamp *now)
  {
  	const struct inet_connection_sock *icsk = inet_csk(sk);
- 	struct skb_mstamp first_ackt, last_ackt, now;
+ 	struct skb_mstamp first_ackt, last_ackt;
  	struct tcp_sock *tp = tcp_sk(sk);
  	u32 prior_sacked = tp->sacked_out;
  	u32 reord = tp->packets_out;
@@@ -3137,7 -3120,6 +3120,6 @@@
  			acked_pcount = tcp_tso_acked(sk, skb);
  			if (!acked_pcount)
  				break;
- 
  			fully_acked = false;
  		} else {
  			/* Speedup tcp_unlink_write_queue() and next loop */
@@@ -3173,6 -3155,7 +3155,7 @@@
  
  		tp->packets_out -= acked_pcount;
  		pkts_acked += acked_pcount;
+ 		tcp_rate_skb_delivered(sk, skb, sack->rate);
  
  		/* Initial outgoing SYN's get put onto the write_queue
  		 * just like anything else we transmit.  It is not
@@@ -3205,16 -3188,15 +3188,15 @@@
  	if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
  		flag |= FLAG_SACK_RENEGING;
  
- 	skb_mstamp_get(&now);
  	if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) {
- 		seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt);
- 		ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt);
+ 		seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt);
+ 		ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt);
  	}
  	if (sack->first_sackt.v64) {
- 		sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt);
- 		ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt);
+ 		sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt);
+ 		ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt);
  	}
- 
+ 	sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */
  	rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us,
  					ca_rtt_us);
  
@@@ -3242,7 -3224,7 +3224,7 @@@
  		tp->fackets_out -= min(pkts_acked, tp->fackets_out);
  
  	} else if (skb && rtt_update && sack_rtt_us >= 0 &&
- 		   sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) {
+ 		   sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) {
  		/* Do not re-arm RTO if the sack RTT is measured from data sent
  		 * after when the head was last (re)transmitted. Otherwise the
  		 * timeout may continue to extend in loss recovery.
@@@ -3333,8 -3315,15 +3315,15 @@@ static inline bool tcp_may_raise_cwnd(c
   * information. All transmission or retransmission are delayed afterwards.
   */
  static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked,
- 			     int flag)
+ 			     int flag, const struct rate_sample *rs)
  {
+ 	const struct inet_connection_sock *icsk = inet_csk(sk);
+ 
+ 	if (icsk->icsk_ca_ops->cong_control) {
+ 		icsk->icsk_ca_ops->cong_control(sk, rs);
+ 		return;
+ 	}
+ 
  	if (tcp_in_cwnd_reduction(sk)) {
  		/* Reduce cwnd if state mandates */
  		tcp_cwnd_reduction(sk, acked_sacked, flag);
@@@ -3579,17 -3568,21 +3568,21 @@@ static int tcp_ack(struct sock *sk, con
  	struct inet_connection_sock *icsk = inet_csk(sk);
  	struct tcp_sock *tp = tcp_sk(sk);
  	struct tcp_sacktag_state sack_state;
+ 	struct rate_sample rs = { .prior_delivered = 0 };
  	u32 prior_snd_una = tp->snd_una;
  	u32 ack_seq = TCP_SKB_CB(skb)->seq;
  	u32 ack = TCP_SKB_CB(skb)->ack_seq;
  	bool is_dupack = false;
  	u32 prior_fackets;
  	int prior_packets = tp->packets_out;
- 	u32 prior_delivered = tp->delivered;
+ 	u32 delivered = tp->delivered;
+ 	u32 lost = tp->lost;
  	int acked = 0; /* Number of packets newly acked */
  	int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
+ 	struct skb_mstamp now;
  
  	sack_state.first_sackt.v64 = 0;
+ 	sack_state.rate = &rs;
  
  	/* We very likely will need to access write queue head. */
  	prefetchw(sk->sk_write_queue.next);
@@@ -3612,6 -3605,8 +3605,8 @@@
  	if (after(ack, tp->snd_nxt))
  		goto invalid_ack;
  
+ 	skb_mstamp_get(&now);
+ 
  	if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
  	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
  		tcp_rearm_rto(sk);
@@@ -3622,6 -3617,7 +3617,7 @@@
  	}
  
  	prior_fackets = tp->fackets_out;
+ 	rs.prior_in_flight = tcp_packets_in_flight(tp);
  
  	/* ts_recent update must be made after we are sure that the packet
  	 * is in window.
@@@ -3677,7 -3673,7 +3673,7 @@@
  
  	/* See if we can take anything off of the retransmit queue. */
  	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
- 				    &sack_state);
+ 				    &sack_state, &now);
  
  	if (tcp_ack_is_dubious(sk, flag)) {
  		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
@@@ -3694,7 -3690,10 +3690,10 @@@
  
  	if (icsk->icsk_pending == ICSK_TIME_RETRANS)
  		tcp_schedule_loss_probe(sk);
- 	tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag);
+ 	delivered = tp->delivered - delivered;	/* freshly ACKed or SACKed */
+ 	lost = tp->lost - lost;			/* freshly marked lost */
+ 	tcp_rate_gen(sk, delivered, lost, &now, &rs);
+ 	tcp_cong_control(sk, ack, delivered, flag, &rs);
  	tcp_xmit_recovery(sk, rexmit);
  	return 1;
  
@@@ -4108,7 -4107,7 +4107,7 @@@ void tcp_fin(struct sock *sk
  	/* It _is_ possible, that we have something out-of-order _after_ FIN.
  	 * Probably, we should reset in this case. For now drop them.
  	 */
- 	__skb_queue_purge(&tp->out_of_order_queue);
+ 	skb_rbtree_purge(&tp->out_of_order_queue);
  	if (tcp_is_sack(tp))
  		tcp_sack_reset(&tp->rx_opt);
  	sk_mem_reclaim(sk);
@@@ -4268,7 -4267,7 +4267,7 @@@ static void tcp_sack_remove(struct tcp_
  	int this_sack;
  
  	/* Empty ofo queue, hence, all the SACKs are eaten. Clear. */
- 	if (skb_queue_empty(&tp->out_of_order_queue)) {
+ 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
  		tp->rx_opt.num_sacks = 0;
  		return;
  	}
@@@ -4344,10 -4343,13 +4343,13 @@@ static void tcp_ofo_queue(struct sock *
  {
  	struct tcp_sock *tp = tcp_sk(sk);
  	__u32 dsack_high = tp->rcv_nxt;
+ 	bool fin, fragstolen, eaten;
  	struct sk_buff *skb, *tail;
- 	bool fragstolen, eaten;
+ 	struct rb_node *p;
  
- 	while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) {
+ 	p = rb_first(&tp->out_of_order_queue);
+ 	while (p) {
+ 		skb = rb_entry(p, struct sk_buff, rbnode);
  		if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt))
  			break;
  
@@@ -4357,9 -4359,10 +4359,10 @@@
  				dsack_high = TCP_SKB_CB(skb)->end_seq;
  			tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack);
  		}
+ 		p = rb_next(p);
+ 		rb_erase(&skb->rbnode, &tp->out_of_order_queue);
  
- 		__skb_unlink(skb, &tp->out_of_order_queue);
- 		if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
+ 		if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) {
  			SOCK_DEBUG(sk, "ofo packet was already received\n");
  			tcp_drop(sk, skb);
  			continue;
@@@ -4371,12 -4374,19 +4374,19 @@@
  		tail = skb_peek_tail(&sk->sk_receive_queue);
  		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
  		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
+ 		fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
  		if (!eaten)
  			__skb_queue_tail(&sk->sk_receive_queue, skb);
- 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
- 			tcp_fin(sk);
- 		if (eaten)
+ 		else
  			kfree_skb_partial(skb, fragstolen);
+ 
+ 		if (unlikely(fin)) {
+ 			tcp_fin(sk);
+ 			/* tcp_fin() purges tp->out_of_order_queue,
+ 			 * so we must end this loop right now.
+ 			 */
+ 			break;
+ 		}
  	}
  }
  
@@@ -4392,12 -4402,9 +4402,9 @@@ static int tcp_try_rmem_schedule(struc
  		if (tcp_prune_queue(sk) < 0)
  			return -1;
  
- 		if (!sk_rmem_schedule(sk, skb, size)) {
+ 		while (!sk_rmem_schedule(sk, skb, size)) {
  			if (!tcp_prune_ofo_queue(sk))
  				return -1;
- 
- 			if (!sk_rmem_schedule(sk, skb, size))
- 				return -1;
  		}
  	}
  	return 0;
@@@ -4406,8 -4413,10 +4413,10 @@@
  static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
  {
  	struct tcp_sock *tp = tcp_sk(sk);
+ 	struct rb_node **p, *q, *parent;
  	struct sk_buff *skb1;
  	u32 seq, end_seq;
+ 	bool fragstolen;
  
  	tcp_ecn_check_ce(tp, skb);
  
@@@ -4422,88 -4431,92 +4431,92 @@@
  	inet_csk_schedule_ack(sk);
  
  	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
+ 	seq = TCP_SKB_CB(skb)->seq;
+ 	end_seq = TCP_SKB_CB(skb)->end_seq;
  	SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
- 		   tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
+ 		   tp->rcv_nxt, seq, end_seq);
  
- 	skb1 = skb_peek_tail(&tp->out_of_order_queue);
- 	if (!skb1) {
+ 	p = &tp->out_of_order_queue.rb_node;
+ 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
  		/* Initial out of order segment, build 1 SACK. */
  		if (tcp_is_sack(tp)) {
  			tp->rx_opt.num_sacks = 1;
- 			tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq;
- 			tp->selective_acks[0].end_seq =
- 						TCP_SKB_CB(skb)->end_seq;
+ 			tp->selective_acks[0].start_seq = seq;
+ 			tp->selective_acks[0].end_seq = end_seq;
  		}
- 		__skb_queue_head(&tp->out_of_order_queue, skb);
+ 		rb_link_node(&skb->rbnode, NULL, p);
+ 		rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
+ 		tp->ooo_last_skb = skb;
  		goto end;
  	}
  
- 	seq = TCP_SKB_CB(skb)->seq;
- 	end_seq = TCP_SKB_CB(skb)->end_seq;
- 
- 	if (seq == TCP_SKB_CB(skb1)->end_seq) {
- 		bool fragstolen;
- 
- 		if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
- 			__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
- 		} else {
- 			tcp_grow_window(sk, skb);
- 			kfree_skb_partial(skb, fragstolen);
- 			skb = NULL;
- 		}
- 
- 		if (!tp->rx_opt.num_sacks ||
- 		    tp->selective_acks[0].end_seq != seq)
- 			goto add_sack;
- 
- 		/* Common case: data arrive in order after hole. */
- 		tp->selective_acks[0].end_seq = end_seq;
- 		goto end;
- 	}
- 
- 	/* Find place to insert this segment. */
- 	while (1) {
- 		if (!after(TCP_SKB_CB(skb1)->seq, seq))
- 			break;
- 		if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) {
- 			skb1 = NULL;
- 			break;
- 		}
- 		skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1);
- 	}
- 
- 	/* Do skb overlap to previous one? */
- 	if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) {
- 		if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
- 			/* All the bits are present. Drop. */
- 			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
- 			tcp_drop(sk, skb);
- 			skb = NULL;
- 			tcp_dsack_set(sk, seq, end_seq);
- 			goto add_sack;
+ 	/* In the typical case, we are adding an skb to the end of the list.
+ 	 * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
+ 	 */
+ 	if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
+ coalesce_done:
+ 		tcp_grow_window(sk, skb);
+ 		kfree_skb_partial(skb, fragstolen);
+ 		skb = NULL;
+ 		goto add_sack;
+ 	}
+ 	/* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */
+ 	if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) {
+ 		parent = &tp->ooo_last_skb->rbnode;
+ 		p = &parent->rb_right;
+ 		goto insert;
+ 	}
+ 
+ 	/* Find place to insert this segment. Handle overlaps on the way. */
+ 	parent = NULL;
+ 	while (*p) {
+ 		parent = *p;
+ 		skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ 		if (before(seq, TCP_SKB_CB(skb1)->seq)) {
+ 			p = &parent->rb_left;
+ 			continue;
  		}
- 		if (after(seq, TCP_SKB_CB(skb1)->seq)) {
- 			/* Partial overlap. */
- 			tcp_dsack_set(sk, seq,
- 				      TCP_SKB_CB(skb1)->end_seq);
- 		} else {
- 			if (skb_queue_is_first(&tp->out_of_order_queue,
- 					       skb1))
- 				skb1 = NULL;
- 			else
- 				skb1 = skb_queue_prev(
- 					&tp->out_of_order_queue,
- 					skb1);
+ 		if (before(seq, TCP_SKB_CB(skb1)->end_seq)) {
+ 			if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+ 				/* All the bits are present. Drop. */
+ 				NET_INC_STATS(sock_net(sk),
+ 					      LINUX_MIB_TCPOFOMERGE);
+ 				__kfree_skb(skb);
+ 				skb = NULL;
+ 				tcp_dsack_set(sk, seq, end_seq);
+ 				goto add_sack;
+ 			}
+ 			if (after(seq, TCP_SKB_CB(skb1)->seq)) {
+ 				/* Partial overlap. */
+ 				tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq);
+ 			} else {
+ 				/* skb's seq == skb1's seq and skb covers skb1.
+ 				 * Replace skb1 with skb.
+ 				 */
+ 				rb_replace_node(&skb1->rbnode, &skb->rbnode,
+ 						&tp->out_of_order_queue);
+ 				tcp_dsack_extend(sk,
+ 						 TCP_SKB_CB(skb1)->seq,
+ 						 TCP_SKB_CB(skb1)->end_seq);
+ 				NET_INC_STATS(sock_net(sk),
+ 					      LINUX_MIB_TCPOFOMERGE);
+ 				__kfree_skb(skb1);
+ 				goto merge_right;
+ 			}
+ 		} else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
+ 			goto coalesce_done;
  		}
+ 		p = &parent->rb_right;
  	}
- 	if (!skb1)
- 		__skb_queue_head(&tp->out_of_order_queue, skb);
- 	else
- 		__skb_queue_after(&tp->out_of_order_queue, skb1, skb);
+ insert:
+ 	/* Insert segment into RB tree. */
+ 	rb_link_node(&skb->rbnode, parent, p);
+ 	rb_insert_color(&skb->rbnode, &tp->out_of_order_queue);
  
- 	/* And clean segments covered by new one as whole. */
- 	while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) {
- 		skb1 = skb_queue_next(&tp->out_of_order_queue, skb);
+ merge_right:
+ 	/* Remove other segments covered by skb. */
+ 	while ((q = rb_next(&skb->rbnode)) != NULL) {
+ 		skb1 = rb_entry(q, struct sk_buff, rbnode);
  
  		if (!after(end_seq, TCP_SKB_CB(skb1)->seq))
  			break;
@@@ -4512,12 -4525,15 +4525,15 @@@
  					 end_seq);
  			break;
  		}
- 		__skb_unlink(skb1, &tp->out_of_order_queue);
+ 		rb_erase(&skb1->rbnode, &tp->out_of_order_queue);
  		tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq,
  				 TCP_SKB_CB(skb1)->end_seq);
  		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE);
  		tcp_drop(sk, skb1);
  	}
+ 	/* If there is no skb after us, we are the last_skb ! */
+ 	if (!q)
+ 		tp->ooo_last_skb = skb;
  
  add_sack:
  	if (tcp_is_sack(tp))
@@@ -4654,13 -4670,13 +4670,13 @@@ queue_and_out
  		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
  			tcp_fin(sk);
  
- 		if (!skb_queue_empty(&tp->out_of_order_queue)) {
+ 		if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) {
  			tcp_ofo_queue(sk);
  
  			/* RFC2581. 4.2. SHOULD send immediate ACK, when
  			 * gap in queue is filled.
  			 */
- 			if (skb_queue_empty(&tp->out_of_order_queue))
+ 			if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
  				inet_csk(sk)->icsk_ack.pingpong = 0;
  		}
  
@@@ -4714,48 -4730,76 +4730,76 @@@ drop
  	tcp_data_queue_ofo(sk, skb);
  }
  
+ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list)
+ {
+ 	if (list)
+ 		return !skb_queue_is_last(list, skb) ? skb->next : NULL;
+ 
+ 	return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode);
+ }
+ 
  static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb,
- 					struct sk_buff_head *list)
+ 					struct sk_buff_head *list,
+ 					struct rb_root *root)
  {
- 	struct sk_buff *next = NULL;
+ 	struct sk_buff *next = tcp_skb_next(skb, list);
  
- 	if (!skb_queue_is_last(list, skb))
- 		next = skb_queue_next(list, skb);
+ 	if (list)
+ 		__skb_unlink(skb, list);
+ 	else
+ 		rb_erase(&skb->rbnode, root);
  
- 	__skb_unlink(skb, list);
  	__kfree_skb(skb);
  	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED);
  
  	return next;
  }
  
+ /* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */
+ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb)
+ {
+ 	struct rb_node **p = &root->rb_node;
+ 	struct rb_node *parent = NULL;
+ 	struct sk_buff *skb1;
+ 
+ 	while (*p) {
+ 		parent = *p;
+ 		skb1 = rb_entry(parent, struct sk_buff, rbnode);
+ 		if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq))
+ 			p = &parent->rb_left;
+ 		else
+ 			p = &parent->rb_right;
+ 	}
+ 	rb_link_node(&skb->rbnode, parent, p);
+ 	rb_insert_color(&skb->rbnode, root);
+ }
+ 
  /* Collapse contiguous sequence of skbs head..tail with
   * sequence numbers start..end.
   *
-  * If tail is NULL, this means until the end of the list.
+  * If tail is NULL, this means until the end of the queue.
   *
   * Segments with FIN/SYN are not collapsed (only because this
   * simplifies code)
   */
  static void
- tcp_collapse(struct sock *sk, struct sk_buff_head *list,
- 	     struct sk_buff *head, struct sk_buff *tail,
- 	     u32 start, u32 end)
+ tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root,
+ 	     struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end)
  {
- 	struct sk_buff *skb, *n;
+ 	struct sk_buff *skb = head, *n;
+ 	struct sk_buff_head tmp;
  	bool end_of_skbs;
  
  	/* First, check that queue is collapsible and find
- 	 * the point where collapsing can be useful. */
- 	skb = head;
+ 	 * the point where collapsing can be useful.
+ 	 */
  restart:
- 	end_of_skbs = true;
- 	skb_queue_walk_from_safe(list, skb, n) {
- 		if (skb == tail)
- 			break;
+ 	for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) {
+ 		n = tcp_skb_next(skb, list);
+ 
  		/* No new bits? It is possible on ofo queue. */
  		if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
- 			skb = tcp_collapse_one(sk, skb, list);
+ 			skb = tcp_collapse_one(sk, skb, list, root);
  			if (!skb)
  				break;
  			goto restart;
@@@ -4773,13 -4817,10 +4817,10 @@@
  			break;
  		}
  
- 		if (!skb_queue_is_last(list, skb)) {
- 			struct sk_buff *next = skb_queue_next(list, skb);
- 			if (next != tail &&
- 			    TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) {
- 				end_of_skbs = false;
- 				break;
- 			}
+ 		if (n && n != tail &&
+ 		    TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
+ 			end_of_skbs = false;
+ 			break;
  		}
  
  		/* Decided to skip this, advance start seq. */
@@@ -4789,17 -4830,22 +4830,22 @@@
  	    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
  		return;
  
+ 	__skb_queue_head_init(&tmp);
+ 
  	while (before(start, end)) {
  		int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start);
  		struct sk_buff *nskb;
  
  		nskb = alloc_skb(copy, GFP_ATOMIC);
  		if (!nskb)
- 			return;
+ 			break;
  
  		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
  		TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
- 		__skb_queue_before(list, skb, nskb);
+ 		if (list)
+ 			__skb_queue_before(list, skb, nskb);
+ 		else
+ 			__skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
  		skb_set_owner_r(nskb, sk);
  
  		/* Copy data, releasing collapsed skbs. */
@@@ -4817,14 -4863,17 +4863,17 @@@
  				start += size;
  			}
  			if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
- 				skb = tcp_collapse_one(sk, skb, list);
+ 				skb = tcp_collapse_one(sk, skb, list, root);
  				if (!skb ||
  				    skb == tail ||
  				    (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
- 					return;
+ 					goto end;
  			}
  		}
  	}
+ end:
+ 	skb_queue_walk_safe(&tmp, skb, n)
+ 		tcp_rbtree_insert(root, skb);
  }
  
  /* Collapse ofo queue. Algorithm: select contiguous sequence of skbs
@@@ -4833,70 -4882,86 +4882,86 @@@
  static void tcp_collapse_ofo_queue(struct sock *sk)
  {
  	struct tcp_sock *tp = tcp_sk(sk);
- 	struct sk_buff *skb = skb_peek(&tp->out_of_order_queue);
- 	struct sk_buff *head;
+ 	struct sk_buff *skb, *head;
+ 	struct rb_node *p;
  	u32 start, end;
  
- 	if (!skb)
+ 	p = rb_first(&tp->out_of_order_queue);
+ 	skb = rb_entry_safe(p, struct sk_buff, rbnode);
+ new_range:
+ 	if (!skb) {
+ 		p = rb_last(&tp->out_of_order_queue);
+ 		/* Note: It is possible that p is NULL here. We do not
+ 		 * use rb_entry_safe(), as ooo_last_skb is valid only
+ 		 * if rbtree is not empty.
+ 		 */
+ 		tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode);
  		return;
- 
+ 	}
  	start = TCP_SKB_CB(skb)->seq;
  	end = TCP_SKB_CB(skb)->end_seq;
- 	head = skb;
- 
- 	for (;;) {
- 		struct sk_buff *next = NULL;
  
- 		if (!skb_queue_is_last(&tp->out_of_order_queue, skb))
- 			next = skb_queue_next(&tp->out_of_order_queue, skb);
- 		skb = next;
+ 	for (head = skb;;) {
+ 		skb = tcp_skb_next(skb, NULL);
  
- 		/* Segment is terminated when we see gap or when
- 		 * we are at the end of all the queue. */
+ 		/* Range is terminated when we see a gap or when
+ 		 * we are at the queue end.
+ 		 */
  		if (!skb ||
  		    after(TCP_SKB_CB(skb)->seq, end) ||
  		    before(TCP_SKB_CB(skb)->end_seq, start)) {
- 			tcp_collapse(sk, &tp->out_of_order_queue,
+ 			tcp_collapse(sk, NULL, &tp->out_of_order_queue,
  				     head, skb, start, end);
- 			head = skb;
- 			if (!skb)
- 				break;
- 			/* Start new segment */
+ 			goto new_range;
+ 		}
+ 
+ 		if (unlikely(before(TCP_SKB_CB(skb)->seq, start)))
  			start = TCP_SKB_CB(skb)->seq;
+ 		if (after(TCP_SKB_CB(skb)->end_seq, end))
  			end = TCP_SKB_CB(skb)->end_seq;
- 		} else {
- 			if (before(TCP_SKB_CB(skb)->seq, start))
- 				start = TCP_SKB_CB(skb)->seq;
- 			if (after(TCP_SKB_CB(skb)->end_seq, end))
- 				end = TCP_SKB_CB(skb)->end_seq;
- 		}
  	}
  }
  
  /*
-  * Purge the out-of-order queue.
-  * Return true if queue was pruned.
+  * Clean the out-of-order queue to make room.
+  * We drop high-sequence packets to:
+  * 1) Give holes a chance to be filled.
+  * 2) Avoid adding too much latency if thousands of packets sit there.
+  *    (But if application shrinks SO_RCVBUF, we could still end up
+  *     freeing whole queue here)
+  *
+  * Return true if queue has shrunk.
   */
  static bool tcp_prune_ofo_queue(struct sock *sk)
  {
  	struct tcp_sock *tp = tcp_sk(sk);
- 	bool res = false;
+ 	struct rb_node *node, *prev;
  
- 	if (!skb_queue_empty(&tp->out_of_order_queue)) {
- 		NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
- 		__skb_queue_purge(&tp->out_of_order_queue);
+ 	if (RB_EMPTY_ROOT(&tp->out_of_order_queue))
+ 		return false;
  
- 		/* Reset SACK state.  A conforming SACK implementation will
- 		 * do the same at a timeout based retransmit.  When a connection
- 		 * is in a sad state like this, we care only about integrity
- 		 * of the connection not performance.
- 		 */
- 		if (tp->rx_opt.sack_ok)
- 			tcp_sack_reset(&tp->rx_opt);
+ 	NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED);
+ 	node = &tp->ooo_last_skb->rbnode;
+ 	do {
+ 		prev = rb_prev(node);
+ 		rb_erase(node, &tp->out_of_order_queue);
+ 		tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode));
  		sk_mem_reclaim(sk);
- 		res = true;
- 	}
- 	return res;
+ 		if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
+ 		    !tcp_under_memory_pressure(sk))
+ 			break;
+ 		node = prev;
+ 	} while (node);
+ 	tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode);
+ 
+ 	/* Reset SACK state.  A conforming SACK implementation will
+ 	 * do the same at a timeout based retransmit.  When a connection
+ 	 * is in a sad state like this, we care only about integrity
+ 	 * of the connection not performance.
+ 	 */
+ 	if (tp->rx_opt.sack_ok)
+ 		tcp_sack_reset(&tp->rx_opt);
+ 	return true;
  }
  
  /* Reduce allocated memory if we can, trying to get
@@@ -4921,7 -4986,7 +4986,7 @@@ static int tcp_prune_queue(struct sock 
  
  	tcp_collapse_ofo_queue(sk);
  	if (!skb_queue_empty(&sk->sk_receive_queue))
- 		tcp_collapse(sk, &sk->sk_receive_queue,
+ 		tcp_collapse(sk, &sk->sk_receive_queue, NULL,
  			     skb_peek(&sk->sk_receive_queue),
  			     NULL,
  			     tp->copied_seq, tp->rcv_nxt);
@@@ -5026,7 -5091,7 +5091,7 @@@ static void __tcp_ack_snd_check(struct 
  	    /* We ACK each frame or... */
  	    tcp_in_quickack_mode(sk) ||
  	    /* We have out of order data. */
- 	    (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
+ 	    (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) {
  		/* Then ack it now */
  		tcp_send_ack(sk);
  	} else {
@@@ -5885,7 -5950,7 +5950,7 @@@ int tcp_rcv_state_process(struct sock *
  		 * so release it.
  		 */
  		if (req) {
 -			tp->total_retrans = req->num_retrans;
 +			inet_csk(sk)->icsk_retransmits = 0;
  			reqsk_fastopen_remove(sk, req, false);
  		} else {
  			/* Make sure socket is routed, for correct metrics. */
@@@ -5927,7 -5992,8 +5992,8 @@@
  		} else
  			tcp_init_metrics(sk);
  
- 		tcp_update_pacing_rate(sk);
+ 		if (!inet_csk(sk)->icsk_ca_ops->cong_control)
+ 			tcp_update_pacing_rate(sk);
  
  		/* Prevent spurious tcp_cwnd_restart() on first data packet */
  		tp->lsndtime = tcp_time_stamp;
diff --combined net/ipv4/tcp_output.c
index 5288cec,478dfc5..7c777089
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@@ -734,9 -734,16 +734,16 @@@ static void tcp_tsq_handler(struct soc
  {
  	if ((1 << sk->sk_state) &
  	    (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
- 	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK))
- 		tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+ 	     TCPF_CLOSE_WAIT  | TCPF_LAST_ACK)) {
+ 		struct tcp_sock *tp = tcp_sk(sk);
+ 
+ 		if (tp->lost_out > tp->retrans_out &&
+ 		    tp->snd_cwnd > tcp_packets_in_flight(tp))
+ 			tcp_xmit_retransmit_queue(sk);
+ 
+ 		tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle,
  			       0, GFP_ATOMIC);
+ 	}
  }
  /*
   * One tasklet per cpu tries to send more skbs.
@@@ -918,6 -925,7 +925,7 @@@ static int tcp_transmit_skb(struct soc
  		skb_mstamp_get(&skb->skb_mstamp);
  		TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
  			- tp->snd_una;
+ 		tcp_rate_skb_sent(sk, skb);
  
  		if (unlikely(skb_cloned(skb)))
  			skb = pskb_copy(skb, gfp_mask);
@@@ -1213,6 -1221,9 +1221,9 @@@ int tcp_fragment(struct sock *sk, struc
  	tcp_set_skb_tso_segs(skb, mss_now);
  	tcp_set_skb_tso_segs(buff, mss_now);
  
+ 	/* Update delivered info for the new segment */
+ 	TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx;
+ 
  	/* If this packet has been sent out already, we must
  	 * adjust the various packet counters.
  	 */
@@@ -1358,6 -1369,7 +1369,7 @@@ int tcp_mss_to_mtu(struct sock *sk, in
  	}
  	return mtu;
  }
+ EXPORT_SYMBOL(tcp_mss_to_mtu);
  
  /* MTU probing init per socket */
  void tcp_mtup_init(struct sock *sk)
@@@ -1545,7 -1557,8 +1557,8 @@@ static bool tcp_nagle_check(bool partia
  /* Return how many segs we'd like on a TSO packet,
   * to send one TSO packet per ms
   */
- static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now)
+ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
+ 		     int min_tso_segs)
  {
  	u32 bytes, segs;
  
@@@ -1557,10 -1570,23 +1570,23 @@@
  	 * This preserves ACK clocking and is consistent
  	 * with tcp_tso_should_defer() heuristic.
  	 */
- 	segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs);
+ 	segs = max_t(u32, bytes / mss_now, min_tso_segs);
  
  	return min_t(u32, segs, sk->sk_gso_max_segs);
  }
+ EXPORT_SYMBOL(tcp_tso_autosize);
+ 
+ /* Return the number of segments we want in the skb we are transmitting.
+  * See if congestion control module wants to decide; otherwise, autosize.
+  */
+ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now)
+ {
+ 	const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops;
+ 	u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0;
+ 
+ 	return tso_segs ? :
+ 		tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs);
+ }
  
  /* Returns the portion of skb which can be sent right away */
  static unsigned int tcp_mss_split_point(const struct sock *sk,
@@@ -2020,6 -2046,39 +2046,39 @@@ static int tcp_mtu_probe(struct sock *s
  	return -1;
  }
  
+ /* TCP Small Queues :
+  * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+  * (These limits are doubled for retransmits)
+  * This allows for :
+  *  - better RTT estimation and ACK scheduling
+  *  - faster recovery
+  *  - high rates
+  * Alas, some drivers / subsystems require a fair amount
+  * of queued bytes to ensure line rate.
+  * One example is wifi aggregation (802.11 AMPDU)
+  */
+ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
+ 				  unsigned int factor)
+ {
+ 	unsigned int limit;
+ 
+ 	limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
+ 	limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
+ 	limit <<= factor;
+ 
+ 	if (atomic_read(&sk->sk_wmem_alloc) > limit) {
+ 		set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags);
+ 		/* It is possible TX completion already happened
+ 		 * before we set TSQ_THROTTLED, so we must
+ 		 * test again the condition.
+ 		 */
+ 		smp_mb__after_atomic();
+ 		if (atomic_read(&sk->sk_wmem_alloc) > limit)
+ 			return true;
+ 	}
+ 	return false;
+ }
+ 
  /* This routine writes packets to the network.  It advances the
   * send_head.  This happens as incoming acks open up the remote
   * window for us.
@@@ -2057,7 -2116,7 +2116,7 @@@ static bool tcp_write_xmit(struct sock 
  		}
  	}
  
- 	max_segs = tcp_tso_autosize(sk, mss_now);
+ 	max_segs = tcp_tso_segs(sk, mss_now);
  	while ((skb = tcp_send_head(sk))) {
  		unsigned int limit;
  
@@@ -2106,29 -2165,8 +2165,8 @@@
  		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
  			break;
  
- 		/* TCP Small Queues :
- 		 * Control number of packets in qdisc/devices to two packets / or ~1 ms.
- 		 * This allows for :
- 		 *  - better RTT estimation and ACK scheduling
- 		 *  - faster recovery
- 		 *  - high rates
- 		 * Alas, some drivers / subsystems require a fair amount
- 		 * of queued bytes to ensure line rate.
- 		 * One example is wifi aggregation (802.11 AMPDU)
- 		 */
- 		limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10);
- 		limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes);
- 
- 		if (atomic_read(&sk->sk_wmem_alloc) > limit) {
- 			set_bit(TSQ_THROTTLED, &tp->tsq_flags);
- 			/* It is possible TX completion already happened
- 			 * before we set TSQ_THROTTLED, so we must
- 			 * test again the condition.
- 			 */
- 			smp_mb__after_atomic();
- 			if (atomic_read(&sk->sk_wmem_alloc) > limit)
- 				break;
- 		}
+ 		if (tcp_small_queue_check(sk, skb, 0))
+ 			break;
  
  		if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
  			break;
@@@ -2605,8 -2643,7 +2643,8 @@@ int __tcp_retransmit_skb(struct sock *s
  	 * copying overhead: fragmentation, tunneling, mangling etc.
  	 */
  	if (atomic_read(&sk->sk_wmem_alloc) >
 -	    min(sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), sk->sk_sndbuf))
 +	    min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2),
 +		  sk->sk_sndbuf))
  		return -EAGAIN;
  
  	if (skb_still_in_host_queue(sk, skb))
@@@ -2775,9 -2812,9 +2813,9 @@@ void tcp_xmit_retransmit_queue(struct s
  		last_lost = tp->snd_una;
  	}
  
- 	max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk));
+ 	max_segs = tcp_tso_segs(sk, tcp_current_mss(sk));
  	tcp_for_write_queue_from(skb, sk) {
- 		__u8 sacked = TCP_SKB_CB(skb)->sacked;
+ 		__u8 sacked;
  		int segs;
  
  		if (skb == tcp_send_head(sk))
@@@ -2789,6 -2826,7 +2827,7 @@@
  		segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
  		if (segs <= 0)
  			return;
+ 		sacked = TCP_SKB_CB(skb)->sacked;
  		/* In case tcp_shift_skb_data() have aggregated large skbs,
  		 * we need to make sure not sending too bigs TSO packets
  		 */
@@@ -2828,10 -2866,13 +2867,13 @@@ begin_fwd
  		if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))
  			continue;
  
+ 		if (tcp_small_queue_check(sk, skb, 1))
+ 			return;
+ 
  		if (tcp_retransmit_skb(sk, skb, segs))
  			return;
  
 -		NET_INC_STATS(sock_net(sk), mib_idx);
 +		NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb));
  
  		if (tcp_in_cwnd_reduction(sk))
  			tp->prr_out += tcp_skb_pcount(skb);
@@@ -3568,8 -3609,6 +3610,8 @@@ int tcp_rtx_synack(const struct sock *s
  	if (!res) {
  		__TCP_INC_STATS(sock_net(sk), TCP_MIB_RETRANSSEGS);
  		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
 +		if (unlikely(tcp_passive_fastopen(sk)))
 +			tcp_sk(sk)->total_retrans++;
  	}
  	return res;
  }
diff --combined net/ipv6/ip6_vti.c
index 5bd3afd,cc7e058..8a02ca8
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@@ -50,14 -50,14 +50,14 @@@
  #include <net/net_namespace.h>
  #include <net/netns/generic.h>
  
- #define HASH_SIZE_SHIFT  5
- #define HASH_SIZE (1 << HASH_SIZE_SHIFT)
+ #define IP6_VTI_HASH_SIZE_SHIFT  5
+ #define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT)
  
  static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2)
  {
  	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2);
  
- 	return hash_32(hash, HASH_SIZE_SHIFT);
+ 	return hash_32(hash, IP6_VTI_HASH_SIZE_SHIFT);
  }
  
  static int vti6_dev_init(struct net_device *dev);
@@@ -69,7 -69,7 +69,7 @@@ struct vti6_net 
  	/* the vti6 tunnel fallback device */
  	struct net_device *fb_tnl_dev;
  	/* lists for storing tunnels in use */
- 	struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE];
+ 	struct ip6_tnl __rcu *tnls_r_l[IP6_VTI_HASH_SIZE];
  	struct ip6_tnl __rcu *tnls_wc[1];
  	struct ip6_tnl __rcu **tnls[2];
  };
@@@ -321,9 -321,11 +321,9 @@@ static int vti6_rcv(struct sk_buff *skb
  			goto discard;
  		}
  
 -		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
 -
  		rcu_read_unlock();
  
 -		return xfrm6_rcv(skb);
 +		return xfrm6_rcv_tnl(skb, t);
  	}
  	rcu_read_unlock();
  	return -EINVAL;
@@@ -338,7 -340,6 +338,7 @@@ static int vti6_rcv_cb(struct sk_buff *
  	struct net_device *dev;
  	struct pcpu_sw_netstats *tstats;
  	struct xfrm_state *x;
 +	struct xfrm_mode *inner_mode;
  	struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
  	u32 orig_mark = skb->mark;
  	int ret;
@@@ -356,19 -357,7 +356,19 @@@
  	}
  
  	x = xfrm_input_state(skb);
 -	family = x->inner_mode->afinfo->family;
 +
 +	inner_mode = x->inner_mode;
 +
 +	if (x->sel.family == AF_UNSPEC) {
 +		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
 +		if (inner_mode == NULL) {
 +			XFRM_INC_STATS(dev_net(skb->dev),
 +				       LINUX_MIB_XFRMINSTATEMODEERROR);
 +			return -EINVAL;
 +		}
 +	}
 +
 +	family = inner_mode->afinfo->family;
  
  	skb->mark = be32_to_cpu(t->parms.i_key);
  	ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
@@@ -1051,7 -1040,7 +1051,7 @@@ static void __net_exit vti6_destroy_tun
  	struct ip6_tnl *t;
  	LIST_HEAD(list);
  
- 	for (h = 0; h < HASH_SIZE; h++) {
+ 	for (h = 0; h < IP6_VTI_HASH_SIZE; h++) {
  		t = rtnl_dereference(ip6n->tnls_r_l[h]);
  		while (t) {
  			unregister_netdevice_queue(t->dev, &list);
diff --combined net/ipv6/route.c
index e3a224b,4dab585..5a5aeb9
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@@ -1147,15 -1147,16 +1147,16 @@@ static struct rt6_info *ip6_pol_route_i
  	return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
  }
  
- static struct dst_entry *ip6_route_input_lookup(struct net *net,
- 						struct net_device *dev,
- 						struct flowi6 *fl6, int flags)
+ struct dst_entry *ip6_route_input_lookup(struct net *net,
+ 					 struct net_device *dev,
+ 					 struct flowi6 *fl6, int flags)
  {
  	if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
  		flags |= RT6_LOOKUP_F_IFACE;
  
  	return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
  }
+ EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
  
  void ip6_route_input(struct sk_buff *skb)
  {
@@@ -1164,7 -1165,7 +1165,7 @@@
  	int flags = RT6_LOOKUP_F_HAS_SADDR;
  	struct ip_tunnel_info *tun_info;
  	struct flowi6 fl6 = {
- 		.flowi6_iif = l3mdev_fib_oif(skb->dev),
+ 		.flowi6_iif = skb->dev->ifindex,
  		.daddr = iph->daddr,
  		.saddr = iph->saddr,
  		.flowlabel = ip6_flowinfo(iph),
@@@ -1188,12 -1189,15 +1189,15 @@@ static struct rt6_info *ip6_pol_route_o
  struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
  					 struct flowi6 *fl6, int flags)
  {
- 	struct dst_entry *dst;
  	bool any_src;
  
- 	dst = l3mdev_get_rt6_dst(net, fl6);
- 	if (dst)
- 		return dst;
+ 	if (rt6_need_strict(&fl6->daddr)) {
+ 		struct dst_entry *dst;
+ 
+ 		dst = l3mdev_link_scope_lookup(net, fl6);
+ 		if (dst)
+ 			return dst;
+ 	}
  
  	fl6->flowi6_iif = LOOPBACK_IFINDEX;
  
@@@ -1604,7 -1608,9 +1608,9 @@@ static unsigned int ip6_mtu(const struc
  	rcu_read_unlock();
  
  out:
- 	return min_t(unsigned int, mtu, IP6_MAX_MTU);
+ 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+ 
+ 	return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
  }
  
  static struct dst_entry *icmp6_dst_gc_list;
@@@ -1986,18 -1992,9 +1992,18 @@@ static struct rt6_info *ip6_route_info_
  			if (!(gwa_type & IPV6_ADDR_UNICAST))
  				goto out;
  
 -			if (cfg->fc_table)
 +			if (cfg->fc_table) {
  				grt = ip6_nh_lookup_table(net, cfg, gw_addr);
  
 +				if (grt) {
 +					if (grt->rt6i_flags & RTF_GATEWAY ||
 +					    (dev && dev != grt->dst.dev)) {
 +						ip6_rt_put(grt);
 +						grt = NULL;
 +					}
 +				}
 +			}
 +
  			if (!grt)
  				grt = rt6_lookup(net, gw_addr, NULL,
  						 cfg->fc_ifindex, 1);
@@@ -2565,8 -2562,16 +2571,16 @@@ struct rt6_info *addrconf_dst_alloc(str
  {
  	u32 tb_id;
  	struct net *net = dev_net(idev->dev);
- 	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
- 					    DST_NOCOUNT);
+ 	struct net_device *dev = net->loopback_dev;
+ 	struct rt6_info *rt;
+ 
+ 	/* use L3 Master device as loopback for host routes if device
+ 	 * is enslaved and address is not link local or multicast
+ 	 */
+ 	if (!rt6_need_strict(addr))
+ 		dev = l3mdev_master_dev_rcu(idev->dev) ? : dev;
+ 
+ 	rt = ip6_dst_alloc(net, dev, DST_NOCOUNT);
  	if (!rt)
  		return ERR_PTR(-ENOMEM);
  
@@@ -3345,11 -3350,6 +3359,6 @@@ static int inet6_rtm_getroute(struct sk
  	} else {
  		fl6.flowi6_oif = oif;
  
- 		if (netif_index_is_l3_master(net, oif)) {
- 			fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC |
- 					   FLOWI_FLAG_SKIP_NH_OIF;
- 		}
- 
  		rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
  	}
  
diff --combined net/irda/af_irda.c
index ccc2444,db63969..391c3cb
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@@ -832,7 -832,7 +832,7 @@@ static int irda_accept(struct socket *s
  	struct sock *sk = sock->sk;
  	struct irda_sock *new, *self = irda_sk(sk);
  	struct sock *newsk;
 -	struct sk_buff *skb;
 +	struct sk_buff *skb = NULL;
  	int err;
  
  	err = irda_create(sock_net(sk), newsock, sk->sk_protocol, 0);
@@@ -845,9 -845,6 +845,6 @@@
  	if (sock->state != SS_UNCONNECTED)
  		goto out;
  
- 	if ((sk = sock->sk) == NULL)
- 		goto out;
- 
  	err = -EOPNOTSUPP;
  	if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) &&
  	    (sk->sk_type != SOCK_DGRAM))
@@@ -900,6 -897,7 +897,6 @@@
  	err = -EPERM; /* value does not seem to make sense. -arnd */
  	if (!new->tsap) {
  		pr_debug("%s(), dup failed!\n", __func__);
 -		kfree_skb(skb);
  		goto out;
  	}
  
@@@ -918,6 -916,7 +915,6 @@@
  	/* Clean up the original one to keep it in listen state */
  	irttp_listen(self->tsap);
  
 -	kfree_skb(skb);
  	sk->sk_ack_backlog--;
  
  	newsock->state = SS_CONNECTED;
@@@ -925,7 -924,6 +922,7 @@@
  	irda_connect_response(new);
  	err = 0;
  out:
 +	kfree_skb(skb);
  	release_sock(sk);
  	return err;
  }
diff --combined net/mac80211/agg-rx.c
index afa9468,a5d69df..f6749dc
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@@ -261,16 -261,10 +261,16 @@@ void __ieee80211_start_rx_ba_session(st
  		.timeout = timeout,
  		.ssn = start_seq_num,
  	};
 -
  	int i, ret = -EOPNOTSUPP;
  	u16 status = WLAN_STATUS_REQUEST_DECLINED;
  
 +	if (tid >= IEEE80211_FIRST_TSPEC_TSID) {
 +		ht_dbg(sta->sdata,
 +		       "STA %pM requests BA session on unsupported tid %d\n",
 +		       sta->sta.addr, tid);
 +		goto end_no_lock;
 +	}
 +
  	if (!sta->sta.ht_cap.ht_supported) {
  		ht_dbg(sta->sdata,
  		       "STA %pM erroneously requests BA session on tid %d w/o QoS\n",
@@@ -304,10 -298,13 +304,13 @@@
  		buf_size = IEEE80211_MAX_AMPDU_BUF;
  
  	/* make sure the size doesn't exceed the maximum supported by the hw */
- 	if (buf_size > local->hw.max_rx_aggregation_subframes)
- 		buf_size = local->hw.max_rx_aggregation_subframes;
+ 	if (buf_size > sta->sta.max_rx_aggregation_subframes)
+ 		buf_size = sta->sta.max_rx_aggregation_subframes;
  	params.buf_size = buf_size;
  
+ 	ht_dbg(sta->sdata, "AddBA Req buf_size=%d for %pM\n",
+ 	       buf_size, sta->sta.addr);
+ 
  	/* examine state machine */
  	mutex_lock(&sta->ampdu_mlme.mtx);
  
@@@ -412,8 -409,10 +415,10 @@@
  	}
  
  end:
- 	if (status == WLAN_STATUS_SUCCESS)
+ 	if (status == WLAN_STATUS_SUCCESS) {
  		__set_bit(tid, sta->ampdu_mlme.agg_session_valid);
+ 		__clear_bit(tid, sta->ampdu_mlme.unexpected_agg);
+ 	}
  	mutex_unlock(&sta->ampdu_mlme.mtx);
  
  end_no_lock:
diff --combined net/mac80211/mesh_hwmp.c
index faccef9,fa7d37c..b747c96
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@@ -326,22 -326,33 +326,33 @@@ static u32 airtime_link_metric_get(stru
  	u32 tx_time, estimated_retx;
  	u64 result;
  
- 	if (sta->mesh->fail_avg >= 100)
- 		return MAX_METRIC;
+ 	/* Try to get rate based on HW/SW RC algorithm.
+ 	 * Rate is returned in units of Kbps, correct this
+ 	 * to comply with airtime calculation units
+ 	 * Round up in case we get rate < 100Kbps
+ 	 */
+ 	rate = DIV_ROUND_UP(sta_get_expected_throughput(sta), 100);
+ 
+ 	if (rate) {
+ 		err = 0;
+ 	} else {
+ 		if (sta->mesh->fail_avg >= 100)
+ 			return MAX_METRIC;
  
- 	sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
- 	rate = cfg80211_calculate_bitrate(&rinfo);
- 	if (WARN_ON(!rate))
- 		return MAX_METRIC;
+ 		sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo);
+ 		rate = cfg80211_calculate_bitrate(&rinfo);
+ 		if (WARN_ON(!rate))
+ 			return MAX_METRIC;
  
- 	err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+ 		err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100;
+ 	}
  
  	/* bitrate is in units of 100 Kbps, while we need rate in units of
  	 * 1Mbps. This will be corrected on tx_time computation.
  	 */
  	tx_time = (device_constant + 10 * test_frame_len / rate);
  	estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err));
- 	result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ;
+ 	result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT);
  	return (u32)result;
  }
  
@@@ -746,7 -757,6 +757,7 @@@ static void hwmp_perr_frame_process(str
  		sta = next_hop_deref_protected(mpath);
  		if (mpath->flags & MESH_PATH_ACTIVE &&
  		    ether_addr_equal(ta, sta->sta.addr) &&
 +		    !(mpath->flags & MESH_PATH_FIXED) &&
  		    (!(mpath->flags & MESH_PATH_SN_VALID) ||
  		    SN_GT(target_sn, mpath->sn)  || target_sn == 0)) {
  			mpath->flags &= ~MESH_PATH_ACTIVE;
@@@ -1013,7 -1023,7 +1024,7 @@@ void mesh_path_start_discovery(struct i
  		goto enddiscovery;
  
  	spin_lock_bh(&mpath->state_lock);
 -	if (mpath->flags & MESH_PATH_DELETED) {
 +	if (mpath->flags & (MESH_PATH_DELETED | MESH_PATH_FIXED)) {
  		spin_unlock_bh(&mpath->state_lock);
  		goto enddiscovery;
  	}
diff --combined net/mac80211/sta_info.c
index aa58df8,c803e2c..011880d
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@@ -67,12 -67,10 +67,10 @@@
  
  static const struct rhashtable_params sta_rht_params = {
  	.nelem_hint = 3, /* start small */
- 	.insecure_elasticity = true, /* Disable chain-length checks. */
  	.automatic_shrinking = true,
  	.head_offset = offsetof(struct sta_info, hash_node),
  	.key_offset = offsetof(struct sta_info, addr),
  	.key_len = ETH_ALEN,
- 	.hashfn = sta_addr_hash,
  	.max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE,
  };
  
@@@ -80,8 -78,8 +78,8 @@@
  static int sta_info_hash_del(struct ieee80211_local *local,
  			     struct sta_info *sta)
  {
- 	return rhashtable_remove_fast(&local->sta_hash, &sta->hash_node,
- 				      sta_rht_params);
+ 	return rhltable_remove(&local->sta_hash, &sta->hash_node,
+ 			       sta_rht_params);
  }
  
  static void __cleanup_single_sta(struct sta_info *sta)
@@@ -157,19 -155,22 +155,22 @@@ static void cleanup_single_sta(struct s
  	sta_info_free(local, sta);
  }
  
+ struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local,
+ 					 const u8 *addr)
+ {
+ 	return rhltable_lookup(&local->sta_hash, addr, sta_rht_params);
+ }
+ 
  /* protected by RCU */
  struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata,
  			      const u8 *addr)
  {
  	struct ieee80211_local *local = sdata->local;
+ 	struct rhlist_head *tmp;
  	struct sta_info *sta;
- 	struct rhash_head *tmp;
- 	const struct bucket_table *tbl;
  
  	rcu_read_lock();
- 	tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
- 
- 	for_each_sta_info(local, tbl, addr, sta, tmp) {
+ 	for_each_sta_info(local, addr, sta, tmp) {
  		if (sta->sdata == sdata) {
  			rcu_read_unlock();
  			/* this is safe as the caller must already hold
@@@ -190,14 -191,11 +191,11 @@@ struct sta_info *sta_info_get_bss(struc
  				  const u8 *addr)
  {
  	struct ieee80211_local *local = sdata->local;
+ 	struct rhlist_head *tmp;
  	struct sta_info *sta;
- 	struct rhash_head *tmp;
- 	const struct bucket_table *tbl;
  
  	rcu_read_lock();
- 	tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
- 
- 	for_each_sta_info(local, tbl, addr, sta, tmp) {
+ 	for_each_sta_info(local, addr, sta, tmp) {
  		if (sta->sdata == sdata ||
  		    (sta->sdata->bss && sta->sdata->bss == sdata->bss)) {
  			rcu_read_unlock();
@@@ -263,8 -261,8 +261,8 @@@ void sta_info_free(struct ieee80211_loc
  static int sta_info_hash_add(struct ieee80211_local *local,
  			     struct sta_info *sta)
  {
- 	return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node,
- 				      sta_rht_params);
+ 	return rhltable_insert(&local->sta_hash, &sta->hash_node,
+ 			       sta_rht_params);
  }
  
  static void sta_deliver_ps_frames(struct work_struct *wk)
@@@ -340,6 -338,9 +338,9 @@@ struct sta_info *sta_info_alloc(struct 
  
  	memcpy(sta->addr, addr, ETH_ALEN);
  	memcpy(sta->sta.addr, addr, ETH_ALEN);
+ 	sta->sta.max_rx_aggregation_subframes =
+ 		local->hw.max_rx_aggregation_subframes;
+ 
  	sta->local = local;
  	sta->sdata = sdata;
  	sta->rx_stats.last_rx = jiffies;
@@@ -450,9 -451,9 +451,9 @@@ static int sta_info_insert_check(struc
  		    is_multicast_ether_addr(sta->sta.addr)))
  		return -EINVAL;
  
- 	/* Strictly speaking this isn't necessary as we hold the mutex, but
- 	 * the rhashtable code can't really deal with that distinction. We
- 	 * do require the mutex for correctness though.
+ 	/* The RCU read lock is required by rhashtable due to
+ 	 * asynchronous resize/rehash.  We also require the mutex
+ 	 * for correctness.
  	 */
  	rcu_read_lock();
  	lockdep_assert_held(&sdata->local->sta_mtx);
@@@ -687,7 -688,7 +688,7 @@@ static void __sta_info_recalc_tim(struc
  	}
  
  	/* No need to do anything if the driver does all */
- 	if (ieee80211_hw_check(&local->hw, AP_LINK_PS))
+ 	if (!local->ops->set_tim)
  		return;
  
  	if (sta->dead)
@@@ -1040,16 -1041,11 +1041,11 @@@ static void sta_info_cleanup(unsigned l
  		  round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL));
  }
  
- u32 sta_addr_hash(const void *key, u32 length, u32 seed)
- {
- 	return jhash(key, ETH_ALEN, seed);
- }
- 
  int sta_info_init(struct ieee80211_local *local)
  {
  	int err;
  
- 	err = rhashtable_init(&local->sta_hash, &sta_rht_params);
+ 	err = rhltable_init(&local->sta_hash, &sta_rht_params);
  	if (err)
  		return err;
  
@@@ -1065,7 -1061,7 +1061,7 @@@
  void sta_info_stop(struct ieee80211_local *local)
  {
  	del_timer_sync(&local->sta_cleanup);
- 	rhashtable_destroy(&local->sta_hash);
+ 	rhltable_destroy(&local->sta_hash);
  }
  
  
@@@ -1135,17 -1131,14 +1131,14 @@@ struct ieee80211_sta *ieee80211_find_st
  						   const u8 *localaddr)
  {
  	struct ieee80211_local *local = hw_to_local(hw);
+ 	struct rhlist_head *tmp;
  	struct sta_info *sta;
- 	struct rhash_head *tmp;
- 	const struct bucket_table *tbl;
- 
- 	tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash);
  
  	/*
  	 * Just return a random station if localaddr is NULL
  	 * ... first in list.
  	 */
- 	for_each_sta_info(local, tbl, addr, sta, tmp) {
+ 	for_each_sta_info(local, addr, sta, tmp) {
  		if (localaddr &&
  		    !ether_addr_equal(sta->sdata->vif.addr, localaddr))
  			continue;
@@@ -1616,6 -1609,7 +1609,6 @@@ ieee80211_sta_ps_deliver_response(struc
  
  		sta_info_recalc_tim(sta);
  	} else {
 -		unsigned long tids = sta->txq_buffered_tids & driver_release_tids;
  		int tid;
  
  		/*
@@@ -1647,8 -1641,7 +1640,8 @@@
  		for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) {
  			struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
  
 -			if (!(tids & BIT(tid)) || txqi->tin.backlog_packets)
 +			if (!(driver_release_tids & BIT(tid)) ||
 +			    txqi->tin.backlog_packets)
  				continue;
  
  			sta_info_recalc_tim(sta);
@@@ -2279,11 -2272,7 +2272,7 @@@ void sta_set_sinfo(struct sta_info *sta
  	if (test_sta_flag(sta, WLAN_STA_TDLS_PEER))
  		sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER);
  
- 	/* check if the driver has a SW RC implementation */
- 	if (ref && ref->ops->get_expected_throughput)
- 		thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
- 	else
- 		thr = drv_get_expected_throughput(local, &sta->sta);
+ 	thr = sta_get_expected_throughput(sta);
  
  	if (thr != 0) {
  		sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT);
@@@ -2291,6 -2280,25 +2280,25 @@@
  	}
  }
  
+ u32 sta_get_expected_throughput(struct sta_info *sta)
+ {
+ 	struct ieee80211_sub_if_data *sdata = sta->sdata;
+ 	struct ieee80211_local *local = sdata->local;
+ 	struct rate_control_ref *ref = NULL;
+ 	u32 thr = 0;
+ 
+ 	if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL))
+ 		ref = local->rate_ctrl;
+ 
+ 	/* check if the driver has a SW RC implementation */
+ 	if (ref && ref->ops->get_expected_throughput)
+ 		thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv);
+ 	else
+ 		thr = drv_get_expected_throughput(local, sta);
+ 
+ 	return thr;
+ }
+ 
  unsigned long ieee80211_sta_last_active(struct sta_info *sta)
  {
  	struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta);
diff --combined net/mac80211/tx.c
index 18b285e,61d302d..1ff08be
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@@ -796,36 -796,6 +796,36 @@@ static __le16 ieee80211_tx_next_seq(str
  	return ret;
  }
  
 +static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
 +					  struct ieee80211_vif *vif,
 +					  struct ieee80211_sta *pubsta,
 +					  struct sk_buff *skb)
 +{
 +	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 +	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 +	struct ieee80211_txq *txq = NULL;
 +
 +	if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
 +	    (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
 +		return NULL;
 +
 +	if (!ieee80211_is_data(hdr->frame_control))
 +		return NULL;
 +
 +	if (pubsta) {
 +		u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
 +
 +		txq = pubsta->txq[tid];
 +	} else if (vif) {
 +		txq = vif->txq;
 +	}
 +
 +	if (!txq)
 +		return NULL;
 +
 +	return to_txq_info(txq);
 +}
 +
  static ieee80211_tx_result debug_noinline
  ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx)
  {
@@@ -883,8 -853,7 +883,8 @@@
  	tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
  	tx->sta->tx_stats.msdu[tid]++;
  
 -	if (!tx->sta->sta.txq[0])
 +	if (!ieee80211_get_txq(tx->local, info->control.vif, &tx->sta->sta,
 +			       tx->skb))
  		hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid);
  
  	return TX_CONTINUE;
@@@ -1274,6 -1243,36 +1274,6 @@@ ieee80211_tx_prepare(struct ieee80211_s
  	return TX_CONTINUE;
  }
  
 -static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local,
 -					  struct ieee80211_vif *vif,
 -					  struct ieee80211_sta *pubsta,
 -					  struct sk_buff *skb)
 -{
 -	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data;
 -	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 -	struct ieee80211_txq *txq = NULL;
 -
 -	if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) ||
 -	    (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE))
 -		return NULL;
 -
 -	if (!ieee80211_is_data(hdr->frame_control))
 -		return NULL;
 -
 -	if (pubsta) {
 -		u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK;
 -
 -		txq = pubsta->txq[tid];
 -	} else if (vif) {
 -		txq = vif->txq;
 -	}
 -
 -	if (!txq)
 -		return NULL;
 -
 -	return to_txq_info(txq);
 -}
 -
  static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb)
  {
  	IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time();
@@@ -1344,7 -1343,7 +1344,7 @@@ static struct sk_buff *fq_tin_dequeue_f
  	local = container_of(fq, struct ieee80211_local, fq);
  	txqi = container_of(tin, struct txq_info, tin);
  	cparams = &local->cparams;
- 	cstats = &local->cstats;
+ 	cstats = &txqi->cstats;
  
  	if (flow == &txqi->def_flow)
  		cvars = &txqi->def_cvars;
@@@ -1404,6 -1403,7 +1404,7 @@@ void ieee80211_txq_init(struct ieee8021
  	fq_tin_init(&txqi->tin);
  	fq_flow_init(&txqi->def_flow);
  	codel_vars_init(&txqi->def_cvars);
+ 	codel_stats_init(&txqi->cstats);
  
  	txqi->txq.vif = &sdata->vif;
  
@@@ -1442,7 -1442,6 +1443,6 @@@ int ieee80211_txq_setup_flows(struct ie
  		return ret;
  
  	codel_params_init(&local->cparams);
- 	codel_stats_init(&local->cstats);
  	local->cparams.interval = MS2TIME(100);
  	local->cparams.target = MS2TIME(20);
  	local->cparams.ecn = true;
@@@ -1515,12 -1514,8 +1515,12 @@@ out
  	spin_unlock_bh(&fq->lock);
  
  	if (skb && skb_has_frag_list(skb) &&
 -	    !ieee80211_hw_check(&local->hw, TX_FRAG_LIST))
 -		skb_linearize(skb);
 +	    !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) {
 +		if (skb_linearize(skb)) {
 +			ieee80211_free_txskb(&local->hw, skb);
 +			return NULL;
 +		}
 +	}
  
  	return skb;
  }
@@@ -1648,7 -1643,7 +1648,7 @@@ static bool __ieee80211_tx(struct ieee8
  
  	switch (sdata->vif.type) {
  	case NL80211_IFTYPE_MONITOR:
- 		if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) {
+ 		if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) {
  			vif = &sdata->vif;
  			break;
  		}
@@@ -2268,15 -2263,9 +2268,9 @@@ static int ieee80211_lookup_ra_sta(stru
  	case NL80211_IFTYPE_STATION:
  		if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) {
  			sta = sta_info_get(sdata, skb->data);
- 			if (sta) {
- 				bool tdls_peer, tdls_auth;
- 
- 				tdls_peer = test_sta_flag(sta,
- 							  WLAN_STA_TDLS_PEER);
- 				tdls_auth = test_sta_flag(sta,
- 						WLAN_STA_TDLS_PEER_AUTH);
- 
- 				if (tdls_peer && tdls_auth) {
+ 			if (sta && test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+ 				if (test_sta_flag(sta,
+ 						  WLAN_STA_TDLS_PEER_AUTH)) {
  					*sta_out = sta;
  					return 0;
  				}
@@@ -2288,8 -2277,7 +2282,7 @@@
  				 * after a TDLS sta is removed due to being
  				 * unreachable.
  				 */
- 				if (tdls_peer && !tdls_auth &&
- 				    !ieee80211_is_tdls_setup(skb))
+ 				if (!ieee80211_is_tdls_setup(skb))
  					return -EINVAL;
  			}
  
@@@ -2339,7 -2327,6 +2332,6 @@@ static struct sk_buff *ieee80211_build_
  	struct mesh_path __maybe_unused *mppath = NULL, *mpath = NULL;
  	const u8 *encaps_data;
  	int encaps_len, skip_header_bytes;
- 	int nh_pos, h_pos;
  	bool wme_sta = false, authorized = false;
  	bool tdls_peer;
  	bool multicast;
@@@ -2645,13 -2632,7 +2637,7 @@@
  		encaps_len = 0;
  	}
  
- 	nh_pos = skb_network_header(skb) - skb->data;
- 	h_pos = skb_transport_header(skb) - skb->data;
- 
  	skb_pull(skb, skip_header_bytes);
- 	nh_pos -= skip_header_bytes;
- 	h_pos -= skip_header_bytes;
- 
  	head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb);
  
  	/*
@@@ -2677,18 -2658,12 +2663,12 @@@
  		}
  	}
  
- 	if (encaps_data) {
+ 	if (encaps_data)
  		memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len);
- 		nh_pos += encaps_len;
- 		h_pos += encaps_len;
- 	}
  
  #ifdef CONFIG_MAC80211_MESH
- 	if (meshhdrlen > 0) {
+ 	if (meshhdrlen > 0)
  		memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen);
- 		nh_pos += meshhdrlen;
- 		h_pos += meshhdrlen;
- 	}
  #endif
  
  	if (ieee80211_is_data_qos(fc)) {
@@@ -2704,15 -2679,7 +2684,7 @@@
  	} else
  		memcpy(skb_push(skb, hdrlen), &hdr, hdrlen);
  
- 	nh_pos += hdrlen;
- 	h_pos += hdrlen;
- 
- 	/* Update skb pointers to various headers since this modified frame
- 	 * is going to go through Linux networking code that may potentially
- 	 * need things like pointer to IP header. */
  	skb_reset_mac_header(skb);
- 	skb_set_network_header(skb, nh_pos);
- 	skb_set_transport_header(skb, h_pos);
  
  	info = IEEE80211_SKB_CB(skb);
  	memset(info, 0, sizeof(*info));
@@@ -3269,7 -3236,7 +3241,7 @@@ static bool ieee80211_xmit_fast(struct 
  
  	if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) {
  		*ieee80211_get_qos_ctl(hdr) = tid;
 -		if (!sta->sta.txq[0])
 +		if (!ieee80211_get_txq(local, &sdata->vif, &sta->sta, skb))
  			hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid);
  	} else {
  		info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
@@@ -4395,9 -4362,6 +4367,6 @@@ void __ieee80211_tx_skb_tid_band(struc
  	int ac = ieee802_1d_to_ac[tid & 7];
  
  	skb_reset_mac_header(skb);
- 	skb_reset_network_header(skb);
- 	skb_reset_transport_header(skb);
- 
  	skb_set_queue_mapping(skb, ac);
  	skb->priority = tid;
  
diff --combined net/netfilter/nf_conntrack_core.c
index 9934b0c,ac1db40..6570982
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@@ -72,12 -72,24 +72,24 @@@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_l
  struct hlist_nulls_head *nf_conntrack_hash __read_mostly;
  EXPORT_SYMBOL_GPL(nf_conntrack_hash);
  
+ struct conntrack_gc_work {
+ 	struct delayed_work	dwork;
+ 	u32			last_bucket;
+ 	bool			exiting;
+ };
+ 
  static __read_mostly struct kmem_cache *nf_conntrack_cachep;
  static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
- static __read_mostly seqcount_t nf_conntrack_generation;
  static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
  static __read_mostly bool nf_conntrack_locks_all;
  
+ #define GC_MAX_BUCKETS_DIV	64u
+ #define GC_MAX_BUCKETS		8192u
+ #define GC_INTERVAL		(5 * HZ)
+ #define GC_MAX_EVICTS		256u
+ 
+ static struct conntrack_gc_work conntrack_gc_work;
+ 
  void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
  {
  	spin_lock(lock);
@@@ -164,7 -176,7 +176,7 @@@ unsigned int nf_conntrack_htable_size _
  EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
  
  unsigned int nf_conntrack_max __read_mostly;
- EXPORT_SYMBOL_GPL(nf_conntrack_max);
+ seqcount_t nf_conntrack_generation __read_mostly;
  
  DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);
  EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);
@@@ -372,7 -384,6 +384,6 @@@ destroy_conntrack(struct nf_conntrack *
  
  	pr_debug("destroy_conntrack(%p)\n", ct);
  	NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
- 	NF_CT_ASSERT(!timer_pending(&ct->timeout));
  
  	if (unlikely(nf_ct_is_template(ct))) {
  		nf_ct_tmpl_free(ct);
@@@ -435,35 -446,30 +446,30 @@@ bool nf_ct_delete(struct nf_conn *ct, u
  {
  	struct nf_conn_tstamp *tstamp;
  
+ 	if (test_and_set_bit(IPS_DYING_BIT, &ct->status))
+ 		return false;
+ 
  	tstamp = nf_conn_tstamp_find(ct);
  	if (tstamp && tstamp->stop == 0)
  		tstamp->stop = ktime_get_real_ns();
  
- 	if (nf_ct_is_dying(ct))
- 		goto delete;
- 
  	if (nf_conntrack_event_report(IPCT_DESTROY, ct,
  				    portid, report) < 0) {
- 		/* destroy event was not delivered */
+ 		/* destroy event was not delivered. nf_ct_put will
+ 		 * be done by event cache worker on redelivery.
+ 		 */
  		nf_ct_delete_from_lists(ct);
  		nf_conntrack_ecache_delayed_work(nf_ct_net(ct));
  		return false;
  	}
  
  	nf_conntrack_ecache_work(nf_ct_net(ct));
- 	set_bit(IPS_DYING_BIT, &ct->status);
-  delete:
  	nf_ct_delete_from_lists(ct);
  	nf_ct_put(ct);
  	return true;
  }
  EXPORT_SYMBOL_GPL(nf_ct_delete);
  
- static void death_by_timeout(unsigned long ul_conntrack)
- {
- 	nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
- }
- 
  static inline bool
  nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
  		const struct nf_conntrack_tuple *tuple,
@@@ -481,22 -487,17 +487,17 @@@
  	       net_eq(net, nf_ct_net(ct));
  }
  
- /* must be called with rcu read lock held */
- void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+ /* caller must hold rcu readlock and none of the nf_conntrack_locks */
+ static void nf_ct_gc_expired(struct nf_conn *ct)
  {
- 	struct hlist_nulls_head *hptr;
- 	unsigned int sequence, hsz;
+ 	if (!atomic_inc_not_zero(&ct->ct_general.use))
+ 		return;
  
- 	do {
- 		sequence = read_seqcount_begin(&nf_conntrack_generation);
- 		hsz = nf_conntrack_htable_size;
- 		hptr = nf_conntrack_hash;
- 	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ 	if (nf_ct_should_gc(ct))
+ 		nf_ct_kill(ct);
  
- 	*hash = hptr;
- 	*hsize = hsz;
+ 	nf_ct_put(ct);
  }
- EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
  
  /*
   * Warning :
@@@ -510,16 -511,24 +511,24 @@@ ____nf_conntrack_find(struct net *net, 
  	struct nf_conntrack_tuple_hash *h;
  	struct hlist_nulls_head *ct_hash;
  	struct hlist_nulls_node *n;
- 	unsigned int bucket, sequence;
+ 	unsigned int bucket, hsize;
  
  begin:
- 	do {
- 		sequence = read_seqcount_begin(&nf_conntrack_generation);
- 		bucket = scale_hash(hash);
- 		ct_hash = nf_conntrack_hash;
- 	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ 	nf_conntrack_get_ht(&ct_hash, &hsize);
+ 	bucket = reciprocal_scale(hash, hsize);
  
  	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) {
+ 		struct nf_conn *ct;
+ 
+ 		ct = nf_ct_tuplehash_to_ctrack(h);
+ 		if (nf_ct_is_expired(ct)) {
+ 			nf_ct_gc_expired(ct);
+ 			continue;
+ 		}
+ 
+ 		if (nf_ct_is_dying(ct))
+ 			continue;
+ 
  		if (nf_ct_key_equal(h, tuple, zone, net)) {
  			NF_CT_STAT_INC_ATOMIC(net, found);
  			return h;
@@@ -618,7 -627,6 +627,6 @@@ nf_conntrack_hash_check_insert(struct n
  				    zone, net))
  			goto out;
  
- 	add_timer(&ct->timeout);
  	smp_wmb();
  	/* The caller holds a reference to this object */
  	atomic_set(&ct->ct_general.use, 2);
@@@ -771,8 -779,7 +779,7 @@@ __nf_conntrack_confirm(struct sk_buff *
  	/* Timer relative to confirmation time, not original
  	   setting time, otherwise we'd get timer wrap in
  	   weird delay cases. */
- 	ct->timeout.expires += jiffies;
- 	add_timer(&ct->timeout);
+ 	ct->timeout += nfct_time_stamp;
  	atomic_inc(&ct->ct_general.use);
  	ct->status |= IPS_CONFIRMED;
  
@@@ -823,29 -830,41 +830,41 @@@ nf_conntrack_tuple_taken(const struct n
  	const struct nf_conntrack_zone *zone;
  	struct nf_conntrack_tuple_hash *h;
  	struct hlist_nulls_head *ct_hash;
- 	unsigned int hash, sequence;
+ 	unsigned int hash, hsize;
  	struct hlist_nulls_node *n;
  	struct nf_conn *ct;
  
  	zone = nf_ct_zone(ignored_conntrack);
  
  	rcu_read_lock();
- 	do {
- 		sequence = read_seqcount_begin(&nf_conntrack_generation);
- 		hash = hash_conntrack(net, tuple);
- 		ct_hash = nf_conntrack_hash;
- 	} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+  begin:
+ 	nf_conntrack_get_ht(&ct_hash, &hsize);
+ 	hash = __hash_conntrack(net, tuple, hsize);
  
  	hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
  		ct = nf_ct_tuplehash_to_ctrack(h);
- 		if (ct != ignored_conntrack &&
- 		    nf_ct_key_equal(h, tuple, zone, net)) {
+ 
+ 		if (ct == ignored_conntrack)
+ 			continue;
+ 
+ 		if (nf_ct_is_expired(ct)) {
+ 			nf_ct_gc_expired(ct);
+ 			continue;
+ 		}
+ 
+ 		if (nf_ct_key_equal(h, tuple, zone, net)) {
  			NF_CT_STAT_INC_ATOMIC(net, found);
  			rcu_read_unlock();
  			return 1;
  		}
  		NF_CT_STAT_INC_ATOMIC(net, searched);
  	}
+ 
+ 	if (get_nulls_value(n) != hash) {
+ 		NF_CT_STAT_INC_ATOMIC(net, search_restart);
+ 		goto begin;
+ 	}
+ 
  	rcu_read_unlock();
  
  	return 0;
@@@ -867,6 -886,11 +886,11 @@@ static unsigned int early_drop_list(str
  	hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
  		tmp = nf_ct_tuplehash_to_ctrack(h);
  
+ 		if (nf_ct_is_expired(tmp)) {
+ 			nf_ct_gc_expired(tmp);
+ 			continue;
+ 		}
+ 
  		if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
  		    !net_eq(nf_ct_net(tmp), net) ||
  		    nf_ct_is_dying(tmp))
@@@ -884,7 -908,6 +908,6 @@@
  		 */
  		if (net_eq(nf_ct_net(tmp), net) &&
  		    nf_ct_is_confirmed(tmp) &&
- 		    del_timer(&tmp->timeout) &&
  		    nf_ct_delete(tmp, 0, 0))
  			drops++;
  
@@@ -900,14 -923,11 +923,11 @@@ static noinline int early_drop(struct n
  
  	for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
  		struct hlist_nulls_head *ct_hash;
- 		unsigned hash, sequence, drops;
+ 		unsigned int hash, hsize, drops;
  
  		rcu_read_lock();
- 		do {
- 			sequence = read_seqcount_begin(&nf_conntrack_generation);
- 			hash = scale_hash(_hash++);
- 			ct_hash = nf_conntrack_hash;
- 		} while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+ 		nf_conntrack_get_ht(&ct_hash, &hsize);
+ 		hash = reciprocal_scale(_hash++, hsize);
  
  		drops = early_drop_list(net, &ct_hash[hash]);
  		rcu_read_unlock();
@@@ -921,6 -941,69 +941,69 @@@
  	return false;
  }
  
+ static void gc_worker(struct work_struct *work)
+ {
+ 	unsigned int i, goal, buckets = 0, expired_count = 0;
+ 	unsigned long next_run = GC_INTERVAL;
+ 	unsigned int ratio, scanned = 0;
+ 	struct conntrack_gc_work *gc_work;
+ 
+ 	gc_work = container_of(work, struct conntrack_gc_work, dwork.work);
+ 
+ 	goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+ 	i = gc_work->last_bucket;
+ 
+ 	do {
+ 		struct nf_conntrack_tuple_hash *h;
+ 		struct hlist_nulls_head *ct_hash;
+ 		struct hlist_nulls_node *n;
+ 		unsigned int hashsz;
+ 		struct nf_conn *tmp;
+ 
+ 		i++;
+ 		rcu_read_lock();
+ 
+ 		nf_conntrack_get_ht(&ct_hash, &hashsz);
+ 		if (i >= hashsz)
+ 			i = 0;
+ 
+ 		hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) {
+ 			tmp = nf_ct_tuplehash_to_ctrack(h);
+ 
+ 			scanned++;
+ 			if (nf_ct_is_expired(tmp)) {
+ 				nf_ct_gc_expired(tmp);
+ 				expired_count++;
+ 				continue;
+ 			}
+ 		}
+ 
+ 		/* could check get_nulls_value() here and restart if ct
+ 		 * was moved to another chain.  But given gc is best-effort
+ 		 * we will just continue with next hash slot.
+ 		 */
+ 		rcu_read_unlock();
+ 		cond_resched_rcu_qs();
+ 	} while (++buckets < goal &&
+ 		 expired_count < GC_MAX_EVICTS);
+ 
+ 	if (gc_work->exiting)
+ 		return;
+ 
+ 	ratio = scanned ? expired_count * 100 / scanned : 0;
+ 	if (ratio >= 90)
+ 		next_run = 0;
+ 
+ 	gc_work->last_bucket = i;
+ 	schedule_delayed_work(&gc_work->dwork, next_run);
+ }
+ 
+ static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
+ {
+ 	INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ 	gc_work->exiting = false;
+ }
+ 
  static struct nf_conn *
  __nf_conntrack_alloc(struct net *net,
  		     const struct nf_conntrack_zone *zone,
@@@ -957,8 -1040,6 +1040,6 @@@
  	/* save hash for reusing when confirming */
  	*(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash;
  	ct->status = 0;
- 	/* Don't set timer yet: wait for confirmation */
- 	setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
  	write_pnet(&ct->ct_net, net);
  	memset(&ct->__nfct_init_offset[0], 0,
  	       offsetof(struct nf_conn, proto) -
@@@ -1035,9 -1116,9 +1116,9 @@@ init_conntrack(struct net *net, struct 
  	if (IS_ERR(ct))
  		return (struct nf_conntrack_tuple_hash *)ct;
  
 -	if (tmpl && nfct_synproxy(tmpl)) {
 -		nfct_seqadj_ext_add(ct);
 -		nfct_synproxy_ext_add(ct);
 +	if (!nf_ct_add_synproxy(ct, tmpl)) {
 +		nf_conntrack_free(ct);
 +		return ERR_PTR(-ENOMEM);
  	}
  
  	timeout_ext = tmpl ? nf_ct_timeout_find(tmpl) : NULL;
@@@ -1332,7 -1413,6 +1413,6 @@@ void __nf_ct_refresh_acct(struct nf_con
  			  unsigned long extra_jiffies,
  			  int do_acct)
  {
- 	NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
  	NF_CT_ASSERT(skb);
  
  	/* Only update if this is not a fixed timeout */
@@@ -1340,39 -1420,25 +1420,25 @@@
  		goto acct;
  
  	/* If not in hash table, timer will not be active yet */
- 	if (!nf_ct_is_confirmed(ct)) {
- 		ct->timeout.expires = extra_jiffies;
- 	} else {
- 		unsigned long newtime = jiffies + extra_jiffies;
- 
- 		/* Only update the timeout if the new timeout is at least
- 		   HZ jiffies from the old timeout. Need del_timer for race
- 		   avoidance (may already be dying). */
- 		if (newtime - ct->timeout.expires >= HZ)
- 			mod_timer_pending(&ct->timeout, newtime);
- 	}
+ 	if (nf_ct_is_confirmed(ct))
+ 		extra_jiffies += nfct_time_stamp;
  
+ 	ct->timeout = extra_jiffies;
  acct:
  	if (do_acct)
  		nf_ct_acct_update(ct, ctinfo, skb->len);
  }
  EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
  
- bool __nf_ct_kill_acct(struct nf_conn *ct,
- 		       enum ip_conntrack_info ctinfo,
- 		       const struct sk_buff *skb,
- 		       int do_acct)
+ bool nf_ct_kill_acct(struct nf_conn *ct,
+ 		     enum ip_conntrack_info ctinfo,
+ 		     const struct sk_buff *skb)
  {
- 	if (do_acct)
- 		nf_ct_acct_update(ct, ctinfo, skb->len);
+ 	nf_ct_acct_update(ct, ctinfo, skb->len);
  
- 	if (del_timer(&ct->timeout)) {
- 		ct->timeout.function((unsigned long)ct);
- 		return true;
- 	}
- 	return false;
+ 	return nf_ct_delete(ct, 0, 0);
  }
- EXPORT_SYMBOL_GPL(__nf_ct_kill_acct);
+ EXPORT_SYMBOL_GPL(nf_ct_kill_acct);
  
  #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
  
@@@ -1505,11 -1571,8 +1571,8 @@@ void nf_ct_iterate_cleanup(struct net *
  
  	while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
  		/* Time to push up daisies... */
- 		if (del_timer(&ct->timeout))
- 			nf_ct_delete(ct, portid, report);
- 
- 		/* ... else the timer will get him soon. */
  
+ 		nf_ct_delete(ct, portid, report);
  		nf_ct_put(ct);
  		cond_resched();
  	}
@@@ -1545,6 -1608,7 +1608,7 @@@ static int untrack_refs(void
  
  void nf_conntrack_cleanup_start(void)
  {
+ 	conntrack_gc_work.exiting = true;
  	RCU_INIT_POINTER(ip_ct_attach, NULL);
  }
  
@@@ -1554,6 -1618,7 +1618,7 @@@ void nf_conntrack_cleanup_end(void
  	while (untrack_refs() > 0)
  		schedule();
  
+ 	cancel_delayed_work_sync(&conntrack_gc_work.dwork);
  	nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size);
  
  	nf_conntrack_proto_fini();
@@@ -1828,6 -1893,10 +1893,10 @@@ int nf_conntrack_init_start(void
  	}
  	/*  - and make it look like a confirmed connection */
  	nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);
+ 
+ 	conntrack_gc_work_init(&conntrack_gc_work);
+ 	schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+ 
  	return 0;
  
  err_proto:
diff --combined net/netfilter/nf_nat_core.c
index ecee105,81ae41f..bbb8f3d
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@@ -441,8 -441,7 +441,8 @@@ nf_nat_setup_info(struct nf_conn *ct
  			ct->status |= IPS_DST_NAT;
  
  		if (nfct_help(ct))
 -			nfct_seqadj_ext_add(ct);
 +			if (!nfct_seqadj_ext_add(ct))
 +				return NF_DROP;
  	}
  
  	if (maniptype == NF_NAT_MANIP_SRC) {
@@@ -566,16 -565,10 +566,10 @@@ static int nf_nat_proto_clean(struct nf
  	 * Else, when the conntrack is destroyed, nf_nat_cleanup_conntrack()
  	 * will delete entry from already-freed table.
  	 */
- 	if (!del_timer(&ct->timeout))
- 		return 1;
- 
  	ct->status &= ~IPS_NAT_DONE_MASK;
- 
  	rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
  			       nf_nat_bysource_params);
  
- 	add_timer(&ct->timeout);
- 
  	/* don't delete conntrack.  Although that would make things a lot
  	 * simpler, we'd end up flushing all conntracks on nat rmmod.
  	 */
@@@ -808,7 -801,7 +802,7 @@@ nfnetlink_parse_nat_setup(struct nf_con
  	if (err < 0)
  		return err;
  
 -	return nf_nat_setup_info(ct, &range, manip);
 +	return nf_nat_setup_info(ct, &range, manip) == NF_DROP ? -ENOMEM : 0;
  }
  #else
  static int
diff --combined net/sctp/input.c
index 1555fb8,a1d8506..a2ea1d1
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@@ -605,7 -605,7 +605,7 @@@ void sctp_v4_err(struct sk_buff *skb, _
  		/* PMTU discovery (RFC1191) */
  		if (ICMP_FRAG_NEEDED == code) {
  			sctp_icmp_frag_needed(sk, asoc, transport,
- 					      WORD_TRUNC(info));
+ 					      SCTP_TRUNC4(info));
  			goto out_unlock;
  		} else {
  			if (ICMP_PROT_UNREACH == code) {
@@@ -673,7 -673,7 +673,7 @@@ static int sctp_rcv_ootb(struct sk_buf
  		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
  			break;
  
- 		ch_end = offset + WORD_ROUND(ntohs(ch->length));
+ 		ch_end = offset + SCTP_PAD4(ntohs(ch->length));
  		if (ch_end > skb->len)
  			break;
  
@@@ -796,34 -796,27 +796,34 @@@ struct sctp_hash_cmp_arg 
  static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg,
  				const void *ptr)
  {
 +	struct sctp_transport *t = (struct sctp_transport *)ptr;
  	const struct sctp_hash_cmp_arg *x = arg->key;
 -	const struct sctp_transport *t = ptr;
 -	struct sctp_association *asoc = t->asoc;
 -	const struct net *net = x->net;
 +	struct sctp_association *asoc;
 +	int err = 1;
  
  	if (!sctp_cmp_addr_exact(&t->ipaddr, x->paddr))
 -		return 1;
 -	if (!net_eq(sock_net(asoc->base.sk), net))
 -		return 1;
 +		return err;
 +	if (!sctp_transport_hold(t))
 +		return err;
 +
 +	asoc = t->asoc;
 +	if (!net_eq(sock_net(asoc->base.sk), x->net))
 +		goto out;
  	if (x->ep) {
  		if (x->ep != asoc->ep)
 -			return 1;
 +			goto out;
  	} else {
  		if (x->laddr->v4.sin_port != htons(asoc->base.bind_addr.port))
 -			return 1;
 +			goto out;
  		if (!sctp_bind_addr_match(&asoc->base.bind_addr,
  					  x->laddr, sctp_sk(asoc->base.sk)))
 -			return 1;
 +			goto out;
  	}
  
 -	return 0;
 +	err = 0;
 +out:
 +	sctp_transport_put(t);
 +	return err;
  }
  
  static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed)
@@@ -1128,7 -1121,7 +1128,7 @@@ static struct sctp_association *__sctp_
  		if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t))
  			break;
  
- 		ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length));
+ 		ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length));
  		if (ch_end > skb_tail_pointer(skb))
  			break;
  
@@@ -1197,7 -1190,7 +1197,7 @@@ static struct sctp_association *__sctp_
  	 * that the chunk length doesn't cause overflow.  Otherwise, we'll
  	 * walk off the end.
  	 */
- 	if (WORD_ROUND(ntohs(ch->length)) > skb->len)
+ 	if (SCTP_PAD4(ntohs(ch->length)) > skb->len)
  		return NULL;
  
  	/* If this is INIT/INIT-ACK look inside the chunk too. */
diff --combined net/wireless/nl80211.c
index 4809f4d,887c4c1..fd111e2
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@@ -848,13 -848,21 +848,21 @@@ nl80211_parse_connkeys(struct cfg80211_
  	struct nlattr *key;
  	struct cfg80211_cached_keys *result;
  	int rem, err, def = 0;
+ 	bool have_key = false;
+ 
+ 	nla_for_each_nested(key, keys, rem) {
+ 		have_key = true;
+ 		break;
+ 	}
+ 
+ 	if (!have_key)
+ 		return NULL;
  
  	result = kzalloc(sizeof(*result), GFP_KERNEL);
  	if (!result)
  		return ERR_PTR(-ENOMEM);
  
  	result->def = -1;
- 	result->defmgmt = -1;
  
  	nla_for_each_nested(key, keys, rem) {
  		memset(&parse, 0, sizeof(parse));
@@@ -866,7 -874,7 +874,7 @@@
  		err = -EINVAL;
  		if (!parse.p.key)
  			goto error;
- 		if (parse.idx < 0 || parse.idx > 4)
+ 		if (parse.idx < 0 || parse.idx > 3)
  			goto error;
  		if (parse.def) {
  			if (def)
@@@ -881,16 -889,24 +889,24 @@@
  						     parse.idx, false, NULL);
  		if (err)
  			goto error;
+ 		if (parse.p.cipher != WLAN_CIPHER_SUITE_WEP40 &&
+ 		    parse.p.cipher != WLAN_CIPHER_SUITE_WEP104) {
+ 			err = -EINVAL;
+ 			goto error;
+ 		}
  		result->params[parse.idx].cipher = parse.p.cipher;
  		result->params[parse.idx].key_len = parse.p.key_len;
  		result->params[parse.idx].key = result->data[parse.idx];
  		memcpy(result->data[parse.idx], parse.p.key, parse.p.key_len);
  
- 		if (parse.p.cipher == WLAN_CIPHER_SUITE_WEP40 ||
- 		    parse.p.cipher == WLAN_CIPHER_SUITE_WEP104) {
- 			if (no_ht)
- 				*no_ht = true;
- 		}
+ 		/* must be WEP key if we got here */
+ 		if (no_ht)
+ 			*no_ht = true;
+ 	}
+ 
+ 	if (result->def < 0) {
+ 		err = -EINVAL;
+ 		goto error;
  	}
  
  	return result;
@@@ -2525,10 -2541,35 +2541,35 @@@ static int nl80211_dump_interface(struc
  	int if_idx = 0;
  	int wp_start = cb->args[0];
  	int if_start = cb->args[1];
+ 	int filter_wiphy = -1;
  	struct cfg80211_registered_device *rdev;
  	struct wireless_dev *wdev;
  
  	rtnl_lock();
+ 	if (!cb->args[2]) {
+ 		struct nl80211_dump_wiphy_state state = {
+ 			.filter_wiphy = -1,
+ 		};
+ 		int ret;
+ 
+ 		ret = nl80211_dump_wiphy_parse(skb, cb, &state);
+ 		if (ret)
+ 			return ret;
+ 
+ 		filter_wiphy = state.filter_wiphy;
+ 
+ 		/*
+ 		 * if filtering, set cb->args[2] to +1 since 0 is the default
+ 		 * value needed to determine that parsing is necessary.
+ 		 */
+ 		if (filter_wiphy >= 0)
+ 			cb->args[2] = filter_wiphy + 1;
+ 		else
+ 			cb->args[2] = -1;
+ 	} else if (cb->args[2] > 0) {
+ 		filter_wiphy = cb->args[2] - 1;
+ 	}
+ 
  	list_for_each_entry(rdev, &cfg80211_rdev_list, list) {
  		if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk)))
  			continue;
@@@ -2536,6 -2577,10 +2577,10 @@@
  			wp_idx++;
  			continue;
  		}
+ 
+ 		if (filter_wiphy >= 0 && filter_wiphy != rdev->wiphy_idx)
+ 			continue;
+ 
  		if_idx = 0;
  
  		list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) {
@@@ -2751,7 -2796,7 +2796,7 @@@ static int nl80211_new_interface(struc
  	struct cfg80211_registered_device *rdev = info->user_ptr[0];
  	struct vif_params params;
  	struct wireless_dev *wdev;
- 	struct sk_buff *msg, *event;
+ 	struct sk_buff *msg;
  	int err;
  	enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED;
  	u32 flags;
@@@ -2855,20 -2900,15 +2900,15 @@@
  		return -ENOBUFS;
  	}
  
- 	event = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- 	if (event) {
- 		if (nl80211_send_iface(event, 0, 0, 0,
- 				       rdev, wdev, false) < 0) {
- 			nlmsg_free(event);
- 			goto out;
- 		}
- 
- 		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
- 					event, 0, NL80211_MCGRP_CONFIG,
- 					GFP_KERNEL);
- 	}
+ 	/*
+ 	 * For wdevs which have no associated netdev object (e.g. of type
+ 	 * NL80211_IFTYPE_P2P_DEVICE), emit the NEW_INTERFACE event here.
+ 	 * For all other types, the event will be generated from the
+ 	 * netdev notifier
+ 	 */
+ 	if (!wdev->netdev)
+ 		nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE);
  
- out:
  	return genlmsg_reply(msg, info);
  }
  
@@@ -2876,18 -2916,10 +2916,10 @@@ static int nl80211_del_interface(struc
  {
  	struct cfg80211_registered_device *rdev = info->user_ptr[0];
  	struct wireless_dev *wdev = info->user_ptr[1];
- 	struct sk_buff *msg;
- 	int status;
  
  	if (!rdev->ops->del_virtual_intf)
  		return -EOPNOTSUPP;
  
- 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- 	if (msg && nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, true) < 0) {
- 		nlmsg_free(msg);
- 		msg = NULL;
- 	}
- 
  	/*
  	 * If we remove a wireless device without a netdev then clear
  	 * user_ptr[1] so that nl80211_post_doit won't dereference it
@@@ -2898,15 -2930,7 +2930,7 @@@
  	if (!wdev->netdev)
  		info->user_ptr[1] = NULL;
  
- 	status = rdev_del_virtual_intf(rdev, wdev);
- 	if (status >= 0 && msg)
- 		genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
- 					msg, 0, NL80211_MCGRP_CONFIG,
- 					GFP_KERNEL);
- 	else
- 		nlmsg_free(msg);
- 
- 	return status;
+ 	return rdev_del_virtual_intf(rdev, wdev);
  }
  
  static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info)
@@@ -5374,6 -5398,18 +5398,18 @@@ static int nl80211_check_s32(const stru
  	return 0;
  }
  
+ static int nl80211_check_power_mode(const struct nlattr *nla,
+ 				    enum nl80211_mesh_power_mode min,
+ 				    enum nl80211_mesh_power_mode max,
+ 				    enum nl80211_mesh_power_mode *out)
+ {
+ 	u32 val = nla_get_u32(nla);
+ 	if (val < min || val > max)
+ 		return -EINVAL;
+ 	*out = val;
+ 	return 0;
+ }
+ 
  static int nl80211_parse_mesh_config(struct genl_info *info,
  				     struct mesh_config *cfg,
  				     u32 *mask_out)
@@@ -5518,7 -5554,7 +5554,7 @@@ do {									    
  				  NL80211_MESH_POWER_ACTIVE,
  				  NL80211_MESH_POWER_MAX,
  				  mask, NL80211_MESHCONF_POWER_MODE,
- 				  nl80211_check_u32);
+ 				  nl80211_check_power_mode);
  	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration,
  				  0, 65535, mask,
  				  NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16);
@@@ -6978,7 -7014,7 +7014,7 @@@ static int nl80211_channel_switch(struc
  
  		params.n_counter_offsets_presp = len / sizeof(u16);
  		if (rdev->wiphy.max_num_csa_counters &&
 -		    (params.n_counter_offsets_beacon >
 +		    (params.n_counter_offsets_presp >
  		     rdev->wiphy.max_num_csa_counters))
  			return -EINVAL;
  
@@@ -7368,7 -7404,7 +7404,7 @@@ static int nl80211_authenticate(struct 
  		    (key.p.cipher != WLAN_CIPHER_SUITE_WEP104 ||
  		     key.p.key_len != WLAN_KEY_LEN_WEP104))
  			return -EINVAL;
- 		if (key.idx > 4)
+ 		if (key.idx > 3)
  			return -EINVAL;
  	} else {
  		key.p.key_len = 0;
@@@ -7773,12 -7809,13 +7809,13 @@@ static int nl80211_join_ibss(struct sk_
  
  	ibss.beacon_interval = 100;
  
- 	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
+ 	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL])
  		ibss.beacon_interval =
  			nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
- 		if (ibss.beacon_interval < 1 || ibss.beacon_interval > 10000)
- 			return -EINVAL;
- 	}
+ 
+ 	err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval);
+ 	if (err)
+ 		return err;
  
  	if (!rdev->ops->join_ibss)
  		return -EOPNOTSUPP;
@@@ -7985,6 -8022,8 +8022,8 @@@ __cfg80211_alloc_vendor_skb(struct cfg8
  	}
  
  	data = nla_nest_start(skb, attr);
+ 	if (!data)
+ 		goto nla_put_failure;
  
  	((void **)skb->cb)[0] = rdev;
  	((void **)skb->cb)[1] = hdr;
@@@ -9252,9 -9291,10 +9291,10 @@@ static int nl80211_join_mesh(struct sk_
  	if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) {
  		setup.beacon_interval =
  			nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]);
- 		if (setup.beacon_interval < 10 ||
- 		    setup.beacon_interval > 10000)
- 			return -EINVAL;
+ 
+ 		err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval);
+ 		if (err)
+ 			return err;
  	}
  
  	if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) {
@@@ -9413,18 -9453,27 +9453,27 @@@ static int nl80211_send_wowlan_nd(struc
  	if (!freqs)
  		return -ENOBUFS;
  
- 	for (i = 0; i < req->n_channels; i++)
- 		nla_put_u32(msg, i, req->channels[i]->center_freq);
+ 	for (i = 0; i < req->n_channels; i++) {
+ 		if (nla_put_u32(msg, i, req->channels[i]->center_freq))
+ 			return -ENOBUFS;
+ 	}
  
  	nla_nest_end(msg, freqs);
  
  	if (req->n_match_sets) {
  		matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH);
+ 		if (!matches)
+ 			return -ENOBUFS;
+ 
  		for (i = 0; i < req->n_match_sets; i++) {
  			match = nla_nest_start(msg, i);
- 			nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID,
- 				req->match_sets[i].ssid.ssid_len,
- 				req->match_sets[i].ssid.ssid);
+ 			if (!match)
+ 				return -ENOBUFS;
+ 
+ 			if (nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID,
+ 				    req->match_sets[i].ssid.ssid_len,
+ 				    req->match_sets[i].ssid.ssid))
+ 				return -ENOBUFS;
  			nla_nest_end(msg, match);
  		}
  		nla_nest_end(msg, matches);
@@@ -9436,6 -9485,9 +9485,9 @@@
  
  	for (i = 0; i < req->n_scan_plans; i++) {
  		scan_plan = nla_nest_start(msg, i + 1);
+ 		if (!scan_plan)
+ 			return -ENOBUFS;
+ 
  		if (!scan_plan ||
  		    nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL,
  				req->scan_plans[i].interval) ||
@@@ -11847,6 -11899,29 +11899,29 @@@ void nl80211_notify_wiphy(struct cfg802
  				NL80211_MCGRP_CONFIG, GFP_KERNEL);
  }
  
+ void nl80211_notify_iface(struct cfg80211_registered_device *rdev,
+ 				struct wireless_dev *wdev,
+ 				enum nl80211_commands cmd)
+ {
+ 	struct sk_buff *msg;
+ 
+ 	WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE &&
+ 		cmd != NL80211_CMD_DEL_INTERFACE);
+ 
+ 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ 	if (!msg)
+ 		return;
+ 
+ 	if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev,
+ 			       cmd == NL80211_CMD_DEL_INTERFACE) < 0) {
+ 		nlmsg_free(msg);
+ 		return;
+ 	}
+ 
+ 	genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
+ 				NL80211_MCGRP_CONFIG, GFP_KERNEL);
+ }
+ 
  static int nl80211_add_scan_req(struct sk_buff *msg,
  				struct cfg80211_registered_device *rdev)
  {
diff --combined net/xfrm/xfrm_state.c
index a30f898d,ba8bf51..5685da0
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@@ -28,6 -28,11 +28,11 @@@
  
  #include "xfrm_hash.h"
  
+ #define xfrm_state_deref_prot(table, net) \
+ 	rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
+ 
+ static void xfrm_state_gc_task(struct work_struct *work);
+ 
  /* Each xfrm_state may be linked to two tables:
  
     1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
@@@ -36,6 -41,15 +41,15 @@@
   */
  
  static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
+ static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
+ 
+ static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
+ static HLIST_HEAD(xfrm_state_gc_list);
+ 
+ static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
+ {
+ 	return atomic_inc_not_zero(&x->refcnt);
+ }
  
  static inline unsigned int xfrm_dst_hash(struct net *net,
  					 const xfrm_address_t *daddr,
@@@ -76,18 -90,18 +90,18 @@@ static void xfrm_hash_transfer(struct h
  		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
  				    x->props.reqid, x->props.family,
  				    nhashmask);
- 		hlist_add_head(&x->bydst, ndsttable+h);
+ 		hlist_add_head_rcu(&x->bydst, ndsttable + h);
  
  		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
  				    x->props.family,
  				    nhashmask);
- 		hlist_add_head(&x->bysrc, nsrctable+h);
+ 		hlist_add_head_rcu(&x->bysrc, nsrctable + h);
  
  		if (x->id.spi) {
  			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
  					    x->id.proto, x->props.family,
  					    nhashmask);
- 			hlist_add_head(&x->byspi, nspitable+h);
+ 			hlist_add_head_rcu(&x->byspi, nspitable + h);
  		}
  	}
  }
@@@ -122,25 -136,29 +136,29 @@@ static void xfrm_hash_resize(struct wor
  	}
  
  	spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ 	write_seqcount_begin(&xfrm_state_hash_generation);
  
  	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
+ 	odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
  	for (i = net->xfrm.state_hmask; i >= 0; i--)
- 		xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi,
- 				   nhashmask);
+ 		xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
  
- 	odst = net->xfrm.state_bydst;
- 	osrc = net->xfrm.state_bysrc;
- 	ospi = net->xfrm.state_byspi;
+ 	osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
+ 	ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
  	ohashmask = net->xfrm.state_hmask;
  
- 	net->xfrm.state_bydst = ndst;
- 	net->xfrm.state_bysrc = nsrc;
- 	net->xfrm.state_byspi = nspi;
+ 	rcu_assign_pointer(net->xfrm.state_bydst, ndst);
+ 	rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
+ 	rcu_assign_pointer(net->xfrm.state_byspi, nspi);
  	net->xfrm.state_hmask = nhashmask;
  
+ 	write_seqcount_end(&xfrm_state_hash_generation);
  	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
  
  	osize = (ohashmask + 1) * sizeof(struct hlist_head);
+ 
+ 	synchronize_rcu();
+ 
  	xfrm_hash_free(odst, osize);
  	xfrm_hash_free(osrc, osize);
  	xfrm_hash_free(ospi, osize);
@@@ -332,7 -350,6 +350,7 @@@ static void xfrm_state_gc_destroy(struc
  {
  	tasklet_hrtimer_cancel(&x->mtimer);
  	del_timer_sync(&x->rtimer);
 +	kfree(x->aead);
  	kfree(x->aalg);
  	kfree(x->ealg);
  	kfree(x->calg);
@@@ -356,15 -373,16 +374,16 @@@
  
  static void xfrm_state_gc_task(struct work_struct *work)
  {
- 	struct net *net = container_of(work, struct net, xfrm.state_gc_work);
  	struct xfrm_state *x;
  	struct hlist_node *tmp;
  	struct hlist_head gc_list;
  
  	spin_lock_bh(&xfrm_state_gc_lock);
- 	hlist_move_list(&net->xfrm.state_gc_list, &gc_list);
+ 	hlist_move_list(&xfrm_state_gc_list, &gc_list);
  	spin_unlock_bh(&xfrm_state_gc_lock);
  
+ 	synchronize_rcu();
+ 
  	hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
  		xfrm_state_gc_destroy(x);
  }
@@@ -501,14 -519,12 +520,12 @@@ EXPORT_SYMBOL(xfrm_state_alloc)
  
  void __xfrm_state_destroy(struct xfrm_state *x)
  {
- 	struct net *net = xs_net(x);
- 
  	WARN_ON(x->km.state != XFRM_STATE_DEAD);
  
  	spin_lock_bh(&xfrm_state_gc_lock);
- 	hlist_add_head(&x->gclist, &net->xfrm.state_gc_list);
+ 	hlist_add_head(&x->gclist, &xfrm_state_gc_list);
  	spin_unlock_bh(&xfrm_state_gc_lock);
- 	schedule_work(&net->xfrm.state_gc_work);
+ 	schedule_work(&xfrm_state_gc_work);
  }
  EXPORT_SYMBOL(__xfrm_state_destroy);
  
@@@ -521,10 -537,10 +538,10 @@@ int __xfrm_state_delete(struct xfrm_sta
  		x->km.state = XFRM_STATE_DEAD;
  		spin_lock(&net->xfrm.xfrm_state_lock);
  		list_del(&x->km.all);
- 		hlist_del(&x->bydst);
- 		hlist_del(&x->bysrc);
+ 		hlist_del_rcu(&x->bydst);
+ 		hlist_del_rcu(&x->bysrc);
  		if (x->id.spi)
- 			hlist_del(&x->byspi);
+ 			hlist_del_rcu(&x->byspi);
  		net->xfrm.state_num--;
  		spin_unlock(&net->xfrm.xfrm_state_lock);
  
@@@ -660,7 -676,7 +677,7 @@@ static struct xfrm_state *__xfrm_state_
  	unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
  	struct xfrm_state *x;
  
- 	hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) {
+ 	hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
  		if (x->props.family != family ||
  		    x->id.spi       != spi ||
  		    x->id.proto     != proto ||
@@@ -669,7 -685,8 +686,8 @@@
  
  		if ((mark & x->mark.m) != x->mark.v)
  			continue;
- 		xfrm_state_hold(x);
+ 		if (!xfrm_state_hold_rcu(x))
+ 			continue;
  		return x;
  	}
  
@@@ -684,7 -701,7 +702,7 @@@ static struct xfrm_state *__xfrm_state_
  	unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
  	struct xfrm_state *x;
  
- 	hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
+ 	hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
  		if (x->props.family != family ||
  		    x->id.proto     != proto ||
  		    !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
@@@ -693,7 -710,8 +711,8 @@@
  
  		if ((mark & x->mark.m) != x->mark.v)
  			continue;
- 		xfrm_state_hold(x);
+ 		if (!xfrm_state_hold_rcu(x))
+ 			continue;
  		return x;
  	}
  
@@@ -776,13 -794,16 +795,16 @@@ xfrm_state_find(const xfrm_address_t *d
  	struct xfrm_state *best = NULL;
  	u32 mark = pol->mark.v & pol->mark.m;
  	unsigned short encap_family = tmpl->encap_family;
+ 	unsigned int sequence;
  	struct km_event c;
  
  	to_put = NULL;
  
- 	spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ 	sequence = read_seqcount_begin(&xfrm_state_hash_generation);
+ 
+ 	rcu_read_lock();
  	h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
- 	hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
+ 	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
  		if (x->props.family == encap_family &&
  		    x->props.reqid == tmpl->reqid &&
  		    (mark & x->mark.m) == x->mark.v &&
@@@ -798,7 -819,7 +820,7 @@@
  		goto found;
  
  	h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
- 	hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) {
+ 	hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
  		if (x->props.family == encap_family &&
  		    x->props.reqid == tmpl->reqid &&
  		    (mark & x->mark.m) == x->mark.v &&
@@@ -851,19 -872,21 +873,21 @@@ found
  		}
  
  		if (km_query(x, tmpl, pol) == 0) {
+ 			spin_lock_bh(&net->xfrm.xfrm_state_lock);
  			x->km.state = XFRM_STATE_ACQ;
  			list_add(&x->km.all, &net->xfrm.state_all);
- 			hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+ 			hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
  			h = xfrm_src_hash(net, daddr, saddr, encap_family);
- 			hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+ 			hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
  			if (x->id.spi) {
  				h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
- 				hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+ 				hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
  			}
  			x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
  			tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
  			net->xfrm.state_num++;
  			xfrm_hash_grow_check(net, x->bydst.next != NULL);
+ 			spin_unlock_bh(&net->xfrm.xfrm_state_lock);
  		} else {
  			x->km.state = XFRM_STATE_DEAD;
  			to_put = x;
@@@ -872,13 -895,26 +896,26 @@@
  		}
  	}
  out:
- 	if (x)
- 		xfrm_state_hold(x);
- 	else
+ 	if (x) {
+ 		if (!xfrm_state_hold_rcu(x)) {
+ 			*err = -EAGAIN;
+ 			x = NULL;
+ 		}
+ 	} else {
  		*err = acquire_in_progress ? -EAGAIN : error;
- 	spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ 	}
+ 	rcu_read_unlock();
  	if (to_put)
  		xfrm_state_put(to_put);
+ 
+ 	if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
+ 		*err = -EAGAIN;
+ 		if (x) {
+ 			xfrm_state_put(x);
+ 			x = NULL;
+ 		}
+ 	}
+ 
  	return x;
  }
  
@@@ -946,16 -982,16 +983,16 @@@ static void __xfrm_state_insert(struct 
  
  	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
  			  x->props.reqid, x->props.family);
- 	hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+ 	hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
  
  	h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
- 	hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+ 	hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
  
  	if (x->id.spi) {
  		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
  				  x->props.family);
  
- 		hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+ 		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
  	}
  
  	tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
@@@ -1064,9 -1100,9 +1101,9 @@@ static struct xfrm_state *__find_acq_co
  		xfrm_state_hold(x);
  		tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
  		list_add(&x->km.all, &net->xfrm.state_all);
- 		hlist_add_head(&x->bydst, net->xfrm.state_bydst+h);
+ 		hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
  		h = xfrm_src_hash(net, daddr, saddr, family);
- 		hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h);
+ 		hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
  
  		net->xfrm.state_num++;
  
@@@ -1582,7 -1618,7 +1619,7 @@@ int xfrm_alloc_spi(struct xfrm_state *x
  	if (x->id.spi) {
  		spin_lock_bh(&net->xfrm.xfrm_state_lock);
  		h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
- 		hlist_add_head(&x->byspi, net->xfrm.state_byspi+h);
+ 		hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
  		spin_unlock_bh(&net->xfrm.xfrm_state_lock);
  
  		err = 0;
@@@ -2100,8 -2136,6 +2137,6 @@@ int __net_init xfrm_state_init(struct n
  
  	net->xfrm.state_num = 0;
  	INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
- 	INIT_HLIST_HEAD(&net->xfrm.state_gc_list);
- 	INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task);
  	spin_lock_init(&net->xfrm.xfrm_state_lock);
  	return 0;
  
@@@ -2119,7 -2153,7 +2154,7 @@@ void xfrm_state_fini(struct net *net
  
  	flush_work(&net->xfrm.state_hash_work);
  	xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
- 	flush_work(&net->xfrm.state_gc_work);
+ 	flush_work(&xfrm_state_gc_work);
  
  	WARN_ON(!list_empty(&net->xfrm.state_all));
  

-- 
LinuxNextTracking


More information about the linux-merge mailing list