The following commit has been merged in the master branch:
commit 32fb1f1fccc4eac30d5cf8d573a0c89c05f3f684
Merge: 67dccb15f7f35215d6f93bd3d625ae283b6c5f51 b1d95ae5c5bd3deba84d00c4f83d7d0836b5936f
Author: Stephen Rothwell <sfr@canb.auug.org.au>
Date:   Tue Feb 23 11:01:51 2016 +1100

    Merge remote-tracking branch 'net-next/master'
diff --combined MAINTAINERS
index b2169ee,355e1c8..12cbb39
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -223,7 -223,9 +223,7 @@@ F: drivers/scsi/aacraid
ABI/API L: linux-api@vger.kernel.org -F: Documentation/ABI/ F: include/linux/syscalls.h -F: include/uapi/ F: kernel/sys_ni.c
ABIT UGURU 1,2 HARDWARE MONITOR DRIVER @@@ -673,25 -675,24 +673,25 @@@ F: drivers/gpu/drm/radeon/radeon_kfd. F: drivers/gpu/drm/radeon/radeon_kfd.h F: include/uapi/linux/kfd_ioctl.h
+AMD SEATTLE DEVICE TREE SUPPORT +M: Brijesh Singh brijeshkumar.singh@amd.com +M: Suravee Suthikulpanit suravee.suthikulpanit@amd.com +M: Tom Lendacky thomas.lendacky@amd.com +S: Supported +F: arch/arm64/boot/dts/amd/ + AMD XGBE DRIVER M: Tom Lendacky thomas.lendacky@amd.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/amd/xgbe/ +F: arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi
AMS (Apple Motion Sensor) DRIVER M: Michael Hanselmann linux-kernel@hansmi.ch S: Supported F: drivers/macintosh/ams/
-AMSO1100 RNIC DRIVER -M: Tom Tucker tom@opengridcomputing.com -M: Steve Wise swise@opengridcomputing.com -L: linux-rdma@vger.kernel.org -S: Maintained -F: drivers/infiniband/hw/amso1100/ - ANALOG DEVICES INC AD9389B DRIVER M: Hans Verkuil hans.verkuil@cisco.com L: linux-media@vger.kernel.org @@@ -966,8 -967,6 +966,8 @@@ M: Rob Herring <robh@kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-highbank/ +F: arch/arm/boot/dts/highbank.dts +F: arch/arm/boot/dts/ecx-*.dts*
ARM/CAVIUM NETWORKS CNS3XXX MACHINE SUPPORT M: Krzysztof Halasa khalasa@piap.pl @@@ -1043,7 -1042,6 +1043,7 @@@ M: Barry Song <baohua@kernel.org L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) T: git git://git.kernel.org/pub/scm/linux/kernel/git/baohua/linux.git S: Maintained +F: arch/arm/boot/dts/prima2* F: arch/arm/mach-prima2/ F: drivers/clk/sirf/ F: drivers/clocksource/timer-prima2.c @@@ -1145,10 -1143,6 +1145,10 @@@ W: http://www.hisilicon.co S: Supported T: git git://github.com/hisilicon/linux-hisi.git F: arch/arm/mach-hisi/ +F: arch/arm/boot/dts/hi3* +F: arch/arm/boot/dts/hip* +F: arch/arm/boot/dts/hisi* +F: arch/arm64/boot/dts/hisilicon/
ARM/HP JORNADA 7XX MACHINE SUPPORT M: Kristoffer Ericson kristoffer.ericson@gmail.com @@@ -1225,7 -1219,6 +1225,7 @@@ M: Santosh Shilimkar <ssantosh@kernel.o L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/mach-keystone/ +F: arch/arm/boot/dts/k2* T: git git://git.kernel.org/pub/scm/linux/kernel/git/ssantosh/linux-keystone.git
ARM/TEXAS INSTRUMENT KEYSTONE CLOCK FRAMEWORK @@@ -1286,7 -1279,6 +1286,7 @@@ F: arch/arm/mach-mvebu F: drivers/rtc/rtc-armada38x.c F: arch/arm/boot/dts/armada* F: arch/arm/boot/dts/kirkwood* +F: arch/arm64/boot/dts/marvell/armada*
ARM/Marvell Berlin SoC support @@@ -1295,7 -1287,6 +1295,7 @@@ L: linux-arm-kernel@lists.infradead.or S: Maintained F: arch/arm/mach-berlin/ F: arch/arm/boot/dts/berlin* +F: arch/arm64/boot/dts/marvell/berlin*
ARM/Marvell Dove/MV78xx0/Orion SOC support @@@ -1434,10 -1425,7 +1434,10 @@@ S: Maintaine F: arch/arm/boot/dts/qcom-*.dts F: arch/arm/boot/dts/qcom-*.dtsi F: arch/arm/mach-qcom/ +F: arch/arm64/boot/dts/qcom/* +F: drivers/i2c/busses/i2c-qup.c F: drivers/soc/qcom/ +F: drivers/spi/spi-qup.c F: drivers/tty/serial/msm_serial.h F: drivers/tty/serial/msm_serial.c F: drivers/*/pm8???-* @@@ -1453,8 -1441,8 +1453,8 @@@ S: Maintaine ARM/RENESAS ARM64 ARCHITECTURE M: Simon Horman horms@verge.net.au M: Magnus Damm magnus.damm@gmail.com -L: linux-sh@vger.kernel.org -Q: http://patchwork.kernel.org/project/linux-sh/list/ +L: linux-renesas-soc@vger.kernel.org +Q: http://patchwork.kernel.org/project/linux-renesas-soc/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/horms/renesas.git next S: Supported F: arch/arm64/boot/dts/renesas/ @@@ -1496,8 -1484,6 +1496,8 @@@ L: linux-arm-kernel@lists.infradead.or L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers) S: Maintained F: arch/arm/boot/dts/s3c* +F: arch/arm/boot/dts/s5p* +F: arch/arm/boot/dts/samsung* F: arch/arm/boot/dts/exynos* F: arch/arm64/boot/dts/exynos/ F: arch/arm/plat-samsung/ @@@ -1507,7 -1493,6 +1507,7 @@@ F: arch/arm/mach-s5p* F: arch/arm/mach-exynos*/ F: drivers/*/*s3c2410* F: drivers/*/*/*s3c2410* +F: drivers/soc/samsung/* F: drivers/spi/spi-s3c* F: sound/soc/samsung/* F: Documentation/arm/Samsung/ @@@ -1578,7 -1563,6 +1578,7 @@@ S: Maintaine F: arch/arm/mach-socfpga/ F: arch/arm/boot/dts/socfpga* F: arch/arm/configs/socfpga_defconfig +F: arch/arm64/boot/dts/altera/ W: http://www.rocketboards.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git
@@@ -1732,7 -1716,7 +1732,7 @@@ M: Lorenzo Pieralisi <lorenzo.pieralisi L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/boot/dts/vexpress* -F: arch/arm64/boot/dts/arm/vexpress* +F: arch/arm64/boot/dts/arm/ F: arch/arm/mach-vexpress/ F: */*/vexpress* F: */*/*/vexpress* @@@ -2163,7 -2147,7 +2163,7 @@@ M: Marek Lindner <mareklindner@neomailb M: Simon Wunderlich sw@simonwunderlich.de M: Antonio Quartulli a@unstable.cc L: b.a.t.m.a.n@lists.open-mesh.org - W: http://www.open-mesh.org/ + W: https://www.open-mesh.org/ S: Maintained F: net/batman-adv/
@@@ -2359,7 -2343,6 +2359,7 @@@ F: arch/arm/mach-bcm F: arch/arm/boot/dts/bcm113* F: arch/arm/boot/dts/bcm216* F: arch/arm/boot/dts/bcm281* +F: arch/arm64/boot/dts/broadcom/ F: arch/arm/configs/bcm_defconfig F: drivers/mmc/host/sdhci-bcm-kona.c F: drivers/clocksource/bcm_kona_timer.c @@@ -2374,6 -2357,14 +2374,6 @@@ T: git git://git.kernel.org/pub/scm/lin S: Maintained N: bcm2835
-BROADCOM BCM33XX MIPS ARCHITECTURE -M: Kevin Cernekee cernekee@gmail.com -L: linux-mips@linux-mips.org -S: Maintained -F: arch/mips/bcm3384/* -F: arch/mips/include/asm/mach-bcm3384/* -F: arch/mips/kernel/*bmips* - BROADCOM BCM47XX MIPS ARCHITECTURE M: Hauke Mehrtens hauke@hauke-m.de M: Rafał Miłecki zajec5@gmail.com @@@ -2433,6 -2424,7 +2433,7 @@@ F: include/linux/bcm963xx_nvram. F: include/linux/bcm963xx_tag.h
BROADCOM TG3 GIGABIT ETHERNET DRIVER + M: Siva Reddy Kallam siva.kallam@broadcom.com M: Prashant Sreedharan prashant@broadcom.com M: Michael Chan mchan@broadcom.com L: netdev@vger.kernel.org @@@ -3454,8 -3446,9 +3455,8 @@@ S: Maintaine F: drivers/usb/dwc2/
DESIGNWARE USB3 DRD IP DRIVER -M: Felipe Balbi balbi@ti.com +M: Felipe Balbi balbi@kernel.org L: linux-usb@vger.kernel.org -L: linux-omap@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git S: Maintained F: drivers/usb/dwc3/ @@@ -4192,6 -4185,13 +4193,6 @@@ W: http://aeschi.ch.eu.org/efs S: Orphan F: fs/efs/
-EHCA (IBM GX bus InfiniBand adapter) DRIVER -M: Hoang-Nam Nguyen hnguyen@de.ibm.com -M: Christoph Raisch raisch@de.ibm.com -L: linux-rdma@vger.kernel.org -S: Supported -F: drivers/infiniband/hw/ehca/ - EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER M: Thadeu Lima de Souza Cascardo cascardo@linux.vnet.ibm.com L: netdev@vger.kernel.org @@@ -4233,6 -4233,13 +4234,6 @@@ M: Maxim Levitsky <maximlevitsky@gmail. S: Maintained F: drivers/media/rc/ene_ir.*
-ENHANCED ERROR HANDLING (EEH) -M: Gavin Shan shangw@linux.vnet.ibm.com -L: linuxppc-dev@lists.ozlabs.org -S: Supported -F: Documentation/powerpc/eeh-pci-error-recovery.txt -F: arch/powerpc/kernel/eeh*.c - EPSON S1D13XXX FRAMEBUFFER DRIVER M: Kristoffer Ericson kristoffer.ericson@gmail.com S: Maintained @@@ -5803,6 -5810,12 +5804,6 @@@ M: Juanjo Ciarlante <jjciarla@raiz.uncu S: Maintained F: net/ipv4/netfilter/ipt_MASQUERADE.c
-IPATH DRIVER -M: Mike Marciniszyn infinipath@intel.com -L: linux-rdma@vger.kernel.org -S: Maintained -F: drivers/staging/rdma/ipath/ - IPMI SUBSYSTEM M: Corey Minyard minyard@acm.org L: openipmi-developer@lists.sourceforge.net (moderated for non-subscribers) @@@ -6132,7 -6145,7 +6133,7 @@@ F: include/uapi/linux/sunrpc
KERNEL SELFTEST FRAMEWORK M: Shuah Khan shuahkh@osg.samsung.com -L: linux-api@vger.kernel.org +L: linux-kselftest@vger.kernel.org T: git git://git.kernel.org/pub/scm/shuah/linux-kselftest S: Maintained F: tools/testing/selftests @@@ -7358,7 -7371,7 +7359,7 @@@ F: drivers/tty/isicom. F: include/linux/isicom.h
MUSB MULTIPOINT HIGH SPEED DUAL-ROLE CONTROLLER -M: Felipe Balbi balbi@ti.com +M: Bin Liu b-liu@ti.com L: linux-usb@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git S: Maintained @@@ -7927,9 -7940,11 +7928,9 @@@ F: drivers/media/platform/omap3isp F: drivers/staging/media/omap4iss/
OMAP USB SUPPORT -M: Felipe Balbi balbi@ti.com L: linux-usb@vger.kernel.org L: linux-omap@vger.kernel.org -T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git -S: Maintained +S: Orphan F: drivers/usb/*/*omap* F: arch/arm/*omap*/usb*
@@@ -8113,14 -8128,6 +8114,14 @@@ S: Supporte F: fs/overlayfs/ F: Documentation/filesystems/overlayfs.txt
+ORANGEFS FILESYSTEM +M: Mike Marshall hubcap@omnibond.com +L: pvfs2-developers@beowulf-underground.org +T: git git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux.git +S: Supported +F: fs/orangefs/ +F: Documentation/filesystems/orangefs.txt + P54 WIRELESS DRIVER M: Christian Lamparter chunkeey@googlemail.com L: linux-wireless@vger.kernel.org @@@ -8254,15 -8261,6 +8255,15 @@@ L: linux-pci@vger.kernel.or S: Supported F: Documentation/PCI/pci-error-recovery.txt
+PCI ENHANCED ERROR HANDLING (EEH) FOR POWERPC +M: Russell Currey ruscur@russell.cc +L: linuxppc-dev@lists.ozlabs.org +S: Supported +F: Documentation/powerpc/eeh-pci-error-recovery.txt +F: arch/powerpc/kernel/eeh*.c +F: arch/powerpc/platforms/*/eeh*.c +F: arch/powerpc/include/*/eeh*.h + PCI SUBSYSTEM M: Bjorn Helgaas bhelgaas@google.com L: linux-pci@vger.kernel.org @@@ -8821,7 -8819,6 +8822,7 @@@ L: linux-arm-kernel@lists.infradead.or T: git git://github.com/hzhuang1/linux.git T: git git://github.com/rjarzmik/linux.git S: Maintained +F: arch/arm/boot/dts/pxa* F: arch/arm/mach-pxa/ F: drivers/dma/pxa* F: drivers/pcmcia/pxa2xx* @@@ -8851,7 -8848,6 +8852,7 @@@ L: linux-arm-kernel@lists.infradead.or T: git git://github.com/hzhuang1/linux.git T: git git://git.linaro.org/people/ycmiao/pxa-linux.git S: Maintained +F: arch/arm/boot/dts/mmp* F: arch/arm/mach-mmp/
PXA MMCI DRIVER @@@ -9798,11 -9794,10 +9799,11 @@@ S: Supporte F: drivers/scsi/be2iscsi/
Emulex 10Gbps NIC BE2, BE3-R, Lancer, Skyhawk-R DRIVER -M: Sathya Perla sathya.perla@avagotech.com -M: Ajit Khaparde ajit.khaparde@avagotech.com -M: Padmanabh Ratnakar padmanabh.ratnakar@avagotech.com -M: Sriharsha Basavapatna sriharsha.basavapatna@avagotech.com +M: Sathya Perla sathya.perla@broadcom.com +M: Ajit Khaparde ajit.khaparde@broadcom.com +M: Padmanabh Ratnakar padmanabh.ratnakar@broadcom.com +M: Sriharsha Basavapatna sriharsha.basavapatna@broadcom.com +M: Somnath Kotur somnath.kotur@broadcom.com L: netdev@vger.kernel.org W: http://www.emulex.com S: Supported @@@ -10164,7 -10159,6 +10165,7 @@@ S: Supporte F: drivers/media/pci/solo6x10/
SOFTWARE RAID (Multiple Disks) SUPPORT +M: Shaohua Li shli@kernel.org L: linux-raid@vger.kernel.org T: git git://neil.brown.name/md S: Supported @@@ -10298,7 -10292,6 +10299,7 @@@ L: spear-devel@list.st.co L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) W: http://www.st.com/spear S: Maintained +F: arch/arm/boot/dts/spear* F: arch/arm/mach-spear/
SPEAR CLOCK FRAMEWORK SUPPORT @@@ -10861,14 -10854,6 +10862,14 @@@ L: linux-omap@vger.kernel.or S: Maintained F: drivers/thermal/ti-soc-thermal/
+TI VPE/CAL DRIVERS +M: Benoit Parrot bparrot@ti.com +L: linux-media@vger.kernel.org +W: http://linuxtv.org/ +Q: http://patchwork.linuxtv.org/project/linux-media/list/ +S: Maintained +F: drivers/media/platform/ti-vpe/ + TI CDCE706 CLOCK DRIVER M: Max Filippov jcmvbkbc@gmail.com S: Maintained @@@ -11334,7 -11319,7 +11335,7 @@@ F: Documentation/usb/ehci.tx F: drivers/usb/host/ehci*
USB GADGET/PERIPHERAL SUBSYSTEM -M: Felipe Balbi balbi@ti.com +M: Felipe Balbi balbi@kernel.org L: linux-usb@vger.kernel.org W: http://www.linux-usb.org/gadget T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git @@@ -11410,7 -11395,7 +11411,7 @@@ S: Maintaine F: drivers/net/usb/pegasus.*
USB PHY LAYER -M: Felipe Balbi balbi@ti.com +M: Felipe Balbi balbi@kernel.org L: linux-usb@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git S: Maintained @@@ -12040,6 -12025,7 +12041,6 @@@ F: arch/arm64/xen F: arch/arm64/include/asm/xen/
XEN NETWORK BACKEND DRIVER -M: Ian Campbell ian.campbell@citrix.com M: Wei Liu wei.liu2@citrix.com L: xen-devel@lists.xenproject.org (moderated for non-subscribers) L: netdev@vger.kernel.org @@@ -12148,7 -12134,7 +12149,7 @@@ F: drivers/net/hamradio/*scc. F: drivers/net/hamradio/z8530.h
ZBUD COMPRESSED PAGE ALLOCATOR -M: Seth Jennings sjennings@variantweb.net +M: Seth Jennings sjenning@redhat.com L: linux-mm@kvack.org S: Maintained F: mm/zbud.c @@@ -12203,7 -12189,7 +12204,7 @@@ F: include/linux/zsmalloc. F: Documentation/vm/zsmalloc.txt
ZSWAP COMPRESSED SWAP CACHING -M: Seth Jennings sjennings@variantweb.net +M: Seth Jennings sjenning@redhat.com L: linux-mm@kvack.org S: Maintained F: mm/zswap.c
diff --combined drivers/net/bonding/bond_main.c
index b7f1a99,45bdd87..a6527d5
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@@ -214,8 -214,6 +214,8 @@@ static void bond_uninit(struct net_devi static struct rtnl_link_stats64 *bond_get_stats(struct net_device *bond_dev, struct rtnl_link_stats64 *stats); static void bond_slave_arr_handler(struct work_struct *work); +static bool bond_time_in_interval(struct bonding *bond, unsigned long last_act, + int mod);
/*---------------------------- General routines -----------------------------*/
@@@ -620,8 -618,8 +620,8 @@@ static void bond_hw_addr_swap(struct bo static void bond_set_dev_addr(struct net_device *bond_dev, struct net_device *slave_dev) { - netdev_dbg(bond_dev, "bond_dev=%p slave_dev=%p slave_dev->addr_len=%d\n", - bond_dev, slave_dev, slave_dev->addr_len); + netdev_dbg(bond_dev, "bond_dev=%p slave_dev=%p slave_dev->name=%s slave_dev->addr_len=%d\n", + bond_dev, slave_dev, slave_dev->name, slave_dev->addr_len); memcpy(bond_dev->dev_addr, slave_dev->dev_addr, slave_dev->addr_len); bond_dev->addr_assign_type = NET_ADDR_STOLEN; call_netdevice_notifiers(NETDEV_CHANGEADDR, bond_dev); @@@ -930,11 -928,10 +930,10 @@@ void bond_select_active_slave(struct bo if (!rv) return;
- if (netif_carrier_ok(bond->dev)) { + if (netif_carrier_ok(bond->dev)) netdev_info(bond->dev, "first active interface up!\n"); - } else { + else netdev_info(bond->dev, "now running without any active interface!\n"); - } } }
@@@ -1180,9 -1177,8 +1179,8 @@@ static rx_handler_result_t bond_handle_ } }
- if (bond_should_deliver_exact_match(skb, slave, bond)) { + if (bond_should_deliver_exact_match(skb, slave, bond)) return RX_HANDLER_EXACT; - }
skb->dev = bond->dev;
@@@ -1243,7 -1239,7 +1241,7 @@@ static struct slave *bond_alloc_slave(s { struct slave *slave = NULL;
- slave = kzalloc(sizeof(struct slave), GFP_KERNEL); + slave = kzalloc(sizeof(*slave), GFP_KERNEL); if (!slave) return NULL;
@@@ -1383,8 -1379,7 +1381,7 @@@ int bond_enslave(struct net_device *bon if (slave_dev->flags & IFF_UP) { netdev_err(bond_dev, "%s is up - this may be due to an out of date ifenslave\n", slave_dev->name); - res = -EPERM; - goto err_undo_flags; + return -EPERM; }
/* set bonding device ether type by slave - bonding netdevices are @@@ -1404,8 -1399,7 +1401,7 @@@ res = notifier_to_errno(res); if (res) { netdev_err(bond_dev, "refused to change device type\n"); - res = -EBUSY; - goto err_undo_flags; + return -EBUSY; }
/* Flush unicast and multicast addresses */ @@@ -1425,8 -1419,7 +1421,7 @@@ } else if (bond_dev->type != slave_dev->type) { netdev_err(bond_dev, "%s ether type (%d) is different from other slaves (%d), can not enslave it\n", slave_dev->name, slave_dev->type, bond_dev->type); - res = -EINVAL; - goto err_undo_flags; + return -EINVAL; }
if (slave_ops->ndo_set_mac_address == NULL) { @@@ -2129,7 -2122,6 +2124,7 @@@ static void bond_miimon_commit(struct b continue;
case BOND_LINK_UP: + bond_update_speed_duplex(slave); bond_set_slave_link_state(slave, BOND_LINK_UP, BOND_SLAVE_NOTIFY_NOW); slave->last_link_up = jiffies; @@@ -2462,7 -2454,7 +2457,7 @@@ int bond_arp_rcv(const struct sk_buff * struct slave *slave) { struct arphdr *arp = (struct arphdr *)skb->data; - struct slave *curr_active_slave; + struct slave *curr_active_slave, *curr_arp_slave; unsigned char *arp_ptr; __be32 sip, tip; int alen, is_arp = skb->protocol == __cpu_to_be16(ETH_P_ARP); @@@ -2509,41 -2501,26 +2504,41 @@@ &sip, &tip);
curr_active_slave = rcu_dereference(bond->curr_active_slave); + curr_arp_slave = rcu_dereference(bond->current_arp_slave);
- /* Backup slaves won't see the ARP reply, but do come through - * here for each ARP probe (so we swap the sip/tip to validate - * the probe). In a "redundant switch, common router" type of - * configuration, the ARP probe will (hopefully) travel from - * the active, through one switch, the router, then the other - * switch before reaching the backup. + /* We 'trust' the received ARP enough to validate it if: * - * We 'trust' the arp requests if there is an active slave and - * it received valid arp reply(s) after it became active. This - * is done to avoid endless looping when we can't reach the + * (a) the slave receiving the ARP is active (which includes the + * current ARP slave, if any), or + * + * (b) the receiving slave isn't active, but there is a currently + * active slave and it received valid arp reply(s) after it became + * the currently active slave, or + * + * (c) there is an ARP slave that sent an ARP during the prior ARP + * interval, and we receive an ARP reply on any slave. We accept + * these because switch FDB update delays may deliver the ARP + * reply to a slave other than the sender of the ARP request. + * + * Note: for (b), backup slaves are receiving the broadcast ARP + * request, not a reply. This request passes from the sending + * slave through the L2 switch(es) to the receiving slave. Since + * this is checking the request, sip/tip are swapped for + * validation. + * + * This is done to avoid endless looping when we can't reach the * arp_ip_target and fool ourselves with our own arp requests. */ - if (bond_is_active_slave(slave)) bond_validate_arp(bond, slave, sip, tip); else if (curr_active_slave && time_after(slave_last_rx(bond, curr_active_slave), curr_active_slave->last_link_up)) bond_validate_arp(bond, slave, tip, sip); + else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) && + bond_time_in_interval(bond, + dev_trans_start(curr_arp_slave->dev), 1)) + bond_validate_arp(bond, slave, sip, tip);
out_unlock: if (arp != (struct arphdr *)skb->data) @@@ -3327,6 -3304,7 +3322,7 @@@ static struct rtnl_link_stats64 *bond_g stats->rx_bytes += sstats->rx_bytes - pstats->rx_bytes; stats->rx_errors += sstats->rx_errors - pstats->rx_errors; stats->rx_dropped += sstats->rx_dropped - pstats->rx_dropped; + stats->rx_nohandler += sstats->rx_nohandler - pstats->rx_nohandler;
stats->tx_packets += sstats->tx_packets - pstats->tx_packets;; stats->tx_bytes += sstats->tx_bytes - pstats->tx_bytes;
diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 8ab000d,169920a..ff1507f
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@@ -69,7 -69,7 +69,7 @@@ MODULE_VERSION(DRV_MODULE_VERSION) #define BNXT_RX_DMA_OFFSET NET_SKB_PAD #define BNXT_RX_COPY_THRESH 256
-#define BNXT_TX_PUSH_THRESH 92 +#define BNXT_TX_PUSH_THRESH 164
enum board_idx { BCM57301, @@@ -223,12 -223,11 +223,12 @@@ static netdev_tx_t bnxt_start_xmit(stru }
if (free_size == bp->tx_ring_size && length <= bp->tx_push_thresh) { - struct tx_push_bd *push = txr->tx_push; - struct tx_bd *tx_push = &push->txbd1; - struct tx_bd_ext *tx_push1 = &push->txbd2; - void *pdata = tx_push1 + 1; - int j; + struct tx_push_buffer *tx_push_buf = txr->tx_push; + struct tx_push_bd *tx_push = &tx_push_buf->push_bd; + struct tx_bd_ext *tx_push1 = &tx_push->txbd2; + void *pdata = tx_push_buf->data; + u64 *end; + int j, push_len;
/* Set COAL_NOW to be ready quickly for the next push */ tx_push->tx_bd_len_flags_type = @@@ -248,9 -247,6 +248,9 @@@ tx_push1->tx_bd_cfa_meta = cpu_to_le32(vlan_tag_flags); tx_push1->tx_bd_cfa_action = cpu_to_le32(cfa_action);
+ end = PTR_ALIGN(pdata + length + 1, 8) - 1; + *end = 0; + skb_copy_from_linear_data(skb, pdata, len); pdata += len; for (j = 0; j < last_frag; j++) { @@@ -265,29 -261,22 +265,29 @@@ pdata += skb_frag_size(frag); }
- memcpy(txbd, tx_push, sizeof(*txbd)); + txbd->tx_bd_len_flags_type = tx_push->tx_bd_len_flags_type; + txbd->tx_bd_haddr = txr->data_mapping; prod = NEXT_TX(prod); txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)]; memcpy(txbd, tx_push1, sizeof(*txbd)); prod = NEXT_TX(prod); - push->doorbell = + tx_push->doorbell = cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod); txr->tx_prod = prod;
netdev_tx_sent_queue(txq, skb->len);
- __iowrite64_copy(txr->tx_doorbell, push, - (length + sizeof(*push) + 8) / 8); + push_len = (length + sizeof(*tx_push) + 7) / 8; + if (push_len > 16) { + __iowrite64_copy(txr->tx_doorbell, tx_push_buf, 16); + __iowrite64_copy(txr->tx_doorbell + 4, tx_push_buf + 1, + push_len - 16); + } else { + __iowrite64_copy(txr->tx_doorbell, tx_push_buf, + push_len); + }
tx_buf->is_push = 1; - goto tx_done; }
@@@ -1764,7 -1753,7 +1764,7 @@@ static int bnxt_alloc_tx_rings(struct b push_size = L1_CACHE_ALIGN(sizeof(struct tx_push_bd) + bp->tx_push_thresh);
- if (push_size > 128) { + if (push_size > 256) { push_size = 0; bp->tx_push_thresh = 0; } @@@ -1783,6 -1772,7 +1783,6 @@@ return rc;
if (bp->tx_push_size) { - struct tx_bd *txbd; dma_addr_t mapping;
/* One pre-allocated DMA buffer to backup @@@ -1796,11 -1786,13 +1796,11 @@@ if (!txr->tx_push) return -ENOMEM;
- txbd = &txr->tx_push->txbd1; - mapping = txr->tx_push_mapping + sizeof(struct tx_push_bd); - txbd->tx_bd_haddr = cpu_to_le64(mapping); + txr->data_mapping = cpu_to_le64(mapping);
- memset(txbd + 1, 0, sizeof(struct tx_bd_ext)); + memset(txr->tx_push, 0, sizeof(struct tx_push_bd)); } ring->queue_id = bp->q_info[j].queue_id; if (i % bp->tx_nr_rings_per_tc == (bp->tx_nr_rings_per_tc - 1)) @@@ -4554,18 -4546,20 +4554,18 @@@ static int bnxt_update_phy_setting(stru if (!(link_info->autoneg & BNXT_AUTONEG_FLOW_CTRL) && link_info->force_pause_setting != link_info->req_flow_ctrl) update_pause = true; - if (link_info->req_duplex != link_info->duplex_setting) - update_link = true; if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) { if (BNXT_AUTO_MODE(link_info->auto_mode)) update_link = true; if (link_info->req_link_speed != link_info->force_link_speed) update_link = true; + if (link_info->req_duplex != link_info->duplex_setting) + update_link = true; } else { if (link_info->auto_mode == BNXT_LINK_AUTO_NONE) update_link = true; if (link_info->advertising != link_info->auto_link_speeds) update_link = true; - if (link_info->req_link_speed != link_info->auto_link_speed) - update_link = true; }
if (update_link) @@@ -4642,7 -4636,7 +4642,7 @@@ static int __bnxt_open_nic(struct bnxt if (link_re_init) { rc = bnxt_update_phy_setting(bp); if (rc) - goto open_err; + netdev_warn(bp->dev, "failed to update phy settings\n"); }
if (irq_re_init) { @@@ -4660,7 -4654,6 +4660,7 @@@ /* Enable TX queues */ bnxt_tx_enable(bp); mod_timer(&bp->timer, jiffies + bp->current_interval); + bnxt_update_link(bp, true);
return 0;
@@@ -5377,9 -5370,16 +5377,16 @@@ static int bnxt_change_mtu(struct net_d return 0; }
- static int bnxt_setup_tc(struct net_device *dev, u8 tc) + static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *ntc) { struct bnxt *bp = netdev_priv(dev); + u8 tc; + + if (handle != TC_H_ROOT || ntc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + tc = ntc->tc;
if (tc > bp->max_tc) { netdev_err(dev, "too many traffic classes requested: %d Max supported is %d\n", @@@ -5677,16 -5677,22 +5684,16 @@@ static int bnxt_probe_phy(struct bnxt * }
/*initialize the ethool setting copy with NVM settings */ - if (BNXT_AUTO_MODE(link_info->auto_mode)) - link_info->autoneg |= BNXT_AUTONEG_SPEED; - - if (link_info->auto_pause_setting & BNXT_LINK_PAUSE_BOTH) { - if (link_info->auto_pause_setting == BNXT_LINK_PAUSE_BOTH) - link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL; + if (BNXT_AUTO_MODE(link_info->auto_mode)) { + link_info->autoneg = BNXT_AUTONEG_SPEED | + BNXT_AUTONEG_FLOW_CTRL; + link_info->advertising = link_info->auto_link_speeds; link_info->req_flow_ctrl = link_info->auto_pause_setting; - } else if (link_info->force_pause_setting & BNXT_LINK_PAUSE_BOTH) { + } else { + link_info->req_link_speed = link_info->force_link_speed; + link_info->req_duplex = link_info->duplex_setting; link_info->req_flow_ctrl = link_info->force_pause_setting; } - link_info->req_duplex = link_info->duplex_setting; - if (link_info->autoneg & BNXT_AUTONEG_SPEED) - link_info->req_link_speed = link_info->auto_link_speed; - else - link_info->req_link_speed = link_info->force_link_speed; - link_info->advertising = link_info->auto_link_speeds; snprintf(phy_ver, PHY_VER_STR_LEN, " ph %d.%d.%d", link_info->phy_ver[0], link_info->phy_ver[1],
diff --combined drivers/net/ethernet/cavium/thunder/nicvf_main.c
index a009bc3,90ce93e..bfee298
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@@ -574,7 -574,8 +574,7 @@@ static inline void nicvf_set_rxhash(str
static void nicvf_rcv_pkt_handler(struct net_device *netdev, struct napi_struct *napi, - struct cmp_queue *cq, - struct cqe_rx_t *cqe_rx, int cqe_type) + struct cqe_rx_t *cqe_rx) { struct sk_buff *skb; struct nicvf *nic = netdev_priv(netdev); @@@ -590,7 -591,7 +590,7 @@@ }
/* Check for errors */ - err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx); + err = nicvf_check_cqe_rx_errs(nic, cqe_rx); if (err && !cqe_rx->rb_cnt) return;
@@@ -681,7 -682,8 +681,7 @@@ loop cq_idx, cq_desc->cqe_type); switch (cq_desc->cqe_type) { case CQE_TYPE_RX: - nicvf_rcv_pkt_handler(netdev, napi, cq, - cq_desc, CQE_TYPE_RX); + nicvf_rcv_pkt_handler(netdev, napi, cq_desc); work_done++; break; case CQE_TYPE_SEND: @@@ -826,7 -828,7 +826,7 @@@ static irqreturn_t nicvf_intr_handler(i nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
/* Schedule NAPI */ - napi_schedule(&cq_poll->napi); + napi_schedule_irqoff(&cq_poll->napi);
/* Clear interrupt */ nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx); @@@ -897,6 -899,31 +897,31 @@@ static void nicvf_disable_msix(struct n } }
+ static void nicvf_set_irq_affinity(struct nicvf *nic) + { + int vec, cpu; + int irqnum; + + for (vec = 0; vec < nic->num_vec; vec++) { + if (!nic->irq_allocated[vec]) + continue; + + if (!zalloc_cpumask_var(&nic->affinity_mask[vec], GFP_KERNEL)) + return; + /* CQ interrupts */ + if (vec < NICVF_INTR_ID_SQ) + /* Leave CPU0 for RBDR and other interrupts */ + cpu = nicvf_netdev_qidx(nic, vec) + 1; + else + cpu = 0; + + cpumask_set_cpu(cpumask_local_spread(cpu, nic->node), + nic->affinity_mask[vec]); + irqnum = nic->msix_entries[vec].vector; + irq_set_affinity_hint(irqnum, nic->affinity_mask[vec]); + } + } + static int nicvf_register_interrupts(struct nicvf *nic) { int irq, ret = 0; @@@ -942,8 -969,13 +967,13 @@@ ret = request_irq(nic->msix_entries[irq].vector, nicvf_qs_err_intr_handler, 0, nic->irq_name[irq], nic); - if (!ret) - nic->irq_allocated[irq] = true; + if (ret) + goto err; + + nic->irq_allocated[irq] = true; + + /* Set IRQ affinities */ + nicvf_set_irq_affinity(nic);
err: if (ret) @@@ -961,6 -993,9 +991,9 @@@ static void nicvf_unregister_interrupts if (!nic->irq_allocated[irq]) continue;
+ irq_set_affinity_hint(nic->msix_entries[irq].vector, NULL); + free_cpumask_var(nic->affinity_mask[irq]); + if (irq < NICVF_INTR_ID_SQ) free_irq(nic->msix_entries[irq].vector, nic->napi[irq]); else @@@ -1123,6 -1158,7 +1156,6 @@@ int nicvf_stop(struct net_device *netde
/* Clear multiqset info */ nic->pnicvf = nic; - nic->sqs_count = 0;
return 0; } @@@ -1351,9 -1387,6 +1384,9 @@@ void nicvf_update_stats(struct nicvf *n drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok + stats->tx_bcast_frames_ok + stats->tx_mcast_frames_ok; + drv_stats->rx_frames_ok = stats->rx_ucast_frames + + stats->rx_bcast_frames + + stats->rx_mcast_frames; drv_stats->rx_drops = stats->rx_drop_red + stats->rx_drop_overrun; drv_stats->tx_drops = stats->tx_drops; @@@ -1394,6 -1427,7 +1427,7 @@@ static void nicvf_tx_timeout(struct net netdev_warn(dev, "%s: Transmit timed out, resetting\n", dev->name);
+ nic->drv_stats.tx_timeout++; schedule_work(&nic->reset_task); }
@@@ -1538,9 -1572,6 +1572,9 @@@ static int nicvf_probe(struct pci_dev *
nicvf_send_vf_struct(nic);
+ if (!pass1_silicon(nic->pdev)) + nic->hw_tso = true; + /* Check if this VF is in QS only mode */ if (nic->sqs_mode) return 0; @@@ -1560,6 -1591,9 +1594,6 @@@
netdev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
- if (!pass1_silicon(nic->pdev)) - nic->hw_tso = true; - netdev->netdev_ops = &nicvf_netdev_ops; netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
diff --combined drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 767347b,5adb208..0dd1abf
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@@ -78,7 -78,7 +78,7 @@@ static void nicvf_free_q_desc_mem(struc static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, u32 buf_len, u64 **rbuf) { - int order = get_order(buf_len); + int order = (PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0;
/* Check if request can be accomodated in previous allocated page */ if (nic->rb_page) { @@@ -96,8 -96,7 +96,7 @@@ nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, order); if (!nic->rb_page) { - netdev_err(nic->netdev, - "Failed to allocate new rcv buffer\n"); + nic->drv_stats.rcv_buffer_alloc_failures++; return -ENOMEM; } nic->rb_page_offset = 0; @@@ -1329,12 -1328,16 +1328,12 @@@ void nicvf_update_sq_stats(struct nicv }
/* Check for errors in the receive cmp.queue entry */ -int nicvf_check_cqe_rx_errs(struct nicvf *nic, - struct cmp_queue *cq, struct cqe_rx_t *cqe_rx) +int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) { struct nicvf_hw_stats *stats = &nic->hw_stats; - struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
- if (!cqe_rx->err_level && !cqe_rx->err_opcode) { - drv_stats->rx_frames_ok++; + if (!cqe_rx->err_level && !cqe_rx->err_opcode) return 0; - }
if (netif_msg_rx_err(nic)) netdev_err(nic->netdev,
diff --combined drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index f191a16,01d6a96..96d95cb
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@@ -69,6 -69,15 +69,15 @@@ int mlx4_en_setup_tc(struct net_device return 0; }
+ static int __mlx4_en_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) + { + if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + + return mlx4_en_setup_tc(dev, tc->tc); + } + #ifdef CONFIG_RFS_ACCEL
struct mlx4_en_filter { @@@ -2344,6 -2353,8 +2353,6 @@@ out /* set offloads */ priv->dev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL; - priv->dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; - priv->dev->features |= NETIF_F_GSO_UDP_TUNNEL; }
static void mlx4_en_del_vxlan_offloads(struct work_struct *work) @@@ -2354,6 -2365,8 +2363,6 @@@ /* unset offloads */ priv->dev->hw_enc_features &= ~(NETIF_F_IP_CSUM | NETIF_F_RXCSUM | NETIF_F_TSO | NETIF_F_GSO_UDP_TUNNEL); - priv->dev->hw_features &= ~NETIF_F_GSO_UDP_TUNNEL; - priv->dev->features &= ~NETIF_F_GSO_UDP_TUNNEL;
ret = mlx4_SET_PORT_VXLAN(priv->mdev->dev, priv->port, VXLAN_STEER_BY_OUTER_MAC, 0); @@@ -2462,7 -2475,7 +2471,7 @@@ static const struct net_device_ops mlx4 #endif .ndo_set_features = mlx4_en_set_features, .ndo_fix_features = mlx4_en_fix_features, - .ndo_setup_tc = mlx4_en_setup_tc, + .ndo_setup_tc = __mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif @@@ -2500,7 -2513,7 +2509,7 @@@ static const struct net_device_ops mlx4 #endif .ndo_set_features = mlx4_en_set_features, .ndo_fix_features = mlx4_en_fix_features, - .ndo_setup_tc = mlx4_en_setup_tc, + .ndo_setup_tc = __mlx4_en_setup_tc, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx4_en_filter_rfs, #endif @@@ -2976,11 -2989,6 +2985,11 @@@ int mlx4_en_init_netdev(struct mlx4_en_ priv->rss_hash_fn = ETH_RSS_HASH_TOP; }
+ if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) { + dev->hw_features |= NETIF_F_GSO_UDP_TUNNEL; + dev->features |= NETIF_F_GSO_UDP_TUNNEL; + } + mdev->pndev[port] = dev; mdev->upper[port] = NULL;
diff --combined drivers/net/ethernet/renesas/ravb_main.c
index 744d780,331c596..88656ce
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@@ -2,7 -2,7 +2,7 @@@ * * Copyright (C) 2014-2015 Renesas Electronics Corporation * Copyright (C) 2015 Renesas Solutions Corp. - * Copyright (C) 2015 Cogent Embedded, Inc. source@cogentembedded.com + * Copyright (C) 2015-2016 Cogent Embedded, Inc. source@cogentembedded.com * * Based on the SuperH Ethernet driver *
@@@ -42,6 -42,12 +42,12 @@@ NETIF_MSG_RX_ERR | \ NETIF_MSG_TX_ERR)
+ void ravb_modify(struct net_device *ndev, enum ravb_reg reg, u32 clear, + u32 set) + { + ravb_write(ndev, (ravb_read(ndev, reg) & ~clear) | set, reg); + } + int ravb_wait(struct net_device *ndev, enum ravb_reg reg, u32 mask, u32 value) { int i; @@@ -59,8 -65,7 +65,7 @@@ static int ravb_config(struct net_devic int error;
/* Set config mode */ - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | CCC_OPC_CONFIG, - CCC); + ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG); /* Check if the operating mode is changed to the config mode */ error = ravb_wait(ndev, CSR, CSR_OPS, CSR_OPS_CONFIG); if (error) @@@ -72,13 -77,8 +77,8 @@@ static void ravb_set_duplex(struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); - u32 ecmr = ravb_read(ndev, ECMR);
- if (priv->duplex) /* Full */ - ecmr |= ECMR_DM; - else /* Half */ - ecmr &= ~ECMR_DM; - ravb_write(ndev, ecmr, ECMR); + ravb_modify(ndev, ECMR, ECMR_DM, priv->duplex ? ECMR_DM : 0); }
static void ravb_set_rate(struct net_device *ndev) @@@ -92,8 -92,6 +92,6 @@@ case 1000: /* 1000BASE */ ravb_write(ndev, GECMR_SPEED_1000, GECMR); break; - default: - break; } }
@@@ -131,13 -129,8 +129,8 @@@ static void ravb_mdio_ctrl(struct mdiob { struct ravb_private *priv = container_of(ctrl, struct ravb_private, mdiobb); - u32 pir = ravb_read(priv->ndev, PIR);
- if (set) - pir |= mask; - else - pir &= ~mask; - ravb_write(priv->ndev, pir, PIR); + ravb_modify(priv->ndev, PIR, mask, set ? mask : 0); }
/* MDC pin control */ @@@ -393,9 -386,9 +386,9 @@@ static int ravb_dmac_init(struct net_de ravb_ring_format(ndev, RAVB_NC);
#if defined(__LITTLE_ENDIAN) - ravb_write(ndev, ravb_read(ndev, CCC) & ~CCC_BOC, CCC); + ravb_modify(ndev, CCC, CCC_BOC, 0); #else - ravb_write(ndev, ravb_read(ndev, CCC) | CCC_BOC, CCC); + ravb_modify(ndev, CCC, CCC_BOC, CCC_BOC); #endif
/* Set AVB RX */ @@@ -418,8 -411,7 +411,7 @@@ ravb_write(ndev, TIC_FTE0 | TIC_FTE1 | TIC_TFUE, TIC);
/* Setting the control will start the AVB-DMAC process. */ - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | CCC_OPC_OPERATION, - CCC); + ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_OPERATION);
return 0; } @@@ -493,7 -485,7 +485,7 @@@ static void ravb_get_tx_tstamp(struct n break; } } - ravb_write(ndev, ravb_read(ndev, TCCR) | TCCR_TFR, TCCR); + ravb_modify(ndev, TCCR, TCCR_TFR, TCCR_TFR); } }
@@@ -613,13 -605,13 +605,13 @@@ static bool ravb_rx(struct net_device * static void ravb_rcv_snd_disable(struct net_device *ndev) { /* Disable TX and RX */ - ravb_write(ndev, ravb_read(ndev, ECMR) & ~(ECMR_RE | ECMR_TE), ECMR); + ravb_modify(ndev, ECMR, ECMR_RE | ECMR_TE, 0); }
static void ravb_rcv_snd_enable(struct net_device *ndev) { /* Enable TX and RX */ - ravb_write(ndev, ravb_read(ndev, ECMR) | ECMR_RE | ECMR_TE, ECMR); + ravb_modify(ndev, ECMR, ECMR_RE | ECMR_TE, ECMR_RE | ECMR_TE); }
/* function for waiting dma process finished */ @@@ -812,8 -804,8 +804,8 @@@ static int ravb_poll(struct napi_struc
/* Re-enable RX/TX interrupts */ spin_lock_irqsave(&priv->lock, flags); - ravb_write(ndev, ravb_read(ndev, RIC0) | mask, RIC0); - ravb_write(ndev, ravb_read(ndev, TIC) | mask, TIC); + ravb_modify(ndev, RIC0, mask, mask); + ravb_modify(ndev, TIC, mask, mask); mmiowb(); spin_unlock_irqrestore(&priv->lock, flags);
@@@ -852,8 -844,7 +844,7 @@@ static void ravb_adjust_link(struct net ravb_set_rate(ndev); } if (!priv->link) { - ravb_write(ndev, ravb_read(ndev, ECMR) & ~ECMR_TXF, - ECMR); + ravb_modify(ndev, ECMR, ECMR_TXF, 0); new_state = true; priv->link = phydev->link; if (priv->no_avb_link) @@@ -1139,8 -1130,7 +1130,8 @@@ static int ravb_set_ringparam(struct ne if (netif_running(ndev)) { netif_device_detach(ndev); /* Stop PTP Clock driver */ - ravb_ptp_stop(ndev); + if (priv->chip_id == RCAR_GEN2) + ravb_ptp_stop(ndev); /* Wait for DMA stopping */ error = ravb_stop_dma(ndev); if (error) { @@@ -1171,8 -1161,7 +1162,8 @@@ ravb_emac_init(ndev);
/* Initialise PTP Clock driver */ - ravb_ptp_init(ndev, priv->pdev); + if (priv->chip_id == RCAR_GEN2) + ravb_ptp_init(ndev, priv->pdev);
netif_device_attach(ndev); } @@@ -1300,8 -1289,7 +1291,8 @@@ static void ravb_tx_timeout_work(struc netif_tx_stop_all_queues(ndev);
/* Stop PTP Clock driver */ - ravb_ptp_stop(ndev); + if (priv->chip_id == RCAR_GEN2) + ravb_ptp_stop(ndev);
/* Wait for DMA stopping */ ravb_stop_dma(ndev); @@@ -1314,8 -1302,7 +1305,8 @@@ ravb_emac_init(ndev);
/* Initialise PTP Clock driver */ - ravb_ptp_init(ndev, priv->pdev); + if (priv->chip_id == RCAR_GEN2) + ravb_ptp_init(ndev, priv->pdev);
netif_tx_start_all_queues(ndev); } @@@ -1397,7 -1384,7 +1388,7 @@@ static netdev_tx_t ravb_start_xmit(stru desc--; desc->die_dt = DT_FSTART;
- ravb_write(ndev, ravb_read(ndev, TCCR) | (TCCR_TSRQ0 << q), TCCR); + ravb_modify(ndev, TCCR, TCCR_TSRQ0 << q, TCCR_TSRQ0 << q);
priv->cur_tx[q] += NUM_TX_DESC; if (priv->cur_tx[q] - priv->dirty_tx[q] > @@@ -1472,15 -1459,10 +1463,10 @@@ static void ravb_set_rx_mode(struct net { struct ravb_private *priv = netdev_priv(ndev); unsigned long flags; - u32 ecmr;
spin_lock_irqsave(&priv->lock, flags); - ecmr = ravb_read(ndev, ECMR); - if (ndev->flags & IFF_PROMISC) - ecmr |= ECMR_PRM; - else - ecmr &= ~ECMR_PRM; - ravb_write(ndev, ecmr, ECMR); + ravb_modify(ndev, ECMR, ECMR_PRM, + ndev->flags & IFF_PROMISC ? ECMR_PRM : 0); mmiowb(); spin_unlock_irqrestore(&priv->lock, flags); } @@@ -1808,23 -1790,25 +1794,21 @@@ static int ravb_probe(struct platform_d
/* Set AVB config mode */ if (chip_id == RCAR_GEN2) { - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | - CCC_OPC_CONFIG, CCC); + ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG); /* Set CSEL value */ - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | - CCC_CSEL_HPB, CCC); + ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB); } else { - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_OPC) | - CCC_OPC_CONFIG | CCC_GAC | CCC_CSEL_HPB, CCC); + ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG | + CCC_GAC | CCC_CSEL_HPB); }
- /* Set CSEL value */ - ravb_write(ndev, (ravb_read(ndev, CCC) & ~CCC_CSEL) | CCC_CSEL_HPB, - CCC); - /* Set GTI value */ error = ravb_set_gti(ndev); if (error) goto out_release;
/* Request GTI loading */ - ravb_write(ndev, ravb_read(ndev, GCCR) | GCCR_LTI, GCCR); + ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI);
/* Allocate descriptor base address table */ priv->desc_bat_size = sizeof(struct ravb_desc) * DBAT_ENTRY_NUM;
diff --combined drivers/net/ethernet/ti/netcp_core.c
index 029841f,06a0a73..ed0c30f
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@@ -117,17 -117,21 +117,17 @@@ static void get_pkt_info(dma_addr_t *bu *ndesc = le32_to_cpu(desc->next_desc); }
-static void get_pad_info(u32 *pad0, u32 *pad1, u32 *pad2, struct knav_dma_desc *desc) +static u32 get_sw_data(int index, struct knav_dma_desc *desc) { - *pad0 = le32_to_cpu(desc->pad[0]); - *pad1 = le32_to_cpu(desc->pad[1]); - *pad2 = le32_to_cpu(desc->pad[2]); + /* No Endian conversion needed as this data is untouched by hw */ + return desc->sw_data[index]; }
-static void get_pad_ptr(void **padptr, struct knav_dma_desc *desc) -{ - u64 pad64; - - pad64 = le32_to_cpu(desc->pad[0]) + - ((u64)le32_to_cpu(desc->pad[1]) << 32); - *padptr = (void *)(uintptr_t)pad64; -} +/* use these macros to get sw data */ +#define GET_SW_DATA0(desc) get_sw_data(0, desc) +#define GET_SW_DATA1(desc) get_sw_data(1, desc) +#define GET_SW_DATA2(desc) get_sw_data(2, desc) +#define GET_SW_DATA3(desc) get_sw_data(3, desc)
static void get_org_pkt_info(dma_addr_t *buff, u32 *buff_len, struct knav_dma_desc *desc) @@@ -159,18 -163,13 +159,18 @@@ static void set_desc_info(u32 desc_info desc->packet_info = cpu_to_le32(pkt_info); }
-static void set_pad_info(u32 pad0, u32 pad1, u32 pad2, struct knav_dma_desc *desc) +static void set_sw_data(int index, u32 data, struct knav_dma_desc *desc) { - desc->pad[0] = cpu_to_le32(pad0); - desc->pad[1] = cpu_to_le32(pad1); - desc->pad[2] = cpu_to_le32(pad1); + /* No Endian conversion needed as this data is untouched by hw */ + desc->sw_data[index] = data; }
+/* use these macros to set sw data */ +#define SET_SW_DATA0(data, desc) set_sw_data(0, data, desc) +#define SET_SW_DATA1(data, desc) set_sw_data(1, data, desc) +#define SET_SW_DATA2(data, desc) set_sw_data(2, data, desc) +#define SET_SW_DATA3(data, desc) set_sw_data(3, data, desc) + static void set_org_pkt_info(dma_addr_t buff, u32 buff_len, struct knav_dma_desc *desc) { @@@ -582,6 -581,7 +582,6 @@@ static void netcp_free_rx_desc_chain(st dma_addr_t dma_desc, dma_buf; unsigned int buf_len, dma_sz = sizeof(*ndesc); void *buf_ptr; - u32 pad[2]; u32 tmp;
get_words(&dma_desc, 1, &desc->next_desc); @@@ -593,20 -593,14 +593,20 @@@ break; } get_pkt_info(&dma_buf, &tmp, &dma_desc, ndesc); - get_pad_ptr(&buf_ptr, ndesc); + /* warning!!!! We are retrieving the virtual ptr in the sw_data + * field as a 32bit value. Will not work on 64bit machines + */ + buf_ptr = (void *)GET_SW_DATA0(ndesc); + buf_len = (int)GET_SW_DATA1(desc); dma_unmap_page(netcp->dev, dma_buf, PAGE_SIZE, DMA_FROM_DEVICE); __free_page(buf_ptr); knav_pool_desc_put(netcp->rx_pool, desc); } - - get_pad_info(&pad[0], &pad[1], &buf_len, desc); - buf_ptr = (void *)(uintptr_t)(pad[0] + ((u64)pad[1] << 32)); + /* warning!!!! We are retrieving the virtual ptr in the sw_data + * field as a 32bit value. Will not work on 64bit machines + */ + buf_ptr = (void *)GET_SW_DATA0(desc); + buf_len = (int)GET_SW_DATA1(desc);
if (buf_ptr) netcp_frag_free(buf_len <= PAGE_SIZE, buf_ptr); @@@ -645,6 -639,7 +645,6 @@@ static int netcp_process_one_rx_packet( dma_addr_t dma_desc, dma_buff; struct netcp_packet p_info; struct sk_buff *skb; - u32 pad[2]; void *org_buf_ptr;
dma_desc = knav_queue_pop(netcp->rx_queue, &dma_sz); @@@ -658,11 -653,8 +658,11 @@@ }
get_pkt_info(&dma_buff, &buf_len, &dma_desc, desc); - get_pad_info(&pad[0], &pad[1], &org_buf_len, desc); - org_buf_ptr = (void *)(uintptr_t)(pad[0] + ((u64)pad[1] << 32)); + /* warning!!!! We are retrieving the virtual ptr in the sw_data + * field as a 32bit value. Will not work on 64bit machines + */ + org_buf_ptr = (void *)GET_SW_DATA0(desc); + org_buf_len = (int)GET_SW_DATA1(desc);
if (unlikely(!org_buf_ptr)) { dev_err(netcp->ndev_dev, "NULL bufptr in desc\n"); @@@ -687,6 -679,7 +687,6 @@@ /* Fill in the page fragment list */ while (dma_desc) { struct page *page; - void *ptr;
ndesc = knav_pool_desc_unmap(netcp->rx_pool, dma_desc, dma_sz); if (unlikely(!ndesc)) { @@@ -695,10 -688,8 +695,10 @@@ }
get_pkt_info(&dma_buff, &buf_len, &dma_desc, ndesc); - get_pad_ptr(&ptr, ndesc); - page = ptr; + /* warning!!!! We are retrieving the virtual ptr in the sw_data + * field as a 32bit value. Will not work on 64bit machines + */ + page = (struct page *)GET_SW_DATA0(desc);
if (likely(dma_buff && buf_len && page)) { dma_unmap_page(netcp->dev, dma_buff, PAGE_SIZE, @@@ -786,10 -777,7 +786,10 @@@ static void netcp_free_rx_buf(struct ne }
get_org_pkt_info(&dma, &buf_len, desc); - get_pad_ptr(&buf_ptr, desc); + /* warning!!!! We are retrieving the virtual ptr in the sw_data + * field as a 32bit value. Will not work on 64bit machines + */ + buf_ptr = (void *)GET_SW_DATA0(desc);
if (unlikely(!dma)) { dev_err(netcp->ndev_dev, "NULL orig_buff in desc\n"); @@@ -841,7 -829,7 +841,7 @@@ static int netcp_allocate_rx_buf(struc struct page *page; dma_addr_t dma; void *bufptr; - u32 pad[3]; + u32 sw_data[2];
/* Allocate descriptor */ hwdesc = knav_pool_desc_get(netcp->rx_pool); @@@ -858,7 -846,7 +858,7 @@@ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
bufptr = netdev_alloc_frag(primary_buf_len); - pad[2] = primary_buf_len; + sw_data[1] = primary_buf_len;
if (unlikely(!bufptr)) { dev_warn_ratelimited(netcp->ndev_dev, @@@ -870,10 -858,9 +870,10 @@@ if (unlikely(dma_mapping_error(netcp->dev, dma))) goto fail;
- pad[0] = lower_32_bits((uintptr_t)bufptr); - pad[1] = upper_32_bits((uintptr_t)bufptr); - + /* warning!!!! We are saving the virtual ptr in the sw_data + * field as a 32bit value. Will not work on 64bit machines + */ + sw_data[0] = (u32)bufptr; } else { /* Allocate a secondary receive queue entry */ page = alloc_page(GFP_ATOMIC | GFP_DMA | __GFP_COLD); @@@ -883,11 -870,9 +883,11 @@@ } buf_len = PAGE_SIZE; dma = dma_map_page(netcp->dev, page, 0, buf_len, DMA_TO_DEVICE); - pad[0] = lower_32_bits(dma); - pad[1] = upper_32_bits(dma); - pad[2] = 0; + /* warning!!!! We are saving the virtual ptr in the sw_data + * field as a 32bit value. Will not work on 64bit machines + */ + sw_data[0] = (u32)page; + sw_data[1] = 0; }
desc_info = KNAV_DMA_DESC_PS_INFO_IN_DESC; @@@ -897,8 -882,7 +897,8 @@@ pkt_info |= (netcp->rx_queue_id & KNAV_DMA_DESC_RETQ_MASK) << KNAV_DMA_DESC_RETQ_SHIFT; set_org_pkt_info(dma, buf_len, hwdesc); - set_pad_info(pad[0], pad[1], pad[2], hwdesc); + SET_SW_DATA0(sw_data[0], hwdesc); + SET_SW_DATA1(sw_data[1], hwdesc); set_desc_info(desc_info, pkt_info, hwdesc);
/* Push to FDQs */ @@@ -987,6 -971,7 +987,6 @@@ static int netcp_process_tx_compl_packe unsigned int budget) { struct knav_dma_desc *desc; - void *ptr; struct sk_buff *skb; unsigned int dma_sz; dma_addr_t dma; @@@ -1003,10 -988,8 +1003,10 @@@ continue; }
- get_pad_ptr(&ptr, desc); - skb = ptr; + /* warning!!!! We are retrieving the virtual ptr in the sw_data + * field as a 32bit value. Will not work on 64bit machines + */ + skb = (struct sk_buff *)GET_SW_DATA0(desc); netcp_free_tx_desc_chain(netcp, desc, dma_sz); if (!skb) { dev_err(netcp->ndev_dev, "No skb in Tx desc\n"); @@@ -1211,10 -1194,10 +1211,10 @@@ static int netcp_tx_submit_skb(struct n }
set_words(&tmp, 1, &desc->packet_info); - tmp = lower_32_bits((uintptr_t)&skb); - set_words(&tmp, 1, &desc->pad[0]); - tmp = upper_32_bits((uintptr_t)&skb); - set_words(&tmp, 1, &desc->pad[1]); + /* warning!!!! We are saving the virtual ptr in the sw_data + * field as a 32bit value. Will not work on 64bit machines + */ + SET_SW_DATA0((u32)skb, desc);
if (tx_pipe->flags & SWITCH_TO_PORT_IN_TAGINFO) { tmp = tx_pipe->switch_to_port; @@@ -1852,22 -1835,26 +1852,26 @@@ static u16 netcp_select_queue(struct ne return 0; }
- static int netcp_setup_tc(struct net_device *dev, u8 num_tc) + static int netcp_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) { int i;
/* setup tc must be called under rtnl lock */ ASSERT_RTNL();
+ if (handle != TC_H_ROOT || tc->type != TC_SETUP_MQPRIO) + return -EINVAL; + /* Sanity-check the number of traffic classes requested */ if ((dev->real_num_tx_queues <= 1) || - (dev->real_num_tx_queues < num_tc)) + (dev->real_num_tx_queues < tc->tc)) return -EINVAL;
/* Configure traffic class to queue mappings */ - if (num_tc) { - netdev_set_num_tc(dev, num_tc); - for (i = 0; i < num_tc; i++) + if (tc->tc) { + netdev_set_num_tc(dev, tc->tc); + for (i = 0; i < tc->tc; i++) netdev_set_tc_queue(dev, i, 1, i); } else { netdev_reset_tc(dev);
diff --combined drivers/net/geneve.c
index 0bf7edd,8fa8388..bc5da35
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@@ -72,10 -72,11 +72,11 @@@ struct geneve_dev bool collect_md; struct gro_cells gro_cells; u32 flags; + struct dst_cache dst_cache; };
/* Geneve device flags */ - #define GENEVE_F_UDP_CSUM BIT(0) + #define GENEVE_F_UDP_ZERO_CSUM_TX BIT(0) #define GENEVE_F_UDP_ZERO_CSUM6_TX BIT(1) #define GENEVE_F_UDP_ZERO_CSUM6_RX BIT(2)
@@@ -109,6 -110,11 +110,11 @@@ static __be64 vni_to_tunnel_id(const __ #endif }
+ static sa_family_t geneve_get_sk_family(struct geneve_sock *gs) + { + return gs->sock->sk->sk_family; + } + static struct geneve_dev *geneve_lookup(struct geneve_sock *gs, __be32 addr, u8 vni[]) { @@@ -152,58 -158,60 +158,60 @@@ static inline struct genevehdr *geneve_ return (struct genevehdr *)(udp_hdr(skb) + 1); }
- /* geneve receive/decap routine */ - static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb) + static struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs, + struct sk_buff *skb) { - struct genevehdr *gnvh = geneve_hdr(skb); - struct metadata_dst *tun_dst = NULL; - struct geneve_dev *geneve = NULL; - struct pcpu_sw_netstats *stats; - struct iphdr *iph = NULL; + u8 *vni; __be32 addr; static u8 zero_vni[3]; - u8 *vni; - int err = 0; - sa_family_t sa_family; #if IS_ENABLED(CONFIG_IPV6) - struct ipv6hdr *ip6h = NULL; - struct in6_addr addr6; static struct in6_addr zero_addr6; #endif
- sa_family = gs->sock->sk->sk_family; + if (geneve_get_sk_family(gs) == AF_INET) { + struct iphdr *iph;
- if (sa_family == AF_INET) { iph = ip_hdr(skb); /* outer IP header... */
if (gs->collect_md) { vni = zero_vni; addr = 0; } else { - vni = gnvh->vni; - + vni = geneve_hdr(skb)->vni; addr = iph->saddr; }
- geneve = geneve_lookup(gs, addr, vni); + return geneve_lookup(gs, addr, vni); #if IS_ENABLED(CONFIG_IPV6) - } else if (sa_family == AF_INET6) { + } else if (geneve_get_sk_family(gs) == AF_INET6) { + struct ipv6hdr *ip6h; + struct in6_addr addr6; + ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
if (gs->collect_md) { vni = zero_vni; addr6 = zero_addr6; } else { - vni = gnvh->vni; - + vni = geneve_hdr(skb)->vni; addr6 = ip6h->saddr; }
- geneve = geneve6_lookup(gs, addr6, vni); + return geneve6_lookup(gs, addr6, vni); #endif } - if (!geneve) - goto drop; + return NULL; + } + + /* geneve receive/decap routine */ + static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, + struct sk_buff *skb) + { + struct genevehdr *gnvh = geneve_hdr(skb); + struct metadata_dst *tun_dst = NULL; + struct pcpu_sw_netstats *stats; + int err = 0; + void *oiph;
if (ip_tunnel_collect_metadata() || gs->collect_md) { __be16 flags; @@@ -212,7 -220,7 +220,7 @@@ (gnvh->oam ? TUNNEL_OAM : 0) | (gnvh->critical ? TUNNEL_CRIT_OPT : 0);
- tun_dst = udp_tun_rx_dst(skb, sa_family, flags, + tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags, vni_to_tunnel_id(gnvh->vni), gnvh->opt_len * 4); if (!tun_dst) @@@ -229,7 -237,6 +237,6 @@@ }
skb_reset_mac_header(skb); - skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev))); skb->protocol = eth_type_trans(skb, geneve->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
@@@ -240,25 -247,27 +247,27 @@@ if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) goto drop;
+ oiph = skb_network_header(skb); skb_reset_network_header(skb);
- if (iph) - err = IP_ECN_decapsulate(iph, skb); + if (geneve_get_sk_family(gs) == AF_INET) + err = IP_ECN_decapsulate(oiph, skb); #if IS_ENABLED(CONFIG_IPV6) - if (ip6h) - err = IP6_ECN_decapsulate(ip6h, skb); + else + err = IP6_ECN_decapsulate(oiph, skb); #endif
if (unlikely(err)) { if (log_ecn_error) { - if (iph) + if (geneve_get_sk_family(gs) == AF_INET) net_info_ratelimited("non-ECT from %pI4 " "with TOS=%#x\n", - &iph->saddr, iph->tos); + &((struct iphdr *)oiph)->saddr, + ((struct iphdr *)oiph)->tos); #if IS_ENABLED(CONFIG_IPV6) - if (ip6h) + else net_info_ratelimited("non-ECT from %pI6\n", - &ip6h->saddr); + &((struct ipv6hdr *)oiph)->saddr); #endif } if (err > 1) { @@@ -297,6 -306,13 +306,13 @@@ static int geneve_init(struct net_devic return err; }
+ err = dst_cache_init(&geneve->dst_cache, GFP_KERNEL); + if (err) { + free_percpu(dev->tstats); + gro_cells_destroy(&geneve->gro_cells); + return err; + } + return 0; }
@@@ -304,6 -320,7 +320,7 @@@ static void geneve_uninit(struct net_de { struct geneve_dev *geneve = netdev_priv(dev);
+ dst_cache_destroy(&geneve->dst_cache); gro_cells_destroy(&geneve->gro_cells); free_percpu(dev->tstats); } @@@ -312,6 -329,7 +329,7 @@@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct genevehdr *geneveh; + struct geneve_dev *geneve; struct geneve_sock *gs; int opts_len;
@@@ -327,16 -345,21 +345,21 @@@ if (unlikely(geneveh->proto_type != htons(ETH_P_TEB))) goto error;
- opts_len = geneveh->opt_len * 4; - if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, - htons(ETH_P_TEB))) - goto drop; - gs = rcu_dereference_sk_user_data(sk); if (!gs) goto drop;
- geneve_rx(gs, skb); + geneve = geneve_lookup_skb(gs, skb); + if (!geneve) + goto drop; + + opts_len = geneveh->opt_len * 4; + if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, + htons(ETH_P_TEB), + !net_eq(geneve->net, dev_net(geneve->dev)))) + goto drop; + + geneve_rx(geneve, gs, skb); return 0;
drop: @@@ -383,7 -406,7 +406,7 @@@ static void geneve_notify_add_rx_port(s struct net_device *dev; struct sock *sk = gs->sock->sk; struct net *net = sock_net(sk); - sa_family_t sa_family = sk->sk_family; + sa_family_t sa_family = geneve_get_sk_family(gs); __be16 port = inet_sk(sk)->inet_sport; int err;
@@@ -544,7 -567,7 +567,7 @@@ static void geneve_notify_del_rx_port(s struct net_device *dev; struct sock *sk = gs->sock->sk; struct net *net = sock_net(sk); - sa_family_t sa_family = sk->sk_family; + sa_family_t sa_family = geneve_get_sk_family(gs); __be16 port = inet_sk(sk)->inet_sport;
rcu_read_lock(); @@@ -587,7 -610,7 +610,7 @@@ static struct geneve_sock *geneve_find_
list_for_each_entry(gs, &gn->sock_list, list) { if (inet_sk(gs->sock->sk)->inet_sport == dst_port && - inet_sk(gs->sock->sk)->sk.sk_family == family) { + geneve_get_sk_family(gs) == family) { return gs; } } @@@ -680,7 -703,7 +703,7 @@@ static int geneve_build_skb(struct rtab struct genevehdr *gnvh; int min_headroom; int err; - bool udp_sum = !!(flags & GENEVE_F_UDP_CSUM); + bool udp_sum = !(flags & GENEVE_F_UDP_ZERO_CSUM_TX);
skb_scrub_packet(skb, xnet);
@@@ -753,7 -776,9 +776,9 @@@ static struct rtable *geneve_get_v4_rt( struct ip_tunnel_info *info) { struct geneve_dev *geneve = netdev_priv(dev); + struct dst_cache *dst_cache; struct rtable *rt = NULL; + bool use_cache = true; __u8 tos;
memset(fl4, 0, sizeof(*fl4)); @@@ -764,16 -789,26 +789,26 @@@ fl4->daddr = info->key.u.ipv4.dst; fl4->saddr = info->key.u.ipv4.src; fl4->flowi4_tos = RT_TOS(info->key.tos); + dst_cache = &info->dst_cache; } else { tos = geneve->tos; if (tos == 1) { const struct iphdr *iip = ip_hdr(skb);
tos = ip_tunnel_get_dsfield(iip, skb); + use_cache = false; }
fl4->flowi4_tos = RT_TOS(tos); fl4->daddr = geneve->remote.sin.sin_addr.s_addr; + dst_cache = &geneve->dst_cache; + } + + use_cache = use_cache && !skb->mark; + if (use_cache) { + rt = dst_cache_get_ip4(dst_cache, &fl4->saddr); + if (rt) + return rt; }
rt = ip_route_output_key(geneve->net, fl4); @@@ -786,6 -821,8 +821,8 @@@ ip_rt_put(rt); return ERR_PTR(-ELOOP); } + if (use_cache) + dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr); return rt; }
@@@ -798,6 -835,8 +835,8 @@@ static struct dst_entry *geneve_get_v6_ struct geneve_dev *geneve = netdev_priv(dev); struct geneve_sock *gs6 = geneve->sock6; struct dst_entry *dst = NULL; + struct dst_cache *dst_cache; + bool use_cache = true; __u8 prio;
memset(fl6, 0, sizeof(*fl6)); @@@ -808,16 -847,26 +847,26 @@@ fl6->daddr = info->key.u.ipv6.dst; fl6->saddr = info->key.u.ipv6.src; fl6->flowi6_tos = RT_TOS(info->key.tos); + dst_cache = &info->dst_cache; } else { prio = geneve->tos; if (prio == 1) { const struct iphdr *iip = ip_hdr(skb);
prio = ip_tunnel_get_dsfield(iip, skb); + use_cache = false; }
fl6->flowi6_tos = RT_TOS(prio); fl6->daddr = geneve->remote.sin6.sin6_addr; + dst_cache = &geneve->dst_cache; + } + + use_cache = use_cache && !skb->mark; + if (use_cache) { + dst = dst_cache_get_ip6(dst_cache, &fl6->saddr); + if (dst) + return dst; }
if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) { @@@ -830,6 -879,8 +879,8 @@@ return ERR_PTR(-ELOOP); }
+ if (use_cache) + dst_cache_set_ip6(dst_cache, dst, &fl6->saddr); return dst; } #endif @@@ -893,9 -944,9 +944,9 @@@ static netdev_tx_t geneve_xmit_skb(stru opts = ip_tunnel_info_opts(info);
if (key->tun_flags & TUNNEL_CSUM) - flags |= GENEVE_F_UDP_CSUM; + flags &= ~GENEVE_F_UDP_ZERO_CSUM_TX; else - flags &= ~GENEVE_F_UDP_CSUM; + flags |= GENEVE_F_UDP_ZERO_CSUM_TX;
err = geneve_build_skb(rt, skb, key->tun_flags, vni, info->options_len, opts, flags, xnet); @@@ -921,7 -972,7 +972,7 @@@ udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr, tos, ttl, df, sport, geneve->dst_port, !net_eq(geneve->net, dev_net(geneve->dev)), - !(flags & GENEVE_F_UDP_CSUM)); + !!(flags & GENEVE_F_UDP_ZERO_CSUM_TX));
return NETDEV_TX_OK;
@@@ -1039,34 -1090,6 +1090,34 @@@ static netdev_tx_t geneve_xmit(struct s return geneve_xmit_skb(skb, dev, info); }
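The geneve hunks above (and the matching vxlan changes further down) flip the sense of the UDP checksum flag: instead of a bit that enables checksumming, the stored flag now records "send a zero checksum", so the default with no flags set is to checksum. A small sketch of the resulting decision logic; the flag value and names are illustrative, not the driver's:

#include <stdbool.h>
#include <stdint.h>

#define F_UDP_ZERO_CSUM_TX (1u << 0)	/* illustrative flag bit */

/* Decide whether to compute the outer UDP checksum for a packet. */
static bool want_udp_csum(uint32_t dev_flags, bool has_metadata,
			  bool metadata_wants_csum)
{
	if (has_metadata)			/* per-flow request overrides */
		return metadata_wants_csum;

	/* Device default: checksum unless "zero checksum" was requested. */
	return !(dev_flags & F_UDP_ZERO_CSUM_TX);
}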
+static int __geneve_change_mtu(struct net_device *dev, int new_mtu, bool strict) +{ + /* The max_mtu calculation does not take account of GENEVE + * options, to avoid excluding potentially valid + * configurations. + */ + int max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - sizeof(struct iphdr) + - dev->hard_header_len; + + if (new_mtu < 68) + return -EINVAL; + + if (new_mtu > max_mtu) { + if (strict) + return -EINVAL; + + new_mtu = max_mtu; + } + + dev->mtu = new_mtu; + return 0; +} + +static int geneve_change_mtu(struct net_device *dev, int new_mtu) +{ + return __geneve_change_mtu(dev, new_mtu, true); +} + static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); @@@ -1111,7 -1134,7 +1162,7 @@@ static const struct net_device_ops gene .ndo_stop = geneve_stop, .ndo_start_xmit = geneve_xmit, .ndo_get_stats64 = ip_tunnel_get_stats64, - .ndo_change_mtu = eth_change_mtu, + .ndo_change_mtu = geneve_change_mtu, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, .ndo_fill_metadata_dst = geneve_fill_metadata_dst, @@@ -1178,7 -1201,6 +1229,7 @@@ static void geneve_setup(struct net_dev dev->hw_features |= NETIF_F_GSO_SOFTWARE;
netif_keep_dst(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; eth_hw_addr_random(dev); } @@@ -1301,6 -1323,8 +1352,8 @@@ static int geneve_configure(struct net return -EPERM; }
+ dst_cache_reset(&geneve->dst_cache); + err = register_netdevice(dev); if (err) return err; @@@ -1359,8 -1383,8 +1412,8 @@@ static int geneve_newlink(struct net *n metadata = true;
if (data[IFLA_GENEVE_UDP_CSUM] && - nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) - flags |= GENEVE_F_UDP_CSUM; + !nla_get_u8(data[IFLA_GENEVE_UDP_CSUM])) + flags |= GENEVE_F_UDP_ZERO_CSUM_TX;
if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] && nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX])) @@@ -1430,7 -1454,7 +1483,7 @@@ static int geneve_fill_info(struct sk_b }
if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM, - !!(geneve->flags & GENEVE_F_UDP_CSUM)) || + !(geneve->flags & GENEVE_F_UDP_ZERO_CSUM_TX)) || nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX, !!(geneve->flags & GENEVE_F_UDP_ZERO_CSUM6_TX)) || nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX, @@@ -1470,23 -1494,12 +1523,23 @@@ struct net_device *geneve_dev_create_fb return dev;
err = geneve_configure(net, dev, &geneve_remote_unspec, - 0, 0, 0, htons(dst_port), true, 0); - if (err) { - free_netdev(dev); - return ERR_PTR(err); - } + 0, 0, 0, htons(dst_port), true, + GENEVE_F_UDP_ZERO_CSUM6_RX); + if (err) + goto err; + + /* openvswitch users expect packet sizes to be unrestricted, + * so set the largest MTU we can. + */ + err = __geneve_change_mtu(dev, IP_MAX_MTU, false); + if (err) + goto err; + return dev; + + err: + free_netdev(dev); + return ERR_PTR(err); } EXPORT_SYMBOL_GPL(geneve_dev_create_fb);
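__geneve_change_mtu() above (and the vxlan helper later in this diff) splits MTU validation into a strict mode for user requests and a clamping mode for internal callers such as the openvswitch fallback device, which asks for IP_MAX_MTU and simply gets the largest value that fits. A compact sketch of that behaviour, assuming only the 68-byte IPv4 minimum used in the hunk:

#include <errno.h>
#include <stdbool.h>

#define MIN_MTU 68			/* IPv4 minimum MTU */

static int change_mtu(int *dev_mtu, int new_mtu, int max_mtu, bool strict)
{
	if (new_mtu < MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;	/* explicit user request: refuse */
		new_mtu = max_mtu;	/* internal caller: silently clamp */
	}

	*dev_mtu = new_mtu;
	return 0;
}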
diff --combined drivers/net/hyperv/netvsc_drv.c index 98e34fe,202e2b1..2b6595e --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@@ -43,6 -43,11 +43,11 @@@
#define RING_SIZE_MIN 64 #define LINKCHANGE_INT (2 * HZ) + #define NETVSC_HW_FEATURES (NETIF_F_RXCSUM | \ + NETIF_F_SG | \ + NETIF_F_TSO | \ + NETIF_F_TSO6 | \ + NETIF_F_HW_CSUM) static int ring_size = 128; module_param(ring_size, int, S_IRUGO); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); @@@ -545,6 -550,8 +550,8 @@@ do_send packet->page_buf_cnt = init_page_array(rndis_msg, rndis_msg_size, skb, packet, &pb);
+ /* timestamp packet in software */ + skb_tx_timestamp(skb); ret = netvsc_send(net_device_ctx->device_ctx, packet, rndis_msg, &pb, skb);
@@@ -915,6 -922,7 +922,7 @@@ static const struct ethtool_ops ethtool .get_link = ethtool_op_get_link, .get_channels = netvsc_get_channels, .set_channels = netvsc_set_channels, + .get_ts_info = ethtool_op_get_ts_info, };
static const struct net_device_ops device_ops = { @@@ -1081,17 -1089,12 +1089,15 @@@ static int netvsc_probe(struct hv_devic
net->netdev_ops = &device_ops;
- net->hw_features = NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_IP_CSUM | - NETIF_F_TSO; - net->features = NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_SG | NETIF_F_RXCSUM | - NETIF_F_IP_CSUM | NETIF_F_TSO; + net->hw_features = NETVSC_HW_FEATURES; + net->features = NETVSC_HW_FEATURES | NETIF_F_HW_VLAN_CTAG_TX;
net->ethtool_ops = &ethtool_ops; SET_NETDEV_DEV(net, &dev->device);
+ /* We always need headroom for rndis header */ + net->needed_headroom = RNDIS_AND_PPI_SIZE; + /* Notify the netvsc driver of the new device */ memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; diff --combined drivers/net/phy/bcm7xxx.c index db507e3,9b31104..b881a7b1 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@@ -24,7 -24,7 +24,7 @@@ #define MII_BCM7XXX_100TX_FALSE_CAR 0x13 #define MII_BCM7XXX_100TX_DISC 0x14 #define MII_BCM7XXX_AUX_MODE 0x1d - #define MII_BCM7XX_64CLK_MDIO BIT(12) + #define MII_BCM7XXX_64CLK_MDIO BIT(12) #define MII_BCM7XXX_TEST 0x1f #define MII_BCM7XXX_SHD_MODE_2 BIT(2)
@@@ -247,9 -247,13 +247,9 @@@ static int bcm7xxx_config_init(struct p int ret;
/* Enable 64 clock MDIO */ - phy_write(phydev, MII_BCM7XXX_AUX_MODE, MII_BCM7XX_64CLK_MDIO); + phy_write(phydev, MII_BCM7XXX_AUX_MODE, MII_BCM7XXX_64CLK_MDIO); phy_read(phydev, MII_BCM7XXX_AUX_MODE);
- /* Workaround only required for 100Mbits/sec capable PHYs */ - if (phydev->supported & PHY_GBIT_FEATURES) - return 0; - /* set shadow mode 2 */ ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, MII_BCM7XXX_SHD_MODE_2, MII_BCM7XXX_SHD_MODE_2); @@@ -266,7 -270,7 +266,7 @@@ phy_write(phydev, MII_BCM7XXX_100TX_FALSE_CAR, 0x7555);
/* reset shadow mode 2 */ - ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, MII_BCM7XXX_SHD_MODE_2, 0); + ret = phy_set_clr_bits(phydev, MII_BCM7XXX_TEST, 0, MII_BCM7XXX_SHD_MODE_2); if (ret < 0) return ret;
@@@ -303,6 -307,11 +303,6 @@@ static int bcm7xxx_suspend(struct phy_d return 0; }
-static int bcm7xxx_dummy_config_init(struct phy_device *phydev) -{ - return 0; -} - #define BCM7XXX_28NM_GPHY(_oui, _name) \ { \ .phy_id = (_oui), \ @@@ -317,6 -326,21 +317,21 @@@ .resume = bcm7xxx_28nm_resume, \ }
+ #define BCM7XXX_40NM_EPHY(_oui, _name) \ + { \ + .phy_id = (_oui), \ + .phy_id_mask = 0xfffffff0, \ + .name = _name, \ + .features = PHY_BASIC_FEATURES | \ + SUPPORTED_Pause | SUPPORTED_Asym_Pause, \ + .flags = PHY_IS_INTERNAL, \ + .config_init = bcm7xxx_config_init, \ + .config_aneg = genphy_config_aneg, \ + .read_status = genphy_read_status, \ + .suspend = bcm7xxx_suspend, \ + .resume = bcm7xxx_config_init, \ + } + static struct phy_driver bcm7xxx_driver[] = { BCM7XXX_28NM_GPHY(PHY_ID_BCM7250, "Broadcom BCM7250"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7364, "Broadcom BCM7364"), @@@ -324,43 -348,34 +339,10 @@@ BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7439_2, "Broadcom BCM7439 (2)"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"), - { - .phy_id = PHY_ID_BCM7425, - .phy_id_mask = 0xfffffff0, - .name = "Broadcom BCM7425", - .features = PHY_BASIC_FEATURES | - SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = PHY_IS_INTERNAL, - .config_init = bcm7xxx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .suspend = bcm7xxx_suspend, - .resume = bcm7xxx_config_init, - }, { - .phy_id = PHY_ID_BCM7429, - .phy_id_mask = 0xfffffff0, - .name = "Broadcom BCM7429", - .features = PHY_BASIC_FEATURES | - SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = PHY_IS_INTERNAL, - .config_init = bcm7xxx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .suspend = bcm7xxx_suspend, - .resume = bcm7xxx_config_init, - }, { - .phy_id = PHY_ID_BCM7435, - .phy_id_mask = 0xfffffff0, - .name = "Broadcom BCM7435", - .features = PHY_BASIC_FEATURES | - SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = PHY_IS_INTERNAL, - .config_init = bcm7xxx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .suspend = bcm7xxx_suspend, - .resume = bcm7xxx_config_init, - } }; + BCM7XXX_40NM_EPHY(PHY_ID_BCM7425, "Broadcom BCM7425"), + BCM7XXX_40NM_EPHY(PHY_ID_BCM7429, "Broadcom BCM7429"), + BCM7XXX_40NM_EPHY(PHY_ID_BCM7435, "Broadcom BCM7435"), -{ - .phy_id = PHY_BCM_OUI_4, - .phy_id_mask = 0xffff0000, - .name = "Broadcom BCM7XXX 40nm", - .features = PHY_GBIT_FEATURES | - SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = PHY_IS_INTERNAL, - .config_init = bcm7xxx_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .suspend = bcm7xxx_suspend, - .resume = bcm7xxx_config_init, -}, { - .phy_id = PHY_BCM_OUI_5, - .phy_id_mask = 0xffffff00, - .name = "Broadcom BCM7XXX 65nm", - .features = PHY_BASIC_FEATURES | - SUPPORTED_Pause | SUPPORTED_Asym_Pause, - .flags = PHY_IS_INTERNAL, - .config_init = bcm7xxx_dummy_config_init, - .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, - .suspend = bcm7xxx_suspend, - .resume = bcm7xxx_config_init, -} }; ++};
static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { { PHY_ID_BCM7250, 0xfffffff0, }, @@@ -371,6 -386,8 +353,6 @@@ { PHY_ID_BCM7439, 0xfffffff0, }, { PHY_ID_BCM7435, 0xfffffff0, }, { PHY_ID_BCM7445, 0xfffffff0, }, - { PHY_BCM_OUI_4, 0xffff0000 }, - { PHY_BCM_OUI_5, 0xffffff00 }, { } };
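One of the bcm7xxx fixes above swaps the last two arguments of the driver's set/clear helper when leaving shadow mode 2, so the bit is actually cleared instead of being set a second time. A generic read-modify-write helper of that shape, sketched with a stubbed register file; reg_read/reg_write and the clear-before-set order are illustrative, not the driver's exact helper:

#include <stdint.h>

static uint16_t regs[32];		/* stand-in register file */

static int reg_read(unsigned int reg)  { return regs[reg]; }
static void reg_write(unsigned int reg, uint16_t val) { regs[reg] = val; }

/* Clear 'clr' bits, then set 'set' bits, in one read-modify-write. */
static int reg_set_clr_bits(unsigned int reg, uint16_t set, uint16_t clr)
{
	int val = reg_read(reg);

	if (val < 0)
		return val;		/* propagate a read error */

	val &= ~clr;
	val |= set;
	reg_write(reg, (uint16_t)val);
	return val;
}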
diff --combined drivers/net/phy/marvell.c index ab1d0fc,d0168f1..9fb9d80 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@@ -133,6 -133,11 +133,11 @@@ #define MII_88E3016_DISABLE_SCRAMBLER 0x0200 #define MII_88E3016_AUTO_MDIX_CROSSOVER 0x0030
+ #define MII_88E1510_GEN_CTRL_REG_1 0x14 + #define MII_88E1510_GEN_CTRL_REG_1_MODE_MASK 0x7 + #define MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII 0x1 /* SGMII to copper */ + #define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */ + MODULE_DESCRIPTION("Marvell PHY driver"); MODULE_AUTHOR("Andy Fleming"); MODULE_LICENSE("GPL"); @@@ -438,6 -443,41 +443,41 @@@ static int m88e1318_config_aneg(struct return m88e1121_config_aneg(phydev); }
+ static int m88e1510_config_init(struct phy_device *phydev) + { + int err; + int temp; + + /* SGMII-to-Copper mode initialization */ + if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { + /* Select page 18 */ + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, 18); + if (err < 0) + return err; + + /* In reg 20, write MODE[2:0] = 0x1 (SGMII to Copper) */ + temp = phy_read(phydev, MII_88E1510_GEN_CTRL_REG_1); + temp &= ~MII_88E1510_GEN_CTRL_REG_1_MODE_MASK; + temp |= MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII; + err = phy_write(phydev, MII_88E1510_GEN_CTRL_REG_1, temp); + if (err < 0) + return err; + + /* PHY reset is necessary after changing MODE[2:0] */ + temp |= MII_88E1510_GEN_CTRL_REG_1_RESET; + err = phy_write(phydev, MII_88E1510_GEN_CTRL_REG_1, temp); + if (err < 0) + return err; + + /* Reset page selection */ + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, 0); + if (err < 0) + return err; + } + + return 0; + } + static int m88e1510_config_aneg(struct phy_device *phydev) { int err; @@@ -446,12 -486,6 +486,12 @@@ if (err < 0) return err;
+ return 0; +} + +static int marvell_config_init(struct phy_device *phydev) +{ + /* Set registers from marvell,reg-init DT property */ return marvell_of_reg_init(phydev); }
@@@ -501,7 -535,7 +541,7 @@@ static int m88e1116r_config_init(struc
mdelay(500);
- return 0; + return marvell_config_init(phydev); }
static int m88e3016_config_init(struct phy_device *phydev) @@@ -520,7 -554,7 +560,7 @@@ if (reg < 0) return reg;
- return 0; + return marvell_config_init(phydev); }
static int m88e1111_config_init(struct phy_device *phydev) @@@ -1031,8 -1065,8 +1071,8 @@@ static u64 marvell_get_stat(struct phy_ { struct marvell_hw_stat stat = marvell_hw_stats[i]; struct marvell_priv *priv = phydev->priv; - int err, oldpage; - u64 val; + int err, oldpage, val; + u64 ret;
oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE); err = phy_write(phydev, MII_MARVELL_PHY_PAGE, @@@ -1042,16 -1076,16 +1082,16 @@@
val = phy_read(phydev, stat.reg); if (val < 0) { - val = UINT64_MAX; + ret = UINT64_MAX; } else { val = val & ((1 << stat.bits) - 1); priv->stats[i] += val; - val = priv->stats[i]; + ret = priv->stats[i]; }
phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage);
- return val; + return ret; }
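The marvell_get_stat() change above fixes a sign bug: the register value was read into a u64, so the "val < 0" error check could never trigger. The fix reads into a plain int, masks to the hardware counter width, and accumulates into the 64-bit running total. A stand-alone sketch of that pattern; read_reg and COUNTER_BITS are stand-ins, not the driver's names:

#include <stdint.h>

#define COUNTER_BITS 16

/* Stand-in for the real register accessor: returns the register value
 * (>= 0) or a negative errno on failure. */
static int read_reg(unsigned int reg)
{
	(void)reg;
	return 0x1234;			/* dummy value for the sketch */
}

/* Reading into a signed int is what makes the error check work; an
 * unsigned 64-bit value can never be negative. */
static uint64_t read_counter(unsigned int reg, uint64_t *total)
{
	int val = read_reg(reg);

	if (val < 0)
		return UINT64_MAX;	/* report "no data" on read error */

	*total += val & ((1u << COUNTER_BITS) - 1);
	return *total;
}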
static void marvell_get_stats(struct phy_device *phydev, @@@ -1084,7 -1118,6 +1124,7 @@@ static struct phy_driver marvell_driver .features = PHY_GBIT_FEATURES, .probe = marvell_probe, .flags = PHY_HAS_INTERRUPT, + .config_init = &marvell_config_init, .config_aneg = &marvell_config_aneg, .read_status = &genphy_read_status, .ack_interrupt = &marvell_ack_interrupt, @@@ -1156,7 -1189,6 +1196,7 @@@ .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, + .config_init = &marvell_config_init, .config_aneg = &m88e1121_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, @@@ -1175,7 -1207,6 +1215,7 @@@ .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, + .config_init = &marvell_config_init, .config_aneg = &m88e1318_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, @@@ -1268,7 -1299,7 +1308,7 @@@ .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, - .config_init = &marvell_config_init, + .config_init = &m88e1510_config_init, .config_aneg = &m88e1510_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, @@@ -1287,7 -1318,6 +1327,7 @@@ .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, + .config_init = &marvell_config_init, .config_aneg = &m88e1510_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, diff --combined drivers/net/vxlan.c index e6944b2,2ddc642..909f793 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@@ -42,7 -42,7 +42,7 @@@ #include <net/netns/generic.h> #include <net/vxlan.h> #include <net/protocol.h> - #include <net/udp_tunnel.h> + #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #include <net/addrconf.h> @@@ -197,9 -197,9 +197,9 @@@ static int vxlan_nla_put_addr(struct sk #endif
/* Virtual Network hash table head */ - static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id) + static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni) { - return &vs->vni_list[hash_32(id, VNI_HASH_BITS)]; + return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)]; }
/* Socket hash table head */ @@@ -242,12 -242,16 +242,16 @@@ static struct vxlan_sock *vxlan_find_so return NULL; }
- static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) + static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, __be32 vni) { struct vxlan_dev *vxlan;
- hlist_for_each_entry_rcu(vxlan, vni_head(vs, id), hlist) { - if (vxlan->default_dst.remote_vni == id) + /* For flow based devices, map all packets to VNI 0 */ + if (vs->flags & VXLAN_F_COLLECT_METADATA) + vni = 0; + + hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) { + if (vxlan->default_dst.remote_vni == vni) return vxlan; }
@@@ -255,7 -259,7 +259,7 @@@ }
/* Look up VNI in a per net namespace table */ - static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, + static struct vxlan_dev *vxlan_find_vni(struct net *net, __be32 vni, sa_family_t family, __be16 port, u32 flags) { @@@ -265,7 -269,7 +269,7 @@@ if (!vs) return NULL;
- return vxlan_vs_find_vni(vs, id); + return vxlan_vs_find_vni(vs, vni); }
/* Fill in neighbour message in skbuff. */ @@@ -315,7 -319,7 +319,7 @@@ static int vxlan_fdb_info(struct sk_buf nla_put_be16(skb, NDA_PORT, rdst->remote_port)) goto nla_put_failure; if (rdst->remote_vni != vxlan->default_dst.remote_vni && - nla_put_u32(skb, NDA_VNI, rdst->remote_vni)) + nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni))) goto nla_put_failure; if (rdst->remote_ifindex && nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) @@@ -383,7 -387,7 +387,7 @@@ static void vxlan_ip_miss(struct net_de }; struct vxlan_rdst remote = { .remote_ip = *ipa, /* goes to NDA_DST */ - .remote_vni = VXLAN_N_VID, + .remote_vni = cpu_to_be32(VXLAN_N_VID), };
vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH); @@@ -452,7 -456,7 +456,7 @@@ static struct vxlan_fdb *vxlan_find_mac /* caller should hold vxlan->hash_lock */ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f, union vxlan_addr *ip, __be16 port, - __u32 vni, __u32 ifindex) + __be32 vni, __u32 ifindex) { struct vxlan_rdst *rd;
@@@ -469,7 -473,8 +473,8 @@@
/* Replace destination of unicast mac */ static int vxlan_fdb_replace(struct vxlan_fdb *f, - union vxlan_addr *ip, __be16 port, __u32 vni, __u32 ifindex) + union vxlan_addr *ip, __be16 port, __be32 vni, + __u32 ifindex) { struct vxlan_rdst *rd;
@@@ -480,6 -485,8 +485,8 @@@ rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list); if (!rd) return 0; + + dst_cache_reset(&rd->dst_cache); rd->remote_ip = *ip; rd->remote_port = port; rd->remote_vni = vni; @@@ -489,7 -496,7 +496,7 @@@
/* Add/update destinations for multicast */ static int vxlan_fdb_append(struct vxlan_fdb *f, - union vxlan_addr *ip, __be16 port, __u32 vni, + union vxlan_addr *ip, __be16 port, __be32 vni, __u32 ifindex, struct vxlan_rdst **rdp) { struct vxlan_rdst *rd; @@@ -501,6 -508,12 +508,12 @@@ rd = kmalloc(sizeof(*rd), GFP_ATOMIC); if (rd == NULL) return -ENOBUFS; + + if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) { + kfree(rd); + return -ENOBUFS; + } + rd->remote_ip = *ip; rd->remote_port = port; rd->remote_vni = vni; @@@ -515,7 -528,8 +528,8 @@@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, unsigned int off, struct vxlanhdr *vh, size_t hdrlen, - u32 data, struct gro_remcsum *grc, + __be32 vni_field, + struct gro_remcsum *grc, bool nopartial) { size_t start, offset; @@@ -526,10 -540,8 +540,8 @@@ if (!NAPI_GRO_CB(skb)->csum_valid) return NULL;
- start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; - offset = start + ((data & VXLAN_RCO_UDP) ? - offsetof(struct udphdr, check) : - offsetof(struct tcphdr, check)); + start = vxlan_rco_start(vni_field); + offset = start + vxlan_rco_offset(vni_field);
vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen, start, offset, grc, nopartial); @@@ -549,7 -561,7 +561,7 @@@ static struct sk_buff **vxlan_gro_recei int flush = 1; struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock, udp_offloads); - u32 flags; + __be32 flags; struct gro_remcsum grc;
skb_gro_remcsum_init(&grc); @@@ -565,11 -577,11 +577,11 @@@
skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
- flags = ntohl(vh->vx_flags); + flags = vh->vx_flags;
if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr), - ntohl(vh->vx_vni), &grc, + vh->vx_vni, &grc, !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL));
@@@ -660,7 -672,7 +672,7 @@@ static void vxlan_notify_del_rx_port(st static int vxlan_fdb_create(struct vxlan_dev *vxlan, const u8 *mac, union vxlan_addr *ip, __u16 state, __u16 flags, - __be16 port, __u32 vni, __u32 ifindex, + __be16 port, __be32 vni, __u32 ifindex, __u8 ndm_flags) { struct vxlan_rdst *rd = NULL; @@@ -749,8 -761,10 +761,10 @@@ static void vxlan_fdb_free(struct rcu_h struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu); struct vxlan_rdst *rd, *nd;
- list_for_each_entry_safe(rd, nd, &f->remotes, list) + list_for_each_entry_safe(rd, nd, &f->remotes, list) { + dst_cache_destroy(&rd->dst_cache); kfree(rd); + } kfree(f); }
@@@ -767,7 -781,8 +781,8 @@@ static void vxlan_fdb_destroy(struct vx }
static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, - union vxlan_addr *ip, __be16 *port, u32 *vni, u32 *ifindex) + union vxlan_addr *ip, __be16 *port, __be32 *vni, + u32 *ifindex) { struct net *net = dev_net(vxlan->dev); int err; @@@ -800,7 -815,7 +815,7 @@@ if (tb[NDA_VNI]) { if (nla_len(tb[NDA_VNI]) != sizeof(u32)) return -EINVAL; - *vni = nla_get_u32(tb[NDA_VNI]); + *vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI])); } else { *vni = vxlan->default_dst.remote_vni; } @@@ -830,7 -845,8 +845,8 @@@ static int vxlan_fdb_add(struct ndmsg * /* struct net *net = dev_net(vxlan->dev); */ union vxlan_addr ip; __be16 port; - u32 vni, ifindex; + __be32 vni; + u32 ifindex; int err;
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) { @@@ -867,7 -883,8 +883,8 @@@ static int vxlan_fdb_delete(struct ndms struct vxlan_rdst *rd = NULL; union vxlan_addr ip; __be16 port; - u32 vni, ifindex; + __be32 vni; + u32 ifindex; int err;
err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex); @@@ -1122,54 -1139,65 +1139,65 @@@ static int vxlan_igmp_leave(struct vxla return ret; }
- static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, - size_t hdrlen, u32 data, bool nopartial) + static bool vxlan_remcsum(struct vxlanhdr *unparsed, + struct sk_buff *skb, u32 vxflags) { size_t start, offset, plen;
- if (skb->remcsum_offload) - return vh; + if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload) + goto out;
- start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; - offset = start + ((data & VXLAN_RCO_UDP) ? - offsetof(struct udphdr, check) : - offsetof(struct tcphdr, check)); + start = vxlan_rco_start(unparsed->vx_vni); + offset = start + vxlan_rco_offset(unparsed->vx_vni);
- plen = hdrlen + offset + sizeof(u16); + plen = sizeof(struct vxlanhdr) + offset + sizeof(u16);
if (!pskb_may_pull(skb, plen)) - return NULL; + return false;
- vh = (struct vxlanhdr *)(udp_hdr(skb) + 1); + skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset, + !!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL)); + out: + unparsed->vx_flags &= ~VXLAN_HF_RCO; + unparsed->vx_vni &= VXLAN_VNI_MASK; + return true; + }
- skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset, - nopartial); + static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed, + struct vxlan_metadata *md, + struct metadata_dst *tun_dst) + { + struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed;
- return vh; + if (!(unparsed->vx_flags & VXLAN_HF_GBP)) + goto out; + + md->gbp = ntohs(gbp->policy_id); + + if (tun_dst) + tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT; + + if (gbp->dont_learn) + md->gbp |= VXLAN_GBP_DONT_LEARN; + + if (gbp->policy_applied) + md->gbp |= VXLAN_GBP_POLICY_APPLIED; + + out: + unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS; }
- static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, - struct vxlan_metadata *md, u32 vni, + static void vxlan_rcv(struct vxlan_dev *vxlan, struct vxlan_sock *vs, + struct sk_buff *skb, struct vxlan_metadata *md, struct metadata_dst *tun_dst) { struct iphdr *oip = NULL; struct ipv6hdr *oip6 = NULL; - struct vxlan_dev *vxlan; struct pcpu_sw_netstats *stats; union vxlan_addr saddr; int err = 0;
- /* For flow based devices, map all packets to VNI 0 */ - if (vs->flags & VXLAN_F_COLLECT_METADATA) - vni = 0; - - /* Is this VNI defined? */ - vxlan = vxlan_vs_find_vni(vs, vni); - if (!vxlan) - goto drop; - skb_reset_mac_header(skb); - skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev))); skb->protocol = eth_type_trans(skb, vxlan->dev); skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
@@@ -1246,48 -1274,45 +1274,45 @@@ drop static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct metadata_dst *tun_dst = NULL; + struct vxlan_dev *vxlan; struct vxlan_sock *vs; - struct vxlanhdr *vxh; - u32 flags, vni; + struct vxlanhdr unparsed; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md;
/* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) - goto error; - - vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); - flags = ntohl(vxh->vx_flags); - vni = ntohl(vxh->vx_vni); + return 1;
- if (flags & VXLAN_HF_VNI) { - flags &= ~VXLAN_HF_VNI; - } else { - /* VNI flag always required to be set */ - goto bad_flags; + unparsed = *vxlan_hdr(skb); + /* VNI flag always required to be set */ + if (!(unparsed.vx_flags & VXLAN_HF_VNI)) { + netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", + ntohl(vxlan_hdr(skb)->vx_flags), + ntohl(vxlan_hdr(skb)->vx_vni)); + /* Return non vxlan pkt */ + return 1; } - - if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) - goto drop; - vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); + unparsed.vx_flags &= ~VXLAN_HF_VNI; + unparsed.vx_vni &= ~VXLAN_VNI_MASK;
vs = rcu_dereference_sk_user_data(sk); if (!vs) goto drop;
- if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { - vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni, - !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL)); - if (!vxh) - goto drop; + vxlan = vxlan_vs_find_vni(vs, vxlan_vni(vxlan_hdr(skb)->vx_vni)); + if (!vxlan) + goto drop;
- flags &= ~VXLAN_HF_RCO; - vni &= VXLAN_VNI_MASK; - } + if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB), + !net_eq(vxlan->net, dev_net(vxlan->dev)))) + goto drop;
if (vxlan_collect_metadata(vs)) { + __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni); + tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, - cpu_to_be64(vni >> 8), sizeof(*md)); + vxlan_vni_to_tun_id(vni), sizeof(*md));
if (!tun_dst) goto drop; @@@ -1300,25 -1325,13 +1325,13 @@@ /* For backwards compatibility, only allow reserved fields to be * used by VXLAN extensions if explicitly requested. */ - if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) { - struct vxlanhdr_gbp *gbp; - - gbp = (struct vxlanhdr_gbp *)vxh; - md->gbp = ntohs(gbp->policy_id); - - if (tun_dst) - tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT; - - if (gbp->dont_learn) - md->gbp |= VXLAN_GBP_DONT_LEARN; - - if (gbp->policy_applied) - md->gbp |= VXLAN_GBP_POLICY_APPLIED; - - flags &= ~VXLAN_GBP_USED_BITS; - } + if (vs->flags & VXLAN_F_REMCSUM_RX) + if (!vxlan_remcsum(&unparsed, skb, vs->flags)) + goto drop; + if (vs->flags & VXLAN_F_GBP) + vxlan_parse_gbp_hdr(&unparsed, md, tun_dst);
- if (flags || vni & ~VXLAN_VNI_MASK) { + if (unparsed.vx_flags || unparsed.vx_vni) { /* If there are any unprocessed flags remaining treat * this as a malformed packet. This behavior diverges from * VXLAN RFC (RFC7348) which stipulates that bits in reserved @@@ -1327,28 -1340,19 +1340,19 @@@ * is more robust and provides a little more security in * adding extensions to VXLAN. */ - - goto bad_flags; + goto drop; }
- vxlan_rcv(vs, skb, md, vni >> 8, tun_dst); + vxlan_rcv(vxlan, vs, skb, md, tun_dst); return 0;
drop: - /* Consume bad packet */ - kfree_skb(skb); - return 0; - - bad_flags: - netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", - ntohl(vxh->vx_flags), ntohl(vxh->vx_vni)); - - error: if (tun_dst) dst_release((struct dst_entry *)tun_dst);
- /* Return non vxlan pkt */ - return 1; + /* Consume bad packet */ + kfree_skb(skb); + return 0; }
static int arp_reduce(struct net_device *dev, struct sk_buff *skb) @@@ -1673,7 -1677,7 +1677,7 @@@ static void vxlan_build_gbp_hdr(struct return;
gbp = (struct vxlanhdr_gbp *)vxh; - vxh->vx_flags |= htonl(VXLAN_HF_GBP); + vxh->vx_flags |= VXLAN_HF_GBP;
if (md->gbp & VXLAN_GBP_DONT_LEARN) gbp->dont_learn = 1; @@@ -1684,20 -1688,15 +1688,15 @@@ gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); }
- #if IS_ENABLED(CONFIG_IPV6) - static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, - struct net_device *dev, struct in6_addr *saddr, - struct in6_addr *daddr, __u8 prio, __u8 ttl, - __be16 src_port, __be16 dst_port, __be32 vni, - struct vxlan_metadata *md, bool xnet, u32 vxflags) + static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst, + int iphdr_len, __be32 vni, + struct vxlan_metadata *md, u32 vxflags, + bool udp_sum) { struct vxlanhdr *vxh; int min_headroom; int err; - bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX); int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; - u16 hdrlen = sizeof(struct vxlanhdr);
if ((vxflags & VXLAN_F_REMCSUM_TX) && skb->ip_summed == CHECKSUM_PARTIAL) { @@@ -1706,50 -1705,39 +1705,39 @@@ if (csum_start <= VXLAN_MAX_REMCSUM_START && !(csum_start & VXLAN_RCO_SHIFT_MASK) && (skb->csum_offset == offsetof(struct udphdr, check) || - skb->csum_offset == offsetof(struct tcphdr, check))) { - udp_sum = false; + skb->csum_offset == offsetof(struct tcphdr, check))) type |= SKB_GSO_TUNNEL_REMCSUM; - } }
- skb_scrub_packet(skb, xnet); - min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len - + VXLAN_HLEN + sizeof(struct ipv6hdr) + + VXLAN_HLEN + iphdr_len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
/* Need space for new headers (invalidates iph ptr) */ err = skb_cow_head(skb, min_headroom); if (unlikely(err)) { kfree_skb(skb); - goto err; + return err; }
skb = vlan_hwaccel_push_inside(skb); - if (WARN_ON(!skb)) { - err = -ENOMEM; - goto err; - } + if (WARN_ON(!skb)) + return -ENOMEM;
- skb = iptunnel_handle_offloads(skb, udp_sum, type); - if (IS_ERR(skb)) { - err = -EINVAL; - goto err; - } + skb = iptunnel_handle_offloads(skb, type); + if (IS_ERR(skb)) + return PTR_ERR(skb);
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); - vxh->vx_flags = htonl(VXLAN_HF_VNI); - vxh->vx_vni = vni; + vxh->vx_flags = VXLAN_HF_VNI; + vxh->vx_vni = vxlan_vni_field(vni);
if (type & SKB_GSO_TUNNEL_REMCSUM) { - u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> - VXLAN_RCO_SHIFT; + unsigned int start;
- if (skb->csum_offset == offsetof(struct udphdr, check)) - data |= VXLAN_RCO_UDP; - - vxh->vx_vni |= htonl(data); - vxh->vx_flags |= htonl(VXLAN_HF_RCO); + start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr); + vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset); + vxh->vx_flags |= VXLAN_HF_RCO;
if (!skb_is_gso(skb)) { skb->ip_summed = CHECKSUM_NONE; @@@ -1761,102 -1749,63 +1749,63 @@@ vxlan_build_gbp_hdr(vxh, vxflags, md);
skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - - udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio, - ttl, src_port, dst_port, - !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX)); return 0; - err: - dst_release(dst); - return err; } - #endif
- static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni, - struct vxlan_metadata *md, bool xnet, u32 vxflags) + static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, + struct sk_buff *skb, int oif, u8 tos, + __be32 daddr, __be32 *saddr, + struct dst_cache *dst_cache, + struct ip_tunnel_info *info) { - struct vxlanhdr *vxh; - int min_headroom; - int err; - bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM); - int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; - u16 hdrlen = sizeof(struct vxlanhdr); - - if ((vxflags & VXLAN_F_REMCSUM_TX) && - skb->ip_summed == CHECKSUM_PARTIAL) { - int csum_start = skb_checksum_start_offset(skb); - - if (csum_start <= VXLAN_MAX_REMCSUM_START && - !(csum_start & VXLAN_RCO_SHIFT_MASK) && - (skb->csum_offset == offsetof(struct udphdr, check) || - skb->csum_offset == offsetof(struct tcphdr, check))) { - udp_sum = false; - type |= SKB_GSO_TUNNEL_REMCSUM; - } - } - - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len - + VXLAN_HLEN + sizeof(struct iphdr) - + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); + struct rtable *rt = NULL; + bool use_cache = false; + struct flowi4 fl4;
- /* Need space for new headers (invalidates iph ptr) */ - err = skb_cow_head(skb, min_headroom); - if (unlikely(err)) { - kfree_skb(skb); - return err; + /* when the ip_tunnel_info is availble, the tos used for lookup is + * packet independent, so we can use the cache + */ + if (!skb->mark && (!tos || info)) { + use_cache = true; + rt = dst_cache_get_ip4(dst_cache, saddr); + if (rt) + return rt; }
- skb = vlan_hwaccel_push_inside(skb); - if (WARN_ON(!skb)) - return -ENOMEM; - - skb = iptunnel_handle_offloads(skb, udp_sum, type); - if (IS_ERR(skb)) - return PTR_ERR(skb); - - vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); - vxh->vx_flags = htonl(VXLAN_HF_VNI); - vxh->vx_vni = vni; - - if (type & SKB_GSO_TUNNEL_REMCSUM) { - u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> - VXLAN_RCO_SHIFT; - - if (skb->csum_offset == offsetof(struct udphdr, check)) - data |= VXLAN_RCO_UDP; - - vxh->vx_vni |= htonl(data); - vxh->vx_flags |= htonl(VXLAN_HF_RCO); + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_oif = oif; + fl4.flowi4_tos = RT_TOS(tos); + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = IPPROTO_UDP; + fl4.daddr = daddr; + fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;
- if (!skb_is_gso(skb)) { - skb->ip_summed = CHECKSUM_NONE; - skb->encapsulation = 0; - } + rt = ip_route_output_key(vxlan->net, &fl4); + if (!IS_ERR(rt)) { + *saddr = fl4.saddr; + if (use_cache) + dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); } - - if (vxflags & VXLAN_F_GBP) - vxlan_build_gbp_hdr(vxh, vxflags, md); - - skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - - udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, ttl, df, - src_port, dst_port, xnet, - !(vxflags & VXLAN_F_UDP_CSUM)); - return 0; + return rt; }
#if IS_ENABLED(CONFIG_IPV6) static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, struct sk_buff *skb, int oif, const struct in6_addr *daddr, - struct in6_addr *saddr) + struct in6_addr *saddr, + struct dst_cache *dst_cache) { struct dst_entry *ndst; struct flowi6 fl6; int err;
+ if (!skb->mark) { + ndst = dst_cache_get_ip6(dst_cache, saddr); + if (ndst) + return ndst; + } + memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.daddr = *daddr; @@@ -1871,6 -1820,8 +1820,8 @@@ return ERR_PTR(err);
*saddr = fl6.saddr; + if (!skb->mark) + dst_cache_set_ip6(dst_cache, ndst, saddr); return ndst; } #endif @@@ -1923,22 -1874,24 +1874,24 @@@ static void vxlan_encap_bypass(struct s static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct vxlan_rdst *rdst, bool did_rsc) { + struct dst_cache *dst_cache; struct ip_tunnel_info *info; struct vxlan_dev *vxlan = netdev_priv(dev); struct sock *sk; struct rtable *rt = NULL; const struct iphdr *old_iph; - struct flowi4 fl4; union vxlan_addr *dst; union vxlan_addr remote_ip; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; __be16 src_port = 0, dst_port; - u32 vni; + __be32 vni; __be16 df = 0; __u8 tos, ttl; int err; u32 flags = vxlan->flags; + bool udp_sum = false; + bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
info = skb_tunnel_info(skb);
@@@ -1946,6 -1899,7 +1899,7 @@@ dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; vni = rdst->remote_vni; dst = &rdst->remote_ip; + dst_cache = &rdst->dst_cache; } else { if (!info) { WARN_ONCE(1, "%s: Missing encapsulation instructions\n", @@@ -1953,13 -1907,14 +1907,14 @@@ goto drop; } dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; - vni = be64_to_cpu(info->key.tun_id); + vni = vxlan_tun_id_to_vni(info->key.tun_id); remote_ip.sa.sa_family = ip_tunnel_info_af(info); if (remote_ip.sa.sa_family == AF_INET) remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; else remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst; dst = &remote_ip; + dst_cache = &info->dst_cache; }
if (vxlan_addr_any(dst)) { @@@ -1987,6 -1942,7 +1942,7 @@@ if (info) { ttl = info->key.ttl; tos = info->key.tos; + udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
if (info->options_len) md = ip_tunnel_info_opts(info); @@@ -1995,29 -1951,16 +1951,16 @@@ }
if (dst->sa.sa_family == AF_INET) { + __be32 saddr; + if (!vxlan->vn4_sock) goto drop; sk = vxlan->vn4_sock->sock->sk;
- if (info) { - if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) - df = htons(IP_DF); - - if (info->key.tun_flags & TUNNEL_CSUM) - flags |= VXLAN_F_UDP_CSUM; - else - flags &= ~VXLAN_F_UDP_CSUM; - } - - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0; - fl4.flowi4_tos = RT_TOS(tos); - fl4.flowi4_mark = skb->mark; - fl4.flowi4_proto = IPPROTO_UDP; - fl4.daddr = dst->sin.sin_addr.s_addr; - fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr; - - rt = ip_route_output_key(vxlan->net, &fl4); + rt = vxlan_get_route(vxlan, skb, + rdst ? rdst->remote_ifindex : 0, tos, + dst->sin.sin_addr.s_addr, &saddr, + dst_cache, info); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", &dst->sin.sin_addr.s_addr); @@@ -2047,18 -1990,21 +1990,21 @@@ return; }
+ if (!info) + udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX); + else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) + df = htons(IP_DF); + tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); - err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr, - dst->sin.sin_addr.s_addr, tos, ttl, df, - src_port, dst_port, htonl(vni << 8), md, - !net_eq(vxlan->net, dev_net(vxlan->dev)), - flags); - if (err < 0) { - /* skb is already freed. */ - skb = NULL; - goto rt_tx_error; - } + err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr), + vni, md, flags, udp_sum); + if (err < 0) + goto xmit_tx_error; + + udp_tunnel_xmit_skb(rt, sk, skb, saddr, + dst->sin.sin_addr.s_addr, tos, ttl, df, + src_port, dst_port, xnet, !udp_sum); #if IS_ENABLED(CONFIG_IPV6) } else { struct dst_entry *ndst; @@@ -2071,7 -2017,8 +2017,8 @@@
ndst = vxlan6_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, - &dst->sin6.sin6_addr, &saddr); + &dst->sin6.sin6_addr, &saddr, + dst_cache); if (IS_ERR(ndst)) { netdev_dbg(dev, "no route to %pI6\n", &dst->sin6.sin6_addr); @@@ -2103,18 -2050,20 +2050,20 @@@ return; }
- if (info) { - if (info->key.tun_flags & TUNNEL_CSUM) - flags &= ~VXLAN_F_UDP_ZERO_CSUM6_TX; - else - flags |= VXLAN_F_UDP_ZERO_CSUM6_TX; - } + if (!info) + udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
ttl = ttl ? : ip6_dst_hoplimit(ndst); - err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr, - 0, ttl, src_port, dst_port, htonl(vni << 8), md, - !net_eq(vxlan->net, dev_net(vxlan->dev)), - flags); + skb_scrub_packet(skb, xnet); + err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr), + vni, md, flags, udp_sum); + if (err < 0) { + dst_release(ndst); + return; + } + udp_tunnel6_xmit_skb(ndst, sk, skb, dev, + &saddr, &dst->sin6.sin6_addr, + 0, ttl, src_port, dst_port, !udp_sum); #endif }
@@@ -2124,6 -2073,9 +2073,9 @@@ drop dev->stats.tx_dropped++; goto tx_free;
+ xmit_tx_error: + /* skb is already freed. */ + skb = NULL; rt_tx_error: ip_rt_put(rt); tx_error: @@@ -2171,11 -2123,9 +2123,11 @@@ static netdev_tx_t vxlan_xmit(struct sk #endif }
- if (vxlan->flags & VXLAN_F_COLLECT_METADATA && - info && info->mode & IP_TUNNEL_INFO_TX) { - vxlan_xmit_one(skb, dev, NULL, false); + if (vxlan->flags & VXLAN_F_COLLECT_METADATA) { + if (info && info->mode & IP_TUNNEL_INFO_TX) + vxlan_xmit_one(skb, dev, NULL, false); + else + kfree_skb(skb); return NETDEV_TX_OK; }
@@@ -2263,7 -2213,7 +2215,7 @@@ static void vxlan_cleanup(unsigned lon static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan) { struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id); - __u32 vni = vxlan->default_dst.remote_vni; + __be32 vni = vxlan->default_dst.remote_vni;
spin_lock(&vn->sock_lock); hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni)); @@@ -2369,68 -2319,29 +2321,43 @@@ static void vxlan_set_multicast_list(st { }
-static int vxlan_change_mtu(struct net_device *dev, int new_mtu) +static int __vxlan_change_mtu(struct net_device *dev, + struct net_device *lowerdev, + struct vxlan_rdst *dst, int new_mtu, bool strict) { - struct vxlan_dev *vxlan = netdev_priv(dev); - struct vxlan_rdst *dst = &vxlan->default_dst; - struct net_device *lowerdev; - int max_mtu; + int max_mtu = IP_MAX_MTU;
- lowerdev = __dev_get_by_index(vxlan->net, dst->remote_ifindex); - if (lowerdev == NULL) - return eth_change_mtu(dev, new_mtu); + if (lowerdev) + max_mtu = lowerdev->mtu;
if (dst->remote_ip.sa.sa_family == AF_INET6) - max_mtu = lowerdev->mtu - VXLAN6_HEADROOM; + max_mtu -= VXLAN6_HEADROOM; else - max_mtu = lowerdev->mtu - VXLAN_HEADROOM; + max_mtu -= VXLAN_HEADROOM;
- if (new_mtu < 68 || new_mtu > max_mtu) + if (new_mtu < 68) return -EINVAL;
+ if (new_mtu > max_mtu) { + if (strict) + return -EINVAL; + + new_mtu = max_mtu; + } + dev->mtu = new_mtu; return 0; }
+static int vxlan_change_mtu(struct net_device *dev, int new_mtu) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_rdst *dst = &vxlan->default_dst; + struct net_device *lowerdev = __dev_get_by_index(vxlan->net, + dst->remote_ifindex); + return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true); +} + - static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb, - struct ip_tunnel_info *info, - __be16 sport, __be16 dport) - { - struct vxlan_dev *vxlan = netdev_priv(dev); - struct rtable *rt; - struct flowi4 fl4; - - memset(&fl4, 0, sizeof(fl4)); - fl4.flowi4_tos = RT_TOS(info->key.tos); - fl4.flowi4_mark = skb->mark; - fl4.flowi4_proto = IPPROTO_UDP; - fl4.daddr = info->key.u.ipv4.dst; - - rt = ip_route_output_key(vxlan->net, &fl4); - if (IS_ERR(rt)) - return PTR_ERR(rt); - ip_rt_put(rt); - - info->key.u.ipv4.src = fl4.saddr; - info->key.tp_src = sport; - info->key.tp_dst = dport; - return 0; - } - static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct vxlan_dev *vxlan = netdev_priv(dev); @@@ -2442,9 -2353,16 +2369,16 @@@ dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
if (ip_tunnel_info_af(info) == AF_INET) { + struct rtable *rt; + if (!vxlan->vn4_sock) return -EINVAL; - return egress_ipv4_tun_info(dev, skb, info, sport, dport); + rt = vxlan_get_route(vxlan, skb, 0, info->key.tos, + info->key.u.ipv4.dst, + &info->key.u.ipv4.src, NULL, info); + if (IS_ERR(rt)) + return PTR_ERR(rt); + ip_rt_put(rt); } else { #if IS_ENABLED(CONFIG_IPV6) struct dst_entry *ndst; @@@ -2453,17 -2371,16 +2387,16 @@@ return -EINVAL; ndst = vxlan6_get_route(vxlan, skb, 0, &info->key.u.ipv6.dst, - &info->key.u.ipv6.src); + &info->key.u.ipv6.src, NULL); if (IS_ERR(ndst)) return PTR_ERR(ndst); dst_release(ndst); - - info->key.tp_src = sport; - info->key.tp_dst = dport; #else /* !CONFIG_IPV6 */ return -EPFNOSUPPORT; #endif } + info->key.tp_src = sport; + info->key.tp_dst = dport; return 0; }
@@@ -2539,7 -2456,6 +2472,7 @@@ static void vxlan_setup(struct net_devi dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX; netif_keep_dst(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
INIT_LIST_HEAD(&vxlan->next); @@@ -2782,7 -2698,6 +2715,7 @@@ static int vxlan_dev_configure(struct n int err; bool use_ipv6 = false; __be16 default_port = vxlan->cfg.dst_port; + struct net_device *lowerdev = NULL;
vxlan->net = src_net;
@@@ -2803,7 -2718,9 +2736,7 @@@ }
if (conf->remote_ifindex) { - struct net_device *lowerdev - = __dev_get_by_index(src_net, conf->remote_ifindex); - + lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex); dst->remote_ifindex = conf->remote_ifindex;
if (!lowerdev) { @@@ -2827,12 -2744,6 +2760,12 @@@ needed_headroom = lowerdev->hard_header_len; }
+ if (conf->mtu) { + err = __vxlan_change_mtu(dev, lowerdev, dst, conf->mtu, false); + if (err) + return err; + } + if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA) needed_headroom += VXLAN6_HEADROOM; else @@@ -2917,7 -2828,7 +2850,7 @@@ static int vxlan_newlink(struct net *sr memset(&conf, 0, sizeof(conf));
if (data[IFLA_VXLAN_ID]) - conf.vni = nla_get_u32(data[IFLA_VXLAN_ID]); + conf.vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
if (data[IFLA_VXLAN_GROUP]) { conf.remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]); @@@ -2985,8 -2896,9 +2918,9 @@@ if (data[IFLA_VXLAN_PORT]) conf.dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
- if (data[IFLA_VXLAN_UDP_CSUM] && nla_get_u8(data[IFLA_VXLAN_UDP_CSUM])) - conf.flags |= VXLAN_F_UDP_CSUM; + if (data[IFLA_VXLAN_UDP_CSUM] && + !nla_get_u8(data[IFLA_VXLAN_UDP_CSUM])) + conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] && nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX])) @@@ -3021,7 -2933,7 +2955,7 @@@ break;
case -EEXIST: - pr_info("duplicate VNI %u\n", conf.vni); + pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni)); break; }
@@@ -3079,7 -2991,7 +3013,7 @@@ static int vxlan_fill_info(struct sk_bu .high = htons(vxlan->cfg.port_max), };
- if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni)) + if (nla_put_u32(skb, IFLA_VXLAN_ID, be32_to_cpu(dst->remote_vni))) goto nla_put_failure;
if (!vxlan_addr_any(&dst->remote_ip)) { @@@ -3130,7 -3042,7 +3064,7 @@@ nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) || nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) || nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM, - !!(vxlan->flags & VXLAN_F_UDP_CSUM)) || + !(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) || nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) || nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, diff --combined drivers/net/wireless/intel/iwlwifi/Kconfig index 7438fbe,11932d5..16c4f38 --- a/drivers/net/wireless/intel/iwlwifi/Kconfig +++ b/drivers/net/wireless/intel/iwlwifi/Kconfig @@@ -53,6 -53,7 +53,6 @@@ config IWLWIFI_LED
config IWLDVM tristate "Intel Wireless WiFi DVM Firmware support" - depends on m help This is the driver that supports the DVM firmware. The list of the devices that use this firmware is available here: @@@ -98,6 -99,18 +98,18 @@@ config IWLWIFI_UAPS
If unsure, say N.
+ config IWLWIFI_PCIE_RTPM + bool "Enable runtime power management mode for PCIe devices" + depends on IWLMVM && PM + default false + help + Say Y here to enable runtime power management for PCIe + devices. If enabled, the device will go into low power mode + when idle for a short period of time, allowing for improved + power saving during runtime. + + If unsure, say N. + menu "Debugging Options"
config IWLWIFI_DEBUG diff --combined drivers/net/wireless/intel/iwlwifi/mvm/scan.c index ea1e177,1e1ab9d..aa6d807 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@@ -930,8 -930,11 +930,11 @@@ int iwl_mvm_config_scan(struct iwl_mvm if (WARN_ON(num_channels > mvm->fw->ucode_capa.n_scan_channels)) return -ENOBUFS;
- if (type == mvm->scan_type) + if (type == mvm->scan_type) { + IWL_DEBUG_SCAN(mvm, + "Ignoring UMAC scan config of the same type\n"); return 0; + }
cmd_size = sizeof(*scan_config) + mvm->fw->ucode_capa.n_scan_channels;
@@@ -1109,7 -1112,7 +1112,7 @@@ static int iwl_mvm_scan_umac(struct iwl cmd->general_flags = cpu_to_le32(iwl_mvm_scan_umac_flags(mvm, params, vif));
- if (type == IWL_MVM_SCAN_SCHED) + if (type == IWL_MVM_SCAN_SCHED || type == IWL_MVM_SCAN_NETDETECT) cmd->flags = cpu_to_le32(IWL_UMAC_SCAN_FLAG_PREEMPTIVE);
if (iwl_mvm_scan_use_ebs(mvm, vif)) @@@ -1298,10 -1301,6 +1301,10 @@@ int iwl_mvm_sched_scan_start(struct iwl return -EBUSY; }
+ /* we don't support "match all" in the firmware */ + if (!req->n_match_sets) + return -EOPNOTSUPP; + ret = iwl_mvm_check_running_scans(mvm, type); if (ret) return ret; @@@ -1355,7 -1354,7 +1358,7 @@@
if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_UMAC_SCAN)) { hcmd.id = iwl_cmd_id(SCAN_REQ_UMAC, IWL_ALWAYS_LONG_GROUP, 0); - ret = iwl_mvm_scan_umac(mvm, vif, &params, IWL_MVM_SCAN_SCHED); + ret = iwl_mvm_scan_umac(mvm, vif, &params, type); } else { hcmd.id = SCAN_OFFLOAD_REQUEST_CMD; ret = iwl_mvm_scan_lmac(mvm, vif, &params); } diff --combined drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 73c9559,2f95916..542bbc5 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@@ -2,6 -2,7 +2,7 @@@ * * Copyright(c) 2003 - 2015 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * Portions of this file are derived from the ipw3945 project, as well * as portions of the ieee80211 subsystem header files. @@@ -56,17 -57,23 +57,23 @@@ #define RX_NUM_QUEUES 1 #define RX_POST_REQ_ALLOC 2 #define RX_CLAIM_REQ_ALLOC 8 - #define RX_POOL_SIZE ((RX_CLAIM_REQ_ALLOC - RX_POST_REQ_ALLOC) * RX_NUM_QUEUES) - #define RX_LOW_WATERMARK 8 + #define RX_PENDING_WATERMARK 16
struct iwl_host_cmd;
/*This file includes the declaration that are internal to the * trans_pcie layer */
+ /** + * struct iwl_rx_mem_buffer + * @page_dma: bus address of rxb page + * @page: driver's pointer to the rxb page + * @vid: index of this rxb in the global table + */ struct iwl_rx_mem_buffer { dma_addr_t page_dma; struct page *page; + u16 vid; struct list_head list; };
@@@ -90,8 -97,12 +97,12 @@@ struct isr_statistics
/** * struct iwl_rxq - Rx queue - * @bd: driver's pointer to buffer of receive buffer descriptors (rbd) + * @id: queue index + * @bd: driver's pointer to buffer of receive buffer descriptors (rbd). + * Address size is 32 bit in pre-9000 devices and 64 bit in 9000 devices. * @bd_dma: bus address of buffer of receive buffer descriptors (rbd) + * @ubd: driver's pointer to buffer of used receive buffer descriptors (rbd) + * @ubd_dma: physical address of buffer of used receive buffer descriptors (rbd) * @read: Shared index to newest available Rx buffer * @write: Shared index to oldest written Rx packet * @free_count: Number of pre-allocated buffers in rx_free @@@ -103,32 -114,34 +114,34 @@@ * @rb_stts: driver's pointer to receive buffer status * @rb_stts_dma: bus address of receive buffer status * @lock: - * @pool: initial pool of iwl_rx_mem_buffer for the queue - * @queue: actual rx queue + * @queue: actual rx queue. Not used for multi-rx queue. * * NOTE: rx_free and rx_used are used as a FIFO for iwl_rx_mem_buffers */ struct iwl_rxq { - __le32 *bd; + int id; + void *bd; dma_addr_t bd_dma; + __le32 *used_bd; + dma_addr_t used_bd_dma; u32 read; u32 write; u32 free_count; u32 used_count; u32 write_actual; + u32 queue_size; struct list_head rx_free; struct list_head rx_used; bool need_update; struct iwl_rb_status *rb_stts; dma_addr_t rb_stts_dma; spinlock_t lock; - struct iwl_rx_mem_buffer pool[RX_QUEUE_SIZE]; + struct napi_struct napi; struct iwl_rx_mem_buffer *queue[RX_QUEUE_SIZE]; };
/** * struct iwl_rb_allocator - Rx allocator - * @pool: initial pool of allocator * @req_pending: number of requests the allcator had not processed yet * @req_ready: number of requests honored and ready for claiming * @rbd_allocated: RBDs with pages allocated and ready to be handled to @@@ -140,7 -153,6 +153,6 @@@ * @rx_alloc: work struct for background calls */ struct iwl_rb_allocator { - struct iwl_rx_mem_buffer pool[RX_POOL_SIZE]; atomic_t req_pending; atomic_t req_ready; struct list_head rbd_allocated; @@@ -280,6 -292,7 +292,7 @@@ struct iwl_txq bool ampdu; bool block; unsigned long wd_timeout; + struct sk_buff_head overflow_q; };
static inline dma_addr_t @@@ -297,6 -310,8 +310,8 @@@ struct iwl_tso_hdr_page /** * struct iwl_trans_pcie - PCIe transport specific data * @rxq: all the RX queue data + * @rx_pool: initial pool of iwl_rx_mem_buffer for all the queues + * @global_table: table mapping received VID from hw to rxb * @rba: allocator for RX replenishing * @drv - pointer to iwl_drv * @trans: pointer to the generic transport area @@@ -323,13 -338,14 +338,14 @@@ * @fw_mon_size: size of the buffer for the firmware monitor */ struct iwl_trans_pcie { - struct iwl_rxq rxq; + struct iwl_rxq *rxq; + struct iwl_rx_mem_buffer rx_pool[MQ_RX_POOL_SIZE]; + struct iwl_rx_mem_buffer *global_table[MQ_RX_TABLE_SIZE]; struct iwl_rb_allocator rba; struct iwl_trans *trans; struct iwl_drv *drv;
struct net_device napi_dev; - struct napi_struct napi;
struct __percpu iwl_tso_hdr_page *tso_hdr_page;
@@@ -359,6 -375,7 +375,7 @@@ bool ucode_write_complete; wait_queue_head_t ucode_write_waitq; wait_queue_head_t wait_command_queue; + wait_queue_head_t d0i3_waitq;
u8 cmd_queue; u8 cmd_fifo; @@@ -490,15 -507,6 +507,15 @@@ static inline void iwl_enable_interrupt iwl_write32(trans, CSR_INT_MASK, trans_pcie->inta_mask); }
+static inline void iwl_enable_fw_load_int(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + IWL_DEBUG_ISR(trans, "Enabling FW load interrupt\n"); + trans_pcie->inta_mask = CSR_INT_BIT_FH_TX; + iwl_write32(trans, CSR_INT_MASK, trans_pcie->inta_mask); +} + static inline void iwl_enable_rfkill_int(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@@ -588,4 -596,7 +605,7 @@@ static inline int iwl_trans_pcie_dbgfs_ } #endif
+ int iwl_pci_fw_exit_d0i3(struct iwl_trans *trans); + int iwl_pci_fw_enter_d0i3(struct iwl_trans *trans); + #endif /* __iwl_trans_int_pcie_h__ */ diff --combined drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 152cf9a,51314e56..07973ef --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@@ -2,6 -2,7 +2,7 @@@ * * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * Portions of this file are derived from the ipw3945 project, as well * as portions of the ieee80211 subsystem header files. @@@ -140,8 -141,8 +141,8 @@@ */ static int iwl_rxq_space(const struct iwl_rxq *rxq) { - /* Make sure RX_QUEUE_SIZE is a power of 2 */ - BUILD_BUG_ON(RX_QUEUE_SIZE & (RX_QUEUE_SIZE - 1)); + /* Make sure rx queue size is a power of 2 */ + WARN_ON(rxq->queue_size & (rxq->queue_size - 1));
/* * There can be up to (RX_QUEUE_SIZE - 1) free slots, to avoid ambiguity @@@ -149,7 -150,7 +150,7 @@@ * The following is equivalent to modulo by RX_QUEUE_SIZE and is well * defined for negative dividends. */ - return (rxq->read - rxq->write - 1) & (RX_QUEUE_SIZE - 1); + return (rxq->read - rxq->write - 1) & (rxq->queue_size - 1); }
/* @@@ -160,6 -161,12 +161,12 @@@ static inline __le32 iwl_pcie_dma_addr2 return cpu_to_le32((u32)(dma_addr >> 8)); }
+ static void iwl_pcie_write_prph_64(struct iwl_trans *trans, u64 ofs, u64 val) + { + iwl_write_prph(trans, ofs, val & 0xffffffff); + iwl_write_prph(trans, ofs + 4, val >> 32); + } + /* * iwl_pcie_rx_stop - stops the Rx DMA */ @@@ -173,10 -180,9 +180,9 @@@ int iwl_pcie_rx_stop(struct iwl_trans * /* * iwl_pcie_rxq_inc_wr_ptr - Update the write pointer for the RX queue */ - static void iwl_pcie_rxq_inc_wr_ptr(struct iwl_trans *trans) + static void iwl_pcie_rxq_inc_wr_ptr(struct iwl_trans *trans, + struct iwl_rxq *rxq) { - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; u32 reg;
lockdep_assert_held(&rxq->lock); @@@ -201,24 -207,73 +207,73 @@@ }
rxq->write_actual = round_down(rxq->write, 8); - iwl_write32(trans, FH_RSCSR_CHNL0_WPTR, rxq->write_actual); + if (trans->cfg->mq_rx_supported) + iwl_write_prph(trans, RFH_Q_FRBDCB_WIDX(rxq->id), + rxq->write_actual); + else + iwl_write32(trans, FH_RSCSR_CHNL0_WPTR, rxq->write_actual); }
static void iwl_pcie_rxq_check_wrptr(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; + int i;
- spin_lock(&rxq->lock); + for (i = 0; i < trans->num_rx_queues; i++) { + struct iwl_rxq *rxq = &trans_pcie->rxq[i]; + + if (!rxq->need_update) + continue; + spin_lock(&rxq->lock); + iwl_pcie_rxq_inc_wr_ptr(trans, rxq); + rxq->need_update = false; + spin_unlock(&rxq->lock); + } + } + + static void iwl_pcie_rxq_mq_restock(struct iwl_trans *trans, + struct iwl_rxq *rxq) + { + struct iwl_rx_mem_buffer *rxb; + + /* + * If the device isn't enabled - no need to try to add buffers... + * This can happen when we stop the device and still have an interrupt + * pending. We stop the APM before we sync the interrupts because we + * have to (see comment there). On the other hand, since the APM is + * stopped, we cannot access the HW (in particular not prph). + * So don't try to restock if the APM has been already stopped. + */ + if (!test_bit(STATUS_DEVICE_ENABLED, &trans->status)) + return;
- if (!rxq->need_update) - goto exit_unlock; + spin_lock(&rxq->lock); + while (rxq->free_count) { + __le64 *bd = (__le64 *)rxq->bd;
- iwl_pcie_rxq_inc_wr_ptr(trans); - rxq->need_update = false; + /* Get next free Rx buffer, remove from free list */ + rxb = list_first_entry(&rxq->rx_free, struct iwl_rx_mem_buffer, + list); + list_del(&rxb->list);
- exit_unlock: + /* 12 first bits are expected to be empty */ + WARN_ON(rxb->page_dma & DMA_BIT_MASK(12)); + /* Point to Rx buffer via next RBD in circular buffer */ + bd[rxq->write] = cpu_to_le64(rxb->page_dma | rxb->vid); + rxq->write = (rxq->write + 1) & MQ_RX_TABLE_MASK; + rxq->free_count--; + } spin_unlock(&rxq->lock); + + /* + * If we've added more space for the firmware to place data, tell it. + * Increment device's write pointer in multiples of 8. + */ + if (rxq->write_actual != (rxq->write & ~0x7)) { + spin_lock(&rxq->lock); + iwl_pcie_rxq_inc_wr_ptr(trans, rxq); + spin_unlock(&rxq->lock); + } }
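In the multi-queue restock above, each 64-bit receive-buffer descriptor packs the page DMA address and the buffer's virtual id into one word: the pages are at least 4 KiB aligned, so the low 12 bits of the address are guaranteed to be zero and can carry the vid (which is exactly what the WARN_ON checks). A hedged sketch of that encoding, using hypothetical names:

	/* Illustrative only: pack a 4 KiB-aligned DMA address and a 12-bit
	 * buffer id into a single little-endian 64-bit descriptor. */
	static __le64 pack_rbd(dma_addr_t page_dma, u16 vid)
	{
		WARN_ON(page_dma & DMA_BIT_MASK(12));	/* must be 4 KiB aligned */
		WARN_ON(vid & ~DMA_BIT_MASK(12));	/* id must fit in 12 bits */
		return cpu_to_le64(page_dma | vid);
	}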
/* @@@ -232,10 -287,8 +287,8 @@@ * also updates the memory address in the firmware to reference the new * target buffer. */ - static void iwl_pcie_rxq_restock(struct iwl_trans *trans) + static void iwl_pcie_rxq_restock(struct iwl_trans *trans, struct iwl_rxq *rxq) { - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; struct iwl_rx_mem_buffer *rxb;
/* @@@ -251,6 -304,7 +304,7 @@@
spin_lock(&rxq->lock); while ((iwl_rxq_space(rxq) > 0) && (rxq->free_count)) { + __le32 *bd = (__le32 *)rxq->bd; /* The overwritten rxb must be a used one */ rxb = rxq->queue[rxq->write]; BUG_ON(rxb && rxb->page); @@@ -261,7 -315,7 +315,7 @@@ list_del(&rxb->list);
/* Point to Rx buffer via next RBD in circular buffer */ - rxq->bd[rxq->write] = iwl_pcie_dma_addr2rbd_ptr(rxb->page_dma); + bd[rxq->write] = iwl_pcie_dma_addr2rbd_ptr(rxb->page_dma); rxq->queue[rxq->write] = rxb; rxq->write = (rxq->write + 1) & RX_QUEUE_MASK; rxq->free_count--; @@@ -272,7 -326,7 +326,7 @@@ * Increment device's write pointer in multiples of 8. */ if (rxq->write_actual != (rxq->write & ~0x7)) { spin_lock(&rxq->lock); - iwl_pcie_rxq_inc_wr_ptr(trans); + iwl_pcie_rxq_inc_wr_ptr(trans, rxq); spin_unlock(&rxq->lock); } } @@@ -285,13 -339,9 +339,9 @@@ static struct page *iwl_pcie_rx_alloc_p gfp_t priority) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; struct page *page; gfp_t gfp_mask = priority;
- if (rxq->free_count > RX_LOW_WATERMARK) - gfp_mask |= __GFP_NOWARN; - if (trans_pcie->rx_page_order > 0) gfp_mask |= __GFP_COMP;
@@@ -301,16 -351,13 +351,13 @@@ if (net_ratelimit()) IWL_DEBUG_INFO(trans, "alloc_pages failed, order: %d\n", trans_pcie->rx_page_order); - /* Issue an error if the hardware has consumed more than half - * of its free buffer list and we don't have enough - * pre-allocated buffers. + /* + * Issue an error if we don't have enough pre-allocated + * buffers. */ - if (rxq->free_count <= RX_LOW_WATERMARK && - iwl_rxq_space(rxq) > (RX_QUEUE_SIZE / 2) && - net_ratelimit()) + if (!(gfp_mask & __GFP_NOWARN) && net_ratelimit()) IWL_CRIT(trans, - "Failed to alloc_pages with GFP_KERNEL. Only %u free buffers remaining.\n", - rxq->free_count); + "Failed to alloc_pages\n"); return NULL; } return page; @@@ -325,10 -372,10 +372,10 @@@ * iwl_pcie_rxq_restock. The latter function will update the HW to use the newly * allocated buffers. */ - static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority) + static void iwl_pcie_rxq_alloc_rbs(struct iwl_trans *trans, gfp_t priority, + struct iwl_rxq *rxq) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; struct iwl_rx_mem_buffer *rxb; struct page *page;
@@@ -372,10 -419,6 +419,6 @@@ __free_pages(page, trans_pcie->rx_page_order); return; } - /* dma address must be no more than 36 bits */ - BUG_ON(rxb->page_dma & ~DMA_BIT_MASK(36)); - /* and also 256 byte aligned! */ - BUG_ON(rxb->page_dma & DMA_BIT_MASK(8));
spin_lock(&rxq->lock);
@@@ -386,41 -429,24 +429,24 @@@ } }
- static void iwl_pcie_rxq_free_rbs(struct iwl_trans *trans) + static void iwl_pcie_free_rbs_pool(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; int i;
- lockdep_assert_held(&rxq->lock); - - for (i = 0; i < RX_QUEUE_SIZE; i++) { - if (!rxq->pool[i].page) + for (i = 0; i < MQ_RX_POOL_SIZE; i++) { + if (!trans_pcie->rx_pool[i].page) continue; - dma_unmap_page(trans->dev, rxq->pool[i].page_dma, + dma_unmap_page(trans->dev, trans_pcie->rx_pool[i].page_dma, PAGE_SIZE << trans_pcie->rx_page_order, DMA_FROM_DEVICE); - __free_pages(rxq->pool[i].page, trans_pcie->rx_page_order); - rxq->pool[i].page = NULL; + __free_pages(trans_pcie->rx_pool[i].page, + trans_pcie->rx_page_order); + trans_pcie->rx_pool[i].page = NULL; } }
/* - * iwl_pcie_rx_replenish - Move all used buffers from rx_used to rx_free - * - * When moving to rx_free an page is allocated for the slot. - * - * Also restock the Rx queue via iwl_pcie_rxq_restock. - * This is called only during initialization - */ - static void iwl_pcie_rx_replenish(struct iwl_trans *trans) - { - iwl_pcie_rxq_alloc_rbs(trans, GFP_KERNEL); - - iwl_pcie_rxq_restock(trans); - } - - /* * iwl_pcie_rx_allocator - Allocates pages in the background for RX queues * * Allocates for each received request 8 pages @@@ -444,6 -470,11 +470,11 @@@ static void iwl_pcie_rx_allocator(struc while (pending) { int i; struct list_head local_allocated; + gfp_t gfp_mask = GFP_KERNEL; + + /* Do not post a warning if there are only a few requests */ + if (pending < RX_PENDING_WATERMARK) + gfp_mask |= __GFP_NOWARN;
INIT_LIST_HEAD(&local_allocated);
@@@ -463,7 -494,7 +494,7 @@@ BUG_ON(rxb->page);
/* Alloc a new receive buffer */ - page = iwl_pcie_rx_alloc_page(trans, GFP_KERNEL); + page = iwl_pcie_rx_alloc_page(trans, gfp_mask); if (!page) continue; rxb->page = page; @@@ -477,10 -508,6 +508,6 @@@ __free_pages(page, trans_pcie->rx_page_order); continue; } - /* dma address must be no more than 36 bits */ - BUG_ON(rxb->page_dma & ~DMA_BIT_MASK(36)); - /* and also 256 byte aligned! */ - BUG_ON(rxb->page_dma & DMA_BIT_MASK(8));
/* move the allocated entry to the out list */ list_move(&rxb->list, &local_allocated); @@@ -561,38 -588,83 +588,83 @@@ static void iwl_pcie_rx_allocator_work( static int iwl_pcie_rx_alloc(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; struct iwl_rb_allocator *rba = &trans_pcie->rba; struct device *dev = trans->dev; + int i; + int free_size = trans->cfg->mq_rx_supported ? sizeof(__le64) : + sizeof(__le32); + + if (WARN_ON(trans_pcie->rxq)) + return -EINVAL;
- memset(&trans_pcie->rxq, 0, sizeof(trans_pcie->rxq)); + trans_pcie->rxq = kcalloc(trans->num_rx_queues, sizeof(struct iwl_rxq), + GFP_KERNEL); + if (!trans_pcie->rxq) + return -EINVAL;
- spin_lock_init(&rxq->lock); spin_lock_init(&rba->lock);
- if (WARN_ON(rxq->bd || rxq->rb_stts)) - return -EINVAL; + for (i = 0; i < trans->num_rx_queues; i++) { + struct iwl_rxq *rxq = &trans_pcie->rxq[i];
- /* Allocate the circular buffer of Read Buffer Descriptors (RBDs) */ - rxq->bd = dma_zalloc_coherent(dev, sizeof(__le32) * RX_QUEUE_SIZE, - &rxq->bd_dma, GFP_KERNEL); - if (!rxq->bd) - goto err_bd; + spin_lock_init(&rxq->lock); + if (trans->cfg->mq_rx_supported) + rxq->queue_size = MQ_RX_TABLE_SIZE; + else + rxq->queue_size = RX_QUEUE_SIZE;
- /*Allocate the driver's pointer to receive buffer status */ - rxq->rb_stts = dma_zalloc_coherent(dev, sizeof(*rxq->rb_stts), - &rxq->rb_stts_dma, GFP_KERNEL); - if (!rxq->rb_stts) - goto err_rb_stts; + /* + * Allocate the circular buffer of Read Buffer Descriptors + * (RBDs) + */ + rxq->bd = dma_zalloc_coherent(dev, + free_size * rxq->queue_size, + &rxq->bd_dma, GFP_KERNEL); + if (!rxq->bd) + goto err; + + if (trans->cfg->mq_rx_supported) { + rxq->used_bd = dma_zalloc_coherent(dev, + sizeof(__le32) * + rxq->queue_size, + &rxq->used_bd_dma, + GFP_KERNEL); + if (!rxq->used_bd) + goto err; + }
+ /*Allocate the driver's pointer to receive buffer status */ + rxq->rb_stts = dma_zalloc_coherent(dev, sizeof(*rxq->rb_stts), + &rxq->rb_stts_dma, + GFP_KERNEL); + if (!rxq->rb_stts) + goto err; + } return 0;
- err_rb_stts: - dma_free_coherent(dev, sizeof(__le32) * RX_QUEUE_SIZE, - rxq->bd, rxq->bd_dma); - rxq->bd_dma = 0; - rxq->bd = NULL; - err_bd: + err: + for (i = 0; i < trans->num_rx_queues; i++) { + struct iwl_rxq *rxq = &trans_pcie->rxq[i]; + + if (rxq->bd) + dma_free_coherent(dev, free_size * rxq->queue_size, + rxq->bd, rxq->bd_dma); + rxq->bd_dma = 0; + rxq->bd = NULL; + + if (rxq->rb_stts) + dma_free_coherent(trans->dev, + sizeof(struct iwl_rb_status), + rxq->rb_stts, rxq->rb_stts_dma); + + if (rxq->used_bd) + dma_free_coherent(dev, sizeof(__le32) * rxq->queue_size, + rxq->used_bd, rxq->used_bd_dma); + rxq->used_bd_dma = 0; + rxq->used_bd = NULL; + } + kfree(trans_pcie->rxq); + return -ENOMEM; }
@@@ -659,65 -731,103 +731,103 @@@ static void iwl_pcie_rx_hw_init(struct iwl_set_bit(trans, CSR_INT_COALESCING, IWL_HOST_INT_OPER_MODE); }
- static void iwl_pcie_rx_init_rxb_lists(struct iwl_rxq *rxq) + static void iwl_pcie_rx_mq_hw_init(struct iwl_trans *trans) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + u32 rb_size, enabled = 0; int i;
- lockdep_assert_held(&rxq->lock); - - INIT_LIST_HEAD(&rxq->rx_free); - INIT_LIST_HEAD(&rxq->rx_used); - rxq->free_count = 0; - rxq->used_count = 0; + switch (trans_pcie->rx_buf_size) { + case IWL_AMSDU_4K: + rb_size = RFH_RXF_DMA_RB_SIZE_4K; + break; + case IWL_AMSDU_8K: + rb_size = RFH_RXF_DMA_RB_SIZE_8K; + break; + case IWL_AMSDU_12K: + rb_size = RFH_RXF_DMA_RB_SIZE_12K; + break; + default: + WARN_ON(1); + rb_size = RFH_RXF_DMA_RB_SIZE_4K; + }
- for (i = 0; i < RX_QUEUE_SIZE; i++) - list_add(&rxq->pool[i].list, &rxq->rx_used); - } + /* Stop Rx DMA */ + iwl_write_prph(trans, RFH_RXF_DMA_CFG, 0); + /* disable free and used rx queue operation */ + iwl_write_prph(trans, RFH_RXF_RXQ_ACTIVE, 0); + + for (i = 0; i < trans->num_rx_queues; i++) { + /* Tell device where to find RBD free table in DRAM */ + iwl_pcie_write_prph_64(trans, RFH_Q_FRBDCB_BA_LSB(i), + (u64)(trans_pcie->rxq[i].bd_dma)); + /* Tell device where to find RBD used table in DRAM */ + iwl_pcie_write_prph_64(trans, RFH_Q_URBDCB_BA_LSB(i), + (u64)(trans_pcie->rxq[i].used_bd_dma)); + /* Tell device where in DRAM to update its Rx status */ + iwl_pcie_write_prph_64(trans, RFH_Q_URBD_STTS_WPTR_LSB(i), + trans_pcie->rxq[i].rb_stts_dma); + /* Reset device index tables */ + iwl_write_prph(trans, RFH_Q_FRBDCB_WIDX(i), 0); + iwl_write_prph(trans, RFH_Q_FRBDCB_RIDX(i), 0); + iwl_write_prph(trans, RFH_Q_URBDCB_WIDX(i), 0); + + enabled |= BIT(i) | BIT(i + 16); + }
- static void iwl_pcie_rx_init_rba(struct iwl_rb_allocator *rba) - { - int i; + /* restock default queue */ + iwl_pcie_rxq_mq_restock(trans, &trans_pcie->rxq[0]);
- lockdep_assert_held(&rba->lock); + /* + * Enable Rx DMA + * Single frame mode + * Rx buffer size 4 or 8k or 12k + * Min RB size 4 or 8 + * 512 RBDs + */ + iwl_write_prph(trans, RFH_RXF_DMA_CFG, + RFH_DMA_EN_ENABLE_VAL | + rb_size | RFH_RXF_DMA_SINGLE_FRAME_MASK | + RFH_RXF_DMA_MIN_RB_4_8 | + RFH_RXF_DMA_RBDCB_SIZE_512);
- INIT_LIST_HEAD(&rba->rbd_allocated); - INIT_LIST_HEAD(&rba->rbd_empty); + iwl_write_prph(trans, RFH_GEN_CFG, RFH_GEN_CFG_RFH_DMA_SNOOP | + RFH_GEN_CFG_SERVICE_DMA_SNOOP); + iwl_write_prph(trans, RFH_RXF_RXQ_ACTIVE, enabled);
- for (i = 0; i < RX_POOL_SIZE; i++) - list_add(&rba->pool[i].list, &rba->rbd_empty); + /* Set interrupt coalescing timer to default (2048 usecs) */ + iwl_write8(trans, CSR_INT_COALESCING, IWL_HOST_INT_TIMEOUT_DEF); }
- static void iwl_pcie_rx_free_rba(struct iwl_trans *trans) + static void iwl_pcie_rx_init_rxb_lists(struct iwl_rxq *rxq) { - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rb_allocator *rba = &trans_pcie->rba; - int i; + lockdep_assert_held(&rxq->lock);
- lockdep_assert_held(&rba->lock); + INIT_LIST_HEAD(&rxq->rx_free); + INIT_LIST_HEAD(&rxq->rx_used); + rxq->free_count = 0; + rxq->used_count = 0; + }
- for (i = 0; i < RX_POOL_SIZE; i++) { - if (!rba->pool[i].page) - continue; - dma_unmap_page(trans->dev, rba->pool[i].page_dma, - PAGE_SIZE << trans_pcie->rx_page_order, - DMA_FROM_DEVICE); - __free_pages(rba->pool[i].page, trans_pcie->rx_page_order); - rba->pool[i].page = NULL; - } + static int iwl_pcie_dummy_napi_poll(struct napi_struct *napi, int budget) + { + WARN_ON(1); + return 0; }
int iwl_pcie_rx_init(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; + struct iwl_rxq *def_rxq; struct iwl_rb_allocator *rba = &trans_pcie->rba; - int i, err; + int i, err, num_rbds, allocator_pool_size;
- if (!rxq->bd) { + if (!trans_pcie->rxq) { err = iwl_pcie_rx_alloc(trans); if (err) return err; } + def_rxq = trans_pcie->rxq; if (!rba->alloc_wq) rba->alloc_wq = alloc_workqueue("rb_allocator", WQ_HIGHPRI | WQ_UNBOUND, 1); @@@ -726,34 -836,68 +836,68 @@@ spin_lock(&rba->lock); atomic_set(&rba->req_pending, 0); atomic_set(&rba->req_ready, 0); - /* free all first - we might be reconfigured for a different size */ - iwl_pcie_rx_free_rba(trans); - iwl_pcie_rx_init_rba(rba); + INIT_LIST_HEAD(&rba->rbd_allocated); + INIT_LIST_HEAD(&rba->rbd_empty); spin_unlock(&rba->lock);
- spin_lock(&rxq->lock); - /* free all first - we might be reconfigured for a different size */ - iwl_pcie_rxq_free_rbs(trans); - iwl_pcie_rx_init_rxb_lists(rxq); + iwl_pcie_free_rbs_pool(trans);
for (i = 0; i < RX_QUEUE_SIZE; i++) - rxq->queue[i] = NULL; + def_rxq->queue[i] = NULL;
- /* Set us so that we have processed and used all buffers, but have - * not restocked the Rx queue with fresh buffers */ - rxq->read = rxq->write = 0; - rxq->write_actual = 0; - memset(rxq->rb_stts, 0, sizeof(*rxq->rb_stts)); - spin_unlock(&rxq->lock); + for (i = 0; i < trans->num_rx_queues; i++) { + struct iwl_rxq *rxq = &trans_pcie->rxq[i];
- iwl_pcie_rx_replenish(trans); + rxq->id = i;
- iwl_pcie_rx_hw_init(trans, rxq); + spin_lock(&rxq->lock); + /* + * Set read write pointer to reflect that we have processed + * and used all buffers, but have not restocked the Rx queue + * with fresh buffers + */ + rxq->read = 0; + rxq->write = 0; + rxq->write_actual = 0; + memset(rxq->rb_stts, 0, sizeof(*rxq->rb_stts));
- spin_lock(&rxq->lock); - iwl_pcie_rxq_inc_wr_ptr(trans); - spin_unlock(&rxq->lock); + iwl_pcie_rx_init_rxb_lists(rxq); + + if (!rxq->napi.poll) + netif_napi_add(&trans_pcie->napi_dev, &rxq->napi, + iwl_pcie_dummy_napi_poll, 64); + + spin_unlock(&rxq->lock); + } + + /* move the pool to the default queue and allocator ownerships */ + num_rbds = trans->cfg->mq_rx_supported ? + MQ_RX_POOL_SIZE : RX_QUEUE_SIZE; + allocator_pool_size = trans->num_rx_queues * + (RX_CLAIM_REQ_ALLOC - RX_POST_REQ_ALLOC); + for (i = 0; i < num_rbds; i++) { + struct iwl_rx_mem_buffer *rxb = &trans_pcie->rx_pool[i]; + + if (i < allocator_pool_size) + list_add(&rxb->list, &rba->rbd_empty); + else + list_add(&rxb->list, &def_rxq->rx_used); + trans_pcie->global_table[i] = rxb; + rxb->vid = (u16)i; + } + + iwl_pcie_rxq_alloc_rbs(trans, GFP_KERNEL, def_rxq); + if (trans->cfg->mq_rx_supported) { + iwl_pcie_rx_mq_hw_init(trans); + } else { + iwl_pcie_rxq_restock(trans, def_rxq); + iwl_pcie_rx_hw_init(trans, def_rxq); + } + + spin_lock(&def_rxq->lock); + iwl_pcie_rxq_inc_wr_ptr(trans, def_rxq); + spin_unlock(&def_rxq->lock);
return 0; } @@@ -761,12 -905,16 +905,16 @@@ void iwl_pcie_rx_free(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; struct iwl_rb_allocator *rba = &trans_pcie->rba; + int free_size = trans->cfg->mq_rx_supported ? sizeof(__le64) : + sizeof(__le32); + int i;
- /*if rxq->bd is NULL, it means that nothing has been allocated, - * exit now */ - if (!rxq->bd) { + /* + * if rxq is NULL, it means that nothing has been allocated, + * exit now + */ + if (!trans_pcie->rxq) { IWL_DEBUG_INFO(trans, "Free NULL rx context\n"); return; } @@@ -777,27 -925,37 +925,37 @@@ rba->alloc_wq = NULL; }
- spin_lock(&rba->lock); - iwl_pcie_rx_free_rba(trans); - spin_unlock(&rba->lock); - - spin_lock(&rxq->lock); - iwl_pcie_rxq_free_rbs(trans); - spin_unlock(&rxq->lock); - - dma_free_coherent(trans->dev, sizeof(__le32) * RX_QUEUE_SIZE, - rxq->bd, rxq->bd_dma); - rxq->bd_dma = 0; - rxq->bd = NULL; - - if (rxq->rb_stts) - dma_free_coherent(trans->dev, - sizeof(struct iwl_rb_status), - rxq->rb_stts, rxq->rb_stts_dma); - else - IWL_DEBUG_INFO(trans, "Free rxq->rb_stts which is NULL\n"); - rxq->rb_stts_dma = 0; - rxq->rb_stts = NULL; + iwl_pcie_free_rbs_pool(trans); + + for (i = 0; i < trans->num_rx_queues; i++) { + struct iwl_rxq *rxq = &trans_pcie->rxq[i]; + + if (rxq->bd) + dma_free_coherent(trans->dev, + free_size * rxq->queue_size, + rxq->bd, rxq->bd_dma); + rxq->bd_dma = 0; + rxq->bd = NULL; + + if (rxq->rb_stts) + dma_free_coherent(trans->dev, + sizeof(struct iwl_rb_status), + rxq->rb_stts, rxq->rb_stts_dma); + else + IWL_DEBUG_INFO(trans, + "Free rxq->rb_stts which is NULL\n"); + + if (rxq->used_bd) + dma_free_coherent(trans->dev, + sizeof(__le32) * rxq->queue_size, + rxq->used_bd, rxq->used_bd_dma); + rxq->used_bd_dma = 0; + rxq->used_bd = NULL; + + if (rxq->napi.poll) + netif_napi_del(&rxq->napi); + } + kfree(trans_pcie->rxq); }
/* @@@ -841,11 -999,11 +999,11 @@@ static void iwl_pcie_rx_reuse_rbd(struc }
static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, + struct iwl_rxq *rxq, struct iwl_rx_mem_buffer *rxb, bool emergency) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue]; bool page_stolen = false; int max_len = PAGE_SIZE << trans_pcie->rx_page_order; @@@ -911,7 -1069,12 +1069,12 @@@ index = SEQ_TO_INDEX(sequence); cmd_index = get_cmd_index(&txq->q, index);
- iwl_op_mode_rx(trans->op_mode, &trans_pcie->napi, &rxcb); + if (rxq->id == 0) + iwl_op_mode_rx(trans->op_mode, &rxq->napi, + &rxcb); + else + iwl_op_mode_rx_rss(trans->op_mode, &rxq->napi, + &rxcb, rxq->id);
if (reclaim) { kzfree(txq->entries[cmd_index].free_buf); @@@ -975,7 -1138,7 +1138,7 @@@ static void iwl_pcie_rx_handle(struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; + struct iwl_rxq *rxq = &trans_pcie->rxq[0]; u32 r, i, j, count = 0; bool emergency = false;
@@@ -993,16 -1156,26 +1156,26 @@@ restart while (i != r) { struct iwl_rx_mem_buffer *rxb;
- if (unlikely(rxq->used_count == RX_QUEUE_SIZE / 2)) + if (unlikely(rxq->used_count == rxq->queue_size / 2)) emergency = true;
- rxb = rxq->queue[i]; - rxq->queue[i] = NULL; + if (trans->cfg->mq_rx_supported) { + /* + * used_bd is a 32 bit but only 12 are used to retrieve + * the vid + */ + u16 vid = (u16)le32_to_cpu(rxq->used_bd[i]); + + rxb = trans_pcie->global_table[vid]; + } else { + rxb = rxq->queue[i]; + rxq->queue[i] = NULL; + }
IWL_DEBUG_RX(trans, "rxbuf: HW = %d, SW = %d\n", r, i); - iwl_pcie_rx_handle_rb(trans, rxb, emergency); + iwl_pcie_rx_handle_rb(trans, rxq, rxb, emergency);
- i = (i + 1) & RX_QUEUE_MASK; + i = (i + 1) & (rxq->queue_size - 1);
/* If we have RX_CLAIM_REQ_ALLOC released rx buffers - * try to claim the pre-allocated buffers from the allocator */ @@@ -1040,10 -1213,10 +1213,10 @@@ count++; if (count == 8) { count = 0; - if (rxq->used_count < RX_QUEUE_SIZE / 3) + if (rxq->used_count < rxq->queue_size / 3) emergency = false; spin_unlock(&rxq->lock); - iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC); + iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC, rxq); spin_lock(&rxq->lock); } } @@@ -1055,7 -1228,10 +1228,10 @@@ if (rxq->free_count >= RX_CLAIM_REQ_ALLOC) { rxq->read = i; spin_unlock(&rxq->lock); - iwl_pcie_rxq_restock(trans); + if (trans->cfg->mq_rx_supported) + iwl_pcie_rxq_mq_restock(trans, rxq); + else + iwl_pcie_rxq_restock(trans, rxq); goto restart; } } @@@ -1077,10 -1253,10 +1253,10 @@@ * will be restocked by the next call of iwl_pcie_rxq_restock. */ if (unlikely(emergency && count)) - iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC); + iwl_pcie_rxq_alloc_rbs(trans, GFP_ATOMIC, rxq);
- if (trans_pcie->napi.poll) - napi_gro_flush(&trans_pcie->napi, false); + if (rxq->napi.poll) + napi_gro_flush(&rxq->napi, false); }
/* @@@ -1438,11 -1614,9 +1614,11 @@@ irqreturn_t iwl_pcie_irq_handler(int ir inta & ~trans_pcie->inta_mask); }
- /* Re-enable all interrupts */ - /* only Re-enable if disabled by irq */ - if (test_bit(STATUS_INT_ENABLED, &trans->status)) + /* we are loading the firmware, enable FH_TX interrupt only */ + if (handled & CSR_INT_BIT_FH_TX) + iwl_enable_fw_load_int(trans); + /* only Re-enable all interrupt if disabled by irq */ + else if (test_bit(STATUS_INT_ENABLED, &trans->status)) iwl_enable_interrupts(trans); /* Re-enable RF_KILL if it occurred */ else if (handled & CSR_INT_BIT_RF_KILL) diff --combined drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 5a854c6,b796952..58591ca --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@@ -72,6 -72,7 +72,7 @@@ #include <linux/bitops.h> #include <linux/gfp.h> #include <linux/vmalloc.h> + #include <linux/pm_runtime.h>
#include "iwl-drv.h" #include "iwl-trans.h" @@@ -1021,6 -1022,82 +1022,6 @@@ static int iwl_pcie_load_given_ucode_80 &first_ucode_section); }
-static int iwl_trans_pcie_start_fw(struct iwl_trans *trans, - const struct fw_img *fw, bool run_in_rfkill) -{ - struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - bool hw_rfkill; - int ret; - - mutex_lock(&trans_pcie->mutex); - - /* Someone called stop_device, don't try to start_fw */ - if (trans_pcie->is_down) { - IWL_WARN(trans, - "Can't start_fw since the HW hasn't been started\n"); - ret = EIO; - goto out; - } - - /* This may fail if AMT took ownership of the device */ - if (iwl_pcie_prepare_card_hw(trans)) { - IWL_WARN(trans, "Exit HW not ready\n"); - ret = -EIO; - goto out; - } - - iwl_enable_rfkill_int(trans); - - /* If platform's RF_KILL switch is NOT set to KILL */ - hw_rfkill = iwl_is_rfkill_set(trans); - if (hw_rfkill) - set_bit(STATUS_RFKILL, &trans->status); - else - clear_bit(STATUS_RFKILL, &trans->status); - iwl_trans_pcie_rf_kill(trans, hw_rfkill); - if (hw_rfkill && !run_in_rfkill) { - ret = -ERFKILL; - goto out; - } - - iwl_write32(trans, CSR_INT, 0xFFFFFFFF); - - ret = iwl_pcie_nic_init(trans); - if (ret) { - IWL_ERR(trans, "Unable to init nic\n"); - goto out; - } - - /* make sure rfkill handshake bits are cleared */ - iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL); - iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, - CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED); - - /* clear (again), then enable host interrupts */ - iwl_write32(trans, CSR_INT, 0xFFFFFFFF); - iwl_enable_interrupts(trans); - - /* really make sure rfkill handshake bits are cleared */ - iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL); - iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL); - - /* Load the given image to the HW */ - if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) - ret = iwl_pcie_load_given_ucode_8000(trans, fw); - else - ret = iwl_pcie_load_given_ucode(trans, fw); - -out: - mutex_unlock(&trans_pcie->mutex); - return ret; -} - -static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr) -{ - iwl_pcie_reset_ict(trans); - iwl_pcie_tx_start(trans, scd_addr); -} - static void _iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@@ -1051,8 -1128,7 +1052,8 @@@ * already dead. */ if (test_and_clear_bit(STATUS_DEVICE_ENABLED, &trans->status)) { - IWL_DEBUG_INFO(trans, "DEVICE_ENABLED bit was set and is now cleared\n"); + IWL_DEBUG_INFO(trans, + "DEVICE_ENABLED bit was set and is now cleared\n"); iwl_pcie_tx_stop(trans); iwl_pcie_rx_stop(trans);
@@@ -1086,6 -1162,7 +1087,6 @@@ iwl_disable_interrupts(trans); spin_unlock(&trans_pcie->irq_lock);
- /* clear all status bits */ clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status); clear_bit(STATUS_INT_ENABLED, &trans->status); @@@ -1118,116 -1195,10 +1119,116 @@@ if (hw_rfkill != was_hw_rfkill) iwl_trans_pcie_rf_kill(trans, hw_rfkill);
- /* re-take ownership to prevent other users from stealing the deivce */ + /* re-take ownership to prevent other users from stealing the device */ iwl_pcie_prepare_card_hw(trans); }
+static int iwl_trans_pcie_start_fw(struct iwl_trans *trans, + const struct fw_img *fw, bool run_in_rfkill) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + bool hw_rfkill; + int ret; + + /* This may fail if AMT took ownership of the device */ + if (iwl_pcie_prepare_card_hw(trans)) { + IWL_WARN(trans, "Exit HW not ready\n"); + ret = -EIO; + goto out; + } + + iwl_enable_rfkill_int(trans); + + iwl_write32(trans, CSR_INT, 0xFFFFFFFF); + + /* + * We enabled the RF-Kill interrupt and the handler may very + * well be running. Disable the interrupts to make sure no other + * interrupt can be fired. + */ + iwl_disable_interrupts(trans); + + /* Make sure it finished running */ + synchronize_irq(trans_pcie->pci_dev->irq); + + mutex_lock(&trans_pcie->mutex); + + /* If platform's RF_KILL switch is NOT set to KILL */ + hw_rfkill = iwl_is_rfkill_set(trans); + if (hw_rfkill) + set_bit(STATUS_RFKILL, &trans->status); + else + clear_bit(STATUS_RFKILL, &trans->status); + iwl_trans_pcie_rf_kill(trans, hw_rfkill); + if (hw_rfkill && !run_in_rfkill) { + ret = -ERFKILL; + goto out; + } + + /* Someone called stop_device, don't try to start_fw */ + if (trans_pcie->is_down) { + IWL_WARN(trans, + "Can't start_fw since the HW hasn't been started\n"); + ret = -EIO; + goto out; + } + + /* make sure rfkill handshake bits are cleared */ + iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL); + iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, + CSR_UCODE_DRV_GP1_BIT_CMD_BLOCKED); + + /* clear (again), then enable host interrupts */ + iwl_write32(trans, CSR_INT, 0xFFFFFFFF); + + ret = iwl_pcie_nic_init(trans); + if (ret) { + IWL_ERR(trans, "Unable to init nic\n"); + goto out; + } + + /* + * Now, we load the firmware and don't want to be interrupted, even + * by the RF-Kill interrupt (hence mask all the interrupt besides the + * FH_TX interrupt which is needed to load the firmware). If the + * RF-Kill switch is toggled, we will find out after having loaded + * the firmware and return the proper value to the caller. + */ + iwl_enable_fw_load_int(trans); + + /* really make sure rfkill handshake bits are cleared */ + iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL); + iwl_write32(trans, CSR_UCODE_DRV_GP1_CLR, CSR_UCODE_SW_BIT_RFKILL); + + /* Load the given image to the HW */ + if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) + ret = iwl_pcie_load_given_ucode_8000(trans, fw); + else + ret = iwl_pcie_load_given_ucode(trans, fw); + iwl_enable_interrupts(trans); + + /* re-check RF-Kill state since we may have missed the interrupt */ + hw_rfkill = iwl_is_rfkill_set(trans); + if (hw_rfkill) + set_bit(STATUS_RFKILL, &trans->status); + else + clear_bit(STATUS_RFKILL, &trans->status); + + iwl_trans_pcie_rf_kill(trans, hw_rfkill); + if (hw_rfkill && !run_in_rfkill) + ret = -ERFKILL; + +out: + mutex_unlock(&trans_pcie->mutex); + return ret; +} + +static void iwl_trans_pcie_fw_alive(struct iwl_trans *trans, u32 scd_addr) +{ + iwl_pcie_reset_ict(trans); + iwl_pcie_tx_start(trans, scd_addr); +} + static void iwl_trans_pcie_stop_device(struct iwl_trans *trans, bool low_power) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@@ -1248,11 -1219,12 +1249,12 @@@ void iwl_trans_pcie_rf_kill(struct iwl_ _iwl_trans_pcie_stop_device(trans, true); }
- static void iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test) + static void iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test, + bool reset) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
- if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3) { + if (!reset) { /* Enable persistence mode to avoid reset */ iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG, CSR_HW_IF_CONFIG_REG_PERSIST_MODE); @@@ -1276,7 -1248,7 +1278,7 @@@ iwl_clear_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
- if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D3) { + if (reset) { /* * reset TX queues -- some of their registers reset during S3 * so if we don't reset everything here the D3 image would try @@@ -1290,7 -1262,7 +1292,7 @@@
static int iwl_trans_pcie_d3_resume(struct iwl_trans *trans, enum iwl_d3_status *status, - bool test) + bool test, bool reset) { u32 val; int ret; @@@ -1325,7 -1297,7 +1327,7 @@@
iwl_pcie_set_pwr(trans, false);
- if (trans->system_pm_mode == IWL_PLAT_PM_MODE_D0I3) { + if (!reset) { iwl_clear_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ); } else { @@@ -1383,6 -1355,10 +1385,10 @@@ static int _iwl_trans_pcie_start_hw(str /* ... rfkill can call stop_device and set it false if needed */ iwl_trans_pcie_rf_kill(trans, hw_rfkill);
+ /* Make sure we sync here, because we'll need full access later */ + if (low_power) + pm_runtime_resume(trans->dev); + return 0; }
@@@ -1452,12 -1428,6 +1458,6 @@@ static void iwl_trans_pcie_write_prph(s iwl_trans_pcie_write32(trans, HBUS_TARG_PRPH_WDAT, val); }
- static int iwl_pcie_dummy_napi_poll(struct napi_struct *napi, int budget) - { - WARN_ON(1); - return 0; - } - static void iwl_trans_pcie_configure(struct iwl_trans *trans, const struct iwl_trans_config *trans_cfg) { @@@ -1494,11 -1464,8 +1494,8 @@@ * As this function may be called again in some corner cases don't * do anything if NAPI was already initialized. */ - if (!trans_pcie->napi.poll) { + if (trans_pcie->napi_dev.reg_state != NETREG_DUMMY) init_dummy_netdev(&trans_pcie->napi_dev); - netif_napi_add(&trans_pcie->napi_dev, &trans_pcie->napi, - iwl_pcie_dummy_napi_poll, 64); - } }
void iwl_trans_pcie_free(struct iwl_trans *trans) @@@ -1506,6 -1473,9 +1503,9 @@@ struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int i;
+ /* TODO: check if this is really needed */ + pm_runtime_disable(trans->dev); + synchronize_irq(trans_pcie->pci_dev->irq);
iwl_pcie_tx_free(trans); @@@ -1519,9 -1489,6 +1519,6 @@@ pci_release_regions(trans_pcie->pci_dev); pci_disable_device(trans_pcie->pci_dev);
- if (trans_pcie->napi.poll) - netif_napi_del(&trans_pcie->napi); - iwl_pcie_free_fw_monitor(trans);
for_each_possible_cpu(i) { @@@ -1861,6 -1828,7 +1858,7 @@@ void iwl_trans_pcie_ref(struct iwl_tran spin_lock_irqsave(&trans_pcie->ref_lock, flags); IWL_DEBUG_RPM(trans, "ref_counter: %d\n", trans_pcie->ref_count); trans_pcie->ref_count++; + pm_runtime_get(&trans_pcie->pci_dev->dev); spin_unlock_irqrestore(&trans_pcie->ref_lock, flags); }
@@@ -1879,6 -1847,10 +1877,10 @@@ void iwl_trans_pcie_unref(struct iwl_tr return; } trans_pcie->ref_count--; + + pm_runtime_mark_last_busy(&trans_pcie->pci_dev->dev); + pm_runtime_put_autosuspend(&trans_pcie->pci_dev->dev); + spin_unlock_irqrestore(&trans_pcie->ref_lock, flags); }
@@@ -2031,29 -2003,48 +2033,48 @@@ static ssize_t iwl_dbgfs_rx_queue_read( { struct iwl_trans *trans = file->private_data; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - struct iwl_rxq *rxq = &trans_pcie->rxq; - char buf[256]; - int pos = 0; - const size_t bufsz = sizeof(buf); - - pos += scnprintf(buf + pos, bufsz - pos, "read: %u\n", - rxq->read); - pos += scnprintf(buf + pos, bufsz - pos, "write: %u\n", - rxq->write); - pos += scnprintf(buf + pos, bufsz - pos, "write_actual: %u\n", - rxq->write_actual); - pos += scnprintf(buf + pos, bufsz - pos, "need_update: %d\n", - rxq->need_update); - pos += scnprintf(buf + pos, bufsz - pos, "free_count: %u\n", - rxq->free_count); - if (rxq->rb_stts) { - pos += scnprintf(buf + pos, bufsz - pos, "closed_rb_num: %u\n", - le16_to_cpu(rxq->rb_stts->closed_rb_num) & 0x0FFF); - } else { - pos += scnprintf(buf + pos, bufsz - pos, - "closed_rb_num: Not Allocated\n"); + char *buf; + int pos = 0, i, ret; + size_t bufsz = sizeof(buf); + + bufsz = sizeof(char) * 121 * trans->num_rx_queues; + + if (!trans_pcie->rxq) + return -EAGAIN; + + buf = kzalloc(bufsz, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + for (i = 0; i < trans->num_rx_queues && pos < bufsz; i++) { + struct iwl_rxq *rxq = &trans_pcie->rxq[i]; + + pos += scnprintf(buf + pos, bufsz - pos, "queue#: %2d\n", + i); + pos += scnprintf(buf + pos, bufsz - pos, "\tread: %u\n", + rxq->read); + pos += scnprintf(buf + pos, bufsz - pos, "\twrite: %u\n", + rxq->write); + pos += scnprintf(buf + pos, bufsz - pos, "\twrite_actual: %u\n", + rxq->write_actual); + pos += scnprintf(buf + pos, bufsz - pos, "\tneed_update: %2d\n", + rxq->need_update); + pos += scnprintf(buf + pos, bufsz - pos, "\tfree_count: %u\n", + rxq->free_count); + if (rxq->rb_stts) { + pos += scnprintf(buf + pos, bufsz - pos, + "\tclosed_rb_num: %u\n", + le16_to_cpu(rxq->rb_stts->closed_rb_num) & + 0x0FFF); + } else { + pos += scnprintf(buf + pos, bufsz - pos, + "\tclosed_rb_num: Not Allocated\n"); + } } - return simple_read_from_buffer(user_buf, count, ppos, buf, pos); + ret = simple_read_from_buffer(user_buf, count, ppos, buf, pos); + kfree(buf); + + return ret; }
static ssize_t iwl_dbgfs_interrupt_read(struct file *file, @@@ -2218,7 -2209,8 +2239,8 @@@ static u32 iwl_trans_pcie_dump_rbs(stru { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); int max_len = PAGE_SIZE << trans_pcie->rx_page_order; - struct iwl_rxq *rxq = &trans_pcie->rxq; + /* Dump RBs is supported only for pre-9000 devices (1 queue) */ + struct iwl_rxq *rxq = &trans_pcie->rxq[0]; u32 i, r, j, rb_len = 0;
spin_lock(&rxq->lock); @@@ -2413,7 -2405,8 +2435,8 @@@ static struct iwl_trans_dump_dat u32 len, num_rbs; u32 monitor_len; int i, ptr; - bool dump_rbs = test_bit(STATUS_FW_ERROR, &trans->status); + bool dump_rbs = test_bit(STATUS_FW_ERROR, &trans->status) && + !trans->cfg->mq_rx_supported;
/* transport dump header */ len = sizeof(*dump_data); @@@ -2468,11 -2461,12 +2491,12 @@@ len += sizeof(*data) + (FH_MEM_UPPER_BOUND - FH_MEM_LOWER_BOUND);
if (dump_rbs) { + /* Dump RBs is supported only for pre-9000 devices (1 queue) */ + struct iwl_rxq *rxq = &trans_pcie->rxq[0]; /* RBs */ - num_rbs = le16_to_cpu(ACCESS_ONCE( - trans_pcie->rxq.rb_stts->closed_rb_num)) + num_rbs = le16_to_cpu(ACCESS_ONCE(rxq->rb_stts->closed_rb_num)) & 0x0FFF; - num_rbs = (num_rbs - trans_pcie->rxq.read) & RX_QUEUE_MASK; + num_rbs = (num_rbs - rxq->read) & RX_QUEUE_MASK; len += num_rbs * (sizeof(*data) + sizeof(struct iwl_fw_error_dump_rb) + (PAGE_SIZE << trans_pcie->rx_page_order)); @@@ -2523,6 -2517,22 +2547,22 @@@ return dump_data; }
+ #ifdef CONFIG_PM_SLEEP + static int iwl_trans_pcie_suspend(struct iwl_trans *trans) + { + if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3) + return iwl_pci_fw_enter_d0i3(trans); + + return 0; + } + + static void iwl_trans_pcie_resume(struct iwl_trans *trans) + { + if (trans->runtime_pm_mode == IWL_PLAT_PM_MODE_D0I3) + iwl_pci_fw_exit_d0i3(trans); + } + #endif /* CONFIG_PM_SLEEP */ + static const struct iwl_trans_ops trans_ops_pcie = { .start_hw = iwl_trans_pcie_start_hw, .op_mode_leave = iwl_trans_pcie_op_mode_leave, @@@ -2533,6 -2543,11 +2573,11 @@@ .d3_suspend = iwl_trans_pcie_d3_suspend, .d3_resume = iwl_trans_pcie_d3_resume,
+ #ifdef CONFIG_PM_SLEEP + .suspend = iwl_trans_pcie_suspend, + .resume = iwl_trans_pcie_resume, + #endif /* CONFIG_PM_SLEEP */ + .send_cmd = iwl_trans_pcie_send_hcmd,
.tx = iwl_trans_pcie_tx, @@@ -2571,7 -2586,7 +2616,7 @@@ struct iwl_trans *iwl_trans_pcie_alloc( struct iwl_trans_pcie *trans_pcie; struct iwl_trans *trans; u16 pci_cmd; - int ret; + int ret, addr_size;
trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie), &pdev->dev, cfg, &trans_ops_pcie, 0); @@@ -2609,11 -2624,17 +2654,17 @@@ PCIE_LINK_STATE_CLKPM); }
+ if (cfg->mq_rx_supported) + addr_size = 64; + else + addr_size = 36; + pci_set_master(pdev);
- ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(36)); + ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(addr_size)); if (!ret) - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(36)); + ret = pci_set_consistent_dma_mask(pdev, + DMA_BIT_MASK(addr_size)); if (ret) { ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (!ret) @@@ -2716,6 -2737,8 +2767,8 @@@ /* Initialize the wait queue for commands */ init_waitqueue_head(&trans_pcie->wait_command_queue);
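Probe now widens the DMA mask from 36 to 64 bits when multi-queue RX hardware is present, falling back to a 32-bit mask if the wider one is rejected. A generic sketch of that fallback pattern, with a hypothetical helper name and without the driver's full error handling:

	/* Illustrative only: prefer a wide DMA mask, fall back to 32 bits. */
	static int set_dma_masks(struct pci_dev *pdev, int addr_size)
	{
		int ret;

		ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(addr_size));
		if (!ret)
			ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(addr_size));
		if (ret) {
			ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
			if (!ret)
				ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
		}
		return ret;
	}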
+ init_waitqueue_head(&trans_pcie->d0i3_waitq); + ret = iwl_pcie_alloc_ict(trans); if (ret) goto out_pci_disable_msi; @@@ -2730,6 -2753,12 +2783,12 @@@
trans_pcie->inta_mask = CSR_INI_SET_MASK;
+ #ifdef CONFIG_IWLWIFI_PCIE_RTPM + trans->runtime_pm_mode = IWL_PLAT_PM_MODE_D0I3; + #else + trans->runtime_pm_mode = IWL_PLAT_PM_MODE_DISABLED; + #endif /* CONFIG_IWLWIFI_PCIE_RTPM */ + return trans;
out_free_ict: diff --combined include/linux/netdevice.h index 5440b7b,47671ce0..e52077f --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@@ -51,6 -51,7 +51,7 @@@ #include <linux/neighbour.h> #include <uapi/linux/netdevice.h> #include <uapi/linux/if_bonding.h> + #include <uapi/linux/pkt_cls.h>
struct netpoll_info; struct device; @@@ -778,6 -779,25 +779,25 @@@ static inline bool netdev_phys_item_id_ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb);
+ /* These structures hold the attributes of qdisc and classifiers + * that are being passed to the netdevice through the setup_tc op. + */ + enum { + TC_SETUP_MQPRIO, + TC_SETUP_CLSU32, + }; + + struct tc_cls_u32_offload; + + struct tc_to_netdev { + unsigned int type; + union { + u8 tc; + struct tc_cls_u32_offload *cls_u32; + }; + }; + + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@@ -1150,7 -1170,10 +1170,10 @@@ struct net_device_ops int (*ndo_set_vf_rss_query_en)( struct net_device *dev, int vf, bool setting); - int (*ndo_setup_tc)(struct net_device *dev, u8 tc); + int (*ndo_setup_tc)(struct net_device *dev, + u32 handle, + __be16 protocol, + struct tc_to_netdev *tc); #if IS_ENABLED(CONFIG_FCOE) int (*ndo_fcoe_enable)(struct net_device *dev); int (*ndo_fcoe_disable)(struct net_device *dev); @@@ -1291,6 -1314,7 +1314,7 @@@ * @IFF_OPENVSWITCH: device is a Open vSwitch master * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device * @IFF_TEAM: device is a team device + * @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@@ -1318,6 -1342,7 +1342,7 @@@ IFF_OPENVSWITCH = 1<<22, IFF_L3MDEV_SLAVE = 1<<23, IFF_TEAM = 1<<24, + IFF_RXFH_CONFIGURED = 1<<25, };
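With the reworked ndo_setup_tc() above, a driver receives a struct tc_to_netdev and dispatches on its type instead of taking a bare traffic-class count. A hedged sketch of what such a driver callback could look like (hypothetical driver, the offload bodies are placeholders):

	/* Illustrative only: dispatch on the offload type passed by the stack. */
	static int demo_setup_tc(struct net_device *dev, u32 handle,
				 __be16 protocol, struct tc_to_netdev *tc)
	{
		switch (tc->type) {
		case TC_SETUP_MQPRIO:
			/* program the hardware with tc->tc traffic classes */
			return 0;
		case TC_SETUP_CLSU32:
			/* offload the u32 classifier described by tc->cls_u32 */
			return 0;
		default:
			return -EOPNOTSUPP;
		}
	}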
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@@ -1345,6 -1370,7 +1370,7 @@@ #define IFF_OPENVSWITCH IFF_OPENVSWITCH #define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE #define IFF_TEAM IFF_TEAM + #define IFF_RXFH_CONFIGURED IFF_RXFH_CONFIGURED
/** * struct net_device - The DEVICE structure. @@@ -1397,6 -1423,8 +1423,8 @@@ * do not use this in drivers * @tx_dropped: Dropped packets by core network, * do not use this in drivers + * @rx_nohandler: nohandler dropped packets by core network on + * inactive devices, do not use this in drivers * * @wireless_handlers: List of functions to handle Wireless Extensions, * instead of ioctl, @@@ -1611,6 -1639,7 +1639,7 @@@ struct net_device
atomic_long_t rx_dropped; atomic_long_t tx_dropped; + atomic_long_t rx_nohandler;
#ifdef CONFIG_WIRELESS_EXT const struct iw_handler_def * wireless_handlers; @@@ -3718,7 -3747,7 +3747,7 @@@ void *netdev_lower_get_next_private_rcu void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter); #define netdev_for_each_lower_dev(dev, ldev, iter) \ - for (iter = &(dev)->adj_list.lower, \ + for (iter = (dev)->adj_list.lower.next, \ ldev = netdev_lower_get_next(dev, &(iter)); \ ldev; \ ldev = netdev_lower_get_next(dev, &(iter))) @@@ -3741,7 -3770,7 +3770,7 @@@ void netdev_lower_state_changed(struct
/* RSS keys are 40 or 52 bytes long */ #define NETDEV_RSS_KEY_LEN 52 - extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN]; + extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len);
int dev_get_nest_level(struct net_device *dev, @@@ -4045,6 -4074,11 +4074,11 @@@ static inline bool netif_is_lag_port(co return netif_is_bond_slave(dev) || netif_is_team_port(dev); }
+ static inline bool netif_is_rxfh_configured(const struct net_device *dev) + { + return dev->priv_flags & IFF_RXFH_CONFIGURED; + } + /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { diff --combined include/linux/skbuff.h index 4ce9ff7,6a57757..eab4f8f --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@@ -299,7 -299,6 +299,7 @@@ struct sk_buff #else #define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1) #endif +extern int sysctl_max_skb_frags;
typedef struct skb_frag_struct skb_frag_t;
@@@ -2162,6 -2161,11 +2162,11 @@@ static inline int skb_checksum_start_of return skb->csum_start - skb_headroom(skb); }
+ static inline unsigned char *skb_checksum_start(const struct sk_buff *skb) + { + return skb->head + skb->csum_start; + } + static inline int skb_transport_offset(const struct sk_buff *skb) { return skb_transport_header(skb) - skb->data; @@@ -2400,6 -2404,10 +2405,10 @@@ static inline struct sk_buff *napi_allo { return __napi_alloc_skb(napi, length, GFP_ATOMIC); } + void napi_consume_skb(struct sk_buff *skb, int budget); + + void __kfree_skb_flush(void); + void __kfree_skb_defer(struct sk_buff *skb);
/** * __dev_alloc_pages - allocate page for network Rx @@@ -2622,6 -2630,13 +2631,13 @@@ static inline int skb_clone_writable(co skb_headroom(skb) + len <= skb->hdr_len; }
+ static inline int skb_try_make_writable(struct sk_buff *skb, + unsigned int write_len) + { + return skb_cloned(skb) && !skb_clone_writable(skb, write_len) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + } + static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, int cloned) { @@@ -3550,6 -3565,7 +3566,7 @@@ static inline struct sec_path *skb_sec_ struct skb_gso_cb { int mac_offset; int encap_level; + __wsum csum; __u16 csum_start; }; #define SKB_SGO_CB_OFFSET 32 @@@ -3576,6 -3592,16 +3593,16 @@@ static inline int gso_pskb_expand_head( return 0; }
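skb_try_make_writable() added above bundles the usual "cloned and not clone-writable, then expand the head" sequence into one call that returns non-zero only when the data could not be made private. A hypothetical caller, sketched only to show the intended usage:

	/* Illustrative only: bail out if the skb cannot be made writable. */
	static int demo_mangle(struct sk_buff *skb, unsigned int write_len)
	{
		if (skb_try_make_writable(skb, write_len))
			return -ENOMEM;	/* could not unshare/expand the data */

		/* the first write_len bytes of skb->data may now be modified */
		return 0;
	}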
+ static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res) + { + /* Do not update partial checksums if remote checksum is enabled. */ + if (skb->remcsum_offload) + return; + + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head; + } + /* Compute the checksum for a gso segment. First compute the checksum value * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and * then add in skb->csum (checksum from csum_start to end of packet). @@@ -3586,15 -3612,14 +3613,14 @@@ */ static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res) { - int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) - - skb_transport_offset(skb); - __wsum partial; + unsigned char *csum_start = skb_transport_header(skb); + int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start; + __wsum partial = SKB_GSO_CB(skb)->csum;
- partial = csum_partial(skb_transport_header(skb), plen, skb->csum); - skb->csum = res; - SKB_GSO_CB(skb)->csum_start -= plen; + SKB_GSO_CB(skb)->csum = res; + SKB_GSO_CB(skb)->csum_start = csum_start - skb->head;
- return csum_fold(partial); + return csum_fold(csum_partial(csum_start, plen, partial)); }
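The reworked gso_make_checksum() above accumulates a 32-bit partial sum (stashed in SKB_GSO_CB(skb)->csum) and only then folds it into the final 16-bit value with csum_fold(). A worked sketch of the fold itself, illustrative arithmetic rather than the kernel's implementation:

	/* Illustrative only: fold a 32-bit one's-complement sum to 16 bits. */
	static u16 demo_csum_fold(u32 sum)
	{
		sum = (sum & 0xffff) + (sum >> 16);	/* add the carry words */
		sum = (sum & 0xffff) + (sum >> 16);	/* fold any new carry */
		return (u16)~sum;			/* one's complement result */
	}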
static inline bool skb_is_gso(const struct sk_buff *skb) @@@ -3684,5 -3709,30 +3710,30 @@@ static inline unsigned int skb_gso_netw return hdr_len + skb_gso_transport_seglen(skb); }
+ /* Local Checksum Offload. + * Compute outer checksum based on the assumption that the + * inner checksum will be offloaded later. + * See Documentation/networking/checksum-offloads.txt for + * explanation of how this works. + * Fill in outer checksum adjustment (e.g. with sum of outer + * pseudo-header) before calling. + * Also ensure that inner checksum is in linear data area. + */ + static inline __wsum lco_csum(struct sk_buff *skb) + { + unsigned char *csum_start = skb_checksum_start(skb); + unsigned char *l4_hdr = skb_transport_header(skb); + __wsum partial; + + /* Start with complement of inner checksum adjustment */ + partial = ~csum_unfold(*(__force __sum16 *)(csum_start + + skb->csum_offset)); + + /* Add in checksum of our headers (incl. outer checksum + * adjustment filled in by caller) and return result. + */ + return csum_partial(l4_hdr, csum_start - l4_hdr, partial); + } + #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --combined include/net/ip_tunnels.h index dda9abf,4dd6163..5f28b60 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@@ -13,6 -13,7 +13,7 @@@ #include <net/netns/generic.h> #include <net/rtnetlink.h> #include <net/lwtunnel.h> + #include <net/dst_cache.h>
#if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> @@@ -57,6 -58,9 +58,9 @@@ struct ip_tunnel_key
struct ip_tunnel_info { struct ip_tunnel_key key; + #ifdef CONFIG_DST_CACHE + struct dst_cache dst_cache; + #endif u8 options_len; u8 mode; }; @@@ -85,11 -89,6 +89,6 @@@ struct ip_tunnel_prl_entry struct rcu_head rcu_head; };
- struct ip_tunnel_dst { - struct dst_entry __rcu *dst; - __be32 saddr; - }; - struct metadata_dst;
struct ip_tunnel { @@@ -108,7 -107,7 +107,7 @@@ int tun_hlen; /* Precalculated header length */ int mlink;
- struct ip_tunnel_dst __percpu *dst_cache; + struct dst_cache dst_cache;
struct ip_tunnel_parm parms;
@@@ -230,7 -229,6 +229,7 @@@ void ip_tunnel_xmit(struct sk_buff *skb int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t, u8 *protocol, struct flowi4 *fl4); +int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, @@@ -248,7 -246,6 +247,6 @@@ int ip_tunnel_changelink(struct net_dev int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p); void ip_tunnel_setup(struct net_device *dev, int net_id); - void ip_tunnel_dst_reset_all(struct ip_tunnel *t); int ip_tunnel_encap_setup(struct ip_tunnel *t, struct ip_tunnel_encap *ipencap);
@@@ -273,15 -270,15 +271,15 @@@ static inline u8 ip_tunnel_ecn_encap(u return INET_ECN_encapsulate(tos, inner); }
- int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto); + int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto, + bool xnet); void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, u8 proto, u8 tos, u8 ttl, __be16 df, bool xnet); struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md, gfp_t flags);
- struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, bool gre_csum, - int gso_type_mask); + struct sk_buff *iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask);
static inline void iptunnel_xmit_stats(struct net_device *dev, int pkt_len) { diff --combined include/net/tcp.h index ae6468f,9b2cb0c..e90db85 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@@ -239,13 -239,6 +239,6 @@@ extern struct inet_timewait_death_row t extern int sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; - extern int sysctl_tcp_fin_timeout; - extern int sysctl_tcp_syn_retries; - extern int sysctl_tcp_synack_retries; - extern int sysctl_tcp_retries1; - extern int sysctl_tcp_retries2; - extern int sysctl_tcp_orphan_retries; - extern int sysctl_tcp_syncookies; extern int sysctl_tcp_fastopen; extern int sysctl_tcp_retrans_collapse; extern int sysctl_tcp_stdurg; @@@ -274,7 -267,6 +267,6 @@@ extern int sysctl_tcp_thin_dupack extern int sysctl_tcp_early_retrans; extern int sysctl_tcp_limit_output_bytes; extern int sysctl_tcp_challenge_ack_limit; - extern unsigned int sysctl_tcp_notsent_lowat; extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_min_rtt_wlen; extern int sysctl_tcp_autocorking; @@@ -447,7 -439,7 +439,7 @@@ const u8 *tcp_parse_md5sig_option(cons
void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb); void tcp_v4_mtu_reduced(struct sock *sk); -void tcp_req_err(struct sock *sk, u32 seq); +void tcp_req_err(struct sock *sk, u32 seq, bool abort); int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); struct sock *tcp_create_openreq_child(const struct sock *sk, struct request_sock *req, @@@ -568,6 -560,7 +560,7 @@@ void tcp_rearm_rto(struct sock *sk) void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_reset(struct sock *sk); void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); + void tcp_fin(struct sock *sk);
/* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *); @@@ -963,9 -956,11 +956,11 @@@ static inline void tcp_enable_fack(stru */ static inline void tcp_enable_early_retrans(struct tcp_sock *tp) { + struct net *net = sock_net((struct sock *)tp); + tp->do_early_retrans = sysctl_tcp_early_retrans && sysctl_tcp_early_retrans < 4 && !sysctl_tcp_thin_dupack && - sysctl_tcp_reordering == 3; + net->ipv4.sysctl_tcp_reordering == 3; }
static inline void tcp_disable_early_retrans(struct tcp_sock *tp) @@@ -1252,7 -1247,7 +1247,7 @@@ static inline u32 keepalive_time_elapse
static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; const int rto = inet_csk(sk)->icsk_rto;
if (fin_timeout < (rto << 2) - (rto >> 1)) @@@ -1437,6 -1432,7 +1432,7 @@@ void tcp_free_fastopen_req(struct tcp_s
extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; int tcp_fastopen_reset_cipher(void *key, unsigned int len); + void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc, @@@ -1685,7 -1681,8 +1681,8 @@@ void __tcp_v4_send_check(struct sk_buf
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp) { - return tp->notsent_lowat ?: sysctl_tcp_notsent_lowat; + struct net *net = sock_net((struct sock *)tp); + return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; }
static inline bool tcp_stream_memory_free(const struct sock *sk) diff --combined kernel/bpf/verifier.c index 2e7f7ab,36dc497..2e08f8e --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@@ -246,6 -246,7 +246,7 @@@ static const struct {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output}, + {BPF_MAP_TYPE_STACK_TRACE, BPF_FUNC_get_stackid}, };
static void print_verifier_state(struct verifier_env *env) @@@ -778,15 -779,24 +779,24 @@@ static int check_xadd(struct verifier_e * bytes from that pointer, make sure that it's within stack boundary * and all elements of stack are initialized */ - static int check_stack_boundary(struct verifier_env *env, - int regno, int access_size) + static int check_stack_boundary(struct verifier_env *env, int regno, + int access_size, bool zero_size_allowed) { struct verifier_state *state = &env->cur_state; struct reg_state *regs = state->regs; int off, i;
- if (regs[regno].type != PTR_TO_STACK) + if (regs[regno].type != PTR_TO_STACK) { + if (zero_size_allowed && access_size == 0 && + regs[regno].type == CONST_IMM && + regs[regno].imm == 0) + return 0; + + verbose("R%d type=%s expected=%s\n", regno, + reg_type_str[regs[regno].type], + reg_type_str[PTR_TO_STACK]); return -EACCES; + }
off = regs[regno].imm; if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || @@@ -829,15 -839,24 +839,24 @@@ static int check_func_arg(struct verifi return 0; }
- if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_MAP_KEY || + if (arg_type == ARG_PTR_TO_MAP_KEY || arg_type == ARG_PTR_TO_MAP_VALUE) { expected_type = PTR_TO_STACK; - } else if (arg_type == ARG_CONST_STACK_SIZE) { + } else if (arg_type == ARG_CONST_STACK_SIZE || + arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { expected_type = CONST_IMM; } else if (arg_type == ARG_CONST_MAP_PTR) { expected_type = CONST_PTR_TO_MAP; } else if (arg_type == ARG_PTR_TO_CTX) { expected_type = PTR_TO_CTX; + } else if (arg_type == ARG_PTR_TO_STACK) { + expected_type = PTR_TO_STACK; + /* One exception here. In case function allows for NULL to be + * passed in as argument, it's a CONST_IMM type. Final test + * happens during stack boundary checking. + */ + if (reg->type == CONST_IMM && reg->imm == 0) + expected_type = CONST_IMM; } else { verbose("unsupported arg_type %d\n", arg_type); return -EFAULT; @@@ -867,8 -886,8 +886,8 @@@ verbose("invalid map_ptr to access map->key\n"); return -EACCES; } - err = check_stack_boundary(env, regno, (*mapp)->key_size); - + err = check_stack_boundary(env, regno, (*mapp)->key_size, + false); } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity @@@ -878,9 -897,12 +897,12 @@@ verbose("invalid map_ptr to access map->value\n"); return -EACCES; } - err = check_stack_boundary(env, regno, (*mapp)->value_size); + err = check_stack_boundary(env, regno, (*mapp)->value_size, + false); + } else if (arg_type == ARG_CONST_STACK_SIZE || + arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { + bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO);
- } else if (arg_type == ARG_CONST_STACK_SIZE) { /* bpf_xxx(..., buf, len) call will access 'len' bytes * from stack pointer 'buf'. Check it * note: regno == len, regno - 1 == buf @@@ -890,7 -912,8 +912,8 @@@ verbose("ARG_CONST_STACK_SIZE cannot be first argument\n"); return -EACCES; } - err = check_stack_boundary(env, regno - 1, reg->imm); + err = check_stack_boundary(env, regno - 1, reg->imm, + zero_size_allowed); }
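The verifier change above lets a helper declare ARG_CONST_STACK_SIZE_OR_ZERO, in which case a program may pass a NULL pointer together with a zero length instead of a real stack buffer. A hedged sketch of what such a helper prototype could look like (entirely hypothetical helper, field values assumed from the surrounding verifier code):

	/* Illustrative only: a helper whose buffer argument may be either
	 * (stack pointer, constant size) or (NULL, 0). */
	static u64 demo_helper(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
	{
		return 0;	/* hypothetical helper body */
	}

	static const struct bpf_func_proto demo_helper_proto = {
		.func		= demo_helper,
		.gpl_only	= false,
		.ret_type	= RET_INTEGER,
		.arg1_type	= ARG_PTR_TO_STACK,
		.arg2_type	= ARG_CONST_STACK_SIZE_OR_ZERO,
	};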
return err; @@@ -911,8 -934,11 +934,11 @@@ static int check_map_func_compatibility * don't allow any other map type to be passed into * the special func; */ - if (bool_func && bool_map != bool_func) + if (bool_func && bool_map != bool_func) { + verbose("cannot pass map_type %d into func %d\n", + map->map_type, func_id); return -EINVAL; + } }
return 0; @@@ -2082,7 -2108,7 +2108,7 @@@ static void adjust_branches(struct bpf_ /* adjust offset of jmps if necessary */ if (i < pos && i + insn->off + 1 > pos) insn->off += delta; - else if (i > pos && i + insn->off + 1 < pos) + else if (i > pos + delta && i + insn->off + 1 <= pos + delta) insn->off -= delta; } } diff --combined lib/Kconfig.debug index 8bfd1ac,f890ee5..60d09e9 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@@ -1400,21 -1400,6 +1400,21 @@@ config RCU_EQS_DEBU
endmenu # "RCU Debugging"
+config DEBUG_WQ_FORCE_RR_CPU + bool "Force round-robin CPU selection for unbound work items" + depends on DEBUG_KERNEL + default n + help + Workqueue used to implicitly guarantee that work items queued + without explicit CPU specified are put on the local CPU. This + guarantee is no longer true and while local CPU is still + preferred work items may be put on foreign CPUs. Kernel + parameter "workqueue.debug_force_rr_cpu" is added to force + round-robin CPU selection to flush out usages which depend on the + now broken guarantee. This config option enables the debug + feature by default. When enabled, memory and cache locality will + be impacted. + config DEBUG_BLOCK_EXT_DEVT bool "Force extended block device numbers and spread them" depends on DEBUG_KERNEL @@@ -1753,6 -1738,14 +1753,14 @@@ config TEST_KSTRTO config TEST_PRINTF tristate "Test printf() family of functions at runtime"
+ config TEST_BITMAP + tristate "Test bitmap_*() family of functions at runtime" + default n + help + Enable this option to test the bitmap functions at boot. + + If unsure, say N. + config TEST_RHASHTABLE tristate "Perform selftest on resizable hash table" default n diff --combined net/batman-adv/gateway_client.c index ccf70be,261866e..4b598bd --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@@ -1,4 -1,4 +1,4 @@@ - /* Copyright (C) 2009-2015 B.A.T.M.A.N. contributors: + /* Copyright (C) 2009-2016 B.A.T.M.A.N. contributors: * * Marek Lindner * @@@ -28,6 -28,7 +28,7 @@@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/kernel.h> + #include <linux/kref.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/rculist.h> @@@ -59,12 -60,29 +60,29 @@@ */ #define BATADV_DHCP_CHADDR_OFFSET 28
+ /** + * batadv_gw_node_release - release gw_node from lists and queue for free after + * rcu grace period + * @ref: kref pointer of the gw_node + */ + static void batadv_gw_node_release(struct kref *ref) + { + struct batadv_gw_node *gw_node; + + gw_node = container_of(ref, struct batadv_gw_node, refcount); + + batadv_orig_node_free_ref(gw_node->orig_node); + kfree_rcu(gw_node, rcu); + } + + /** + * batadv_gw_node_free_ref - decrement the gw_node refcounter and possibly + * release it + * @gw_node: gateway node to free + */ static void batadv_gw_node_free_ref(struct batadv_gw_node *gw_node) { - if (atomic_dec_and_test(&gw_node->refcount)) { - batadv_orig_node_free_ref(gw_node->orig_node); - kfree_rcu(gw_node, rcu); - } + kref_put(&gw_node->refcount, batadv_gw_node_release); }
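The conversion above moves gw_node from an open-coded atomic_t reference count to a kref, so the final put runs a release callback that recovers the enclosing object with container_of() and frees it. The generic shape of that pattern, sketched with hypothetical names:

	/* Illustrative only: the kref release pattern used above. */
	struct demo_node {
		struct kref refcount;
		/* ... payload ... */
	};

	static void demo_node_release(struct kref *ref)
	{
		struct demo_node *node = container_of(ref, struct demo_node, refcount);

		kfree(node);		/* or kfree_rcu() when RCU readers exist */
	}

	static void demo_node_put(struct demo_node *node)
	{
		kref_put(&node->refcount, demo_node_release);
	}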
static struct batadv_gw_node * @@@ -77,7 -95,7 +95,7 @@@ batadv_gw_get_selected_gw_node(struct b if (!gw_node) goto out;
- if (!atomic_inc_not_zero(&gw_node->refcount)) + if (!kref_get_unless_zero(&gw_node->refcount)) gw_node = NULL;
out: @@@ -100,7 -118,7 +118,7 @@@ batadv_gw_get_selected_orig(struct bata if (!orig_node) goto unlock;
- if (!atomic_inc_not_zero(&orig_node->refcount)) + if (!kref_get_unless_zero(&orig_node->refcount)) orig_node = NULL;
unlock: @@@ -118,7 -136,7 +136,7 @@@ static void batadv_gw_select(struct bat
spin_lock_bh(&bat_priv->gw.list_lock);
- if (new_gw_node && !atomic_inc_not_zero(&new_gw_node->refcount)) + if (new_gw_node && !kref_get_unless_zero(&new_gw_node->refcount)) new_gw_node = NULL;
curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1); @@@ -170,7 -188,7 +188,7 @@@ batadv_gw_get_best_gw_node(struct batad if (!router_ifinfo) goto next;
- if (!atomic_inc_not_zero(&gw_node->refcount)) + if (!kref_get_unless_zero(&gw_node->refcount)) goto next;
tq_avg = router_ifinfo->bat_iv.tq_avg; @@@ -188,7 -206,7 +206,7 @@@ if (curr_gw) batadv_gw_node_free_ref(curr_gw); curr_gw = gw_node; - atomic_inc(&curr_gw->refcount); + kref_get(&curr_gw->refcount); } break;
@@@ -203,7 -221,7 +221,7 @@@ if (curr_gw) batadv_gw_node_free_ref(curr_gw); curr_gw = gw_node; - atomic_inc(&curr_gw->refcount); + kref_get(&curr_gw->refcount); } break; } @@@ -423,7 -441,7 +441,7 @@@ static void batadv_gw_node_add(struct b if (gateway->bandwidth_down == 0) return;
- if (!atomic_inc_not_zero(&orig_node->refcount)) + if (!kref_get_unless_zero(&orig_node->refcount)) return;
gw_node = kzalloc(sizeof(*gw_node), GFP_ATOMIC); @@@ -436,7 -454,7 +454,7 @@@ gw_node->orig_node = orig_node; gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); - atomic_set(&gw_node->refcount, 1); + kref_init(&gw_node->refcount);
spin_lock_bh(&bat_priv->gw.list_lock); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list); @@@ -456,7 -474,7 +474,7 @@@ * @bat_priv: the bat priv with all the soft interface information * @orig_node: originator announcing gateway capabilities * - * Returns gateway node if found or NULL otherwise. + * Return: gateway node if found or NULL otherwise. */ static struct batadv_gw_node * batadv_gw_node_get(struct batadv_priv *bat_priv, @@@ -469,7 -487,7 +487,7 @@@ if (gw_node_tmp->orig_node != orig_node) continue;
- if (!atomic_inc_not_zero(&gw_node_tmp->refcount)) + if (!kref_get_unless_zero(&gw_node_tmp->refcount)) continue;
gw_node = gw_node_tmp; @@@ -527,12 -545,11 +545,12 @@@ void batadv_gw_node_update(struct batad * gets dereferenced. */ spin_lock_bh(&bat_priv->gw.list_lock); - hlist_del_init_rcu(&gw_node->list); + if (!hlist_unhashed(&gw_node->list)) { + hlist_del_init_rcu(&gw_node->list); + batadv_gw_node_free_ref(gw_node); + } spin_unlock_bh(&bat_priv->gw.list_lock);
- batadv_gw_node_free_ref(gw_node); - curr_gw = batadv_gw_get_selected_gw_node(bat_priv); if (gw_node == curr_gw) batadv_gw_reselect(bat_priv); @@@ -656,13 -673,13 +674,13 @@@ out * @chaddr: buffer where the client address will be stored. Valid * only if the function returns BATADV_DHCP_TO_CLIENT * - * Returns: + * This function may re-allocate the data buffer of the skb passed as argument. + * + * Return: * - BATADV_DHCP_NO if the packet is not a dhcp message or if there was an error * while parsing it * - BATADV_DHCP_TO_SERVER if this is a message going to the DHCP server * - BATADV_DHCP_TO_CLIENT if this is a message going to a DHCP client - * - * This function may re-allocate the data buffer of the skb passed as argument. */ enum batadv_dhcp_recipient batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, @@@ -777,11 -794,11 +795,11 @@@ * server. Due to topology changes it may be the case that the GW server * previously selected is not the best one anymore. * - * Returns true if the packet destination is unicast and it is not the best gw, - * false otherwise. - * * This call might reallocate skb data. * Must be invoked only when the DHCP packet is going TO a DHCP SERVER. + * + * Return: true if the packet destination is unicast and it is not the best gw, + * false otherwise. */ bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb) diff --combined net/batman-adv/hard-interface.c index 57f71071,fb2d9c0..e2aaa4c --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@@ -1,4 -1,4 +1,4 @@@ - /* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: + /* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@@ -18,6 -18,7 +18,7 @@@ #include "hard-interface.h" #include "main.h"
+ #include <linux/atomic.h> #include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/errno.h> @@@ -26,6 -27,7 +27,7 @@@ #include <linux/if_ether.h> #include <linux/if.h> #include <linux/kernel.h> + #include <linux/kref.h> #include <linux/list.h> #include <linux/netdevice.h> #include <linux/printk.h> @@@ -47,13 -49,19 +49,19 @@@ #include "sysfs.h" #include "translation-table.h"
- void batadv_hardif_free_rcu(struct rcu_head *rcu) + /** + * batadv_hardif_release - release hard interface from lists and queue for + * free after rcu grace period + * @ref: kref pointer of the hard interface + */ + void batadv_hardif_release(struct kref *ref) { struct batadv_hard_iface *hard_iface;
- hard_iface = container_of(rcu, struct batadv_hard_iface, rcu); + hard_iface = container_of(ref, struct batadv_hard_iface, refcount); dev_put(hard_iface->net_dev); - kfree(hard_iface); + + kfree_rcu(hard_iface, rcu); }
struct batadv_hard_iface * @@@ -64,7 -72,7 +72,7 @@@ batadv_hardif_get_by_netdev(const struc rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->net_dev == net_dev && - atomic_inc_not_zero(&hard_iface->refcount)) + kref_get_unless_zero(&hard_iface->refcount)) goto out; }
@@@ -76,28 -84,6 +84,28 @@@ out }
/** + * batadv_mutual_parents - check if two devices are each others parent + * @dev1: 1st net_device + * @dev2: 2nd net_device + * + * veth devices come in pairs and each is the parent of the other! + * + * Return: true if the devices are each others parent, otherwise false + */ +static bool batadv_mutual_parents(const struct net_device *dev1, + const struct net_device *dev2) +{ + int dev1_parent_iflink = dev_get_iflink(dev1); + int dev2_parent_iflink = dev_get_iflink(dev2); + + if (!dev1_parent_iflink || !dev2_parent_iflink) + return false; + + return (dev1_parent_iflink == dev2->ifindex) && + (dev2_parent_iflink == dev1->ifindex); +} + +/** * batadv_is_on_batman_iface - check if a device is a batman iface descendant * @net_dev: the device to check * @@@ -107,7 -93,7 +115,7 @@@ * This function recursively checks all the fathers of the device passed as * argument looking for a batman-adv soft interface. * - * Returns true if the device is descendant of a batman-adv mesh interface (or + * Return: true if the device is descendant of a batman-adv mesh interface (or * if it is a batman-adv interface itself), false otherwise */ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) @@@ -130,9 -116,6 +138,9 @@@ if (WARN(!parent_dev, "Cannot find parent device")) return false;
+ if (batadv_mutual_parents(net_dev, parent_dev)) + return false; + ret = batadv_is_on_batman_iface(parent_dev);
return ret; @@@ -161,7 -144,7 +169,7 @@@ static int batadv_is_valid_iface(const * interface * @net_device: the device to check * - * Returns true if the net device is a 802.11 wireless device, false otherwise. + * Return: true if the net device is a 802.11 wireless device, false otherwise. */ bool batadv_is_wifi_netdev(struct net_device *net_device) { @@@ -194,7 -177,7 +202,7 @@@ batadv_hardif_get_active(const struct n continue;
if (hard_iface->if_status == BATADV_IF_ACTIVE && - atomic_inc_not_zero(&hard_iface->refcount)) + kref_get_unless_zero(&hard_iface->refcount)) goto out; }
@@@ -228,7 -211,7 +236,7 @@@ static void batadv_primary_if_select(st
ASSERT_RTNL();
- if (new_hard_iface && !atomic_inc_not_zero(&new_hard_iface->refcount)) + if (new_hard_iface && !kref_get_unless_zero(&new_hard_iface->refcount)) new_hard_iface = NULL;
curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1); @@@ -426,7 -409,8 +434,8 @@@ batadv_hardif_deactivate_interface(stru * * Invoke ndo_del_slave on master passing slave as argument. In this way slave * is free'd and master can correctly change its internal state. - * Return 0 on success, a negative value representing the error otherwise + * + * Return: 0 on success, a negative value representing the error otherwise */ static int batadv_master_del_slave(struct batadv_hard_iface *slave, struct net_device *master) @@@ -455,7 -439,7 +464,7 @@@ int batadv_hardif_enable_interface(stru if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) goto out;
- if (!atomic_inc_not_zero(&hard_iface->refcount)) + if (!kref_get_unless_zero(&hard_iface->refcount)) goto out;
soft_iface = dev_get_by_name(&init_net, iface_name); @@@ -676,7 -660,8 +685,8 @@@ batadv_hardif_add_interface(struct net_ hard_iface->num_bcasts = BATADV_NUM_BCASTS_WIRELESS;
/* extra reference for return */ - atomic_set(&hard_iface->refcount, 2); + kref_init(&hard_iface->refcount); + kref_get(&hard_iface->refcount);
batadv_check_known_mac_addr(hard_iface->net_dev); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); diff --combined net/batman-adv/translation-table.c index 0e80fd1,1188279..5c7fa02 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@@ -1,4 -1,4 +1,4 @@@ - /* Copyright (C) 2007-2015 B.A.T.M.A.N. contributors: + /* Copyright (C) 2007-2016 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli * @@@ -31,6 -31,7 +31,7 @@@ #include <linux/jhash.h> #include <linux/jiffies.h> #include <linux/kernel.h> + #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/netdevice.h> @@@ -68,7 -69,15 +69,15 @@@ static void batadv_tt_global_del(struc unsigned short vid, const char *message, bool roaming);
- /* returns 1 if they are the same mac addr and vid */ + /** + * batadv_compare_tt - check if two TT entries are the same + * @node: the list element pointer of the first TT entry + * @data2: pointer to the tt_common_entry of the second TT entry + * + * Compare the MAC address and the VLAN ID of the two TT entries and check if + * they are the same TT client. + * Return: 1 if the two TT clients are the same, 0 otherwise + */ static int batadv_compare_tt(const struct hlist_node *node, const void *data2) { const void *data1 = container_of(node, struct batadv_tt_common_entry, @@@ -84,7 -93,7 +93,7 @@@ * @data: pointer to the tt_common_entry object to map * @size: the size of the hash table * - * Returns the hash index where the object represented by 'data' should be + * Return: the hash index where the object represented by 'data' should be * stored at. */ static inline u32 batadv_choose_tt(const void *data, u32 size) @@@ -105,7 -114,7 +114,7 @@@ * @addr: the mac address of the client to look for * @vid: VLAN identifier * - * Returns a pointer to the tt_common struct belonging to the searched client if + * Return: a pointer to the tt_common struct belonging to the searched client if * found, NULL otherwise. */ static struct batadv_tt_common_entry * @@@ -133,7 -142,7 +142,7 @@@ batadv_tt_hash_find(struct batadv_hasht if (tt->vid != vid) continue;
- if (!atomic_inc_not_zero(&tt->refcount)) + if (!kref_get_unless_zero(&tt->refcount)) continue;
tt_tmp = tt; @@@ -150,7 -159,7 +159,7 @@@ * @addr: the mac address of the client to look for * @vid: VLAN identifier * - * Returns a pointer to the corresponding tt_local_entry struct if the client is + * Return: a pointer to the corresponding tt_local_entry struct if the client is * found, NULL otherwise. */ static struct batadv_tt_local_entry * @@@ -175,7 -184,7 +184,7 @@@ batadv_tt_local_hash_find(struct batadv * @addr: the mac address of the client to look for * @vid: VLAN identifier * - * Returns a pointer to the corresponding tt_global_entry struct if the client + * Return: a pointer to the corresponding tt_global_entry struct if the client * is found, NULL otherwise. */ static struct batadv_tt_global_entry * @@@ -194,34 -203,68 +203,68 @@@ batadv_tt_global_hash_find(struct batad return tt_global_entry; }
+ /** + * batadv_tt_local_entry_release - release tt_local_entry from lists and queue + * for free after rcu grace period + * @ref: kref pointer of the nc_node + */ + static void batadv_tt_local_entry_release(struct kref *ref) + { + struct batadv_tt_local_entry *tt_local_entry; + + tt_local_entry = container_of(ref, struct batadv_tt_local_entry, + common.refcount); + + kfree_rcu(tt_local_entry, common.rcu); + } + + /** + * batadv_tt_local_entry_free_ref - decrement the tt_local_entry refcounter and + * possibly release it + * @tt_local_entry: tt_local_entry to be free'd + */ static void batadv_tt_local_entry_free_ref(struct batadv_tt_local_entry *tt_local_entry) { - if (atomic_dec_and_test(&tt_local_entry->common.refcount)) - kfree_rcu(tt_local_entry, common.rcu); + kref_put(&tt_local_entry->common.refcount, + batadv_tt_local_entry_release); }
/** - * batadv_tt_global_entry_free_ref - decrement the refcounter for a - * tt_global_entry and possibly free it - * @tt_global_entry: the object to free + * batadv_tt_global_entry_release - release tt_global_entry from lists and queue + * for free after rcu grace period + * @ref: kref pointer of the nc_node + */ + static void batadv_tt_global_entry_release(struct kref *ref) + { + struct batadv_tt_global_entry *tt_global_entry; + + tt_global_entry = container_of(ref, struct batadv_tt_global_entry, + common.refcount); + + batadv_tt_global_del_orig_list(tt_global_entry); + kfree_rcu(tt_global_entry, common.rcu); + } + + /** + * batadv_tt_global_entry_free_ref - decrement the tt_global_entry refcounter + * and possibly release it + * @tt_global_entry: tt_global_entry to be free'd */ static void batadv_tt_global_entry_free_ref(struct batadv_tt_global_entry *tt_global_entry) { - if (atomic_dec_and_test(&tt_global_entry->common.refcount)) { - batadv_tt_global_del_orig_list(tt_global_entry); - kfree_rcu(tt_global_entry, common.rcu); - } + kref_put(&tt_global_entry->common.refcount, + batadv_tt_global_entry_release); }
/** * batadv_tt_global_hash_count - count the number of orig entries - * @hash: hash table containing the tt entries + * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client to count entries for * @vid: VLAN identifier * - * Return the number of originators advertising the given address/data + * Return: the number of originators advertising the given address/data * (excluding ourself). */ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, @@@ -286,9 -329,9 +329,9 @@@ static void batadv_tt_local_size_dec(st }
/** - * batadv_tt_global_size_mod - change the size by v of the local table - * identified by vid - * @bat_priv: the bat priv with all the soft interface information + * batadv_tt_global_size_mod - change the size by v of the global table + * for orig_node identified by vid + * @orig_node: the originator for which the table has to be modified * @vid: the VLAN identifier * @v: the amount to sum to the global table size */ @@@ -303,11 -346,9 +346,11 @@@ static void batadv_tt_global_size_mod(s
if (atomic_add_return(v, &vlan->tt.num_entries) == 0) { spin_lock_bh(&orig_node->vlan_list_lock); - hlist_del_init_rcu(&vlan->list); + if (!hlist_unhashed(&vlan->list)) { + hlist_del_init_rcu(&vlan->list); + batadv_orig_node_vlan_free_ref(vlan); + } spin_unlock_bh(&orig_node->vlan_list_lock); - batadv_orig_node_vlan_free_ref(vlan); }
batadv_orig_node_vlan_free_ref(vlan); @@@ -340,22 -381,28 +383,28 @@@ static void batadv_tt_global_size_dec(s /** * batadv_tt_orig_list_entry_release - release tt orig entry from lists and * queue for free after rcu grace period - * @orig_entry: tt orig entry to be free'd + * @ref: kref pointer of the tt orig entry */ - static void - batadv_tt_orig_list_entry_release(struct batadv_tt_orig_list_entry *orig_entry) + static void batadv_tt_orig_list_entry_release(struct kref *ref) { + struct batadv_tt_orig_list_entry *orig_entry; + + orig_entry = container_of(ref, struct batadv_tt_orig_list_entry, + refcount); + batadv_orig_node_free_ref(orig_entry->orig_node); kfree_rcu(orig_entry, rcu); }
+ /** + * batadv_tt_orig_list_entry_free_ref - decrement the tt orig entry refcounter + * and possibly release it + * @orig_entry: tt orig entry to be free'd + */ static void batadv_tt_orig_list_entry_free_ref(struct batadv_tt_orig_list_entry *orig_entry) { - if (!atomic_dec_and_test(&orig_entry->refcount)) - return; - - batadv_tt_orig_list_entry_release(orig_entry); + kref_put(&orig_entry->refcount, batadv_tt_orig_list_entry_release); }
/** @@@ -437,7 -484,7 +486,7 @@@ unlock * batadv_tt_len - compute length in bytes of given number of tt changes * @changes_num: number of tt changes * - * Returns computed length in bytes. + * Return: computed length in bytes. */ static int batadv_tt_len(int changes_num) { @@@ -448,7 -495,7 +497,7 @@@ * batadv_tt_entries - compute the number of entries fitting in tt_len bytes * @tt_len: available space * - * Returns the number of entries. + * Return: the number of entries. */ static u16 batadv_tt_entries(u16 tt_len) { @@@ -460,7 -507,7 +509,7 @@@ * size when transmitted over the air * @bat_priv: the bat priv with all the soft interface information * - * Returns local translation table size in bytes. + * Return: local translation table size in bytes. */ static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv) { @@@ -526,7 -573,7 +575,7 @@@ static void batadv_tt_global_free(struc * @mark: the value contained in the skb->mark field of the received packet (if * any) * - * Returns true if the client was successfully added, false otherwise. + * Return: true if the client was successfully added, false otherwise. */ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, unsigned short vid, int ifindex, u32 mark) @@@ -620,7 -667,8 +669,8 @@@ tt_local->common.vid = vid; if (batadv_is_wifi_netdev(in_dev)) tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; - atomic_set(&tt_local->common.refcount, 2); + kref_init(&tt_local->common.refcount); + kref_get(&tt_local->common.refcount); tt_local->last_seen = jiffies; tt_local->common.added_at = tt_local->last_seen;
@@@ -721,12 -769,11 +771,11 @@@ out * function reserves the amount of space needed to send the entire global TT * table. In case of success the value is updated with the real amount of * reserved bytes - * Allocate the needed amount of memory for the entire TT TVLV and write its * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data * objects, one per active VLAN served by the originator node. * - * Return the size of the allocated buffer or 0 in case of failure. + * Return: the size of the allocated buffer or 0 in case of failure. */ static u16 batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, @@@ -800,7 -847,7 +849,7 @@@ out * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data * objects, one per active VLAN. * - * Return the size of the allocated buffer or 0 in case of failure. + * Return: the size of the allocated buffer or 0 in case of failure. */ static u16 batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, @@@ -1042,7 -1089,7 +1091,7 @@@ batadv_tt_local_set_pending(struct bata * @message: message to append to the log on deletion * @roaming: true if the deletion is due to a roaming event * - * Returns the flags assigned to the local entry before being deleted + * Return: the flags assigned to the local entry before being deleted */ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid, const char *message, @@@ -1242,10 -1289,16 +1291,16 @@@ static void batadv_tt_changes_list_free spin_unlock_bh(&bat_priv->tt.changes_list_lock); }
- /* retrieves the orig_tt_list_entry belonging to orig_node from the + /** + * batadv_tt_global_orig_entry_find - find a TT orig_list_entry + * @entry: the TT global entry where the orig_list_entry has to be + * extracted from + * @orig_node: the originator for which the orig_list_entry has to be found + * + * retrieve the orig_tt_list_entry belonging to orig_node from the * batadv_tt_global_entry list * - * returns it with an increased refcounter, NULL if not found + * Return: it with an increased refcounter, NULL if not found */ static struct batadv_tt_orig_list_entry * batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, @@@ -1259,7 -1312,7 +1314,7 @@@ hlist_for_each_entry_rcu(tmp_orig_entry, head, list) { if (tmp_orig_entry->orig_node != orig_node) continue; - if (!atomic_inc_not_zero(&tmp_orig_entry->refcount)) + if (!kref_get_unless_zero(&tmp_orig_entry->refcount)) continue;
orig_entry = tmp_orig_entry; @@@ -1270,8 -1323,15 +1325,15 @@@ return orig_entry; }
- /* find out if an orig_node is already in the list of a tt_global_entry. - * returns true if found, false otherwise + /** + * batadv_tt_global_entry_has_orig - check if a TT global entry is also handled + * by a given originator + * @entry: the TT global entry to check + * @orig_node: the originator to search in the list + * + * find out if an orig_node is already in the list of a tt_global_entry. + * + * Return: true if found, false otherwise */ static bool batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, @@@ -1309,11 -1369,12 +1371,12 @@@ batadv_tt_global_orig_entry_add(struct goto out;
INIT_HLIST_NODE(&orig_entry->list); - atomic_inc(&orig_node->refcount); + kref_get(&orig_node->refcount); batadv_tt_global_size_inc(orig_node, tt_global->common.vid); orig_entry->orig_node = orig_node; orig_entry->ttvn = ttvn; - atomic_set(&orig_entry->refcount, 2); + kref_init(&orig_entry->refcount); + kref_get(&orig_entry->refcount);
spin_lock_bh(&tt_global->list_lock); hlist_add_head_rcu(&orig_entry->list, @@@ -1343,7 -1404,7 +1406,7 @@@ out * * The caller must hold orig_node refcount. * - * Return true if the new entry has been added, false otherwise + * Return: true if the new entry has been added, false otherwise */ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, @@@ -1389,7 -1450,8 +1452,8 @@@ */ if (flags & BATADV_TT_CLIENT_ROAM) tt_global_entry->roam_at = jiffies; - atomic_set(&common->refcount, 2); + kref_init(&common->refcount); + kref_get(&common->refcount); common->added_at = jiffies;
INIT_HLIST_HEAD(&tt_global_entry->orig_list); @@@ -1501,7 -1563,7 +1565,7 @@@ out * @tt_global_entry: global translation table entry to be analyzed * * This functon assumes the caller holds rcu_read_lock(). - * Returns best originator list entry or NULL on errors. + * Return: best originator list entry or NULL on errors. */ static struct batadv_tt_orig_list_entry * batadv_transtable_best_orig(struct batadv_priv *bat_priv, @@@ -2031,7 -2093,7 +2095,7 @@@ _batadv_is_ap_isolated(struct batadv_tt * @addr: mac address of the destination client * @vid: VLAN identifier * - * Returns a pointer to the originator that was selected as destination in the + * Return: a pointer to the originator that was selected as destination in the * mesh for contacting the client 'addr', NULL otherwise. * In case of multiple originators serving the same client, the function returns * the best one (best in terms of metric towards the destination node). @@@ -2071,7 -2133,7 +2135,7 @@@ struct batadv_orig_node *batadv_transta /* found anything? */ if (best_entry) orig_node = best_entry->orig_node; - if (orig_node && !atomic_inc_not_zero(&orig_node->refcount)) + if (orig_node && !kref_get_unless_zero(&orig_node->refcount)) orig_node = NULL; rcu_read_unlock();
@@@ -2106,7 -2168,7 +2170,7 @@@ out * because the XOR operation can combine them all while trying to reduce the * noise as much as possible. * - * Returns the checksum of the global table of a given originator. + * Return: the checksum of the global table of a given originator. */ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, @@@ -2183,7 -2245,7 +2247,7 @@@ * For details about the computation, please refer to the documentation for * batadv_tt_global_crc(). * - * Returns the checksum of the local table + * Return: the checksum of the local table */ static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv, unsigned short vid) @@@ -2289,7 -2351,7 +2353,7 @@@ static void batadv_tt_req_purge(struct * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node this request is being issued for * - * Returns the pointer to the new tt_req_node struct if no request + * Return: the pointer to the new tt_req_node struct if no request * has already been issued for this orig_node, NULL otherwise. */ static struct batadv_tt_req_node * @@@ -2324,7 -2386,7 +2388,7 @@@ unlock * @entry_ptr: to be checked local tt entry * @data_ptr: not used but definition required to satisfy the callback prototype * - * Returns 1 if the entry is a valid, 0 otherwise. + * Return: 1 if the entry is a valid, 0 otherwise. */ static int batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr) { @@@ -2408,9 -2470,8 +2472,8 @@@ static void batadv_tt_tvlv_generate(str * @orig_node: originator for which the CRCs have to be checked * @tt_vlan: pointer to the first tvlv VLAN entry * @num_vlan: number of tvlv VLAN entries - * @create: if true, create VLAN objects if not found * - * Return true if all the received CRCs match the locally stored ones, false + * Return: true if all the received CRCs match the locally stored ones, false * otherwise */ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, @@@ -2513,6 -2574,8 +2576,8 @@@ static void batadv_tt_global_update_crc * @num_vlan: number of tvlv VLAN entries * @full_table: ask for the entire translation table if true, while only for the * last TT diff otherwise + * + * Return: true if the TT Request was sent, false otherwise */ static int batadv_send_tt_request(struct batadv_priv *bat_priv, struct batadv_orig_node *dst_orig_node, @@@ -2593,7 -2656,7 +2658,7 @@@ out * @req_src: mac address of tt request sender * @req_dst: mac address of tt request recipient * - * Returns true if tt request reply was sent, false otherwise. + * Return: true if tt request reply was sent, false otherwise. */ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, @@@ -2725,7 -2788,7 +2790,7 @@@ out * @tt_data: tt data containing the tt request information * @req_src: mac address of tt request sender * - * Returns true if tt request reply was sent, false otherwise. + * Return: true if tt request reply was sent, false otherwise. */ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, @@@ -2843,7 -2906,7 +2908,7 @@@ out * @req_src: mac address of tt request sender * @req_dst: mac address of tt request recipient * - * Returns true if tt request reply was sent, false otherwise. + * Return: true if tt request reply was sent, false otherwise. 
*/ static bool batadv_send_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, @@@ -2938,7 -3001,7 +3003,7 @@@ static void batadv_tt_update_changes(st * @addr: the mac address of the client to check * @vid: VLAN identifier * - * Returns true if the client is served by this node, false otherwise. + * Return: true if the client is served by this node, false otherwise. */ bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) @@@ -3055,11 -3118,16 +3120,16 @@@ static void batadv_tt_roam_purge(struc spin_unlock_bh(&bat_priv->tt.roam_list_lock); }
- /* This function checks whether the client already reached the + /** + * batadv_tt_check_roam_count - check if a client has roamed too frequently + * @bat_priv: the bat priv with all the soft interface information + * @client: mac address of the roaming client + * + * This function checks whether the client already reached the * maximum number of possible roaming phases. In this case the ROAMING_ADV * will not be sent. * - * returns true if the ROAMING_ADV can be sent, false otherwise + * Return: true if the ROAMING_ADV can be sent, false otherwise */ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client) { @@@ -3371,13 -3439,12 +3441,12 @@@ out * batadv_tt_update_orig - update global translation table with new tt * information received via ogms * @bat_priv: the bat priv with all the soft interface information - * @orig: the orig_node of the ogm - * @tt_vlan: pointer to the first tvlv VLAN entry + * @orig_node: the orig_node of the ogm + * @tt_buff: pointer to the first tvlv VLAN entry * @tt_num_vlan: number of tvlv VLAN entries * @tt_change: pointer to the first entry in the TT buffer * @tt_num_changes: number of tt changes inside the tt buffer * @ttvn: translation table version number of this changeset - * @tt_crc: crc32 checksum of orig node's translation table */ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, @@@ -3459,7 -3526,7 +3528,7 @@@ request_table * @addr: the mac address of the client to check * @vid: VLAN identifier * - * Returns true if we know that the client has moved from its old originator + * Return: true if we know that the client has moved from its old originator * to another one. This entry is still kept for consistency purposes and will be * deleted later by a DEL or because of timeout */ @@@ -3485,7 -3552,7 +3554,7 @@@ out * @addr: the mac address of the local client to query * @vid: VLAN identifier * - * Returns true if the local client is known to be roaming (it is not served by + * Return: true if the local client is known to be roaming (it is not served by * this node anymore) or not. If yes, the client is still present in the table * to keep the latter consistent with the node TTVN */ @@@ -3614,7 -3681,7 +3683,7 @@@ static void batadv_tt_tvlv_ogm_handler_ * @tvlv_value: tvlv buffer containing the tt data * @tvlv_value_len: tvlv buffer length * - * Returns NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS + * Return: NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS * otherwise. */ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, @@@ -3695,7 -3762,7 +3764,7 @@@ * @tvlv_value: tvlv buffer containing the tt data * @tvlv_value_len: tvlv buffer length * - * Returns NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS + * Return: NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS * otherwise. */ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, @@@ -3741,7 -3808,7 +3810,7 @@@ out * batadv_tt_init - initialise the translation table internals * @bat_priv: the bat priv with all the soft interface information * - * Return 0 on success or negative error number in case of failure. + * Return: 0 on success or negative error number in case of failure. 
*/ int batadv_tt_init(struct batadv_priv *bat_priv) { @@@ -3779,7 -3846,7 +3848,7 @@@ * @addr: the mac address of the client * @vid: the identifier of the VLAN where this client is connected * - * Returns true if the client is marked with the TT_CLIENT_ISOLA flag, false + * Return: true if the client is marked with the TT_CLIENT_ISOLA flag, false * otherwise */ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, diff --combined net/bridge/br_mdb.c index 74c278e,cf51b7b..73786e2 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@@ -41,6 -41,14 +41,14 @@@ fail return -EMSGSIZE; }
+ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags) + { + e->state = flags & MDB_PG_FLAGS_PERMANENT; + e->flags = 0; + if (flags & MDB_PG_FLAGS_OFFLOAD) + e->flags |= MDB_FLAGS_OFFLOAD; + } + static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev) { @@@ -80,26 -88,41 +88,41 @@@ for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; pp = &p->next) { + struct nlattr *nest_ent; + struct br_mdb_entry e; + port = p->port; - if (port) { - struct br_mdb_entry e; - memset(&e, 0, sizeof(e)); - e.ifindex = port->dev->ifindex; - e.state = p->state; - e.vid = p->addr.vid; - if (p->addr.proto == htons(ETH_P_IP)) - e.addr.u.ip4 = p->addr.u.ip4; + if (!port) + continue; + + memset(&e, 0, sizeof(e)); + e.ifindex = port->dev->ifindex; + e.vid = p->addr.vid; + __mdb_entry_fill_flags(&e, p->flags); + if (p->addr.proto == htons(ETH_P_IP)) + e.addr.u.ip4 = p->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) - if (p->addr.proto == htons(ETH_P_IPV6)) - e.addr.u.ip6 = p->addr.u.ip6; + if (p->addr.proto == htons(ETH_P_IPV6)) + e.addr.u.ip6 = p->addr.u.ip6; #endif - e.addr.proto = p->addr.proto; - if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) { - nla_nest_cancel(skb, nest2); - err = -EMSGSIZE; - goto out; - } + e.addr.proto = p->addr.proto; + nest_ent = nla_nest_start(skb, + MDBA_MDB_ENTRY_INFO); + if (!nest_ent) { + nla_nest_cancel(skb, nest2); + err = -EMSGSIZE; + goto out; } + if (nla_put_nohdr(skb, sizeof(e), &e) || + nla_put_u32(skb, + MDBA_MDB_EATTR_TIMER, + br_timer_value(&p->timer))) { + nla_nest_cancel(skb, nest_ent); + nla_nest_cancel(skb, nest2); + err = -EMSGSIZE; + goto out; + } + nla_nest_end(skb, nest_ent); } nla_nest_end(skb, nest2); skip: @@@ -209,7 -232,7 +232,7 @@@ static inline size_t rtnl_mdb_nlmsg_siz }
static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, - int type) + int type, struct net_bridge_port_group *pg) { struct switchdev_obj_port_mdb mdb = { .obj = { @@@ -232,10 -255,13 +255,13 @@@ #endif
mdb.obj.orig_dev = port_dev; - if (port_dev && type == RTM_NEWMDB) - switchdev_port_obj_add(port_dev, &mdb.obj); - else if (port_dev && type == RTM_DELMDB) + if (port_dev && type == RTM_NEWMDB) { + err = switchdev_port_obj_add(port_dev, &mdb.obj); + if (!err && pg) + pg->flags |= MDB_PG_FLAGS_OFFLOAD; + } else if (port_dev && type == RTM_DELMDB) { switchdev_port_obj_del(port_dev, &mdb.obj); + }
skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); if (!skb) @@@ -253,21 -279,21 +279,21 @@@ errout rtnl_set_sk_err(net, RTNLGRP_MDB, err); }
- void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, - struct br_ip *group, int type, u8 state) + void br_mdb_notify(struct net_device *dev, struct net_bridge_port_group *pg, + int type) { struct br_mdb_entry entry;
memset(&entry, 0, sizeof(entry)); - entry.ifindex = port->dev->ifindex; - entry.addr.proto = group->proto; - entry.addr.u.ip4 = group->u.ip4; + entry.ifindex = pg->port->dev->ifindex; + entry.addr.proto = pg->addr.proto; + entry.addr.u.ip4 = pg->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) - entry.addr.u.ip6 = group->u.ip6; + entry.addr.u.ip6 = pg->addr.u.ip6; #endif - entry.state = state; - entry.vid = group->vid; - __br_mdb_notify(dev, &entry, type); + entry.vid = pg->addr.vid; + __mdb_entry_fill_flags(&entry, pg->flags); + __br_mdb_notify(dev, &entry, type, pg); }
static int nlmsg_populate_rtr_fill(struct sk_buff *skb, @@@ -412,7 -438,8 +438,8 @@@ static int br_mdb_parse(struct sk_buff }
static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group, unsigned char state) + struct br_ip *group, unsigned char state, + struct net_bridge_port_group **pg) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; @@@ -425,8 -452,8 +452,8 @@@ mp = br_mdb_ip_get(mdb, group); if (!mp) { mp = br_multicast_new_group(br, port, group); - err = PTR_ERR(mp); - if (IS_ERR(mp)) + err = PTR_ERR_OR_ZERO(mp); + if (err) return err; }
@@@ -443,6 -470,7 +470,7 @@@ if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); + *pg = p; if (state == MDB_TEMPORARY) mod_timer(&p->timer, now + br->multicast_membership_interval);
@@@ -450,7 -478,8 +478,8 @@@ }
static int __br_mdb_add(struct net *net, struct net_bridge *br, - struct br_mdb_entry *entry) + struct br_mdb_entry *entry, + struct net_bridge_port_group **pg) { struct br_ip ip; struct net_device *dev; @@@ -479,7 -508,7 +508,7 @@@ #endif
spin_lock_bh(&br->multicast_lock); - ret = br_mdb_add_group(br, p, &ip, entry->state); + ret = br_mdb_add_group(br, p, &ip, entry->state, pg); spin_unlock_bh(&br->multicast_lock); return ret; } @@@ -487,6 -516,7 +516,7 @@@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); + struct net_bridge_port_group *pg; struct net_bridge_vlan_group *vg; struct net_device *dev, *pdev; struct br_mdb_entry *entry; @@@ -516,15 -546,15 +546,15 @@@ if (br_vlan_enabled(br) && vg && entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { entry->vid = v->vid; - err = __br_mdb_add(net, br, entry); + err = __br_mdb_add(net, br, entry, &pg); if (err) break; - __br_mdb_notify(dev, entry, RTM_NEWMDB); + __br_mdb_notify(dev, entry, RTM_NEWMDB, pg); } } else { - err = __br_mdb_add(net, br, entry); + err = __br_mdb_add(net, br, entry, &pg); if (!err) - __br_mdb_notify(dev, entry, RTM_NEWMDB); + __br_mdb_notify(dev, entry, RTM_NEWMDB, pg); }
return err; @@@ -568,7 -598,7 +598,7 @@@ static int __br_mdb_del(struct net_brid if (p->port->state == BR_STATE_DISABLED) goto unlock;
- entry->state = p->state; + __mdb_entry_fill_flags(entry, p->flags); rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); @@@ -620,12 -650,12 +650,12 @@@ static int br_mdb_del(struct sk_buff *s entry->vid = v->vid; err = __br_mdb_del(br, entry); if (!err) - __br_mdb_notify(dev, entry, RTM_DELMDB); + __br_mdb_notify(dev, entry, RTM_DELMDB, NULL); } } else { err = __br_mdb_del(br, entry); if (!err) - __br_mdb_notify(dev, entry, RTM_DELMDB); + __br_mdb_notify(dev, entry, RTM_DELMDB, NULL); }
return err; diff --combined net/core/dev.c index 0ef061b,3f4071a..edb7179 --- a/net/core/dev.c +++ b/net/core/dev.c @@@ -3829,8 -3829,14 +3829,14 @@@ static void net_tx_action(struct softir trace_consume_skb(skb); else trace_kfree_skb(skb, net_tx_action); - __kfree_skb(skb); + + if (skb->fclone != SKB_FCLONE_UNAVAILABLE) + __kfree_skb(skb); + else + __kfree_skb_defer(skb); } + + __kfree_skb_flush(); }
if (sd->output_queue) { @@@ -4154,7 -4160,10 +4160,10 @@@ ncls ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { drop: - atomic_long_inc(&skb->dev->rx_dropped); + if (!deliver_exact) + atomic_long_inc(&skb->dev->rx_dropped); + else + atomic_long_inc(&skb->dev->rx_nohandler); kfree_skb(skb); /* Jamal, now you will not able to escape explaining * me how you were going to use this. :-) @@@ -5152,6 -5161,7 +5161,7 @@@ static void net_rx_action(struct softir } }
+ __kfree_skb_flush(); local_irq_disable();
list_splice_tail_init(&sd->poll_list, &list); @@@ -5379,12 -5389,12 +5389,12 @@@ void *netdev_lower_get_next(struct net_ { struct netdev_adjacent *lower;
- lower = list_entry((*iter)->next, struct netdev_adjacent, list); + lower = list_entry(*iter, struct netdev_adjacent, list);
if (&lower->list == &dev->adj_list.lower) return NULL;
- *iter = &lower->list; + *iter = lower->list.next;
return lower->dev; } @@@ -7253,24 -7263,31 +7263,31 @@@ void netdev_run_todo(void } }
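netdev_lower_get_next() above backs the lower-device iterator; the change makes *iter point at the current list node rather than the previous one. A hypothetical caller, assuming the netdev_for_each_lower_dev() helper from <linux/netdevice.h> and RTNL held:

#include <linux/netdevice.h>

/* caller must hold RTNL; print the names of dev's lower devices */
static void dump_lower_devs(struct net_device *dev)
{
        struct net_device *lower;
        struct list_head *iter;

        netdev_for_each_lower_dev(dev, lower, iter)
                netdev_info(dev, "lower: %s\n", lower->name);
}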
- /* Convert net_device_stats to rtnl_link_stats64. They have the same - * fields in the same order, with only the type differing. + /* Convert net_device_stats to rtnl_link_stats64. rtnl_link_stats64 has + * all the same fields in the same order as net_device_stats, with only + * the type differing, but rtnl_link_stats64 may have additional fields + * at the end for newer counters. */ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64, const struct net_device_stats *netdev_stats) { #if BITS_PER_LONG == 64 - BUILD_BUG_ON(sizeof(*stats64) != sizeof(*netdev_stats)); + BUILD_BUG_ON(sizeof(*stats64) < sizeof(*netdev_stats)); memcpy(stats64, netdev_stats, sizeof(*stats64)); + /* zero out counters that only exist in rtnl_link_stats64 */ + memset((char *)stats64 + sizeof(*netdev_stats), 0, + sizeof(*stats64) - sizeof(*netdev_stats)); #else - size_t i, n = sizeof(*stats64) / sizeof(u64); + size_t i, n = sizeof(*netdev_stats) / sizeof(unsigned long); const unsigned long *src = (const unsigned long *)netdev_stats; u64 *dst = (u64 *)stats64;
- BUILD_BUG_ON(sizeof(*netdev_stats) / sizeof(unsigned long) != - sizeof(*stats64) / sizeof(u64)); + BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64)); for (i = 0; i < n; i++) dst[i] = src[i]; + /* zero out counters that only exist in rtnl_link_stats64 */ + memset((char *)stats64 + n * sizeof(u64), 0, + sizeof(*stats64) - n * sizeof(u64)); #endif } EXPORT_SYMBOL(netdev_stats_to_stats64); @@@ -7300,6 -7317,7 +7317,7 @@@ struct rtnl_link_stats64 *dev_get_stats } storage->rx_dropped += atomic_long_read(&dev->rx_dropped); storage->tx_dropped += atomic_long_read(&dev->tx_dropped); + storage->rx_nohandler += atomic_long_read(&dev->rx_nohandler); return storage; } EXPORT_SYMBOL(dev_get_stats); @@@ -7422,10 -7440,8 +7440,10 @@@ struct net_device *alloc_netdev_mqs(in dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev);
- if (!dev->tx_queue_len) + if (!dev->tx_queue_len) { dev->priv_flags |= IFF_NO_QUEUE; + dev->tx_queue_len = 1; + }
dev->num_tx_queues = txqs; dev->real_num_tx_queues = txqs; diff --combined net/core/skbuff.c index 5bf88f5,8bd4b79..488566b --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@@ -79,8 -79,6 +79,8 @@@
struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; +int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; +EXPORT_SYMBOL(sysctl_max_skb_frags);
/** * skb_panic - private function for out-of-line support @@@ -349,8 -347,16 +349,16 @@@ struct sk_buff *build_skb(void *data, u } EXPORT_SYMBOL(build_skb);
+ #define NAPI_SKB_CACHE_SIZE 64 + + struct napi_alloc_cache { + struct page_frag_cache page; + size_t skb_count; + void *skb_cache[NAPI_SKB_CACHE_SIZE]; + }; + static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); - static DEFINE_PER_CPU(struct page_frag_cache, napi_alloc_cache); + static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache);
static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { @@@ -380,9 -386,9 +388,9 @@@ EXPORT_SYMBOL(netdev_alloc_frag)
static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { - struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
- return __alloc_page_frag(nc, fragsz, gfp_mask); + return __alloc_page_frag(&nc->page, fragsz, gfp_mask); }
void *napi_alloc_frag(unsigned int fragsz) @@@ -476,7 -482,7 +484,7 @@@ EXPORT_SYMBOL(__netdev_alloc_skb) struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, gfp_t gfp_mask) { - struct page_frag_cache *nc = this_cpu_ptr(&napi_alloc_cache); + struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); struct sk_buff *skb; void *data;
@@@ -496,7 -502,7 +504,7 @@@ if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC;
- data = __alloc_page_frag(nc, len, gfp_mask); + data = __alloc_page_frag(&nc->page, len, gfp_mask); if (unlikely(!data)) return NULL;
@@@ -507,7 -513,7 +515,7 @@@ }
/* use OR instead of assignment to avoid clearing of bits in mask */ - if (nc->pfmemalloc) + if (nc->page.pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1;
@@@ -749,6 -755,73 +757,73 @@@ void consume_skb(struct sk_buff *skb } EXPORT_SYMBOL(consume_skb);
+ void __kfree_skb_flush(void) + { + struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + + /* flush skb_cache if containing objects */ + if (nc->skb_count) { + kmem_cache_free_bulk(skbuff_head_cache, nc->skb_count, + nc->skb_cache); + nc->skb_count = 0; + } + } + + static inline void _kfree_skb_defer(struct sk_buff *skb) + { + struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); + + /* drop skb->head and call any destructors for packet */ + skb_release_all(skb); + + /* record skb to CPU local list */ + nc->skb_cache[nc->skb_count++] = skb; + + #ifdef CONFIG_SLUB + /* SLUB writes into objects when freeing */ + prefetchw(skb); + #endif + + /* flush skb_cache if it is filled */ + if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) { + kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_SIZE, + nc->skb_cache); + nc->skb_count = 0; + } + } + void __kfree_skb_defer(struct sk_buff *skb) + { + _kfree_skb_defer(skb); + } + + void napi_consume_skb(struct sk_buff *skb, int budget) + { + if (unlikely(!skb)) + return; + + /* if budget is 0 assume netpoll w/ IRQs disabled */ + if (unlikely(!budget)) { + dev_consume_skb_irq(skb); + return; + } + + if (likely(atomic_read(&skb->users) == 1)) + smp_rmb(); + else if (likely(!atomic_dec_and_test(&skb->users))) + return; + /* if reaching here SKB is ready to free */ + trace_consume_skb(skb); + + /* if SKB is a clone, don't handle this case */ + if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) { + __kfree_skb(skb); + return; + } + + _kfree_skb_defer(skb); + } + EXPORT_SYMBOL(napi_consume_skb); + /* Make sure a field is enclosed inside headers_start/headers_end section */ #define CHECK_SKB_FIELD(field) \ BUILD_BUG_ON(offsetof(struct sk_buff, field) < \ @@@ -3006,8 -3079,7 +3081,7 @@@ struct sk_buff *skb_segment(struct sk_b if (unlikely(!proto)) return ERR_PTR(-EINVAL);
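napi_consume_skb() and __kfree_skb_flush() above add a per-cpu bulk-free path for skbs released from softirq context. A hypothetical driver poll routine showing the intended calling convention; the ring structure and its fields are invented for the sketch, only the napi_consume_skb() call reflects the new API:

#include <linux/netdevice.h>
#include <linux/skbuff.h>

struct my_ring {                        /* hypothetical TX ring state */
        struct napi_struct napi;
        struct sk_buff *done_skbs[64];
        unsigned int n_done;
};

static int my_poll(struct napi_struct *napi, int budget)
{
        struct my_ring *ring = container_of(napi, struct my_ring, napi);
        unsigned int i;

        /* budget == 0 means netpoll with IRQs disabled; napi_consume_skb()
         * then falls back to dev_consume_skb_irq() instead of caching
         */
        for (i = 0; i < ring->n_done; i++)
                napi_consume_skb(ring->done_skbs[i], budget);
        ring->n_done = 0;

        /* net_rx_action()/net_tx_action() call __kfree_skb_flush() after the
         * poll loop, releasing the cached skb heads in one bulk free
         */
        return 0;
}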
- csum = !head_skb->encap_hdr_csum && - !!can_checksum_protocol(features, proto); + csum = !!can_checksum_protocol(features, proto);
headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@@ -3100,13 -3172,15 +3174,15 @@@ if (nskb->len == len + doffset) goto perform_csum_check;
- if (!sg && !nskb->remcsum_offload) { - nskb->ip_summed = CHECKSUM_NONE; - nskb->csum = skb_copy_and_csum_bits(head_skb, offset, - skb_put(nskb, len), - len, 0); + if (!sg) { + if (!nskb->remcsum_offload) + nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum = + skb_copy_and_csum_bits(head_skb, offset, + skb_put(nskb, len), + len, 0); SKB_GSO_CB(nskb)->csum_start = - skb_headroom(nskb) + doffset; + skb_headroom(nskb) + doffset; continue; }
@@@ -3172,12 -3246,19 +3248,19 @@@ skip_fraglist nskb->truesize += nskb->data_len;
perform_csum_check: - if (!csum && !nskb->remcsum_offload) { - nskb->csum = skb_checksum(nskb, doffset, - nskb->len - doffset, 0); - nskb->ip_summed = CHECKSUM_NONE; + if (!csum) { + if (skb_has_shared_frag(nskb)) { + err = __skb_linearize(nskb); + if (err) + goto err; + } + if (!nskb->remcsum_offload) + nskb->ip_summed = CHECKSUM_NONE; + SKB_GSO_CB(nskb)->csum = + skb_checksum(nskb, doffset, + nskb->len - doffset, 0); SKB_GSO_CB(nskb)->csum_start = - skb_headroom(nskb) + doffset; + skb_headroom(nskb) + doffset; } } while ((offset += len) < head_skb->len);
@@@ -4415,9 -4496,7 +4498,7 @@@ int skb_vlan_push(struct sk_buff *skb, skb->mac_len += VLAN_HLEN; __skb_pull(skb, offset);
- if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(skb->csum, csum_partial(skb->data - + (2 * ETH_ALEN), VLAN_HLEN, 0)); + skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; diff --combined net/dccp/ipv4.c index 902d606,1e0c600..b5672e5 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@@ -802,7 -802,7 +802,7 @@@ static int dccp_v4_rcv(struct sk_buff * }
lookup: - sk = __inet_lookup_skb(&dccp_hashinfo, skb, + sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), dh->dccph_sport, dh->dccph_dport); if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " @@@ -824,26 -824,26 +824,26 @@@
if (sk->sk_state == DCCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk;
sk = req->rsk_listener; - if (likely(sk->sk_state == DCCP_LISTEN)) { - nsk = dccp_check_req(sk, skb, req); - } else { + if (unlikely(sk->sk_state != DCCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { dccp_v4_ctl_send_reset(sk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } diff --combined net/dccp/ipv6.c index b8608b7,45cbe85..4663a01 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@@ -668,7 -668,7 +668,7 @@@ static int dccp_v6_rcv(struct sk_buff * DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
lookup: - sk = __inet6_lookup_skb(&dccp_hashinfo, skb, + sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), dh->dccph_sport, dh->dccph_dport, inet6_iif(skb)); if (!sk) { @@@ -691,26 -691,26 +691,26 @@@
if (sk->sk_state == DCCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk;
sk = req->rsk_listener; - if (likely(sk->sk_state == DCCP_LISTEN)) { - nsk = dccp_check_req(sk, skb, req); - } else { + if (unlikely(sk->sk_state != DCCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = dccp_check_req(sk, skb, req); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); } else if (dccp_child_process(sk, nsk, skb)) { dccp_v6_ctl_send_reset(sk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } @@@ -993,7 -993,7 +993,7 @@@ static struct proto dccp_v6_prot = .sendmsg = dccp_sendmsg, .recvmsg = dccp_recvmsg, .backlog_rcv = dccp_v6_do_rcv, - .hash = inet_hash, + .hash = inet6_hash, .unhash = inet_unhash, .accept = inet_csk_accept, .get_port = inet_csk_get_port, diff --combined net/ipv4/devinet.c index f6303b1,3d83531..29b8d3a --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@@ -1847,7 -1847,7 +1847,7 @@@ static int inet_netconf_get_devconf(str if (err < 0) goto errout;
- err = EINVAL; + err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) goto errout;
@@@ -2185,6 -2185,8 +2185,8 @@@ static struct devinet_sysctl_table "igmpv3_unsolicited_report_interval"), DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN, "ignore_routes_with_linkdown"), + DEVINET_SYSCTL_RW_ENTRY(DROP_GRATUITOUS_ARP, + "drop_gratuitous_arp"),
DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), @@@ -2192,6 -2194,8 +2194,8 @@@ "promote_secondaries"), DEVINET_SYSCTL_FLUSHING_ENTRY(ROUTE_LOCALNET, "route_localnet"), + DEVINET_SYSCTL_FLUSHING_ENTRY(DROP_UNICAST_IN_L2_MULTICAST, + "drop_unicast_in_l2_multicast"), }, };
diff --combined net/ipv4/inet_connection_sock.c index 6414891,3d28c6d..d768230 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@@ -24,6 -24,7 +24,7 @@@ #include <net/tcp_states.h> #include <net/xfrm.h> #include <net/tcp.h> + #include <net/sock_reuseport.h>
#ifdef INET_CSK_DEBUG const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n"; @@@ -67,7 -68,8 +68,8 @@@ int inet_csk_bind_conflict(const struc if ((!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) && (!reuseport || !sk2->sk_reuseport || - (sk2->sk_state != TCP_TIME_WAIT && + rcu_access_pointer(sk->sk_reuseport_cb) || + (sk2->sk_state != TCP_TIME_WAIT && !uid_eq(uid, sock_i_uid(sk2))))) {
if (!sk2->sk_rcv_saddr || !sk->sk_rcv_saddr || @@@ -89,161 -91,153 +91,153 @@@ EXPORT_SYMBOL_GPL(inet_csk_bind_conflic
/* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. + * We try to allocate an odd port (and leave even ports for connect()) */ int inet_csk_get_port(struct sock *sk, unsigned short snum) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; + struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; + int ret = 1, attempts = 5, port = snum; + int smallest_size = -1, smallest_port; struct inet_bind_hashbucket *head; - struct inet_bind_bucket *tb; - int ret, attempts = 5; struct net *net = sock_net(sk); - int smallest_size = -1, smallest_rover; + int i, low, high, attempt_half; + struct inet_bind_bucket *tb; kuid_t uid = sock_i_uid(sk); - int attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; + u32 remaining, offset;
- local_bh_disable(); - if (!snum) { - int remaining, rover, low, high; + if (port) { + have_port: + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; + spin_lock_bh(&head->lock); + inet_bind_bucket_for_each(tb, &head->chain) + if (net_eq(ib_net(tb), net) && tb->port == port) + goto tb_found;
+ goto tb_not_found; + } again: - inet_get_local_port_range(net, &low, &high); - if (attempt_half) { - int half = low + ((high - low) >> 1); - - if (attempt_half == 1) - high = half; - else - low = half; - } - remaining = (high - low) + 1; - smallest_rover = rover = prandom_u32() % remaining + low; - - smallest_size = -1; - do { - if (inet_is_local_reserved_port(net, rover)) - goto next_nolock; - head = &hashinfo->bhash[inet_bhashfn(net, rover, - hashinfo->bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, &head->chain) - if (net_eq(ib_net(tb), net) && tb->port == rover) { - if (((tb->fastreuse > 0 && - sk->sk_reuse && - sk->sk_state != TCP_LISTEN) || - (tb->fastreuseport > 0 && - sk->sk_reuseport && - uid_eq(tb->fastuid, uid))) && - (tb->num_owners < smallest_size || smallest_size == -1)) { - smallest_size = tb->num_owners; - smallest_rover = rover; - } - if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) { - snum = rover; - goto tb_found; - } - goto next; + attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0; + other_half_scan: + inet_get_local_port_range(net, &low, &high); + high++; /* [32768, 60999] -> [32768, 61000[ */ + if (high - low < 4) + attempt_half = 0; + if (attempt_half) { + int half = low + (((high - low) >> 2) << 1); + + if (attempt_half == 1) + high = half; + else + low = half; + } + remaining = high - low; + if (likely(remaining > 1)) + remaining &= ~1U; + + offset = prandom_u32() % remaining; + /* __inet_hash_connect() favors ports having @low parity + * We do the opposite to not pollute connect() users. + */ + offset |= 1U; + smallest_size = -1; + smallest_port = low; /* avoid compiler warning */ + + other_parity_scan: + port = low + offset; + for (i = 0; i < remaining; i += 2, port += 2) { + if (unlikely(port >= high)) + port -= remaining; + if (inet_is_local_reserved_port(net, port)) + continue; + head = &hinfo->bhash[inet_bhashfn(net, port, + hinfo->bhash_size)]; + spin_lock_bh(&head->lock); + inet_bind_bucket_for_each(tb, &head->chain) + if (net_eq(ib_net(tb), net) && tb->port == port) { + if (((tb->fastreuse > 0 && reuse) || + (tb->fastreuseport > 0 && + sk->sk_reuseport && + !rcu_access_pointer(sk->sk_reuseport_cb) && + uid_eq(tb->fastuid, uid))) && + (tb->num_owners < smallest_size || smallest_size == -1)) { + smallest_size = tb->num_owners; + smallest_port = port; } - break; - next: - spin_unlock(&head->lock); - next_nolock: - if (++rover > high) - rover = low; - } while (--remaining > 0); - - /* Exhausted local port range during search? It is not - * possible for us to be holding one of the bind hash - * locks if this test triggers, because if 'remaining' - * drops to zero, we broke out of the do/while loop at - * the top level, not from the 'break;' statement. - */ - ret = 1; - if (remaining <= 0) { - if (smallest_size != -1) { - snum = smallest_rover; - goto have_snum; - } - if (attempt_half == 1) { - /* OK we now try the upper half of the range */ - attempt_half = 2; - goto again; + if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) + goto tb_found; + goto next_port; } - goto fail; - } - /* OK, here is the one we will use. HEAD is - * non-NULL and we hold it's mutex. 
- */ - snum = rover; - } else { - have_snum: - head = &hashinfo->bhash[inet_bhashfn(net, snum, - hashinfo->bhash_size)]; - spin_lock(&head->lock); - inet_bind_bucket_for_each(tb, &head->chain) - if (net_eq(ib_net(tb), net) && tb->port == snum) - goto tb_found; + goto tb_not_found; + next_port: + spin_unlock_bh(&head->lock); + cond_resched(); + } + + if (smallest_size != -1) { + port = smallest_port; + goto have_port; } - tb = NULL; - goto tb_not_found; + offset--; + if (!(offset & 1)) + goto other_parity_scan; + + if (attempt_half == 1) { + /* OK we now try the upper half of the range */ + attempt_half = 2; + goto other_half_scan; + } + return ret; + + tb_not_found: + tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, + net, head, port); + if (!tb) + goto fail_unlock; tb_found: if (!hlist_empty(&tb->owners)) { if (sk->sk_reuse == SK_FORCE_REUSE) goto success;
- if (((tb->fastreuse > 0 && - sk->sk_reuse && sk->sk_state != TCP_LISTEN) || + if (((tb->fastreuse > 0 && reuse) || (tb->fastreuseport > 0 && sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && - smallest_size == -1) { + smallest_size == -1) goto success; - } else { - ret = 1; - if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { - if (((sk->sk_reuse && sk->sk_state != TCP_LISTEN) || - (tb->fastreuseport > 0 && - sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && - smallest_size != -1 && --attempts >= 0) { - spin_unlock(&head->lock); - goto again; - } - - goto fail_unlock; + if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { + if ((reuse || + (tb->fastreuseport > 0 && + sk->sk_reuseport && + !rcu_access_pointer(sk->sk_reuseport_cb) && + uid_eq(tb->fastuid, uid))) && + smallest_size != -1 && --attempts >= 0) { + spin_unlock_bh(&head->lock); + goto again; } + goto fail_unlock; } - } - tb_not_found: - ret = 1; - if (!tb && (tb = inet_bind_bucket_create(hashinfo->bind_bucket_cachep, - net, head, snum)) == NULL) - goto fail_unlock; - if (hlist_empty(&tb->owners)) { - if (sk->sk_reuse && sk->sk_state != TCP_LISTEN) - tb->fastreuse = 1; - else + if (!reuse) tb->fastreuse = 0; + if (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid)) + tb->fastreuseport = 0; + } else { + tb->fastreuse = reuse; if (sk->sk_reuseport) { tb->fastreuseport = 1; tb->fastuid = uid; - } else - tb->fastreuseport = 0; - } else { - if (tb->fastreuse && - (!sk->sk_reuse || sk->sk_state == TCP_LISTEN)) - tb->fastreuse = 0; - if (tb->fastreuseport && - (!sk->sk_reuseport || !uid_eq(tb->fastuid, uid))) + } else { tb->fastreuseport = 0; + } } success: if (!inet_csk(sk)->icsk_bind_hash) - inet_bind_hash(sk, tb, snum); + inet_bind_hash(sk, tb, port); WARN_ON(inet_csk(sk)->icsk_bind_hash != tb); ret = 0;
fail_unlock: - spin_unlock(&head->lock); - fail: - local_bh_enable(); + spin_unlock_bh(&head->lock); return ret; } EXPORT_SYMBOL_GPL(inet_csk_get_port); @@@ -482,10 -476,6 +476,6 @@@ EXPORT_SYMBOL_GPL(inet_csk_route_child_ #define AF_INET_FAMILY(fam) true #endif
- /* Only thing we need from tcp.h */ - extern int sysctl_tcp_synack_retries; - - /* Decide when to expire the request and when to resend SYN-ACK */ static inline void syn_ack_recalc(struct request_sock *req, const int thresh, const int max_retries, @@@ -557,6 -547,7 +547,7 @@@ static void reqsk_timer_handler(unsigne { struct request_sock *req = (struct request_sock *)data; struct sock *sk_listener = req->rsk_listener; + struct net *net = sock_net(sk_listener); struct inet_connection_sock *icsk = inet_csk(sk_listener); struct request_sock_queue *queue = &icsk->icsk_accept_queue; int qlen, expire = 0, resend = 0; @@@ -566,7 -557,7 +557,7 @@@ if (sk_state_load(sk_listener) != TCP_LISTEN) goto drop;
- max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries; + max_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries; thresh = max_retries; /* Normally all the openreqs are young and become mature * (i.e. converted to established socket) for first timeout. @@@ -737,6 -728,7 +728,7 @@@ int inet_csk_listen_start(struct sock * { struct inet_connection_sock *icsk = inet_csk(sk); struct inet_sock *inet = inet_sk(sk); + int err = -EADDRINUSE;
reqsk_queue_alloc(&icsk->icsk_accept_queue);
@@@ -754,13 -746,14 +746,14 @@@ inet->inet_sport = htons(inet->inet_num);
sk_dst_reset(sk); - sk->sk_prot->hash(sk); + err = sk->sk_prot->hash(sk);
- return 0; + if (likely(!err)) + return 0; }
sk->sk_state = TCP_CLOSE; - return -EADDRINUSE; + return err; } EXPORT_SYMBOL_GPL(inet_csk_listen_start);
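For reference, the inet_csk_get_port() rewrite above halves the ephemeral range for SK_CAN_REUSE sockets and walks it with a fixed parity, the opposite parity from the one __inet_hash_connect() favors, so bind() and connect() users do not compete for the same ports. A minimal userspace sketch of that scan order follows; pick_port() and port_in_use() are illustrative names, and the conflict check is stubbed out rather than consulting a real bind hash.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/* Stand-in for the bind-hash conflict check; everything is "free" here. */
static int port_in_use(int port)
{
	(void)port;
	return 0;
}

/*
 * Pick a local port from [low, high): scan odd offsets first, then even
 * ones, wrapping around the range, mirroring the offset |= 1 /
 * other_parity_scan logic above.
 */
static int pick_port(int low, int high)
{
	int remaining = high - low;
	int offset, i, port;

	if (remaining > 1)
		remaining &= ~1;		/* keep an even number of slots */

	offset = rand() % remaining;
	offset |= 1;				/* start on the "bind" parity */

	for (;;) {
		port = low + offset;
		for (i = 0; i < remaining; i += 2, port += 2) {
			if (port >= high)
				port -= remaining;	/* wrap */
			if (!port_in_use(port))
				return port;
		}
		offset--;			/* switch to the other parity */
		if (offset & 1)
			break;			/* both parities exhausted */
	}
	return -1;
}

int main(void)
{
	srand((unsigned int)time(NULL));
	printf("picked port %d\n", pick_port(32768, 61000));
	return 0;
}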
@@@ -789,16 -782,14 +782,16 @@@ static void inet_child_forget(struct so reqsk_put(req); }
-void inet_csk_reqsk_queue_add(struct sock *sk, struct request_sock *req, - struct sock *child) +struct sock *inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
spin_lock(&queue->rskq_lock); if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_child_forget(sk, req, child); + child = NULL; } else { req->sk = child; req->dl_next = NULL; @@@ -810,7 -801,6 +803,7 @@@ sk_acceptq_added(sk); } spin_unlock(&queue->rskq_lock); + return child; } EXPORT_SYMBOL(inet_csk_reqsk_queue_add);
@@@ -820,8 -810,11 +813,8 @@@ struct sock *inet_csk_complete_hashdanc if (own_req) { inet_csk_reqsk_queue_drop(sk, req); reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); - inet_csk_reqsk_queue_add(sk, req, child); - /* Warning: caller must not call reqsk_put(req); - * child stole last reference on it. - */ - return child; + if (inet_csk_reqsk_queue_add(sk, req, child)) + return child; } /* Too bad, another child took ownership of the request, undo. */ bh_unlock_sock(child); diff --combined net/ipv4/ip_gre.c index 41ba68d,12071e2..202437d --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@@ -238,7 -238,7 +238,7 @@@ static int parse_gre_header(struct sk_b return -EINVAL; } } - return iptunnel_pull_header(skb, hdr_len, tpi->proto); + return iptunnel_pull_header(skb, hdr_len, tpi->proto, false); }
static void ipgre_err(struct sk_buff *skb, u32 info, @@@ -440,6 -440,17 +440,17 @@@ drop return 0; }
+ static __sum16 gre_checksum(struct sk_buff *skb) + { + __wsum csum; + + if (skb->ip_summed == CHECKSUM_PARTIAL) + csum = lco_csum(skb); + else + csum = skb_checksum(skb, 0, skb->len, 0); + return csum_fold(csum); + } + static void build_header(struct sk_buff *skb, int hdr_len, __be16 flags, __be16 proto, __be32 key, __be32 seq) { @@@ -467,8 -478,7 +478,7 @@@ !(skb_shinfo(skb)->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) { *ptr = 0; - *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, - skb->len, 0)); + *(__sum16 *)ptr = gre_checksum(skb); } } } @@@ -493,8 -503,7 +503,7 @@@ static void __gre_xmit(struct sk_buff * static struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool csum) { - return iptunnel_handle_offloads(skb, csum, - csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); + return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); }
static struct rtable *gre_get_rt(struct sk_buff *skb, @@@ -531,9 -540,16 +540,16 @@@ static void gre_fb_xmit(struct sk_buff goto err_free_skb;
key = &tun_info->key; - rt = gre_get_rt(skb, dev, &fl, key); - if (IS_ERR(rt)) - goto err_free_skb; + rt = !skb->mark ? dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr) : + NULL; + if (!rt) { + rt = gre_get_rt(skb, dev, &fl, key); + if (IS_ERR(rt)) + goto err_free_skb; + if (!skb->mark) + dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, + fl.saddr); + }
tunnel_hlen = ip_gre_calc_hlen(key->tun_flags);
@@@ -1054,9 -1070,8 +1070,9 @@@ static const struct net_device_ops gre_ static void ipgre_tap_setup(struct net_device *dev) { ether_setup(dev); - dev->netdev_ops = &gre_tap_netdev_ops; - dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + dev->netdev_ops = &gre_tap_netdev_ops; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; ip_tunnel_setup(dev, gre_tap_net_id); }
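As background for the gre_checksum() helper added earlier in this ip_gre.c diff (lco_csum() for CHECKSUM_PARTIAL skbs, a full skb_checksum() otherwise, then csum_fold()), here is the underlying RFC 1071 arithmetic in plain userspace C; csum_add() and csum_fold() below are standalone re-implementations, not the kernel's csum API.

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Accumulate a buffer into a 32-bit one's-complement sum (RFC 1071). */
static uint32_t csum_add(uint32_t sum, const uint8_t *data, size_t len)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += ((uint32_t)data[i] << 8) | data[i + 1];
	if (len & 1)				/* odd trailing byte, zero padded */
		sum += (uint32_t)data[len - 1] << 8;
	return sum;
}

/* Fold the 32-bit accumulator down to 16 bits and complement it. */
static uint16_t csum_fold(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	const uint8_t payload[] = { 0x45, 0x00, 0x00, 0x3c, 0x1c, 0x46 };
	uint16_t check = csum_fold(csum_add(0, payload, sizeof(payload)));

	printf("checksum: 0x%04x\n", (unsigned int)check);
	return 0;
}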
@@@ -1241,14 -1256,6 +1257,14 @@@ struct net_device *gretap_fb_dev_create err = ipgre_newlink(net, dev, tb, NULL); if (err < 0) goto out; + + /* openvswitch users expect packet sizes to be unrestricted, + * so set the largest MTU we can. + */ + err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false); + if (err) + goto out; + return dev; out: free_netdev(dev); diff --combined net/ipv4/ip_sockglue.c index a501242,3f1befc..035ad64 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@@ -249,8 -249,6 +249,8 @@@ int ip_cmsg_send(struct net *net, struc switch (cmsg->cmsg_type) { case IP_RETOPTS: err = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); + + /* Our caller is responsible for freeing ipc->opt */ err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg), err < 40 ? err : 40); if (err) @@@ -573,6 -571,7 +573,7 @@@ static int do_ip_setsockopt(struct soc int optname, char __user *optval, unsigned int optlen) { struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); int val = 0, err; bool needs_rtnl = setsockopt_needs_rtnl(optname);
@@@ -912,7 -911,7 +913,7 @@@ } /* numsrc >= (1G-4) overflow in 32 bits */ if (msf->imsf_numsrc >= 0x3ffffffcU || - msf->imsf_numsrc > sysctl_igmp_max_msf) { + msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) { kfree(msf); err = -ENOBUFS; break; @@@ -1067,7 -1066,7 +1068,7 @@@
/* numsrc >= (4G-140)/128 overflow in 32 bits */ if (gsf->gf_numsrc >= 0x1ffffff || - gsf->gf_numsrc > sysctl_igmp_max_msf) { + gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) { err = -ENOBUFS; goto mc_msf_out; } @@@ -1342,10 -1341,13 +1343,13 @@@ static int do_ip_getsockopt(struct soc val = inet->tos; break; case IP_TTL: + { + struct net *net = sock_net(sk); val = (inet->uc_ttl == -1 ? - sysctl_ip_default_ttl : + net->ipv4.sysctl_ip_default_ttl : inet->uc_ttl); break; + } case IP_HDRINCL: val = inet->hdrincl; break; diff --combined net/ipv4/ip_tunnel.c index 89e8861,4569da7..dff8a05 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@@ -68,61 -68,6 +68,6 @@@ static unsigned int ip_tunnel_hash(__be IP_TNL_HASH_BITS); }
- static void __tunnel_dst_set(struct ip_tunnel_dst *idst, - struct dst_entry *dst, __be32 saddr) - { - struct dst_entry *old_dst; - - dst_clone(dst); - old_dst = xchg((__force struct dst_entry **)&idst->dst, dst); - dst_release(old_dst); - idst->saddr = saddr; - } - - static noinline void tunnel_dst_set(struct ip_tunnel *t, - struct dst_entry *dst, __be32 saddr) - { - __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr); - } - - static void tunnel_dst_reset(struct ip_tunnel *t) - { - tunnel_dst_set(t, NULL, 0); - } - - void ip_tunnel_dst_reset_all(struct ip_tunnel *t) - { - int i; - - for_each_possible_cpu(i) - __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0); - } - EXPORT_SYMBOL(ip_tunnel_dst_reset_all); - - static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, - u32 cookie, __be32 *saddr) - { - struct ip_tunnel_dst *idst; - struct dst_entry *dst; - - rcu_read_lock(); - idst = raw_cpu_ptr(t->dst_cache); - dst = rcu_dereference(idst->dst); - if (dst && !atomic_inc_not_zero(&dst->__refcnt)) - dst = NULL; - if (dst) { - if (!dst->obsolete || dst->ops->check(dst, cookie)) { - *saddr = idst->saddr; - } else { - tunnel_dst_reset(t); - dst_release(dst); - dst = NULL; - } - } - rcu_read_unlock(); - return (struct rtable *)dst; - } - static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p, __be16 flags, __be32 key) { @@@ -381,7 -326,8 +326,8 @@@ static int ip_tunnel_bind_dev(struct ne
if (!IS_ERR(rt)) { tdev = rt->dst.dev; - tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); + dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, + fl4.saddr); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) @@@ -729,7 -675,8 +675,8 @@@ void ip_tunnel_xmit(struct sk_buff *skb if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0) goto tx_error;
- rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL; + rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) : + NULL;
if (!rt) { rt = ip_route_output_key(tunnel->net, &fl4); @@@ -739,7 -686,8 +686,8 @@@ goto tx_error; } if (connected) - tunnel_dst_set(tunnel, &rt->dst, fl4.saddr); + dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, + fl4.saddr); }
if (rt->dst.dev == dev) { @@@ -836,7 -784,7 +784,7 @@@ static void ip_tunnel_update(struct ip_ if (set_mtu) dev->mtu = mtu; } - ip_tunnel_dst_reset_all(t); + dst_cache_reset(&t->dst_cache); netdev_state_change(dev); }
@@@ -943,31 -891,17 +891,31 @@@ done } EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
-int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict) { struct ip_tunnel *tunnel = netdev_priv(dev); int t_hlen = tunnel->hlen + sizeof(struct iphdr); + int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
- if (new_mtu < 68 || - new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen) + if (new_mtu < 68) return -EINVAL; + + if (new_mtu > max_mtu) { + if (strict) + return -EINVAL; + + new_mtu = max_mtu; + } + dev->mtu = new_mtu; return 0; } +EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu); + +int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + return __ip_tunnel_change_mtu(dev, new_mtu, true); +} EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
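The new strict argument above lets in-kernel callers such as the gretap fallback device (see the IP_MAX_MTU change earlier in this diff) clamp an oversized MTU instead of failing, while the regular ip_tunnel_change_mtu() path keeps rejecting it. A small sketch of the clamp-or-reject decision; change_mtu() and the 65490 figure are illustrative, standing in for 0xFFF8 minus the real link and tunnel header overhead.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define IPV4_MIN_MTU	68	/* RFC 791 minimum */

/*
 * Return the MTU actually installed, or a negative errno.  max_mtu stands
 * in for 0xFFF8 minus the per-device link and tunnel header lengths.
 */
static int change_mtu(int new_mtu, int max_mtu, bool strict)
{
	if (new_mtu < IPV4_MIN_MTU)
		return -EINVAL;

	if (new_mtu > max_mtu) {
		if (strict)
			return -EINVAL;		/* userspace configuration path */
		new_mtu = max_mtu;		/* clamp for in-kernel callers */
	}
	return new_mtu;
}

int main(void)
{
	printf("strict, oversized    : %d\n", change_mtu(70000, 65490, true));
	printf("non-strict, oversized: %d\n", change_mtu(70000, 65490, false));
	printf("in range             : %d\n", change_mtu(1500, 65490, true));
	return 0;
}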
static void ip_tunnel_dev_free(struct net_device *dev) @@@ -975,7 -909,7 +923,7 @@@ struct ip_tunnel *tunnel = netdev_priv(dev);
gro_cells_destroy(&tunnel->gro_cells); - free_percpu(tunnel->dst_cache); + dst_cache_destroy(&tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@@ -1169,15 -1103,15 +1117,15 @@@ int ip_tunnel_init(struct net_device *d if (!dev->tstats) return -ENOMEM;
- tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); - if (!tunnel->dst_cache) { + err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); + if (err) { free_percpu(dev->tstats); - return -ENOMEM; + return err; }
err = gro_cells_init(&tunnel->gro_cells, dev); if (err) { - free_percpu(tunnel->dst_cache); + dst_cache_destroy(&tunnel->dst_cache); free_percpu(dev->tstats); return err; } @@@ -1207,7 -1141,7 +1155,7 @@@ void ip_tunnel_uninit(struct net_devic if (itn->fb_tunnel_dev != dev) ip_tunnel_del(itn, netdev_priv(dev));
- ip_tunnel_dst_reset_all(tunnel); + dst_cache_reset(&tunnel->dst_cache); } EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
diff --combined net/ipv4/ping.c index d3a2716,f6f93fc..76dce90 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@@ -145,10 -145,12 +145,12 @@@ fail } EXPORT_SYMBOL_GPL(ping_get_port);
- void ping_hash(struct sock *sk) + int ping_hash(struct sock *sk) { pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); BUG(); /* "Please do not press this button again." */ + + return 0; }
void ping_unhash(struct sock *sk) @@@ -746,10 -748,8 +748,10 @@@ static int ping_v4_sendmsg(struct sock
if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc, false); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); return err; + } if (ipc.opt) free = 1; } diff --combined net/ipv4/raw.c index 7113bae,d635251..8d22de7 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@@ -93,7 -93,7 +93,7 @@@ static struct raw_hashinfo raw_v4_hashi .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), };
- void raw_hash_sk(struct sock *sk) + int raw_hash_sk(struct sock *sk) { struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; struct hlist_head *head; @@@ -104,6 -104,8 +104,8 @@@ sk_add_node(sk, head); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); write_unlock_bh(&h->lock); + + return 0; } EXPORT_SYMBOL_GPL(raw_hash_sk);
@@@ -547,10 -549,8 +549,10 @@@ static int raw_sendmsg(struct sock *sk
if (msg->msg_controllen) { err = ip_cmsg_send(net, msg, &ipc, false); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); goto out; + } if (ipc.opt) free = 1; } diff --combined net/ipv4/tcp.c index 483ffdf,f93150d..f9faadb --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@@ -282,8 -282,6 +282,6 @@@ #include <asm/unaligned.h> #include <net/busy_poll.h>
- int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; - int sysctl_tcp_min_tso_segs __read_mostly = 2;
int sysctl_tcp_autocorking __read_mostly = 1; @@@ -406,7 -404,7 +404,7 @@@ void tcp_init_sock(struct sock *sk tp->mss_cache = TCP_MSS_DEFAULT; u64_stats_init(&tp->syncp);
- tp->reordering = sysctl_tcp_reordering; + tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; tcp_enable_early_retrans(tp); tcp_assign_congestion_control(sk);
@@@ -940,7 -938,7 +938,7 @@@ new_segment
i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, page, offset); - if (!can_coalesce && i >= MAX_SKB_FRAGS) { + if (!can_coalesce && i >= sysctl_max_skb_frags) { tcp_mark_push(tp, skb); goto new_segment; } @@@ -1213,7 -1211,7 +1211,7 @@@ new_segment
if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i == MAX_SKB_FRAGS || !sg) { + if (i == sysctl_max_skb_frags || !sg) { tcp_mark_push(tp, skb); goto new_segment; } @@@ -1466,8 -1464,10 +1464,10 @@@ static struct sk_buff *tcp_recv_skb(str
while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { offset = seq - TCP_SKB_CB(skb)->seq; - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) + if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { + pr_err_once("%s: found a SYN, please report !\n", __func__); offset--; + } if (offset < skb->len || (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)) { *off = offset; return skb; @@@ -1657,8 -1657,10 +1657,10 @@@ int tcp_recvmsg(struct sock *sk, struc break;
offset = *seq - TCP_SKB_CB(skb)->seq; - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) + if (unlikely(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { + pr_err_once("%s: found a SYN, please report !\n", __func__); offset--; + } if (offset < skb->len) goto found_ok_skb; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) @@@ -2326,6 -2328,7 +2328,7 @@@ static int do_tcp_setsockopt(struct soc { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); int val; int err = 0;
@@@ -2522,7 -2525,7 +2525,7 @@@ case TCP_LINGER2: if (val < 0) tp->linger2 = -1; - else if (val > sysctl_tcp_fin_timeout / HZ) + else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ) tp->linger2 = 0; else tp->linger2 = val * HZ; @@@ -2639,6 -2642,7 +2642,7 @@@ void tcp_get_info(struct sock *sk, stru const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; unsigned int start; + int notsent_bytes; u64 rate64; u32 rate;
@@@ -2719,6 -2723,11 +2723,11 @@@ } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; + + notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt); + info->tcpi_notsent_bytes = max(0, notsent_bytes); + + info->tcpi_min_rtt = tcp_min_rtt(tp); } EXPORT_SYMBOL_GPL(tcp_get_info);
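tcpi_notsent_bytes above is derived from two wrapping 32-bit sequence counters, so the unsigned difference is reinterpreted as a signed value and clamped at zero. A short standalone illustration of why that works across wraparound; notsent_bytes() is a made-up helper, not kernel code.

#include <stdint.h>
#include <stdio.h>

/*
 * Bytes written by the application but not yet sent: the difference of two
 * wrapping 32-bit sequence counters, reinterpreted as signed and clamped at
 * zero so a transiently "ahead" snd_nxt never produces a huge bogus value.
 */
static int notsent_bytes(uint32_t write_seq, uint32_t snd_nxt)
{
	int32_t diff = (int32_t)(write_seq - snd_nxt);

	return diff > 0 ? diff : 0;
}

int main(void)
{
	printf("%d\n", notsent_bytes(5000, 4000));		/* 1000 unsent bytes */
	printf("%d\n", notsent_bytes(100, 0xfffffff0u));	/* 116, across wrap */
	printf("%d\n", notsent_bytes(4000, 4005));		/* clamped to 0 */
	return 0;
}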
@@@ -2727,6 -2736,7 +2736,7 @@@ static int do_tcp_getsockopt(struct soc { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); int val, len;
if (get_user(len, optlen)) @@@ -2761,12 -2771,12 +2771,12 @@@ val = keepalive_probes(tp); break; case TCP_SYNCNT: - val = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; + val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; break; case TCP_LINGER2: val = tp->linger2; if (val >= 0) - val = (val ? : sysctl_tcp_fin_timeout) / HZ; + val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, @@@ -2950,7 -2960,7 +2960,7 @@@ static void __tcp_alloc_md5sig_pool(voi struct crypto_hash *hash;
hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR_OR_NULL(hash)) + if (IS_ERR(hash)) return; per_cpu(tcp_md5sig_pool, cpu).md5_desc.tfm = hash; } diff --combined net/ipv4/tcp_input.c index 3b2c8e9,5ee6fe0..e6e65f7 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@@ -80,9 -80,7 +80,7 @@@ int sysctl_tcp_timestamps __read_mostl int sysctl_tcp_window_scaling __read_mostly = 1; int sysctl_tcp_sack __read_mostly = 1; int sysctl_tcp_fack __read_mostly = 1; - int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; int sysctl_tcp_max_reordering __read_mostly = 300; - EXPORT_SYMBOL(sysctl_tcp_reordering); int sysctl_tcp_dsack __read_mostly = 1; int sysctl_tcp_app_win __read_mostly = 31; int sysctl_tcp_adv_win_scale __read_mostly = 1; @@@ -126,6 -124,10 +124,10 @@@ int sysctl_tcp_invalid_ratelimit __read #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
+ #define REXMIT_NONE 0 /* no loss recovery to do */ + #define REXMIT_LOST 1 /* retransmit packets marked lost */ + #define REXMIT_NEW 2 /* FRTO-style transmit of unsent/new packets */ + /* Adapt the MSS value used to make delayed ack decision to the * real world. */ @@@ -1210,6 -1212,7 +1212,7 @@@ static u8 tcp_sacktag_one(struct sock * sacked |= TCPCB_SACKED_ACKED; state->flag |= FLAG_DATA_SACKED; tp->sacked_out += pcount; + tp->delivered += pcount; /* Out-of-order packets delivered */
fack_count += pcount;
@@@ -1821,8 -1824,12 +1824,12 @@@ static void tcp_check_reno_reordering(s static void tcp_add_reno_sack(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + u32 prior_sacked = tp->sacked_out; + tp->sacked_out++; tcp_check_reno_reordering(sk, 0); + if (tp->sacked_out > prior_sacked) + tp->delivered++; /* Some out-of-order packet is delivered */ tcp_verify_left_out(tp); }
@@@ -1834,6 -1841,7 +1841,7 @@@ static void tcp_remove_reno_sacks(struc
if (acked > 0) { /* One ACK acked hole. The rest eat duplicate ACKs. */ + tp->delivered += max_t(int, acked - tp->sacked_out, 1); if (acked - 1 >= tp->sacked_out) tp->sacked_out = 0; else @@@ -1873,6 -1881,7 +1881,7 @@@ void tcp_enter_loss(struct sock *sk { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); struct sk_buff *skb; bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ @@@ -1923,9 -1932,9 +1932,9 @@@ * suggests that the degree of reordering is over-estimated. */ if (icsk->icsk_ca_state <= TCP_CA_Disorder && - tp->sacked_out >= sysctl_tcp_reordering) + tp->sacked_out >= net->ipv4.sysctl_tcp_reordering) tp->reordering = min_t(unsigned int, tp->reordering, - sysctl_tcp_reordering); + net->ipv4.sysctl_tcp_reordering); tcp_set_ca_state(sk, TCP_CA_Loss); tp->high_seq = tp->snd_nxt; tcp_ecn_queue_cwr(tp); @@@ -2109,6 -2118,7 +2118,7 @@@ static bool tcp_time_to_recover(struct { struct tcp_sock *tp = tcp_sk(sk); __u32 packets_out; + int tcp_reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
/* Trick#1: The loss is proven. */ if (tp->lost_out) @@@ -2123,7 -2133,7 +2133,7 @@@ */ packets_out = tp->packets_out; if (packets_out <= tp->reordering && - tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) && + tp->sacked_out >= max_t(__u32, packets_out/2, tcp_reordering) && !tcp_may_send_now(sk)) { /* We have nothing to send. This connection is limited * either by receiver window or by application. @@@ -2467,14 -2477,12 +2477,12 @@@ static void tcp_init_cwnd_reduction(str tcp_ecn_queue_cwr(tp); }
- static void tcp_cwnd_reduction(struct sock *sk, const int prior_unsacked, - int fast_rexmit, int flag) + static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, + int flag) { struct tcp_sock *tp = tcp_sk(sk); int sndcnt = 0; int delta = tp->snd_ssthresh - tcp_packets_in_flight(tp); - int newly_acked_sacked = prior_unsacked - - (tp->packets_out - tp->sacked_out);
if (newly_acked_sacked <= 0 || WARN_ON_ONCE(!tp->prior_cwnd)) return; @@@ -2492,7 -2500,8 +2500,8 @@@ } else { sndcnt = min(delta, newly_acked_sacked); } - sndcnt = max(sndcnt, (fast_rexmit ? 1 : 0)); + /* Force a fast retransmit upon entering fast recovery */ + sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1)); tp->snd_cwnd = tcp_packets_in_flight(tp) + sndcnt; }
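tcp_cwnd_reduction() now takes the newly delivered count straight from the caller and forces at least one segment out while recovery has not transmitted anything yet (prr_out == 0), so the fast retransmit that opens recovery is never starved. A deliberately simplified userspace sketch of that decision follows; the real proportional term uses prr_delivered, ssthresh and the prior cwnd rather than the rough halving shown here, and prr_sndcnt() is an illustrative name.

#include <stdio.h>

/*
 * Simplified proportional-rate-reduction step: given how many packets this
 * ACK newly delivered, decide how many segments recovery may (re)transmit.
 * prr_out counts what recovery already sent; forcing a minimum of one
 * segment while prr_out == 0 is what the prr_out ? 0 : 1 clamp above encodes.
 */
static int prr_sndcnt(int newly_delivered, int in_flight, int ssthresh,
		      int prr_out)
{
	int sndcnt;

	if (in_flight > ssthresh)
		sndcnt = newly_delivered / 2;	/* crude stand-in for the
						 * proportional reduction term */
	else
		sndcnt = newly_delivered;	/* hold cwnd near ssthresh */

	if (sndcnt < (prr_out ? 0 : 1))
		sndcnt = prr_out ? 0 : 1;
	return sndcnt;
}

int main(void)
{
	printf("entering recovery, nothing delivered yet: %d\n",
	       prr_sndcnt(0, 20, 10, 0));
	printf("mid recovery, 4 packets newly delivered : %d\n",
	       prr_sndcnt(4, 20, 10, 3));
	return 0;
}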
@@@ -2537,7 -2546,7 +2546,7 @@@ static void tcp_try_keep_open(struct so } }
- static void tcp_try_to_open(struct sock *sk, int flag, const int prior_unsacked) + static void tcp_try_to_open(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk);
@@@ -2551,8 -2560,6 +2560,6 @@@
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { tcp_try_keep_open(sk); - } else { - tcp_cwnd_reduction(sk, prior_unsacked, 0, flag); } }
@@@ -2662,7 -2669,8 +2669,8 @@@ static void tcp_enter_recovery(struct s /* Process an ACK in CA_Loss state. Move to CA_Open if lost data are * recovered or spurious. Otherwise retransmits more on partial ACKs. */ - static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack) + static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack, + int *rexmit) { struct tcp_sock *tp = tcp_sk(sk); bool recovered = !before(tp->snd_una, tp->high_seq); @@@ -2684,10 -2692,15 +2692,15 @@@ tp->frto = 0; /* Step 3.a. loss was real */ } else if (flag & FLAG_SND_UNA_ADVANCED && !recovered) { tp->high_seq = tp->snd_nxt; - __tcp_push_pending_frames(sk, tcp_current_mss(sk), - TCP_NAGLE_OFF); - if (after(tp->snd_nxt, tp->high_seq)) - return; /* Step 2.b */ + /* Step 2.b. Try send new data (but deferred until cwnd + * is updated in tcp_ack()). Otherwise fall back to + * the conventional recovery. + */ + if (tcp_send_head(sk) && + after(tcp_wnd_end(tp), tp->snd_nxt)) { + *rexmit = REXMIT_NEW; + return; + } tp->frto = 0; } } @@@ -2706,12 -2719,11 +2719,11 @@@ else if (flag & FLAG_SND_UNA_ADVANCED) tcp_reset_reno_sack(tp); } - tcp_xmit_retransmit_queue(sk); + *rexmit = REXMIT_LOST; }
/* Undo during fast recovery after partial ACK. */ - static bool tcp_try_undo_partial(struct sock *sk, const int acked, - const int prior_unsacked, int flag) + static bool tcp_try_undo_partial(struct sock *sk, const int acked) { struct tcp_sock *tp = tcp_sk(sk);
@@@ -2726,10 -2738,8 +2738,8 @@@ * can undo. Otherwise we clock out new packets but do not * mark more packets lost or retransmit more. */ - if (tp->retrans_out) { - tcp_cwnd_reduction(sk, prior_unsacked, 0, flag); + if (tp->retrans_out) return true; - }
if (!tcp_any_retrans_done(sk)) tp->retrans_stamp = 0; @@@ -2748,21 -2758,21 +2758,21 @@@ * taking into account both packets sitting in receiver's buffer and * packets lost by network. * - * Besides that it does CWND reduction, when packet loss is detected - * and changes state of machine. + * Besides that it updates the congestion state when packet loss or ECN + * is detected. But it does not reduce the cwnd, it is done by the + * congestion control later. * * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ static void tcp_fastretrans_alert(struct sock *sk, const int acked, - const int prior_unsacked, - bool is_dupack, int flag) + bool is_dupack, int *ack_flag, int *rexmit) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); + int fast_rexmit = 0, flag = *ack_flag; bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && (tcp_fackets_out(tp) > tp->reordering)); - int fast_rexmit = 0;
if (WARN_ON(!tp->packets_out && tp->sacked_out)) tp->sacked_out = 0; @@@ -2809,8 -2819,10 +2819,10 @@@
/* Use RACK to detect loss */ if (sysctl_tcp_recovery & TCP_RACK_LOST_RETRANS && - tcp_rack_mark_lost(sk)) + tcp_rack_mark_lost(sk)) { flag |= FLAG_LOST_RETRANS; + *ack_flag |= FLAG_LOST_RETRANS; + }
/* E. Process state. */ switch (icsk->icsk_ca_state) { @@@ -2819,7 -2831,7 +2831,7 @@@ if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); } else { - if (tcp_try_undo_partial(sk, acked, prior_unsacked, flag)) + if (tcp_try_undo_partial(sk, acked)) return; /* Partial ACK arrived. Force fast retransmit. */ do_lost = tcp_is_reno(tp) || @@@ -2831,7 -2843,7 +2843,7 @@@ } break; case TCP_CA_Loss: - tcp_process_loss(sk, flag, is_dupack); + tcp_process_loss(sk, flag, is_dupack, rexmit); if (icsk->icsk_ca_state != TCP_CA_Open && !(flag & FLAG_LOST_RETRANS)) return; @@@ -2848,7 -2860,7 +2860,7 @@@ tcp_try_undo_dsack(sk);
if (!tcp_time_to_recover(sk, flag)) { - tcp_try_to_open(sk, flag, prior_unsacked); + tcp_try_to_open(sk, flag); return; }
@@@ -2870,8 -2882,7 +2882,7 @@@
if (do_lost) tcp_update_scoreboard(sk, fast_rexmit); - tcp_cwnd_reduction(sk, prior_unsacked, fast_rexmit, flag); - tcp_xmit_retransmit_queue(sk); + *rexmit = REXMIT_LOST; }
/* Kathleen Nichols' algorithm for tracking the minimum value of @@@ -2896,10 -2907,7 +2907,10 @@@ static void tcp_update_rtt_min(struct s { const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ; struct rtt_meas *m = tcp_sk(sk)->rtt_min; - struct rtt_meas rttm = { .rtt = (rtt_us ? : 1), .ts = now }; + struct rtt_meas rttm = { + .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1), + .ts = now, + }; u32 elapsed;
/* Check if the new measurement updates the 1st, 2nd, or 3rd choices */ @@@ -3096,7 -3104,7 +3107,7 @@@ static void tcp_ack_tstamp(struct sock * arrived at the other end. */ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, - u32 prior_snd_una, + u32 prior_snd_una, int *acked, struct tcp_sacktag_state *sack) { const struct inet_connection_sock *icsk = inet_csk(sk); @@@ -3154,10 -3162,13 +3165,13 @@@ flag |= FLAG_ORIG_SACK_ACKED; }
- if (sacked & TCPCB_SACKED_ACKED) + if (sacked & TCPCB_SACKED_ACKED) { tp->sacked_out -= acked_pcount; - else if (tcp_is_sack(tp) && !tcp_skb_spurious_retrans(tp, skb)) - tcp_rack_advance(tp, &skb->skb_mstamp, sacked); + } else if (tcp_is_sack(tp)) { + tp->delivered += acked_pcount; + if (!tcp_skb_spurious_retrans(tp, skb)) + tcp_rack_advance(tp, &skb->skb_mstamp, sacked); + } if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount;
@@@ -3266,6 -3277,7 +3280,7 @@@ } } #endif + *acked = pkts_acked; return flag; }
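Much of the tcp_input.c churn above threads a new tp->delivered counter through SACK scoreboard updates, RENO dupack accounting and tcp_clean_rtx_queue(), so the ACK path can later hand "packets newly delivered by this ACK" (tp->delivered minus a snapshot taken on entry) to the congestion-control step. A toy illustration of that bookkeeping with made-up structures, not the kernel's tcp_sock:

#include <stdio.h>

/*
 * Toy per-connection state: delivered is a monotonically increasing count
 * of packets known to have reached the receiver, bumped on cumulative ACKs
 * and on newly SACKed segments, mirroring the tp->delivered counter.
 */
struct conn {
	unsigned int delivered;
};

static void on_cumulative_ack(struct conn *c, unsigned int acked_pkts)
{
	c->delivered += acked_pkts;
}

static void on_new_sack(struct conn *c, unsigned int sacked_pkts)
{
	c->delivered += sacked_pkts;
}

int main(void)
{
	struct conn c = { .delivered = 0 };
	unsigned int prior, newly;

	prior = c.delivered;		/* snapshot when the ACK arrives */
	on_new_sack(&c, 2);		/* two segments SACKed out of order */
	on_cumulative_ack(&c, 3);	/* three segments cumulatively ACKed */
	newly = c.delivered - prior;	/* what congestion control gets fed */

	printf("newly delivered by this ACK: %u\n", newly);
	return 0;
}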
@@@ -3299,21 -3311,36 +3314,36 @@@ static inline bool tcp_ack_is_dubious(c /* Decide wheather to run the increase function of congestion control. */ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) { - if (tcp_in_cwnd_reduction(sk)) - return false; - /* If reordering is high then always grow cwnd whenever data is * delivered regardless of its ordering. Otherwise stay conservative * and only grow cwnd on in-order delivery (RFC5681). A stretched ACK w/ * new SACK or ECE mark may first advance cwnd here and later reduce * cwnd in tcp_fastretrans_alert() based on more states. */ - if (tcp_sk(sk)->reordering > sysctl_tcp_reordering) + if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering) return flag & FLAG_FORWARD_PROGRESS;
return flag & FLAG_DATA_ACKED; }
+ /* The "ultimate" congestion control function that aims to replace the rigid + * cwnd increase and decrease control (tcp_cong_avoid,tcp_*cwnd_reduction). + * It's called toward the end of processing an ACK with precise rate + * information. All transmission or retransmission are delayed afterwards. + */ + static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, + int flag) + { + if (tcp_in_cwnd_reduction(sk)) { + /* Reduce cwnd if state mandates */ + tcp_cwnd_reduction(sk, acked_sacked, flag); + } else if (tcp_may_raise_cwnd(sk, flag)) { + /* Advance cwnd if state allows */ + tcp_cong_avoid(sk, ack, acked_sacked); + } + tcp_update_pacing_rate(sk); + } + /* Check that window update is acceptable. * The function assumes that snd_una<=ack<=snd_next. */ @@@ -3509,6 -3536,27 +3539,27 @@@ static inline void tcp_in_ack_event(str icsk->icsk_ca_ops->in_ack_event(sk, flags); }
+ /* Congestion control has updated the cwnd already. So if we're in + * loss recovery then now we do any new sends (for FRTO) or + * retransmits (for CA_Loss or CA_recovery) that make sense. + */ + static void tcp_xmit_recovery(struct sock *sk, int rexmit) + { + struct tcp_sock *tp = tcp_sk(sk); + + if (rexmit == REXMIT_NONE) + return; + + if (unlikely(rexmit == 2)) { + __tcp_push_pending_frames(sk, tcp_current_mss(sk), + TCP_NAGLE_OFF); + if (after(tp->snd_nxt, tp->high_seq)) + return; + tp->frto = 0; + } + tcp_xmit_retransmit_queue(sk); + } + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@@ -3521,8 -3569,9 +3572,9 @@@ bool is_dupack = false; u32 prior_fackets; int prior_packets = tp->packets_out; - const int prior_unsacked = tp->packets_out - tp->sacked_out; + u32 prior_delivered = tp->delivered; int acked = 0; /* Number of packets newly acked */ + int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
sack_state.first_sackt.v64 = 0;
@@@ -3611,23 -3660,16 +3663,16 @@@ goto no_queue;
/* See if we can take anything off of the retransmit queue. */ - acked = tp->packets_out; - flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, + flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, &sack_state); - acked -= tp->packets_out;
if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, acked, prior_unsacked, - is_dupack, flag); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); } if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag);
- /* Advance cwnd if state allows */ - if (tcp_may_raise_cwnd(sk, flag)) - tcp_cong_avoid(sk, ack, acked); - if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) { struct dst_entry *dst = __sk_dst_get(sk); if (dst) @@@ -3636,14 -3678,14 +3681,14 @@@
if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); - tcp_update_pacing_rate(sk); + tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag); + tcp_xmit_recovery(sk, rexmit); return 1;
no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, acked, prior_unsacked, - is_dupack, flag); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@@ -3666,8 -3708,8 +3711,8 @@@ old_ack if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, &sack_state); - tcp_fastretrans_alert(sk, acked, prior_unsacked, - is_dupack, flag); + tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_xmit_recovery(sk, rexmit); }
SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); @@@ -3998,7 -4040,7 +4043,7 @@@ void tcp_reset(struct sock *sk * * If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT. */ - static void tcp_fin(struct sock *sk) + void tcp_fin(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk);
@@@ -5512,6 -5554,9 +5557,9 @@@ static bool tcp_rcv_fastopen_synack(str tp->syn_data_acked = tp->syn_data; if (tp->syn_data_acked) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); + + tcp_fastopen_add_skb(sk, synack); + return false; }
@@@ -6118,9 -6163,10 +6166,10 @@@ static bool tcp_syn_flood_action(const struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; bool want_cookie = false; + struct net *net = sock_net(sk);
#ifdef CONFIG_SYN_COOKIES - if (sysctl_tcp_syncookies) { + if (net->ipv4.sysctl_tcp_syncookies) { msg = "Sending cookies"; want_cookie = true; NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES); @@@ -6129,7 -6175,7 +6178,7 @@@ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
if (!queue->synflood_warned && - sysctl_tcp_syncookies != 2 && + net->ipv4.sysctl_tcp_syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) pr_info("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", proto, ntohs(tcp_hdr(skb)->dest), msg); @@@ -6162,6 -6208,7 +6211,7 @@@ int tcp_conn_request(struct request_soc __u32 isn = TCP_SKB_CB(skb)->tcp_tw_isn; struct tcp_options_received tmp_opt; struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); struct sock *fastopen_sk = NULL; struct dst_entry *dst = NULL; struct request_sock *req; @@@ -6172,7 -6219,7 +6222,7 @@@ * limitations, they conserve resources and peer is * evidently real one. */ - if ((sysctl_tcp_syncookies == 2 || + if ((net->ipv4.sysctl_tcp_syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); if (!want_cookie) @@@ -6238,7 -6285,7 +6288,7 @@@ } } /* Kill the following clause, if you dislike this way. */ - else if (!sysctl_tcp_syncookies && + else if (!net->ipv4.sysctl_tcp_syncookies && (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < (sysctl_max_syn_backlog >> 2)) && !tcp_peer_is_proven(req, dst, false, diff --combined net/ipv4/tcp_ipv4.c index 487ac67,3f872a6..4c8d58d --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@@ -311,7 -311,7 +311,7 @@@ static void do_redirect(struct sk_buff
/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */ -void tcp_req_err(struct sock *sk, u32 seq) +void tcp_req_err(struct sock *sk, u32 seq, bool abort) { struct request_sock *req = inet_reqsk(sk); struct net *net = sock_net(sk); @@@ -323,7 -323,7 +323,7 @@@
if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - } else { + } else if (abort) { /* * Still in SYN_RECV, just remove it silently. * There is no good way to pass the error to the newly @@@ -383,12 -383,7 +383,12 @@@ void tcp_v4_err(struct sk_buff *icmp_sk } seq = ntohl(th->seq); if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq); + return tcp_req_err(sk, seq, + type == ICMP_PARAMETERPROB || + type == ICMP_TIME_EXCEEDED || + (type == ICMP_DEST_UNREACH && + (code == ICMP_NET_UNREACH || + code == ICMP_HOST_UNREACH)));
bh_lock_sock(sk); /* If too many ICMPs get dropped on busy @@@ -642,8 -637,8 +642,8 @@@ static void tcp_v4_send_reset(const str * Incoming packet is checked with md5 hash with finding key, * no RST generated if md5 hash doesn't match. */ - sk1 = __inet_lookup_listener(net, - &tcp_hashinfo, ip_hdr(skb)->saddr, + sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0, + ip_hdr(skb)->saddr, th->source, ip_hdr(skb)->daddr, ntohs(th->source), inet_iif(skb)); /* don't send rst if it can't find key */ @@@ -865,7 -860,6 +865,6 @@@ static void tcp_v4_reqsk_destructor(str kfree(inet_rsk(req)->opt); }
- #ifdef CONFIG_TCP_MD5SIG /* * RFC2385 MD5 checksumming requires a mapping of @@@ -1587,7 -1581,8 +1586,8 @@@ int tcp_v4_rcv(struct sk_buff *skb TCP_SKB_CB(skb)->sacked = 0;
lookup: - sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); + sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, + th->dest); if (!sk) goto no_tcp_socket;
@@@ -1597,30 -1592,28 +1597,30 @@@ process
if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk;
sk = req->rsk_listener; - if (tcp_v4_inbound_md5_hash(sk, skb)) - goto discard_and_relse; - if (likely(sk->sk_state == TCP_LISTEN)) { - nsk = tcp_check_req(sk, skb, req, false); - } else { + if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) { + reqsk_put(req); + goto discard_it; + } + if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } @@@ -1703,7 -1696,8 +1703,8 @@@ do_time_wait switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), - &tcp_hashinfo, + &tcp_hashinfo, skb, + __tcp_hdrlen(th), iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); @@@ -2395,6 -2389,16 +2396,16 @@@ static int __net_init tcp_sk_init(struc net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
+ net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES; + net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; + net->ipv4.sysctl_tcp_syncookies = 1; + net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; + net->ipv4.sysctl_tcp_retries1 = TCP_RETR1; + net->ipv4.sysctl_tcp_retries2 = TCP_RETR2; + net->ipv4.sysctl_tcp_orphan_retries = 0; + net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; + net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; + return 0; fail: tcp_sk_exit(net); diff --combined net/ipv4/udp.c index 95d2f19,9fc4e9c..836abe5 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@@ -356,8 -356,8 +356,8 @@@ EXPORT_SYMBOL(udp_lib_get_port) * match_wildcard == false: addresses must be exactly the same, i.e. * 0.0.0.0 only equals to 0.0.0.0 */ - static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2, - bool match_wildcard) + int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2, + bool match_wildcard) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
@@@ -848,32 -848,20 +848,20 @@@ void udp_set_csum(bool nocheck, struct { struct udphdr *uh = udp_hdr(skb);
- if (nocheck) + if (nocheck) { uh->check = 0; - else if (skb_is_gso(skb)) + } else if (skb_is_gso(skb)) { uh->check = ~udp_v4_check(len, saddr, daddr, 0); - else if (skb_dst(skb) && skb_dst(skb)->dev && - (skb_dst(skb)->dev->features & - (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM))) { - - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { + uh->check = 0; + uh->check = udp_v4_check(len, saddr, daddr, lco_csum(skb)); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } else { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum_start = skb_transport_header(skb) - skb->head; skb->csum_offset = offsetof(struct udphdr, check); uh->check = ~udp_v4_check(len, saddr, daddr, 0); - } else { - __wsum csum; - - BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - - uh->check = 0; - csum = skb_checksum(skb, 0, len, 0); - uh->check = udp_v4_check(len, saddr, daddr, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; } } EXPORT_SYMBOL(udp_set_csum); @@@ -1048,10 -1036,8 +1036,10 @@@ int udp_sendmsg(struct sock *sk, struc if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc, sk->sk_family == AF_INET6); - if (err) + if (unlikely(err)) { + kfree(ipc.opt); return err; + } if (ipc.opt) free = 1; connected = 0; diff --combined net/ipv6/addrconf.c index bdd7eac,ac0ba9e..4751f89 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@@ -583,7 -583,7 +583,7 @@@ static int inet6_netconf_get_devconf(st if (err < 0) goto errout;
- err = EINVAL; + err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) goto errout;
@@@ -3538,7 -3538,6 +3538,7 @@@ static void addrconf_dad_begin(struct i { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; + bool notify = false;
addrconf_join_solict(dev, &ifp->addr);
@@@ -3584,7 -3583,7 +3584,7 @@@ /* Because optimistic nodes can use this address, * notify listeners. If DAD fails, RTM_DELADDR is sent. */ - ipv6_ifa_notify(RTM_NEWADDR, ifp); + notify = true; } }
@@@ -3592,8 -3591,6 +3592,8 @@@ out: spin_unlock(&ifp->lock); read_unlock_bh(&idev->lock); + if (notify) + ipv6_ifa_notify(RTM_NEWADDR, ifp); }
static void addrconf_dad_start(struct inet6_ifaddr *ifp) @@@ -4714,6 -4711,8 +4714,8 @@@ static inline void ipv6_store_devconf(s array[DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = cnf->ignore_routes_with_linkdown; /* we omit DEVCONF_STABLE_SECRET for now */ array[DEVCONF_USE_OIF_ADDRS_ONLY] = cnf->use_oif_addrs_only; + array[DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = cnf->drop_unicast_in_l2_multicast; + array[DEVCONF_DROP_UNSOLICITED_NA] = cnf->drop_unsolicited_na; }
static inline size_t inet6_ifla6_size(void) @@@ -5788,6 -5787,20 +5790,20 @@@ static struct addrconf_sysctl_tabl .proc_handler = addrconf_sysctl_ignore_routes_with_linkdown, }, { + .procname = "drop_unicast_in_l2_multicast", + .data = &ipv6_devconf.drop_unicast_in_l2_multicast, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "drop_unsolicited_na", + .data = &ipv6_devconf.drop_unsolicited_na, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { /* sentinel */ } }, diff --combined net/ipv6/ip6_gre.c index a69aad1,a94e506..f7c9560 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@@ -360,7 -360,7 +360,7 @@@ static void ip6gre_tunnel_uninit(struc struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
ip6gre_tunnel_unlink(ign, t); - ip6_tnl_dst_reset(t); + dst_cache_reset(&t->dst_cache); dev_put(dev); }
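This ip6_gre.c hunk, like the ip_tunnel.c and ip_gre.c changes earlier in the diff, drops the open-coded destination caching in favour of the shared dst_cache_get()/dst_cache_set_ip6()/dst_cache_reset() helpers. The access pattern they all follow is sketched below with a stand-in cache type and fake route lookup rather than the real dst_cache API.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for a cached route: a valid flag plus an opaque route id. */
struct route_cache {
	bool valid;
	int route_id;
};

static int slow_route_lookup(int daddr)
{
	printf("  (slow lookup for %d)\n", daddr);
	return daddr * 10;	/* pretend this is an expensive FIB walk */
}

/*
 * Fast path: reuse the cached route if present, otherwise look one up and
 * remember it -- the dst_cache_get()/dst_cache_set_*() pattern.
 */
static int route_for_xmit(struct route_cache *c, int daddr)
{
	if (!c->valid) {
		c->route_id = slow_route_lookup(daddr);
		c->valid = true;
	}
	return c->route_id;
}

/* Equivalent of dst_cache_reset(): tunnel parameters changed. */
static void route_cache_reset(struct route_cache *c)
{
	c->valid = false;
}

int main(void)
{
	struct route_cache cache = { false, 0 };

	route_for_xmit(&cache, 7);	/* miss: fills the cache */
	route_for_xmit(&cache, 7);	/* hit: no lookup */
	route_cache_reset(&cache);	/* e.g. remote address updated */
	route_for_xmit(&cache, 7);	/* miss again */
	return 0;
}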
@@@ -633,7 -633,7 +633,7 @@@ static netdev_tx_t ip6gre_xmit2(struct }
if (!fl6->flowi6_mark) - dst = ip6_tnl_dst_get(tunnel); + dst = dst_cache_get(&tunnel->dst_cache);
if (!dst) { dst = ip6_route_output(net, NULL, fl6); @@@ -702,7 -702,7 +702,7 @@@ }
if (!fl6->flowi6_mark && ndst) - ip6_tnl_dst_set(tunnel, ndst); + dst_cache_set_ip6(&tunnel->dst_cache, ndst, &fl6->saddr); skb_dst_set(skb, dst);
proto = NEXTHDR_GRE; @@@ -1009,7 -1009,7 +1009,7 @@@ static int ip6gre_tnl_change(struct ip6 t->parms.o_key = p->o_key; t->parms.i_flags = p->i_flags; t->parms.o_flags = p->o_flags; - ip6_tnl_dst_reset(t); + dst_cache_reset(&t->dst_cache); ip6gre_tnl_link_config(t, set_mtu); return 0; } @@@ -1219,7 -1219,7 +1219,7 @@@ static void ip6gre_dev_free(struct net_ { struct ip6_tnl *t = netdev_priv(dev);
- ip6_tnl_dst_destroy(t); + dst_cache_destroy(&t->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@@ -1257,7 -1257,7 +1257,7 @@@ static int ip6gre_tunnel_init_common(st if (!dev->tstats) return -ENOMEM;
- ret = ip6_tnl_dst_init(tunnel); + ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL); if (ret) { free_percpu(dev->tstats); dev->tstats = NULL; @@@ -1512,7 -1512,6 +1512,7 @@@ static void ip6gre_tap_setup(struct net dev->destructor = ip6gre_dev_free;
dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; }
static int ip6gre_newlink(struct net *src_net, struct net_device *dev, diff --combined net/ipv6/tcp_ipv6.c index 5c8c842,9977b6f..33f2820 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@@ -327,7 -327,6 +327,7 @@@ static void tcp_v6_err(struct sk_buff * struct tcp_sock *tp; __u32 seq, snd_una; struct sock *sk; + bool fatal; int err;
sk = __inet6_lookup_established(net, &tcp_hashinfo, @@@ -346,9 -345,8 +346,9 @@@ return; } seq = ntohl(th->seq); + fatal = icmpv6_err_convert(type, code, &err); if (sk->sk_state == TCP_NEW_SYN_RECV) - return tcp_req_err(sk, seq); + return tcp_req_err(sk, seq, fatal);
bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) @@@ -402,6 -400,7 +402,6 @@@ goto out; }
- icmpv6_err_convert(type, code, &err);
/* Might be for an request_sock */ switch (sk->sk_state) { @@@ -867,7 -866,8 +867,8 @@@ static void tcp_v6_send_reset(const str * no RST generated if md5 hash doesn't match. */ sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev), - &tcp_hashinfo, &ipv6h->saddr, + &tcp_hashinfo, NULL, 0, + &ipv6h->saddr, th->source, &ipv6h->daddr, ntohs(th->source), tcp_v6_iif(skb)); if (!sk1) @@@ -1376,8 -1376,8 +1377,8 @@@ static int tcp_v6_rcv(struct sk_buff *s hdr = ipv6_hdr(skb);
lookup: - sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest, - inet6_iif(skb)); + sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), + th->source, th->dest, inet6_iif(skb)); if (!sk) goto no_tcp_socket;
@@@ -1387,7 -1387,7 +1388,7 @@@ process
if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); - struct sock *nsk = NULL; + struct sock *nsk;
sk = req->rsk_listener; tcp_v6_fill_cb(skb, hdr, th); @@@ -1395,24 -1395,24 +1396,24 @@@ reqsk_put(req); goto discard_it; } - if (likely(sk->sk_state == TCP_LISTEN)) { - nsk = tcp_check_req(sk, skb, req, false); - } else { + if (unlikely(sk->sk_state != TCP_LISTEN)) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } + sock_hold(sk); + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); - goto discard_it; + goto discard_and_relse; } if (nsk == sk) { - sock_hold(sk); reqsk_put(req); tcp_v6_restore_cb(skb); } else if (tcp_child_process(sk, nsk, skb)) { tcp_v6_send_reset(nsk, skb); - goto discard_it; + goto discard_and_relse; } else { + sock_put(sk); return 0; } } @@@ -1501,6 -1501,7 +1502,7 @@@ do_time_wait struct sock *sk2;
sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + skb, __tcp_hdrlen(th), &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), tcp_v6_iif(skb)); @@@ -1866,7 -1867,7 +1868,7 @@@ struct proto tcpv6_prot = .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, - .hash = inet_hash, + .hash = inet6_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, diff --combined net/netfilter/nfnetlink.c index 857ae89,9a99f68..2278d9a --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@@ -127,13 -127,6 +127,6 @@@ int nfnetlink_has_listeners(struct net } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
- struct sk_buff *nfnetlink_alloc_skb(struct net *net, unsigned int size, - u32 dst_portid, gfp_t gfp_mask) - { - return netlink_alloc_skb(net->nfnl, size, dst_portid, gfp_mask); - } - EXPORT_SYMBOL_GPL(nfnetlink_alloc_skb); - int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags) { @@@ -311,14 -304,14 +304,14 @@@ replay #endif { nfnl_unlock(subsys_id); - netlink_ack(skb, nlh, -EOPNOTSUPP); + netlink_ack(oskb, nlh, -EOPNOTSUPP); return kfree_skb(skb); } }
if (!ss->commit || !ss->abort) { nfnl_unlock(subsys_id); - netlink_ack(skb, nlh, -EOPNOTSUPP); + netlink_ack(oskb, nlh, -EOPNOTSUPP); return kfree_skb(skb); }
@@@ -328,12 -321,10 +321,12 @@@ nlh = nlmsg_hdr(skb); err = 0;
- if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) || - skb->len < nlh->nlmsg_len) { - err = -EINVAL; - goto ack; + if (nlh->nlmsg_len < NLMSG_HDRLEN || + skb->len < nlh->nlmsg_len || + nlmsg_len(nlh) < sizeof(struct nfgenmsg)) { + nfnl_err_reset(&err_list); + status |= NFNL_BATCH_FAILURE; + goto done; }
/* Only requests are handled by the kernel */ @@@ -408,7 -399,7 +401,7 @@@ ack * pointing to the batch header. */ nfnl_err_reset(&err_list); - netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM); + netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM); status |= NFNL_BATCH_FAILURE; goto done; } diff --combined net/sched/sch_api.c index af1acf0,c9673b5e..de1e176 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@@ -1841,7 -1841,7 +1841,7 @@@ reclassify return err; }
- return -1; + return TC_ACT_UNSPEC; /* signal: continue lookup */ #ifdef CONFIG_NET_CLS_ACT reset: if (unlikely(limit++ >= MAX_REC_LOOP)) { @@@ -1852,7 -1852,6 +1852,7 @@@ }
tp = old_tp; + protocol = tc_skb_protocol(skb); goto reclassify; #endif } diff --combined net/sctp/socket.c index e878da0,6427b9d..b89501e --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@@ -5538,7 -5538,6 +5538,7 @@@ static int sctp_getsockopt_hmac_ident(s struct sctp_hmac_algo_param *hmacs; __u16 data_len = 0; u32 num_idents; + int i;
if (!ep->auth_enable) return -EACCES; @@@ -5556,12 -5555,8 +5556,12 @@@ return -EFAULT; if (put_user(num_idents, &p->shmac_num_idents)) return -EFAULT; - if (copy_to_user(p->shmac_idents, hmacs->hmac_ids, data_len)) - return -EFAULT; + for (i = 0; i < num_idents; i++) { + __u16 hmacid = ntohs(hmacs->hmac_ids[i]); + + if (copy_to_user(&p->shmac_idents[i], &hmacid, sizeof(__u16))) + return -EFAULT; + } return 0; }
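The sctp_getsockopt_hmac_ident() change above stops copying the raw big-endian HMAC identifier array to userspace and converts each entry with ntohs() first. A tiny standalone demonstration of the difference on a little-endian host; the identifier values are only examples.

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* Identifiers as stored internally, in network byte order. */
	const uint16_t wire_ids[] = { htons(1), htons(3) };
	uint16_t out[2];
	size_t i;

	/*
	 * Old behaviour: a byte-for-byte copy leaves the values byte-swapped
	 * on little-endian hosts (1 reads back as 256).
	 */
	memcpy(out, wire_ids, sizeof(wire_ids));
	printf("raw copy : %u %u\n", (unsigned int)out[0], (unsigned int)out[1]);

	/* Fixed behaviour: convert each identifier before handing it out. */
	for (i = 0; i < 2; i++)
		out[i] = ntohs(wire_ids[i]);
	printf("converted: %u %u\n", (unsigned int)out[0], (unsigned int)out[1]);
	return 0;
}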
@@@ -6106,9 -6101,10 +6106,10 @@@ static int sctp_getsockopt(struct sock return retval; }
- static void sctp_hash(struct sock *sk) + static int sctp_hash(struct sock *sk) { /* STUB */ + return 0; }
static void sctp_unhash(struct sock *sk) diff --combined net/tipc/link.c index 347cdc9,3e513da..e31d92f --- a/net/tipc/link.c +++ b/net/tipc/link.c @@@ -123,7 -123,6 +123,6 @@@ struct tipc_stats struct tipc_link { u32 addr; char name[TIPC_MAX_LINK_NAME]; - struct tipc_media_addr *media_addr; struct net *net;
/* Management and link supervision data */ @@@ -904,8 -903,10 +903,10 @@@ int tipc_link_xmit(struct tipc_link *l if (unlikely(l->backlog[i].len >= l->backlog[i].limit)) return link_schedule_user(l, list); } - if (unlikely(msg_size(hdr) > mtu)) + if (unlikely(msg_size(hdr) > mtu)) { + skb_queue_purge(list); return -EMSGSIZE; + }
/* Prepare each packet for sending, and add to relevant queue: */ while (skb_queue_len(list)) { @@@ -917,8 -918,10 +918,10 @@@
if (likely(skb_queue_len(transmq) < maxwin)) { _skb = skb_clone(skb, GFP_ATOMIC); - if (!_skb) + if (!_skb) { + skb_queue_purge(list); return -ENOBUFS; + } __skb_dequeue(list); __skb_queue_tail(transmq, skb); __skb_queue_tail(xmitq, _skb); @@@ -1261,26 -1264,6 +1264,6 @@@ drop return rc; }
- /* - * Send protocol message to the other endpoint. - */ - static void tipc_link_proto_xmit(struct tipc_link *l, u32 msg_typ, - int probe_msg, u32 gap, u32 tolerance, - u32 priority) - { - struct sk_buff *skb = NULL; - struct sk_buff_head xmitq; - - __skb_queue_head_init(&xmitq); - tipc_link_build_proto_msg(l, msg_typ, probe_msg, gap, - tolerance, priority, &xmitq); - skb = __skb_dequeue(&xmitq); - if (!skb) - return; - tipc_bearer_xmit_skb(l->net, l->bearer_id, skb, l->media_addr); - l->rcv_unacked = 0; - } - static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq) @@@ -1479,6 -1462,12 +1462,12 @@@ static int tipc_link_proto_rcv(struct t if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) l->tolerance = peers_tol;
+ if (peers_prio && in_range(peers_prio, TIPC_MIN_LINK_PRI, + TIPC_MAX_LINK_PRI)) { + l->priority = peers_prio; + rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + } + l->silent_intv_cnt = 0; l->stats.recv_states++; if (msg_probe(hdr)) @@@ -1973,10 -1962,8 +1962,10 @@@ int tipc_nl_add_bc_link(struct net *net
hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, NLM_F_MULTI, TIPC_NL_LINK_GET); - if (!hdr) + if (!hdr) { + tipc_bcast_unlock(net); return -EMSGSIZE; + }
attrs = nla_nest_start(msg->skb, TIPC_NLA_LINK); if (!attrs) @@@ -2023,16 -2010,18 +2012,18 @@@ msg_full return -EMSGSIZE; }
- void tipc_link_set_tolerance(struct tipc_link *l, u32 tol) + void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, + struct sk_buff_head *xmitq) { l->tolerance = tol; - tipc_link_proto_xmit(l, STATE_MSG, 0, 0, tol, 0); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq); }
- void tipc_link_set_prio(struct tipc_link *l, u32 prio) + void tipc_link_set_prio(struct tipc_link *l, u32 prio, + struct sk_buff_head *xmitq) { l->priority = prio; - tipc_link_proto_xmit(l, STATE_MSG, 0, 0, 0, prio); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq); }
void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) diff --combined net/tipc/node.c index 9d7a16f,10a1e87..9fcc2fb --- a/net/tipc/node.c +++ b/net/tipc/node.c @@@ -346,6 -346,12 +346,6 @@@ struct tipc_node *tipc_node_create(stru skb_queue_head_init(&n->bc_entry.inputq2); for (i = 0; i < MAX_BEARERS; i++) spin_lock_init(&n->links[i].lock); - hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); - list_for_each_entry_rcu(temp_node, &tn->node_list, list) { - if (n->addr < temp_node->addr) - break; - } - list_add_tail_rcu(&n->list, &temp_node->list); n->state = SELF_DOWN_PEER_LEAVING; n->signature = INVALID_NODE_SIG; n->active_links[0] = INVALID_BEARER_ID; @@@ -366,12 -372,6 +366,12 @@@ tipc_node_get(n); setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n); n->keepalive_intv = U32_MAX; + hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); + list_for_each_entry_rcu(temp_node, &tn->node_list, list) { + if (n->addr < temp_node->addr) + break; + } + list_add_tail_rcu(&n->list, &temp_node->list); exit: spin_unlock_bh(&tn->node_list_lock); return n; @@@ -1166,7 -1166,7 +1166,7 @@@ msg_full * @dnode: address of destination node * @selector: a number used for deterministic link selection * Consumes the buffer chain, except when returning -ELINKCONG - * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE + * Returns 0 if success, otherwise: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE,-ENOBUF */ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector) @@@ -1174,33 -1174,43 +1174,43 @@@ struct tipc_link_entry *le = NULL; struct tipc_node *n; struct sk_buff_head xmitq; - int bearer_id = -1; - int rc = -EHOSTUNREACH; + int bearer_id; + int rc; + + if (in_own_node(net, dnode)) { + tipc_sk_rcv(net, list); + return 0; + }
- __skb_queue_head_init(&xmitq); n = tipc_node_find(net, dnode); - if (likely(n)) { - tipc_node_read_lock(n); - bearer_id = n->active_links[selector & 1]; - if (bearer_id >= 0) { - le = &n->links[bearer_id]; - spin_lock_bh(&le->lock); - rc = tipc_link_xmit(le->link, list, &xmitq); - spin_unlock_bh(&le->lock); - } + if (unlikely(!n)) { + skb_queue_purge(list); + return -EHOSTUNREACH; + } + + tipc_node_read_lock(n); + bearer_id = n->active_links[selector & 1]; + if (unlikely(bearer_id == INVALID_BEARER_ID)) { tipc_node_read_unlock(n); - if (likely(!rc)) - tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); - else if (rc == -ENOBUFS) - tipc_node_link_down(n, bearer_id, false); tipc_node_put(n); - return rc; + skb_queue_purge(list); + return -EHOSTUNREACH; }
- if (likely(in_own_node(net, dnode))) { - tipc_sk_rcv(net, list); - return 0; - } + __skb_queue_head_init(&xmitq); + le = &n->links[bearer_id]; + spin_lock_bh(&le->lock); + rc = tipc_link_xmit(le->link, list, &xmitq); + spin_unlock_bh(&le->lock); + tipc_node_read_unlock(n); + + if (likely(rc == 0)) + tipc_bearer_xmit(net, bearer_id, &xmitq, &le->maddr); + else if (rc == -ENOBUFS) + tipc_node_link_down(n, bearer_id, false); + + tipc_node_put(n); + return rc; }
@@@ -1637,9 -1647,12 +1647,12 @@@ int tipc_nl_node_set_link(struct sk_buf char *name; struct tipc_link *link; struct tipc_node *node; + struct sk_buff_head xmitq; struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1]; struct net *net = sock_net(skb->sk);
+ __skb_queue_head_init(&xmitq); + if (!info->attrs[TIPC_NLA_LINK]) return -EINVAL;
@@@ -1683,13 -1696,13 +1696,13 @@@ u32 tol;
tol = nla_get_u32(props[TIPC_NLA_PROP_TOL]); - tipc_link_set_tolerance(link, tol); + tipc_link_set_tolerance(link, tol, &xmitq); } if (props[TIPC_NLA_PROP_PRIO]) { u32 prio;
prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); - tipc_link_set_prio(link, prio); + tipc_link_set_prio(link, prio, &xmitq); } if (props[TIPC_NLA_PROP_WIN]) { u32 win; @@@ -1701,7 -1714,7 +1714,7 @@@
out: tipc_node_read_unlock(node); - + tipc_bearer_xmit(net, bearer_id, &xmitq, &node->links[bearer_id].maddr); return res; }
diff --combined net/unix/af_unix.c index f75f847,b374555..8269da7 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@@ -1496,7 -1496,7 +1496,7 @@@ static void unix_detach_fds(struct scm_ UNIXCB(skb).fp = NULL;
for (i = scm->fp->count-1; i >= 0; i--) - unix_notinflight(scm->fp->fp[i]); + unix_notinflight(scm->fp->user, scm->fp->fp[i]); }
static void unix_destruct_scm(struct sk_buff *skb) @@@ -1534,7 -1534,6 +1534,6 @@@ static int unix_attach_fds(struct scm_c { int i; unsigned char max_level = 0; - int unix_sock_count = 0;
if (too_many_unix_fds(current)) return -ETOOMANYREFS; @@@ -1542,11 -1541,9 +1541,9 @@@ for (i = scm->fp->count - 1; i >= 0; i--) { struct sock *sk = unix_get_socket(scm->fp->fp[i]);
- if (sk) { - unix_sock_count++; + if (sk) max_level = max(max_level, unix_sk(sk)->recursion_level); - } } if (unlikely(max_level > MAX_RECURSION_LEVEL)) return -ETOOMANYREFS; @@@ -1561,7 -1558,7 +1558,7 @@@ return -ENOMEM;
for (i = scm->fp->count - 1; i >= 0; i--) - unix_inflight(scm->fp->fp[i]); + unix_inflight(scm->fp->user, scm->fp->fp[i]); return max_level; }
@@@ -1781,12 -1778,7 +1778,12 @@@ restart_locked goto out_unlock; }
- if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { + /* other == sk && unix_peer(other) != sk if + * - unix_peer(sk) == NULL, destination address bound to sk + * - unix_peer(sk) == sk by time of get but disconnected before lock + */ + if (other != sk && + unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { if (timeo) { timeo = unix_wait_for_peer(other, timeo);
@@@ -2282,15 -2274,13 +2279,15 @@@ static int unix_stream_read_generic(str size_t size = state->size; unsigned int last_len;
- err = -EINVAL; - if (sk->sk_state != TCP_ESTABLISHED) + if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { + err = -EINVAL; goto out; + }
- err = -EOPNOTSUPP; - if (flags & MSG_OOB) + if (unlikely(flags & MSG_OOB)) { + err = -EOPNOTSUPP; goto out; + }
target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, noblock); @@@ -2312,7 -2302,6 +2309,7 @@@ bool drop_skb; struct sk_buff *skb, *last;
+redo: unix_state_lock(sk); if (sock_flag(sk, SOCK_DEAD)) { err = -ECONNRESET; @@@ -2337,11 -2326,9 +2334,11 @@@ again goto unlock;
unix_state_unlock(sk); - err = -EAGAIN; - if (!timeo) + if (!timeo) { + err = -EAGAIN; break; + } + mutex_unlock(&u->readlock);
timeo = unix_stream_data_wait(sk, timeo, last, @@@ -2354,7 -2341,7 +2351,7 @@@ }
mutex_lock(&u->readlock); - continue; + goto redo; unlock: unix_state_unlock(sk); break;