The following commit has been merged in the master branch:

commit 3485630f8f66dc2edbffb89893158880575dcf87
Merge: 9ce39d1671368323832bf6b96a56d8b63534bac4 53b948356554376ec6f89016376825d48bf396c3
Author: Stephen Rothwell <sfr@canb.auug.org.au>
Date:   Tue Aug 8 11:36:19 2017 +1000
Merge remote-tracking branch 'net-next/master'
diff --combined MAINTAINERS
index 6aab3b7172c2,b3a8ca6aa3ed..433f3cf91390
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -301,7 -301,6 +301,7 @@@ S: Supporte
 F: drivers/acpi/
 F: drivers/pnp/pnpacpi/
 F: include/linux/acpi.h
+F: include/linux/fwnode.h
 F: include/acpi/
 F: Documentation/acpi/
 F: Documentation/ABI/testing/sysfs-bus-acpi
@@@ -311,14 -310,6 +311,14 @@@ F: drivers/pci/*/*acpi
 F: drivers/pci/*/*/*acpi*
 F: tools/power/acpi/
+ACPI APEI
+M: "Rafael J. Wysocki" <rjw@rjwysocki.net>
+M: Len Brown <lenb@kernel.org>
+L: linux-acpi@vger.kernel.org
+R: Tony Luck <tony.luck@intel.com>
+R: Borislav Petkov <bp@alien8.de>
+F: drivers/acpi/apei/
+
 ACPI COMPONENT ARCHITECTURE (ACPICA)
 M: Robert Moore <robert.moore@intel.com>
 M: Lv Zheng <lv.zheng@intel.com>
@@@ -778,12 -769,6 +778,12 @@@ W: http://ez.analog.com/community/linux
 S: Supported
 F: drivers/media/i2c/adv7180.c
+ANALOG DEVICES INC ADV748X DRIVER
+M: Kieran Bingham <kieran.bingham@ideasonboard.com>
+L: linux-media@vger.kernel.org
+S: Maintained
+F: drivers/media/i2c/adv748x/*
+
 ANALOG DEVICES INC ADV7511 DRIVER
 M: Hans Verkuil <hans.verkuil@cisco.com>
 L: linux-media@vger.kernel.org
@@@ -2492,7 -2477,7 +2492,7 @@@ Q: https://patchwork.open-mesh.org/proj
 S: Maintained
 F: Documentation/ABI/testing/sysfs-class-net-batman-adv
 F: Documentation/ABI/testing/sysfs-class-net-mesh
- F: Documentation/networking/batman-adv.txt
+ F: Documentation/networking/batman-adv.rst
 F: include/uapi/linux/batman_adv.h
 F: net/batman-adv/
@@@ -5116,6 -5101,7 +5116,7 @@@ F: include/linux/of_net.
 F: include/linux/phy.h
 F: include/linux/phy_fixed.h
 F: include/linux/platform_data/mdio-gpio.h
+ F: include/linux/platform_data/mdio-bcm-unimac.h
 F: include/trace/events/mdio.h
 F: include/uapi/linux/mdio.h
 F: include/uapi/linux/mii.h
@@@ -6162,6 -6148,14 +6163,14 @@@ S: Maintaine
 F: drivers/net/ethernet/hisilicon/
 F: Documentation/devicetree/bindings/net/hisilicon*.txt
+ HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3)
+ M: Yisen Zhuang <yisen.zhuang@huawei.com>
+ M: Salil Mehta <salil.mehta@huawei.com>
+ L: netdev@vger.kernel.org
+ W: http://www.hisilicon.com
+ S: Maintained
+ F: drivers/net/ethernet/hisilicon/hns3/
+
 HISILICON ROCE DRIVER
 M: Lijun Ou <oulijun@huawei.com>
 M: Wei Hu(Xavier) <xavier.huwei@huawei.com>
@@@ -6272,6 -6266,7 +6281,7 @@@ M: Haiyang Zhang <haiyangz@microsoft.co
 M: Stephen Hemminger <sthemmin@microsoft.com>
 L: devel@linuxdriverproject.org
 S: Maintained
+ F: Documentation/networking/netvsc.txt
 F: arch/x86/include/asm/mshyperv.h
 F: arch/x86/include/uapi/asm/hyperv.h
 F: arch/x86/kernel/cpu/mshyperv.c
@@@ -8414,22 -8409,6 +8424,22 @@@ S: Supporte
 F: Documentation/devicetree/bindings/media/renesas,vsp1.txt
 F: drivers/media/platform/vsp1/
+MEDIA DRIVERS FOR ST STV0910 DEMODULATOR ICs
+M: Daniel Scheller <d.scheller.oss@gmail.com>
+L: linux-media@vger.kernel.org
+W: https://linuxtv.org
+T: git git://linuxtv.org/media_tree.git
+S: Maintained
+F: drivers/media/dvb-frontends/stv0910*
+
+MEDIA DRIVERS FOR ST STV6111 TUNER ICs
+M: Daniel Scheller <d.scheller.oss@gmail.com>
+L: linux-media@vger.kernel.org
+W: https://linuxtv.org
+T: git git://linuxtv.org/media_tree.git
+S: Maintained
+F: drivers/media/dvb-frontends/stv6111*
+
 MEDIA INPUT INFRASTRUCTURE (V4L/DVB)
 M: Mauro Carvalho Chehab <mchehab@s-opensource.com>
 M: Mauro Carvalho Chehab <mchehab@kernel.org>
@@@ -8455,7 -8434,9 +8465,9 @@@ F: include/uapi/linux/uvcvideo.
 MEDIATEK ETHERNET DRIVER
 M: Felix Fietkau <nbd@openwrt.org>
- M: John Crispin <blogic@openwrt.org>
+ M: John Crispin <john@phrozen.org>
+ M: Sean Wang <sean.wang@mediatek.com>
+ M: Nelson Chang <nelson.chang@mediatek.com>
 L: netdev@vger.kernel.org
 S: Maintained
 F: drivers/net/ethernet/mediatek/
@@@ -9740,7 -9721,7 +9752,7 @@@ S: Maintaine
 F: drivers/media/i2c/ov5640.c
 OMNIVISION OV5647 SENSOR DRIVER
-M: Ramiro Oliveira <roliveir@synopsys.com>
+M: Luis Oliveira <lolivei@synopsys.com>
 L: linux-media@vger.kernel.org
 T: git git://linuxtv.org/media_tree.git
 S: Maintained
@@@ -10738,7 -10719,6 +10750,7 @@@ L: linux-media@vger.kernel.or
 T: git git://linuxtv.org/media_tree.git
 S: Maintained
 F: drivers/media/usb/pulse8-cec/*
+F: Documentation/media/cec-drivers/pulse8-cec.rst
 PVRUSB2 VIDEO4LINUX DRIVER
 M: Mike Isely <isely@pobox.com>
@@@ -12639,12 -12619,6 +12651,12 @@@ F: drivers/clocksource/arc_timer.
 F: drivers/tty/serial/arc_uart.c
 T: git git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc.git
+SYNOPSYS ARC SDP clock driver
+M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+S: Supported
+F: drivers/clk/axs10x/*
+F: Documentation/devicetree/bindings/clock/snps,pll-clock.txt
+
 SYNOPSYS ARC SDP platform support
 M: Alexey Brodkin <abrodkin@synopsys.com>
 S: Supported
@@@ -12681,13 -12655,6 +12693,13 @@@ L: linux-mmc@vger.kernel.or
 S: Maintained
 F: drivers/mmc/host/dw_mmc*
+SYNOPSYS HSDK RESET CONTROLLER DRIVER
+M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+S: Supported
+F: drivers/reset/reset-hsdk-v1.c
+F: include/dt-bindings/reset/snps,hsdk-v1-reset.h
+F: Documentation/devicetree/bindings/reset/snps,hsdk-v1-reset.txt
+
 SYSTEM CONFIGURATION (SYSCON)
 M: Lee Jones <lee.jones@linaro.org>
 M: Arnd Bergmann <arnd@arndb.de>
diff --combined drivers/net/ethernet/ibm/ibmvnic.c
index c45e8e3b82d3,99576ba4187f..32c116652755
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@@ -111,7 -111,6 +111,7 @@@ static void send_request_map(struct ibm
 static void send_request_unmap(struct ibmvnic_adapter *, u8);
 static void send_login(struct ibmvnic_adapter *adapter);
 static void send_cap_queries(struct ibmvnic_adapter *adapter);
+static int init_sub_crqs(struct ibmvnic_adapter *);
 static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
 static int ibmvnic_init(struct ibmvnic_adapter *);
 static void release_crq_queue(struct ibmvnic_adapter *);
@@@ -347,6 -346,31 +347,31 @@@ static void replenish_pools(struct ibmv
 	}
 }
+ static void release_stats_buffers(struct ibmvnic_adapter *adapter)
+ {
+ 	kfree(adapter->tx_stats_buffers);
+ 	kfree(adapter->rx_stats_buffers);
+ }
+
+ static int init_stats_buffers(struct ibmvnic_adapter *adapter)
+ {
+ 	adapter->tx_stats_buffers =
+ 			kcalloc(adapter->req_tx_queues,
+ 				sizeof(struct ibmvnic_tx_queue_stats),
+ 				GFP_KERNEL);
+ 	if (!adapter->tx_stats_buffers)
+ 		return -ENOMEM;
+
+ 	adapter->rx_stats_buffers =
+ 			kcalloc(adapter->req_rx_queues,
+ 				sizeof(struct ibmvnic_rx_queue_stats),
+ 				GFP_KERNEL);
+ 	if (!adapter->rx_stats_buffers)
+ 		return -ENOMEM;
+
+ 	return 0;
+ }
+
 static void release_stats_token(struct ibmvnic_adapter *adapter)
 {
 	struct device *dev = &adapter->vdev->dev;
@@@ -652,7 -676,6 +677,7 @@@ static int ibmvnic_login(struct net_dev
 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 	unsigned long timeout = msecs_to_jiffies(30000);
 	struct device *dev = &adapter->vdev->dev;
+	int rc;
do { if (adapter->renegotiate) { @@@ -666,18 -689,6 +691,18 @@@ dev_err(dev, "Capabilities query timeout\n"); return -1; } + rc = init_sub_crqs(adapter); + if (rc) { + dev_err(dev, + "Initialization of SCRQ's failed\n"); + return -1; + } + rc = init_sub_crq_irqs(adapter); + if (rc) { + dev_err(dev, + "Initialization of SCRQ's irqs failed\n"); + return -1; + } }
reinit_completion(&adapter->init_done); @@@ -700,6 -711,7 +725,7 @@@ static void release_resources(struct ib release_rx_pools(adapter);
release_stats_token(adapter); + release_stats_buffers(adapter); release_error_buffers(adapter);
if (adapter->napi) { @@@ -777,6 -789,10 +803,10 @@@ static int init_resources(struct ibmvni if (rc) return rc;
+ rc = init_stats_buffers(adapter); + if (rc) + return rc; + rc = init_stats_token(adapter); if (rc) return rc; @@@ -1259,6 -1275,9 +1289,9 @@@ out netdev->stats.tx_packets += tx_packets; adapter->tx_send_failed += tx_send_failed; adapter->tx_map_failed += tx_map_failed; + adapter->tx_stats_buffers[queue_num].packets += tx_packets; + adapter->tx_stats_buffers[queue_num].bytes += tx_bytes; + adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped;
return ret; } @@@ -1560,7 -1579,8 +1593,8 @@@ restart_poll rx_comp.correlator); /* do error checking */ if (next->rx_comp.rc) { - netdev_err(netdev, "rx error %x\n", next->rx_comp.rc); + netdev_dbg(netdev, "rx buffer returned with rc %x\n", + be16_to_cpu(next->rx_comp.rc)); /* free the entry */ next->rx_comp.first = 0; remove_buff_from_pool(adapter, rx_buff); @@@ -1599,6 -1619,8 +1633,8 @@@ napi_gro_receive(napi, skb); /* send it up */ netdev->stats.rx_packets++; netdev->stats.rx_bytes += length; + adapter->rx_stats_buffers[scrq_num].packets++; + adapter->rx_stats_buffers[scrq_num].bytes += length; frames_processed++; }
@@@ -1708,18 -1730,36 +1744,36 @@@ static u32 ibmvnic_get_link(struct net_
 static void ibmvnic_get_ringparam(struct net_device *netdev,
 				  struct ethtool_ringparam *ring)
 {
- 	ring->rx_max_pending = 0;
- 	ring->tx_max_pending = 0;
+ 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+ 	ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
+ 	ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
 	ring->rx_mini_max_pending = 0;
 	ring->rx_jumbo_max_pending = 0;
- 	ring->rx_pending = 0;
- 	ring->tx_pending = 0;
+ 	ring->rx_pending = adapter->req_rx_add_entries_per_subcrq;
+ 	ring->tx_pending = adapter->req_tx_entries_per_subcrq;
 	ring->rx_mini_pending = 0;
 	ring->rx_jumbo_pending = 0;
 }
+ static void ibmvnic_get_channels(struct net_device *netdev,
+ 				 struct ethtool_channels *channels)
+ {
+ 	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
+
+ 	channels->max_rx = adapter->max_rx_queues;
+ 	channels->max_tx = adapter->max_tx_queues;
+ 	channels->max_other = 0;
+ 	channels->max_combined = 0;
+ 	channels->rx_count = adapter->req_rx_queues;
+ 	channels->tx_count = adapter->req_tx_queues;
+ 	channels->other_count = 0;
+ 	channels->combined_count = 0;
+ }
+
 static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 {
+ 	struct ibmvnic_adapter *adapter = netdev_priv(dev);
 	int i;
if (stringset != ETH_SS_STATS) @@@ -1727,13 -1767,39 +1781,39 @@@
 	for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN)
 		memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
+
+ 	for (i = 0; i < adapter->req_tx_queues; i++) {
+ 		snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
+ 		data += ETH_GSTRING_LEN;
+
+ 		snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
+ 		data += ETH_GSTRING_LEN;
+
+ 		snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i);
+ 		data += ETH_GSTRING_LEN;
+ 	}
+
+ 	for (i = 0; i < adapter->req_rx_queues; i++) {
+ 		snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i);
+ 		data += ETH_GSTRING_LEN;
+
+ 		snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i);
+ 		data += ETH_GSTRING_LEN;
+
+ 		snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i);
+ 		data += ETH_GSTRING_LEN;
+ 	}
 }
 static int ibmvnic_get_sset_count(struct net_device *dev, int sset)
 {
+ 	struct ibmvnic_adapter *adapter = netdev_priv(dev);
+
 	switch (sset) {
 	case ETH_SS_STATS:
- 		return ARRAY_SIZE(ibmvnic_stats);
+ 		return ARRAY_SIZE(ibmvnic_stats) +
+ 		       adapter->req_tx_queues * NUM_TX_STATS +
+ 		       adapter->req_rx_queues * NUM_RX_STATS;
 	default:
 		return -EOPNOTSUPP;
 	}
@@@ -1744,7 -1810,7 +1824,7 @@@ static void ibmvnic_get_ethtool_stats(s
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(dev);
 	union ibmvnic_crq crq;
- 	int i;
+ 	int i, j;
memset(&crq, 0, sizeof(crq)); crq.request_statistics.first = IBMVNIC_CRQ_CMD; @@@ -1759,7 -1825,26 +1839,26 @@@ wait_for_completion(&adapter->stats_done);
 	for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
- 		data[i] = IBMVNIC_GET_STAT(adapter, ibmvnic_stats[i].offset);
+ 		data[i] = be64_to_cpu(IBMVNIC_GET_STAT(adapter,
+ 						ibmvnic_stats[i].offset));
+
+ 	for (j = 0; j < adapter->req_tx_queues; j++) {
+ 		data[i] = adapter->tx_stats_buffers[j].packets;
+ 		i++;
+ 		data[i] = adapter->tx_stats_buffers[j].bytes;
+ 		i++;
+ 		data[i] = adapter->tx_stats_buffers[j].dropped_packets;
+ 		i++;
+ 	}
+
+ 	for (j = 0; j < adapter->req_rx_queues; j++) {
+ 		data[i] = adapter->rx_stats_buffers[j].packets;
+ 		i++;
+ 		data[i] = adapter->rx_stats_buffers[j].bytes;
+ 		i++;
+ 		data[i] = adapter->rx_stats_buffers[j].interrupts;
+ 		i++;
+ 	}
 }
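[Aside, not part of the merged diff: the three ethtool callbacks changed above form one contract -- ibmvnic_get_sset_count() declares how many u64 statistics exist, ibmvnic_get_strings() writes exactly that many ETH_GSTRING_LEN-sized names, and ibmvnic_get_ethtool_stats() fills exactly that many values in the same order. A minimal sketch of how a reader such as `ethtool -S` pairs the two arrays; the variable names are illustrative:]

	/* strings[] as produced by get_strings(), data[] by
	 * get_ethtool_stats(), n_stats from get_sset_count().  Index i
	 * must mean the same statistic in both arrays, which is why the
	 * tx/rx loops in the three callbacks iterate in the same order.
	 */
	for (i = 0; i < n_stats; i++)
		printf("%.*s: %llu\n", ETH_GSTRING_LEN,
		       strings + i * ETH_GSTRING_LEN,
		       (unsigned long long)data[i]);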
static const struct ethtool_ops ibmvnic_ethtool_ops = { @@@ -1768,6 -1853,7 +1867,7 @@@ .set_msglevel = ibmvnic_set_msglevel, .get_link = ibmvnic_get_link, .get_ringparam = ibmvnic_get_ringparam, + .get_channels = ibmvnic_get_channels, .get_strings = ibmvnic_get_strings, .get_sset_count = ibmvnic_get_sset_count, .get_ethtool_stats = ibmvnic_get_ethtool_stats, @@@ -2064,6 -2150,8 +2164,8 @@@ static irqreturn_t ibmvnic_interrupt_rx struct ibmvnic_sub_crq_queue *scrq = instance; struct ibmvnic_adapter *adapter = scrq->adapter;
+ adapter->rx_stats_buffers[scrq->scrq_num].interrupts++; + if (napi_schedule_prep(&adapter->napi[scrq->scrq_num])) { disable_scrq_irq(adapter, scrq); __napi_schedule(&adapter->napi[scrq->scrq_num]); @@@ -3018,6 -3106,7 +3120,6 @@@ static void handle_request_cap_rsp(unio *req_value, (long int)be64_to_cpu(crq->request_capability_rsp. number), name); - release_sub_crqs(adapter); *req_value = be64_to_cpu(crq->request_capability_rsp.number); ibmvnic_send_req_caps(adapter, 1); return; @@@ -3864,10 -3953,7 +3966,7 @@@ static int ibmvnic_resume(struct devic if (adapter->state != VNIC_OPEN) return 0;
- /* kick the interrupt handlers just in case we lost an interrupt */ - for (i = 0; i < adapter->req_rx_queues; i++) - ibmvnic_interrupt_rx(adapter->rx_scrq[i]->irq, - adapter->rx_scrq[i]); + tasklet_schedule(&adapter->tasklet);
return 0; } diff --combined drivers/net/ethernet/intel/i40e/i40e_txrx.c index 2194960d5855,d464fceb300f..8a969d8f0790 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@@ -860,7 -860,7 +860,7 @@@ static bool i40e_clean_tx_irq(struct i4 netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes);
- #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+ #define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
 	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
 		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
 		/* Make sure that anybody stopping the queue after this
@@@ -1113,8 -1113,6 +1113,8 @@@ int i40e_setup_tx_descriptors(struct i4
 	if (!tx_ring->tx_bi)
 		goto err;
+	u64_stats_init(&tx_ring->syncp);
+
 	/* round up to nearest 4K */
 	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
 	/* add u32 for head writeback, align after this takes care of
@@@ -2065,7 -2063,7 +2065,7 @@@ static int i40e_clean_rx_irq(struct i40
 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
 	bool failure = false, xdp_xmit = false;
- 	while (likely(total_rx_packets < budget)) {
+ 	while (likely(total_rx_packets < (unsigned int)budget)) {
 		struct i40e_rx_buffer *rx_buffer;
 		union i40e_rx_desc *rx_desc;
 		struct xdp_buff xdp;
@@@ -2198,7 -2196,7 +2198,7 @@@
 	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
 	/* guarantee a trip back through this routine if there was a failure */
- 	return failure ? budget : total_rx_packets;
+ 	return failure ? budget : (int)total_rx_packets;
 }
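[Aside, not part of the merged diff: the (unsigned int) and (int) casts added above make the signedness of each comparison and return value explicit. A standalone sketch of the pitfall an implicit signed/unsigned conversion can cause; the values are hypothetical:]

	#include <stdio.h>

	int main(void)
	{
		int budget = -1;         /* hypothetical negative budget */
		unsigned int done = 10;

		/* budget is converted to unsigned int, i.e. UINT_MAX, so
		 * this prints "keep polling" even though budget < done.
		 */
		if (done < budget)
			printf("keep polling\n");
		else
			printf("done\n");
		return 0;
	}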
 static u32 i40e_buildreg_itr(const int type, const u16 itr)
@@@ -2453,9 -2451,15 +2453,15 @@@ static void i40e_atr(struct i40e_ring *
 		hlen = (hdr.network[0] & 0x0F) << 2;
 		l4_proto = hdr.ipv4->protocol;
 	} else {
- 		hlen = hdr.network - skb->data;
- 		l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
- 		hlen -= hdr.network - skb->data;
+ 		/* find the start of the innermost ipv6 header */
+ 		unsigned int inner_hlen = hdr.network - skb->data;
+ 		unsigned int h_offset = inner_hlen;
+
+ 		/* this function updates h_offset to the end of the header */
+ 		l4_proto =
+ 		    ipv6_find_hdr(skb, &h_offset, IPPROTO_TCP, NULL, NULL);
+ 		/* hlen will contain our best estimate of the tcp header */
+ 		hlen = h_offset - inner_hlen;
 	}
if (l4_proto != IPPROTO_TCP) diff --combined drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 4631ca8b8eb2,ea471604450e..4a990033c4d5 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@@ -513,7 -513,6 +513,7 @@@ nfp_net_tx_ring_init(struct nfp_net_tx_ tx_ring->idx = idx; tx_ring->r_vec = r_vec; tx_ring->is_xdp = is_xdp; + u64_stats_init(&tx_ring->r_vec->tx_sync);
tx_ring->qcidx = tx_ring->idx * nn->stride_tx; tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx); @@@ -533,7 -532,6 +533,7 @@@ nfp_net_rx_ring_init(struct nfp_net_rx_
rx_ring->idx = idx; rx_ring->r_vec = r_vec; + u64_stats_init(&rx_ring->r_vec->rx_sync);
rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx; rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx); @@@ -2660,6 -2658,7 +2660,7 @@@ static int nfp_net_netdev_close(struct /* Step 2: Tell NFP */ nfp_net_clear_config_and_disable(nn); + nfp_port_configure(netdev, false);
/* Step 3: Free resources */ @@@ -2777,16 -2776,21 +2778,21 @@@ static int nfp_net_netdev_open(struct n goto err_free_all;
/* Step 2: Configure the NFP + * - Ifup the physical interface if it exists * - Enable rings from 0 to tx_rings/rx_rings - 1. * - Write MAC address (in case it changed) * - Set the MTU * - Set the Freelist buffer size * - Enable the FW */ - err = nfp_net_set_config_and_enable(nn); + err = nfp_port_configure(netdev, true); if (err) goto err_free_all;
+ err = nfp_net_set_config_and_enable(nn); + if (err) + goto err_port_disable; + /* Step 3: Enable for kernel * - put some freelist descriptors on each RX ring * - enable NAPI on each ring @@@ -2797,6 -2801,8 +2803,8 @@@
return 0;
+ err_port_disable: + nfp_port_configure(netdev, false); err_free_all: nfp_net_close_free_all(nn); return err; diff --combined drivers/net/ethernet/qlogic/qed/qed_mcp.c index 3eb241657368,c1ecce6b9141..376485d99357 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@@ -253,7 -253,7 +253,7 @@@ int qed_mcp_cmd_init(struct qed_hwfn *p size = MFW_DRV_MSG_MAX_DWORDS(p_info->mfw_mb_length) * sizeof(u32); p_info->mfw_mb_cur = kzalloc(size, GFP_KERNEL); p_info->mfw_mb_shadow = kzalloc(size, GFP_KERNEL); - if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr) + if (!p_info->mfw_mb_cur || !p_info->mfw_mb_shadow) goto err;
return 0; @@@ -1097,6 -1097,31 +1097,31 @@@ static void qed_mcp_handle_transceiver_ DP_NOTICE(p_hwfn, "Transceiver is unplugged.\n"); }
+ static void qed_mcp_read_eee_config(struct qed_hwfn *p_hwfn,
+ 				    struct qed_ptt *p_ptt,
+ 				    struct qed_mcp_link_state *p_link)
+ {
+ 	u32 eee_status, val;
+
+ 	p_link->eee_adv_caps = 0;
+ 	p_link->eee_lp_adv_caps = 0;
+ 	eee_status = qed_rd(p_hwfn,
+ 			    p_ptt,
+ 			    p_hwfn->mcp_info->port_addr +
+ 			    offsetof(struct public_port, eee_status));
+ 	p_link->eee_active = !!(eee_status & EEE_ACTIVE_BIT);
+ 	val = (eee_status & EEE_LD_ADV_STATUS_MASK) >> EEE_LD_ADV_STATUS_OFFSET;
+ 	if (val & EEE_1G_ADV)
+ 		p_link->eee_adv_caps |= QED_EEE_1G_ADV;
+ 	if (val & EEE_10G_ADV)
+ 		p_link->eee_adv_caps |= QED_EEE_10G_ADV;
+ 	val = (eee_status & EEE_LP_ADV_STATUS_MASK) >> EEE_LP_ADV_STATUS_OFFSET;
+ 	if (val & EEE_1G_ADV)
+ 		p_link->eee_lp_adv_caps |= QED_EEE_1G_ADV;
+ 	if (val & EEE_10G_ADV)
+ 		p_link->eee_lp_adv_caps |= QED_EEE_10G_ADV;
+ }
+
 static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
 				       struct qed_ptt *p_ptt, bool b_reset)
 {
@@@ -1228,6 -1253,9 +1253,9 @@@
p_link->sfp_tx_fault = !!(status & LINK_STATUS_SFP_TX_FAULT);
+ if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) + qed_mcp_read_eee_config(p_hwfn, p_ptt, p_link); + qed_link_update(p_hwfn); out: spin_unlock_bh(&p_hwfn->mcp_info->link_lock); @@@ -1251,6 -1279,19 +1279,19 @@@ int qed_mcp_set_link(struct qed_hwfn *p phy_cfg.pause |= (params->pause.forced_tx) ? ETH_PAUSE_TX : 0; phy_cfg.adv_speed = params->speed.advertised_speeds; phy_cfg.loopback_mode = params->loopback_mode; + if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) { + if (params->eee.enable) + phy_cfg.eee_cfg |= EEE_CFG_EEE_ENABLED; + if (params->eee.tx_lpi_enable) + phy_cfg.eee_cfg |= EEE_CFG_TX_LPI; + if (params->eee.adv_caps & QED_EEE_1G_ADV) + phy_cfg.eee_cfg |= EEE_CFG_ADV_SPEED_1G; + if (params->eee.adv_caps & QED_EEE_10G_ADV) + phy_cfg.eee_cfg |= EEE_CFG_ADV_SPEED_10G; + phy_cfg.eee_cfg |= (params->eee.tx_lpi_timer << + EEE_TX_TIMER_USEC_OFFSET) & + EEE_TX_TIMER_USEC_MASK; + }
p_hwfn->b_drv_link_init = b_up;
@@@ -2822,3 -2863,28 +2863,28 @@@ void qed_mcp_resc_lock_default_init(str p_unlock->resource = resource; } } + + int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) + { + u32 mcp_resp; + int rc; + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_GET_MFW_FEATURE_SUPPORT, + 0, &mcp_resp, &p_hwfn->mcp_info->capabilities); + if (!rc) + DP_VERBOSE(p_hwfn, (QED_MSG_SP | NETIF_MSG_PROBE), + "MFW supported features: %08x\n", + p_hwfn->mcp_info->capabilities); + + return rc; + } + + int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) + { + u32 mcp_resp, mcp_param, features; + + features = DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE; + + return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_FEATURE_SUPPORT, + features, &mcp_resp, &mcp_param); + } diff --combined drivers/net/hyperv/hyperv_net.h index 12cc64bfcff8,c701b059c5ac..cffdc1829006 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@@ -147,7 -147,6 +147,6 @@@ struct hv_netvsc_packet struct netvsc_device_info { unsigned char mac_adr[ETH_ALEN]; int ring_size; - u32 max_num_vrss_chns; u32 num_chn; };
@@@ -183,13 -182,16 +182,16 @@@ struct rndis_device /* Interface */ struct rndis_message; struct netvsc_device; - int netvsc_device_add(struct hv_device *device, - const struct netvsc_device_info *info); + struct net_device_context; + + struct netvsc_device *netvsc_device_add(struct hv_device *device, + const struct netvsc_device_info *info); + int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx); void netvsc_device_remove(struct hv_device *device); - int netvsc_send(struct hv_device *device, + int netvsc_send(struct net_device_context *ndc, struct hv_netvsc_packet *packet, struct rndis_message *rndis_msg, - struct hv_page_buffer **page_buffer, + struct hv_page_buffer *page_buffer, struct sk_buff *skb); void netvsc_linkstatus_callback(struct hv_device *device_obj, struct rndis_message *resp); @@@ -200,10 -202,11 +202,11 @@@ int netvsc_recv_callback(struct net_dev const struct ndis_pkt_8021q_info *vlan); void netvsc_channel_cb(void *context); int netvsc_poll(struct napi_struct *napi, int budget); + bool rndis_filter_opened(const struct netvsc_device *nvdev); int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev); - int rndis_filter_device_add(struct hv_device *dev, - struct netvsc_device_info *info); + struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, + struct netvsc_device_info *info); void rndis_filter_update(struct netvsc_device *nvdev); void rndis_filter_device_remove(struct hv_device *dev, struct netvsc_device *nvdev); @@@ -215,7 -218,8 +218,8 @@@ int rndis_filter_receive(struct net_dev struct vmbus_channel *channel, void *data, u32 buflen);
- int rndis_filter_set_device_mac(struct net_device *ndev, char *mac); + int rndis_filter_set_device_mac(struct netvsc_device *ndev, + const char *mac);
void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
@@@ -654,13 -658,10 +658,10 @@@ struct recv_comp_data u32 status; };
- /* Netvsc Receive Slots Max */
- #define NETVSC_RECVSLOT_MAX (NETVSC_RECEIVE_BUFFER_SIZE / ETH_DATA_LEN + 1)
-
 struct multi_recv_comp {
- 	void *buf; /* queued receive completions */
- 	u32 first; /* first data entry */
- 	u32 next; /* next entry for writing */
+ 	struct recv_comp_data *slots;
+ 	u32 first; /* first data entry */
+ 	u32 next; /* next entry for writing */
 };
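[Aside, not part of the merged diff: slots/first/next above implement a fixed-size completion ring whose length is now the per-device recv_completion_cnt rather than the old NETVSC_RECVSLOT_MAX constant. A sketch of the index arithmetic, mirroring recv_comp_slot_avail() in the netvsc.c hunks further down; the helper names are illustrative:]

	/* Ring is empty when first == next; one slot stays unused so a
	 * full ring (avail == 0) is distinguishable from an empty one.
	 */
	static inline unsigned int mrc_filled(unsigned int first,
					      unsigned int next,
					      unsigned int count)
	{
		return next >= first ? next - first : (count - first) + next;
	}

	static inline unsigned int mrc_avail(unsigned int first,
					     unsigned int next,
					     unsigned int count)
	{
		return count - mrc_filled(first, next, count) - 1;
	}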
struct netvsc_stats { @@@ -679,6 -680,15 +680,15 @@@ struct netvsc_ethtool_stats unsigned long tx_busy; };
+ struct netvsc_vf_pcpu_stats {
+ 	u64 rx_packets;
+ 	u64 rx_bytes;
+ 	u64 tx_packets;
+ 	u64 tx_bytes;
+ 	struct u64_stats_sync syncp;
+ 	u32 tx_dropped;
+ };
+
 struct netvsc_reconfig {
 	struct list_head list;
 	u32 event;
@@@ -712,6 -722,9 +722,9 @@@ struct net_device_context
/* State to manage the associated VF interface. */ struct net_device __rcu *vf_netdev; + struct netvsc_vf_pcpu_stats __percpu *vf_stats; + struct work_struct vf_takeover; + struct work_struct vf_notify;
/* 1: allocated, serial number is valid. 0: not allocated */ u32 vf_alloc; @@@ -724,6 -737,7 +737,7 @@@ /* Per channel data */ struct netvsc_channel { struct vmbus_channel *channel; + struct netvsc_device *net_device; const struct vmpacket_descriptor *desc; struct napi_struct napi; struct multi_send_data msd; @@@ -746,7 -760,7 +760,7 @@@ struct netvsc_device u32 recv_buf_size; u32 recv_buf_gpadl_handle; u32 recv_section_cnt; - struct nvsp_1_receive_buffer_section *recv_section; + u32 recv_completion_cnt;
/* Send buffer allocated by us */ void *send_buf; @@@ -765,8 -779,7 +779,8 @@@ u32 max_chn; u32 num_chn;
- refcount_t sc_offered; + atomic_t open_chn; + wait_queue_head_t subchan_open;
struct rndis_device *extension;
@@@ -775,8 -788,6 +789,6 @@@ u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */
- atomic_t num_outstanding_recvs; - atomic_t open_cnt;
struct netvsc_channel chan_table[VRSS_CHANNEL_MAX]; @@@ -784,18 -795,6 +796,6 @@@ struct rcu_head rcu; };
- static inline struct netvsc_device * - net_device_to_netvsc_device(struct net_device *ndev) - { - return ((struct net_device_context *)netdev_priv(ndev))->nvdev; - } - - static inline struct netvsc_device * - hv_device_to_netvsc_device(struct hv_device *device) - { - return net_device_to_netvsc_device(hv_get_drvdata(device)); - } - /* NdisInitialize message */ struct rndis_initialize_request { u32 req_id; diff --combined drivers/net/hyperv/netvsc.c index d18c3326a1f7,9598220b3bcc..27102069c229 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@@ -29,6 -29,9 +29,9 @@@ #include <linux/netdevice.h> #include <linux/if_ether.h> #include <linux/vmalloc.h> + #include <linux/rtnetlink.h> + #include <linux/prefetch.h> + #include <asm/sync_bitops.h>
#include "hyperv_net.h" @@@ -41,7 -44,7 +44,7 @@@ void netvsc_switch_datapath(struct net_ { struct net_device_context *net_device_ctx = netdev_priv(ndev); struct hv_device *dev = net_device_ctx->device_ctx; - struct netvsc_device *nv_dev = net_device_ctx->nvdev; + struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev); struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
memset(init_pkt, 0, sizeof(struct nvsp_message)); @@@ -69,16 -72,12 +72,13 @@@ static struct netvsc_device *alloc_net_ if (!net_device) return NULL;
- net_device->chan_table[0].mrc.buf - = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data)); - init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; atomic_set(&net_device->open_cnt, 0); net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; init_completion(&net_device->channel_init_wait); + init_waitqueue_head(&net_device->subchan_open);
return net_device; } @@@ -90,7 -89,7 +90,7 @@@ static void free_netvsc_device(struct r int i;
for (i = 0; i < VRSS_CHANNEL_MAX; i++) - vfree(nvdev->chan_table[i].mrc.buf); + vfree(nvdev->chan_table[i].mrc.slots);
kfree(nvdev); } @@@ -104,7 -103,8 +104,8 @@@ static void netvsc_destroy_buf(struct h { struct nvsp_message *revoke_packet; struct net_device *ndev = hv_get_drvdata(device); - struct netvsc_device *net_device = net_device_to_netvsc_device(ndev); + struct net_device_context *ndc = netdev_priv(ndev); + struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev); int ret;
/* @@@ -168,12 -168,6 +169,6 @@@ net_device->recv_buf = NULL; }
- if (net_device->recv_section) { - net_device->recv_section_cnt = 0; - kfree(net_device->recv_section); - net_device->recv_section = NULL; - } - /* Deal with the send buffer we may have setup. * If we got a send section size, it means we received a * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent @@@ -236,11 -230,26 +231,26 @@@ kfree(net_device->send_section_map); }
+ int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx) + { + struct netvsc_channel *nvchan = &net_device->chan_table[q_idx]; + int node = cpu_to_node(nvchan->channel->target_cpu); + size_t size; + + size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data); + nvchan->mrc.slots = vzalloc_node(size, node); + if (!nvchan->mrc.slots) + nvchan->mrc.slots = vzalloc(size); + + return nvchan->mrc.slots ? 0 : -ENOMEM; + } + static int netvsc_init_buf(struct hv_device *device, struct netvsc_device *net_device) { int ret = 0; struct nvsp_message *init_packet; + struct nvsp_1_message_send_receive_buffer_complete *resp; struct net_device *ndev; size_t map_words; int node; @@@ -297,43 -306,41 +307,41 @@@ wait_for_completion(&net_device->channel_init_wait);
/* Check the response */ - if (init_packet->msg.v1_msg. - send_recv_buf_complete.status != NVSP_STAT_SUCCESS) { - netdev_err(ndev, "Unable to complete receive buffer " - "initialization with NetVsp - status %d\n", - init_packet->msg.v1_msg. - send_recv_buf_complete.status); + resp = &init_packet->msg.v1_msg.send_recv_buf_complete; + if (resp->status != NVSP_STAT_SUCCESS) { + netdev_err(ndev, + "Unable to complete receive buffer initialization with NetVsp - status %d\n", + resp->status); ret = -EINVAL; goto cleanup; }
/* Parse the response */ + netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n", + resp->num_sections, resp->sections[0].sub_alloc_size, + resp->sections[0].num_sub_allocs);
- net_device->recv_section_cnt = init_packet->msg. - v1_msg.send_recv_buf_complete.num_sections; - - net_device->recv_section = kmemdup( - init_packet->msg.v1_msg.send_recv_buf_complete.sections, - net_device->recv_section_cnt * - sizeof(struct nvsp_1_receive_buffer_section), - GFP_KERNEL); - if (net_device->recv_section == NULL) { - ret = -EINVAL; - goto cleanup; - } + net_device->recv_section_cnt = resp->num_sections;
/* * For 1st release, there should only be 1 section that represents the * entire receive buffer */ if (net_device->recv_section_cnt != 1 || - net_device->recv_section->offset != 0) { + resp->sections[0].offset != 0) { ret = -EINVAL; goto cleanup; }
- /* Now setup the send buffer. - */ + /* Setup receive completion ring */ + net_device->recv_completion_cnt + = round_up(resp->sections[0].num_sub_allocs + 1, + PAGE_SIZE / sizeof(u64)); + ret = netvsc_alloc_recv_comp_ring(net_device, 0); + if (ret) + goto cleanup; + + /* Now setup the send buffer. */ net_device->send_buf = vzalloc_node(net_device->send_buf_size, node); if (!net_device->send_buf) net_device->send_buf = vzalloc(net_device->send_buf_size); @@@ -550,7 -557,8 +558,8 @@@ void netvsc_device_remove(struct hv_dev { struct net_device *ndev = hv_get_drvdata(device); struct net_device_context *net_device_ctx = netdev_priv(ndev); - struct netvsc_device *net_device = net_device_ctx->nvdev; + struct netvsc_device *net_device + = rtnl_dereference(net_device_ctx->nvdev); int i;
netvsc_disconnect_vsp(device); @@@ -693,7 -701,7 +702,7 @@@ static u32 netvsc_copy_to_send_buf(stru u32 pend_size, struct hv_netvsc_packet *packet, struct rndis_message *rndis_msg, - struct hv_page_buffer **pb, + struct hv_page_buffer *pb, struct sk_buff *skb) { char *start = net_device->send_buf; @@@ -714,9 -722,9 +723,9 @@@ }
for (i = 0; i < page_count; i++) { - char *src = phys_to_virt((*pb)[i].pfn << PAGE_SHIFT); - u32 offset = (*pb)[i].offset; - u32 len = (*pb)[i].len; + char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT); + u32 offset = pb[i].offset; + u32 len = pb[i].len;
memcpy(dest, (src + offset), len); msg_size += len; @@@ -735,36 -743,32 +744,32 @@@ static inline int netvsc_send_pkt struct hv_device *device, struct hv_netvsc_packet *packet, struct netvsc_device *net_device, - struct hv_page_buffer **pb, + struct hv_page_buffer *pb, struct sk_buff *skb) { struct nvsp_message nvmsg; - struct netvsc_channel *nvchan - = &net_device->chan_table[packet->q_idx]; + struct nvsp_1_message_send_rndis_packet * const rpkt = + &nvmsg.msg.v1_msg.send_rndis_pkt; + struct netvsc_channel * const nvchan = + &net_device->chan_table[packet->q_idx]; struct vmbus_channel *out_channel = nvchan->channel; struct net_device *ndev = hv_get_drvdata(device); struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx); u64 req_id; int ret; - struct hv_page_buffer *pgbuf; u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);
nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT; - if (skb != NULL) { - /* 0 is RMC_DATA; */ - nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0; - } else { - /* 1 is RMC_CONTROL; */ - nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1; - } + if (skb) + rpkt->channel_type = 0; /* 0 is RMC_DATA */ + else + rpkt->channel_type = 1; /* 1 is RMC_CONTROL */
- nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index = - packet->send_buf_index; + rpkt->send_buf_section_index = packet->send_buf_index; if (packet->send_buf_index == NETVSC_INVALID_INDEX) - nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0; + rpkt->send_buf_section_size = 0; else - nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = - packet->total_data_buflen; + rpkt->send_buf_section_size = packet->total_data_buflen;
req_id = (ulong)skb;
@@@ -772,11 -776,11 +777,11 @@@ return -ENODEV;
if (packet->page_buf_cnt) { - pgbuf = packet->cp_partial ? (*pb) + - packet->rmsg_pgcnt : (*pb); + if (packet->cp_partial) + pb += packet->rmsg_pgcnt; + ret = vmbus_sendpacket_pagebuffer_ctl(out_channel, - pgbuf, - packet->page_buf_cnt, + pb, packet->page_buf_cnt, &nvmsg, sizeof(struct nvsp_message), req_id, @@@ -801,8 -805,10 +806,10 @@@ ret = -ENOSPC; } } else { - netdev_err(ndev, "Unable to send packet %p ret %d\n", - packet, ret); + netdev_err(ndev, + "Unable to send packet pages %u len %u, ret %d\n", + packet->page_buf_cnt, packet->total_data_buflen, + ret); }
return ret; @@@ -820,13 -826,16 +827,16 @@@ static inline void move_pkt_msd(struct msdp->count = 0; }
- int netvsc_send(struct hv_device *device, + /* RCU already held by caller */ + int netvsc_send(struct net_device_context *ndev_ctx, struct hv_netvsc_packet *packet, struct rndis_message *rndis_msg, - struct hv_page_buffer **pb, + struct hv_page_buffer *pb, struct sk_buff *skb) { - struct netvsc_device *net_device = hv_device_to_netvsc_device(device); + struct netvsc_device *net_device + = rcu_dereference_bh(ndev_ctx->nvdev); + struct hv_device *device = ndev_ctx->device_ctx; int ret = 0; struct netvsc_channel *nvchan; u32 pktlen = packet->total_data_buflen, msd_len = 0; @@@ -838,7 -847,7 +848,7 @@@ bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
/* If device is rescinded, return error and packet will get dropped. */ - if (unlikely(net_device->destroy)) + if (unlikely(!net_device || net_device->destroy)) return -ENODEV;
/* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get @@@ -943,130 -952,94 +953,94 @@@ send_now return ret; }
- static int netvsc_send_recv_completion(struct vmbus_channel *channel, - u64 transaction_id, u32 status) + /* Send pending recv completions */ + static int send_recv_completions(struct netvsc_channel *nvchan) { - struct nvsp_message recvcompMessage; + struct netvsc_device *nvdev = nvchan->net_device; + struct multi_recv_comp *mrc = &nvchan->mrc; + struct recv_comp_msg { + struct nvsp_message_header hdr; + u32 status; + } __packed; + struct recv_comp_msg msg = { + .hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE, + }; int ret;
- recvcompMessage.hdr.msg_type = - NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE; + while (mrc->first != mrc->next) { + const struct recv_comp_data *rcd + = mrc->slots + mrc->first;
- recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status; + msg.status = rcd->status; + ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg), + rcd->tid, VM_PKT_COMP, 0); + if (unlikely(ret)) + return ret;
- /* Send the completion */ - ret = vmbus_sendpacket(channel, &recvcompMessage, - sizeof(struct nvsp_message_header) + sizeof(u32), - transaction_id, VM_PKT_COMP, 0); - - return ret; - } - - static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx, - u32 *filled, u32 *avail) - { - struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc; - u32 first = mrc->first; - u32 next = mrc->next; - - *filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next : - next - first; - - *avail = NETVSC_RECVSLOT_MAX - *filled - 1; - } - - /* Read the first filled slot, no change to index */ - static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device - *nvdev, u16 q_idx) - { - struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc; - u32 filled, avail; - - if (unlikely(!mrc->buf)) - return NULL; + if (++mrc->first == nvdev->recv_completion_cnt) + mrc->first = 0; + }
- count_recv_comp_slot(nvdev, q_idx, &filled, &avail); - if (!filled) - return NULL; + /* receive completion ring has been emptied */ + if (unlikely(nvdev->destroy)) + wake_up(&nvdev->wait_drain);
- return mrc->buf + mrc->first * sizeof(struct recv_comp_data); + return 0; }
- /* Put the first filled slot back to available pool */ - static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx) + /* Count how many receive completions are outstanding */ + static void recv_comp_slot_avail(const struct netvsc_device *nvdev, + const struct multi_recv_comp *mrc, + u32 *filled, u32 *avail) { - struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc; - int num_recv; - - mrc->first = (mrc->first + 1) % NETVSC_RECVSLOT_MAX; + u32 count = nvdev->recv_completion_cnt;
- num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs); + if (mrc->next >= mrc->first) + *filled = mrc->next - mrc->first; + else + *filled = (count - mrc->first) + mrc->next;
- if (nvdev->destroy && num_recv == 0) - wake_up(&nvdev->wait_drain); + *avail = count - *filled - 1; }
- /* Check and send pending recv completions */ - static void netvsc_chk_recv_comp(struct netvsc_device *nvdev, - struct vmbus_channel *channel, u16 q_idx) + /* Add receive complete to ring to send to host. */ + static void enq_receive_complete(struct net_device *ndev, + struct netvsc_device *nvdev, u16 q_idx, + u64 tid, u32 status) { + struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx]; + struct multi_recv_comp *mrc = &nvchan->mrc; struct recv_comp_data *rcd; - int ret; + u32 filled, avail;
- while (true) { - rcd = read_recv_comp_slot(nvdev, q_idx); - if (!rcd) - break; + recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
- ret = netvsc_send_recv_completion(channel, rcd->tid, - rcd->status); - if (ret) - break; - - put_recv_comp_slot(nvdev, q_idx); + if (unlikely(filled > NAPI_POLL_WEIGHT)) { + send_recv_completions(nvchan); + recv_comp_slot_avail(nvdev, mrc, &filled, &avail); } - }
- #define NETVSC_RCD_WATERMARK 80 - - /* Get next available slot */ - static inline struct recv_comp_data *get_recv_comp_slot( - struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx) - { - struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc; - u32 filled, avail, next; - struct recv_comp_data *rcd; - - if (unlikely(!nvdev->recv_section)) - return NULL; - - if (unlikely(!mrc->buf)) - return NULL; - - if (atomic_read(&nvdev->num_outstanding_recvs) > - nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100) - netvsc_chk_recv_comp(nvdev, channel, q_idx); - - count_recv_comp_slot(nvdev, q_idx, &filled, &avail); - if (!avail) - return NULL; - - next = mrc->next; - rcd = mrc->buf + next * sizeof(struct recv_comp_data); - mrc->next = (next + 1) % NETVSC_RECVSLOT_MAX; + if (unlikely(!avail)) { + netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n", + q_idx, tid); + return; + }
- atomic_inc(&nvdev->num_outstanding_recvs); + rcd = mrc->slots + mrc->next; + rcd->tid = tid; + rcd->status = status;
- return rcd; + if (++mrc->next == nvdev->recv_completion_cnt) + mrc->next = 0; }
static int netvsc_receive(struct net_device *ndev, - struct netvsc_device *net_device, - struct net_device_context *net_device_ctx, - struct hv_device *device, - struct vmbus_channel *channel, - const struct vmpacket_descriptor *desc, - struct nvsp_message *nvsp) + struct netvsc_device *net_device, + struct net_device_context *net_device_ctx, + struct hv_device *device, + struct vmbus_channel *channel, + const struct vmpacket_descriptor *desc, + struct nvsp_message *nvsp) { const struct vmtransfer_page_packet_header *vmxferpage_packet = container_of(desc, const struct vmtransfer_page_packet_header, d); @@@ -1075,7 -1048,6 +1049,6 @@@ u32 status = NVSP_STAT_SUCCESS; int i; int count = 0; - int ret;
/* Make sure this is a valid nvsp packet */ if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) { @@@ -1106,25 -1078,9 +1079,9 @@@ channel, data, buflen); }
- if (net_device->chan_table[q_idx].mrc.buf) { - struct recv_comp_data *rcd; + enq_receive_complete(ndev, net_device, q_idx, + vmxferpage_packet->d.trans_id, status);
- rcd = get_recv_comp_slot(net_device, channel, q_idx); - if (rcd) { - rcd->tid = vmxferpage_packet->d.trans_id; - rcd->status = status; - } else { - netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n", - q_idx, vmxferpage_packet->d.trans_id); - } - } else { - ret = netvsc_send_recv_completion(channel, - vmxferpage_packet->d.trans_id, - status); - if (ret) - netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n", - q_idx, vmxferpage_packet->d.trans_id, ret); - } return count; }
@@@ -1220,11 -1176,10 +1177,10 @@@ int netvsc_poll(struct napi_struct *nap { struct netvsc_channel *nvchan = container_of(napi, struct netvsc_channel, napi); + struct netvsc_device *net_device = nvchan->net_device; struct vmbus_channel *channel = nvchan->channel; struct hv_device *device = netvsc_channel_to_device(channel); - u16 q_idx = channel->offermsg.offer.sub_channel_index; struct net_device *ndev = hv_get_drvdata(device); - struct netvsc_device *net_device = net_device_to_netvsc_device(ndev); int work_done = 0;
/* If starting a new interval */ @@@ -1237,17 -1192,23 +1193,23 @@@ nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc); }
- /* If receive ring was exhausted - * and not doing busy poll + /* if ring is empty, signal host */ + if (!nvchan->desc) + hv_pkt_iter_close(channel); + + /* If send of pending receive completions suceeded + * and did not exhaust NAPI budget this time + * and not doing busy poll * then re-enable host interrupts - * and reschedule if ring is not empty. + * and reschedule if ring is not empty. */ - if (work_done < budget && + if (send_recv_completions(nvchan) == 0 && + work_done < budget && napi_complete_done(napi, work_done) && - hv_end_read(&channel->inbound) != 0) + hv_end_read(&channel->inbound)) { + hv_begin_read(&channel->inbound); napi_reschedule(napi); - - netvsc_chk_recv_comp(net_device, channel, q_idx); + }
/* Driver may overshoot since multiple packets per descriptor */ return min(work_done, budget); @@@ -1259,10 -1220,15 +1221,15 @@@ void netvsc_channel_cb(void *context) { struct netvsc_channel *nvchan = context; + struct vmbus_channel *channel = nvchan->channel; + struct hv_ring_buffer_info *rbi = &channel->inbound; + + /* preload first vmpacket descriptor */ + prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
if (napi_schedule_prep(&nvchan->napi)) { /* disable interupts from host */ - hv_begin_read(&nvchan->channel->inbound); + hv_begin_read(rbi);
__napi_schedule(&nvchan->napi); } @@@ -1272,8 -1238,8 +1239,8 @@@ * netvsc_device_add - Callback when the device belonging to this * driver is added */ - int netvsc_device_add(struct hv_device *device, - const struct netvsc_device_info *device_info) + struct netvsc_device *netvsc_device_add(struct hv_device *device, + const struct netvsc_device_info *device_info) { int i, ret = 0; int ring_size = device_info->ring_size; @@@ -1283,7 -1249,7 +1250,7 @@@
net_device = alloc_net_device(); if (!net_device) - return -ENOMEM; + return ERR_PTR(-ENOMEM);
net_device->ring_size = ring_size;
@@@ -1303,8 -1269,7 +1270,9 @@@ struct netvsc_channel *nvchan = &net_device->chan_table[i];
nvchan->channel = device->channel; + nvchan->net_device = net_device; + u64_stats_init(&nvchan->tx_stats.syncp); + u64_stats_init(&nvchan->rx_stats.syncp); }
/* Enable NAPI handler before init callbacks */ @@@ -1341,10 -1306,11 +1309,11 @@@ goto close; }
- return ret; + return net_device;
close: - netif_napi_del(&net_device->chan_table[0].napi); + RCU_INIT_POINTER(net_device_ctx->nvdev, NULL); + napi_disable(&net_device->chan_table[0].napi);
/* Now, we can close the channel safely */ vmbus_close(device->channel); @@@ -1352,6 -1318,5 +1321,5 @@@ cleanup: free_netvsc_device(&net_device->rcu);
- return ret; - + return ERR_PTR(ret); } diff --combined drivers/net/hyperv/rndis_filter.c index d6308ffda53e,44165fe328a4..36e9ee82ec6f --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@@ -28,6 -28,7 +28,7 @@@ #include <linux/if_vlan.h> #include <linux/nls.h> #include <linux/vmalloc.h> + #include <linux/rtnetlink.h>
#include "hyperv_net.h"
@@@ -213,11 -214,11 +214,11 @@@ static void dump_rndis_message(struct h static int rndis_filter_send_request(struct rndis_device *dev, struct rndis_request *req) { - int ret; struct hv_netvsc_packet *packet; struct hv_page_buffer page_buf[2]; struct hv_page_buffer *pb = page_buf; struct net_device_context *net_device_ctx = netdev_priv(dev->ndev); + int ret;
/* Setup the packet to send it */ packet = &req->pkt; @@@ -243,7 -244,10 +244,10 @@@ pb[0].len; }
- ret = netvsc_send(net_device_ctx->device_ctx, packet, NULL, &pb, NULL); + rcu_read_lock_bh(); + ret = netvsc_send(net_device_ctx, packet, NULL, pb, NULL); + rcu_read_unlock_bh(); + return ret; }
@@@ -443,8 -447,9 +447,9 @@@ int rndis_filter_receive(struct net_dev return 0; }
- static int rndis_filter_query_device(struct rndis_device *dev, u32 oid, - void *result, u32 *result_size) + static int rndis_filter_query_device(struct rndis_device *dev, + struct netvsc_device *nvdev, + u32 oid, void *result, u32 *result_size) { struct rndis_request *request; u32 inresult_size = *result_size; @@@ -471,8 -476,6 +476,6 @@@ query->dev_vc_handle = 0;
if (oid == OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES) { - struct net_device_context *ndevctx = netdev_priv(dev->ndev); - struct netvsc_device *nvdev = ndevctx->nvdev; struct ndis_offload *hwcaps; u32 nvsp_version = nvdev->nvsp_version; u8 ndis_rev; @@@ -541,14 -544,15 +544,15 @@@ cleanup
/* Get the hardware offload capabilities */ static int - rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps) + rndis_query_hwcaps(struct rndis_device *dev, struct netvsc_device *net_device, + struct ndis_offload *caps) { u32 caps_len = sizeof(*caps); int ret;
memset(caps, 0, sizeof(*caps));
- ret = rndis_filter_query_device(dev, + ret = rndis_filter_query_device(dev, net_device, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES, caps, &caps_len); if (ret) @@@ -577,11 -581,12 +581,12 @@@ return 0; }
- static int rndis_filter_query_device_mac(struct rndis_device *dev) + static int rndis_filter_query_device_mac(struct rndis_device *dev, + struct netvsc_device *net_device) { u32 size = ETH_ALEN;
- return rndis_filter_query_device(dev, + return rndis_filter_query_device(dev, net_device, RNDIS_OID_802_3_PERMANENT_ADDRESS, dev->hw_mac_adr, &size); } @@@ -589,9 -594,9 +594,9 @@@ #define NWADR_STR "NetworkAddress" #define NWADR_STRLEN 14
- int rndis_filter_set_device_mac(struct net_device *ndev, char *mac) + int rndis_filter_set_device_mac(struct netvsc_device *nvdev, + const char *mac) { - struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev); struct rndis_device *rdev = nvdev->extension; struct rndis_request *request; struct rndis_set_request *set; @@@ -645,11 -650,8 +650,8 @@@ wait_for_completion(&request->wait_event);
set_complete = &request->response_msg.msg.set_complete; - if (set_complete->status != RNDIS_STATUS_SUCCESS) { - netdev_err(ndev, "Fail to set MAC on host side:0x%x\n", - set_complete->status); - ret = -EINVAL; - } + if (set_complete->status != RNDIS_STATUS_SUCCESS) + ret = -EIO;
cleanup: put_rndis_request(rdev, request); @@@ -658,9 -660,9 +660,9 @@@
static int rndis_filter_set_offload_params(struct net_device *ndev, + struct netvsc_device *nvdev, struct ndis_offload_params *req_offloads) { - struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev); struct rndis_device *rdev = nvdev->extension; struct rndis_request *request; struct rndis_set_request *set; @@@ -782,27 -784,27 +784,27 @@@ cleanup return ret; }
- static int rndis_filter_query_device_link_status(struct rndis_device *dev) + static int rndis_filter_query_device_link_status(struct rndis_device *dev, + struct netvsc_device *net_device) { u32 size = sizeof(u32); u32 link_status; - int ret; - - ret = rndis_filter_query_device(dev, - RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, - &link_status, &size);
- return ret; + return rndis_filter_query_device(dev, net_device, + RNDIS_OID_GEN_MEDIA_CONNECT_STATUS, + &link_status, &size); }
- static int rndis_filter_query_link_speed(struct rndis_device *dev) + static int rndis_filter_query_link_speed(struct rndis_device *dev, + struct netvsc_device *net_device) { u32 size = sizeof(u32); u32 link_speed; struct net_device_context *ndc; int ret;
- ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_LINK_SPEED, + ret = rndis_filter_query_device(dev, net_device, + RNDIS_OID_GEN_LINK_SPEED, &link_speed, &size);
if (!ret) { @@@ -871,14 -873,14 +873,14 @@@ void rndis_filter_update(struct netvsc_ schedule_work(&rdev->mcast_work); }
- static int rndis_filter_init_device(struct rndis_device *dev) + static int rndis_filter_init_device(struct rndis_device *dev, + struct netvsc_device *nvdev) { struct rndis_request *request; struct rndis_initialize_request *init; struct rndis_initialize_complete *init_complete; u32 status; int ret; - struct netvsc_device *nvdev = net_device_to_netvsc_device(dev->ndev);
request = get_rndis_request(dev, RNDIS_MSG_INIT, RNDIS_MESSAGE_SIZE(struct rndis_initialize_request)); @@@ -926,12 -928,12 +928,12 @@@ static bool netvsc_device_idle(const st { int i;
- if (atomic_read(&nvdev->num_outstanding_recvs) > 0) - return false; - for (i = 0; i < nvdev->num_chn; i++) { const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+ if (nvchan->mrc.first != nvchan->mrc.next) + return false; + if (atomic_read(&nvchan->queue_sends) > 0) return false; } @@@ -944,7 -946,7 +946,7 @@@ static void rndis_filter_halt_device(st struct rndis_request *request; struct rndis_halt_request *halt; struct net_device_context *net_device_ctx = netdev_priv(dev->ndev); - struct netvsc_device *nvdev = net_device_ctx->nvdev; + struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
/* Attempt to do a rndis device halt */ request = get_rndis_request(dev, RNDIS_MSG_HALT, @@@ -1015,20 -1017,20 +1017,20 @@@ static void netvsc_sc_open(struct vmbus { struct net_device *ndev = hv_get_drvdata(new_sc->primary_channel->device_obj); - struct netvsc_device *nvscdev = net_device_to_netvsc_device(ndev); + struct net_device_context *ndev_ctx = netdev_priv(ndev); + struct netvsc_device *nvscdev; u16 chn_index = new_sc->offermsg.offer.sub_channel_index; struct netvsc_channel *nvchan; int ret;
- if (chn_index >= nvscdev->num_chn) + /* This is safe because this callback only happens when + * new device is being setup and waiting on the channel_init_wait. + */ + nvscdev = rcu_dereference_raw(ndev_ctx->nvdev); + if (!nvscdev || chn_index >= nvscdev->num_chn) return;
nvchan = nvscdev->chan_table + chn_index; - nvchan->mrc.buf - = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data)); - - if (!nvchan->mrc.buf) - return;
/* Because the device uses NAPI, all the interrupt batching and * control is done via Net softirq, not the channel handling @@@ -1048,12 -1050,12 +1050,12 @@@ else netif_napi_del(&nvchan->napi);
- if (refcount_dec_and_test(&nvscdev->sc_offered)) - complete(&nvscdev->channel_init_wait); + atomic_inc(&nvscdev->open_chn); + wake_up(&nvscdev->subchan_open); }
- int rndis_filter_device_add(struct hv_device *dev, - struct netvsc_device_info *device_info) + struct netvsc_device *rndis_filter_device_add(struct hv_device *dev, + struct netvsc_device_info *device_info) { struct net_device *net = hv_get_drvdata(dev); struct net_device_context *net_device_ctx = netdev_priv(net); @@@ -1072,57 -1074,52 +1074,50 @@@
rndis_device = get_rndis_device(); if (!rndis_device) - return -ENODEV; + return ERR_PTR(-ENODEV);
/* * Let the inner driver handle this first to create the netvsc channel * NOTE! Once the channel is created, we may get a receive callback * (RndisFilterOnReceive()) before this call is completed */ - ret = netvsc_device_add(dev, device_info); - if (ret != 0) { + net_device = netvsc_device_add(dev, device_info); + if (IS_ERR(net_device)) { kfree(rndis_device); - return ret; + return net_device; }
/* Initialize the rndis device */ - net_device = net_device_ctx->nvdev; net_device->max_chn = 1; net_device->num_chn = 1;
- refcount_set(&net_device->sc_offered, 0); - net_device->extension = rndis_device; rndis_device->ndev = net;
/* Send the rndis initialization message */ - ret = rndis_filter_init_device(rndis_device); - if (ret != 0) { - rndis_filter_device_remove(dev, net_device); - return ret; - } + ret = rndis_filter_init_device(rndis_device, net_device); + if (ret != 0) + goto err_dev_remv;
/* Get the MTU from the host */ size = sizeof(u32); - ret = rndis_filter_query_device(rndis_device, + ret = rndis_filter_query_device(rndis_device, net_device, RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE, &mtu, &size); if (ret == 0 && size == sizeof(u32) && mtu < net->mtu) net->mtu = mtu;
/* Get the mac address */ - ret = rndis_filter_query_device_mac(rndis_device); - if (ret != 0) { - rndis_filter_device_remove(dev, net_device); - return ret; - } + ret = rndis_filter_query_device_mac(rndis_device, net_device); + if (ret != 0) + goto err_dev_remv;
memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
/* Find HW offload capabilities */ - ret = rndis_query_hwcaps(rndis_device, &hwcaps); - if (ret != 0) { - rndis_filter_device_remove(dev, net_device); - return ret; - } + ret = rndis_query_hwcaps(rndis_device, net_device, &hwcaps); + if (ret != 0) + goto err_dev_remv;
/* A value of zero means "no change"; now turn on what we want. */ memset(&offloads, 0, sizeof(struct ndis_offload_params)); @@@ -1177,24 -1174,24 +1172,24 @@@
netif_set_gso_max_size(net, gso_max_size);
- ret = rndis_filter_set_offload_params(net, &offloads); + ret = rndis_filter_set_offload_params(net, net_device, &offloads); if (ret) goto err_dev_remv;
- rndis_filter_query_device_link_status(rndis_device); + rndis_filter_query_device_link_status(rndis_device, net_device);
netdev_dbg(net, "Device MAC %pM link state %s\n", rndis_device->hw_mac_adr, rndis_device->link_state ? "down" : "up");
if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) - return 0; + return net_device;
- rndis_filter_query_link_speed(rndis_device); + rndis_filter_query_link_speed(rndis_device, net_device);
/* vRSS setup */ memset(&rsscap, 0, rsscap_size); - ret = rndis_filter_query_device(rndis_device, + ret = rndis_filter_query_device(rndis_device, net_device, OID_GEN_RECEIVE_SCALE_CAPABILITIES, &rsscap, &rsscap_size); if (ret || rsscap.num_recv_que < 2) @@@ -1219,11 -1216,20 +1214,20 @@@ rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i, net_device->num_chn);
+ atomic_set(&net_device->open_chn, 1); num_rss_qs = net_device->num_chn - 1; if (num_rss_qs == 0) - return 0; + return net_device; + + for (i = 1; i < net_device->num_chn; i++) { + ret = netvsc_alloc_recv_comp_ring(net_device, i); + if (ret) { + while (--i != 0) + vfree(net_device->chan_table[i].mrc.slots); + goto out; + } + }
- refcount_set(&net_device->sc_offered, num_rss_qs); vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
init_packet = &net_device->channel_init_pkt; @@@ -1240,19 -1246,15 +1244,19 @@@ if (ret) goto out;
+ wait_for_completion(&net_device->channel_init_wait); if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) { ret = -ENODEV; goto out; } - wait_for_completion(&net_device->channel_init_wait);
net_device->num_chn = 1 + init_packet->msg.v5_msg.subchn_comp.num_subchannels;
+ /* wait for all sub channels to open */ + wait_event(net_device->subchan_open, + atomic_read(&net_device->open_chn) == net_device->num_chn); + /* ignore failues from setting rss parameters, still have channels */ rndis_filter_set_rss_param(rndis_device, netvsc_hash_key, net_device->num_chn); @@@ -1262,11 -1264,11 +1266,11 @@@ out net_device->num_chn = 1; }
- return 0; /* return 0 because primary channel can be used alone */ + return net_device;
err_dev_remv: rndis_filter_device_remove(dev, net_device); - return ret; + return ERR_PTR(ret); }
void rndis_filter_device_remove(struct hv_device *dev, @@@ -1304,3 -1306,8 +1308,8 @@@ int rndis_filter_close(struct netvsc_de
return rndis_filter_close_device(nvdev->extension); } + + bool rndis_filter_opened(const struct netvsc_device *nvdev) + { + return atomic_read(&nvdev->open_cnt) > 0; + } diff --combined drivers/net/ipvlan/ipvlan_main.c index 8dab74a81303,fdde20735416..58a9f990b553 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@@ -169,7 -169,7 +169,7 @@@ static void ipvlan_port_destroy(struct
#define IPVLAN_FEATURES \ (NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \ - NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \ + NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \ NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \ NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
@@@ -192,7 -192,7 +192,7 @@@ static int ipvlan_init(struct net_devic
netdev_lockdep_set_classes(dev);
- ipvlan->pcpu_stats = alloc_percpu(struct ipvl_pcpu_stats); + ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats); if (!ipvlan->pcpu_stats) return -ENOMEM;
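netdev_alloc_pcpu_stats() differs from a bare alloc_percpu() in that it also runs u64_stats_init() on each CPU's syncp, so readers of the 64-bit counters never see an uninitialized seqcount. A userspace analogue of that per-slot initialization, with a mutex playing the role of u64_stats_sync (NCPUS and the struct layout are illustrative assumptions):

	#include <stdlib.h>
	#include <pthread.h>

	#define NCPUS 4

	struct pcpu_stats {
		unsigned long long rx_bytes;
		pthread_mutex_t syncp;	/* stand-in for u64_stats_sync */
	};

	/* Allocate and, crucially, initialize every per-CPU slot --
	 * the step a raw allocator would skip. */
	static struct pcpu_stats *alloc_pcpu_stats(void)
	{
		struct pcpu_stats *s = calloc(NCPUS, sizeof(*s));

		if (!s)
			return NULL;
		for (int i = 0; i < NCPUS; i++)
			pthread_mutex_init(&s[i].syncp, NULL);
		return s;
	}

	int main(void)
	{
		struct pcpu_stats *s = alloc_pcpu_stats();

		return s ? 0 : 1;
	}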
diff --combined drivers/net/vxlan.c index e17baac70f43,dbca067540d0..35e84a9e1cfb --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@@ -623,7 -623,6 +623,7 @@@ static struct sk_buff **vxlan_gro_recei
out: skb_gro_remcsum_cleanup(skb, &grc); + skb->remcsum_offload = 0; NAPI_GRO_CB(skb)->flush |= flush;
return pp; @@@ -2609,7 -2608,7 +2609,7 @@@ static struct device_type vxlan_type = * supply the listening VXLAN udp ports. Callers are expected * to implement the ndo_udp_tunnel_add. */ - static void vxlan_push_rx_ports(struct net_device *dev) + static void vxlan_offload_rx_ports(struct net_device *dev, bool push) { struct vxlan_sock *vs; struct net *net = dev_net(dev); @@@ -2618,11 -2617,19 +2618,19 @@@
spin_lock(&vn->sock_lock); for (i = 0; i < PORT_HASH_SIZE; ++i) { - hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) - udp_tunnel_push_rx_port(dev, vs->sock, - (vs->flags & VXLAN_F_GPE) ? - UDP_TUNNEL_TYPE_VXLAN_GPE : - UDP_TUNNEL_TYPE_VXLAN); + hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) { + unsigned short type; + + if (vs->flags & VXLAN_F_GPE) + type = UDP_TUNNEL_TYPE_VXLAN_GPE; + else + type = UDP_TUNNEL_TYPE_VXLAN; + + if (push) + udp_tunnel_push_rx_port(dev, vs->sock, type); + else + udp_tunnel_drop_rx_port(dev, vs->sock, type); + } } spin_unlock(&vn->sock_lock); } @@@ -3631,10 -3638,15 +3639,15 @@@ static int vxlan_netdevice_event(struc struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
- if (event == NETDEV_UNREGISTER) + if (event == NETDEV_UNREGISTER) { + vxlan_offload_rx_ports(dev, false); vxlan_handle_lowerdev_unregister(vn, dev); - else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO) - vxlan_push_rx_ports(dev); + } else if (event == NETDEV_REGISTER) { + vxlan_offload_rx_ports(dev, true); + } else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO || + event == NETDEV_UDP_TUNNEL_DROP_INFO) { + vxlan_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO); + }
return NOTIFY_DONE; } diff --combined drivers/net/wireless/intel/iwlwifi/mvm/sta.c index dcaef7c043ac,922cd5379841..ca3e9df1d98a --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@@ -121,8 -121,7 +121,8 @@@ int iwl_mvm_sta_send_to_fw(struct iwl_m .mac_id_n_color = cpu_to_le32(mvm_sta->mac_id_n_color), .add_modify = update ? 1 : 0, .station_flags_msk = cpu_to_le32(STA_FLG_FAT_EN_MSK | - STA_FLG_MIMO_EN_MSK), + STA_FLG_MIMO_EN_MSK | + STA_FLG_RTS_MIMO_PROT), .tid_disable_tx = cpu_to_le16(mvm_sta->tid_disable_agg), }; int ret; @@@ -297,60 -296,6 +297,6 @@@ unlock rcu_read_unlock(); }
- static int iwl_mvm_tdls_sta_init(struct iwl_mvm *mvm, - struct ieee80211_sta *sta) - { - unsigned long used_hw_queues; - struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); - unsigned int wdg_timeout = - iwl_mvm_get_wd_timeout(mvm, NULL, true, false); - u32 ac; - - lockdep_assert_held(&mvm->mutex); - - used_hw_queues = iwl_mvm_get_used_hw_queues(mvm, NULL); - - /* Find available queues, and allocate them to the ACs */ - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - u8 queue = find_first_zero_bit(&used_hw_queues, - mvm->first_agg_queue); - - if (queue >= mvm->first_agg_queue) { - IWL_ERR(mvm, "Failed to allocate STA queue\n"); - return -EBUSY; - } - - __set_bit(queue, &used_hw_queues); - mvmsta->hw_queue[ac] = queue; - } - - /* Found a place for all queues - enable them */ - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - iwl_mvm_enable_ac_txq(mvm, mvmsta->hw_queue[ac], - mvmsta->hw_queue[ac], - iwl_mvm_ac_to_tx_fifo[ac], 0, - wdg_timeout); - mvmsta->tfd_queue_msk |= BIT(mvmsta->hw_queue[ac]); - } - - return 0; - } - - static void iwl_mvm_tdls_sta_deinit(struct iwl_mvm *mvm, - struct ieee80211_sta *sta) - { - struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); - unsigned long sta_msk; - int i; - - lockdep_assert_held(&mvm->mutex); - - /* disable the TDLS STA-specific queues */ - sta_msk = mvmsta->tfd_queue_msk; - for_each_set_bit(i, &sta_msk, sizeof(sta_msk) * BITS_PER_BYTE) - iwl_mvm_disable_txq(mvm, i, i, IWL_MAX_TID_COUNT, 0); - } - /* Disable aggregations for a bitmap of TIDs for a given station */ static int iwl_mvm_invalidate_sta_queue(struct iwl_mvm *mvm, int queue, unsigned long disable_agg_tids, @@@ -758,7 -703,7 +704,7 @@@ static int iwl_mvm_sta_alloc_queue(stru { struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); struct iwl_trans_txq_scd_cfg cfg = { - .fifo = iwl_mvm_ac_to_tx_fifo[ac], + .fifo = iwl_mvm_mac_ac_to_tx_fifo(mvm, ac), .sta_id = mvmsta->sta_id, .tid = tid, .frame_limit = IWL_FRAME_LIMIT, @@@ -1316,7 -1261,7 +1262,7 @@@ static void iwl_mvm_realloc_queues_afte u16 seq = IEEE80211_SEQ_TO_SN(tid_data->seq_number);
cfg.tid = i; - cfg.fifo = iwl_mvm_ac_to_tx_fifo[ac]; + cfg.fifo = iwl_mvm_mac_ac_to_tx_fifo(mvm, ac); cfg.aggregate = (txq_id >= IWL_MVM_DQA_MIN_DATA_QUEUE || txq_id == IWL_MVM_DQA_BSS_CLIENT_QUEUE); @@@ -1330,8 -1275,6 +1276,6 @@@ mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_READY; } } - - atomic_set(&mvm->pending_frames[mvm_sta->sta_id], 0); }
int iwl_mvm_add_sta(struct iwl_mvm *mvm, @@@ -1356,9 -1299,8 +1300,8 @@@
spin_lock_init(&mvm_sta->lock);
- /* In DQA mode, if this is a HW restart, re-alloc existing queues */ - if (iwl_mvm_is_dqa_supported(mvm) && - test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) { + /* if this is a HW restart re-alloc existing queues */ + if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) { iwl_mvm_realloc_queues_after_restart(mvm, mvm_sta); goto update_fw; } @@@ -1376,33 -1318,15 +1319,15 @@@ mvm_sta->sta_type = sta->tdls ? IWL_STA_TDLS_LINK : IWL_STA_LINK;
/* HW restart, don't assume the memory has been zeroed */ - atomic_set(&mvm->pending_frames[sta_id], 0); mvm_sta->tid_disable_agg = 0xffff; /* No aggs at first */ mvm_sta->tfd_queue_msk = 0;
- /* - * Allocate new queues for a TDLS station, unless we're in DQA mode, - * and then they'll be allocated dynamically - */ - if (!iwl_mvm_is_dqa_supported(mvm) && sta->tdls) { - ret = iwl_mvm_tdls_sta_init(mvm, sta); - if (ret) - return ret; - } else if (!iwl_mvm_is_dqa_supported(mvm)) { - for (i = 0; i < IEEE80211_NUM_ACS; i++) - if (vif->hw_queue[i] != IEEE80211_INVAL_HW_QUEUE) - mvm_sta->tfd_queue_msk |= BIT(vif->hw_queue[i]); - } - /* for HW restart - reset everything but the sequence number */ for (i = 0; i <= IWL_MAX_TID_COUNT; i++) { u16 seq = mvm_sta->tid_data[i].seq_number; memset(&mvm_sta->tid_data[i], 0, sizeof(mvm_sta->tid_data[i])); mvm_sta->tid_data[i].seq_number = seq;
- if (!iwl_mvm_is_dqa_supported(mvm)) - continue; - /* * Mark all queues for this STA as unallocated and defer TX * frames until the queue is allocated @@@ -1436,7 -1360,7 +1361,7 @@@ mvm_sta->dup_data = dup_data; }
- if (iwl_mvm_is_dqa_supported(mvm) && !iwl_mvm_has_new_tx_api(mvm)) { + if (!iwl_mvm_has_new_tx_api(mvm)) { ret = iwl_mvm_reserve_sta_stream(mvm, sta, ieee80211_vif_type_p2p(vif)); if (ret) @@@ -1462,8 -1386,6 +1387,6 @@@ update_fw return 0;
err: - if (!iwl_mvm_is_dqa_supported(mvm) && sta->tdls) - iwl_mvm_tdls_sta_deinit(mvm, sta); return ret; }
@@@ -1536,79 -1458,6 +1459,6 @@@ static int iwl_mvm_rm_sta_common(struc return 0; }
- void iwl_mvm_sta_drained_wk(struct work_struct *wk) - { - struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, sta_drained_wk); - u8 sta_id; - - /* - * The mutex is needed because of the SYNC cmd, but not only: if the - * work would run concurrently with iwl_mvm_rm_sta, it would run before - * iwl_mvm_rm_sta sets the station as busy, and exit. Then - * iwl_mvm_rm_sta would set the station as busy, and nobody will clean - * that later. - */ - mutex_lock(&mvm->mutex); - - for_each_set_bit(sta_id, mvm->sta_drained, IWL_MVM_STATION_COUNT) { - int ret; - struct ieee80211_sta *sta = - rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id], - lockdep_is_held(&mvm->mutex)); - - /* - * This station is in use or RCU-removed; the latter happens in - * managed mode, where mac80211 removes the station before we - * can remove it from firmware (we can only do that after the - * MAC is marked unassociated), and possibly while the deauth - * frame to disconnect from the AP is still queued. Then, the - * station pointer is -ENOENT when the last skb is reclaimed. - */ - if (!IS_ERR(sta) || PTR_ERR(sta) == -ENOENT) - continue; - - if (PTR_ERR(sta) == -EINVAL) { - IWL_ERR(mvm, "Drained sta %d, but it is internal?\n", - sta_id); - continue; - } - - if (!sta) { - IWL_ERR(mvm, "Drained sta %d, but it was NULL?\n", - sta_id); - continue; - } - - WARN_ON(PTR_ERR(sta) != -EBUSY); - /* This station was removed and we waited until it got drained, - * we can now proceed and remove it. - */ - ret = iwl_mvm_rm_sta_common(mvm, sta_id); - if (ret) { - IWL_ERR(mvm, - "Couldn't remove sta %d after it was drained\n", - sta_id); - continue; - } - RCU_INIT_POINTER(mvm->fw_id_to_mac_id[sta_id], NULL); - clear_bit(sta_id, mvm->sta_drained); - - if (mvm->tfd_drained[sta_id]) { - unsigned long i, msk = mvm->tfd_drained[sta_id]; - - for_each_set_bit(i, &msk, sizeof(msk) * BITS_PER_BYTE) - iwl_mvm_disable_txq(mvm, i, i, - IWL_MAX_TID_COUNT, 0); - - mvm->tfd_drained[sta_id] = 0; - IWL_DEBUG_TDLS(mvm, "Drained sta %d, with queues %ld\n", - sta_id, msk); - } - } - - mutex_unlock(&mvm->mutex); - } - static void iwl_mvm_disable_sta_queues(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct iwl_mvm_sta *mvm_sta) @@@ -1632,10 -1481,11 +1482,11 @@@ int iwl_mvm_wait_sta_queues_empty(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvm_sta) { - int i, ret; + int i;
for (i = 0; i < ARRAY_SIZE(mvm_sta->tid_data); i++) { u16 txq_id; + int ret;
spin_lock_bh(&mvm_sta->lock); txq_id = mvm_sta->tid_data[i].txq_id; @@@ -1646,10 -1496,10 +1497,10 @@@
ret = iwl_trans_wait_txq_empty(mvm->trans, txq_id); if (ret) - break; + return ret; }
- return ret; + return 0; }
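The rewritten iwl_mvm_wait_sta_queues_empty() returns the first error as soon as it sees one and falls through to a plain return 0, which also removes the case where the old code could hand back a stale ret from an earlier iteration. The shape of the fix, reduced to a compilable sketch (wait_txq_empty() is a stand-in, not driver API):

	/* Return the first failure immediately; if every queue drains,
	 * report success with no accumulator left over. */
	static int wait_txq_empty(int txq_id) { (void)txq_id; return 0; }

	static int wait_all_queues_empty(const int *txq, int n)
	{
		for (int i = 0; i < n; i++) {
			int ret = wait_txq_empty(txq[i]);	/* scoped per iteration */

			if (ret)
				return ret;
		}
		return 0;
	}

	int main(void)
	{
		int q[2] = { 0, 1 };

		return wait_all_queues_empty(q, 2);
	}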
int iwl_mvm_rm_sta(struct iwl_mvm *mvm, @@@ -1666,79 -1516,65 +1517,65 @@@ if (iwl_mvm_has_new_rx_api(mvm)) kfree(mvm_sta->dup_data);
- if ((vif->type == NL80211_IFTYPE_STATION && - mvmvif->ap_sta_id == sta_id) || - iwl_mvm_is_dqa_supported(mvm)){ - ret = iwl_mvm_drain_sta(mvm, mvm_sta, true); - if (ret) - return ret; - /* flush its queues here since we are freeing mvm_sta */ - ret = iwl_mvm_flush_sta(mvm, mvm_sta, false, 0); - if (ret) - return ret; - if (iwl_mvm_has_new_tx_api(mvm)) { - ret = iwl_mvm_wait_sta_queues_empty(mvm, mvm_sta); - } else { - u32 q_mask = mvm_sta->tfd_queue_msk; + ret = iwl_mvm_drain_sta(mvm, mvm_sta, true); + if (ret) + return ret;
- ret = iwl_trans_wait_tx_queues_empty(mvm->trans, - q_mask); - } - if (ret) - return ret; - ret = iwl_mvm_drain_sta(mvm, mvm_sta, false); - - /* If DQA is supported - the queues can be disabled now */ - if (iwl_mvm_is_dqa_supported(mvm)) { - iwl_mvm_disable_sta_queues(mvm, vif, mvm_sta); - /* - * If pending_frames is set at this point - it must be - * a driver internal logic error, since queues are empty - * and removed successfully. - * warn on it but set it to 0 anyway to avoid station - * not being removed later in the function - */ - WARN_ON(atomic_xchg(&mvm->pending_frames[sta_id], 0)); - } + /* flush its queues here since we are freeing mvm_sta */ + ret = iwl_mvm_flush_sta(mvm, mvm_sta, false, 0); + if (ret) + return ret; + if (iwl_mvm_has_new_tx_api(mvm)) { + ret = iwl_mvm_wait_sta_queues_empty(mvm, mvm_sta); + } else { + u32 q_mask = mvm_sta->tfd_queue_msk;
- /* If there is a TXQ still marked as reserved - free it */ - if (iwl_mvm_is_dqa_supported(mvm) && - mvm_sta->reserved_queue != IEEE80211_INVAL_HW_QUEUE) { - u8 reserved_txq = mvm_sta->reserved_queue; - enum iwl_mvm_queue_status *status; - - /* - * If no traffic has gone through the reserved TXQ - it - * is still marked as IWL_MVM_QUEUE_RESERVED, and - * should be manually marked as free again - */ - spin_lock_bh(&mvm->queue_info_lock); - status = &mvm->queue_info[reserved_txq].status; - if (WARN((*status != IWL_MVM_QUEUE_RESERVED) && - (*status != IWL_MVM_QUEUE_FREE), - "sta_id %d reserved txq %d status %d", - sta_id, reserved_txq, *status)) { - spin_unlock_bh(&mvm->queue_info_lock); - return -EINVAL; - } + ret = iwl_trans_wait_tx_queues_empty(mvm->trans, + q_mask); + } + if (ret) + return ret; + + ret = iwl_mvm_drain_sta(mvm, mvm_sta, false); + + iwl_mvm_disable_sta_queues(mvm, vif, mvm_sta); + + /* If there is a TXQ still marked as reserved - free it */ + if (mvm_sta->reserved_queue != IEEE80211_INVAL_HW_QUEUE) { + u8 reserved_txq = mvm_sta->reserved_queue; + enum iwl_mvm_queue_status *status;
- *status = IWL_MVM_QUEUE_FREE; + /* + * If no traffic has gone through the reserved TXQ - it + * is still marked as IWL_MVM_QUEUE_RESERVED, and + * should be manually marked as free again + */ + spin_lock_bh(&mvm->queue_info_lock); + status = &mvm->queue_info[reserved_txq].status; + if (WARN((*status != IWL_MVM_QUEUE_RESERVED) && + (*status != IWL_MVM_QUEUE_FREE), + "sta_id %d reserved txq %d status %d", + sta_id, reserved_txq, *status)) { spin_unlock_bh(&mvm->queue_info_lock); + return -EINVAL; }
- if (vif->type == NL80211_IFTYPE_STATION && - mvmvif->ap_sta_id == sta_id) { - /* if associated - we can't remove the AP STA now */ - if (vif->bss_conf.assoc) - return ret; + *status = IWL_MVM_QUEUE_FREE; + spin_unlock_bh(&mvm->queue_info_lock); + } + + if (vif->type == NL80211_IFTYPE_STATION && + mvmvif->ap_sta_id == sta_id) { + /* if associated - we can't remove the AP STA now */ + if (vif->bss_conf.assoc) + return ret;
- /* unassoc - go ahead - remove the AP STA now */ - mvmvif->ap_sta_id = IWL_MVM_INVALID_STA; + /* unassoc - go ahead - remove the AP STA now */ + mvmvif->ap_sta_id = IWL_MVM_INVALID_STA;
- /* clear d0i3_ap_sta_id if no longer relevant */ - if (mvm->d0i3_ap_sta_id == sta_id) - mvm->d0i3_ap_sta_id = IWL_MVM_INVALID_STA; - } + /* clear d0i3_ap_sta_id if no longer relevant */ + if (mvm->d0i3_ap_sta_id == sta_id) + mvm->d0i3_ap_sta_id = IWL_MVM_INVALID_STA; }
/* @@@ -1755,32 -1591,10 +1592,10 @@@ * calls the drain worker. */ spin_lock_bh(&mvm_sta->lock); + spin_unlock_bh(&mvm_sta->lock);
- /* - * There are frames pending on the AC queues for this station. - * We need to wait until all the frames are drained... - */ - if (atomic_read(&mvm->pending_frames[sta_id])) { - rcu_assign_pointer(mvm->fw_id_to_mac_id[sta_id], - ERR_PTR(-EBUSY)); - spin_unlock_bh(&mvm_sta->lock); - - /* disable TDLS sta queues on drain complete */ - if (sta->tdls) { - mvm->tfd_drained[sta_id] = mvm_sta->tfd_queue_msk; - IWL_DEBUG_TDLS(mvm, "Draining TDLS sta %d\n", sta_id); - } - - ret = iwl_mvm_drain_sta(mvm, mvm_sta, true); - } else { - spin_unlock_bh(&mvm_sta->lock); - - if (!iwl_mvm_is_dqa_supported(mvm) && sta->tdls) - iwl_mvm_tdls_sta_deinit(mvm, sta); - - ret = iwl_mvm_rm_sta_common(mvm, mvm_sta->sta_id); - RCU_INIT_POINTER(mvm->fw_id_to_mac_id[mvm_sta->sta_id], NULL); - } + ret = iwl_mvm_rm_sta_common(mvm, mvm_sta->sta_id); + RCU_INIT_POINTER(mvm->fw_id_to_mac_id[mvm_sta->sta_id], NULL);
return ret; } @@@ -1879,7 -1693,7 +1694,7 @@@ static void iwl_mvm_enable_aux_queue(st IWL_MAX_TID_COUNT, wdg_timeout); mvm->aux_queue = queue; - } else if (iwl_mvm_is_dqa_supported(mvm)) { + } else { struct iwl_trans_txq_scd_cfg cfg = { .fifo = IWL_MVM_TX_FIFO_MCAST, .sta_id = mvm->aux_sta.sta_id, @@@ -1890,9 -1704,6 +1705,6 @@@
iwl_mvm_enable_txq(mvm, mvm->aux_queue, mvm->aux_queue, 0, &cfg, wdg_timeout); - } else { - iwl_mvm_enable_ac_txq(mvm, mvm->aux_queue, mvm->aux_queue, - IWL_MVM_TX_FIFO_MCAST, 0, wdg_timeout); } }
@@@ -1992,7 -1803,7 +1804,7 @@@ int iwl_mvm_send_add_bcast_sta(struct i
lockdep_assert_held(&mvm->mutex);
- if (iwl_mvm_is_dqa_supported(mvm) && !iwl_mvm_has_new_tx_api(mvm)) { + if (!iwl_mvm_has_new_tx_api(mvm)) { if (vif->type == NL80211_IFTYPE_AP || vif->type == NL80211_IFTYPE_ADHOC) queue = mvm->probe_queue; @@@ -2079,8 -1890,7 +1891,7 @@@ int iwl_mvm_send_rm_bcast_sta(struct iw
lockdep_assert_held(&mvm->mutex);
- if (iwl_mvm_is_dqa_supported(mvm)) - iwl_mvm_free_bcast_sta_queues(mvm, vif); + iwl_mvm_free_bcast_sta_queues(mvm, vif);
ret = iwl_mvm_rm_sta_common(mvm, mvmvif->bcast_sta.sta_id); if (ret) @@@ -2091,23 -1901,10 +1902,10 @@@ int iwl_mvm_alloc_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); - u32 qmask = 0;
lockdep_assert_held(&mvm->mutex);
- if (!iwl_mvm_is_dqa_supported(mvm)) { - qmask = iwl_mvm_mac_get_queues_mask(vif); - - /* - * The firmware defines the TFD queue mask to only be relevant - * for *unicast* queues, so the multicast (CAB) queue shouldn't - * be included. This only happens in NL80211_IFTYPE_AP vif type, - * so the next line will only have an effect there. - */ - qmask &= ~BIT(vif->cab_queue); - } - - return iwl_mvm_allocate_int_sta(mvm, &mvmvif->bcast_sta, qmask, + return iwl_mvm_allocate_int_sta(mvm, &mvmvif->bcast_sta, 0, ieee80211_vif_type_p2p(vif), IWL_STA_GENERAL_PURPOSE); } @@@ -2119,7 -1916,7 +1917,7 @@@ * @mvm: the mvm component * @vif: the interface to which the broadcast station is added * @bsta: the broadcast station to add. */ - int iwl_mvm_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) + int iwl_mvm_add_p2p_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); struct iwl_mvm_int_sta *bsta = &mvmvif->bcast_sta; @@@ -2150,7 -1947,7 +1948,7 @@@ void iwl_mvm_dealloc_bcast_sta(struct i * Send the FW a request to remove the station from it's internal data * structures, and in addition remove it from the local data structure. */ - int iwl_mvm_rm_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) + int iwl_mvm_rm_p2p_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { int ret;
@@@ -2189,9 -1986,6 +1987,6 @@@ int iwl_mvm_add_mcast_sta(struct iwl_mv
lockdep_assert_held(&mvm->mutex);
- if (!iwl_mvm_is_dqa_supported(mvm)) - return 0; - if (WARN_ON(vif->type != NL80211_IFTYPE_AP && vif->type != NL80211_IFTYPE_ADHOC)) return -ENOTSUPP; @@@ -2256,9 -2050,6 +2051,6 @@@ int iwl_mvm_rm_mcast_sta(struct iwl_mv
lockdep_assert_held(&mvm->mutex);
- if (!iwl_mvm_is_dqa_supported(mvm)) - return 0; - iwl_mvm_flush_sta(mvm, &mvmvif->mcast_sta, true, 0);
iwl_mvm_disable_txq(mvm, mvmvif->cab_queue, vif->cab_queue, @@@ -2508,8 -2299,6 +2300,6 @@@ int iwl_mvm_sta_tx_agg(struct iwl_mvm * mvm_sta->tid_disable_agg &= ~BIT(tid); } else { /* In DQA-mode the queue isn't removed on agg termination */ - if (!iwl_mvm_is_dqa_supported(mvm)) - mvm_sta->tfd_queue_msk &= ~BIT(queue); mvm_sta->tid_disable_agg |= BIT(tid); }
@@@ -2612,19 -2401,17 +2402,17 @@@ int iwl_mvm_sta_tx_agg_start(struct iwl ret = -ENXIO; goto release_locks; } - } else if (iwl_mvm_is_dqa_supported(mvm) && - unlikely(mvm->queue_info[txq_id].status == + } else if (unlikely(mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_SHARED)) { ret = -ENXIO; IWL_DEBUG_TX_QUEUES(mvm, "Can't start tid %d agg on shared queue!\n", tid); goto release_locks; - } else if (!iwl_mvm_is_dqa_supported(mvm) || - mvm->queue_info[txq_id].status != IWL_MVM_QUEUE_READY) { + } else if (mvm->queue_info[txq_id].status != IWL_MVM_QUEUE_READY) { txq_id = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id, - mvm->first_agg_queue, - mvm->last_agg_queue); + IWL_MVM_DQA_MIN_DATA_QUEUE, + IWL_MVM_DQA_MAX_DATA_QUEUE); if (txq_id < 0) { ret = txq_id; IWL_ERR(mvm, "Failed to allocate agg queue\n"); @@@ -2742,37 -2529,34 +2530,34 @@@ int iwl_mvm_sta_tx_agg_oper(struct iwl_ queue_status = mvm->queue_info[queue].status; spin_unlock_bh(&mvm->queue_info_lock);
- /* In DQA mode, the existing queue might need to be reconfigured */ - if (iwl_mvm_is_dqa_supported(mvm)) { - /* Maybe there is no need to even alloc a queue... */ - if (mvm->queue_info[queue].status == IWL_MVM_QUEUE_READY) - alloc_queue = false; + /* Maybe there is no need to even alloc a queue... */ + if (mvm->queue_info[queue].status == IWL_MVM_QUEUE_READY) + alloc_queue = false;
+ /* + * Only reconfig the SCD for the queue if the window size has + * changed from current (become smaller) + */ + if (!alloc_queue && buf_size < mvmsta->max_agg_bufsize) { /* - * Only reconfig the SCD for the queue if the window size has - * changed from current (become smaller) + * If reconfiguring an existing queue, it first must be + * drained */ - if (!alloc_queue && buf_size < mvmsta->max_agg_bufsize) { - /* - * If reconfiguring an existing queue, it first must be - * drained - */ - ret = iwl_trans_wait_tx_queues_empty(mvm->trans, - BIT(queue)); - if (ret) { - IWL_ERR(mvm, - "Error draining queue before reconfig\n"); - return ret; - } + ret = iwl_trans_wait_tx_queues_empty(mvm->trans, + BIT(queue)); + if (ret) { + IWL_ERR(mvm, + "Error draining queue before reconfig\n"); + return ret; + }
- ret = iwl_mvm_reconfig_scd(mvm, queue, cfg.fifo, - mvmsta->sta_id, tid, - buf_size, ssn); - if (ret) { - IWL_ERR(mvm, - "Error reconfiguring TXQ #%d\n", queue); - return ret; - } + ret = iwl_mvm_reconfig_scd(mvm, queue, cfg.fifo, + mvmsta->sta_id, tid, + buf_size, ssn); + if (ret) { + IWL_ERR(mvm, + "Error reconfiguring TXQ #%d\n", queue); + return ret; } }
@@@ -2868,18 -2652,6 +2653,6 @@@ int iwl_mvm_sta_tx_agg_stop(struct iwl_ "ssn = %d, next_recl = %d\n", tid_data->ssn, tid_data->next_reclaimed);
- /* - * There are still packets for this RA / TID in the HW. - * Not relevant for DQA mode, since there is no need to disable - * the queue. - */ - if (!iwl_mvm_is_dqa_supported(mvm) && - tid_data->ssn != tid_data->next_reclaimed) { - tid_data->state = IWL_EMPTYING_HW_QUEUE_DELBA; - err = 0; - break; - } - tid_data->ssn = 0xffff; tid_data->state = IWL_AGG_OFF; spin_unlock_bh(&mvmsta->lock); @@@ -2887,12 -2659,6 +2660,6 @@@ ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
iwl_mvm_sta_tx_agg(mvm, sta, tid, txq_id, false); - - if (!iwl_mvm_is_dqa_supported(mvm)) { - int mac_queue = vif->hw_queue[tid_to_mac80211_ac[tid]]; - - iwl_mvm_disable_txq(mvm, txq_id, mac_queue, tid, 0); - } return 0; case IWL_AGG_STARTING: case IWL_EMPTYING_HW_QUEUE_ADDBA: @@@ -2962,13 -2728,6 +2729,6 @@@ int iwl_mvm_sta_tx_agg_flush(struct iwl iwl_mvm_drain_sta(mvm, mvmsta, false);
iwl_mvm_sta_tx_agg(mvm, sta, tid, txq_id, false); - - if (!iwl_mvm_is_dqa_supported(mvm)) { - int mac_queue = vif->hw_queue[tid_to_mac80211_ac[tid]]; - - iwl_mvm_disable_txq(mvm, tid_data->txq_id, mac_queue, - tid, 0); - } }
return 0; @@@ -3587,15 -3346,6 +3347,6 @@@ void iwl_mvm_sta_modify_sleep_tx_count( u16 n_queued;
tid_data = &mvmsta->tid_data[tid]; - if (WARN(!iwl_mvm_is_dqa_supported(mvm) && - tid_data->state != IWL_AGG_ON && - tid_data->state != IWL_EMPTYING_HW_QUEUE_DELBA, - "TID %d state is %d\n", - tid, tid_data->state)) { - spin_unlock_bh(&mvmsta->lock); - ieee80211_sta_eosp(sta); - return; - }
n_queued = iwl_mvm_tid_queued(mvm, tid_data); if (n_queued > remaining) { @@@ -3689,13 -3439,8 +3440,8 @@@ void iwl_mvm_sta_modify_disable_tx_ap(s
mvm_sta->disable_tx = disable;
- /* - * Tell mac80211 to start/stop queuing tx for this station, - * but don't stop queuing if there are still pending frames - * for this station. - */ - if (disable || !atomic_read(&mvm->pending_frames[mvm_sta->sta_id])) - ieee80211_sta_block_awake(mvm->hw, sta, disable); + /* Tell mac80211 to start/stop queuing tx for this station */ + ieee80211_sta_block_awake(mvm->hw, sta, disable);
iwl_mvm_sta_modify_disable_tx(mvm, mvm_sta, disable);
diff --combined drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 5fcc9dd6be56,6d7d1a66af81..321e47874ceb --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@@ -74,7 -74,6 +74,6 @@@ #include "iwl-eeprom-parse.h" #include "mvm.h" #include "sta.h" - #include "fw-dbg.h"
static void iwl_mvm_bar_check_trigger(struct iwl_mvm *mvm, const u8 *addr, @@@ -89,15 -88,15 +88,15 @@@ trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_BA); ba_trig = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, NULL, trig)) + if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt, NULL, trig)) return;
if (!(le16_to_cpu(ba_trig->tx_bar) & BIT(tid))) return;
- iwl_mvm_fw_dbg_collect_trig(mvm, trig, - "BAR sent to %pM, tid %d, ssn %d", - addr, tid, ssn); + iwl_fw_dbg_collect_trig(&mvm->fwrt, trig, + "BAR sent to %pM, tid %d, ssn %d", + addr, tid, ssn); }
#define OPT_HDR(type, skb, off) \ @@@ -185,14 -184,8 +184,14 @@@ static u16 iwl_mvm_tx_csum(struct iwl_m else udp_hdr(skb)->check = 0;
- /* mac header len should include IV, size is in words */ - if (info->control.hw_key) + /* + * mac header len should include IV, size is in words unless + * the IV is added by the firmware like in WEP. + * In new Tx API, the IV is always added by the firmware. + */ + if (!iwl_mvm_has_new_tx_api(mvm) && info->control.hw_key && + info->control.hw_key->cipher != WLAN_CIPHER_SUITE_WEP40 && + info->control.hw_key->cipher != WLAN_CIPHER_SUITE_WEP104) mh_len += info->control.hw_key->iv_len; mh_len /= 2; offload_assist |= mh_len << TX_CMD_OFFLD_MH_SIZE; @@@ -559,9 -552,6 +558,6 @@@ static int iwl_mvm_get_ctrl_vif_queue(s { struct iwl_mvm_vif *mvmvif;
- if (!iwl_mvm_is_dqa_supported(mvm)) - return info->hw_queue; - mvmvif = iwl_mvm_vif_from_mac80211(info->control.vif);
switch (info->control.vif->type) { @@@ -660,8 -650,7 +656,7 @@@ int iwl_mvm_tx_skb_non_sta(struct iwl_m
if (ap_sta_id != IWL_MVM_INVALID_STA) sta_id = ap_sta_id; - } else if (iwl_mvm_is_dqa_supported(mvm) && - info.control.vif->type == NL80211_IFTYPE_MONITOR) { + } else if (info.control.vif->type == NL80211_IFTYPE_MONITOR) { queue = mvm->aux_queue; } } @@@ -680,17 -669,6 +675,6 @@@ return -1; }
- /* - * Increase the pending frames counter, so that later when a reply comes - * in and the counter is decreased - we don't start getting negative - * values. - * Note that we don't need to make sure it isn't agg'd, since we're - * TXing non-sta - * For DQA mode - we shouldn't increase it though - */ - if (!iwl_mvm_is_dqa_supported(mvm)) - atomic_inc(&mvm->pending_frames[sta_id]); - return 0; }
@@@ -758,7 -736,7 +742,7 @@@ static int iwl_mvm_tx_tso(struct iwl_mv max_amsdu_len = sta->max_amsdu_len;
/* the Tx FIFO to which this A-MSDU will be routed */ - txf = iwl_mvm_ac_to_tx_fifo[tid_to_mac80211_ac[tid]]; + txf = iwl_mvm_mac_ac_to_tx_fifo(mvm, tid_to_mac80211_ac[tid]);
/* * Don't send an AMSDU that will be longer than the TXF. @@@ -767,7 -745,8 +751,8 @@@ * fifo to be able to send bursts. */ max_amsdu_len = min_t(unsigned int, max_amsdu_len, - mvm->smem_cfg.lmac[0].txfifo_size[txf] - 256); + mvm->fwrt.smem_cfg.lmac[0].txfifo_size[txf] - + 256);
if (unlikely(dbg_max_amsdu_len)) max_amsdu_len = min_t(unsigned int, max_amsdu_len, @@@ -1000,22 -979,13 +985,13 @@@ static int iwl_mvm_tx_mpdu(struct iwl_m } }
- if (iwl_mvm_is_dqa_supported(mvm) || is_ampdu) - txq_id = mvmsta->tid_data[tid].txq_id; - - if (sta->tdls && !iwl_mvm_is_dqa_supported(mvm)) { - /* default to TID 0 for non-QoS packets */ - u8 tdls_tid = tid == IWL_MAX_TID_COUNT ? 0 : tid; - - txq_id = mvmsta->hw_queue[tid_to_mac80211_ac[tdls_tid]]; - } + txq_id = mvmsta->tid_data[tid].txq_id;
WARN_ON_ONCE(info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM);
/* Check if TXQ needs to be allocated or re-activated */ if (unlikely(txq_id == IWL_MVM_INVALID_QUEUE || - !mvmsta->tid_data[tid].is_tid_active) && - iwl_mvm_is_dqa_supported(mvm)) { + !mvmsta->tid_data[tid].is_tid_active)) { /* If TXQ needs to be allocated... */ if (txq_id == IWL_MVM_INVALID_QUEUE) { iwl_mvm_tx_add_stream(mvm, mvmsta, tid, skb); @@@ -1042,7 -1012,7 +1018,7 @@@ txq_id); }
- if (iwl_mvm_is_dqa_supported(mvm) && !iwl_mvm_has_new_tx_api(mvm)) { + if (!iwl_mvm_has_new_tx_api(mvm)) { /* Keep track of the time of the last frame for this RA/TID */ mvm->queue_info[txq_id].last_frame_time[tid] = jiffies;
@@@ -1076,10 -1046,6 +1052,6 @@@
spin_unlock(&mvmsta->lock);
- /* Increase pending frames count if this isn't AMPDU or DQA queue */ - if (!iwl_mvm_is_dqa_supported(mvm) && !is_ampdu) - atomic_inc(&mvm->pending_frames[mvmsta->sta_id]); - return 0;
drop_unlock_sta: @@@ -1148,8 -1114,7 +1120,7 @@@ static void iwl_mvm_check_ratid_empty(s lockdep_assert_held(&mvmsta->lock);
if ((tid_data->state == IWL_AGG_ON || - tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA || - iwl_mvm_is_dqa_supported(mvm)) && + tid_data->state == IWL_EMPTYING_HW_QUEUE_DELBA) && iwl_mvm_tid_queued(mvm, tid_data) == 0) { /* * Now that this aggregation or DQA queue is empty tell @@@ -1183,13 -1148,6 +1154,6 @@@ IWL_DEBUG_TX_QUEUES(mvm, "Can continue DELBA flow ssn = next_recl = %d\n", tid_data->next_reclaimed); - if (!iwl_mvm_is_dqa_supported(mvm)) { - u8 mac80211_ac = tid_to_mac80211_ac[tid]; - - iwl_mvm_disable_txq(mvm, tid_data->txq_id, - vif->hw_queue[mac80211_ac], tid, - CMD_ASYNC); - } tid_data->state = IWL_AGG_OFF; ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; @@@ -1301,7 -1259,7 +1265,7 @@@ static void iwl_mvm_tx_status_check_tri trig = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_TX_STATUS); status_trig = (void *)trig->data;
- if (!iwl_fw_dbg_trigger_check_stop(mvm, NULL, trig)) + if (!iwl_fw_dbg_trigger_check_stop(&mvm->fwrt, NULL, trig)) return;
for (i = 0; i < ARRAY_SIZE(status_trig->statuses); i++) { @@@ -1312,9 -1270,9 +1276,9 @@@ if (status_trig->statuses[i].status != (status & TX_STATUS_MSK)) continue;
- iwl_mvm_fw_dbg_collect_trig(mvm, trig, - "Tx status %d was received", - status & TX_STATUS_MSK); + iwl_fw_dbg_collect_trig(&mvm->fwrt, trig, + "Tx status %d was received", + status & TX_STATUS_MSK); break; } } @@@ -1387,10 -1345,10 +1351,10 @@@ static void iwl_mvm_rx_tx_cmd_single(st info->flags |= IEEE80211_TX_STAT_ACK; break; case TX_STATUS_FAIL_DEST_PS: - /* In DQA, the FW should have stopped the queue and not + /* the FW should have stopped the queue and not * return this status */ - WARN_ON(iwl_mvm_is_dqa_supported(mvm)); + WARN_ON(1); info->flags |= IEEE80211_TX_STAT_TX_FILTERED; break; default: @@@ -1446,26 -1404,21 +1410,21 @@@ ieee80211_tx_status(mvm->hw, skb); }
- if (iwl_mvm_is_dqa_supported(mvm) || txq_id >= mvm->first_agg_queue) { - /* If this is an aggregation queue, we use the ssn since: - * ssn = wifi seq_num % 256. - * The seq_ctl is the sequence control of the packet to which - * this Tx response relates. But if there is a hole in the - * bitmap of the BA we received, this Tx response may allow us to - * reclaim the hole and all the subsequent packets that were - * already acked. In that case, seq_ctl != ssn, and the next - * packet to be reclaimed will be ssn and not seq_ctl. In that - * case, several packets will be reclaimed even if - * frame_count = 1. - * - * The ssn is the index (% 256) of the latest packet that has - * been treated (acked / dropped) + 1. - */ - next_reclaimed = ssn; - } else { - /* The next packet to be reclaimed is the one after this one */ - next_reclaimed = IEEE80211_SEQ_TO_SN(seq_ctl + 0x10); - } + /* This is an aggregation queue or might become one, so we use + * the ssn since: ssn = wifi seq_num % 256. + * The seq_ctl is the sequence control of the packet to which + * this Tx response relates. But if there is a hole in the + * bitmap of the BA we received, this Tx response may allow us to + * reclaim the hole and all the subsequent packets that were + * already acked. In that case, seq_ctl != ssn, and the next + * packet to be reclaimed will be ssn and not seq_ctl. In that + * case, several packets will be reclaimed even if + * frame_count = 1. + * + * The ssn is the index (% 256) of the latest packet that has + * been treated (acked / dropped) + 1. + */ + next_reclaimed = ssn;
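The comment above leans on 8-bit sequence arithmetic: ssn is the wifi sequence number mod 256, and everything strictly before it on the circle can be reclaimed even across a wrap. A small sketch of that circular comparison (seq_before() is an illustrative helper, not driver API):

	#include <stdio.h>
	#include <stdint.h>

	/* True if a precedes b on the mod-256 sequence circle: the signed
	 * reinterpretation of the 8-bit difference handles wraparound. */
	static int seq_before(uint8_t a, uint8_t b)
	{
		return (int8_t)(a - b) < 0;
	}

	int main(void)
	{
		uint8_t ssn = 2;	/* latest treated packet + 1, wrapped past 255 */

		for (uint8_t seq = 250; seq != ssn; seq++)
			if (seq_before(seq, ssn))
				printf("reclaim %u\n", seq);	/* 250..255, 0, 1 */
		return 0;
	}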
IWL_DEBUG_TX_REPLY(mvm, "TXQ %d status %s (0x%08x)\n", @@@ -1548,49 -1501,6 +1507,6 @@@ mvmsta = NULL; }
- /* - * If the txq is not an AMPDU queue, there is no chance we freed - * several skbs. Check that out... - */ - if (iwl_mvm_is_dqa_supported(mvm) || txq_id >= mvm->first_agg_queue) - goto out; - - /* We can't free more than one frame at once on a shared queue */ - WARN_ON(skb_freed > 1); - - /* If we have still frames for this STA nothing to do here */ - if (!atomic_sub_and_test(skb_freed, &mvm->pending_frames[sta_id])) - goto out; - - if (mvmsta && mvmsta->vif->type == NL80211_IFTYPE_AP) { - - /* - * If there are no pending frames for this STA and - * the tx to this station is not disabled, notify - * mac80211 that this station can now wake up in its - * STA table. - * If mvmsta is not NULL, sta is valid. - */ - - spin_lock_bh(&mvmsta->lock); - - if (!mvmsta->disable_tx) - ieee80211_sta_block_awake(mvm->hw, sta, false); - - spin_unlock_bh(&mvmsta->lock); - } - - if (PTR_ERR(sta) == -EBUSY || PTR_ERR(sta) == -ENOENT) { - /* - * We are draining and this was the last packet - pre_rcu_remove - * has been called already. We might be after the - * synchronize_net already. - * Don't rely on iwl_mvm_rm_sta to see the empty Tx queues. - */ - set_bit(sta_id, mvm->sta_drained); - schedule_work(&mvm->sta_drained_wk); - } - out: rcu_read_unlock(); } @@@ -1654,9 -1564,8 +1570,8 @@@ static void iwl_mvm_rx_tx_cmd_agg(struc struct iwl_mvm_sta *mvmsta; int queue = SEQ_TO_QUEUE(sequence);
- if (WARN_ON_ONCE(queue < mvm->first_agg_queue && - (!iwl_mvm_is_dqa_supported(mvm) || - (queue != IWL_MVM_DQA_BSS_CLIENT_QUEUE)))) + if (WARN_ON_ONCE(queue < IWL_MVM_DQA_MIN_DATA_QUEUE && + (queue != IWL_MVM_DQA_BSS_CLIENT_QUEUE))) return;
if (WARN_ON_ONCE(tid == IWL_TID_NON_QOS)) @@@ -1821,8 -1730,6 +1736,8 @@@ void iwl_mvm_rx_ba_notif(struct iwl_mv struct iwl_mvm_tid_data *tid_data; struct iwl_mvm_sta *mvmsta;
+ ba_info.flags = IEEE80211_TX_STAT_AMPDU; + if (iwl_mvm_has_new_tx_api(mvm)) { struct iwl_mvm_compressed_ba_notif *ba_res = (void *)pkt->data; diff --combined drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 84f4ba01e14f,87712aeac31f..2126b9adbb08 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@@ -510,17 -510,9 +510,17 @@@ static const struct pci_device_id iwl_h
/* 9000 Series */ {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, + {IWL_PCI_DEVICE(0x271B, 0x0014, iwl9160_2ac_cfg)}, + {IWL_PCI_DEVICE(0x271B, 0x0210, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0014, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0xA014, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x4010, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0210, iwl9260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0214, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1610, iwl9270_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9460_2ac_cfg)}, @@@ -535,22 -527,10 +535,22 @@@ {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0260, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0064, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x00A4, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x40A4, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x02A4, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x00A0, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x02A0, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0xA370, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x4030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0230, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0234, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0238, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x023C, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg)}, {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg)}, @@@ -825,11 -805,11 +825,11 @@@ static int iwl_pci_resume(struct devic /* * Enable rfkill interrupt (in order to keep track of the rfkill * status). Must be locked to avoid processing a possible rfkill - * interrupt while in iwl_trans_check_hw_rf_kill(). + * interrupt while in iwl_pcie_check_hw_rf_kill(). */ mutex_lock(&trans_pcie->mutex); iwl_enable_rfkill_int(trans); - iwl_trans_check_hw_rf_kill(trans); + iwl_pcie_check_hw_rf_kill(trans); mutex_unlock(&trans_pcie->mutex);
return 0; diff --combined include/linux/syscalls.h index 138c94535864,c9170218e9e6..00b4cbf8559c --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@@ -100,12 -100,11 +100,12 @@@ union bpf_attr #define __MAP(n,...) __MAP##n(__VA_ARGS__)
#define __SC_DECL(t, a) t a -#define __TYPE_IS_L(t) (__same_type((t)0, 0L)) -#define __TYPE_IS_UL(t) (__same_type((t)0, 0UL)) -#define __TYPE_IS_LL(t) (__same_type((t)0, 0LL) || __same_type((t)0, 0ULL)) +#define __TYPE_AS(t, v) __same_type((__force t)0, v) +#define __TYPE_IS_L(t) (__TYPE_AS(t, 0L)) +#define __TYPE_IS_UL(t) (__TYPE_AS(t, 0UL)) +#define __TYPE_IS_LL(t) (__TYPE_AS(t, 0LL) || __TYPE_AS(t, 0ULL)) #define __SC_LONG(t, a) __typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L)) a -#define __SC_CAST(t, a) (t) a +#define __SC_CAST(t, a) (__force t) a #define __SC_ARGS(t, a) a #define __SC_TEST(t, a) (void)BUILD_BUG_ON_ZERO(!__TYPE_IS_LL(t) && sizeof(t) > sizeof(long))
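These macros exist because 32-bit syscall arguments arrive in 64-bit registers and must be widened with the right signedness; __TYPE_IS_LL() decides at compile time whether the trampoline parameter becomes long long or long. A compile-time demo of the GCC builtins the macros wrap (__force is a sparse-only annotation and is dropped here; this is a userspace sketch, not the kernel header):

	#include <stdio.h>

	#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
	#define __TYPE_IS_LL(t) (__same_type((t)0, 0LL) || __same_type((t)0, 0ULL))
	/* Pick the trampoline parameter type: long long for 64-bit args,
	 * long otherwise, exactly as __SC_LONG() does above. */
	#define __SC_LONG(t) typeof(__builtin_choose_expr(__TYPE_IS_LL(t), 0LL, 0L))

	int main(void)
	{
		printf("int       -> %zu bytes\n", sizeof(__SC_LONG(int)));
		printf("long long -> %zu bytes\n", sizeof(__SC_LONG(long long)));
		return 0;
	}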
@@@ -173,8 -172,20 +173,20 @@@ extern struct trace_event_functions exi static struct syscall_metadata __used \ __attribute__((section("__syscalls_metadata"))) \ *__p_syscall_meta_##sname = &__syscall_meta_##sname; + + static inline int is_syscall_trace_event(struct trace_event_call *tp_event) + { + return tp_event->class == &event_class_syscall_enter || + tp_event->class == &event_class_syscall_exit; + } + #else #define SYSCALL_METADATA(sname, nb, ...) + + static inline int is_syscall_trace_event(struct trace_event_call *tp_event) + { + return 0; + } #endif
#define SYSCALL_DEFINE0(sname) \ @@@ -579,12 -590,12 +591,12 @@@ asmlinkage long sys_preadv(unsigned lon unsigned long vlen, unsigned long pos_l, unsigned long pos_h); asmlinkage long sys_preadv2(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h, - int flags); + rwf_t flags); asmlinkage long sys_pwritev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h); asmlinkage long sys_pwritev2(unsigned long fd, const struct iovec __user *vec, unsigned long vlen, unsigned long pos_l, unsigned long pos_h, - int flags); + rwf_t flags); asmlinkage long sys_getcwd(char __user *buf, unsigned long size); asmlinkage long sys_mkdir(const char __user *pathname, umode_t mode); asmlinkage long sys_chdir(const char __user *filename); diff --combined include/net/tcp.h index ada65e767b28,999f3efe572b..afdab3781425 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@@ -139,6 -139,7 +139,7 @@@ void tcp_time_wait(struct sock *sk, in #endif #define TCP_RTO_MAX ((unsigned)(120*HZ)) #define TCP_RTO_MIN ((unsigned)(HZ/5)) + #define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */ #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ)) /* RFC 1122 initial RTO value, now * used as a fallback RTO for the @@@ -150,8 -151,6 +151,6 @@@ #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes * for local resources. */ - #define TCP_REO_TIMEOUT_MIN (2000) /* Min RACK reordering timeout in usec */ - #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */ #define TCP_KEEPALIVE_PROBES 9 /* Max of 9 keepalive probes */ #define TCP_KEEPALIVE_INTVL (75*HZ) @@@ -257,7 -256,6 +256,6 @@@ extern int sysctl_tcp_rmem[3] extern int sysctl_tcp_app_win; extern int sysctl_tcp_adv_win_scale; extern int sysctl_tcp_frto; - extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; @@@ -352,8 -350,11 +350,11 @@@ int tcp_v4_rcv(struct sk_buff *skb)
int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); + int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); + int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, + size_t size, int flags); ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tcp_release_cb(struct sock *sk); @@@ -363,7 -364,7 +364,7 @@@ void tcp_delack_timer_handler(struct so int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len); + const struct tcphdr *th); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); @@@ -633,29 -634,6 +634,6 @@@ static inline u32 __tcp_set_rto(const s return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); }
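For reference, __tcp_set_rto() in the context above encodes RFC 6298's RTO = SRTT + 4*RTTVAR: the kernel keeps srtt_us scaled by 8 and rttvar_us already scaled by 4, so the whole computation reduces to a shift and an add. A standalone rendition of the arithmetic (the sample values are made up):

	#include <stdio.h>

	/* RFC 6298: RTO = SRTT + 4 * RTTVAR. With srtt_us stored as 8*SRTT
	 * and rttvar_us stored as 4*RTTVAR, this is simply: */
	static unsigned int tcp_rto_us(unsigned int srtt_us, unsigned int rttvar_us)
	{
		return (srtt_us >> 3) + rttvar_us;
	}

	int main(void)
	{
		/* SRTT = 40ms (stored 320000), RTTVAR = 5ms (stored 20000) */
		printf("rto = %u us\n", tcp_rto_us(320000, 20000));	/* 60000 us */
		return 0;
	}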
- static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) - { - tp->pred_flags = htonl((tp->tcp_header_len << 26) | - ntohl(TCP_FLAG_ACK) | - snd_wnd); - } - - static inline void tcp_fast_path_on(struct tcp_sock *tp) - { - __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); - } - - static inline void tcp_fast_path_check(struct sock *sk) - { - struct tcp_sock *tp = tcp_sk(sk); - - if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && - tp->rcv_wnd && - atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && - !tp->urg_data) - tcp_fast_path_on(tp); - } - /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(struct sock *sk) { @@@ -849,6 -827,16 +827,16 @@@ static inline int tcp_v6_iif(const stru
return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif; } + + /* TCP_SKB_CB reference means this can not be used from early demux */ + static inline int tcp_v6_sdif(const struct sk_buff *skb) + { + #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags)) + return TCP_SKB_CB(skb)->header.h6.iif; + #endif + return 0; + } #endif
/* TCP_SKB_CB reference means this can not be used from early demux */ @@@ -862,6 -850,16 +850,16 @@@ static inline bool inet_exact_dif_match return false; }
+ /* TCP_SKB_CB reference means this can not be used from early demux */ + static inline int tcp_v4_sdif(struct sk_buff *skb) + { + #if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) + return TCP_SKB_CB(skb)->header.h4.iif; + #endif + return 0; + } + /* Due to TSO, an SKB can be composed of multiple actual * packets. To keep these tracked properly, we use this. */ @@@ -905,9 -903,8 +903,8 @@@ enum tcp_ca_event
/* Information about inbound ACK, passed to cong_ops->in_ack_event() */ enum tcp_ca_ack_event_flags { - CA_ACK_SLOWPATH = (1 << 0), /* In slow path processing */ - CA_ACK_WIN_UPDATE = (1 << 1), /* ACK updated window */ - CA_ACK_ECE = (1 << 2), /* ECE bit is set on ack */ + CA_ACK_WIN_UPDATE = (1 << 0), /* ACK updated window */ + CA_ACK_ECE = (1 << 1), /* ECE bit is set on ack */ };
/* @@@ -1245,17 -1242,6 +1242,6 @@@ static inline bool tcp_checksum_complet __tcp_checksum_complete(skb); }
- /* Prequeue for VJ style copy to user, combined with checksumming. */ - - static inline void tcp_prequeue_init(struct tcp_sock *tp) - { - tp->ucopy.task = NULL; - tp->ucopy.len = 0; - tp->ucopy.memory = 0; - skb_queue_head_init(&tp->ucopy.prequeue); - } - - bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); int tcp_filter(struct sock *sk, struct sk_buff *skb);
@@@ -1916,20 -1902,11 +1902,21 @@@ extern void tcp_rack_advance(struct tcp u64 xmit_time); extern void tcp_rack_reo_timeout(struct sock *sk);
+/* At how many usecs into the future should the RTO fire? */ +static inline s64 tcp_rto_delta_us(const struct sock *sk) +{ + const struct sk_buff *skb = tcp_write_queue_head(sk); + u32 rto = inet_csk(sk)->icsk_rto; + u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto); + + return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; +} + /* * Save and compile IPv4 options, return a pointer to it */ - static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) + static inline struct ip_options_rcu *tcp_v4_save_options(struct net *net, + struct sk_buff *skb) { const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; struct ip_options_rcu *dopt = NULL; @@@ -1938,7 -1915,7 +1925,7 @@@ int opt_size = sizeof(*dopt) + opt->optlen;
dopt = kmalloc(opt_size, GFP_ATOMIC); - if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) { + if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) { kfree(dopt); dopt = NULL; } diff --combined net/ipv4/tcp_input.c index 53de1424c13c,842ed75ccb25..d73903fe8c83 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@@ -103,11 -103,9 +103,10 @@@ int sysctl_tcp_invalid_ratelimit __read #define FLAG_DATA_SACKED 0x20 /* New SACK. */ #define FLAG_ECE 0x40 /* ECE in this ACK */ #define FLAG_LOST_RETRANS 0x80 /* This ACK marks some retransmission lost */ - #define FLAG_SLOWPATH 0x100 /* Do not skip RFC checks for window update.*/ #define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ +#define FLAG_SET_XMIT_TIMER 0x1000 /* Set TLP or RTO timer */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ @@@ -1952,6 -1950,7 +1951,7 @@@ void tcp_enter_loss(struct sock *sk !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->prior_cwnd = tp->snd_cwnd; tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); tcp_init_undo(tp); @@@ -2521,8 -2520,8 +2521,8 @@@ static inline void tcp_end_cwnd_reducti return;
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || - (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { + if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && + (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_jiffies32; } @@@ -3005,7 -3004,10 +3005,7 @@@ void tcp_rearm_rto(struct sock *sk /* Offset the time elapsed after installing regular RTO */ if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { - struct sk_buff *skb = tcp_write_queue_head(sk); - u64 rto_time_stamp = skb->skb_mstamp + - jiffies_to_usecs(rto); - s64 delta_us = rto_time_stamp - tp->tcp_mstamp; + s64 delta_us = tcp_rto_delta_us(sk); /* delta_us may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. */ @@@ -3017,13 -3019,6 +3017,13 @@@ } }
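tcp_rearm_rto() above now calls the tcp_rto_delta_us() helper introduced in include/net/tcp.h, which answers "how far in the future should the pending RTO fire": head skb's transmit timestamp plus the RTO, minus the current TCP clock; a negative result means the timer is already overdue. The arithmetic in isolation (all values in microseconds, inputs made up):

	#include <stdio.h>
	#include <stdint.h>

	/* delta = (head skb xmit time + RTO) - now; may be negative if the
	 * timer should already have fired (cf. tcp_rto_delta_us()). */
	static int64_t rto_delta_us(uint64_t skb_mstamp_us, uint64_t rto_us,
				    uint64_t now_us)
	{
		return (int64_t)(skb_mstamp_us + rto_us - now_us);
	}

	int main(void)
	{
		/* head sent at t=1000000, RTO 200000, now t=1150000 */
		printf("fire in %lld us\n",
		       (long long)rto_delta_us(1000000, 200000, 1150000));
		return 0;	/* prints 50000 */
	}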
+/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */ +static void tcp_set_xmit_timer(struct sock *sk) +{ + if (!tcp_schedule_loss_probe(sk)) + tcp_rearm_rto(sk); +} + /* If we get here, the whole TSO packet has not been acked. */ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) { @@@ -3185,7 -3180,7 +3185,7 @@@ static int tcp_clean_rtx_queue(struct s ca_rtt_us, sack->rate);
if (flag & FLAG_ACKED) { - tcp_rearm_rto(sk); + flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */ if (unlikely(icsk->icsk_mtup.probe_size && !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { tcp_mtup_probe_success(sk); @@@ -3213,7 -3208,7 +3213,7 @@@ * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. */ - tcp_rearm_rto(sk); + flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */ }
if (icsk->icsk_ca_ops->pkts_acked) { @@@ -3372,12 -3367,6 +3372,6 @@@ static int tcp_ack_update_window(struc if (tp->snd_wnd != nwin) { tp->snd_wnd = nwin;
- /* Note, it is the only place, where - * fast path is recovered for sending TCP. - */ - tp->pred_flags = 0; - tcp_fast_path_check(sk); - if (tcp_send_head(sk)) tcp_slow_start_after_idle_check(sk);
@@@ -3559,6 -3548,7 +3553,7 @@@ static int tcp_ack(struct sock *sk, con u32 lost = tp->lost; int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ + u32 ack_ev_flags = 0;
sack_state.first_sackt = 0; sack_state.rate = &rs; @@@ -3585,6 -3575,9 +3580,6 @@@ if (after(ack, tp->snd_nxt)) goto invalid_ack;
- if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) - tcp_rearm_rto(sk); - if (after(ack, prior_snd_una)) { flag |= FLAG_SND_UNA_ADVANCED; icsk->icsk_retransmits = 0; @@@ -3599,42 -3592,26 +3594,26 @@@ if (flag & FLAG_UPDATE_TS_RECENT) tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
- if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) { - /* Window is constant, pure forward advance. - * No more checks are required. - * Note, we use the fact that SND.UNA>=SND.WL2. - */ - tcp_update_wl(tp, ack_seq); - tcp_snd_una_update(tp, ack); - flag |= FLAG_WIN_UPDATE; - - tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE); - - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS); - } else { - u32 ack_ev_flags = CA_ACK_SLOWPATH; - - if (ack_seq != TCP_SKB_CB(skb)->end_seq) - flag |= FLAG_DATA; - else - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS); + if (ack_seq != TCP_SKB_CB(skb)->end_seq) + flag |= FLAG_DATA; + else + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
- flag |= tcp_ack_update_window(sk, skb, ack, ack_seq); + flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
- if (TCP_SKB_CB(skb)->sacked) - flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, - &sack_state); + if (TCP_SKB_CB(skb)->sacked) + flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, + &sack_state);
- if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { - flag |= FLAG_ECE; - ack_ev_flags |= CA_ACK_ECE; - } + if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) { + flag |= FLAG_ECE; + ack_ev_flags = CA_ACK_ECE; + }
- if (flag & FLAG_WIN_UPDATE) - ack_ev_flags |= CA_ACK_WIN_UPDATE; + if (flag & FLAG_WIN_UPDATE) + ack_ev_flags |= CA_ACK_WIN_UPDATE;
- tcp_in_ack_event(sk, ack_ev_flags); - } + tcp_in_ack_event(sk, ack_ev_flags);
/* We passed data and got it acked, remove any soft error * log. Something worked... @@@ -3649,20 -3626,18 +3628,20 @@@ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, &sack_state);
+ if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); + /* If needed, reset TLP/RTO timer; RACK may later override this. */ + if (flag & FLAG_SET_XMIT_TIMER) + tcp_set_xmit_timer(sk); + if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); } - if (tp->tlp_high_seq) - tcp_process_tlp_ack(sk, ack, flag);
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) sk_dst_confirm(sk);
- if (icsk->icsk_pending == ICSK_TIME_RETRANS) - tcp_schedule_loss_probe(sk); delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ tcp_rate_gen(sk, delivered, lost, sack_state.rate); @@@ -4402,8 -4377,6 +4381,6 @@@ static void tcp_data_queue_ofo(struct s return; }
- /* Disable header prediction. */ - tp->pred_flags = 0; inet_csk_schedule_ack(sk);
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); @@@ -4592,8 -4565,8 +4569,8 @@@ err static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - bool fragstolen = false; - int eaten = -1; + bool fragstolen; + int eaten;
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { __kfree_skb(skb); @@@ -4615,32 -4588,13 +4592,13 @@@ goto out_of_window;
/* Ok. In sequence. In window. */ - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && tp->ucopy.len && - sock_owned_by_user(sk) && !tp->urg_data) { - int chunk = min_t(unsigned int, skb->len, - tp->ucopy.len); - - __set_current_state(TASK_RUNNING); - - if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - eaten = (chunk == skb->len); - tcp_rcv_space_adjust(sk); - } - } - - if (eaten <= 0) { queue_and_out: - if (eaten < 0) { - if (skb_queue_len(&sk->sk_receive_queue) == 0) - sk_forced_mem_schedule(sk, skb->truesize); - else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) - goto drop; - } - eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); - } + if (skb_queue_len(&sk->sk_receive_queue) == 0) + sk_forced_mem_schedule(sk, skb->truesize); + else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) + goto drop; + + eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); if (skb->len) tcp_event_data_recv(sk, skb); @@@ -4660,8 -4614,6 +4618,6 @@@ if (tp->rx_opt.num_sacks) tcp_sack_remove(tp);
- tcp_fast_path_check(sk); - if (eaten > 0) kfree_skb_partial(skb, fragstolen); if (!sock_flag(sk, SOCK_DEAD)) @@@ -4987,7 -4939,6 +4943,6 @@@ static int tcp_prune_queue(struct sock NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
/* Massive buffer overcommit. */ - tp->pred_flags = 0; return -1; }
@@@ -5159,9 -5110,6 +5114,6 @@@ static void tcp_check_urg(struct sock *
tp->urg_data = TCP_URG_NOTYET; tp->urg_seq = ptr; - - /* Disable header prediction. */ - tp->pred_flags = 0; }
/* This is the 'fast' part of urgent handling. */ @@@ -5190,26 -5138,6 +5142,6 @@@ static void tcp_urg(struct sock *sk, st } }
- static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) - { - struct tcp_sock *tp = tcp_sk(sk); - int chunk = skb->len - hlen; - int err; - - if (skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk); - else - err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg); - - if (!err) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - tcp_rcv_space_adjust(sk); - } - - return err; - } - /* Accept RST for rcv_nxt - 1 after a FIN. * When tcp connections are abruptly terminated from Mac OSX (via ^C), a * FIN is sent followed by a RST packet. The RST is sent with the same @@@ -5340,201 -5268,29 +5272,29 @@@ discard
/* * TCP receive function for the ESTABLISHED state. - * - * It is split into a fast path and a slow path. The fast path is - * disabled when: - * - A zero window was announced from us - zero window probing - * is only handled properly in the slow path. - * - Out of order segments arrived. - * - Urgent data is expected. - * - There is no buffer space left - * - Unexpected TCP flags/window values/header lengths are received - * (detected by checking the TCP header against pred_flags) - * - Data is sent in both directions. Fast path only supports pure senders - * or pure receivers (this means either the sequence number or the ack - * value must stay constant) - * - Unexpected TCP option. - * - * When these conditions are not satisfied it drops into a standard - * receive procedure patterned after RFC793 to handle all cases. - * The first three cases are guaranteed by proper pred_flags setting, - * the rest is checked inline. Fast processing is turned on in - * tcp_data_queue when everything is OK. */ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) + const struct tcphdr *th) { + unsigned int len = skb->len; struct tcp_sock *tp = tcp_sk(sk);
tcp_mstamp_refresh(tp); if (unlikely(!sk->sk_rx_dst)) inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); - /* - * Header prediction. - * The code loosely follows the one in the famous - * "30 instruction TCP receive" Van Jacobson mail. - * - * Van's trick is to deposit buffers into socket queue - * on a device interrupt, to call tcp_recv function - * on the receive process context and checksum and copy - * the buffer to user space. smart... - * - * Our current scheme is not silly either but we take the - * extra cost of the net_bh soft interrupt processing... - * We do checksum and copy also but from device to kernel. - */
tp->rx_opt.saw_tstamp = 0;
- /* pred_flags is 0xS?10 << 16 + snd_wnd - * if header_prediction is to be made - * 'S' will always be tp->tcp_header_len >> 2 - * '?' will be 0 for the fast path, otherwise pred_flags is 0 to - * turn it off (when there are holes in the receive - * space for instance) - * PSH flag is ignored. - */ - - if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && - TCP_SKB_CB(skb)->seq == tp->rcv_nxt && - !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { - int tcp_header_len = tp->tcp_header_len; - - /* Timestamp header prediction: tcp_header_len - * is automatically equal to th->doff*4 due to pred_flags - * match. - */ - - /* Check timestamp */ - if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { - /* No? Slow path! */ - if (!tcp_parse_aligned_timestamp(tp, th)) - goto slow_path; - - /* If PAWS failed, check it more carefully in slow path */ - if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) - goto slow_path; - - /* DO NOT update ts_recent here, if checksum fails - * and timestamp was corrupted part, it will result - * in a hung connection since we will drop all - * future packets due to the PAWS test. - */ - } - - if (len <= tcp_header_len) { - /* Bulk data transfer: sender */ - if (len == tcp_header_len) { - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - /* We know that such packets are checksummed - * on entry. - */ - tcp_ack(sk, skb, 0); - __kfree_skb(skb); - tcp_data_snd_check(sk); - return; - } else { /* Header too small */ - TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); - goto discard; - } - } else { - int eaten = 0; - bool fragstolen = false; - - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && - len - tcp_header_len <= tp->ucopy.len && - sock_owned_by_user(sk)) { - __set_current_state(TASK_RUNNING); - - if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + - TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - tcp_rcv_rtt_measure_ts(sk, skb); - - __skb_pull(skb, tcp_header_len); - tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPHPHITSTOUSER); - eaten = 1; - } - } - if (!eaten) { - if (tcp_checksum_complete(skb)) - goto csum_error; - - if ((int)skb->truesize > sk->sk_forward_alloc) - goto step5; - - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - tcp_rcv_rtt_measure_ts(sk, skb); - - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); - - /* Bulk data transfer: receiver */ - eaten = tcp_queue_rcv(sk, skb, tcp_header_len, - &fragstolen); - } - - tcp_event_data_recv(sk, skb); - - if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) { - /* Well, only one small jumplet in fast path... 
*/ - tcp_ack(sk, skb, FLAG_DATA); - tcp_data_snd_check(sk); - if (!inet_csk_ack_scheduled(sk)) - goto no_ack; - } - - __tcp_ack_snd_check(sk, 0); - no_ack: - if (eaten) - kfree_skb_partial(skb, fragstolen); - sk->sk_data_ready(sk); - return; - } - } - - slow_path: if (len < (th->doff << 2) || tcp_checksum_complete(skb)) goto csum_error;
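Everything removed above is the VJ-style header-prediction fast path. For reference, the predictor keyed off a single precomputed word, built by __tcp_fast_path_on() as it was defined before this series (modulo whitespace):

	static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
	{
		/* doff in the top bits, the ACK flag, and the expected
		 * window: one 32-bit compare against the incoming header
		 * word decided whether a segment could take the fast path. */
		tp->pred_flags = htonl((tp->tcp_header_len << 26) |
				       ntohl(TCP_FLAG_ACK) |
				       snd_wnd);
	}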
if (!th->ack && !th->rst && !th->syn) goto discard;
- /* - * Standard slow path. - */ - if (!tcp_validate_incoming(sk, skb, th, 1)) return;
- step5: - if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0) + if (tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT) < 0) goto discard;
tcp_rcv_rtt_measure_ts(sk, skb); @@@ -5587,12 -5343,6 +5347,6 @@@ void tcp_finish_connect(struct sock *sk
if (sock_flag(sk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); - - if (!tp->rx_opt.snd_wscale) - __tcp_fast_path_on(tp, tp->snd_wnd); - else - tp->pred_flags = 0; - }
static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, @@@ -5721,7 -5471,7 +5475,7 @@@ static int tcp_rcv_synsent_state_proces tcp_ecn_rcv_synack(tp, th);
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); - tcp_ack(sk, skb, FLAG_SLOWPATH); + tcp_ack(sk, skb, 0);
/* Ok.. it's good. Set up sequence numbers and * move to established. @@@ -5957,8 -5707,8 +5711,8 @@@ int tcp_rcv_state_process(struct sock * return 0;
/* step 5: check the ACK field */ - acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH | - FLAG_UPDATE_TS_RECENT | + + acceptable = tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT | FLAG_NO_CHALLENGE_ACK) > 0;
if (!acceptable) { @@@ -6026,7 -5776,6 +5780,6 @@@ tp->lsndtime = tcp_jiffies32;
tcp_initialize_rcv_mss(sk); - tcp_fast_path_on(tp); break;
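With the fast path gone there is no FLAG_SLOWPATH left to pass: every ACK now runs the full tcp_ack() processing, and ts_recent updates are requested uniformly via FLAG_UPDATE_TS_RECENT. Schematically:

	/* before: full window/SACK processing only when FLAG_SLOWPATH set */
	tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT);
	/* after: the full processing is unconditional */
	tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT);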
case TCP_FIN_WAIT1: { diff --combined net/ipv4/tcp_output.c index 276406a83a37,d49bff51bdb7..7ae5de0018b5 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@@ -295,9 -295,7 +295,7 @@@ static u16 tcp_select_window(struct soc /* RFC1323 scaling applied */ new_win >>= tp->rx_opt.rcv_wscale;
- /* If we advertise zero window, disable fast path. */ if (new_win == 0) { - tp->pred_flags = 0; if (old_win) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTOZEROWINDOWADV); @@@ -2377,15 -2375,23 +2375,14 @@@ bool tcp_schedule_loss_probe(struct soc { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); - u32 timeout, tlp_time_stamp, rto_time_stamp; + u32 timeout, rto_delta_us;
- /* No consecutive loss probes. */ - if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { - tcp_rearm_rto(sk); - return false; - } /* Don't do any loss probe on a Fast Open connection before 3WHS * finishes. */ if (tp->fastopen_rsk) return false;
- /* TLP is only scheduled when next timer event is RTO. */ - if (icsk->icsk_pending != ICSK_TIME_RETRANS) - return false; - /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ @@@ -2398,20 -2404,28 +2395,24 @@@ tcp_send_head(sk)) return false;
- /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account + /* Probe timeout is 2*rtt. Add minimum RTO to account * for delayed ack when there's one outstanding packet. If no RTT * sample is available then probe after TCP_TIMEOUT_INIT. */ - timeout = rtt << 1 ? : TCP_TIMEOUT_INIT; - if (tp->packets_out == 1) - timeout = max_t(u32, timeout, - (rtt + (rtt >> 1) + TCP_DELACK_MAX)); - timeout = max_t(u32, timeout, msecs_to_jiffies(10)); + if (tp->srtt_us) { + timeout = usecs_to_jiffies(tp->srtt_us >> 2); + if (tp->packets_out == 1) + timeout += TCP_RTO_MIN; + else + timeout += TCP_TIMEOUT_MIN; + } else { + timeout = TCP_TIMEOUT_INIT; + }
- /* If RTO is shorter, just schedule TLP in its place. */ - tlp_time_stamp = tcp_jiffies32 + timeout; - rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; - if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { - s32 delta = rto_time_stamp - tcp_jiffies32; - if (delta > 0) - timeout = delta; - } + /* If the RTO formula yields an earlier time, then use that time. */ + rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */ + if (rto_delta_us > 0) + timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
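The new probe timeout falls straight out of srtt_us, which stores the smoothed RTT left-shifted by 3 (i.e. 8*srtt). A worked example of the arithmetic, with an assumed 50 ms RTT:

	/* tp->srtt_us = 8 * srtt, so srtt_us >> 2 == 2 * srtt (in usecs).
	 * Assuming srtt = 50 ms:
	 *   tp->srtt_us      = 400000
	 *   tp->srtt_us >> 2 = 100000 us = 2 * srtt
	 * packets_out == 1: add TCP_RTO_MIN (HZ/5, 200 ms) so a delayed
	 *                   ACK cannot trigger a spurious probe -> ~300 ms
	 * otherwise:        add TCP_TIMEOUT_MIN, a floor of a few jiffies */
	timeout = usecs_to_jiffies(tp->srtt_us >> 2);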
inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, TCP_RTO_MAX); diff --combined net/ipv4/tcp_timer.c index e906014890b6,f753f9d2fee3..655dd8d7f064 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@@ -239,7 -239,6 +239,6 @@@ static int tcp_write_timeout(struct soc /* Called with BH disabled */ void tcp_delack_timer_handler(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk);
sk_mem_reclaim_partial(sk); @@@ -254,17 -253,6 +253,6 @@@ } icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
- if (!skb_queue_empty(&tp->ucopy.prequeue)) { - struct sk_buff *skb; - - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); - - while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk_backlog_rcv(sk, skb); - - tp->ucopy.memory = 0; - } - if (inet_csk_ack_scheduled(sk)) { if (!icsk->icsk_ack.pingpong) { /* Delayed ACK missed: inflate ATO. */ @@@ -652,8 -640,7 +640,8 @@@ static void tcp_keepalive_timer (unsign goto death; }
- if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) + if (!sock_flag(sk, SOCK_KEEPOPEN) || + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT))) goto out;
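The keepalive fix widens the bail-out from TCP_CLOSE alone to a set of states. The TCPF_* constants are one-hot masks (TCPF_CLOSE == 1 << TCP_CLOSE), so testing membership in a state set is a single AND. As a generic sketch, with a hypothetical helper name chosen for illustration:

	/* hypothetical helper, just to show the idiom */
	static inline bool sk_state_in(const struct sock *sk, unsigned int mask)
	{
		return (1 << sk->sk_state) & mask;
	}

	/* e.g. no keepalives while closed or before the 3WHS completes */
	if (sk_state_in(sk, TCPF_CLOSE | TCPF_SYN_SENT))
		goto out;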
elapsed = keepalive_time_when(tp); diff --combined net/ipv6/route.c index a640fbcba15d,aba07fce67fb..521f58183240 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@@ -1820,6 -1820,11 +1820,11 @@@ static struct rt6_info *ip6_route_info_ goto out; }
+ if (cfg->fc_flags & RTF_OFFLOAD) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_OFFLOAD"); + goto out; + } + if (cfg->fc_dst_len > 128) { NL_SET_ERR_MSG(extack, "Invalid prefix length"); goto out; @@@ -2351,7 -2356,6 +2356,7 @@@ static void rt6_do_redirect(struct dst_ if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY;
+ nrt->rt6i_protocol = RTPROT_REDIRECT; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
if (ip6_ins_rt(nrt)) @@@ -2462,7 -2466,6 +2467,7 @@@ static struct rt6_info *rt6_add_route_i .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), + .fc_protocol = RTPROT_RA, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, @@@ -2515,7 -2518,6 +2520,7 @@@ struct rt6_info *rt6_add_dflt_router(co .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), + .fc_protocol = RTPROT_RA, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = dev_net(dev), @@@ -3330,6 -3332,9 +3335,9 @@@ static int rt6_nexthop_info(struct sk_b goto nla_put_failure; }
+ if (rt->rt6i_flags & RTF_OFFLOAD) + *flags |= RTNH_F_OFFLOAD; + /* not needed for multipath encoding b/c it has a rtnexthop struct */ if (!skip_oif && rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) @@@ -3427,6 -3432,14 +3435,6 @@@ static int rt6_fill_node(struct net *ne rtm->rtm_flags = 0; rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_protocol = rt->rt6i_protocol; - if (rt->rt6i_flags & RTF_DYNAMIC) - rtm->rtm_protocol = RTPROT_REDIRECT; - else if (rt->rt6i_flags & RTF_ADDRCONF) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO)) - rtm->rtm_protocol = RTPROT_RA; - else - rtm->rtm_protocol = RTPROT_KERNEL; - }
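Rather than reverse-engineering rtm_protocol from rt6i_flags at dump time (the block removed from rt6_fill_node() just above), the route's origin is now recorded once at creation and dumped verbatim; offloaded routes are likewise flagged with RTNH_F_OFFLOAD. Condensed from the rt6_add_dflt_router() hunk:

	struct fib6_config cfg = {
		.fc_flags    = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
			       RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
		.fc_protocol = RTPROT_RA,	/* stored once, dumped as-is */
	};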
if (rt->rt6i_flags & RTF_CACHE) rtm->rtm_flags |= RTM_F_CLONED; diff --combined net/xfrm/xfrm_policy.c index 6f5a0dad502f,8da428f56aec..7af472f9db7e --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@@ -24,6 -24,7 +24,7 @@@ #include <linux/netfilter.h> #include <linux/module.h> #include <linux/cache.h> + #include <linux/cpu.h> #include <linux/audit.h> #include <net/dst.h> #include <net/flow.h> @@@ -44,6 -45,8 +45,8 @@@ struct xfrm_flo u8 flags; };
+ static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst); + static struct work_struct *xfrm_pcpu_work __read_mostly; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; @@@ -246,36 -249,6 +249,6 @@@ expired xfrm_pol_put(xp); }
- static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) - { - struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); - - if (unlikely(pol->walk.dead)) - flo = NULL; - else - xfrm_pol_hold(pol); - - return flo; - } - - static int xfrm_policy_flo_check(struct flow_cache_object *flo) - { - struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); - - return !pol->walk.dead; - } - - static void xfrm_policy_flo_delete(struct flow_cache_object *flo) - { - xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); - } - - static const struct flow_cache_ops xfrm_policy_fc_ops = { - .get = xfrm_policy_flo_get, - .check = xfrm_policy_flo_check, - .delete = xfrm_policy_flo_delete, - }; - /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 * SPD calls. */ @@@ -298,7 -271,6 +271,6 @@@ struct xfrm_policy *xfrm_policy_alloc(s (unsigned long)policy); setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, (unsigned long)policy); - policy->flo.ops = &xfrm_policy_fc_ops; } return policy; } @@@ -798,7 -770,6 +770,6 @@@ int xfrm_policy_insert(int dir, struct else hlist_add_head(&policy->bydst, chain); __xfrm_policy_link(policy, dir); - atomic_inc(&net->xfrm.flow_cache_genid);
/* After previous checking, family can either be AF_INET or AF_INET6 */ if (policy->family == AF_INET) @@@ -1004,6 -975,8 +975,8 @@@ int xfrm_policy_flush(struct net *net, } if (!cnt) err = -ESRCH; + else + xfrm_policy_cache_flush(); out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; @@@ -1175,7 -1148,7 +1148,7 @@@ fail }
static struct xfrm_policy * - __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) + xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_policy *pol; @@@ -1187,61 -1160,6 +1160,6 @@@ return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); }
- static int flow_to_policy_dir(int dir) - { - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - - switch (dir) { - default: - case FLOW_DIR_IN: - return XFRM_POLICY_IN; - case FLOW_DIR_OUT: - return XFRM_POLICY_OUT; - case FLOW_DIR_FWD: - return XFRM_POLICY_FWD; - } - } - - static struct flow_cache_object * - xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, - u8 dir, struct flow_cache_object *old_obj, void *ctx) - { - struct xfrm_policy *pol; - - if (old_obj) - xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); - - pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); - if (IS_ERR_OR_NULL(pol)) - return ERR_CAST(pol); - - /* Resolver returns two references: - * one for cache and one for caller of flow_cache_lookup() */ - xfrm_pol_hold(pol); - - return &pol->flo; - } - - static inline int policy_to_flow_dir(int dir) - { - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - switch (dir) { - default: - case XFRM_POLICY_IN: - return FLOW_DIR_IN; - case XFRM_POLICY_OUT: - return FLOW_DIR_OUT; - case XFRM_POLICY_FWD: - return FLOW_DIR_FWD; - } - } - static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl, u16 family) { @@@ -1261,7 -1179,7 +1179,7 @@@ } err = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, - policy_to_flow_dir(dir)); + dir); if (!err) { if (!xfrm_pol_hold_rcu(pol)) goto again; @@@ -1545,58 -1463,6 +1463,6 @@@ static int xfrm_get_tos(const struct fl return tos; }
- static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) - { - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - if (xdst->route == NULL) { - /* Dummy bundle - if it has xfrms we were not - * able to build bundle as template resolution failed. - * It means we need to try again resolving. */ - if (xdst->num_xfrms > 0) - return NULL; - } else if (dst->flags & DST_XFRM_QUEUE) { - return NULL; - } else { - /* Real bundle */ - if (stale_bundle(dst)) - return NULL; - } - - dst_hold(dst); - return flo; - } - - static int xfrm_bundle_flo_check(struct flow_cache_object *flo) - { - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - if (!xdst->route) - return 0; - if (stale_bundle(dst)) - return 0; - - return 1; - } - - static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) - { - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - dst->obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(dst); - } - - static const struct flow_cache_ops xfrm_bundle_fc_ops = { - .get = xfrm_bundle_flo_get, - .check = xfrm_bundle_flo_check, - .delete = xfrm_bundle_flo_delete, - }; - static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@@ -1624,7 -1490,6 +1490,6 @@@ struct dst_entry *dst = &xdst->u.dst;
memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); - xdst->flo.ops = &xfrm_bundle_fc_ops; } else xdst = ERR_PTR(-ENOBUFS);
@@@ -1840,6 -1705,102 +1705,102 @@@ static int xfrm_expand_policies(const s
}
+ static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old) + { + this_cpu_write(xfrm_last_dst, xdst); + if (old) + dst_release(&old->u.dst); + } + + static void __xfrm_pcpu_work_fn(void) + { + struct xfrm_dst *old; + + old = this_cpu_read(xfrm_last_dst); + if (old && !xfrm_bundle_ok(old)) + xfrm_last_dst_update(NULL, old); + } + + static void xfrm_pcpu_work_fn(struct work_struct *work) + { + local_bh_disable(); + rcu_read_lock(); + __xfrm_pcpu_work_fn(); + rcu_read_unlock(); + local_bh_enable(); + } + + void xfrm_policy_cache_flush(void) + { + struct xfrm_dst *old; + bool found = 0; + int cpu; + + local_bh_disable(); + rcu_read_lock(); + for_each_possible_cpu(cpu) { + old = per_cpu(xfrm_last_dst, cpu); + if (old && !xfrm_bundle_ok(old)) { + if (smp_processor_id() == cpu) { + __xfrm_pcpu_work_fn(); + continue; + } + found = true; + break; + } + } + + rcu_read_unlock(); + local_bh_enable(); + + if (!found) + return; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + bool bundle_release; + + rcu_read_lock(); + old = per_cpu(xfrm_last_dst, cpu); + bundle_release = old && !xfrm_bundle_ok(old); + rcu_read_unlock(); + + if (!bundle_release) + continue; + + if (cpu_online(cpu)) { + schedule_work_on(cpu, &xfrm_pcpu_work[cpu]); + continue; + } + + rcu_read_lock(); + old = per_cpu(xfrm_last_dst, cpu); + if (old && !xfrm_bundle_ok(old)) { + per_cpu(xfrm_last_dst, cpu) = NULL; + dst_release(&old->u.dst); + } + rcu_read_unlock(); + } + + put_online_cpus(); + } + + static bool xfrm_pol_dead(struct xfrm_dst *xdst) + { + unsigned int num_pols = xdst->num_pols; + unsigned int pol_dead = 0, i; + + for (i = 0; i < num_pols; i++) + pol_dead |= xdst->pols[i]->walk.dead; + + /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ + if (pol_dead) + xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; + + return pol_dead; + } + static struct xfrm_dst * xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, const struct flowi *fl, u16 family, @@@ -1847,10 -1808,23 +1808,23 @@@ { struct net *net = xp_net(pols[0]); struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; + struct xfrm_dst *xdst, *old; struct dst_entry *dst; - struct xfrm_dst *xdst; int err;
+ xdst = this_cpu_read(xfrm_last_dst); + if (xdst && + xdst->u.dst.dev == dst_orig->dev && + xdst->num_pols == num_pols && + !xfrm_pol_dead(xdst) && + memcmp(xdst->pols, pols, + sizeof(struct xfrm_policy *) * num_pols) == 0 && + xfrm_bundle_ok(xdst)) { + dst_hold(&xdst->u.dst); + return xdst; + } + + old = xdst; /* Try to instantiate a bundle */ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); if (err <= 0) { @@@ -1871,6 -1845,9 +1845,9 @@@ memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid);
+ atomic_set(&xdst->u.dst.__refcnt, 2); + xfrm_last_dst_update(xdst, old); + return xdst; }
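These additions replace the removed flow cache with a one-entry per-CPU bundle cache: xfrm_resolve_and_create_bundle() probes xfrm_last_dst before resolving templates, and a freshly built bundle starts with two references, one owned by the cache and one returned to the caller. The hit test, condensed from the hunk above:

	/* per-CPU data, so no locking beyond running with BHs disabled */
	struct xfrm_dst *xdst = this_cpu_read(xfrm_last_dst);

	if (xdst && xdst->u.dst.dev == dst_orig->dev &&
	    xdst->num_pols == num_pols &&
	    !xfrm_pol_dead(xdst) &&
	    !memcmp(xdst->pols, pols, sizeof(pols[0]) * num_pols) &&
	    xfrm_bundle_ok(xdst)) {
		dst_hold(&xdst->u.dst);	/* extra reference for the caller */
		return xdst;		/* cache hit: skip re-resolution */
	}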
@@@ -2051,86 -2028,39 +2028,39 @@@ free_dst goto out; }
- static struct flow_cache_object * - xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, - struct flow_cache_object *oldflo, void *ctx) + static struct xfrm_dst * + xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo) { - struct xfrm_flo *xflo = (struct xfrm_flo *)ctx; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - struct xfrm_dst *xdst, *new_xdst; - int num_pols = 0, num_xfrms = 0, i, err, pol_dead; - - /* Check if the policies from old bundle are usable */ - xdst = NULL; - if (oldflo) { - xdst = container_of(oldflo, struct xfrm_dst, flo); - num_pols = xdst->num_pols; - num_xfrms = xdst->num_xfrms; - pol_dead = 0; - for (i = 0; i < num_pols; i++) { - pols[i] = xdst->pols[i]; - pol_dead |= pols[i]->walk.dead; - } - if (pol_dead) { - /* Mark DST_OBSOLETE_DEAD to fail the next - * xfrm_dst_check() - */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); - xdst = NULL; - num_pols = 0; - num_xfrms = 0; - oldflo = NULL; - } - } + int num_pols = 0, num_xfrms = 0, err; + struct xfrm_dst *xdst;
/* Resolve policies to use if we couldn't get them from * previous cache entry */ - if (xdst == NULL) { - num_pols = 1; - pols[0] = __xfrm_policy_lookup(net, fl, family, - flow_to_policy_dir(dir)); - err = xfrm_expand_policies(fl, family, pols, + num_pols = 1; + pols[0] = xfrm_policy_lookup(net, fl, family, dir); + err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); - if (err < 0) - goto inc_error; - if (num_pols == 0) - return NULL; - if (num_xfrms <= 0) - goto make_dummy_bundle; - } + if (err < 0) + goto inc_error; + if (num_pols == 0) + return NULL; + if (num_xfrms <= 0) + goto make_dummy_bundle;
- new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, + xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, xflo->dst_orig); - if (IS_ERR(new_xdst)) { - err = PTR_ERR(new_xdst); + if (IS_ERR(xdst)) { + err = PTR_ERR(xdst); if (err != -EAGAIN) goto error; - if (oldflo == NULL) - goto make_dummy_bundle; - dst_hold(&xdst->u.dst); - return oldflo; - } else if (new_xdst == NULL) { + goto make_dummy_bundle; + } else if (xdst == NULL) { num_xfrms = 0; - if (oldflo == NULL) - goto make_dummy_bundle; - xdst->num_xfrms = 0; - dst_hold(&xdst->u.dst); - return oldflo; - } - - /* Kill the previous bundle */ - if (xdst) { - /* The policies were stolen for newly generated bundle */ - xdst->num_pols = 0; - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); + goto make_dummy_bundle; }
- /* We do need to return one reference for original caller */ - dst_hold(&new_xdst->u.dst); - return &new_xdst->flo; + return xdst;
make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: @@@ -2146,17 -2076,12 +2076,12 @@@ memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
dst_hold(&xdst->u.dst); - return &xdst->flo; + return xdst;
inc_error: XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); error: - if (xdst != NULL) { - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); - } else - xfrm_pols_put(pols, num_pols); + xfrm_pols_put(pols, num_pols); return ERR_PTR(err); }
@@@ -2187,11 -2112,10 +2112,10 @@@ struct dst_entry *xfrm_lookup(struct ne const struct sock *sk, int flags) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - struct flow_cache_object *flo; struct xfrm_dst *xdst; struct dst_entry *dst, *route; u16 family = dst_orig->ops->family; - u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); + u8 dir = XFRM_POLICY_OUT; int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
dst = NULL; @@@ -2242,15 -2166,13 +2166,13 @@@ !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol;
- flo = flow_cache_lookup(net, fl, family, dir, - xfrm_bundle_lookup, &xflo); - if (flo == NULL) + xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo); + if (xdst == NULL) goto nopol; - if (IS_ERR(flo)) { - err = PTR_ERR(flo); + if (IS_ERR(xdst)) { + err = PTR_ERR(xdst); goto dropdst; } - xdst = container_of(flo, struct xfrm_dst, flo);
num_pols = xdst->num_pols; num_xfrms = xdst->num_xfrms; @@@ -2449,12 -2371,10 +2371,10 @@@ int __xfrm_policy_check(struct sock *sk int pi; int reverse; struct flowi fl; - u8 fl_dir; int xerr_idx = -1;
reverse = dir & ~XFRM_POLICY_MASK; dir &= XFRM_POLICY_MASK; - fl_dir = policy_to_flow_dir(dir);
if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); @@@ -2486,16 -2406,8 +2406,8 @@@ } }
- if (!pol) { - struct flow_cache_object *flo; - - flo = flow_cache_lookup(net, &fl, family, fl_dir, - xfrm_policy_lookup, NULL); - if (IS_ERR_OR_NULL(flo)) - pol = ERR_CAST(flo); - else - pol = container_of(flo, struct xfrm_policy, flo); - } + if (!pol) + pol = xfrm_policy_lookup(net, &fl, family, dir);
if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); @@@ -2641,11 -2553,9 +2553,9 @@@ static struct dst_entry *xfrm_dst_check * notice. That's what we are validating here via the * stale_bundle() check. * - * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will - * be marked on it. * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will * be marked on it. - * Both will force stable_bundle() to fail on any xdst bundle with + * This will force stale_bundle() to fail on any xdst bundle with * this dst linked in it. */ if (dst->obsolete < 0 && !stale_bundle(dst)) @@@ -2685,18 -2595,6 +2595,6 @@@ static struct dst_entry *xfrm_negative_ return dst; }
- void xfrm_garbage_collect(struct net *net) - { - flow_cache_flush(net); - } - EXPORT_SYMBOL(xfrm_garbage_collect); - - void xfrm_garbage_collect_deferred(struct net *net) - { - flow_cache_flush_deferred(net); - } - EXPORT_SYMBOL(xfrm_garbage_collect_deferred); - static void xfrm_init_pmtu(struct dst_entry *dst) { do { @@@ -3034,14 -2932,9 +2932,9 @@@ static int __net_init xfrm_net_init(str rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; - rv = flow_cache_init(net); - if (rv < 0) - goto out;
return 0;
- out: - xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: @@@ -3054,7 -2947,6 +2947,6 @@@ out_statistics
static void __net_exit xfrm_net_exit(struct net *net) { - flow_cache_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); @@@ -3068,7 -2960,15 +2960,15 @@@ static struct pernet_operations __net_i
void __init xfrm_init(void) { - flow_cache_hp_init(); + int i; + + xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work), + GFP_KERNEL); + BUG_ON(!xfrm_pcpu_work); + + for (i = 0; i < NR_CPUS; i++) + INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn); + register_pernet_subsys(&xfrm_net_ops); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); @@@ -3308,15 -3208,9 +3208,15 @@@ int xfrm_migrate(const struct xfrm_sele struct xfrm_state *x_new[XFRM_MAX_DEPTH]; struct xfrm_migrate *mp;
+ /* Stage 0 - sanity checks */ if ((err = xfrm_migrate_check(m, num_migrate)) < 0) goto out;
+ if (dir >= XFRM_POLICY_MAX) { + err = -EINVAL; + goto out; + } + /* Stage 1 - find policy */ if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { err = -ENOENT; diff --combined net/xfrm/xfrm_state.c index a792effdb0b5,82cbbce69b79..fd63c2fdbc39 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@@ -724,9 -724,10 +724,10 @@@ restart } } } - if (cnt) + if (cnt) { err = 0; - + xfrm_policy_cache_flush(); + } out: spin_unlock_bh(&net->xfrm.xfrm_state_lock); return err; @@@ -1620,7 -1621,6 +1621,7 @@@ in xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, unsigned short family, struct net *net) { + int i; int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) @@@ -1629,9 -1629,6 +1630,9 @@@ spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/ if (afinfo->tmpl_sort) err = afinfo->tmpl_sort(dst, src, n); + else + for (i = 0; i < n; i++) + dst[i] = src[i]; spin_unlock_bh(&net->xfrm.xfrm_state_lock); rcu_read_unlock(); return err; @@@ -1642,7 -1639,6 +1643,7 @@@ in xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, unsigned short family) { + int i; int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); struct net *net = xs_net(*src); @@@ -1653,9 -1649,6 +1654,9 @@@ spin_lock_bh(&net->xfrm.xfrm_state_lock); if (afinfo->state_sort) err = afinfo->state_sort(dst, src, n); + else + for (i = 0; i < n; i++) + dst[i] = src[i]; spin_unlock_bh(&net->xfrm.xfrm_state_lock); rcu_read_unlock(); return err;
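Two small hardening fixes ride along in the xfrm migrate/sort paths: the policy direction supplied by userspace is bounds-checked before any lookup, and when an address family registers no tmpl_sort/state_sort hook the destination array is filled with an identity copy instead of being left uninitialised. Schematically:

	if (dir >= XFRM_POLICY_MAX)
		return -EINVAL;		/* reject bogus userspace input early */

	if (afinfo->state_sort)
		err = afinfo->state_sort(dst, src, n);
	else
		for (i = 0; i < n; i++)
			dst[i] = src[i];	/* preserve the caller's order */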