[linux-next] LinuxNextTracking branch, master, updated. next-20170810

batman at open-mesh.org
Fri Aug 11 00:15:58 CEST 2017


The following commit has been merged in the master branch:
commit 3118e6e19da7b8d76b2456b880c74a9aa3a2268b
Merge: feca7d8c135bc1527b244fe817b8b6498066ccec 48fb6f4db940e92cfb16cd878cddd59ea6120d06
Author: David S. Miller <davem at davemloft.net>
Date:   Wed Aug 9 16:28:45 2017 -0700

    Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
    
    The UDP offload conflict is dealt with by simply taking what is
    in net-next where we have removed all of the UFO handling code
    entirely.
    
    The TCP conflict was a case of local variables in a function
    being removed from both net and net-next.
    
    In netvsc we had an assignment right next to where a missing
    set of u64 stats sync object inits were added.
    
    Signed-off-by: David S. Miller <davem at davemloft.net>
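
    For context on the netvsc conflict mentioned above: the "u64 stats sync
    object inits" refer to the kernel's u64_stats_sync API, which the combined
    diff below initializes per ring/channel with u64_stats_init(). A minimal
    sketch of how such per-ring counters are typically initialized and updated
    is shown here; the structure and function names are hypothetical and this
    sketch is not part of the merged diff.

    /* Illustrative only: per-ring counters protected by u64_stats_sync. */
    #include <linux/u64_stats_sync.h>

    struct example_ring_stats {
    	u64 packets;
    	u64 bytes;
    	struct u64_stats_sync syncp;
    };

    static void example_ring_stats_init(struct example_ring_stats *s)
    {
    	/* Must run once before the first update/fetch (matters on 32-bit SMP). */
    	u64_stats_init(&s->syncp);
    }

    static void example_ring_stats_update(struct example_ring_stats *s,
    				      unsigned int pkts, unsigned int len)
    {
    	/* Writers bracket updates so readers can fetch a consistent snapshot. */
    	u64_stats_update_begin(&s->syncp);
    	s->packets += pkts;
    	s->bytes += len;
    	u64_stats_update_end(&s->syncp);
    }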

diff --combined MAINTAINERS
index b3a8ca6aa3ed,3c419022ed93..7cb7f4c3ad3f
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@@ -1161,7 -1161,7 +1161,7 @@@ M:	Brendan Higgins <brendanhiggins at goog
  R:	Benjamin Herrenschmidt <benh at kernel.crashing.org>
  R:	Joel Stanley <joel at jms.id.au>
  L:	linux-i2c at vger.kernel.org
- L:	openbmc at lists.ozlabs.org
+ L:	openbmc at lists.ozlabs.org (moderated for non-subscribers)
  S:	Maintained
  F:	drivers/irqchip/irq-aspeed-i2c-ic.c
  F:	drivers/i2c/busses/i2c-aspeed.c
@@@ -2477,7 -2477,7 +2477,7 @@@ Q:	https://patchwork.open-mesh.org/proj
  S:	Maintained
  F:	Documentation/ABI/testing/sysfs-class-net-batman-adv
  F:	Documentation/ABI/testing/sysfs-class-net-mesh
 -F:	Documentation/networking/batman-adv.txt
 +F:	Documentation/networking/batman-adv.rst
  F:	include/uapi/linux/batman_adv.h
  F:	net/batman-adv/
  
@@@ -5101,7 -5101,6 +5101,7 @@@ F:	include/linux/of_net.
  F:	include/linux/phy.h
  F:	include/linux/phy_fixed.h
  F:	include/linux/platform_data/mdio-gpio.h
 +F:	include/linux/platform_data/mdio-bcm-unimac.h
  F:	include/trace/events/mdio.h
  F:	include/uapi/linux/mdio.h
  F:	include/uapi/linux/mii.h
@@@ -5835,7 -5834,7 +5835,7 @@@ F:	drivers/staging/greybus/spi.
  F:	drivers/staging/greybus/spilib.c
  F:	drivers/staging/greybus/spilib.h
  
- GREYBUS LOOBACK/TIME PROTOCOLS DRIVERS
+ GREYBUS LOOPBACK/TIME PROTOCOLS DRIVERS
  M:	Bryan O'Donoghue <pure.logic at nexus-software.ie>
  S:	Maintained
  F:	drivers/staging/greybus/loopback.c
@@@ -6148,14 -6147,6 +6148,14 @@@ S:	Maintaine
  F:	drivers/net/ethernet/hisilicon/
  F:	Documentation/devicetree/bindings/net/hisilicon*.txt
  
 +HISILICON NETWORK SUBSYSTEM 3 DRIVER (HNS3)
 +M:	Yisen Zhuang <yisen.zhuang at huawei.com>
 +M:	Salil Mehta <salil.mehta at huawei.com>
 +L:	netdev at vger.kernel.org
 +W:	http://www.hisilicon.com
 +S:	Maintained
 +F:	drivers/net/ethernet/hisilicon/hns3/
 +
  HISILICON ROCE DRIVER
  M:	Lijun Ou <oulijun at huawei.com>
  M:	Wei Hu(Xavier) <xavier.huwei at huawei.com>
@@@ -6266,7 -6257,6 +6266,7 @@@ M:	Haiyang Zhang <haiyangz at microsoft.co
  M:	Stephen Hemminger <sthemmin at microsoft.com>
  L:	devel at linuxdriverproject.org
  S:	Maintained
 +F:	Documentation/networking/netvsc.txt
  F:	arch/x86/include/asm/mshyperv.h
  F:	arch/x86/include/uapi/asm/hyperv.h
  F:	arch/x86/kernel/cpu/mshyperv.c
@@@ -8434,9 -8424,7 +8434,9 @@@ F:	include/uapi/linux/uvcvideo.
  
  MEDIATEK ETHERNET DRIVER
  M:	Felix Fietkau <nbd at openwrt.org>
 -M:	John Crispin <blogic at openwrt.org>
 +M:	John Crispin <john at phrozen.org>
 +M:	Sean Wang <sean.wang at mediatek.com>
 +M:	Nelson Chang <nelson.chang at mediatek.com>
  L:	netdev at vger.kernel.org
  S:	Maintained
  F:	drivers/net/ethernet/mediatek/
diff --combined drivers/net/dsa/mt7530.c
index 12700710f26d,264b281eb86b..8faa796a115f
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@@ -625,6 -625,44 +625,44 @@@ static void mt7530_adjust_link(struct d
  		 * all finished.
  		 */
  		mt7623_pad_clk_setup(ds);
+ 	} else {
+ 		u16 lcl_adv = 0, rmt_adv = 0;
+ 		u8 flowctrl;
+ 		u32 mcr = PMCR_USERP_LINK | PMCR_FORCE_MODE;
+ 
+ 		switch (phydev->speed) {
+ 		case SPEED_1000:
+ 			mcr |= PMCR_FORCE_SPEED_1000;
+ 			break;
+ 		case SPEED_100:
+ 			mcr |= PMCR_FORCE_SPEED_100;
+ 			break;
+ 		};
+ 
+ 		if (phydev->link)
+ 			mcr |= PMCR_FORCE_LNK;
+ 
+ 		if (phydev->duplex) {
+ 			mcr |= PMCR_FORCE_FDX;
+ 
+ 			if (phydev->pause)
+ 				rmt_adv = LPA_PAUSE_CAP;
+ 			if (phydev->asym_pause)
+ 				rmt_adv |= LPA_PAUSE_ASYM;
+ 
+ 			if (phydev->advertising & ADVERTISED_Pause)
+ 				lcl_adv |= ADVERTISE_PAUSE_CAP;
+ 			if (phydev->advertising & ADVERTISED_Asym_Pause)
+ 				lcl_adv |= ADVERTISE_PAUSE_ASYM;
+ 
+ 			flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
+ 
+ 			if (flowctrl & FLOW_CTRL_TX)
+ 				mcr |= PMCR_TX_FC_EN;
+ 			if (flowctrl & FLOW_CTRL_RX)
+ 				mcr |= PMCR_RX_FC_EN;
+ 		}
+ 		mt7530_write(priv, MT7530_PMCR_P(port), mcr);
  	}
  }
  
@@@ -801,31 -839,49 +839,31 @@@ mt7530_port_bridge_leave(struct dsa_swi
  }
  
  static int
 -mt7530_port_fdb_prepare(struct dsa_switch *ds, int port,
 -			const struct switchdev_obj_port_fdb *fdb,
 -			struct switchdev_trans *trans)
 +mt7530_port_fdb_add(struct dsa_switch *ds, int port,
 +		    const unsigned char *addr, u16 vid)
  {
  	struct mt7530_priv *priv = ds->priv;
  	int ret;
 +	u8 port_mask = BIT(port);
  
 -	/* Because auto-learned entrie shares the same FDB table.
 -	 * an entry is reserved with no port_mask to make sure fdb_add
 -	 * is called while the entry is still available.
 -	 */
  	mutex_lock(&priv->reg_mutex);
 -	mt7530_fdb_write(priv, fdb->vid, 0, fdb->addr, -1, STATIC_ENT);
 +	mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_ENT);
  	ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, 0);
  	mutex_unlock(&priv->reg_mutex);
  
  	return ret;
  }
  
 -static void
 -mt7530_port_fdb_add(struct dsa_switch *ds, int port,
 -		    const struct switchdev_obj_port_fdb *fdb,
 -		    struct switchdev_trans *trans)
 -{
 -	struct mt7530_priv *priv = ds->priv;
 -	u8 port_mask = BIT(port);
 -
 -	mutex_lock(&priv->reg_mutex);
 -	mt7530_fdb_write(priv, fdb->vid, port_mask, fdb->addr, -1, STATIC_ENT);
 -	mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, 0);
 -	mutex_unlock(&priv->reg_mutex);
 -}
 -
  static int
  mt7530_port_fdb_del(struct dsa_switch *ds, int port,
 -		    const struct switchdev_obj_port_fdb *fdb)
 +		    const unsigned char *addr, u16 vid)
  {
  	struct mt7530_priv *priv = ds->priv;
  	int ret;
  	u8 port_mask = BIT(port);
  
  	mutex_lock(&priv->reg_mutex);
 -	mt7530_fdb_write(priv, fdb->vid, port_mask, fdb->addr, -1, STATIC_EMP);
 +	mt7530_fdb_write(priv, vid, port_mask, addr, -1, STATIC_EMP);
  	ret = mt7530_fdb_cmd(priv, MT7530_FDB_WRITE, 0);
  	mutex_unlock(&priv->reg_mutex);
  
@@@ -834,7 -890,8 +872,7 @@@
  
  static int
  mt7530_port_fdb_dump(struct dsa_switch *ds, int port,
 -		     struct switchdev_obj_port_fdb *fdb,
 -		     switchdev_obj_dump_cb_t *cb)
 +		     dsa_fdb_dump_cb_t *cb, void *data)
  {
  	struct mt7530_priv *priv = ds->priv;
  	struct mt7530_fdb _fdb = { 0 };
@@@ -852,8 -909,11 +890,8 @@@
  		if (rsp & ATC_SRCH_HIT) {
  			mt7530_fdb_read(priv, &_fdb);
  			if (_fdb.port_mask & BIT(port)) {
 -				ether_addr_copy(fdb->addr, _fdb.mac);
 -				fdb->vid = _fdb.vid;
 -				fdb->ndm_state = _fdb.noarp ?
 -						NUD_NOARP : NUD_REACHABLE;
 -				ret = cb(&fdb->obj);
 +				ret = cb(_fdb.mac, _fdb.vid, _fdb.noarp,
 +					 data);
  				if (ret < 0)
  					break;
  			}
@@@ -993,6 -1053,7 +1031,6 @@@ static struct dsa_switch_ops mt7530_swi
  	.port_stp_state_set	= mt7530_stp_state_set,
  	.port_bridge_join	= mt7530_port_bridge_join,
  	.port_bridge_leave	= mt7530_port_bridge_leave,
 -	.port_fdb_prepare	= mt7530_port_fdb_prepare,
  	.port_fdb_add		= mt7530_port_fdb_add,
  	.port_fdb_del		= mt7530_port_fdb_del,
  	.port_fdb_dump		= mt7530_port_fdb_dump,
diff --combined drivers/net/ethernet/ibm/ibmvnic.c
index 99576ba4187f,c45e8e3b82d3..32c116652755
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@@ -111,6 -111,7 +111,7 @@@ static void send_request_map(struct ibm
  static void send_request_unmap(struct ibmvnic_adapter *, u8);
  static void send_login(struct ibmvnic_adapter *adapter);
  static void send_cap_queries(struct ibmvnic_adapter *adapter);
+ static int init_sub_crqs(struct ibmvnic_adapter *);
  static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter);
  static int ibmvnic_init(struct ibmvnic_adapter *);
  static void release_crq_queue(struct ibmvnic_adapter *);
@@@ -346,31 -347,6 +347,31 @@@ static void replenish_pools(struct ibmv
  	}
  }
  
 +static void release_stats_buffers(struct ibmvnic_adapter *adapter)
 +{
 +	kfree(adapter->tx_stats_buffers);
 +	kfree(adapter->rx_stats_buffers);
 +}
 +
 +static int init_stats_buffers(struct ibmvnic_adapter *adapter)
 +{
 +	adapter->tx_stats_buffers =
 +				kcalloc(adapter->req_tx_queues,
 +					sizeof(struct ibmvnic_tx_queue_stats),
 +					GFP_KERNEL);
 +	if (!adapter->tx_stats_buffers)
 +		return -ENOMEM;
 +
 +	adapter->rx_stats_buffers =
 +				kcalloc(adapter->req_rx_queues,
 +					sizeof(struct ibmvnic_rx_queue_stats),
 +					GFP_KERNEL);
 +	if (!adapter->rx_stats_buffers)
 +		return -ENOMEM;
 +
 +	return 0;
 +}
 +
  static void release_stats_token(struct ibmvnic_adapter *adapter)
  {
  	struct device *dev = &adapter->vdev->dev;
@@@ -676,6 -652,7 +677,7 @@@ static int ibmvnic_login(struct net_dev
  	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
  	unsigned long timeout = msecs_to_jiffies(30000);
  	struct device *dev = &adapter->vdev->dev;
+ 	int rc;
  
  	do {
  		if (adapter->renegotiate) {
@@@ -689,6 -666,18 +691,18 @@@
  				dev_err(dev, "Capabilities query timeout\n");
  				return -1;
  			}
+ 			rc = init_sub_crqs(adapter);
+ 			if (rc) {
+ 				dev_err(dev,
+ 					"Initialization of SCRQ's failed\n");
+ 				return -1;
+ 			}
+ 			rc = init_sub_crq_irqs(adapter);
+ 			if (rc) {
+ 				dev_err(dev,
+ 					"Initialization of SCRQ's irqs failed\n");
+ 				return -1;
+ 			}
  		}
  
  		reinit_completion(&adapter->init_done);
@@@ -711,7 -700,6 +725,7 @@@ static void release_resources(struct ib
  	release_rx_pools(adapter);
  
  	release_stats_token(adapter);
 +	release_stats_buffers(adapter);
  	release_error_buffers(adapter);
  
  	if (adapter->napi) {
@@@ -789,10 -777,6 +803,10 @@@ static int init_resources(struct ibmvni
  	if (rc)
  		return rc;
  
 +	rc = init_stats_buffers(adapter);
 +	if (rc)
 +		return rc;
 +
  	rc = init_stats_token(adapter);
  	if (rc)
  		return rc;
@@@ -1275,9 -1259,6 +1289,9 @@@ out
  	netdev->stats.tx_packets += tx_packets;
  	adapter->tx_send_failed += tx_send_failed;
  	adapter->tx_map_failed += tx_map_failed;
 +	adapter->tx_stats_buffers[queue_num].packets += tx_packets;
 +	adapter->tx_stats_buffers[queue_num].bytes += tx_bytes;
 +	adapter->tx_stats_buffers[queue_num].dropped_packets += tx_dropped;
  
  	return ret;
  }
@@@ -1579,8 -1560,7 +1593,8 @@@ restart_poll
  							  rx_comp.correlator);
  		/* do error checking */
  		if (next->rx_comp.rc) {
 -			netdev_err(netdev, "rx error %x\n", next->rx_comp.rc);
 +			netdev_dbg(netdev, "rx buffer returned with rc %x\n",
 +				   be16_to_cpu(next->rx_comp.rc));
  			/* free the entry */
  			next->rx_comp.first = 0;
  			remove_buff_from_pool(adapter, rx_buff);
@@@ -1619,8 -1599,6 +1633,8 @@@
  		napi_gro_receive(napi, skb); /* send it up */
  		netdev->stats.rx_packets++;
  		netdev->stats.rx_bytes += length;
 +		adapter->rx_stats_buffers[scrq_num].packets++;
 +		adapter->rx_stats_buffers[scrq_num].bytes += length;
  		frames_processed++;
  	}
  
@@@ -1730,36 -1708,18 +1744,36 @@@ static u32 ibmvnic_get_link(struct net_
  static void ibmvnic_get_ringparam(struct net_device *netdev,
  				  struct ethtool_ringparam *ring)
  {
 -	ring->rx_max_pending = 0;
 -	ring->tx_max_pending = 0;
 +	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 +
 +	ring->rx_max_pending = adapter->max_rx_add_entries_per_subcrq;
 +	ring->tx_max_pending = adapter->max_tx_entries_per_subcrq;
  	ring->rx_mini_max_pending = 0;
  	ring->rx_jumbo_max_pending = 0;
 -	ring->rx_pending = 0;
 -	ring->tx_pending = 0;
 +	ring->rx_pending = adapter->req_rx_add_entries_per_subcrq;
 +	ring->tx_pending = adapter->req_tx_entries_per_subcrq;
  	ring->rx_mini_pending = 0;
  	ring->rx_jumbo_pending = 0;
  }
  
 +static void ibmvnic_get_channels(struct net_device *netdev,
 +				 struct ethtool_channels *channels)
 +{
 +	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
 +
 +	channels->max_rx = adapter->max_rx_queues;
 +	channels->max_tx = adapter->max_tx_queues;
 +	channels->max_other = 0;
 +	channels->max_combined = 0;
 +	channels->rx_count = adapter->req_rx_queues;
 +	channels->tx_count = adapter->req_tx_queues;
 +	channels->other_count = 0;
 +	channels->combined_count = 0;
 +}
 +
  static void ibmvnic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
  {
 +	struct ibmvnic_adapter *adapter = netdev_priv(dev);
  	int i;
  
  	if (stringset != ETH_SS_STATS)
@@@ -1767,39 -1727,13 +1781,39 @@@
  
  	for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++, data += ETH_GSTRING_LEN)
  		memcpy(data, ibmvnic_stats[i].name, ETH_GSTRING_LEN);
 +
 +	for (i = 0; i < adapter->req_tx_queues; i++) {
 +		snprintf(data, ETH_GSTRING_LEN, "tx%d_packets", i);
 +		data += ETH_GSTRING_LEN;
 +
 +		snprintf(data, ETH_GSTRING_LEN, "tx%d_bytes", i);
 +		data += ETH_GSTRING_LEN;
 +
 +		snprintf(data, ETH_GSTRING_LEN, "tx%d_dropped_packets", i);
 +		data += ETH_GSTRING_LEN;
 +	}
 +
 +	for (i = 0; i < adapter->req_rx_queues; i++) {
 +		snprintf(data, ETH_GSTRING_LEN, "rx%d_packets", i);
 +		data += ETH_GSTRING_LEN;
 +
 +		snprintf(data, ETH_GSTRING_LEN, "rx%d_bytes", i);
 +		data += ETH_GSTRING_LEN;
 +
 +		snprintf(data, ETH_GSTRING_LEN, "rx%d_interrupts", i);
 +		data += ETH_GSTRING_LEN;
 +	}
  }
  
  static int ibmvnic_get_sset_count(struct net_device *dev, int sset)
  {
 +	struct ibmvnic_adapter *adapter = netdev_priv(dev);
 +
  	switch (sset) {
  	case ETH_SS_STATS:
 -		return ARRAY_SIZE(ibmvnic_stats);
 +		return ARRAY_SIZE(ibmvnic_stats) +
 +		       adapter->req_tx_queues * NUM_TX_STATS +
 +		       adapter->req_rx_queues * NUM_RX_STATS;
  	default:
  		return -EOPNOTSUPP;
  	}
@@@ -1810,7 -1744,7 +1824,7 @@@ static void ibmvnic_get_ethtool_stats(s
  {
  	struct ibmvnic_adapter *adapter = netdev_priv(dev);
  	union ibmvnic_crq crq;
 -	int i;
 +	int i, j;
  
  	memset(&crq, 0, sizeof(crq));
  	crq.request_statistics.first = IBMVNIC_CRQ_CMD;
@@@ -1825,26 -1759,7 +1839,26 @@@
  	wait_for_completion(&adapter->stats_done);
  
  	for (i = 0; i < ARRAY_SIZE(ibmvnic_stats); i++)
 -		data[i] = IBMVNIC_GET_STAT(adapter, ibmvnic_stats[i].offset);
 +		data[i] = be64_to_cpu(IBMVNIC_GET_STAT(adapter,
 +						ibmvnic_stats[i].offset));
 +
 +	for (j = 0; j < adapter->req_tx_queues; j++) {
 +		data[i] = adapter->tx_stats_buffers[j].packets;
 +		i++;
 +		data[i] = adapter->tx_stats_buffers[j].bytes;
 +		i++;
 +		data[i] = adapter->tx_stats_buffers[j].dropped_packets;
 +		i++;
 +	}
 +
 +	for (j = 0; j < adapter->req_rx_queues; j++) {
 +		data[i] = adapter->rx_stats_buffers[j].packets;
 +		i++;
 +		data[i] = adapter->rx_stats_buffers[j].bytes;
 +		i++;
 +		data[i] = adapter->rx_stats_buffers[j].interrupts;
 +		i++;
 +	}
  }
  
  static const struct ethtool_ops ibmvnic_ethtool_ops = {
@@@ -1853,7 -1768,6 +1867,7 @@@
  	.set_msglevel		= ibmvnic_set_msglevel,
  	.get_link		= ibmvnic_get_link,
  	.get_ringparam		= ibmvnic_get_ringparam,
 +	.get_channels		= ibmvnic_get_channels,
  	.get_strings            = ibmvnic_get_strings,
  	.get_sset_count         = ibmvnic_get_sset_count,
  	.get_ethtool_stats	= ibmvnic_get_ethtool_stats,
@@@ -2150,8 -2064,6 +2164,8 @@@ static irqreturn_t ibmvnic_interrupt_rx
  	struct ibmvnic_sub_crq_queue *scrq = instance;
  	struct ibmvnic_adapter *adapter = scrq->adapter;
  
 +	adapter->rx_stats_buffers[scrq->scrq_num].interrupts++;
 +
  	if (napi_schedule_prep(&adapter->napi[scrq->scrq_num])) {
  		disable_scrq_irq(adapter, scrq);
  		__napi_schedule(&adapter->napi[scrq->scrq_num]);
@@@ -3106,7 -3018,6 +3120,6 @@@ static void handle_request_cap_rsp(unio
  			 *req_value,
  			 (long int)be64_to_cpu(crq->request_capability_rsp.
  					       number), name);
- 		release_sub_crqs(adapter);
  		*req_value = be64_to_cpu(crq->request_capability_rsp.number);
  		ibmvnic_send_req_caps(adapter, 1);
  		return;
@@@ -3953,7 -3864,10 +3966,7 @@@ static int ibmvnic_resume(struct devic
  	if (adapter->state != VNIC_OPEN)
  		return 0;
  
 -	/* kick the interrupt handlers just in case we lost an interrupt */
 -	for (i = 0; i < adapter->req_rx_queues; i++)
 -		ibmvnic_interrupt_rx(adapter->rx_scrq[i]->irq,
 -				     adapter->rx_scrq[i]);
 +	tasklet_schedule(&adapter->tasklet);
  
  	return 0;
  }
diff --combined drivers/net/ethernet/intel/i40e/i40e_txrx.c
index d464fceb300f,2194960d5855..8a969d8f0790
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@@ -860,7 -860,7 +860,7 @@@ static bool i40e_clean_tx_irq(struct i4
  	netdev_tx_completed_queue(txring_txq(tx_ring),
  				  total_packets, total_bytes);
  
 -#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
 +#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
  	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
  		     (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
  		/* Make sure that anybody stopping the queue after this
@@@ -1113,6 -1113,8 +1113,8 @@@ int i40e_setup_tx_descriptors(struct i4
  	if (!tx_ring->tx_bi)
  		goto err;
  
+ 	u64_stats_init(&tx_ring->syncp);
+ 
  	/* round up to nearest 4K */
  	tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
  	/* add u32 for head writeback, align after this takes care of
@@@ -2063,7 -2065,7 +2065,7 @@@ static int i40e_clean_rx_irq(struct i40
  	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
  	bool failure = false, xdp_xmit = false;
  
 -	while (likely(total_rx_packets < budget)) {
 +	while (likely(total_rx_packets < (unsigned int)budget)) {
  		struct i40e_rx_buffer *rx_buffer;
  		union i40e_rx_desc *rx_desc;
  		struct xdp_buff xdp;
@@@ -2196,7 -2198,7 +2198,7 @@@
  	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
  
  	/* guarantee a trip back through this routine if there was a failure */
 -	return failure ? budget : total_rx_packets;
 +	return failure ? budget : (int)total_rx_packets;
  }
  
  static u32 i40e_buildreg_itr(const int type, const u16 itr)
@@@ -2451,15 -2453,9 +2453,15 @@@ static void i40e_atr(struct i40e_ring *
  		hlen = (hdr.network[0] & 0x0F) << 2;
  		l4_proto = hdr.ipv4->protocol;
  	} else {
 -		hlen = hdr.network - skb->data;
 -		l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
 -		hlen -= hdr.network - skb->data;
 +		/* find the start of the innermost ipv6 header */
 +		unsigned int inner_hlen = hdr.network - skb->data;
 +		unsigned int h_offset = inner_hlen;
 +
 +		/* this function updates h_offset to the end of the header */
 +		l4_proto =
 +		  ipv6_find_hdr(skb, &h_offset, IPPROTO_TCP, NULL, NULL);
 +		/* hlen will contain our best estimate of the tcp header */
 +		hlen = h_offset - inner_hlen;
  	}
  
  	if (l4_proto != IPPROTO_TCP)
diff --combined drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index ea471604450e,4631ca8b8eb2..4a990033c4d5
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@@ -513,6 -513,7 +513,7 @@@ nfp_net_tx_ring_init(struct nfp_net_tx_
  	tx_ring->idx = idx;
  	tx_ring->r_vec = r_vec;
  	tx_ring->is_xdp = is_xdp;
+ 	u64_stats_init(&tx_ring->r_vec->tx_sync);
  
  	tx_ring->qcidx = tx_ring->idx * nn->stride_tx;
  	tx_ring->qcp_q = nn->tx_bar + NFP_QCP_QUEUE_OFF(tx_ring->qcidx);
@@@ -532,6 -533,7 +533,7 @@@ nfp_net_rx_ring_init(struct nfp_net_rx_
  
  	rx_ring->idx = idx;
  	rx_ring->r_vec = r_vec;
+ 	u64_stats_init(&rx_ring->r_vec->rx_sync);
  
  	rx_ring->fl_qcidx = rx_ring->idx * nn->stride_rx;
  	rx_ring->qcp_fl = nn->rx_bar + NFP_QCP_QUEUE_OFF(rx_ring->fl_qcidx);
@@@ -2658,7 -2660,6 +2660,7 @@@ static int nfp_net_netdev_close(struct 
  	/* Step 2: Tell NFP
  	 */
  	nfp_net_clear_config_and_disable(nn);
 +	nfp_port_configure(netdev, false);
  
  	/* Step 3: Free resources
  	 */
@@@ -2776,21 -2777,16 +2778,21 @@@ static int nfp_net_netdev_open(struct n
  		goto err_free_all;
  
  	/* Step 2: Configure the NFP
 +	 * - Ifup the physical interface if it exists
  	 * - Enable rings from 0 to tx_rings/rx_rings - 1.
  	 * - Write MAC address (in case it changed)
  	 * - Set the MTU
  	 * - Set the Freelist buffer size
  	 * - Enable the FW
  	 */
 -	err = nfp_net_set_config_and_enable(nn);
 +	err = nfp_port_configure(netdev, true);
  	if (err)
  		goto err_free_all;
  
 +	err = nfp_net_set_config_and_enable(nn);
 +	if (err)
 +		goto err_port_disable;
 +
  	/* Step 3: Enable for kernel
  	 * - put some freelist descriptors on each RX ring
  	 * - enable NAPI on each ring
@@@ -2801,8 -2797,6 +2803,8 @@@
  
  	return 0;
  
 +err_port_disable:
 +	nfp_port_configure(netdev, false);
  err_free_all:
  	nfp_net_close_free_all(nn);
  	return err;
diff --combined drivers/net/ethernet/qlogic/qed/qed_mcp.c
index c1ecce6b9141,3eb241657368..376485d99357
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@@ -253,7 -253,7 +253,7 @@@ int qed_mcp_cmd_init(struct qed_hwfn *p
  	size = MFW_DRV_MSG_MAX_DWORDS(p_info->mfw_mb_length) * sizeof(u32);
  	p_info->mfw_mb_cur = kzalloc(size, GFP_KERNEL);
  	p_info->mfw_mb_shadow = kzalloc(size, GFP_KERNEL);
- 	if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr)
+ 	if (!p_info->mfw_mb_cur || !p_info->mfw_mb_shadow)
  		goto err;
  
  	return 0;
@@@ -1097,31 -1097,6 +1097,31 @@@ static void qed_mcp_handle_transceiver_
  		DP_NOTICE(p_hwfn, "Transceiver is unplugged.\n");
  }
  
 +static void qed_mcp_read_eee_config(struct qed_hwfn *p_hwfn,
 +				    struct qed_ptt *p_ptt,
 +				    struct qed_mcp_link_state *p_link)
 +{
 +	u32 eee_status, val;
 +
 +	p_link->eee_adv_caps = 0;
 +	p_link->eee_lp_adv_caps = 0;
 +	eee_status = qed_rd(p_hwfn,
 +			    p_ptt,
 +			    p_hwfn->mcp_info->port_addr +
 +			    offsetof(struct public_port, eee_status));
 +	p_link->eee_active = !!(eee_status & EEE_ACTIVE_BIT);
 +	val = (eee_status & EEE_LD_ADV_STATUS_MASK) >> EEE_LD_ADV_STATUS_OFFSET;
 +	if (val & EEE_1G_ADV)
 +		p_link->eee_adv_caps |= QED_EEE_1G_ADV;
 +	if (val & EEE_10G_ADV)
 +		p_link->eee_adv_caps |= QED_EEE_10G_ADV;
 +	val = (eee_status & EEE_LP_ADV_STATUS_MASK) >> EEE_LP_ADV_STATUS_OFFSET;
 +	if (val & EEE_1G_ADV)
 +		p_link->eee_lp_adv_caps |= QED_EEE_1G_ADV;
 +	if (val & EEE_10G_ADV)
 +		p_link->eee_lp_adv_caps |= QED_EEE_10G_ADV;
 +}
 +
  static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn,
  				       struct qed_ptt *p_ptt, bool b_reset)
  {
@@@ -1253,9 -1228,6 +1253,9 @@@
  
  	p_link->sfp_tx_fault = !!(status & LINK_STATUS_SFP_TX_FAULT);
  
 +	if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE)
 +		qed_mcp_read_eee_config(p_hwfn, p_ptt, p_link);
 +
  	qed_link_update(p_hwfn);
  out:
  	spin_unlock_bh(&p_hwfn->mcp_info->link_lock);
@@@ -1279,19 -1251,6 +1279,19 @@@ int qed_mcp_set_link(struct qed_hwfn *p
  	phy_cfg.pause |= (params->pause.forced_tx) ? ETH_PAUSE_TX : 0;
  	phy_cfg.adv_speed = params->speed.advertised_speeds;
  	phy_cfg.loopback_mode = params->loopback_mode;
 +	if (p_hwfn->mcp_info->capabilities & FW_MB_PARAM_FEATURE_SUPPORT_EEE) {
 +		if (params->eee.enable)
 +			phy_cfg.eee_cfg |= EEE_CFG_EEE_ENABLED;
 +		if (params->eee.tx_lpi_enable)
 +			phy_cfg.eee_cfg |= EEE_CFG_TX_LPI;
 +		if (params->eee.adv_caps & QED_EEE_1G_ADV)
 +			phy_cfg.eee_cfg |= EEE_CFG_ADV_SPEED_1G;
 +		if (params->eee.adv_caps & QED_EEE_10G_ADV)
 +			phy_cfg.eee_cfg |= EEE_CFG_ADV_SPEED_10G;
 +		phy_cfg.eee_cfg |= (params->eee.tx_lpi_timer <<
 +				    EEE_TX_TIMER_USEC_OFFSET) &
 +				   EEE_TX_TIMER_USEC_MASK;
 +	}
  
  	p_hwfn->b_drv_link_init = b_up;
  
@@@ -2863,28 -2822,3 +2863,28 @@@ void qed_mcp_resc_lock_default_init(str
  		p_unlock->resource = resource;
  	}
  }
 +
 +int qed_mcp_get_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 +{
 +	u32 mcp_resp;
 +	int rc;
 +
 +	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_GET_MFW_FEATURE_SUPPORT,
 +			 0, &mcp_resp, &p_hwfn->mcp_info->capabilities);
 +	if (!rc)
 +		DP_VERBOSE(p_hwfn, (QED_MSG_SP | NETIF_MSG_PROBE),
 +			   "MFW supported features: %08x\n",
 +			   p_hwfn->mcp_info->capabilities);
 +
 +	return rc;
 +}
 +
 +int qed_mcp_set_capabilities(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 +{
 +	u32 mcp_resp, mcp_param, features;
 +
 +	features = DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE;
 +
 +	return qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_FEATURE_SUPPORT,
 +			   features, &mcp_resp, &mcp_param);
 +}
diff --combined drivers/net/hyperv/hyperv_net.h
index d1ea99a12cf2,12cc64bfcff8..98b25f6900c8
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@@ -147,6 -147,7 +147,6 @@@ struct hv_netvsc_packet 
  struct netvsc_device_info {
  	unsigned char mac_adr[ETH_ALEN];
  	int  ring_size;
 -	u32  max_num_vrss_chns;
  	u32  num_chn;
  };
  
@@@ -182,16 -183,13 +182,16 @@@ struct rndis_device 
  /* Interface */
  struct rndis_message;
  struct netvsc_device;
 -int netvsc_device_add(struct hv_device *device,
 -		      const struct netvsc_device_info *info);
 +struct net_device_context;
 +
 +struct netvsc_device *netvsc_device_add(struct hv_device *device,
 +					const struct netvsc_device_info *info);
 +int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx);
  void netvsc_device_remove(struct hv_device *device);
 -int netvsc_send(struct hv_device *device,
 +int netvsc_send(struct net_device_context *ndc,
  		struct hv_netvsc_packet *packet,
  		struct rndis_message *rndis_msg,
 -		struct hv_page_buffer **page_buffer,
 +		struct hv_page_buffer *page_buffer,
  		struct sk_buff *skb);
  void netvsc_linkstatus_callback(struct hv_device *device_obj,
  				struct rndis_message *resp);
@@@ -202,11 -200,10 +202,11 @@@ int netvsc_recv_callback(struct net_dev
  			 const struct ndis_pkt_8021q_info *vlan);
  void netvsc_channel_cb(void *context);
  int netvsc_poll(struct napi_struct *napi, int budget);
 +bool rndis_filter_opened(const struct netvsc_device *nvdev);
  int rndis_filter_open(struct netvsc_device *nvdev);
  int rndis_filter_close(struct netvsc_device *nvdev);
 -int rndis_filter_device_add(struct hv_device *dev,
 -			    struct netvsc_device_info *info);
 +struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
 +					      struct netvsc_device_info *info);
  void rndis_filter_update(struct netvsc_device *nvdev);
  void rndis_filter_device_remove(struct hv_device *dev,
  				struct netvsc_device *nvdev);
@@@ -218,8 -215,7 +218,8 @@@ int rndis_filter_receive(struct net_dev
  			 struct vmbus_channel *channel,
  			 void *data, u32 buflen);
  
 -int rndis_filter_set_device_mac(struct net_device *ndev, char *mac);
 +int rndis_filter_set_device_mac(struct netvsc_device *ndev,
 +				const char *mac);
  
  void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
  
@@@ -658,10 -654,13 +658,10 @@@ struct recv_comp_data 
  	u32 status;
  };
  
 -/* Netvsc Receive Slots Max */
 -#define NETVSC_RECVSLOT_MAX (NETVSC_RECEIVE_BUFFER_SIZE / ETH_DATA_LEN + 1)
 -
  struct multi_recv_comp {
 -	void *buf; /* queued receive completions */
 -	u32 first; /* first data entry */
 -	u32 next; /* next entry for writing */
 +	struct recv_comp_data *slots;
 +	u32 first;	/* first data entry */
 +	u32 next;	/* next entry for writing */
  };
  
  struct netvsc_stats {
@@@ -680,15 -679,6 +680,15 @@@ struct netvsc_ethtool_stats 
  	unsigned long tx_busy;
  };
  
 +struct netvsc_vf_pcpu_stats {
 +	u64     rx_packets;
 +	u64     rx_bytes;
 +	u64     tx_packets;
 +	u64     tx_bytes;
 +	struct u64_stats_sync   syncp;
 +	u32	tx_dropped;
 +};
 +
  struct netvsc_reconfig {
  	struct list_head list;
  	u32 event;
@@@ -722,19 -712,18 +722,19 @@@ struct net_device_context 
  
  	/* State to manage the associated VF interface. */
  	struct net_device __rcu *vf_netdev;
 +	struct netvsc_vf_pcpu_stats __percpu *vf_stats;
 +	struct work_struct vf_takeover;
  
  	/* 1: allocated, serial number is valid. 0: not allocated */
  	u32 vf_alloc;
  	/* Serial number of the VF to team with */
  	u32 vf_serial;
 -
 -	bool datapath;	/* 0 - synthetic, 1 - VF nic */
  };
  
  /* Per channel data */
  struct netvsc_channel {
  	struct vmbus_channel *channel;
 +	struct netvsc_device *net_device;
  	const struct vmpacket_descriptor *desc;
  	struct napi_struct napi;
  	struct multi_send_data msd;
@@@ -757,7 -746,7 +757,7 @@@ struct netvsc_device 
  	u32 recv_buf_size;
  	u32 recv_buf_gpadl_handle;
  	u32 recv_section_cnt;
 -	struct nvsp_1_receive_buffer_section *recv_section;
 +	u32 recv_completion_cnt;
  
  	/* Send buffer allocated by us */
  	void *send_buf;
@@@ -776,7 -765,8 +776,8 @@@
  	u32 max_chn;
  	u32 num_chn;
  
- 	refcount_t sc_offered;
+ 	atomic_t open_chn;
+ 	wait_queue_head_t subchan_open;
  
  	struct rndis_device *extension;
  
@@@ -785,6 -775,8 +786,6 @@@
  	u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
  	u32 pkt_align; /* alignment bytes, e.g. 8 */
  
 -	atomic_t num_outstanding_recvs;
 -
  	atomic_t open_cnt;
  
  	struct netvsc_channel chan_table[VRSS_CHANNEL_MAX];
@@@ -792,6 -784,18 +793,6 @@@
  	struct rcu_head rcu;
  };
  
 -static inline struct netvsc_device *
 -net_device_to_netvsc_device(struct net_device *ndev)
 -{
 -	return ((struct net_device_context *)netdev_priv(ndev))->nvdev;
 -}
 -
 -static inline struct netvsc_device *
 -hv_device_to_netvsc_device(struct hv_device *device)
 -{
 -	return net_device_to_netvsc_device(hv_get_drvdata(device));
 -}
 -
  /* NdisInitialize message */
  struct rndis_initialize_request {
  	u32 req_id;
diff --combined drivers/net/hyperv/netvsc.c
index 208f03aa83de,d18c3326a1f7..bffaf93d3cb0
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@@ -29,9 -29,6 +29,9 @@@
  #include <linux/netdevice.h>
  #include <linux/if_ether.h>
  #include <linux/vmalloc.h>
 +#include <linux/rtnetlink.h>
 +#include <linux/prefetch.h>
 +
  #include <asm/sync_bitops.h>
  
  #include "hyperv_net.h"
@@@ -44,7 -41,7 +44,7 @@@ void netvsc_switch_datapath(struct net_
  {
  	struct net_device_context *net_device_ctx = netdev_priv(ndev);
  	struct hv_device *dev = net_device_ctx->device_ctx;
 -	struct netvsc_device *nv_dev = net_device_ctx->nvdev;
 +	struct netvsc_device *nv_dev = rtnl_dereference(net_device_ctx->nvdev);
  	struct nvsp_message *init_pkt = &nv_dev->channel_init_pkt;
  
  	memset(init_pkt, 0, sizeof(struct nvsp_message));
@@@ -60,6 -57,8 +60,6 @@@
  			       sizeof(struct nvsp_message),
  			       (unsigned long)init_pkt,
  			       VM_PKT_DATA_INBAND, 0);
 -
 -	net_device_ctx->datapath = vf;
  }
  
  static struct netvsc_device *alloc_net_device(void)
@@@ -70,12 -69,16 +70,13 @@@
  	if (!net_device)
  		return NULL;
  
 -	net_device->chan_table[0].mrc.buf
 -		= vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
 -
  	init_waitqueue_head(&net_device->wait_drain);
  	net_device->destroy = false;
  	atomic_set(&net_device->open_cnt, 0);
  	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
  	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;
  	init_completion(&net_device->channel_init_wait);
+ 	init_waitqueue_head(&net_device->subchan_open);
  
  	return net_device;
  }
@@@ -87,7 -90,7 +88,7 @@@ static void free_netvsc_device(struct r
  	int i;
  
  	for (i = 0; i < VRSS_CHANNEL_MAX; i++)
 -		vfree(nvdev->chan_table[i].mrc.buf);
 +		vfree(nvdev->chan_table[i].mrc.slots);
  
  	kfree(nvdev);
  }
@@@ -101,8 -104,7 +102,8 @@@ static void netvsc_destroy_buf(struct h
  {
  	struct nvsp_message *revoke_packet;
  	struct net_device *ndev = hv_get_drvdata(device);
 -	struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
 +	struct net_device_context *ndc = netdev_priv(ndev);
 +	struct netvsc_device *net_device = rtnl_dereference(ndc->nvdev);
  	int ret;
  
  	/*
@@@ -166,6 -168,12 +167,6 @@@
  		net_device->recv_buf = NULL;
  	}
  
 -	if (net_device->recv_section) {
 -		net_device->recv_section_cnt = 0;
 -		kfree(net_device->recv_section);
 -		net_device->recv_section = NULL;
 -	}
 -
  	/* Deal with the send buffer we may have setup.
  	 * If we got a  send section size, it means we received a
  	 * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
@@@ -228,26 -236,11 +229,26 @@@
  	kfree(net_device->send_section_map);
  }
  
 +int netvsc_alloc_recv_comp_ring(struct netvsc_device *net_device, u32 q_idx)
 +{
 +	struct netvsc_channel *nvchan = &net_device->chan_table[q_idx];
 +	int node = cpu_to_node(nvchan->channel->target_cpu);
 +	size_t size;
 +
 +	size = net_device->recv_completion_cnt * sizeof(struct recv_comp_data);
 +	nvchan->mrc.slots = vzalloc_node(size, node);
 +	if (!nvchan->mrc.slots)
 +		nvchan->mrc.slots = vzalloc(size);
 +
 +	return nvchan->mrc.slots ? 0 : -ENOMEM;
 +}
 +
  static int netvsc_init_buf(struct hv_device *device,
  			   struct netvsc_device *net_device)
  {
  	int ret = 0;
  	struct nvsp_message *init_packet;
 +	struct nvsp_1_message_send_receive_buffer_complete *resp;
  	struct net_device *ndev;
  	size_t map_words;
  	int node;
@@@ -304,41 -297,43 +305,41 @@@
  	wait_for_completion(&net_device->channel_init_wait);
  
  	/* Check the response */
 -	if (init_packet->msg.v1_msg.
 -	    send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
 -		netdev_err(ndev, "Unable to complete receive buffer "
 -			   "initialization with NetVsp - status %d\n",
 -			   init_packet->msg.v1_msg.
 -			   send_recv_buf_complete.status);
 +	resp = &init_packet->msg.v1_msg.send_recv_buf_complete;
 +	if (resp->status != NVSP_STAT_SUCCESS) {
 +		netdev_err(ndev,
 +			   "Unable to complete receive buffer initialization with NetVsp - status %d\n",
 +			   resp->status);
  		ret = -EINVAL;
  		goto cleanup;
  	}
  
  	/* Parse the response */
 +	netdev_dbg(ndev, "Receive sections: %u sub_allocs: size %u count: %u\n",
 +		   resp->num_sections, resp->sections[0].sub_alloc_size,
 +		   resp->sections[0].num_sub_allocs);
  
 -	net_device->recv_section_cnt = init_packet->msg.
 -		v1_msg.send_recv_buf_complete.num_sections;
 -
 -	net_device->recv_section = kmemdup(
 -		init_packet->msg.v1_msg.send_recv_buf_complete.sections,
 -		net_device->recv_section_cnt *
 -		sizeof(struct nvsp_1_receive_buffer_section),
 -		GFP_KERNEL);
 -	if (net_device->recv_section == NULL) {
 -		ret = -EINVAL;
 -		goto cleanup;
 -	}
 +	net_device->recv_section_cnt = resp->num_sections;
  
  	/*
  	 * For 1st release, there should only be 1 section that represents the
  	 * entire receive buffer
  	 */
  	if (net_device->recv_section_cnt != 1 ||
 -	    net_device->recv_section->offset != 0) {
 +	    resp->sections[0].offset != 0) {
  		ret = -EINVAL;
  		goto cleanup;
  	}
  
 -	/* Now setup the send buffer.
 -	 */
 +	/* Setup receive completion ring */
 +	net_device->recv_completion_cnt
 +		= round_up(resp->sections[0].num_sub_allocs + 1,
 +			   PAGE_SIZE / sizeof(u64));
 +	ret = netvsc_alloc_recv_comp_ring(net_device, 0);
 +	if (ret)
 +		goto cleanup;
 +
 +	/* Now setup the send buffer. */
  	net_device->send_buf = vzalloc_node(net_device->send_buf_size, node);
  	if (!net_device->send_buf)
  		net_device->send_buf = vzalloc(net_device->send_buf_size);
@@@ -555,8 -550,7 +556,8 @@@ void netvsc_device_remove(struct hv_dev
  {
  	struct net_device *ndev = hv_get_drvdata(device);
  	struct net_device_context *net_device_ctx = netdev_priv(ndev);
 -	struct netvsc_device *net_device = net_device_ctx->nvdev;
 +	struct netvsc_device *net_device
 +		= rtnl_dereference(net_device_ctx->nvdev);
  	int i;
  
  	netvsc_disconnect_vsp(device);
@@@ -699,7 -693,7 +700,7 @@@ static u32 netvsc_copy_to_send_buf(stru
  				   u32 pend_size,
  				   struct hv_netvsc_packet *packet,
  				   struct rndis_message *rndis_msg,
 -				   struct hv_page_buffer **pb,
 +				   struct hv_page_buffer *pb,
  				   struct sk_buff *skb)
  {
  	char *start = net_device->send_buf;
@@@ -720,9 -714,9 +721,9 @@@
  	}
  
  	for (i = 0; i < page_count; i++) {
 -		char *src = phys_to_virt((*pb)[i].pfn << PAGE_SHIFT);
 -		u32 offset = (*pb)[i].offset;
 -		u32 len = (*pb)[i].len;
 +		char *src = phys_to_virt(pb[i].pfn << PAGE_SHIFT);
 +		u32 offset = pb[i].offset;
 +		u32 len = pb[i].len;
  
  		memcpy(dest, (src + offset), len);
  		msg_size += len;
@@@ -741,32 -735,36 +742,32 @@@ static inline int netvsc_send_pkt
  	struct hv_device *device,
  	struct hv_netvsc_packet *packet,
  	struct netvsc_device *net_device,
 -	struct hv_page_buffer **pb,
 +	struct hv_page_buffer *pb,
  	struct sk_buff *skb)
  {
  	struct nvsp_message nvmsg;
 -	struct netvsc_channel *nvchan
 -		= &net_device->chan_table[packet->q_idx];
 +	struct nvsp_1_message_send_rndis_packet * const rpkt =
 +		&nvmsg.msg.v1_msg.send_rndis_pkt;
 +	struct netvsc_channel * const nvchan =
 +		&net_device->chan_table[packet->q_idx];
  	struct vmbus_channel *out_channel = nvchan->channel;
  	struct net_device *ndev = hv_get_drvdata(device);
  	struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
  	u64 req_id;
  	int ret;
 -	struct hv_page_buffer *pgbuf;
  	u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);
  
  	nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
 -	if (skb != NULL) {
 -		/* 0 is RMC_DATA; */
 -		nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0;
 -	} else {
 -		/* 1 is RMC_CONTROL; */
 -		nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1;
 -	}
 +	if (skb)
 +		rpkt->channel_type = 0;		/* 0 is RMC_DATA */
 +	else
 +		rpkt->channel_type = 1;		/* 1 is RMC_CONTROL */
  
 -	nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
 -		packet->send_buf_index;
 +	rpkt->send_buf_section_index = packet->send_buf_index;
  	if (packet->send_buf_index == NETVSC_INVALID_INDEX)
 -		nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
 +		rpkt->send_buf_section_size = 0;
  	else
 -		nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size =
 -			packet->total_data_buflen;
 +		rpkt->send_buf_section_size = packet->total_data_buflen;
  
  	req_id = (ulong)skb;
  
@@@ -774,11 -772,11 +775,11 @@@
  		return -ENODEV;
  
  	if (packet->page_buf_cnt) {
 -		pgbuf = packet->cp_partial ? (*pb) +
 -			packet->rmsg_pgcnt : (*pb);
 +		if (packet->cp_partial)
 +			pb += packet->rmsg_pgcnt;
 +
  		ret = vmbus_sendpacket_pagebuffer_ctl(out_channel,
 -						      pgbuf,
 -						      packet->page_buf_cnt,
 +						      pb, packet->page_buf_cnt,
  						      &nvmsg,
  						      sizeof(struct nvsp_message),
  						      req_id,
@@@ -803,10 -801,8 +804,10 @@@
  			ret = -ENOSPC;
  		}
  	} else {
 -		netdev_err(ndev, "Unable to send packet %p ret %d\n",
 -			   packet, ret);
 +		netdev_err(ndev,
 +			   "Unable to send packet pages %u len %u, ret %d\n",
 +			   packet->page_buf_cnt, packet->total_data_buflen,
 +			   ret);
  	}
  
  	return ret;
@@@ -824,16 -820,13 +825,16 @@@ static inline void move_pkt_msd(struct 
  	msdp->count = 0;
  }
  
 -int netvsc_send(struct hv_device *device,
 +/* RCU already held by caller */
 +int netvsc_send(struct net_device_context *ndev_ctx,
  		struct hv_netvsc_packet *packet,
  		struct rndis_message *rndis_msg,
 -		struct hv_page_buffer **pb,
 +		struct hv_page_buffer *pb,
  		struct sk_buff *skb)
  {
 -	struct netvsc_device *net_device = hv_device_to_netvsc_device(device);
 +	struct netvsc_device *net_device
 +		= rcu_dereference_bh(ndev_ctx->nvdev);
 +	struct hv_device *device = ndev_ctx->device_ctx;
  	int ret = 0;
  	struct netvsc_channel *nvchan;
  	u32 pktlen = packet->total_data_buflen, msd_len = 0;
@@@ -845,7 -838,7 +846,7 @@@
  	bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
  
  	/* If device is rescinded, return error and packet will get dropped. */
 -	if (unlikely(net_device->destroy))
 +	if (unlikely(!net_device || net_device->destroy))
  		return -ENODEV;
  
  	/* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
@@@ -950,94 -943,130 +951,94 @@@ send_now
  	return ret;
  }
  
 -static int netvsc_send_recv_completion(struct vmbus_channel *channel,
 -				       u64 transaction_id, u32 status)
 +/* Send pending recv completions */
 +static int send_recv_completions(struct netvsc_channel *nvchan)
  {
 -	struct nvsp_message recvcompMessage;
 +	struct netvsc_device *nvdev = nvchan->net_device;
 +	struct multi_recv_comp *mrc = &nvchan->mrc;
 +	struct recv_comp_msg {
 +		struct nvsp_message_header hdr;
 +		u32 status;
 +	}  __packed;
 +	struct recv_comp_msg msg = {
 +		.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE,
 +	};
  	int ret;
  
 -	recvcompMessage.hdr.msg_type =
 -				NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;
 -
 -	recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
 -
 -	/* Send the completion */
 -	ret = vmbus_sendpacket(channel, &recvcompMessage,
 -			       sizeof(struct nvsp_message_header) + sizeof(u32),
 -			       transaction_id, VM_PKT_COMP, 0);
 +	while (mrc->first != mrc->next) {
 +		const struct recv_comp_data *rcd
 +			= mrc->slots + mrc->first;
  
 -	return ret;
 -}
 +		msg.status = rcd->status;
 +		ret = vmbus_sendpacket(nvchan->channel, &msg, sizeof(msg),
 +				       rcd->tid, VM_PKT_COMP, 0);
 +		if (unlikely(ret))
 +			return ret;
  
 -static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx,
 -					u32 *filled, u32 *avail)
 -{
 -	struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
 -	u32 first = mrc->first;
 -	u32 next = mrc->next;
 -
 -	*filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next :
 -		  next - first;
 -
 -	*avail = NETVSC_RECVSLOT_MAX - *filled - 1;
 -}
 -
 -/* Read the first filled slot, no change to index */
 -static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device
 -							 *nvdev, u16 q_idx)
 -{
 -	struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
 -	u32 filled, avail;
 -
 -	if (unlikely(!mrc->buf))
 -		return NULL;
 +		if (++mrc->first == nvdev->recv_completion_cnt)
 +			mrc->first = 0;
 +	}
  
 -	count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
 -	if (!filled)
 -		return NULL;
 +	/* receive completion ring has been emptied */
 +	if (unlikely(nvdev->destroy))
 +		wake_up(&nvdev->wait_drain);
  
 -	return mrc->buf + mrc->first * sizeof(struct recv_comp_data);
 +	return 0;
  }
  
 -/* Put the first filled slot back to available pool */
 -static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx)
 +/* Count how many receive completions are outstanding */
 +static void recv_comp_slot_avail(const struct netvsc_device *nvdev,
 +				 const struct multi_recv_comp *mrc,
 +				 u32 *filled, u32 *avail)
  {
 -	struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
 -	int num_recv;
 +	u32 count = nvdev->recv_completion_cnt;
  
 -	mrc->first = (mrc->first + 1) % NETVSC_RECVSLOT_MAX;
 -
 -	num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs);
 +	if (mrc->next >= mrc->first)
 +		*filled = mrc->next - mrc->first;
 +	else
 +		*filled = (count - mrc->first) + mrc->next;
  
 -	if (nvdev->destroy && num_recv == 0)
 -		wake_up(&nvdev->wait_drain);
 +	*avail = count - *filled - 1;
  }
  
 -/* Check and send pending recv completions */
 -static void netvsc_chk_recv_comp(struct netvsc_device *nvdev,
 -				 struct vmbus_channel *channel, u16 q_idx)
 +/* Add receive complete to ring to send to host. */
 +static void enq_receive_complete(struct net_device *ndev,
 +				 struct netvsc_device *nvdev, u16 q_idx,
 +				 u64 tid, u32 status)
  {
 +	struct netvsc_channel *nvchan = &nvdev->chan_table[q_idx];
 +	struct multi_recv_comp *mrc = &nvchan->mrc;
  	struct recv_comp_data *rcd;
 -	int ret;
 -
 -	while (true) {
 -		rcd = read_recv_comp_slot(nvdev, q_idx);
 -		if (!rcd)
 -			break;
 +	u32 filled, avail;
  
 -		ret = netvsc_send_recv_completion(channel, rcd->tid,
 -						  rcd->status);
 -		if (ret)
 -			break;
 +	recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
  
 -		put_recv_comp_slot(nvdev, q_idx);
 +	if (unlikely(filled > NAPI_POLL_WEIGHT)) {
 +		send_recv_completions(nvchan);
 +		recv_comp_slot_avail(nvdev, mrc, &filled, &avail);
  	}
 -}
 -
 -#define NETVSC_RCD_WATERMARK 80
 -
 -/* Get next available slot */
 -static inline struct recv_comp_data *get_recv_comp_slot(
 -	struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx)
 -{
 -	struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
 -	u32 filled, avail, next;
 -	struct recv_comp_data *rcd;
 -
 -	if (unlikely(!nvdev->recv_section))
 -		return NULL;
 -
 -	if (unlikely(!mrc->buf))
 -		return NULL;
 -
 -	if (atomic_read(&nvdev->num_outstanding_recvs) >
 -	    nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100)
 -		netvsc_chk_recv_comp(nvdev, channel, q_idx);
  
 -	count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
 -	if (!avail)
 -		return NULL;
 -
 -	next = mrc->next;
 -	rcd = mrc->buf + next * sizeof(struct recv_comp_data);
 -	mrc->next = (next + 1) % NETVSC_RECVSLOT_MAX;
 +	if (unlikely(!avail)) {
 +		netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
 +			   q_idx, tid);
 +		return;
 +	}
  
 -	atomic_inc(&nvdev->num_outstanding_recvs);
 +	rcd = mrc->slots + mrc->next;
 +	rcd->tid = tid;
 +	rcd->status = status;
  
 -	return rcd;
 +	if (++mrc->next == nvdev->recv_completion_cnt)
 +		mrc->next = 0;
  }
  
  static int netvsc_receive(struct net_device *ndev,
 -		   struct netvsc_device *net_device,
 -		   struct net_device_context *net_device_ctx,
 -		   struct hv_device *device,
 -		   struct vmbus_channel *channel,
 -		   const struct vmpacket_descriptor *desc,
 -		   struct nvsp_message *nvsp)
 +			  struct netvsc_device *net_device,
 +			  struct net_device_context *net_device_ctx,
 +			  struct hv_device *device,
 +			  struct vmbus_channel *channel,
 +			  const struct vmpacket_descriptor *desc,
 +			  struct nvsp_message *nvsp)
  {
  	const struct vmtransfer_page_packet_header *vmxferpage_packet
  		= container_of(desc, const struct vmtransfer_page_packet_header, d);
@@@ -1046,6 -1075,7 +1047,6 @@@
  	u32 status = NVSP_STAT_SUCCESS;
  	int i;
  	int count = 0;
 -	int ret;
  
  	/* Make sure this is a valid nvsp packet */
  	if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
@@@ -1076,9 -1106,25 +1077,9 @@@
  					      channel, data, buflen);
  	}
  
 -	if (net_device->chan_table[q_idx].mrc.buf) {
 -		struct recv_comp_data *rcd;
 +	enq_receive_complete(ndev, net_device, q_idx,
 +			     vmxferpage_packet->d.trans_id, status);
  
 -		rcd = get_recv_comp_slot(net_device, channel, q_idx);
 -		if (rcd) {
 -			rcd->tid = vmxferpage_packet->d.trans_id;
 -			rcd->status = status;
 -		} else {
 -			netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
 -				   q_idx, vmxferpage_packet->d.trans_id);
 -		}
 -	} else {
 -		ret = netvsc_send_recv_completion(channel,
 -						  vmxferpage_packet->d.trans_id,
 -						  status);
 -		if (ret)
 -			netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
 -				   q_idx, vmxferpage_packet->d.trans_id, ret);
 -	}
  	return count;
  }
  
@@@ -1174,10 -1220,11 +1175,10 @@@ int netvsc_poll(struct napi_struct *nap
  {
  	struct netvsc_channel *nvchan
  		= container_of(napi, struct netvsc_channel, napi);
 +	struct netvsc_device *net_device = nvchan->net_device;
  	struct vmbus_channel *channel = nvchan->channel;
  	struct hv_device *device = netvsc_channel_to_device(channel);
 -	u16 q_idx = channel->offermsg.offer.sub_channel_index;
  	struct net_device *ndev = hv_get_drvdata(device);
 -	struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
  	int work_done = 0;
  
  	/* If starting a new interval */
@@@ -1190,23 -1237,17 +1191,23 @@@
  		nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
  	}
  
 -	/* If receive ring was exhausted
 -	 * and not doing busy poll
 +	/* if ring is empty, signal host */
 +	if (!nvchan->desc)
 +		hv_pkt_iter_close(channel);
 +
 +	/* If send of pending receive completions suceeded
 +	 *   and did not exhaust NAPI budget this time
 +	 *   and not doing busy poll
  	 * then re-enable host interrupts
 -	 *  and reschedule if ring is not empty.
 +	 *     and reschedule if ring is not empty.
  	 */
 -	if (work_done < budget &&
 +	if (send_recv_completions(nvchan) == 0 &&
 +	    work_done < budget &&
  	    napi_complete_done(napi, work_done) &&
 -	    hv_end_read(&channel->inbound) != 0)
 +	    hv_end_read(&channel->inbound)) {
 +		hv_begin_read(&channel->inbound);
  		napi_reschedule(napi);
 -
 -	netvsc_chk_recv_comp(net_device, channel, q_idx);
 +	}
  
  	/* Driver may overshoot since multiple packets per descriptor */
  	return min(work_done, budget);
@@@ -1218,15 -1259,10 +1219,15 @@@
  void netvsc_channel_cb(void *context)
  {
  	struct netvsc_channel *nvchan = context;
 +	struct vmbus_channel *channel = nvchan->channel;
 +	struct hv_ring_buffer_info *rbi = &channel->inbound;
 +
 +	/* preload first vmpacket descriptor */
 +	prefetch(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
  
  	if (napi_schedule_prep(&nvchan->napi)) {
  		/* disable interupts from host */
 -		hv_begin_read(&nvchan->channel->inbound);
 +		hv_begin_read(rbi);
  
  		__napi_schedule(&nvchan->napi);
  	}
@@@ -1236,8 -1272,8 +1237,8 @@@
   * netvsc_device_add - Callback when the device belonging to this
   * driver is added
   */
 -int netvsc_device_add(struct hv_device *device,
 -		      const struct netvsc_device_info *device_info)
 +struct netvsc_device *netvsc_device_add(struct hv_device *device,
 +				const struct netvsc_device_info *device_info)
  {
  	int i, ret = 0;
  	int ring_size = device_info->ring_size;
@@@ -1247,7 -1283,7 +1248,7 @@@
  
  	net_device = alloc_net_device();
  	if (!net_device)
 -		return -ENOMEM;
 +		return ERR_PTR(-ENOMEM);
  
  	net_device->ring_size = ring_size;
  
@@@ -1267,7 -1303,8 +1268,9 @@@
  		struct netvsc_channel *nvchan = &net_device->chan_table[i];
  
  		nvchan->channel = device->channel;
 +		nvchan->net_device = net_device;
+ 		u64_stats_init(&nvchan->tx_stats.syncp);
+ 		u64_stats_init(&nvchan->rx_stats.syncp);
  	}
  
  	/* Enable NAPI handler before init callbacks */
@@@ -1304,11 -1341,10 +1307,11 @@@
  		goto close;
  	}
  
 -	return ret;
 +	return net_device;
  
  close:
 -	netif_napi_del(&net_device->chan_table[0].napi);
 +	RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
 +	napi_disable(&net_device->chan_table[0].napi);
  
  	/* Now, we can close the channel safely */
  	vmbus_close(device->channel);
@@@ -1316,5 -1352,6 +1319,5 @@@
  cleanup:
  	free_netvsc_device(&net_device->rcu);
  
 -	return ret;
 -
 +	return ERR_PTR(ret);
  }
diff --combined drivers/net/hyperv/rndis_filter.c
index 44165fe328a4,d6308ffda53e..36e9ee82ec6f
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@@ -28,7 -28,6 +28,7 @@@
  #include <linux/if_vlan.h>
  #include <linux/nls.h>
  #include <linux/vmalloc.h>
 +#include <linux/rtnetlink.h>
  
  #include "hyperv_net.h"
  
@@@ -214,11 -213,11 +214,11 @@@ static void dump_rndis_message(struct h
  static int rndis_filter_send_request(struct rndis_device *dev,
  				  struct rndis_request *req)
  {
 -	int ret;
  	struct hv_netvsc_packet *packet;
  	struct hv_page_buffer page_buf[2];
  	struct hv_page_buffer *pb = page_buf;
  	struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
 +	int ret;
  
  	/* Setup the packet to send it */
  	packet = &req->pkt;
@@@ -244,10 -243,7 +244,10 @@@
  			pb[0].len;
  	}
  
 -	ret = netvsc_send(net_device_ctx->device_ctx, packet, NULL, &pb, NULL);
 +	rcu_read_lock_bh();
 +	ret = netvsc_send(net_device_ctx, packet, NULL, pb, NULL);
 +	rcu_read_unlock_bh();
 +
  	return ret;
  }
  
@@@ -447,9 -443,8 +447,9 @@@ int rndis_filter_receive(struct net_dev
  	return 0;
  }
  
 -static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
 -				  void *result, u32 *result_size)
 +static int rndis_filter_query_device(struct rndis_device *dev,
 +				     struct netvsc_device *nvdev,
 +				     u32 oid, void *result, u32 *result_size)
  {
  	struct rndis_request *request;
  	u32 inresult_size = *result_size;
@@@ -476,6 -471,8 +476,6 @@@
  	query->dev_vc_handle = 0;
  
  	if (oid == OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES) {
 -		struct net_device_context *ndevctx = netdev_priv(dev->ndev);
 -		struct netvsc_device *nvdev = ndevctx->nvdev;
  		struct ndis_offload *hwcaps;
  		u32 nvsp_version = nvdev->nvsp_version;
  		u8 ndis_rev;
@@@ -544,15 -541,14 +544,15 @@@ cleanup
  
  /* Get the hardware offload capabilities */
  static int
 -rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps)
 +rndis_query_hwcaps(struct rndis_device *dev, struct netvsc_device *net_device,
 +		   struct ndis_offload *caps)
  {
  	u32 caps_len = sizeof(*caps);
  	int ret;
  
  	memset(caps, 0, sizeof(*caps));
  
 -	ret = rndis_filter_query_device(dev,
 +	ret = rndis_filter_query_device(dev, net_device,
  					OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
  					caps, &caps_len);
  	if (ret)
@@@ -581,12 -577,11 +581,12 @@@
  	return 0;
  }
  
 -static int rndis_filter_query_device_mac(struct rndis_device *dev)
 +static int rndis_filter_query_device_mac(struct rndis_device *dev,
 +					 struct netvsc_device *net_device)
  {
  	u32 size = ETH_ALEN;
  
 -	return rndis_filter_query_device(dev,
 +	return rndis_filter_query_device(dev, net_device,
  				      RNDIS_OID_802_3_PERMANENT_ADDRESS,
  				      dev->hw_mac_adr, &size);
  }
@@@ -594,9 -589,9 +594,9 @@@
  #define NWADR_STR "NetworkAddress"
  #define NWADR_STRLEN 14
  
 -int rndis_filter_set_device_mac(struct net_device *ndev, char *mac)
 +int rndis_filter_set_device_mac(struct netvsc_device *nvdev,
 +				const char *mac)
  {
 -	struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev);
  	struct rndis_device *rdev = nvdev->extension;
  	struct rndis_request *request;
  	struct rndis_set_request *set;
@@@ -650,8 -645,11 +650,8 @@@
  	wait_for_completion(&request->wait_event);
  
  	set_complete = &request->response_msg.msg.set_complete;
 -	if (set_complete->status != RNDIS_STATUS_SUCCESS) {
 -		netdev_err(ndev, "Fail to set MAC on host side:0x%x\n",
 -			   set_complete->status);
 -		ret = -EINVAL;
 -	}
 +	if (set_complete->status != RNDIS_STATUS_SUCCESS)
 +		ret = -EIO;
  
  cleanup:
  	put_rndis_request(rdev, request);
@@@ -660,9 -658,9 +660,9 @@@
  
  static int
  rndis_filter_set_offload_params(struct net_device *ndev,
 +				struct netvsc_device *nvdev,
  				struct ndis_offload_params *req_offloads)
  {
 -	struct netvsc_device *nvdev = net_device_to_netvsc_device(ndev);
  	struct rndis_device *rdev = nvdev->extension;
  	struct rndis_request *request;
  	struct rndis_set_request *set;
@@@ -784,27 -782,27 +784,27 @@@ cleanup
  	return ret;
  }
  
 -static int rndis_filter_query_device_link_status(struct rndis_device *dev)
 +static int rndis_filter_query_device_link_status(struct rndis_device *dev,
 +						 struct netvsc_device *net_device)
  {
  	u32 size = sizeof(u32);
  	u32 link_status;
 -	int ret;
 -
 -	ret = rndis_filter_query_device(dev,
 -				      RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
 -				      &link_status, &size);
  
 -	return ret;
 +	return rndis_filter_query_device(dev, net_device,
 +					 RNDIS_OID_GEN_MEDIA_CONNECT_STATUS,
 +					 &link_status, &size);
  }
  
 -static int rndis_filter_query_link_speed(struct rndis_device *dev)
 +static int rndis_filter_query_link_speed(struct rndis_device *dev,
 +					 struct netvsc_device *net_device)
  {
  	u32 size = sizeof(u32);
  	u32 link_speed;
  	struct net_device_context *ndc;
  	int ret;
  
 -	ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_LINK_SPEED,
 +	ret = rndis_filter_query_device(dev, net_device,
 +					RNDIS_OID_GEN_LINK_SPEED,
  					&link_speed, &size);
  
  	if (!ret) {
@@@ -873,14 -871,14 +873,14 @@@ void rndis_filter_update(struct netvsc_
  	schedule_work(&rdev->mcast_work);
  }
  
 -static int rndis_filter_init_device(struct rndis_device *dev)
 +static int rndis_filter_init_device(struct rndis_device *dev,
 +				    struct netvsc_device *nvdev)
  {
  	struct rndis_request *request;
  	struct rndis_initialize_request *init;
  	struct rndis_initialize_complete *init_complete;
  	u32 status;
  	int ret;
 -	struct netvsc_device *nvdev = net_device_to_netvsc_device(dev->ndev);
  
  	request = get_rndis_request(dev, RNDIS_MSG_INIT,
  			RNDIS_MESSAGE_SIZE(struct rndis_initialize_request));
@@@ -928,12 -926,12 +928,12 @@@ static bool netvsc_device_idle(const st
  {
  	int i;
  
 -	if (atomic_read(&nvdev->num_outstanding_recvs) > 0)
 -		return false;
 -
  	for (i = 0; i < nvdev->num_chn; i++) {
  		const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
  
 +		if (nvchan->mrc.first != nvchan->mrc.next)
 +			return false;
 +
  		if (atomic_read(&nvchan->queue_sends) > 0)
  			return false;
  	}
@@@ -946,7 -944,7 +946,7 @@@ static void rndis_filter_halt_device(st
  	struct rndis_request *request;
  	struct rndis_halt_request *halt;
  	struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
 -	struct netvsc_device *nvdev = net_device_ctx->nvdev;
 +	struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
  
  	/* Attempt to do a rndis device halt */
  	request = get_rndis_request(dev, RNDIS_MSG_HALT,
@@@ -1017,20 -1015,20 +1017,20 @@@ static void netvsc_sc_open(struct vmbus
  {
  	struct net_device *ndev =
  		hv_get_drvdata(new_sc->primary_channel->device_obj);
 -	struct netvsc_device *nvscdev = net_device_to_netvsc_device(ndev);
 +	struct net_device_context *ndev_ctx = netdev_priv(ndev);
 +	struct netvsc_device *nvscdev;
  	u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
  	struct netvsc_channel *nvchan;
  	int ret;
  
 -	if (chn_index >= nvscdev->num_chn)
 +	/* This is safe because this callback only happens when
 +	 * new device is being setup and waiting on the channel_init_wait.
 +	 */
 +	nvscdev = rcu_dereference_raw(ndev_ctx->nvdev);
 +	if (!nvscdev || chn_index >= nvscdev->num_chn)
  		return;
  
  	nvchan = nvscdev->chan_table + chn_index;
 -	nvchan->mrc.buf
 -		= vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
 -
 -	if (!nvchan->mrc.buf)
 -		return;
  
  	/* Because the device uses NAPI, all the interrupt batching and
  	 * control is done via Net softirq, not the channel handling
@@@ -1050,12 -1048,12 +1050,12 @@@
  	else
  		netif_napi_del(&nvchan->napi);
  
- 	if (refcount_dec_and_test(&nvscdev->sc_offered))
- 		complete(&nvscdev->channel_init_wait);
+ 	atomic_inc(&nvscdev->open_chn);
+ 	wake_up(&nvscdev->subchan_open);
  }
  
 -int rndis_filter_device_add(struct hv_device *dev,
 -			    struct netvsc_device_info *device_info)
 +struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
 +				      struct netvsc_device_info *device_info)
  {
  	struct net_device *net = hv_get_drvdata(dev);
  	struct net_device_context *net_device_ctx = netdev_priv(net);
@@@ -1074,52 -1072,57 +1074,50 @@@
  
  	rndis_device = get_rndis_device();
  	if (!rndis_device)
 -		return -ENODEV;
 +		return ERR_PTR(-ENODEV);
  
  	/*
  	 * Let the inner driver handle this first to create the netvsc channel
  	 * NOTE! Once the channel is created, we may get a receive callback
  	 * (RndisFilterOnReceive()) before this call is completed
  	 */
 -	ret = netvsc_device_add(dev, device_info);
 -	if (ret != 0) {
 +	net_device = netvsc_device_add(dev, device_info);
 +	if (IS_ERR(net_device)) {
  		kfree(rndis_device);
 -		return ret;
 +		return net_device;
  	}
  
  	/* Initialize the rndis device */
 -	net_device = net_device_ctx->nvdev;
  	net_device->max_chn = 1;
  	net_device->num_chn = 1;
  
- 	refcount_set(&net_device->sc_offered, 0);
- 
  	net_device->extension = rndis_device;
  	rndis_device->ndev = net;
  
  	/* Send the rndis initialization message */
 -	ret = rndis_filter_init_device(rndis_device);
 -	if (ret != 0) {
 -		rndis_filter_device_remove(dev, net_device);
 -		return ret;
 -	}
 +	ret = rndis_filter_init_device(rndis_device, net_device);
 +	if (ret != 0)
 +		goto err_dev_remv;
  
  	/* Get the MTU from the host */
  	size = sizeof(u32);
 -	ret = rndis_filter_query_device(rndis_device,
 +	ret = rndis_filter_query_device(rndis_device, net_device,
  					RNDIS_OID_GEN_MAXIMUM_FRAME_SIZE,
  					&mtu, &size);
  	if (ret == 0 && size == sizeof(u32) && mtu < net->mtu)
  		net->mtu = mtu;
  
  	/* Get the mac address */
 -	ret = rndis_filter_query_device_mac(rndis_device);
 -	if (ret != 0) {
 -		rndis_filter_device_remove(dev, net_device);
 -		return ret;
 -	}
 +	ret = rndis_filter_query_device_mac(rndis_device, net_device);
 +	if (ret != 0)
 +		goto err_dev_remv;
  
  	memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
  
  	/* Find HW offload capabilities */
 -	ret = rndis_query_hwcaps(rndis_device, &hwcaps);
 -	if (ret != 0) {
 -		rndis_filter_device_remove(dev, net_device);
 -		return ret;
 -	}
 +	ret = rndis_query_hwcaps(rndis_device, net_device, &hwcaps);
 +	if (ret != 0)
 +		goto err_dev_remv;
  
  	/* A value of zero means "no change"; now turn on what we want. */
  	memset(&offloads, 0, sizeof(struct ndis_offload_params));
@@@ -1174,24 -1177,24 +1172,24 @@@
  
  	netif_set_gso_max_size(net, gso_max_size);
  
 -	ret = rndis_filter_set_offload_params(net, &offloads);
 +	ret = rndis_filter_set_offload_params(net, net_device, &offloads);
  	if (ret)
  		goto err_dev_remv;
  
 -	rndis_filter_query_device_link_status(rndis_device);
 +	rndis_filter_query_device_link_status(rndis_device, net_device);
  
  	netdev_dbg(net, "Device MAC %pM link state %s\n",
  		   rndis_device->hw_mac_adr,
  		   rndis_device->link_state ? "down" : "up");
  
  	if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
 -		return 0;
 +		return net_device;
  
 -	rndis_filter_query_link_speed(rndis_device);
 +	rndis_filter_query_link_speed(rndis_device, net_device);
  
  	/* vRSS setup */
  	memset(&rsscap, 0, rsscap_size);
 -	ret = rndis_filter_query_device(rndis_device,
 +	ret = rndis_filter_query_device(rndis_device, net_device,
  					OID_GEN_RECEIVE_SCALE_CAPABILITIES,
  					&rsscap, &rsscap_size);
  	if (ret || rsscap.num_recv_que < 2)
@@@ -1216,20 -1219,11 +1214,20 @@@
  		rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i,
  							net_device->num_chn);
  
+ 	atomic_set(&net_device->open_chn, 1);
  	num_rss_qs = net_device->num_chn - 1;
  	if (num_rss_qs == 0)
 -		return 0;
 +		return net_device;
 +
 +	for (i = 1; i < net_device->num_chn; i++) {
 +		ret = netvsc_alloc_recv_comp_ring(net_device, i);
 +		if (ret) {
 +			while (--i != 0)
 +				vfree(net_device->chan_table[i].mrc.slots);
 +			goto out;
 +		}
 +	}
  
- 	refcount_set(&net_device->sc_offered, num_rss_qs);
  	vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
  
  	init_packet = &net_device->channel_init_pkt;
@@@ -1246,15 -1240,19 +1244,19 @@@
  	if (ret)
  		goto out;
  
+ 	wait_for_completion(&net_device->channel_init_wait);
  	if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) {
  		ret = -ENODEV;
  		goto out;
  	}
- 	wait_for_completion(&net_device->channel_init_wait);
  
  	net_device->num_chn = 1 +
  		init_packet->msg.v5_msg.subchn_comp.num_subchannels;
  
+ 	/* wait for all sub channels to open */
+ 	wait_event(net_device->subchan_open,
+ 		   atomic_read(&net_device->open_chn) == net_device->num_chn);
+ 
  	/* ignore failures from setting rss parameters, still have channels */
  	rndis_filter_set_rss_param(rndis_device, netvsc_hash_key,
  				   net_device->num_chn);
@@@ -1264,11 -1262,11 +1266,11 @@@ out
  		net_device->num_chn = 1;
  	}
  
 -	return 0; /* return 0 because primary channel can be used alone */
 +	return net_device;
  
  err_dev_remv:
  	rndis_filter_device_remove(dev, net_device);
 -	return ret;
 +	return ERR_PTR(ret);
  }
  
  void rndis_filter_device_remove(struct hv_device *dev,
@@@ -1306,8 -1304,3 +1308,8 @@@ int rndis_filter_close(struct netvsc_de
  
  	return rndis_filter_close_device(nvdev->extension);
  }
 +
 +bool rndis_filter_opened(const struct netvsc_device *nvdev)
 +{
 +	return atomic_read(&nvdev->open_cnt) > 0;
 +}
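
The rndis/netvsc hunks above convert rndis_filter_device_add() from returning an
int to returning the struct netvsc_device it created, using the kernel's
error-pointer convention so one return value carries either the object or a
negative errno, and they thread the netvsc_device through the query/set helpers
explicitly instead of re-deriving it from the net_device.  A minimal sketch of
how a caller consumes the new convention follows; netvsc_attach() is an invented
caller name used purely for illustration, not something added by this series.

	/* hypothetical caller, shown only for the IS_ERR/PTR_ERR pattern */
	static int netvsc_attach(struct hv_device *dev,
				 struct netvsc_device_info *info)
	{
		struct netvsc_device *nvdev;

		nvdev = rndis_filter_device_add(dev, info);
		if (IS_ERR(nvdev))
			return PTR_ERR(nvdev);	/* negative errno encoded in the pointer */

		/* nvdev is usable directly here; the old int-returning variant
		 * forced callers to re-read it from the net_device_context.
		 */
		return 0;
	}
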
diff --combined drivers/net/ipvlan/ipvlan_main.c
index fdde20735416,8dab74a81303..58a9f990b553
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@@ -169,7 -169,7 +169,7 @@@ static void ipvlan_port_destroy(struct 
  
  #define IPVLAN_FEATURES \
  	(NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST | \
 -	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_UFO | NETIF_F_GSO_ROBUST | \
 +	 NETIF_F_GSO | NETIF_F_TSO | NETIF_F_GSO_ROBUST | \
  	 NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GRO | NETIF_F_RXCSUM | \
  	 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_STAG_FILTER)
  
@@@ -192,7 -192,7 +192,7 @@@ static int ipvlan_init(struct net_devic
  
  	netdev_lockdep_set_classes(dev);
  
- 	ipvlan->pcpu_stats = alloc_percpu(struct ipvl_pcpu_stats);
+ 	ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats);
  	if (!ipvlan->pcpu_stats)
  		return -ENOMEM;
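
The one-line ipvlan change above swaps a bare alloc_percpu() for
netdev_alloc_pcpu_stats().  The practical difference is that the helper also
runs u64_stats_init() on every CPU's copy, so the seqcount used by 64-bit
stats readers (and checked by lockdep) starts out initialized.  Roughly, and
assuming the stats structure embeds its u64_stats_sync in a member named
"syncp" as the macro requires, it amounts to the following sketch (an
approximation, not the exact macro text from netdevice.h):

	struct ipvl_pcpu_stats __percpu *stats;
	int cpu;

	stats = alloc_percpu(struct ipvl_pcpu_stats);
	if (stats) {
		for_each_possible_cpu(cpu)
			u64_stats_init(&per_cpu_ptr(stats, cpu)->syncp);
	}
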
  
diff --combined drivers/net/vxlan.c
index dbca067540d0,e17baac70f43..35e84a9e1cfb
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@@ -623,6 -623,7 +623,7 @@@ static struct sk_buff **vxlan_gro_recei
  
  out:
  	skb_gro_remcsum_cleanup(skb, &grc);
+ 	skb->remcsum_offload = 0;
  	NAPI_GRO_CB(skb)->flush |= flush;
  
  	return pp;
@@@ -2608,7 -2609,7 +2609,7 @@@ static struct device_type vxlan_type = 
   * supply the listening VXLAN udp ports. Callers are expected
   * to implement the ndo_udp_tunnel_add.
   */
 -static void vxlan_push_rx_ports(struct net_device *dev)
 +static void vxlan_offload_rx_ports(struct net_device *dev, bool push)
  {
  	struct vxlan_sock *vs;
  	struct net *net = dev_net(dev);
@@@ -2617,19 -2618,11 +2618,19 @@@
  
  	spin_lock(&vn->sock_lock);
  	for (i = 0; i < PORT_HASH_SIZE; ++i) {
 -		hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist)
 -			udp_tunnel_push_rx_port(dev, vs->sock,
 -						(vs->flags & VXLAN_F_GPE) ?
 -						UDP_TUNNEL_TYPE_VXLAN_GPE :
 -						UDP_TUNNEL_TYPE_VXLAN);
 +		hlist_for_each_entry_rcu(vs, &vn->sock_list[i], hlist) {
 +			unsigned short type;
 +
 +			if (vs->flags & VXLAN_F_GPE)
 +				type = UDP_TUNNEL_TYPE_VXLAN_GPE;
 +			else
 +				type = UDP_TUNNEL_TYPE_VXLAN;
 +
 +			if (push)
 +				udp_tunnel_push_rx_port(dev, vs->sock, type);
 +			else
 +				udp_tunnel_drop_rx_port(dev, vs->sock, type);
 +		}
  	}
  	spin_unlock(&vn->sock_lock);
  }
@@@ -3638,15 -3631,10 +3639,15 @@@ static int vxlan_netdevice_event(struc
  	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
  	struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
  
 -	if (event == NETDEV_UNREGISTER)
 +	if (event == NETDEV_UNREGISTER) {
 +		vxlan_offload_rx_ports(dev, false);
  		vxlan_handle_lowerdev_unregister(vn, dev);
 -	else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
 -		vxlan_push_rx_ports(dev);
 +	} else if (event == NETDEV_REGISTER) {
 +		vxlan_offload_rx_ports(dev, true);
 +	} else if (event == NETDEV_UDP_TUNNEL_PUSH_INFO ||
 +		   event == NETDEV_UDP_TUNNEL_DROP_INFO) {
 +		vxlan_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
 +	}
  
  	return NOTIFY_DONE;
  }
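
vxlan_push_rx_ports() becomes vxlan_offload_rx_ports(dev, push), so the same
walk over the socket hash can either advertise or withdraw the listening UDP
ports, and the notifier now also acts on NETDEV_REGISTER/NETDEV_UNREGISTER and
on the new NETDEV_UDP_TUNNEL_DROP_INFO event.  On the driver side each
advertised or withdrawn port arrives through the ndo_udp_tunnel_add/del
callbacks.  The sketch below shows the general shape of such a consumer;
struct foo_priv and foo_write_vxlan_port() are invented placeholders, not
taken from any real driver.

	static void foo_udp_tunnel_add(struct net_device *dev,
				       struct udp_tunnel_info *ti)
	{
		struct foo_priv *priv = netdev_priv(dev);

		if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
			return;
		/* program the NIC parser to recognize this VXLAN port */
		foo_write_vxlan_port(priv, be16_to_cpu(ti->port));
	}

	static void foo_udp_tunnel_del(struct net_device *dev,
				       struct udp_tunnel_info *ti)
	{
		struct foo_priv *priv = netdev_priv(dev);

		if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
			return;
		/* stop matching the port so inner-header offloads are not
		 * applied to unrelated UDP traffic
		 */
		foo_write_vxlan_port(priv, 0);
	}
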
diff --combined include/net/tcp.h
index 999f3efe572b,ada65e767b28..afdab3781425
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@@ -139,7 -139,6 +139,7 @@@ void tcp_time_wait(struct sock *sk, in
  #endif
  #define TCP_RTO_MAX	((unsigned)(120*HZ))
  #define TCP_RTO_MIN	((unsigned)(HZ/5))
 +#define TCP_TIMEOUT_MIN	(2U) /* Min timeout for TCP timers in jiffies */
  #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))	/* RFC6298 2.1 initial RTO value	*/
  #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))	/* RFC 1122 initial RTO value, now
  						 * used as a fallback RTO for the
@@@ -151,6 -150,8 +151,6 @@@
  #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
  					                 * for local resources.
  					                 */
 -#define TCP_REO_TIMEOUT_MIN	(2000) /* Min RACK reordering timeout in usec */
 -
  #define TCP_KEEPALIVE_TIME	(120*60*HZ)	/* two hours */
  #define TCP_KEEPALIVE_PROBES	9		/* Max of 9 keepalive probes	*/
  #define TCP_KEEPALIVE_INTVL	(75*HZ)
@@@ -256,6 -257,7 +256,6 @@@ extern int sysctl_tcp_rmem[3]
  extern int sysctl_tcp_app_win;
  extern int sysctl_tcp_adv_win_scale;
  extern int sysctl_tcp_frto;
 -extern int sysctl_tcp_low_latency;
  extern int sysctl_tcp_nometrics_save;
  extern int sysctl_tcp_moderate_rcvbuf;
  extern int sysctl_tcp_tso_win_divisor;
@@@ -350,11 -352,8 +350,11 @@@ int tcp_v4_rcv(struct sk_buff *skb)
  
  int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw);
  int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 +int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size);
  int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size,
  		 int flags);
 +int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
 +			size_t size, int flags);
  ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
  		 size_t size, int flags);
  void tcp_release_cb(struct sock *sk);
@@@ -364,7 -363,7 +364,7 @@@ void tcp_delack_timer_handler(struct so
  int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg);
  int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
  void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 -			 const struct tcphdr *th, unsigned int len);
 +			 const struct tcphdr *th);
  void tcp_rcv_space_adjust(struct sock *sk);
  int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
  void tcp_twsk_destructor(struct sock *sk);
@@@ -634,6 -633,29 +634,6 @@@ static inline u32 __tcp_set_rto(const s
  	return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us);
  }
  
 -static inline void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd)
 -{
 -	tp->pred_flags = htonl((tp->tcp_header_len << 26) |
 -			       ntohl(TCP_FLAG_ACK) |
 -			       snd_wnd);
 -}
 -
 -static inline void tcp_fast_path_on(struct tcp_sock *tp)
 -{
 -	__tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale);
 -}
 -
 -static inline void tcp_fast_path_check(struct sock *sk)
 -{
 -	struct tcp_sock *tp = tcp_sk(sk);
 -
 -	if (RB_EMPTY_ROOT(&tp->out_of_order_queue) &&
 -	    tp->rcv_wnd &&
 -	    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf &&
 -	    !tp->urg_data)
 -		tcp_fast_path_on(tp);
 -}
 -
  /* Compute the actual rto_min value */
  static inline u32 tcp_rto_min(struct sock *sk)
  {
@@@ -827,16 -849,6 +827,16 @@@ static inline int tcp_v6_iif(const stru
  
  	return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif;
  }
 +
 +/* TCP_SKB_CB reference means this can not be used from early demux */
 +static inline int tcp_v6_sdif(const struct sk_buff *skb)
 +{
 +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
 +	if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags))
 +		return TCP_SKB_CB(skb)->header.h6.iif;
 +#endif
 +	return 0;
 +}
  #endif
  
  /* TCP_SKB_CB reference means this can not be used from early demux */
@@@ -850,16 -862,6 +850,16 @@@ static inline bool inet_exact_dif_match
  	return false;
  }
  
 +/* TCP_SKB_CB reference means this can not be used from early demux */
 +static inline int tcp_v4_sdif(struct sk_buff *skb)
 +{
 +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
 +	if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags))
 +		return TCP_SKB_CB(skb)->header.h4.iif;
 +#endif
 +	return 0;
 +}
 +
  /* Due to TSO, an SKB can be composed of multiple actual
   * packets.  To keep these tracked properly, we use this.
   */
@@@ -903,8 -905,9 +903,8 @@@ enum tcp_ca_event 
  
  /* Information about inbound ACK, passed to cong_ops->in_ack_event() */
  enum tcp_ca_ack_event_flags {
 -	CA_ACK_SLOWPATH		= (1 << 0),	/* In slow path processing */
 -	CA_ACK_WIN_UPDATE	= (1 << 1),	/* ACK updated window */
 -	CA_ACK_ECE		= (1 << 2),	/* ECE bit is set on ack */
 +	CA_ACK_WIN_UPDATE	= (1 << 0),	/* ACK updated window */
 +	CA_ACK_ECE		= (1 << 1),	/* ECE bit is set on ack */
  };
  
  /*
@@@ -1242,6 -1245,17 +1242,6 @@@ static inline bool tcp_checksum_complet
  		__tcp_checksum_complete(skb);
  }
  
 -/* Prequeue for VJ style copy to user, combined with checksumming. */
 -
 -static inline void tcp_prequeue_init(struct tcp_sock *tp)
 -{
 -	tp->ucopy.task = NULL;
 -	tp->ucopy.len = 0;
 -	tp->ucopy.memory = 0;
 -	skb_queue_head_init(&tp->ucopy.prequeue);
 -}
 -
 -bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
  bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb);
  int tcp_filter(struct sock *sk, struct sk_buff *skb);
  
@@@ -1902,11 -1916,20 +1902,21 @@@ extern void tcp_rack_advance(struct tcp
  			     u64 xmit_time);
  extern void tcp_rack_reo_timeout(struct sock *sk);
  
+ /* At how many usecs into the future should the RTO fire? */
+ static inline s64 tcp_rto_delta_us(const struct sock *sk)
+ {
+ 	const struct sk_buff *skb = tcp_write_queue_head(sk);
+ 	u32 rto = inet_csk(sk)->icsk_rto;
+ 	u64 rto_time_stamp_us = skb->skb_mstamp + jiffies_to_usecs(rto);
+ 
+ 	return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp;
+ }
+ 
  /*
   * Save and compile IPv4 options, return a pointer to it
   */
 -static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb)
 +static inline struct ip_options_rcu *tcp_v4_save_options(struct net *net,
 +							 struct sk_buff *skb)
  {
  	const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt;
  	struct ip_options_rcu *dopt = NULL;
@@@ -1915,7 -1938,7 +1925,7 @@@
  		int opt_size = sizeof(*dopt) + opt->optlen;
  
  		dopt = kmalloc(opt_size, GFP_ATOMIC);
 -		if (dopt && __ip_options_echo(&dopt->opt, skb, opt)) {
 +		if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) {
  			kfree(dopt);
  			dopt = NULL;
  		}
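
The new tcp_rto_delta_us() helper added to tcp.h above answers "how far in the
future should the RTO for the head of the write queue fire", in microseconds
relative to tp->tcp_mstamp; the result can be negative when the timer is
already overdue (for example when the retransmit timer fired while the socket
was locked and had to be rescheduled).  Worked example: if the head skb was
last (re)transmitted at skb_mstamp = 1,000,000 us, icsk_rto corresponds to
200 ms and tcp_mstamp is now 1,050,000 us, the helper returns
1,000,000 + 200,000 - 1,050,000 = 150,000 us, i.e. the RTO is still 150 ms
away; had the clock already passed 1,200,000 us the value would come back
negative.  The tcp_input.c and tcp_output.c hunks below consume exactly this
value to re-arm the RTO and to cap the TLP probe timeout.
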
diff --combined net/core/dev.c
index 1d75499add72,ce15a06d5558..3f69f6e71824
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@@ -144,7 -144,6 +144,7 @@@
  #include <linux/netfilter_ingress.h>
  #include <linux/crash_dump.h>
  #include <linux/sctp.h>
 +#include <net/udp_tunnel.h>
  
  #include "net-sysfs.h"
  
@@@ -1414,7 -1413,7 +1414,7 @@@ int dev_open(struct net_device *dev
  }
  EXPORT_SYMBOL(dev_open);
  
 -static int __dev_close_many(struct list_head *head)
 +static void __dev_close_many(struct list_head *head)
  {
  	struct net_device *dev;
  
@@@ -1456,18 -1455,23 +1456,18 @@@
  		dev->flags &= ~IFF_UP;
  		netpoll_poll_enable(dev);
  	}
 -
 -	return 0;
  }
  
 -static int __dev_close(struct net_device *dev)
 +static void __dev_close(struct net_device *dev)
  {
 -	int retval;
  	LIST_HEAD(single);
  
  	list_add(&dev->close_list, &single);
 -	retval = __dev_close_many(&single);
 +	__dev_close_many(&single);
  	list_del(&single);
 -
 -	return retval;
  }
  
 -int dev_close_many(struct list_head *head, bool unlink)
 +void dev_close_many(struct list_head *head, bool unlink)
  {
  	struct net_device *dev, *tmp;
  
@@@ -1484,6 -1488,8 +1484,6 @@@
  		if (unlink)
  			list_del_init(&dev->close_list);
  	}
 -
 -	return 0;
  }
  EXPORT_SYMBOL(dev_close_many);
  
@@@ -1496,7 -1502,7 +1496,7 @@@
   *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
   *	chain.
   */
 -int dev_close(struct net_device *dev)
 +void dev_close(struct net_device *dev)
  {
  	if (dev->flags & IFF_UP) {
  		LIST_HEAD(single);
@@@ -1505,6 -1511,7 +1505,6 @@@
  		dev_close_many(&single, true);
  		list_del(&single);
  	}
 -	return 0;
  }
  EXPORT_SYMBOL(dev_close);
  
@@@ -1853,7 -1860,7 +1853,7 @@@ static inline int deliver_skb(struct sk
  			      struct packet_type *pt_prev,
  			      struct net_device *orig_dev)
  {
 -	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
 +	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
  		return -ENOMEM;
  	refcount_inc(&skb->users);
  	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
@@@ -2732,7 -2739,7 +2732,7 @@@ static inline bool skb_needs_check(stru
  {
  	if (tx_path)
  		return skb->ip_summed != CHECKSUM_PARTIAL &&
- 		       skb->ip_summed != CHECKSUM_NONE;
+ 		       skb->ip_summed != CHECKSUM_UNNECESSARY;
  
  	return skb->ip_summed == CHECKSUM_NONE;
  }
@@@ -3858,121 -3865,6 +3858,121 @@@ drop
  	return NET_RX_DROP;
  }
  
 +static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 +				     struct bpf_prog *xdp_prog)
 +{
 +	struct xdp_buff xdp;
 +	u32 act = XDP_DROP;
 +	void *orig_data;
 +	int hlen, off;
 +	u32 mac_len;
 +
 +	/* Reinjected packets coming from act_mirred or similar should
 +	 * not get XDP generic processing.
 +	 */
 +	if (skb_cloned(skb))
 +		return XDP_PASS;
 +
 +	if (skb_linearize(skb))
 +		goto do_drop;
 +
 +	/* The XDP program wants to see the packet starting at the MAC
 +	 * header.
 +	 */
 +	mac_len = skb->data - skb_mac_header(skb);
 +	hlen = skb_headlen(skb) + mac_len;
 +	xdp.data = skb->data - mac_len;
 +	xdp.data_end = xdp.data + hlen;
 +	xdp.data_hard_start = skb->data - skb_headroom(skb);
 +	orig_data = xdp.data;
 +
 +	act = bpf_prog_run_xdp(xdp_prog, &xdp);
 +
 +	off = xdp.data - orig_data;
 +	if (off > 0)
 +		__skb_pull(skb, off);
 +	else if (off < 0)
 +		__skb_push(skb, -off);
 +
 +	switch (act) {
 +	case XDP_REDIRECT:
 +	case XDP_TX:
 +		__skb_push(skb, mac_len);
 +		/* fall through */
 +	case XDP_PASS:
 +		break;
 +
 +	default:
 +		bpf_warn_invalid_xdp_action(act);
 +		/* fall through */
 +	case XDP_ABORTED:
 +		trace_xdp_exception(skb->dev, xdp_prog, act);
 +		/* fall through */
 +	case XDP_DROP:
 +	do_drop:
 +		kfree_skb(skb);
 +		break;
 +	}
 +
 +	return act;
 +}
 +
 +/* When doing generic XDP we have to bypass the qdisc layer and the
 + * network taps in order to match in-driver-XDP behavior.
 + */
 +static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
 +{
 +	struct net_device *dev = skb->dev;
 +	struct netdev_queue *txq;
 +	bool free_skb = true;
 +	int cpu, rc;
 +
 +	txq = netdev_pick_tx(dev, skb, NULL);
 +	cpu = smp_processor_id();
 +	HARD_TX_LOCK(dev, txq, cpu);
 +	if (!netif_xmit_stopped(txq)) {
 +		rc = netdev_start_xmit(skb, dev, txq, 0);
 +		if (dev_xmit_complete(rc))
 +			free_skb = false;
 +	}
 +	HARD_TX_UNLOCK(dev, txq);
 +	if (free_skb) {
 +		trace_xdp_exception(dev, xdp_prog, XDP_TX);
 +		kfree_skb(skb);
 +	}
 +}
 +
 +static struct static_key generic_xdp_needed __read_mostly;
 +
 +static int do_xdp_generic(struct sk_buff *skb)
 +{
 +	struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
 +
 +	if (xdp_prog) {
 +		u32 act = netif_receive_generic_xdp(skb, xdp_prog);
 +		int err;
 +
 +		if (act != XDP_PASS) {
 +			switch (act) {
 +			case XDP_REDIRECT:
 +				err = xdp_do_generic_redirect(skb->dev, skb);
 +				if (err)
 +					goto out_redir;
 +			/* fallthru to submit skb */
 +			case XDP_TX:
 +				generic_xdp_tx(skb, xdp_prog);
 +				break;
 +			}
 +			return XDP_DROP;
 +		}
 +	}
 +	return XDP_PASS;
 +out_redir:
 +	trace_xdp_exception(skb->dev, xdp_prog, XDP_REDIRECT);
 +	kfree_skb(skb);
 +	return XDP_DROP;
 +}
 +
  static int netif_rx_internal(struct sk_buff *skb)
  {
  	int ret;
@@@ -3980,18 -3872,6 +3980,18 @@@
  	net_timestamp_check(netdev_tstamp_prequeue, skb);
  
  	trace_netif_rx(skb);
 +
 +	if (static_key_false(&generic_xdp_needed)) {
 +		int ret = do_xdp_generic(skb);
 +
 +		/* Consider XDP consuming the packet a success from
 +	 * the netdev point of view; we do not want to count
 +		 * this as an error.
 +		 */
 +		if (ret != XDP_PASS)
 +			return NET_RX_SUCCESS;
 +	}
 +
  #ifdef CONFIG_RPS
  	if (static_key_false(&rps_needed)) {
  		struct rps_dev_flow voidflow, *rflow = &voidflow;
@@@ -4412,7 -4292,7 +4412,7 @@@ skip_classify
  	}
  
  	if (pt_prev) {
 -		if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
 +		if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
  			goto drop;
  		else
  			ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
@@@ -4458,6 -4338,8 +4458,6 @@@ static int __netif_receive_skb(struct s
  	return ret;
  }
  
 -static struct static_key generic_xdp_needed __read_mostly;
 -
  static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp)
  {
  	struct bpf_prog *old = rtnl_dereference(dev->xdp_prog);
@@@ -4491,6 -4373,89 +4491,6 @@@
  	return ret;
  }
  
 -static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 -				     struct bpf_prog *xdp_prog)
 -{
 -	struct xdp_buff xdp;
 -	u32 act = XDP_DROP;
 -	void *orig_data;
 -	int hlen, off;
 -	u32 mac_len;
 -
 -	/* Reinjected packets coming from act_mirred or similar should
 -	 * not get XDP generic processing.
 -	 */
 -	if (skb_cloned(skb))
 -		return XDP_PASS;
 -
 -	if (skb_linearize(skb))
 -		goto do_drop;
 -
 -	/* The XDP program wants to see the packet starting at the MAC
 -	 * header.
 -	 */
 -	mac_len = skb->data - skb_mac_header(skb);
 -	hlen = skb_headlen(skb) + mac_len;
 -	xdp.data = skb->data - mac_len;
 -	xdp.data_end = xdp.data + hlen;
 -	xdp.data_hard_start = skb->data - skb_headroom(skb);
 -	orig_data = xdp.data;
 -
 -	act = bpf_prog_run_xdp(xdp_prog, &xdp);
 -
 -	off = xdp.data - orig_data;
 -	if (off > 0)
 -		__skb_pull(skb, off);
 -	else if (off < 0)
 -		__skb_push(skb, -off);
 -
 -	switch (act) {
 -	case XDP_TX:
 -		__skb_push(skb, mac_len);
 -		/* fall through */
 -	case XDP_PASS:
 -		break;
 -
 -	default:
 -		bpf_warn_invalid_xdp_action(act);
 -		/* fall through */
 -	case XDP_ABORTED:
 -		trace_xdp_exception(skb->dev, xdp_prog, act);
 -		/* fall through */
 -	case XDP_DROP:
 -	do_drop:
 -		kfree_skb(skb);
 -		break;
 -	}
 -
 -	return act;
 -}
 -
 -/* When doing generic XDP we have to bypass the qdisc layer and the
 - * network taps in order to match in-driver-XDP behavior.
 - */
 -static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
 -{
 -	struct net_device *dev = skb->dev;
 -	struct netdev_queue *txq;
 -	bool free_skb = true;
 -	int cpu, rc;
 -
 -	txq = netdev_pick_tx(dev, skb, NULL);
 -	cpu = smp_processor_id();
 -	HARD_TX_LOCK(dev, txq, cpu);
 -	if (!netif_xmit_stopped(txq)) {
 -		rc = netdev_start_xmit(skb, dev, txq, 0);
 -		if (dev_xmit_complete(rc))
 -			free_skb = false;
 -	}
 -	HARD_TX_UNLOCK(dev, txq);
 -	if (free_skb) {
 -		trace_xdp_exception(dev, xdp_prog, XDP_TX);
 -		kfree_skb(skb);
 -	}
 -}
 -
  static int netif_receive_skb_internal(struct sk_buff *skb)
  {
  	int ret;
@@@ -4503,11 -4468,17 +4503,11 @@@
  	rcu_read_lock();
  
  	if (static_key_false(&generic_xdp_needed)) {
 -		struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog);
 +		int ret = do_xdp_generic(skb);
  
 -		if (xdp_prog) {
 -			u32 act = netif_receive_generic_xdp(skb, xdp_prog);
 -
 -			if (act != XDP_PASS) {
 -				rcu_read_unlock();
 -				if (act == XDP_TX)
 -					generic_xdp_tx(skb, xdp_prog);
 -				return NET_RX_DROP;
 -			}
 +		if (ret != XDP_PASS) {
 +			rcu_read_unlock();
 +			return NET_RX_DROP;
  		}
  	}
  
@@@ -6718,12 -6689,8 +6718,12 @@@ int __dev_change_flags(struct net_devic
  	 */
  
  	ret = 0;
 -	if ((old_flags ^ flags) & IFF_UP)
 -		ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev);
 +	if ((old_flags ^ flags) & IFF_UP) {
 +		if (old_flags & IFF_UP)
 +			__dev_close(dev);
 +		else
 +			ret = __dev_open(dev);
 +	}
  
  	if ((flags ^ dev->gflags) & IFF_PROMISC) {
  		int inc = (flags & IFF_PROMISC) ? 1 : -1;
@@@ -7268,6 -7235,24 +7268,6 @@@ static netdev_features_t netdev_fix_fea
  		features &= ~NETIF_F_GSO;
  	}
  
 -	/* UFO needs SG and checksumming */
 -	if (features & NETIF_F_UFO) {
 -		/* maybe split UFO into V4 and V6? */
 -		if (!(features & NETIF_F_HW_CSUM) &&
 -		    ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) !=
 -		     (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) {
 -			netdev_dbg(dev,
 -				"Dropping NETIF_F_UFO since no checksum offload features.\n");
 -			features &= ~NETIF_F_UFO;
 -		}
 -
 -		if (!(features & NETIF_F_SG)) {
 -			netdev_dbg(dev,
 -				"Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n");
 -			features &= ~NETIF_F_UFO;
 -		}
 -	}
 -
  	/* GSO partial features require GSO partial be set */
  	if ((features & dev->gso_partial_features) &&
  	    !(features & NETIF_F_GSO_PARTIAL)) {
@@@ -7328,27 -7313,8 +7328,27 @@@ sync_lower
  	netdev_for_each_lower_dev(dev, lower, iter)
  		netdev_sync_lower_features(dev, lower, features);
  
 -	if (!err)
 +	if (!err) {
 +		netdev_features_t diff = features ^ dev->features;
 +
 +		if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) {
 +			/* udp_tunnel_{get,drop}_rx_info both need
 +			 * NETIF_F_RX_UDP_TUNNEL_PORT enabled on the
 +			 * device, or they won't do anything.
 +			 * Thus we need to update dev->features
 +			 * *before* calling udp_tunnel_get_rx_info,
 +			 * but *after* calling udp_tunnel_drop_rx_info.
 +			 */
 +			if (features & NETIF_F_RX_UDP_TUNNEL_PORT) {
 +				dev->features = features;
 +				udp_tunnel_get_rx_info(dev);
 +			} else {
 +				udp_tunnel_drop_rx_info(dev);
 +			}
 +		}
 +
  		dev->features = features;
 +	}
  
  	return err < 0 ? 0 : 1;
  }
@@@ -7550,12 -7516,6 +7550,12 @@@ int register_netdevice(struct net_devic
  	 */
  	dev->hw_features |= NETIF_F_SOFT_FEATURES;
  	dev->features |= NETIF_F_SOFT_FEATURES;
 +
 +	if (dev->netdev_ops->ndo_udp_tunnel_add) {
 +		dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
 +		dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
 +	}
 +
  	dev->wanted_features = dev->features & dev->hw_features;
  
  	if (!(dev->flags & IFF_LOOPBACK))
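
The dev.c hunks consolidate the generic-XDP hook into do_xdp_generic(), which
now runs for netif_rx() as well as netif_receive_skb(), builds an xdp_buff so
the program sees the frame from the MAC header onward, and handles
XDP_REDIRECT in addition to XDP_TX/XDP_DROP/XDP_PASS.  For reference, the kind
of program being executed here is ordinary restricted C built with clang for
the BPF target; the sketch below (drop UDP datagrams to the discard port,
pass everything else) only illustrates the verdicts the generic path acts on
and is not code from this series.

	#include <linux/bpf.h>
	#include <linux/if_ether.h>
	#include <linux/ip.h>
	#include <linux/in.h>
	#include <linux/udp.h>

	__attribute__((section("xdp"), used))
	int xdp_drop_discard(struct xdp_md *ctx)
	{
		void *data = (void *)(long)ctx->data;
		void *data_end = (void *)(long)ctx->data_end;
		struct ethhdr *eth = data;
		struct iphdr *iph = data + sizeof(*eth);
		struct udphdr *udph = data + sizeof(*eth) + sizeof(*iph);

		if ((void *)(udph + 1) > data_end)	/* single bounds check for the verifier */
			return XDP_PASS;
		if (eth->h_proto != __constant_htons(ETH_P_IP) ||
		    iph->protocol != IPPROTO_UDP)
			return XDP_PASS;
		return udph->dest == __constant_htons(9) ? XDP_DROP : XDP_PASS;
	}
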
diff --combined net/ipv4/tcp_input.c
index 842ed75ccb25,53de1424c13c..d73903fe8c83
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@@ -103,9 -103,11 +103,10 @@@ int sysctl_tcp_invalid_ratelimit __read
  #define FLAG_DATA_SACKED	0x20 /* New SACK.				*/
  #define FLAG_ECE		0x40 /* ECE in this ACK				*/
  #define FLAG_LOST_RETRANS	0x80 /* This ACK marks some retransmission lost */
 -#define FLAG_SLOWPATH		0x100 /* Do not skip RFC checks for window update.*/
  #define FLAG_ORIG_SACK_ACKED	0x200 /* Never retransmitted data are (s)acked	*/
  #define FLAG_SND_UNA_ADVANCED	0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
  #define FLAG_DSACKING_ACK	0x800 /* SACK blocks contained D-SACK info */
+ #define FLAG_SET_XMIT_TIMER	0x1000 /* Set TLP or RTO timer */
  #define FLAG_SACK_RENEGING	0x2000 /* snd_una advanced to a sacked seq */
  #define FLAG_UPDATE_TS_RECENT	0x4000 /* tcp_replace_ts_recent() */
  #define FLAG_NO_CHALLENGE_ACK	0x8000 /* do not call tcp_send_challenge_ack()	*/
@@@ -1950,7 -1952,6 +1951,7 @@@ void tcp_enter_loss(struct sock *sk
  	    !after(tp->high_seq, tp->snd_una) ||
  	    (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) {
  		tp->prior_ssthresh = tcp_current_ssthresh(sk);
 +		tp->prior_cwnd = tp->snd_cwnd;
  		tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
  		tcp_ca_event(sk, CA_EVENT_LOSS);
  		tcp_init_undo(tp);
@@@ -2520,8 -2521,8 +2521,8 @@@ static inline void tcp_end_cwnd_reducti
  		return;
  
  	/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
- 	if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
- 	    (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+ 	if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
+ 	    (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
  		tp->snd_cwnd = tp->snd_ssthresh;
  		tp->snd_cwnd_stamp = tcp_jiffies32;
  	}
@@@ -3004,10 -3005,7 +3005,7 @@@ void tcp_rearm_rto(struct sock *sk
  		/* Offset the time elapsed after installing regular RTO */
  		if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
  		    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
- 			struct sk_buff *skb = tcp_write_queue_head(sk);
- 			u64 rto_time_stamp = skb->skb_mstamp +
- 					     jiffies_to_usecs(rto);
- 			s64 delta_us = rto_time_stamp - tp->tcp_mstamp;
+ 			s64 delta_us = tcp_rto_delta_us(sk);
  			/* delta_us may not be positive if the socket is locked
  			 * when the retrans timer fires and is rescheduled.
  			 */
@@@ -3019,6 -3017,13 +3017,13 @@@
  	}
  }
  
+ /* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
+ static void tcp_set_xmit_timer(struct sock *sk)
+ {
+ 	if (!tcp_schedule_loss_probe(sk))
+ 		tcp_rearm_rto(sk);
+ }
+ 
  /* If we get here, the whole TSO packet has not been acked. */
  static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
  {
@@@ -3180,7 -3185,7 +3185,7 @@@ static int tcp_clean_rtx_queue(struct s
  					ca_rtt_us, sack->rate);
  
  	if (flag & FLAG_ACKED) {
- 		tcp_rearm_rto(sk);
+ 		flag |= FLAG_SET_XMIT_TIMER;  /* set TLP or RTO timer */
  		if (unlikely(icsk->icsk_mtup.probe_size &&
  			     !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
  			tcp_mtup_probe_success(sk);
@@@ -3208,7 -3213,7 +3213,7 @@@
  		 * after when the head was last (re)transmitted. Otherwise the
  		 * timeout may continue to extend in loss recovery.
  		 */
- 		tcp_rearm_rto(sk);
+ 		flag |= FLAG_SET_XMIT_TIMER;  /* set TLP or RTO timer */
  	}
  
  	if (icsk->icsk_ca_ops->pkts_acked) {
@@@ -3367,6 -3372,12 +3372,6 @@@ static int tcp_ack_update_window(struc
  		if (tp->snd_wnd != nwin) {
  			tp->snd_wnd = nwin;
  
 -			/* Note, it is the only place, where
 -			 * fast path is recovered for sending TCP.
 -			 */
 -			tp->pred_flags = 0;
 -			tcp_fast_path_check(sk);
 -
  			if (tcp_send_head(sk))
  				tcp_slow_start_after_idle_check(sk);
  
@@@ -3548,7 -3559,6 +3553,7 @@@ static int tcp_ack(struct sock *sk, con
  	u32 lost = tp->lost;
  	int acked = 0; /* Number of packets newly acked */
  	int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */
 +	u32 ack_ev_flags = 0;
  
  	sack_state.first_sackt = 0;
  	sack_state.rate = &rs;
@@@ -3575,9 -3585,6 +3580,6 @@@
  	if (after(ack, tp->snd_nxt))
  		goto invalid_ack;
  
- 	if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
- 		tcp_rearm_rto(sk);
- 
  	if (after(ack, prior_snd_una)) {
  		flag |= FLAG_SND_UNA_ADVANCED;
  		icsk->icsk_retransmits = 0;
@@@ -3592,26 -3599,42 +3594,26 @@@
  	if (flag & FLAG_UPDATE_TS_RECENT)
  		tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
  
 -	if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
 -		/* Window is constant, pure forward advance.
 -		 * No more checks are required.
 -		 * Note, we use the fact that SND.UNA>=SND.WL2.
 -		 */
 -		tcp_update_wl(tp, ack_seq);
 -		tcp_snd_una_update(tp, ack);
 -		flag |= FLAG_WIN_UPDATE;
 -
 -		tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
 -
 -		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPACKS);
 -	} else {
 -		u32 ack_ev_flags = CA_ACK_SLOWPATH;
 -
 -		if (ack_seq != TCP_SKB_CB(skb)->end_seq)
 -			flag |= FLAG_DATA;
 -		else
 -			NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
 +	if (ack_seq != TCP_SKB_CB(skb)->end_seq)
 +		flag |= FLAG_DATA;
 +	else
 +		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPUREACKS);
  
 -		flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
 +	flag |= tcp_ack_update_window(sk, skb, ack, ack_seq);
  
 -		if (TCP_SKB_CB(skb)->sacked)
 -			flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
 -							&sack_state);
 +	if (TCP_SKB_CB(skb)->sacked)
 +		flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una,
 +						&sack_state);
  
 -		if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
 -			flag |= FLAG_ECE;
 -			ack_ev_flags |= CA_ACK_ECE;
 -		}
 +	if (tcp_ecn_rcv_ecn_echo(tp, tcp_hdr(skb))) {
 +		flag |= FLAG_ECE;
 +		ack_ev_flags = CA_ACK_ECE;
 +	}
  
 -		if (flag & FLAG_WIN_UPDATE)
 -			ack_ev_flags |= CA_ACK_WIN_UPDATE;
 +	if (flag & FLAG_WIN_UPDATE)
 +		ack_ev_flags |= CA_ACK_WIN_UPDATE;
  
 -		tcp_in_ack_event(sk, ack_ev_flags);
 -	}
 +	tcp_in_ack_event(sk, ack_ev_flags);
  
  	/* We passed data and got it acked, remove any soft error
  	 * log. Something worked...
@@@ -3626,18 -3649,20 +3628,20 @@@
  	flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
  				    &sack_state);
  
+ 	if (tp->tlp_high_seq)
+ 		tcp_process_tlp_ack(sk, ack, flag);
+ 	/* If needed, reset TLP/RTO timer; RACK may later override this. */
+ 	if (flag & FLAG_SET_XMIT_TIMER)
+ 		tcp_set_xmit_timer(sk);
+ 
  	if (tcp_ack_is_dubious(sk, flag)) {
  		is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
  		tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
  	}
- 	if (tp->tlp_high_seq)
- 		tcp_process_tlp_ack(sk, ack, flag);
  
  	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
  		sk_dst_confirm(sk);
  
- 	if (icsk->icsk_pending == ICSK_TIME_RETRANS)
- 		tcp_schedule_loss_probe(sk);
  	delivered = tp->delivered - delivered;	/* freshly ACKed or SACKed */
  	lost = tp->lost - lost;			/* freshly marked lost */
  	tcp_rate_gen(sk, delivered, lost, sack_state.rate);
@@@ -4377,6 -4402,8 +4381,6 @@@ static void tcp_data_queue_ofo(struct s
  		return;
  	}
  
 -	/* Disable header prediction. */
 -	tp->pred_flags = 0;
  	inet_csk_schedule_ack(sk);
  
  	NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE);
@@@ -4565,8 -4592,8 +4569,8 @@@ err
  static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
  {
  	struct tcp_sock *tp = tcp_sk(sk);
 -	bool fragstolen = false;
 -	int eaten = -1;
 +	bool fragstolen;
 +	int eaten;
  
  	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) {
  		__kfree_skb(skb);
@@@ -4588,13 -4615,32 +4592,13 @@@
  			goto out_of_window;
  
  		/* Ok. In sequence. In window. */
 -		if (tp->ucopy.task == current &&
 -		    tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
 -		    sock_owned_by_user(sk) && !tp->urg_data) {
 -			int chunk = min_t(unsigned int, skb->len,
 -					  tp->ucopy.len);
 -
 -			__set_current_state(TASK_RUNNING);
 -
 -			if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) {
 -				tp->ucopy.len -= chunk;
 -				tp->copied_seq += chunk;
 -				eaten = (chunk == skb->len);
 -				tcp_rcv_space_adjust(sk);
 -			}
 -		}
 -
 -		if (eaten <= 0) {
  queue_and_out:
 -			if (eaten < 0) {
 -				if (skb_queue_len(&sk->sk_receive_queue) == 0)
 -					sk_forced_mem_schedule(sk, skb->truesize);
 -				else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
 -					goto drop;
 -			}
 -			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 -		}
 +		if (skb_queue_len(&sk->sk_receive_queue) == 0)
 +			sk_forced_mem_schedule(sk, skb->truesize);
 +		else if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
 +			goto drop;
 +
 +		eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
  		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
  		if (skb->len)
  			tcp_event_data_recv(sk, skb);
@@@ -4614,6 -4660,8 +4618,6 @@@
  		if (tp->rx_opt.num_sacks)
  			tcp_sack_remove(tp);
  
 -		tcp_fast_path_check(sk);
 -
  		if (eaten > 0)
  			kfree_skb_partial(skb, fragstolen);
  		if (!sock_flag(sk, SOCK_DEAD))
@@@ -4939,6 -4987,7 +4943,6 @@@ static int tcp_prune_queue(struct sock 
  	NET_INC_STATS(sock_net(sk), LINUX_MIB_RCVPRUNED);
  
  	/* Massive buffer overcommit. */
 -	tp->pred_flags = 0;
  	return -1;
  }
  
@@@ -5110,6 -5159,9 +5114,6 @@@ static void tcp_check_urg(struct sock *
  
  	tp->urg_data = TCP_URG_NOTYET;
  	tp->urg_seq = ptr;
 -
 -	/* Disable header prediction. */
 -	tp->pred_flags = 0;
  }
  
  /* This is the 'fast' part of urgent handling. */
@@@ -5138,6 -5190,26 +5142,6 @@@ static void tcp_urg(struct sock *sk, st
  	}
  }
  
 -static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 -{
 -	struct tcp_sock *tp = tcp_sk(sk);
 -	int chunk = skb->len - hlen;
 -	int err;
 -
 -	if (skb_csum_unnecessary(skb))
 -		err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk);
 -	else
 -		err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg);
 -
 -	if (!err) {
 -		tp->ucopy.len -= chunk;
 -		tp->copied_seq += chunk;
 -		tcp_rcv_space_adjust(sk);
 -	}
 -
 -	return err;
 -}
 -
  /* Accept RST for rcv_nxt - 1 after a FIN.
   * When tcp connections are abruptly terminated from Mac OSX (via ^C), a
   * FIN is sent followed by a RST packet. The RST is sent with the same
@@@ -5268,29 -5340,201 +5272,29 @@@ discard
  
  /*
   *	TCP receive function for the ESTABLISHED state.
 - *
 - *	It is split into a fast path and a slow path. The fast path is
 - * 	disabled when:
 - *	- A zero window was announced from us - zero window probing
 - *        is only handled properly in the slow path.
 - *	- Out of order segments arrived.
 - *	- Urgent data is expected.
 - *	- There is no buffer space left
 - *	- Unexpected TCP flags/window values/header lengths are received
 - *	  (detected by checking the TCP header against pred_flags)
 - *	- Data is sent in both directions. Fast path only supports pure senders
 - *	  or pure receivers (this means either the sequence number or the ack
 - *	  value must stay constant)
 - *	- Unexpected TCP option.
 - *
 - *	When these conditions are not satisfied it drops into a standard
 - *	receive procedure patterned after RFC793 to handle all cases.
 - *	The first three cases are guaranteed by proper pred_flags setting,
 - *	the rest is checked inline. Fast processing is turned on in
 - *	tcp_data_queue when everything is OK.
   */
  void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 -			 const struct tcphdr *th, unsigned int len)
 +			 const struct tcphdr *th)
  {
 +	unsigned int len = skb->len;
  	struct tcp_sock *tp = tcp_sk(sk);
  
  	tcp_mstamp_refresh(tp);
  	if (unlikely(!sk->sk_rx_dst))
  		inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb);
 -	/*
 -	 *	Header prediction.
 -	 *	The code loosely follows the one in the famous
 -	 *	"30 instruction TCP receive" Van Jacobson mail.
 -	 *
 -	 *	Van's trick is to deposit buffers into socket queue
 -	 *	on a device interrupt, to call tcp_recv function
 -	 *	on the receive process context and checksum and copy
 -	 *	the buffer to user space. smart...
 -	 *
 -	 *	Our current scheme is not silly either but we take the
 -	 *	extra cost of the net_bh soft interrupt processing...
 -	 *	We do checksum and copy also but from device to kernel.
 -	 */
  
  	tp->rx_opt.saw_tstamp = 0;
  
 -	/*	pred_flags is 0xS?10 << 16 + snd_wnd
 -	 *	if header_prediction is to be made
 -	 *	'S' will always be tp->tcp_header_len >> 2
 -	 *	'?' will be 0 for the fast path, otherwise pred_flags is 0 to
 -	 *  turn it off	(when there are holes in the receive
 -	 *	 space for instance)
 -	 *	PSH flag is ignored.
 -	 */
 -
 -	if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
 -	    TCP_SKB_CB(skb)->seq == tp->rcv_nxt &&
 -	    !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) {
 -		int tcp_header_len = tp->tcp_header_len;
 -
 -		/* Timestamp header prediction: tcp_header_len
 -		 * is automatically equal to th->doff*4 due to pred_flags
 -		 * match.
 -		 */
 -
 -		/* Check timestamp */
 -		if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) {
 -			/* No? Slow path! */
 -			if (!tcp_parse_aligned_timestamp(tp, th))
 -				goto slow_path;
 -
 -			/* If PAWS failed, check it more carefully in slow path */
 -			if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0)
 -				goto slow_path;
 -
 -			/* DO NOT update ts_recent here, if checksum fails
 -			 * and timestamp was corrupted part, it will result
 -			 * in a hung connection since we will drop all
 -			 * future packets due to the PAWS test.
 -			 */
 -		}
 -
 -		if (len <= tcp_header_len) {
 -			/* Bulk data transfer: sender */
 -			if (len == tcp_header_len) {
 -				/* Predicted packet is in window by definition.
 -				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
 -				 * Hence, check seq<=rcv_wup reduces to:
 -				 */
 -				if (tcp_header_len ==
 -				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
 -				    tp->rcv_nxt == tp->rcv_wup)
 -					tcp_store_ts_recent(tp);
 -
 -				/* We know that such packets are checksummed
 -				 * on entry.
 -				 */
 -				tcp_ack(sk, skb, 0);
 -				__kfree_skb(skb);
 -				tcp_data_snd_check(sk);
 -				return;
 -			} else { /* Header too small */
 -				TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
 -				goto discard;
 -			}
 -		} else {
 -			int eaten = 0;
 -			bool fragstolen = false;
 -
 -			if (tp->ucopy.task == current &&
 -			    tp->copied_seq == tp->rcv_nxt &&
 -			    len - tcp_header_len <= tp->ucopy.len &&
 -			    sock_owned_by_user(sk)) {
 -				__set_current_state(TASK_RUNNING);
 -
 -				if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) {
 -					/* Predicted packet is in window by definition.
 -					 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
 -					 * Hence, check seq<=rcv_wup reduces to:
 -					 */
 -					if (tcp_header_len ==
 -					    (sizeof(struct tcphdr) +
 -					     TCPOLEN_TSTAMP_ALIGNED) &&
 -					    tp->rcv_nxt == tp->rcv_wup)
 -						tcp_store_ts_recent(tp);
 -
 -					tcp_rcv_rtt_measure_ts(sk, skb);
 -
 -					__skb_pull(skb, tcp_header_len);
 -					tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 -					NET_INC_STATS(sock_net(sk),
 -							LINUX_MIB_TCPHPHITSTOUSER);
 -					eaten = 1;
 -				}
 -			}
 -			if (!eaten) {
 -				if (tcp_checksum_complete(skb))
 -					goto csum_error;
 -
 -				if ((int)skb->truesize > sk->sk_forward_alloc)
 -					goto step5;
 -
 -				/* Predicted packet is in window by definition.
 -				 * seq == rcv_nxt and rcv_wup <= rcv_nxt.
 -				 * Hence, check seq<=rcv_wup reduces to:
 -				 */
 -				if (tcp_header_len ==
 -				    (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) &&
 -				    tp->rcv_nxt == tp->rcv_wup)
 -					tcp_store_ts_recent(tp);
 -
 -				tcp_rcv_rtt_measure_ts(sk, skb);
 -
 -				NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
 -
 -				/* Bulk data transfer: receiver */
 -				eaten = tcp_queue_rcv(sk, skb, tcp_header_len,
 -						      &fragstolen);
 -			}
 -
 -			tcp_event_data_recv(sk, skb);
 -
 -			if (TCP_SKB_CB(skb)->ack_seq != tp->snd_una) {
 -				/* Well, only one small jumplet in fast path... */
 -				tcp_ack(sk, skb, FLAG_DATA);
 -				tcp_data_snd_check(sk);
 -				if (!inet_csk_ack_scheduled(sk))
 -					goto no_ack;
 -			}
 -
 -			__tcp_ack_snd_check(sk, 0);
 -no_ack:
 -			if (eaten)
 -				kfree_skb_partial(skb, fragstolen);
 -			sk->sk_data_ready(sk);
 -			return;
 -		}
 -	}
 -
 -slow_path:
  	if (len < (th->doff << 2) || tcp_checksum_complete(skb))
  		goto csum_error;
  
  	if (!th->ack && !th->rst && !th->syn)
  		goto discard;
  
 -	/*
 -	 *	Standard slow path.
 -	 */
 -
  	if (!tcp_validate_incoming(sk, skb, th, 1))
  		return;
  
 -step5:
 -	if (tcp_ack(sk, skb, FLAG_SLOWPATH | FLAG_UPDATE_TS_RECENT) < 0)
 +	if (tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT) < 0)
  		goto discard;
  
  	tcp_rcv_rtt_measure_ts(sk, skb);
@@@ -5343,6 -5587,12 +5347,6 @@@ void tcp_finish_connect(struct sock *sk
  
  	if (sock_flag(sk, SOCK_KEEPOPEN))
  		inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp));
 -
 -	if (!tp->rx_opt.snd_wscale)
 -		__tcp_fast_path_on(tp, tp->snd_wnd);
 -	else
 -		tp->pred_flags = 0;
 -
  }
  
  static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack,
@@@ -5471,7 -5721,7 +5475,7 @@@ static int tcp_rcv_synsent_state_proces
  		tcp_ecn_rcv_synack(tp, th);
  
  		tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
 -		tcp_ack(sk, skb, FLAG_SLOWPATH);
 +		tcp_ack(sk, skb, 0);
  
  		/* Ok.. it's good. Set up sequence numbers and
  		 * move to established.
@@@ -5707,8 -5957,8 +5711,8 @@@ int tcp_rcv_state_process(struct sock *
  		return 0;
  
  	/* step 5: check the ACK field */
 -	acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH |
 -				      FLAG_UPDATE_TS_RECENT |
 +
 +	acceptable = tcp_ack(sk, skb, FLAG_UPDATE_TS_RECENT |
  				      FLAG_NO_CHALLENGE_ACK) > 0;
  
  	if (!acceptable) {
@@@ -5776,6 -6026,7 +5780,6 @@@
  		tp->lsndtime = tcp_jiffies32;
  
  		tcp_initialize_rcv_mss(sk);
 -		tcp_fast_path_on(tp);
  		break;
  
  	case TCP_FIN_WAIT1: {
diff --combined net/ipv4/tcp_output.c
index d49bff51bdb7,b7661a68d498..3e0d19631534
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@@ -295,7 -295,9 +295,7 @@@ static u16 tcp_select_window(struct soc
  	/* RFC1323 scaling applied */
  	new_win >>= tp->rx_opt.rcv_wscale;
  
 -	/* If we advertise zero window, disable fast path. */
  	if (new_win == 0) {
 -		tp->pred_flags = 0;
  		if (old_win)
  			NET_INC_STATS(sock_net(sk),
  				      LINUX_MIB_TCPTOZEROWINDOWADV);
@@@ -2375,23 -2377,15 +2375,14 @@@ bool tcp_schedule_loss_probe(struct soc
  {
  	struct inet_connection_sock *icsk = inet_csk(sk);
  	struct tcp_sock *tp = tcp_sk(sk);
- 	u32 timeout, tlp_time_stamp, rto_time_stamp;
 -	u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
+ 	u32 timeout, rto_delta_us;
  
- 	/* No consecutive loss probes. */
- 	if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
- 		tcp_rearm_rto(sk);
- 		return false;
- 	}
  	/* Don't do any loss probe on a Fast Open connection before 3WHS
  	 * finishes.
  	 */
  	if (tp->fastopen_rsk)
  		return false;
  
- 	/* TLP is only scheduled when next timer event is RTO. */
- 	if (icsk->icsk_pending != ICSK_TIME_RETRANS)
- 		return false;
- 
  	/* Schedule a loss probe in 2*RTT for SACK capable connections
  	 * in Open state, that are either limited by cwnd or application.
  	 */
@@@ -2404,28 -2398,20 +2395,24 @@@
  	     tcp_send_head(sk))
  		return false;
  
 -	/* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
 +	/* Probe timeout is 2*rtt. Add minimum RTO to account
  	 * for delayed ack when there's one outstanding packet. If no RTT
  	 * sample is available then probe after TCP_TIMEOUT_INIT.
  	 */
 -	timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
 -	if (tp->packets_out == 1)
 -		timeout = max_t(u32, timeout,
 -				(rtt + (rtt >> 1) + TCP_DELACK_MAX));
 -	timeout = max_t(u32, timeout, msecs_to_jiffies(10));
 +	if (tp->srtt_us) {
 +		timeout = usecs_to_jiffies(tp->srtt_us >> 2);
 +		if (tp->packets_out == 1)
 +			timeout += TCP_RTO_MIN;
 +		else
 +			timeout += TCP_TIMEOUT_MIN;
 +	} else {
 +		timeout = TCP_TIMEOUT_INIT;
 +	}
  
- 	/* If RTO is shorter, just schedule TLP in its place. */
- 	tlp_time_stamp = tcp_jiffies32 + timeout;
- 	rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
- 	if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
- 		s32 delta = rto_time_stamp - tcp_jiffies32;
- 		if (delta > 0)
- 			timeout = delta;
- 	}
+ 	/* If the RTO formula yields an earlier time, then use that time. */
+ 	rto_delta_us = tcp_rto_delta_us(sk);  /* How far in future is RTO? */
+ 	if (rto_delta_us > 0)
+ 		timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
  
  	inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
  				  TCP_RTO_MAX);
@@@ -3450,6 -3436,10 +3437,10 @@@ int tcp_connect(struct sock *sk
  	int err;
  
  	tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB);
+ 
+ 	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
+ 		return -EHOSTUNREACH; /* Routing failure or similar. */
+ 
  	tcp_connect_init(sk);
  
  	if (unlikely(tp->repair)) {
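
The rewritten tcp_schedule_loss_probe() above derives the probe timeout
directly from srtt_us: since srtt_us holds eight times the smoothed RTT in
microseconds, srtt_us >> 2 is 2*SRTT; to that it adds TCP_RTO_MIN when only
one packet is in flight (to cover a delayed ACK) or the new two-jiffy
TCP_TIMEOUT_MIN otherwise, and falls back to TCP_TIMEOUT_INIT when no RTT
sample exists yet.  The result is then capped by tcp_rto_delta_us() so the
probe never fires later than the RTO would have.  Worked example: with a
40 ms smoothed RTT, srtt_us = 320,000, so the base timeout is
usecs_to_jiffies(80,000), about 80 ms; with a single outstanding packet that
becomes 80 + 200 = 280 ms, and if the pending RTO is only 150 ms away the
probe is scheduled at 150 ms instead.
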
diff --combined net/ipv4/tcp_timer.c
index f753f9d2fee3,e906014890b6..655dd8d7f064
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@@ -239,6 -239,7 +239,6 @@@ static int tcp_write_timeout(struct soc
  /* Called with BH disabled */
  void tcp_delack_timer_handler(struct sock *sk)
  {
 -	struct tcp_sock *tp = tcp_sk(sk);
  	struct inet_connection_sock *icsk = inet_csk(sk);
  
  	sk_mem_reclaim_partial(sk);
@@@ -253,6 -254,17 +253,6 @@@
  	}
  	icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
  
 -	if (!skb_queue_empty(&tp->ucopy.prequeue)) {
 -		struct sk_buff *skb;
 -
 -		__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED);
 -
 -		while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
 -			sk_backlog_rcv(sk, skb);
 -
 -		tp->ucopy.memory = 0;
 -	}
 -
  	if (inet_csk_ack_scheduled(sk)) {
  		if (!icsk->icsk_ack.pingpong) {
  			/* Delayed ACK missed: inflate ATO. */
@@@ -640,7 -652,8 +640,8 @@@ static void tcp_keepalive_timer (unsign
  		goto death;
  	}
  
- 	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
+ 	if (!sock_flag(sk, SOCK_KEEPOPEN) ||
+ 	    ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
  		goto out;
  
  	elapsed = keepalive_time_when(tp);
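
The keepalive change above widens the early bail-out from TCP_CLOSE alone to
SYN_SENT as well, using the usual state-set idiom: each TCPF_* constant is
defined as (1 << TCP_*), so "(1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)"
tests membership of the current state in a set with a single AND instead of a
chain of comparisons.  The effect is that no keepalive probe is generated
while the connection is still being established.
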
diff --combined net/ipv6/route.c
index 7ecbe5eb19f8,a640fbcba15d..c73e61750642
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@@ -1820,11 -1820,6 +1820,11 @@@ static struct rt6_info *ip6_route_info_
  		goto out;
  	}
  
 +	if (cfg->fc_flags & RTF_OFFLOAD) {
 +		NL_SET_ERR_MSG(extack, "Userspace can not set RTF_OFFLOAD");
 +		goto out;
 +	}
 +
  	if (cfg->fc_dst_len > 128) {
  		NL_SET_ERR_MSG(extack, "Invalid prefix length");
  		goto out;
@@@ -2356,6 -2351,7 +2356,7 @@@ static void rt6_do_redirect(struct dst_
  	if (on_link)
  		nrt->rt6i_flags &= ~RTF_GATEWAY;
  
+ 	nrt->rt6i_protocol = RTPROT_REDIRECT;
  	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
  
  	if (ip6_ins_rt(nrt))
@@@ -2466,6 -2462,7 +2467,7 @@@ static struct rt6_info *rt6_add_route_i
  		.fc_dst_len	= prefixlen,
  		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
  				  RTF_UP | RTF_PREF(pref),
+ 		.fc_protocol = RTPROT_RA,
  		.fc_nlinfo.portid = 0,
  		.fc_nlinfo.nlh = NULL,
  		.fc_nlinfo.nl_net = net,
@@@ -2518,6 -2515,7 +2520,7 @@@ struct rt6_info *rt6_add_dflt_router(co
  		.fc_ifindex	= dev->ifindex,
  		.fc_flags	= RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
  				  RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
+ 		.fc_protocol = RTPROT_RA,
  		.fc_nlinfo.portid = 0,
  		.fc_nlinfo.nlh = NULL,
  		.fc_nlinfo.nl_net = dev_net(dev),
@@@ -3332,9 -3330,6 +3335,9 @@@ static int rt6_nexthop_info(struct sk_b
  			goto nla_put_failure;
  	}
  
 +	if (rt->rt6i_flags & RTF_OFFLOAD)
 +		*flags |= RTNH_F_OFFLOAD;
 +
  	/* not needed for multipath encoding b/c it has a rtnexthop struct */
  	if (!skip_oif && rt->dst.dev &&
  	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
@@@ -3432,14 -3427,6 +3435,6 @@@ static int rt6_fill_node(struct net *ne
  	rtm->rtm_flags = 0;
  	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
  	rtm->rtm_protocol = rt->rt6i_protocol;
- 	if (rt->rt6i_flags & RTF_DYNAMIC)
- 		rtm->rtm_protocol = RTPROT_REDIRECT;
- 	else if (rt->rt6i_flags & RTF_ADDRCONF) {
- 		if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
- 			rtm->rtm_protocol = RTPROT_RA;
- 		else
- 			rtm->rtm_protocol = RTPROT_KERNEL;
- 	}
  
  	if (rt->rt6i_flags & RTF_CACHE)
  		rtm->rtm_flags |= RTM_F_CLONED;
@@@ -3934,7 -3921,6 +3929,7 @@@ static int __net_init ip6_route_net_ini
  			 ip6_template_metrics, true);
  
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 +	net->ipv6.fib6_has_custom_rules = false;
  	net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
  					       sizeof(*net->ipv6.ip6_prohibit_entry),
  					       GFP_KERNEL);
diff --combined tools/testing/selftests/bpf/test_verifier.c
index 65aa562cff87,d3ed7324105e..ab0cd1198326
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@@ -8,6 -8,7 +8,7 @@@
   * License as published by the Free Software Foundation.
   */
  
+ #include <endian.h>
  #include <asm/types.h>
  #include <linux/types.h>
  #include <stdint.h>
@@@ -421,7 -422,7 +422,7 @@@ static struct bpf_test tests[] = 
  			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr_unpriv = "R1 pointer arithmetic",
 +		.errstr_unpriv = "R1 subtraction from stack pointer",
  		.result_unpriv = REJECT,
  		.errstr = "R1 invalid mem access",
  		.result = REJECT,
@@@ -603,9 -604,8 +604,9 @@@
  			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr = "misaligned access",
 +		.errstr = "misaligned stack access",
  		.result = REJECT,
 +		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"invalid map_fd for function call",
@@@ -651,9 -651,8 +652,9 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr = "misaligned access",
 +		.errstr = "misaligned value access",
  		.result = REJECT,
 +		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"sometimes access memory with incorrect alignment",
@@@ -674,7 -673,6 +675,7 @@@
  		.errstr = "R0 invalid mem access",
  		.errstr_unpriv = "R0 leaks addr",
  		.result = REJECT,
 +		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"jump test 1",
@@@ -1101,7 -1099,7 +1102,7 @@@
  		"check skb->hash byte load permitted",
  		.insns = {
  			BPF_MOV64_IMM(BPF_REG_0, 0),
- #ifdef __LITTLE_ENDIAN
+ #if __BYTE_ORDER == __LITTLE_ENDIAN
  			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
  				    offsetof(struct __sk_buff, hash)),
  #else
@@@ -1138,7 -1136,7 +1139,7 @@@
  		"check skb->hash byte load not permitted 3",
  		.insns = {
  			BPF_MOV64_IMM(BPF_REG_0, 0),
- #ifdef __LITTLE_ENDIAN
+ #if __BYTE_ORDER == __LITTLE_ENDIAN
  			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
  				    offsetof(struct __sk_buff, hash) + 3),
  #else
@@@ -1218,9 -1216,8 +1219,9 @@@
  				    offsetof(struct __sk_buff, cb[0]) + 1),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr = "misaligned access",
 +		.errstr = "misaligned context access",
  		.result = REJECT,
 +		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"check __sk_buff->hash, offset 0, half store not permitted",
@@@ -1248,7 -1245,7 +1249,7 @@@
  		"check skb->hash half load permitted",
  		.insns = {
  			BPF_MOV64_IMM(BPF_REG_0, 0),
- #ifdef __LITTLE_ENDIAN
+ #if __BYTE_ORDER == __LITTLE_ENDIAN
  			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
  				    offsetof(struct __sk_buff, hash)),
  #else
@@@ -1263,7 -1260,7 +1264,7 @@@
  		"check skb->hash half load not permitted",
  		.insns = {
  			BPF_MOV64_IMM(BPF_REG_0, 0),
- #ifdef __LITTLE_ENDIAN
+ #if __BYTE_ORDER == __LITTLE_ENDIAN
  			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
  				    offsetof(struct __sk_buff, hash) + 2),
  #else
@@@ -1323,9 -1320,8 +1324,9 @@@
  				    offsetof(struct __sk_buff, cb[0]) + 2),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr = "misaligned access",
 +		.errstr = "misaligned context access",
  		.result = REJECT,
 +		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"check cb access: word, unaligned 2",
@@@ -1335,9 -1331,8 +1336,9 @@@
  				    offsetof(struct __sk_buff, cb[4]) + 1),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr = "misaligned access",
 +		.errstr = "misaligned context access",
  		.result = REJECT,
 +		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"check cb access: word, unaligned 3",
@@@ -1347,9 -1342,8 +1348,9 @@@
  				    offsetof(struct __sk_buff, cb[4]) + 2),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr = "misaligned access",
 +		.errstr = "misaligned context access",
  		.result = REJECT,
 +		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"check cb access: word, unaligned 4",
@@@ -1359,9 -1353,8 +1360,9 @@@
  				    offsetof(struct __sk_buff, cb[4]) + 3),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr = "misaligned access",
 +		.errstr = "misaligned context access",
  		.result = REJECT,
 +		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"check cb access: double",
@@@ -1387,9 -1380,8 +1388,9 @@@
  				    offsetof(struct __sk_buff, cb[1])),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr = "misaligned access",
 +		.errstr = "misaligned context access",
  		.result = REJECT,
 +		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"check cb access: double, unaligned 2",
@@@ -1399,9 -1391,8 +1400,9 @@@
  				    offsetof(struct __sk_buff, cb[3])),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr = "misaligned access",
 +		.errstr = "misaligned context access",
  		.result = REJECT,
 +		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"check cb access: double, oob 1",
@@@ -1533,8 -1524,7 +1534,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.result = REJECT,
 -		.errstr = "misaligned access off -6 size 8",
 +		.errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
 +		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"PTR_TO_STACK store/load - bad alignment on reg",
@@@ -1546,8 -1536,7 +1547,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.result = REJECT,
 -		.errstr = "misaligned access off -2 size 8",
 +		.errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
 +		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
  	},
  	{
  		"PTR_TO_STACK store/load - out of bounds low",
@@@ -1591,6 -1580,8 +1592,6 @@@
  			BPF_EXIT_INSN(),
  		},
  		.result = ACCEPT,
 -		.result_unpriv = REJECT,
 -		.errstr_unpriv = "R1 pointer arithmetic",
  	},
  	{
  		"unpriv: add pointer to pointer",
@@@ -1601,7 -1592,7 +1602,7 @@@
  		},
  		.result = ACCEPT,
  		.result_unpriv = REJECT,
 -		.errstr_unpriv = "R1 pointer arithmetic",
 +		.errstr_unpriv = "R1 pointer += pointer",
  	},
  	{
  		"unpriv: neg pointer",
@@@ -1942,7 -1933,10 +1943,7 @@@
  			BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr_unpriv = "pointer arithmetic prohibited",
 -		.result_unpriv = REJECT,
 -		.errstr = "R1 invalid mem access",
 -		.result = REJECT,
 +		.result = ACCEPT,
  	},
  	{
  		"unpriv: cmp of stack pointer",
@@@ -2006,7 -2000,7 +2007,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.result = REJECT,
 -		.errstr = "invalid stack type R3",
 +		.errstr = "R4 min value is negative",
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
  	{
@@@ -2023,7 -2017,7 +2024,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.result = REJECT,
 -		.errstr = "invalid stack type R3",
 +		.errstr = "R4 min value is negative",
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
  	{
@@@ -2225,7 -2219,7 +2226,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.result = REJECT,
 -		.errstr = "invalid stack type R3 off=-1 access_size=-1",
 +		.errstr = "R4 min value is negative",
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
  	{
@@@ -2242,7 -2236,7 +2243,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.result = REJECT,
 -		.errstr = "invalid stack type R3 off=-1 access_size=2147483647",
 +		.errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
  	{
@@@ -2259,7 -2253,7 +2260,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.result = REJECT,
 -		.errstr = "invalid stack type R3 off=-512 access_size=2147483647",
 +		.errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
  	{
@@@ -2330,8 -2324,8 +2331,8 @@@
  				    offsetof(struct __sk_buff, data)),
  			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
  			BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
 -			BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 48),
 -			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 48),
 +			BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
 +			BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
  			BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
  			BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
  			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
@@@ -2659,7 -2653,7 +2660,7 @@@
  			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
  			BPF_JMP_A(-6),
  		},
 -		.errstr = "misaligned packet access off 2+15+-4 size 4",
 +		.errstr = "misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  		.flags = F_LOAD_WITH_STRICT_ALIGNMENT,
@@@ -2710,11 -2704,11 +2711,11 @@@
  			BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
  			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
  			BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
 -			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
 +			BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0x7fff),
  			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
  			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
  			BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
 -			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
  			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
  			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
  			BPF_MOV64_IMM(BPF_REG_0, 0),
@@@ -2736,10 -2730,10 +2737,10 @@@
  			BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
  			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
  			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
 -			BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0xffff),
 +			BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0x7fff),
  			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
  			BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
 -			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xffff - 1),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
  			BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
  			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
  			BPF_MOV64_IMM(BPF_REG_0, 0),
@@@ -2765,7 -2759,7 +2766,7 @@@
  			BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
  			BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
  			BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
 -			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 48),
 +			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 49),
  			BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
  			BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
  			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
@@@ -2802,7 -2796,7 +2803,7 @@@
  		},
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  		.result = REJECT,
 -		.errstr = "cannot add integer value with 47 upper zero bits to ptr_to_packet",
 +		.errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)",
  	},
  	{
  		"direct packet access: test24 (x += pkt_ptr, 5)",
@@@ -2820,7 -2814,7 +2821,7 @@@
  			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
  			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
  			BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
 -			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7fff - 1),
  			BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
  			BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
  			BPF_MOV64_IMM(BPF_REG_0, 0),
@@@ -3119,7 -3113,7 +3120,7 @@@
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
  	{
 -		"helper access to packet: test14, cls helper fail sub",
 +		"helper access to packet: test14, cls helper ok sub",
  		.insns = {
  			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
  				    offsetof(struct __sk_buff, data)),
@@@ -3139,36 -3133,12 +3140,36 @@@
  			BPF_MOV64_IMM(BPF_REG_0, 0),
  			BPF_EXIT_INSN(),
  		},
 +		.result = ACCEPT,
 +		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 +	},
 +	{
 +		"helper access to packet: test15, cls helper fail sub",
 +		.insns = {
 +			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
 +				    offsetof(struct __sk_buff, data)),
 +			BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
 +				    offsetof(struct __sk_buff, data_end)),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
 +			BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
 +			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
 +			BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 12),
 +			BPF_MOV64_IMM(BPF_REG_2, 4),
 +			BPF_MOV64_IMM(BPF_REG_3, 0),
 +			BPF_MOV64_IMM(BPF_REG_4, 0),
 +			BPF_MOV64_IMM(BPF_REG_5, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_csum_diff),
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
  		.result = REJECT,
 -		.errstr = "type=inv expected=fp",
 +		.errstr = "invalid access to packet",
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
  	{
 -		"helper access to packet: test15, cls helper fail range 1",
 +		"helper access to packet: test16, cls helper fail range 1",
  		.insns = {
  			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
  				    offsetof(struct __sk_buff, data)),
@@@ -3193,7 -3163,7 +3194,7 @@@
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
  	{
 -		"helper access to packet: test16, cls helper fail range 2",
 +		"helper access to packet: test17, cls helper fail range 2",
  		.insns = {
  			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
  				    offsetof(struct __sk_buff, data)),
@@@ -3214,11 -3184,11 +3215,11 @@@
  			BPF_EXIT_INSN(),
  		},
  		.result = REJECT,
 -		.errstr = "invalid access to packet",
 +		.errstr = "R2 min value is negative",
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
  	{
 -		"helper access to packet: test17, cls helper fail range 3",
 +		"helper access to packet: test18, cls helper fail range 3",
  		.insns = {
  			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
  				    offsetof(struct __sk_buff, data)),
@@@ -3239,11 -3209,11 +3240,11 @@@
  			BPF_EXIT_INSN(),
  		},
  		.result = REJECT,
 -		.errstr = "invalid access to packet",
 +		.errstr = "R2 min value is negative",
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
  	{
 -		"helper access to packet: test18, cls helper fail range zero",
 +		"helper access to packet: test19, cls helper fail range zero",
  		.insns = {
  			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
  				    offsetof(struct __sk_buff, data)),
@@@ -3268,7 -3238,7 +3269,7 @@@
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
  	{
 -		"helper access to packet: test19, pkt end as input",
 +		"helper access to packet: test20, pkt end as input",
  		.insns = {
  			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
  				    offsetof(struct __sk_buff, data)),
@@@ -3293,7 -3263,7 +3294,7 @@@
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
  	{
 -		"helper access to packet: test20, wrong reg",
 +		"helper access to packet: test21, wrong reg",
  		.insns = {
  			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
  				    offsetof(struct __sk_buff, data)),
@@@ -3353,7 -3323,7 +3354,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 +		.errstr_unpriv = "R0 leaks addr",
  		.result_unpriv = REJECT,
  		.result = ACCEPT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@@ -3377,7 -3347,7 +3378,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 +		.errstr_unpriv = "R0 leaks addr",
  		.result_unpriv = REJECT,
  		.result = ACCEPT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@@ -3405,7 -3375,7 +3406,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 +		.errstr_unpriv = "R0 leaks addr",
  		.result_unpriv = REJECT,
  		.result = ACCEPT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@@ -3446,7 -3416,9 +3447,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
  		.errstr = "R0 min value is outside of the array range",
 -		.result_unpriv = REJECT,
  		.result = REJECT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
  	},
@@@ -3468,7 -3440,9 +3469,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 -		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 -		.result_unpriv = REJECT,
 +		.errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
  		.result = REJECT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
  	},
@@@ -3482,7 -3456,7 +3483,7 @@@
  			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
  				     BPF_FUNC_map_lookup_elem),
  			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
 -			BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
 +			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
  			BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
  			BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
  			BPF_MOV32_IMM(BPF_REG_1, 0),
@@@ -3493,8 -3467,8 +3494,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 -		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 +		.errstr_unpriv = "R0 leaks addr",
 +		.errstr = "R0 unbounded memory access",
  		.result_unpriv = REJECT,
  		.result = REJECT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@@ -3520,7 -3494,7 +3521,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 +		.errstr_unpriv = "R0 leaks addr",
  		.errstr = "invalid access to map value, value_size=48 off=44 size=8",
  		.result_unpriv = REJECT,
  		.result = REJECT,
@@@ -3550,8 -3524,8 +3551,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3, 11 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 -		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 +		.errstr_unpriv = "R0 pointer += pointer",
 +		.errstr = "R0 invalid mem access 'inv'",
  		.result_unpriv = REJECT,
  		.result = REJECT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@@ -3693,6 -3667,34 +3694,6 @@@
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS
  	},
  	{
 -		"multiple registers share map_lookup_elem bad reg type",
 -		.insns = {
 -			BPF_MOV64_IMM(BPF_REG_1, 10),
 -			BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
 -			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 -			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 -			BPF_LD_MAP_FD(BPF_REG_1, 0),
 -			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 -				     BPF_FUNC_map_lookup_elem),
 -			BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
 -			BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
 -			BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
 -			BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
 -			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
 -			BPF_MOV64_IMM(BPF_REG_1, 1),
 -			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
 -			BPF_MOV64_IMM(BPF_REG_1, 2),
 -			BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 1),
 -			BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 0),
 -			BPF_MOV64_IMM(BPF_REG_1, 3),
 -			BPF_EXIT_INSN(),
 -		},
 -		.fixup_map1 = { 4 },
 -		.result = REJECT,
 -		.errstr = "R3 invalid mem access 'inv'",
 -		.prog_type = BPF_PROG_TYPE_SCHED_CLS
 -	},
 -	{
  		"invalid map access from else condition",
  		.insns = {
  			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
@@@ -3710,9 -3712,9 +3711,9 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
 +		.errstr = "R0 unbounded memory access",
  		.result = REJECT,
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 +		.errstr_unpriv = "R0 leaks addr",
  		.result_unpriv = REJECT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
  	},
@@@ -4090,7 -4092,7 +4091,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr = "invalid access to map value, value_size=48 off=0 size=-8",
 +		.errstr = "R2 min value is negative",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
  	},
@@@ -4156,7 -4158,7 +4157,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr = "R1 min value is outside of the array range",
 +		.errstr = "invalid access to map value, value_size=48 off=4 size=0",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
  	},
@@@ -4202,7 -4204,7 +4203,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr = "invalid access to map value, value_size=48 off=4 size=-8",
 +		.errstr = "R2 min value is negative",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
  	},
@@@ -4224,7 -4226,7 +4225,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr = "R1 min value is outside of the array range",
 +		.errstr = "R2 min value is negative",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
  	},
@@@ -4340,7 -4342,7 +4341,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr = "invalid access to map value, value_size=48 off=4 size=-8",
 +		.errstr = "R2 min value is negative",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
  	},
@@@ -4363,7 -4365,7 +4364,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr = "R1 min value is outside of the array range",
 +		.errstr = "R2 min value is negative",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
  	},
@@@ -4451,13 -4453,13 +4452,13 @@@
  			BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
  			BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
  			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
 -			BPF_MOV64_IMM(BPF_REG_2, 0),
 +			BPF_MOV64_IMM(BPF_REG_2, 1),
  			BPF_MOV64_IMM(BPF_REG_3, 0),
  			BPF_EMIT_CALL(BPF_FUNC_probe_read),
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr = "R1 min value is negative, either use unsigned index or do a if (index >=0) check",
 +		.errstr = "R1 unbounded memory access",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
  	},
@@@ -4577,7 -4579,7 +4578,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 +		.errstr_unpriv = "R0 leaks addr",
  		.result = ACCEPT,
  		.result_unpriv = REJECT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@@ -4605,7 -4607,7 +4606,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 +		.errstr_unpriv = "R0 leaks addr",
  		.result = ACCEPT,
  		.result_unpriv = REJECT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@@ -4624,7 -4626,7 +4625,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 +		.errstr_unpriv = "R0 bitwise operator &= on pointer",
  		.errstr = "invalid mem access 'inv'",
  		.result = REJECT,
  		.result_unpriv = REJECT,
@@@ -4643,7 -4645,7 +4644,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 +		.errstr_unpriv = "R0 32-bit pointer arithmetic prohibited",
  		.errstr = "invalid mem access 'inv'",
  		.result = REJECT,
  		.result_unpriv = REJECT,
@@@ -4662,7 -4664,7 +4663,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 +		.errstr_unpriv = "R0 pointer arithmetic with /= operator",
  		.errstr = "invalid mem access 'inv'",
  		.result = REJECT,
  		.result_unpriv = REJECT,
@@@ -4705,8 -4707,10 +4706,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 invalid mem access 'inv'",
  		.errstr = "R0 invalid mem access 'inv'",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
  		"map element value is preserved across register spilling",
@@@ -4728,7 -4732,7 +4729,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 +		.errstr_unpriv = "R0 leaks addr",
  		.result = ACCEPT,
  		.result_unpriv = REJECT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@@ -4910,8 -4914,7 +4911,8 @@@
  			BPF_MOV64_IMM(BPF_REG_0, 0),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr = "R2 unbounded memory access",
 +		/* because max wasn't checked, signed min is negative */
 +		.errstr = "R2 min value is negative, either use unsigned or 'var &= const'",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
  	},
@@@ -5060,20 -5063,6 +5061,20 @@@
  		.prog_type = BPF_PROG_TYPE_TRACEPOINT,
  	},
  	{
 +		"helper access to variable memory: size = 0 allowed on NULL",
 +		.insns = {
 +			BPF_MOV64_IMM(BPF_REG_1, 0),
 +			BPF_MOV64_IMM(BPF_REG_2, 0),
 +			BPF_MOV64_IMM(BPF_REG_3, 0),
 +			BPF_MOV64_IMM(BPF_REG_4, 0),
 +			BPF_MOV64_IMM(BPF_REG_5, 0),
 +			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
 +			BPF_EXIT_INSN(),
 +		},
 +		.result = ACCEPT,
 +		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 +	},
 +	{
  		"helper access to variable memory: size > 0 not allowed on NULL",
  		.insns = {
  			BPF_MOV64_IMM(BPF_REG_1, 0),
@@@ -5087,7 -5076,7 +5088,7 @@@
  			BPF_EMIT_CALL(BPF_FUNC_csum_diff),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr = "R1 type=imm expected=fp",
 +		.errstr = "R1 type=inv expected=fp",
  		.result = REJECT,
  		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
  	},
@@@ -5172,7 -5161,7 +5173,7 @@@
  			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
  				     BPF_FUNC_map_lookup_elem),
  			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
 -			BPF_MOV64_IMM(BPF_REG_1, 6),
 +			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
  			BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4),
  			BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
  			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
@@@ -5181,8 -5170,10 +5182,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 -		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 +		.errstr = "R0 max value is outside of the array range",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
  	},
  	{
@@@ -5211,8 -5202,10 +5212,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map2 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 -		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
 +		.errstr = "R0 max value is outside of the array range",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  		.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
  	},
  	{
@@@ -5259,7 -5252,7 +5260,7 @@@
  		},
  		.fixup_map_in_map = { 3 },
  		.errstr = "R1 type=inv expected=map_ptr",
 -		.errstr_unpriv = "R1 pointer arithmetic prohibited",
 +		.errstr_unpriv = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
  		.result = REJECT,
  	},
  	{
@@@ -5430,7 -5423,7 +5431,7 @@@
  		"check bpf_perf_event_data->sample_period byte load permitted",
  		.insns = {
  			BPF_MOV64_IMM(BPF_REG_0, 0),
- #ifdef __LITTLE_ENDIAN
+ #if __BYTE_ORDER == __LITTLE_ENDIAN
  			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
  				    offsetof(struct bpf_perf_event_data, sample_period)),
  #else
@@@ -5446,7 -5439,7 +5447,7 @@@
  		"check bpf_perf_event_data->sample_period half load permitted",
  		.insns = {
  			BPF_MOV64_IMM(BPF_REG_0, 0),
- #ifdef __LITTLE_ENDIAN
+ #if __BYTE_ORDER == __LITTLE_ENDIAN
  			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
  				    offsetof(struct bpf_perf_event_data, sample_period)),
  #else
@@@ -5462,7 -5455,7 +5463,7 @@@
  		"check bpf_perf_event_data->sample_period word load permitted",
  		.insns = {
  			BPF_MOV64_IMM(BPF_REG_0, 0),
- #ifdef __LITTLE_ENDIAN
+ #if __BYTE_ORDER == __LITTLE_ENDIAN
  			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
  				    offsetof(struct bpf_perf_event_data, sample_period)),
  #else
@@@ -5489,7 -5482,7 +5490,7 @@@
  		"check skb->data half load not permitted",
  		.insns = {
  			BPF_MOV64_IMM(BPF_REG_0, 0),
- #ifdef __LITTLE_ENDIAN
+ #if __BYTE_ORDER == __LITTLE_ENDIAN
  			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
  				    offsetof(struct __sk_buff, data)),
  #else
@@@ -5505,7 -5498,7 +5506,7 @@@
  		"check skb->tc_classid half load not permitted for lwt prog",
  		.insns = {
  			BPF_MOV64_IMM(BPF_REG_0, 0),
- #ifdef __LITTLE_ENDIAN
+ #if __BYTE_ORDER == __LITTLE_ENDIAN
  			BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
  				    offsetof(struct __sk_buff, tc_classid)),
  #else
@@@ -5539,8 -5532,10 +5540,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
  		.errstr = "R0 min value is negative",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
  		"bounds checks mixing signed and unsigned",
@@@ -5563,8 -5558,10 +5564,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
  		.errstr = "R0 min value is negative",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
  		"bounds checks mixing signed and unsigned, variant 2",
@@@ -5589,8 -5586,10 +5590,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
  		.errstr = "R8 invalid mem access 'inv'",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
  		"bounds checks mixing signed and unsigned, variant 3",
@@@ -5614,8 -5613,10 +5615,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
  		.errstr = "R8 invalid mem access 'inv'",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
  		"bounds checks mixing signed and unsigned, variant 4",
@@@ -5638,7 -5639,10 +5639,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 -		.errstr = "R0 min value is negative",
 -		.result = REJECT,
 -		.result_unpriv = REJECT,
 +		.result = ACCEPT,
  	},
  	{
  		"bounds checks mixing signed and unsigned, variant 5",
@@@ -5662,8 -5666,10 +5663,8 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 -		.errstr = "R0 invalid mem access",
 +		.errstr = "R0 min value is negative",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
  		"bounds checks mixing signed and unsigned, variant 6",
@@@ -5684,8 -5690,10 +5685,8 @@@
  			BPF_MOV64_IMM(BPF_REG_0, 0),
  			BPF_EXIT_INSN(),
  		},
 -		.errstr_unpriv = "R4 min value is negative, either use unsigned",
  		.errstr = "R4 min value is negative, either use unsigned",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
  		"bounds checks mixing signed and unsigned, variant 7",
@@@ -5708,7 -5716,10 +5709,7 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 -		.errstr = "R0 min value is negative",
 -		.result = REJECT,
 -		.result_unpriv = REJECT,
 +		.result = ACCEPT,
  	},
  	{
  		"bounds checks mixing signed and unsigned, variant 8",
@@@ -5719,6 -5730,32 +5720,6 @@@
  			BPF_LD_MAP_FD(BPF_REG_1, 0),
  			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
  				     BPF_FUNC_map_lookup_elem),
 -			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
 -			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
 -			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 -			BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024 + 1),
 -			BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
 -			BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
 -			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
 -			BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
 -			BPF_MOV64_IMM(BPF_REG_0, 0),
 -			BPF_EXIT_INSN(),
 -		},
 -		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 -		.errstr = "R0 min value is negative",
 -		.result = REJECT,
 -		.result_unpriv = REJECT,
 -	},
 -	{
 -		"bounds checks mixing signed and unsigned, variant 9",
 -		.insns = {
 -			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 -			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 -			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 -			BPF_LD_MAP_FD(BPF_REG_1, 0),
 -			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 -				     BPF_FUNC_map_lookup_elem),
  			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
  			BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
  			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
@@@ -5733,11 -5770,13 +5734,11 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
  		.errstr = "R0 min value is negative",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
 -		"bounds checks mixing signed and unsigned, variant 10",
 +		"bounds checks mixing signed and unsigned, variant 9",
  		.insns = {
  			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
  			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@@ -5759,10 -5798,13 +5760,10 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 -		.errstr = "R0 min value is negative",
 -		.result = REJECT,
 -		.result_unpriv = REJECT,
 +		.result = ACCEPT,
  	},
  	{
 -		"bounds checks mixing signed and unsigned, variant 11",
 +		"bounds checks mixing signed and unsigned, variant 10",
  		.insns = {
  			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
  			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@@ -5784,11 -5826,13 +5785,11 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
  		.errstr = "R0 min value is negative",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
 -		"bounds checks mixing signed and unsigned, variant 12",
 +		"bounds checks mixing signed and unsigned, variant 11",
  		.insns = {
  			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
  			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@@ -5811,11 -5855,13 +5812,11 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
  		.errstr = "R0 min value is negative",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
 -		"bounds checks mixing signed and unsigned, variant 13",
 +		"bounds checks mixing signed and unsigned, variant 12",
  		.insns = {
  			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
  			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@@ -5837,11 -5883,13 +5838,11 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
  		.errstr = "R0 min value is negative",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
 -		"bounds checks mixing signed and unsigned, variant 14",
 +		"bounds checks mixing signed and unsigned, variant 13",
  		.insns = {
  			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
  			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@@ -5866,11 -5914,13 +5867,11 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
  		.errstr = "R0 min value is negative",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
 -		"bounds checks mixing signed and unsigned, variant 15",
 +		"bounds checks mixing signed and unsigned, variant 14",
  		.insns = {
  			BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
  				    offsetof(struct __sk_buff, mark)),
@@@ -5896,11 -5946,13 +5897,11 @@@
  			BPF_JMP_IMM(BPF_JA, 0, 0, -7),
  		},
  		.fixup_map1 = { 4 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
  		.errstr = "R0 min value is negative",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
  	},
  	{
 -		"bounds checks mixing signed and unsigned, variant 16",
 +		"bounds checks mixing signed and unsigned, variant 15",
  		.insns = {
  			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
  			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@@ -5924,13 -5976,13 +5925,13 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 +		.errstr_unpriv = "R0 pointer comparison prohibited",
  		.errstr = "R0 min value is negative",
  		.result = REJECT,
  		.result_unpriv = REJECT,
  	},
  	{
 -		"subtraction bounds (map value)",
 +		"subtraction bounds (map value) variant 1",
  		.insns = {
  			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
  			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@@ -5952,74 -6004,10 +5953,74 @@@
  			BPF_EXIT_INSN(),
  		},
  		.fixup_map1 = { 3 },
 -		.errstr_unpriv = "R0 pointer arithmetic prohibited",
 +		.errstr = "R0 max value is outside of the array range",
 +		.result = REJECT,
 +	},
 +	{
 +		"subtraction bounds (map value) variant 2",
 +		.insns = {
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 +			BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
 +			BPF_LD_MAP_FD(BPF_REG_1, 0),
 +			BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
 +				     BPF_FUNC_map_lookup_elem),
 +			BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
 +			BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
 +			BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 6),
 +			BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
 +			BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 4),
 +			BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
 +			BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +			BPF_MOV64_IMM(BPF_REG_0, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.fixup_map1 = { 3 },
  		.errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
  		.result = REJECT,
 -		.result_unpriv = REJECT,
 +	},
 +	{
 +		"variable-offset ctx access",
 +		.insns = {
 +			/* Get an unknown value */
 +			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
 +			/* Make it small and 4-byte aligned */
 +			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
 +			/* add it to skb.  We now have either &skb->len or
 +			 * &skb->pkt_type, but we don't know which
 +			 */
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
 +			/* dereference it */
 +			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.errstr = "variable ctx access var_off=(0x0; 0x4)",
 +		.result = REJECT,
 +		.prog_type = BPF_PROG_TYPE_LWT_IN,
 +	},
 +	{
 +		"variable-offset stack access",
 +		.insns = {
 +			/* Fill the top 8 bytes of the stack */
 +			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
 +			/* Get an unknown value */
 +			BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
 +			/* Make it small and 4-byte aligned */
 +			BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
 +			BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
 +			/* add it to fp.  We now have either fp-4 or fp-8, but
 +			 * we don't know which
 +			 */
 +			BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
 +			/* dereference it */
 +			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
 +			BPF_EXIT_INSN(),
 +		},
 +		.errstr = "variable stack access var_off=(0xfffffffffffffff8; 0x4)",
 +		.result = REJECT,
 +		.prog_type = BPF_PROG_TYPE_LWT_IN,
  	},
  };
  


