[linux-next] LinuxNextTracking branch, master, updated. next-20190506

batman at open-mesh.org batman at open-mesh.org
Tue May 7 00:18:30 CEST 2019


The following commit has been merged in the master branch:
commit ff24e4980a68d83090a02fda081741a410fe8eef
Merge: 26f146ed971c0e4a264ce525d7a66a71ef73690d ea9866793d1e925b4d320eaea409263b2a568f38
Author: David S. Miller <davem at davemloft.net>
Date:   Thu May 2 22:14:21 2019 -0400

    Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
    
    Three trivial overlapping conflicts.
    
    Signed-off-by: David S. Miller <davem at davemloft.net>

diff --combined Documentation/networking/ip-sysctl.txt
index c9538a30ef7e,c4ac35234f05..725b8bea58a7
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@@ -81,11 -81,6 +81,11 @@@ fib_multipath_hash_policy - INTEGE
  	0 - Layer 3
  	1 - Layer 4
  
 +fib_sync_mem - UNSIGNED INTEGER
 +	Amount of dirty memory from fib entries that can be backlogged before
 +	synchronize_rcu is forced.
 +	  Default: 512kB   Minimum: 64kB   Maximum: 64MB
 +
  ip_forward_update_priority - INTEGER
  	Whether to update SKB priority from "TOS" field in IPv4 header after it
  	is forwarded. The new SKB priority is mapped from TOS field value
@@@ -1342,6 -1337,7 +1342,7 @@@ tag - INTEGE
  	Default value is 0.
  
  xfrm4_gc_thresh - INTEGER
+ 	(Obsolete since linux-4.14)
  	The threshold at which we will start garbage collecting for IPv4
  	destination cache entries.  At twice this value the system will
  	refuse new allocations.
@@@ -1914,42 -1910,18 +1915,43 @@@ enhanced_dad - BOOLEA
  
  icmp/*:
  ratelimit - INTEGER
 -	Limit the maximal rates for sending ICMPv6 packets.
 +	Limit the maximal rates for sending ICMPv6 messages.
  	0 to disable any limiting,
  	otherwise the minimal space between responses in milliseconds.
  	Default: 1000
  
 +ratemask - list of comma separated ranges
 +	For ICMPv6 message types matching the ranges in the ratemask, limit
 +	the sending of the message according to ratelimit parameter.
 +
 +	The format used for both input and output is a comma separated
 +	list of ranges (e.g. "0-127,129" for ICMPv6 message type 0 to 127 and
 +	129). Writing to the file will clear all previous ranges of ICMPv6
 +	message types and update the current list with the input.
 +
 +	Refer to: https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xhtml
 +	for numerical values of ICMPv6 message types, e.g. echo request is 128
 +	and echo reply is 129.
 +
 +	Default: 0-1,3-127 (rate limit ICMPv6 errors except Packet Too Big)
 +
  echo_ignore_all - BOOLEAN
  	If set non-zero, then the kernel will ignore all ICMP ECHO
  	requests sent to it over the IPv6 protocol.
  	Default: 0
  
 +echo_ignore_multicast - BOOLEAN
 +	If set non-zero, then the kernel will ignore all ICMP ECHO
 +	requests sent to it over the IPv6 protocol via multicast.
 +	Default: 0
 +
 +echo_ignore_anycast - BOOLEAN
 +	If set non-zero, then the kernel will ignore all ICMP ECHO
 +	requests sent to it over the IPv6 protocol destined to anycast address.
 +	Default: 0
 +
  xfrm6_gc_thresh - INTEGER
+ 	(Obsolete since linux-4.14)
  	The threshold at which we will start garbage collecting for IPv6
  	destination cache entries.  At twice this value the system will
  	refuse new allocations.
diff --combined Makefile
index b6e7ee4f1fc4,633d1196bf00..e1bb7345cdd1
--- a/Makefile
+++ b/Makefile
@@@ -2,7 -2,7 +2,7 @@@
  VERSION = 5
  PATCHLEVEL = 1
  SUBLEVEL = 0
- EXTRAVERSION = -rc6
+ EXTRAVERSION = -rc7
  NAME = Shy Crocodile
  
  # *DOCUMENTATION*
@@@ -401,7 -401,6 +401,7 @@@ NM		= $(CROSS_COMPILE)n
  STRIP		= $(CROSS_COMPILE)strip
  OBJCOPY		= $(CROSS_COMPILE)objcopy
  OBJDUMP		= $(CROSS_COMPILE)objdump
 +PAHOLE		= pahole
  LEX		= flex
  YACC		= bison
  AWK		= awk
@@@ -456,7 -455,7 +456,7 @@@ KBUILD_LDFLAGS :
  GCC_PLUGINS_CFLAGS :=
  
  export ARCH SRCARCH CONFIG_SHELL HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC
 -export CPP AR NM STRIP OBJCOPY OBJDUMP KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS
 +export CPP AR NM STRIP OBJCOPY OBJDUMP PAHOLE KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS
  export MAKE LEX YACC AWK INSTALLKERNEL PERL PYTHON PYTHON2 PYTHON3 UTS_MACHINE
  export HOSTCXX KBUILD_HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
  
@@@ -679,6 -678,7 +679,7 @@@ KBUILD_CFLAGS	+= $(call cc-disable-warn
  KBUILD_CFLAGS	+= $(call cc-disable-warning, format-truncation)
  KBUILD_CFLAGS	+= $(call cc-disable-warning, format-overflow)
  KBUILD_CFLAGS	+= $(call cc-disable-warning, int-in-bool-context)
+ KBUILD_CFLAGS	+= $(call cc-disable-warning, address-of-packed-member)
  
  ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
  KBUILD_CFLAGS	+= -Os
@@@ -720,7 -720,6 +721,6 @@@ ifdef CONFIG_CC_IS_CLAN
  KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
  KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
  KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
- KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
  # Quiet clang warning: comparison of unsigned expression < 0 is always false
  KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
  # CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the
diff --combined drivers/infiniband/hw/mlx5/main.c
index 0845e95d2d11,d3dd290ae1b1..347e3cac254e
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@@ -1119,6 -1119,8 +1119,8 @@@ static int mlx5_ib_query_device(struct 
  		if (MLX5_CAP_GEN(mdev, qp_packet_based))
  			resp.flags |=
  				MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
+ 
+ 		resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
  	}
  
  	if (field_avail(typeof(resp), sw_parsing_caps,
@@@ -2009,7 -2011,7 +2011,7 @@@ static phys_addr_t uar_index2pfn(struc
  
  	fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
  
 -	return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
 +	return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
  }
  
  static int get_command(unsigned long offset)
@@@ -2066,6 -2068,7 +2068,7 @@@ static int mlx5_ib_mmap_clock_info_page
  
  	if (vma->vm_flags & VM_WRITE)
  		return -EPERM;
+ 	vma->vm_flags &= ~VM_MAYWRITE;
  
  	if (!dev->mdev->clock_info_page)
  		return -EOPNOTSUPP;
@@@ -2199,7 -2202,7 +2202,7 @@@ static int dm_mmap(struct ib_ucontext *
  	    page_idx + npages)
  		return -EINVAL;
  
 -	pfn = ((pci_resource_start(dev->mdev->pdev, 0) +
 +	pfn = ((dev->mdev->bar_addr +
  	      MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
  	      PAGE_SHIFT) +
  	      page_idx;
@@@ -2231,19 -2234,18 +2234,18 @@@ static int mlx5_ib_mmap(struct ib_ucont
  
  		if (vma->vm_flags & VM_WRITE)
  			return -EPERM;
+ 		vma->vm_flags &= ~VM_MAYWRITE;
  
  		/* Don't expose to user-space information it shouldn't have */
  		if (PAGE_SIZE > 4096)
  			return -EOPNOTSUPP;
  
- 		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
  		pfn = (dev->mdev->iseg_base +
  		       offsetof(struct mlx5_init_seg, internal_timer_h)) >>
  			PAGE_SHIFT;
- 		if (io_remap_pfn_range(vma, vma->vm_start, pfn,
- 				       PAGE_SIZE, vma->vm_page_prot))
- 			return -EAGAIN;
- 		break;
+ 		return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
+ 					 PAGE_SIZE,
+ 					 pgprot_noncached(vma->vm_page_prot));
  	case MLX5_IB_MMAP_CLOCK_INFO:
  		return mlx5_ib_mmap_clock_info_page(dev, vma, context);
  
@@@ -2283,7 -2285,7 +2285,7 @@@ struct ib_dm *mlx5_ib_alloc_dm(struct i
  		goto err_free;
  
  	start_offset = memic_addr & ~PAGE_MASK;
 -	page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) -
 +	page_idx = (memic_addr - memic->dev->bar_addr -
  		    MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
  		    PAGE_SHIFT;
  
@@@ -2326,7 -2328,7 +2328,7 @@@ int mlx5_ib_dealloc_dm(struct ib_dm *ib
  	if (ret)
  		return ret;
  
 -	page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) -
 +	page_idx = (dm->dev_addr - memic->dev->bar_addr -
  		    MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
  		    PAGE_SHIFT;
  	bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages,
diff --combined drivers/infiniband/hw/mlx5/qp.c
index ef7d69269a88,8870c350fda0..fc67d78ca959
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@@ -1818,13 -1818,16 +1818,16 @@@ static void configure_responder_scat_cq
  
  	rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
  
- 	if (rcqe_sz == 128) {
- 		MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+ 	if (init_attr->qp_type == MLX5_IB_QPT_DCT) {
+ 		if (rcqe_sz == 128)
+ 			MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+ 
  		return;
  	}
  
- 	if (init_attr->qp_type != MLX5_IB_QPT_DCT)
- 		MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
+ 	MLX5_SET(qpc, qpc, cs_res,
+ 		 rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
+ 				  MLX5_RES_SCAT_DATA32_CQE);
  }
  
  static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
@@@ -5119,7 -5122,7 +5122,7 @@@ out
  		wmb();
  
  		/* currently we support only regular doorbells */
 -		mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset, NULL);
 +		mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
  		/* Make sure doorbells don't leak out of SQ spinlock
  		 * and reach the HCA out of order.
  		 */
diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 526f36dcb204,52ade133b57c..a0de3c368f4a
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@@ -551,7 -551,7 +551,7 @@@ normal_tx
  	prod = NEXT_TX(prod);
  	txr->tx_prod = prod;
  
 -	if (!skb->xmit_more || netif_xmit_stopped(txq))
 +	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
  		bnxt_db_write(bp, &txr->tx_db, prod);
  
  tx_done:
@@@ -559,7 -559,7 +559,7 @@@
  	mmiowb();
  
  	if (unlikely(bnxt_tx_avail(bp, txr) <= MAX_SKB_FRAGS + 1)) {
 -		if (skb->xmit_more && !tx_buf->is_push)
 +		if (netdev_xmit_more() && !tx_buf->is_push)
  			bnxt_db_write(bp, &txr->tx_db, prod);
  
  		netif_tx_stop_queue(txq);
@@@ -899,7 -899,7 +899,7 @@@ static struct sk_buff *bnxt_rx_page_skb
  			     DMA_ATTR_WEAK_ORDERING);
  
  	if (unlikely(!payload))
 -		payload = eth_get_headlen(data_ptr, len);
 +		payload = eth_get_headlen(bp->dev, data_ptr, len);
  
  	skb = napi_alloc_skb(&rxr->bnapi->napi, payload);
  	if (!skb) {
@@@ -1625,7 -1625,7 +1625,7 @@@ static int bnxt_rx_pkt(struct bnxt *bp
  			netdev_warn(bp->dev, "RX buffer error %x\n", rx_err);
  			bnxt_sched_reset(bp, rxr);
  		}
- 		goto next_rx;
+ 		goto next_rx_no_len;
  	}
  
  	len = le32_to_cpu(rxcmp->rx_cmp_len_flags_type) >> RX_CMP_LEN_SHIFT;
@@@ -1706,12 -1706,13 +1706,13 @@@
  	rc = 1;
  
  next_rx:
- 	rxr->rx_prod = NEXT_RX(prod);
- 	rxr->rx_next_cons = NEXT_RX(cons);
- 
  	cpr->rx_packets += 1;
  	cpr->rx_bytes += len;
  
+ next_rx_no_len:
+ 	rxr->rx_prod = NEXT_RX(prod);
+ 	rxr->rx_next_cons = NEXT_RX(cons);
+ 
  next_rx_no_prod_no_len:
  	*raw_cons = tmp_raw_cons;
  
@@@ -5135,10 -5136,10 +5136,10 @@@ static void bnxt_hwrm_ring_free(struct 
  	for (i = 0; i < bp->tx_nr_rings; i++) {
  		struct bnxt_tx_ring_info *txr = &bp->tx_ring[i];
  		struct bnxt_ring_struct *ring = &txr->tx_ring_struct;
- 		u32 cmpl_ring_id;
  
- 		cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr);
  		if (ring->fw_ring_id != INVALID_HW_RING_ID) {
+ 			u32 cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr);
+ 
  			hwrm_ring_free_send_msg(bp, ring,
  						RING_FREE_REQ_RING_TYPE_TX,
  						close_path ? cmpl_ring_id :
@@@ -5151,10 -5152,10 +5152,10 @@@
  		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
  		struct bnxt_ring_struct *ring = &rxr->rx_ring_struct;
  		u32 grp_idx = rxr->bnapi->index;
- 		u32 cmpl_ring_id;
  
- 		cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr);
  		if (ring->fw_ring_id != INVALID_HW_RING_ID) {
+ 			u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr);
+ 
  			hwrm_ring_free_send_msg(bp, ring,
  						RING_FREE_REQ_RING_TYPE_RX,
  						close_path ? cmpl_ring_id :
@@@ -5173,10 -5174,10 +5174,10 @@@
  		struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
  		struct bnxt_ring_struct *ring = &rxr->rx_agg_ring_struct;
  		u32 grp_idx = rxr->bnapi->index;
- 		u32 cmpl_ring_id;
  
- 		cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr);
  		if (ring->fw_ring_id != INVALID_HW_RING_ID) {
+ 			u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr);
+ 
  			hwrm_ring_free_send_msg(bp, ring, type,
  						close_path ? cmpl_ring_id :
  						INVALID_HW_RING_ID);
@@@ -5315,17 -5316,16 +5316,16 @@@ __bnxt_hwrm_reserve_pf_rings(struct bnx
  	req->num_tx_rings = cpu_to_le16(tx_rings);
  	if (BNXT_NEW_RM(bp)) {
  		enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0;
+ 		enables |= stats ? FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
  		if (bp->flags & BNXT_FLAG_CHIP_P5) {
  			enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_MSIX : 0;
  			enables |= tx_rings + ring_grps ?
- 				   FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
- 				   FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+ 				   FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
  			enables |= rx_rings ?
  				FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
  		} else {
  			enables |= cp_rings ?
- 				   FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
- 				   FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+ 				   FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
  			enables |= ring_grps ?
  				   FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS |
  				   FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
@@@ -5365,14 -5365,13 +5365,13 @@@ __bnxt_hwrm_reserve_vf_rings(struct bnx
  	enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0;
  	enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS |
  			      FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0;
+ 	enables |= stats ? FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
  	if (bp->flags & BNXT_FLAG_CHIP_P5) {
  		enables |= tx_rings + ring_grps ?
- 			   FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
- 			   FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+ 			   FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
  	} else {
  		enables |= cp_rings ?
- 			   FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS |
- 			   FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0;
+ 			   FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0;
  		enables |= ring_grps ?
  			   FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0;
  	}
@@@ -6753,6 -6752,7 +6752,7 @@@ static int bnxt_hwrm_port_qstats_ext(st
  	struct hwrm_queue_pri2cos_qcfg_input req2 = {0};
  	struct hwrm_port_qstats_ext_input req = {0};
  	struct bnxt_pf_info *pf = &bp->pf;
+ 	u32 tx_stat_size;
  	int rc;
  
  	if (!(bp->flags & BNXT_FLAG_PORT_STATS_EXT))
@@@ -6762,13 -6762,16 +6762,16 @@@
  	req.port_id = cpu_to_le16(pf->port_id);
  	req.rx_stat_size = cpu_to_le16(sizeof(struct rx_port_stats_ext));
  	req.rx_stat_host_addr = cpu_to_le64(bp->hw_rx_port_stats_ext_map);
- 	req.tx_stat_size = cpu_to_le16(sizeof(struct tx_port_stats_ext));
+ 	tx_stat_size = bp->hw_tx_port_stats_ext ?
+ 		       sizeof(*bp->hw_tx_port_stats_ext) : 0;
+ 	req.tx_stat_size = cpu_to_le16(tx_stat_size);
  	req.tx_stat_host_addr = cpu_to_le64(bp->hw_tx_port_stats_ext_map);
  	mutex_lock(&bp->hwrm_cmd_lock);
  	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
  	if (!rc) {
  		bp->fw_rx_stats_ext_size = le16_to_cpu(resp->rx_stat_size) / 8;
- 		bp->fw_tx_stats_ext_size = le16_to_cpu(resp->tx_stat_size) / 8;
+ 		bp->fw_tx_stats_ext_size = tx_stat_size ?
+ 			le16_to_cpu(resp->tx_stat_size) / 8 : 0;
  	} else {
  		bp->fw_rx_stats_ext_size = 0;
  		bp->fw_tx_stats_ext_size = 0;
@@@ -8961,8 -8964,15 +8964,15 @@@ static int bnxt_cfg_rx_mode(struct bnx
  
  skip_uc:
  	rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
+ 	if (rc && vnic->mc_list_count) {
+ 		netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n",
+ 			    rc);
+ 		vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST;
+ 		vnic->mc_list_count = 0;
+ 		rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0);
+ 	}
  	if (rc)
- 		netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n",
+ 		netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %d\n",
  			   rc);
  
  	return rc;
@@@ -10058,6 -10068,23 +10068,6 @@@ static int bnxt_bridge_setlink(struct n
  	return rc;
  }
  
 -static int bnxt_get_phys_port_name(struct net_device *dev, char *buf,
 -				   size_t len)
 -{
 -	struct bnxt *bp = netdev_priv(dev);
 -	int rc;
 -
 -	/* The PF and it's VF-reps only support the switchdev framework */
 -	if (!BNXT_PF(bp))
 -		return -EOPNOTSUPP;
 -
 -	rc = snprintf(buf, len, "p%d", bp->pf.port_id);
 -
 -	if (rc >= len)
 -		return -EOPNOTSUPP;
 -	return 0;
 -}
 -
  int bnxt_get_port_parent_id(struct net_device *dev,
  			    struct netdev_phys_item_id *ppid)
  {
@@@ -10076,13 -10103,6 +10086,13 @@@
  	return 0;
  }
  
 +static struct devlink_port *bnxt_get_devlink_port(struct net_device *dev)
 +{
 +	struct bnxt *bp = netdev_priv(dev);
 +
 +	return &bp->dl_port;
 +}
 +
  static const struct net_device_ops bnxt_netdev_ops = {
  	.ndo_open		= bnxt_open,
  	.ndo_start_xmit		= bnxt_start_xmit,
@@@ -10114,7 -10134,8 +10124,7 @@@
  	.ndo_bpf		= bnxt_xdp,
  	.ndo_bridge_getlink	= bnxt_bridge_getlink,
  	.ndo_bridge_setlink	= bnxt_bridge_setlink,
 -	.ndo_get_port_parent_id	= bnxt_get_port_parent_id,
 -	.ndo_get_phys_port_name = bnxt_get_phys_port_name
 +	.ndo_get_devlink_port	= bnxt_get_devlink_port,
  };
  
  static void bnxt_remove_one(struct pci_dev *pdev)
@@@ -10438,26 -10459,6 +10448,26 @@@ static int bnxt_init_mac_addr(struct bn
  	return rc;
  }
  
 +static int bnxt_pcie_dsn_get(struct bnxt *bp, u8 dsn[])
 +{
 +	struct pci_dev *pdev = bp->pdev;
 +	int pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DSN);
 +	u32 dw;
 +
 +	if (!pos) {
 +		netdev_info(bp->dev, "Unable do read adapter's DSN");
 +		return -EOPNOTSUPP;
 +	}
 +
 +	/* DSN (two dw) is at an offset of 4 from the cap pos */
 +	pos += 4;
 +	pci_read_config_dword(pdev, pos, &dw);
 +	put_unaligned_le32(dw, &dsn[0]);
 +	pci_read_config_dword(pdev, pos + 4, &dw);
 +	put_unaligned_le32(dw, &dsn[4]);
 +	return 0;
 +}
 +
  static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
  {
  	static int version_printed;
@@@ -10598,11 -10599,6 +10608,11 @@@
  		goto init_err_pci_clean;
  	}
  
 +	/* Read the adapter's DSN to use as the eswitch switch_id */
 +	rc = bnxt_pcie_dsn_get(bp, bp->switch_id);
 +	if (rc)
 +		goto init_err_pci_clean;
 +
  	bnxt_hwrm_func_qcfg(bp);
  	bnxt_hwrm_vnic_qcaps(bp);
  	bnxt_hwrm_port_led_qcaps(bp);
@@@ -10699,6 -10695,7 +10709,7 @@@ init_err_cleanup_tc
  	bnxt_clear_int_mode(bp);
  
  init_err_pci_clean:
+ 	bnxt_free_hwrm_short_cmd_req(bp);
  	bnxt_free_hwrm_resources(bp);
  	bnxt_free_ctx_mem(bp);
  	kfree(bp->ctx);
diff --combined drivers/net/phy/marvell.c
index a7e8c8113d97,f76c4048b978..a7796134e3be
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@@ -29,7 -29,6 +29,7 @@@
  #include <linux/ethtool.h>
  #include <linux/phy.h>
  #include <linux/marvell_phy.h>
 +#include <linux/bitfield.h>
  #include <linux/of.h>
  
  #include <linux/io.h>
@@@ -92,14 -91,6 +92,14 @@@
  #define MII_88E1510_TEMP_SENSOR		0x1b
  #define MII_88E1510_TEMP_SENSOR_MASK	0xff
  
 +#define MII_88E1540_COPPER_CTRL3	0x1a
 +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_MASK	GENMASK(11, 10)
 +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_00MS	0
 +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_10MS	1
 +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_20MS	2
 +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_40MS	3
 +#define MII_88E1540_COPPER_CTRL3_FAST_LINK_DOWN		BIT(9)
 +
  #define MII_88E6390_MISC_TEST		0x1b
  #define MII_88E6390_MISC_TEST_SAMPLE_1S		0
  #define MII_88E6390_MISC_TEST_SAMPLE_10MS	BIT(14)
@@@ -137,7 -128,6 +137,7 @@@
  #define MII_PHY_LED_CTRL	        16
  #define MII_88E1121_PHY_LED_DEF		0x0030
  #define MII_88E1510_PHY_LED_DEF		0x1177
 +#define MII_88E1510_PHY_LED0_LINK_LED1_ACTIVE	0x1040
  
  #define MII_M1011_PHY_STATUS		0x11
  #define MII_M1011_PHY_STATUS_1000	0x8000
@@@ -634,10 -624,7 +634,10 @@@ static void marvell_config_led(struct p
  	 * LED[2] .. Blink, Activity
  	 */
  	case MARVELL_PHY_FAMILY_ID(MARVELL_PHY_ID_88E1510):
 -		def_config = MII_88E1510_PHY_LED_DEF;
 +		if (phydev->dev_flags & MARVELL_PHY_LED0_LINK_LED1_ACTIVE)
 +			def_config = MII_88E1510_PHY_LED0_LINK_LED1_ACTIVE;
 +		else
 +			def_config = MII_88E1510_PHY_LED_DEF;
  		break;
  	default:
  		return;
@@@ -1038,101 -1025,6 +1038,101 @@@ static int m88e1145_config_init(struct 
  	return 0;
  }
  
 +static int m88e1540_get_fld(struct phy_device *phydev, u8 *msecs)
 +{
 +	int val;
 +
 +	val = phy_read(phydev, MII_88E1540_COPPER_CTRL3);
 +	if (val < 0)
 +		return val;
 +
 +	if (!(val & MII_88E1540_COPPER_CTRL3_FAST_LINK_DOWN)) {
 +		*msecs = ETHTOOL_PHY_FAST_LINK_DOWN_OFF;
 +		return 0;
 +	}
 +
 +	val = FIELD_GET(MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_MASK, val);
 +
 +	switch (val) {
 +	case MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_00MS:
 +		*msecs = 0;
 +		break;
 +	case MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_10MS:
 +		*msecs = 10;
 +		break;
 +	case MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_20MS:
 +		*msecs = 20;
 +		break;
 +	case MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_40MS:
 +		*msecs = 40;
 +		break;
 +	default:
 +		return -EINVAL;
 +	}
 +
 +	return 0;
 +}
 +
 +static int m88e1540_set_fld(struct phy_device *phydev, const u8 *msecs)
 +{
 +	struct ethtool_eee eee;
 +	int val, ret;
 +
 +	if (*msecs == ETHTOOL_PHY_FAST_LINK_DOWN_OFF)
 +		return phy_clear_bits(phydev, MII_88E1540_COPPER_CTRL3,
 +				      MII_88E1540_COPPER_CTRL3_FAST_LINK_DOWN);
 +
 +	/* According to the Marvell data sheet EEE must be disabled for
 +	 * Fast Link Down detection to work properly
 +	 */
 +	ret = phy_ethtool_get_eee(phydev, &eee);
 +	if (!ret && eee.eee_enabled) {
 +		phydev_warn(phydev, "Fast Link Down detection requires EEE to be disabled!\n");
 +		return -EBUSY;
 +	}
 +
 +	if (*msecs <= 5)
 +		val = MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_00MS;
 +	else if (*msecs <= 15)
 +		val = MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_10MS;
 +	else if (*msecs <= 30)
 +		val = MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_20MS;
 +	else
 +		val = MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_40MS;
 +
 +	val = FIELD_PREP(MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_MASK, val);
 +
 +	ret = phy_modify(phydev, MII_88E1540_COPPER_CTRL3,
 +			 MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_MASK, val);
 +	if (ret)
 +		return ret;
 +
 +	return phy_set_bits(phydev, MII_88E1540_COPPER_CTRL3,
 +			    MII_88E1540_COPPER_CTRL3_FAST_LINK_DOWN);
 +}
 +
 +static int m88e1540_get_tunable(struct phy_device *phydev,
 +				struct ethtool_tunable *tuna, void *data)
 +{
 +	switch (tuna->id) {
 +	case ETHTOOL_PHY_FAST_LINK_DOWN:
 +		return m88e1540_get_fld(phydev, data);
 +	default:
 +		return -EOPNOTSUPP;
 +	}
 +}
 +
 +static int m88e1540_set_tunable(struct phy_device *phydev,
 +				struct ethtool_tunable *tuna, const void *data)
 +{
 +	switch (tuna->id) {
 +	case ETHTOOL_PHY_FAST_LINK_DOWN:
 +		return m88e1540_set_fld(phydev, data);
 +	default:
 +		return -EOPNOTSUPP;
 +	}
 +}
 +
  /* The VOD can be out of specification on link up. Poke an
   * undocumented register, in an undocumented page, with a magic value
   * to fix this.
@@@ -1597,9 -1489,10 +1597,10 @@@ static int marvell_get_sset_count(struc
  
  static void marvell_get_strings(struct phy_device *phydev, u8 *data)
  {
+ 	int count = marvell_get_sset_count(phydev);
  	int i;
  
- 	for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) {
+ 	for (i = 0; i < count; i++) {
  		strlcpy(data + i * ETH_GSTRING_LEN,
  			marvell_hw_stats[i].string, ETH_GSTRING_LEN);
  	}
@@@ -1627,9 -1520,10 +1628,10 @@@ static u64 marvell_get_stat(struct phy_
  static void marvell_get_stats(struct phy_device *phydev,
  			      struct ethtool_stats *stats, u64 *data)
  {
+ 	int count = marvell_get_sset_count(phydev);
  	int i;
  
- 	for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++)
+ 	for (i = 0; i < count; i++)
  		data[i] = marvell_get_stat(phydev, i);
  }
  
@@@ -2130,7 -2024,7 +2132,7 @@@ static struct phy_driver marvell_driver
  		.phy_id = MARVELL_PHY_ID_88E1101,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E1101",
 -		.features = PHY_GBIT_FEATURES,
 +		/* PHY_GBIT_FEATURES */
  		.probe = marvell_probe,
  		.config_init = &marvell_config_init,
  		.config_aneg = &m88e1101_config_aneg,
@@@ -2148,7 -2042,7 +2150,7 @@@
  		.phy_id = MARVELL_PHY_ID_88E1112,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E1112",
 -		.features = PHY_GBIT_FEATURES,
 +		/* PHY_GBIT_FEATURES */
  		.probe = marvell_probe,
  		.config_init = &m88e1111_config_init,
  		.config_aneg = &marvell_config_aneg,
@@@ -2166,7 -2060,7 +2168,7 @@@
  		.phy_id = MARVELL_PHY_ID_88E1111,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E1111",
 -		.features = PHY_GBIT_FEATURES,
 +		/* PHY_GBIT_FEATURES */
  		.probe = marvell_probe,
  		.config_init = &m88e1111_config_init,
  		.config_aneg = &marvell_config_aneg,
@@@ -2185,7 -2079,7 +2187,7 @@@
  		.phy_id = MARVELL_PHY_ID_88E1118,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E1118",
 -		.features = PHY_GBIT_FEATURES,
 +		/* PHY_GBIT_FEATURES */
  		.probe = marvell_probe,
  		.config_init = &m88e1118_config_init,
  		.config_aneg = &m88e1118_config_aneg,
@@@ -2203,7 -2097,7 +2205,7 @@@
  		.phy_id = MARVELL_PHY_ID_88E1121R,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E1121R",
 -		.features = PHY_GBIT_FEATURES,
 +		/* PHY_GBIT_FEATURES */
  		.probe = &m88e1121_probe,
  		.config_init = &marvell_config_init,
  		.config_aneg = &m88e1121_config_aneg,
@@@ -2223,7 -2117,7 +2225,7 @@@
  		.phy_id = MARVELL_PHY_ID_88E1318S,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E1318S",
 -		.features = PHY_GBIT_FEATURES,
 +		/* PHY_GBIT_FEATURES */
  		.probe = marvell_probe,
  		.config_init = &m88e1318_config_init,
  		.config_aneg = &m88e1318_config_aneg,
@@@ -2245,7 -2139,7 +2247,7 @@@
  		.phy_id = MARVELL_PHY_ID_88E1145,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E1145",
 -		.features = PHY_GBIT_FEATURES,
 +		/* PHY_GBIT_FEATURES */
  		.probe = marvell_probe,
  		.config_init = &m88e1145_config_init,
  		.config_aneg = &m88e1101_config_aneg,
@@@ -2264,7 -2158,7 +2266,7 @@@
  		.phy_id = MARVELL_PHY_ID_88E1149R,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E1149R",
 -		.features = PHY_GBIT_FEATURES,
 +		/* PHY_GBIT_FEATURES */
  		.probe = marvell_probe,
  		.config_init = &m88e1149_config_init,
  		.config_aneg = &m88e1118_config_aneg,
@@@ -2282,7 -2176,7 +2284,7 @@@
  		.phy_id = MARVELL_PHY_ID_88E1240,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E1240",
 -		.features = PHY_GBIT_FEATURES,
 +		/* PHY_GBIT_FEATURES */
  		.probe = marvell_probe,
  		.config_init = &m88e1111_config_init,
  		.config_aneg = &marvell_config_aneg,
@@@ -2300,7 -2194,7 +2302,7 @@@
  		.phy_id = MARVELL_PHY_ID_88E1116R,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E1116R",
 -		.features = PHY_GBIT_FEATURES,
 +		/* PHY_GBIT_FEATURES */
  		.probe = marvell_probe,
  		.config_init = &m88e1116r_config_init,
  		.ack_interrupt = &marvell_ack_interrupt,
@@@ -2340,7 -2234,7 +2342,7 @@@
  		.phy_id = MARVELL_PHY_ID_88E1540,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E1540",
 -		.features = PHY_GBIT_FEATURES,
 +		/* PHY_GBIT_FEATURES */
  		.probe = m88e1510_probe,
  		.config_init = &marvell_config_init,
  		.config_aneg = &m88e1510_config_aneg,
@@@ -2355,15 -2249,13 +2357,15 @@@
  		.get_sset_count = marvell_get_sset_count,
  		.get_strings = marvell_get_strings,
  		.get_stats = marvell_get_stats,
 +		.get_tunable = m88e1540_get_tunable,
 +		.set_tunable = m88e1540_set_tunable,
  	},
  	{
  		.phy_id = MARVELL_PHY_ID_88E1545,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E1545",
  		.probe = m88e1510_probe,
 -		.features = PHY_GBIT_FEATURES,
 +		/* PHY_GBIT_FEATURES */
  		.config_init = &marvell_config_init,
  		.config_aneg = &m88e1510_config_aneg,
  		.read_status = &marvell_read_status,
@@@ -2382,7 -2274,7 +2384,7 @@@
  		.phy_id = MARVELL_PHY_ID_88E3016,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E3016",
 -		.features = PHY_BASIC_FEATURES,
 +		/* PHY_BASIC_FEATURES */
  		.probe = marvell_probe,
  		.config_init = &m88e3016_config_init,
  		.aneg_done = &marvell_aneg_done,
@@@ -2402,7 -2294,7 +2404,7 @@@
  		.phy_id = MARVELL_PHY_ID_88E6390,
  		.phy_id_mask = MARVELL_PHY_ID_MASK,
  		.name = "Marvell 88E6390",
 -		.features = PHY_GBIT_FEATURES,
 +		/* PHY_GBIT_FEATURES */
  		.probe = m88e6390_probe,
  		.config_init = &marvell_config_init,
  		.config_aneg = &m88e6390_config_aneg,
@@@ -2417,8 -2309,6 +2419,8 @@@
  		.get_sset_count = marvell_get_sset_count,
  		.get_strings = marvell_get_strings,
  		.get_stats = marvell_get_stats,
 +		.get_tunable = m88e1540_get_tunable,
 +		.set_tunable = m88e1540_set_tunable,
  	},
  };
  
diff --combined drivers/net/usb/qmi_wwan.c
index 18c4e5d17b05,679e404a5224..5c3ac97519b7
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@@ -63,7 -63,6 +63,7 @@@ enum qmi_wwan_flags 
  
  enum qmi_wwan_quirks {
  	QMI_WWAN_QUIRK_DTR = 1 << 0,	/* needs "set DTR" request */
 +	QMI_WWAN_QUIRK_QUECTEL_DYNCFG = 1 << 1,	/* check num. endpoints */
  };
  
  struct qmimux_hdr {
@@@ -846,16 -845,6 +846,16 @@@ static const struct driver_info	qmi_wwa
  	.data           = QMI_WWAN_QUIRK_DTR,
  };
  
 +static const struct driver_info	qmi_wwan_info_quirk_quectel_dyncfg = {
 +	.description	= "WWAN/QMI device",
 +	.flags		= FLAG_WWAN | FLAG_SEND_ZLP,
 +	.bind		= qmi_wwan_bind,
 +	.unbind		= qmi_wwan_unbind,
 +	.manage_power	= qmi_wwan_manage_power,
 +	.rx_fixup       = qmi_wwan_rx_fixup,
 +	.data           = QMI_WWAN_QUIRK_DTR | QMI_WWAN_QUIRK_QUECTEL_DYNCFG,
 +};
 +
  #define HUAWEI_VENDOR_ID	0x12D1
  
  /* map QMI/wwan function by a fixed interface number */
@@@ -876,15 -865,6 +876,15 @@@
  #define QMI_GOBI_DEVICE(vend, prod) \
  	QMI_FIXED_INTF(vend, prod, 0)
  
 +/* Quectel does not use fixed interface numbers on at least some of their
 + * devices. We need to check the number of endpoints to ensure that we bind to
 + * the correct interface.
 + */
 +#define QMI_QUIRK_QUECTEL_DYNCFG(vend, prod) \
 +	USB_DEVICE_AND_INTERFACE_INFO(vend, prod, USB_CLASS_VENDOR_SPEC, \
 +				      USB_SUBCLASS_VENDOR_SPEC, 0xff), \
 +	.driver_info = (unsigned long)&qmi_wwan_info_quirk_quectel_dyncfg
 +
  static const struct usb_device_id products[] = {
  	/* 1. CDC ECM like devices match on the control interface */
  	{	/* Huawei E392, E398 and possibly others sharing both device id and more... */
@@@ -989,9 -969,20 +989,9 @@@
  		USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7),
  		.driver_info = (unsigned long)&qmi_wwan_info,
  	},
 -	{	/* Quectel EP06/EG06/EM06 */
 -		USB_DEVICE_AND_INTERFACE_INFO(0x2c7c, 0x0306,
 -					      USB_CLASS_VENDOR_SPEC,
 -					      USB_SUBCLASS_VENDOR_SPEC,
 -					      0xff),
 -		.driver_info	    = (unsigned long)&qmi_wwan_info_quirk_dtr,
 -	},
 -	{	/* Quectel EG12/EM12 */
 -		USB_DEVICE_AND_INTERFACE_INFO(0x2c7c, 0x0512,
 -					      USB_CLASS_VENDOR_SPEC,
 -					      USB_SUBCLASS_VENDOR_SPEC,
 -					      0xff),
 -		.driver_info	    = (unsigned long)&qmi_wwan_info_quirk_dtr,
 -	},
 +	{QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0125)},	/* Quectel EC25, EC20 R2.0  Mini PCIe */
 +	{QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0306)},	/* Quectel EP06/EG06/EM06 */
 +	{QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0512)},	/* Quectel EG12/EM12 */
  
  	/* 3. Combined interface devices matching on interface number */
  	{QMI_FIXED_INTF(0x0408, 0xea42, 4)},	/* Yota / Megafon M100-1 */
@@@ -1131,9 -1122,16 +1131,16 @@@
  	{QMI_FIXED_INTF(0x0846, 0x68d3, 8)},	/* Netgear Aircard 779S */
  	{QMI_FIXED_INTF(0x12d1, 0x140c, 1)},	/* Huawei E173 */
  	{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)},	/* Huawei E1820 */
+ 	{QMI_FIXED_INTF(0x1435, 0x0918, 3)},	/* Wistron NeWeb D16Q1 */
+ 	{QMI_FIXED_INTF(0x1435, 0x0918, 4)},	/* Wistron NeWeb D16Q1 */
+ 	{QMI_FIXED_INTF(0x1435, 0x0918, 5)},	/* Wistron NeWeb D16Q1 */
+ 	{QMI_FIXED_INTF(0x1435, 0x3185, 4)},	/* Wistron NeWeb M18Q5 */
+ 	{QMI_FIXED_INTF(0x1435, 0xd111, 4)},	/* M9615A DM11-1 D51QC */
  	{QMI_FIXED_INTF(0x1435, 0xd181, 3)},	/* Wistron NeWeb D18Q1 */
  	{QMI_FIXED_INTF(0x1435, 0xd181, 4)},	/* Wistron NeWeb D18Q1 */
  	{QMI_FIXED_INTF(0x1435, 0xd181, 5)},	/* Wistron NeWeb D18Q1 */
+ 	{QMI_FIXED_INTF(0x1435, 0xd182, 4)},	/* Wistron NeWeb D18 */
+ 	{QMI_FIXED_INTF(0x1435, 0xd182, 5)},	/* Wistron NeWeb D18 */
  	{QMI_FIXED_INTF(0x1435, 0xd191, 4)},	/* Wistron NeWeb D19Q1 */
  	{QMI_QUIRK_SET_DTR(0x1508, 0x1001, 4)},	/* Fibocom NL668 series */
  	{QMI_FIXED_INTF(0x16d8, 0x6003, 0)},	/* CMOTech 6003 */
@@@ -1189,6 -1187,7 +1196,7 @@@
  	{QMI_FIXED_INTF(0x19d2, 0x0265, 4)},	/* ONDA MT8205 4G LTE */
  	{QMI_FIXED_INTF(0x19d2, 0x0284, 4)},	/* ZTE MF880 */
  	{QMI_FIXED_INTF(0x19d2, 0x0326, 4)},	/* ZTE MF821D */
+ 	{QMI_FIXED_INTF(0x19d2, 0x0396, 3)},	/* ZTE ZM8620 */
  	{QMI_FIXED_INTF(0x19d2, 0x0412, 4)},	/* Telewell TW-LTE 4G */
  	{QMI_FIXED_INTF(0x19d2, 0x1008, 4)},	/* ZTE (Vodafone) K3570-Z */
  	{QMI_FIXED_INTF(0x19d2, 0x1010, 4)},	/* ZTE (Vodafone) K3571-Z */
@@@ -1209,7 -1208,9 +1217,9 @@@
  	{QMI_FIXED_INTF(0x19d2, 0x1425, 2)},
  	{QMI_FIXED_INTF(0x19d2, 0x1426, 2)},	/* ZTE MF91 */
  	{QMI_FIXED_INTF(0x19d2, 0x1428, 2)},	/* Telewell TW-LTE 4G v2 */
+ 	{QMI_FIXED_INTF(0x19d2, 0x1432, 3)},	/* ZTE ME3620 */
  	{QMI_FIXED_INTF(0x19d2, 0x2002, 4)},	/* ZTE (Vodafone) K3765-Z */
+ 	{QMI_FIXED_INTF(0x2001, 0x7e16, 3)},	/* D-Link DWM-221 */
  	{QMI_FIXED_INTF(0x2001, 0x7e19, 4)},	/* D-Link DWM-221 B1 */
  	{QMI_FIXED_INTF(0x2001, 0x7e35, 4)},	/* D-Link DWM-222 */
  	{QMI_FIXED_INTF(0x2020, 0x2031, 4)},	/* Olicard 600 */
@@@ -1280,6 -1281,7 +1290,6 @@@
  	{QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)},	/* HP lt4120 Snapdragon X5 LTE */
  	{QMI_FIXED_INTF(0x22de, 0x9061, 3)},	/* WeTelecom WPD-600N */
  	{QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)},	/* SIMCom 7100E, 7230E, 7600E ++ */
 -	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)},	/* Quectel EC25, EC20 R2.0  Mini PCIe */
  	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)},	/* Quectel EC21 Mini PCIe */
  	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)},	/* Quectel EG91 */
  	{QMI_FIXED_INTF(0x2c7c, 0x0296, 4)},	/* Quectel BG96 */
@@@ -1359,12 -1361,27 +1369,12 @@@ static bool quectel_ec20_detected(struc
  	return false;
  }
  
 -static bool quectel_diag_detected(struct usb_interface *intf)
 -{
 -	struct usb_device *dev = interface_to_usbdev(intf);
 -	struct usb_interface_descriptor intf_desc = intf->cur_altsetting->desc;
 -	u16 id_vendor = le16_to_cpu(dev->descriptor.idVendor);
 -	u16 id_product = le16_to_cpu(dev->descriptor.idProduct);
 -
 -	if (id_vendor != 0x2c7c || intf_desc.bNumEndpoints != 2)
 -		return false;
 -
 -	if (id_product == 0x0306 || id_product == 0x0512)
 -		return true;
 -	else
 -		return false;
 -}
 -
  static int qmi_wwan_probe(struct usb_interface *intf,
  			  const struct usb_device_id *prod)
  {
  	struct usb_device_id *id = (struct usb_device_id *)prod;
  	struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc;
 +	const struct driver_info *info;
  
  	/* Workaround to enable dynamic IDs.  This disables usbnet
  	 * blacklisting functionality.  Which, if required, can be
@@@ -1398,14 -1415,10 +1408,14 @@@
  	 * we need to match on class/subclass/protocol. These values are
  	 * identical for the diagnostic- and QMI-interface, but bNumEndpoints is
  	 * different. Ignore the current interface if the number of endpoints
 -	 * the number for the diag interface (two).
 +	 * equals the number for the diag interface (two).
  	 */
 -	if (quectel_diag_detected(intf))
 -		return -ENODEV;
 +	info = (void *)id->driver_info;
 +
 +	if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) {
 +		if (desc->bNumEndpoints == 2)
 +			return -ENODEV;
 +	}
  
  	return usbnet_probe(intf, id);
  }
diff --combined drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index fc915ecfb06e,0a87d87fbb4f..17b34f6e4515
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@@ -89,7 -89,6 +89,7 @@@
  #define IWL_22000_SO_A_HR_B_FW_PRE      "iwlwifi-so-a0-hr-b0-"
  #define IWL_22000_SO_A_GF_A_FW_PRE      "iwlwifi-so-a0-gf-a0-"
  #define IWL_22000_TY_A_GF_A_FW_PRE      "iwlwifi-ty-a0-gf-a0-"
 +#define IWL_22000_SO_A_GF4_A_FW_PRE     "iwlwifi-so-a0-gf4-a0-"
  
  #define IWL_22000_HR_MODULE_FIRMWARE(api) \
  	IWL_22000_HR_FW_PRE __stringify(api) ".ucode"
@@@ -181,11 -180,7 +181,11 @@@ static const struct iwl_ht_params iwl_2
  	.dbgc_supported = true,						\
  	.min_umac_error_event_table = 0x400000,				\
  	.d3_debug_data_base_addr = 0x401000,				\
 -	.d3_debug_data_length = 60 * 1024
 +	.d3_debug_data_length = 60 * 1024,				\
 +	.fw_mon_smem_write_ptr_addr = 0xa0c16c,				\
 +	.fw_mon_smem_write_ptr_msk = 0xfffff,				\
 +	.fw_mon_smem_cycle_cnt_ptr_addr = 0xa0c174,			\
 +	.fw_mon_smem_cycle_cnt_ptr_msk = 0xfffff
  
  #define IWL_DEVICE_AX200_COMMON						\
  	IWL_DEVICE_22000_COMMON,					\
@@@ -195,8 -190,7 +195,8 @@@
  	IWL_DEVICE_22000_COMMON,					\
  	.device_family = IWL_DEVICE_FAMILY_22000,			\
  	.base_params = &iwl_22000_base_params,				\
 -	.csr = &iwl_csr_v1
 +	.csr = &iwl_csr_v1,						\
 +	.gp2_reg_addr = 0xa02c68
  
  #define IWL_DEVICE_22560						\
  	IWL_DEVICE_22000_COMMON,					\
@@@ -207,11 -201,9 +207,11 @@@
  #define IWL_DEVICE_AX210						\
  	IWL_DEVICE_AX200_COMMON,					\
  	.device_family = IWL_DEVICE_FAMILY_AX210,			\
- 	.base_params = &iwl_22000_base_params,				\
+ 	.base_params = &iwl_22560_base_params,				\
  	.csr = &iwl_csr_v1,						\
 -	.min_txq_size = 128
 +	.min_txq_size = 128,						\
 +	.gp2_reg_addr = 0xd02c68,					\
 +	.min_256_ba_txq_size = 512
  
  const struct iwl_cfg iwl22000_2ac_cfg_hr = {
  	.name = "Intel(R) Dual Band Wireless AC 22000",
@@@ -448,20 -440,12 +448,20 @@@ const struct iwl_cfg iwlax210_2ax_cfg_s
  const struct iwl_cfg iwlax210_2ax_cfg_so_gf_a0 = {
  	.name = "Intel(R) Wi-Fi 7 AX211 160MHz",
  	.fw_name_pre = IWL_22000_SO_A_GF_A_FW_PRE,
 +	.uhb_supported = true,
  	IWL_DEVICE_AX210,
  };
  
  const struct iwl_cfg iwlax210_2ax_cfg_ty_gf_a0 = {
  	.name = "Intel(R) Wi-Fi 7 AX210 160MHz",
  	.fw_name_pre = IWL_22000_TY_A_GF_A_FW_PRE,
 +	.uhb_supported = true,
 +	IWL_DEVICE_AX210,
 +};
 +
 +const struct iwl_cfg iwlax210_2ax_cfg_so_gf4_a0 = {
 +	.name = "Intel(R) Wi-Fi 7 AX210 160MHz",
 +	.fw_name_pre = IWL_22000_SO_A_GF4_A_FW_PRE,
  	IWL_DEVICE_AX210,
  };
  
diff --combined drivers/net/wireless/intel/iwlwifi/fw/file.h
index abfdcabdcbf7,e06407dc088b..cd622af90077
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@@ -93,7 -93,7 +93,7 @@@ struct iwl_ucode_header 
  	} u;
  };
  
- #define IWL_UCODE_INI_TLV_GROUP	BIT(24)
+ #define IWL_UCODE_INI_TLV_GROUP	0x1000000
  
  /*
   * new TLV uCode file layout
@@@ -148,11 -148,14 +148,14 @@@ enum iwl_ucode_tlv_type 
  	IWL_UCODE_TLV_UMAC_DEBUG_ADDRS	= 54,
  	IWL_UCODE_TLV_LMAC_DEBUG_ADDRS	= 55,
  	IWL_UCODE_TLV_FW_RECOVERY_INFO	= 57,
- 	IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION	= IWL_UCODE_INI_TLV_GROUP | 0x1,
- 	IWL_UCODE_TLV_TYPE_HCMD			= IWL_UCODE_INI_TLV_GROUP | 0x2,
- 	IWL_UCODE_TLV_TYPE_REGIONS		= IWL_UCODE_INI_TLV_GROUP | 0x3,
- 	IWL_UCODE_TLV_TYPE_TRIGGERS		= IWL_UCODE_INI_TLV_GROUP | 0x4,
- 	IWL_UCODE_TLV_TYPE_DEBUG_FLOW		= IWL_UCODE_INI_TLV_GROUP | 0x5,
+ 
+ 	IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION	= IWL_UCODE_INI_TLV_GROUP + 0x1,
+ 	IWL_UCODE_TLV_DEBUG_BASE = IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION,
+ 	IWL_UCODE_TLV_TYPE_HCMD			= IWL_UCODE_INI_TLV_GROUP + 0x2,
+ 	IWL_UCODE_TLV_TYPE_REGIONS		= IWL_UCODE_INI_TLV_GROUP + 0x3,
+ 	IWL_UCODE_TLV_TYPE_TRIGGERS		= IWL_UCODE_INI_TLV_GROUP + 0x4,
+ 	IWL_UCODE_TLV_TYPE_DEBUG_FLOW		= IWL_UCODE_INI_TLV_GROUP + 0x5,
+ 	IWL_UCODE_TLV_DEBUG_MAX = IWL_UCODE_TLV_TYPE_DEBUG_FLOW,
  
  	/* TLVs 0x1000-0x2000 are for internal driver usage */
  	IWL_UCODE_TLV_FW_DBG_DUMP_LST	= 0x1000,
@@@ -272,15 -275,8 +275,15 @@@ typedef unsigned int __bitwise iwl_ucod
   *	version of the beacon notification.
   * @IWL_UCODE_TLV_API_BEACON_FILTER_V4: This ucode supports v4 of
   *	BEACON_FILTER_CONFIG_API_S_VER_4.
 + * @IWL_UCODE_TLV_API_REGULATORY_NVM_INFO: This ucode supports v4 of
 + *	REGULATORY_NVM_GET_INFO_RSP_API_S.
   * @IWL_UCODE_TLV_API_FTM_NEW_RANGE_REQ: This ucode supports v7 of
   *	LOCATION_RANGE_REQ_CMD_API_S and v6 of LOCATION_RANGE_RESP_NTFY_API_S.
 + * @IWL_UCODE_TLV_API_SCAN_OFFLOAD_CHANS: This ucode supports v2 of
 + *	SCAN_OFFLOAD_PROFILE_MATCH_RESULTS_S and v3 of
 + *	SCAN_OFFLOAD_PROFILES_QUERY_RSP_S.
 + * @IWL_UCODE_TLV_API_MBSSID_HE: This ucode supports v2 of
 + *	STA_CONTEXT_DOT11AX_API_S
   *
   * @NUM_IWL_UCODE_TLV_API: number of bits used
   */
@@@ -307,10 -303,7 +310,10 @@@ enum iwl_ucode_tlv_api 
  	IWL_UCODE_TLV_API_REDUCE_TX_POWER	= (__force iwl_ucode_tlv_api_t)45,
  	IWL_UCODE_TLV_API_SHORT_BEACON_NOTIF	= (__force iwl_ucode_tlv_api_t)46,
  	IWL_UCODE_TLV_API_BEACON_FILTER_V4      = (__force iwl_ucode_tlv_api_t)47,
 +	IWL_UCODE_TLV_API_REGULATORY_NVM_INFO   = (__force iwl_ucode_tlv_api_t)48,
  	IWL_UCODE_TLV_API_FTM_NEW_RANGE_REQ     = (__force iwl_ucode_tlv_api_t)49,
 +	IWL_UCODE_TLV_API_SCAN_OFFLOAD_CHANS    = (__force iwl_ucode_tlv_api_t)50,
 +	IWL_UCODE_TLV_API_MBSSID_HE		= (__force iwl_ucode_tlv_api_t)52,
  
  	NUM_IWL_UCODE_TLV_API
  #ifdef __CHECKER__
@@@ -360,7 -353,6 +363,7 @@@ typedef unsigned int __bitwise iwl_ucod
   * IWL_UCODE_TLV_CAPA_CHANNEL_SWITCH_CMD: firmware supports CSA command
   * @IWL_UCODE_TLV_CAPA_ULTRA_HB_CHANNELS: firmware supports ultra high band
   *	(6 GHz).
 + * @IWL_UCODE_TLV_CAPA_CS_MODIFY: firmware supports modify action CSA command
   * @IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE: extended DTS measurement
   * @IWL_UCODE_TLV_CAPA_SHORT_PM_TIMEOUTS: supports short PM timeouts
   * @IWL_UCODE_TLV_CAPA_BT_MPLUT_SUPPORT: supports bt-coex Multi-priority LUT
@@@ -431,7 -423,6 +434,7 @@@ enum iwl_ucode_tlv_capa 
  	IWL_UCODE_TLV_CAPA_CHANNEL_SWITCH_CMD		= (__force iwl_ucode_tlv_capa_t)46,
  	IWL_UCODE_TLV_CAPA_ULTRA_HB_CHANNELS		= (__force iwl_ucode_tlv_capa_t)48,
  	IWL_UCODE_TLV_CAPA_FTM_CALIBRATED		= (__force iwl_ucode_tlv_capa_t)47,
 +	IWL_UCODE_TLV_CAPA_CS_MODIFY			= (__force iwl_ucode_tlv_capa_t)49,
  
  	/* set 2 */
  	IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE		= (__force iwl_ucode_tlv_capa_t)64,
diff --combined drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
index 9107302cc444,c7070760a10a..0e8664375298
--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
@@@ -5,7 -5,7 +5,7 @@@
   *
   * GPL LICENSE SUMMARY
   *
 - * Copyright (C) 2018 Intel Corporation
 + * Copyright (C) 2018 - 2019 Intel Corporation
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of version 2 of the GNU General Public License as
@@@ -28,7 -28,7 +28,7 @@@
   *
   * BSD LICENSE
   *
 - * Copyright (C) 2018 Intel Corporation
 + * Copyright (C) 2018 - 2019 Intel Corporation
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@@ -73,9 -73,6 +73,9 @@@ void iwl_fw_dbg_copy_tlv(struct iwl_tra
  	int copy_size = le32_to_cpu(tlv->length) + sizeof(*tlv);
  	int offset_size = copy_size;
  
 +	if (le32_to_cpu(header->tlv_version) != 1)
 +		return;
 +
  	if (WARN_ONCE(apply_point >= IWL_FW_INI_APPLY_NUM,
  		      "Invalid apply point id %d\n", apply_point))
  		return;
@@@ -129,15 -126,13 +129,16 @@@ void iwl_alloc_dbg_tlv(struct iwl_tran
  		len -= ALIGN(tlv_len, 4);
  		data += sizeof(*tlv) + ALIGN(tlv_len, 4);
  
- 		if (!(tlv_type & IWL_UCODE_INI_TLV_GROUP))
+ 		if (tlv_type < IWL_UCODE_TLV_DEBUG_BASE ||
+ 		    tlv_type > IWL_UCODE_TLV_DEBUG_MAX)
  			continue;
  
  		hdr = (void *)&tlv->data[0];
  		apply = le32_to_cpu(hdr->apply_point);
  
 +		if (le32_to_cpu(hdr->tlv_version) != 1)
 +			continue;
 +
  		IWL_DEBUG_FW(trans, "Read TLV %x, apply point %d\n",
  			     le32_to_cpu(tlv->type), apply);
  
diff --combined drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
index 9bf2407c9b4b,6925527d8457..f043eefabb4e
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c
@@@ -743,8 -743,9 +743,8 @@@ static ssize_t iwl_dbgfs_quota_min_read
  #define MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz) \
  	_MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz, struct ieee80211_vif)
  #define MVM_DEBUGFS_ADD_FILE_VIF(name, parent, mode) do {		\
 -		if (!debugfs_create_file(#name, mode, parent, vif,	\
 -					 &iwl_dbgfs_##name##_ops))	\
 -			goto err;					\
 +		debugfs_create_file(#name, mode, parent, vif,		\
 +				    &iwl_dbgfs_##name##_ops);		\
  	} while (0)
  
  MVM_DEBUGFS_READ_FILE_OPS(mac_params);
@@@ -773,6 -774,11 +773,11 @@@ void iwl_mvm_vif_dbgfs_register(struct 
  		return;
  
  	mvmvif->dbgfs_dir = debugfs_create_dir("iwlmvm", dbgfs_dir);
+ 	if (IS_ERR_OR_NULL(mvmvif->dbgfs_dir)) {
+ 		IWL_ERR(mvm, "Failed to create debugfs directory under %pd\n",
+ 			dbgfs_dir);
+ 		return;
+ 	}
  
  	if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM &&
  	    ((vif->type == NL80211_IFTYPE_STATION && !vif->p2p) ||
@@@ -805,6 -811,12 +810,6 @@@
  
  	mvmvif->dbgfs_slink = debugfs_create_symlink(dbgfs_dir->d_name.name,
  						     mvm->debugfs_dir, buf);
 -	if (!mvmvif->dbgfs_slink)
 -		IWL_ERR(mvm, "Can't create debugfs symbolic link under %pd\n",
 -			dbgfs_dir);
 -	return;
 -err:
 -	IWL_ERR(mvm, "Can't create debugfs entity\n");
  }
  
  void iwl_mvm_vif_dbgfs_clean(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
diff --combined drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index 55d399899d1c,13681b03c10e..8da9e5572fcf
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@@ -8,7 -8,7 +8,7 @@@
   * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
   * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
   * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
 - * Copyright(c) 2018        Intel Corporation
 + * Copyright(c) 2018 - 2019 Intel Corporation
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of version 2 of the GNU General Public License as
@@@ -31,7 -31,7 +31,7 @@@
   * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
   * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
   * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
 - * Copyright(c) 2018        Intel Corporation
 + * Copyright(c) 2018 - 2019 Intel Corporation
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@@ -834,7 -834,7 +834,7 @@@ iwl_op_mode_mvm_start(struct iwl_trans 
  	mutex_lock(&mvm->mutex);
  	iwl_mvm_ref(mvm, IWL_MVM_REF_INIT_UCODE);
  	err = iwl_run_init_mvm_ucode(mvm, true);
- 	if (err)
+ 	if (err && err != -ERFKILL)
  		iwl_fw_dbg_error_collect(&mvm->fwrt, FW_DBG_TRIGGER_DRIVER);
  	if (!iwlmvm_mod_params.init_dbg || !err)
  		iwl_mvm_stop_device(mvm);
@@@ -862,7 -862,9 +862,7 @@@
  	min_backoff = iwl_mvm_min_backoff(mvm);
  	iwl_mvm_thermal_initialize(mvm, min_backoff);
  
 -	err = iwl_mvm_dbgfs_register(mvm, dbgfs_dir);
 -	if (err)
 -		goto out_unregister;
 +	iwl_mvm_dbgfs_register(mvm, dbgfs_dir);
  
  	if (!iwl_mvm_has_new_rx_stats_api(mvm))
  		memset(&mvm->rx_stats_v3, 0,
@@@ -879,6 -881,14 +879,6 @@@
  
  	return op_mode;
  
 - out_unregister:
 -	if (iwlmvm_mod_params.init_dbg)
 -		return op_mode;
 -
 -	ieee80211_unregister_hw(mvm->hw);
 -	mvm->hw_registered = false;
 -	iwl_mvm_leds_exit(mvm);
 -	iwl_mvm_thermal_exit(mvm);
   out_free:
  	iwl_fw_flush_dump(&mvm->fwrt);
  	iwl_fw_runtime_free(&mvm->fwrt);
@@@ -1095,7 -1105,7 +1095,7 @@@ static void iwl_mvm_rx_mq(struct iwl_op
  	else if (cmd == WIDE_ID(LEGACY_GROUP, FRAME_RELEASE))
  		iwl_mvm_rx_frame_release(mvm, napi, rxb, 0);
  	else if (cmd == WIDE_ID(DATA_PATH_GROUP, RX_NO_DATA_NOTIF))
 -		iwl_mvm_rx_monitor_ndp(mvm, napi, rxb, 0);
 +		iwl_mvm_rx_monitor_no_data(mvm, napi, rxb, 0);
  	else
  		iwl_mvm_rx_common(mvm, rxb, pkt);
  }
@@@ -1281,7 -1291,8 +1281,7 @@@ void iwl_mvm_nic_restart(struct iwl_mv
  	 * can't recover this since we're already half suspended.
  	 */
  	if (!mvm->fw_restart && fw_error) {
 -		iwl_fw_dbg_collect_desc(&mvm->fwrt, &iwl_dump_desc_assert,
 -					false, 0);
 +		iwl_fw_error_collect(&mvm->fwrt);
  	} else if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) {
  		struct iwl_mvm_reprobe *reprobe;
  
@@@ -1329,8 -1340,6 +1329,8 @@@
  			}
  		}
  
 +		iwl_fw_error_collect(&mvm->fwrt);
 +
  		if (fw_error && mvm->fw_restart > 0)
  			mvm->fw_restart--;
  		set_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status);
diff --combined drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
index 0b1b208de767,b516fd1867ec..1824566d08fc
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c
@@@ -8,7 -8,7 +8,7 @@@
   * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
   * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
   * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
 - * Copyright(c) 2018 Intel Corporation
 + * Copyright(c) 2018 - 2019 Intel Corporation
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of version 2 of the GNU General Public License as
@@@ -31,7 -31,7 +31,7 @@@
   * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
   * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
   * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
 - * Copyright(c) 2018 Intel Corporation
 + * Copyright(c) 2018 - 2019 Intel Corporation
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
@@@ -169,9 -169,9 +169,9 @@@ static inline int iwl_mvm_check_pn(stru
  }
  
  /* iwl_mvm_create_skb Adds the rxb to a new skb */
- static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr,
- 			       u16 len, u8 crypt_len,
- 			       struct iwl_rx_cmd_buffer *rxb)
+ static int iwl_mvm_create_skb(struct iwl_mvm *mvm, struct sk_buff *skb,
+ 			      struct ieee80211_hdr *hdr, u16 len, u8 crypt_len,
+ 			      struct iwl_rx_cmd_buffer *rxb)
  {
  	struct iwl_rx_packet *pkt = rxb_addr(rxb);
  	struct iwl_rx_mpdu_desc *desc = (void *)pkt->data;
@@@ -204,6 -204,20 +204,20 @@@
  	 * present before copying packet data.
  	 */
  	hdrlen += crypt_len;
+ 
+ 	if (WARN_ONCE(headlen < hdrlen,
+ 		      "invalid packet lengths (hdrlen=%d, len=%d, crypt_len=%d)\n",
+ 		      hdrlen, len, crypt_len)) {
+ 		/*
+ 		 * We warn and trace because we want to be able to see
+ 		 * it in trace-cmd as well.
+ 		 */
+ 		IWL_DEBUG_RX(mvm,
+ 			     "invalid packet lengths (hdrlen=%d, len=%d, crypt_len=%d)\n",
+ 			     hdrlen, len, crypt_len);
+ 		return -EINVAL;
+ 	}
+ 
  	skb_put_data(skb, hdr, hdrlen);
  	skb_put_data(skb, (u8 *)hdr + hdrlen + pad_len, headlen - hdrlen);
  
@@@ -216,6 -230,8 +230,8 @@@
  		skb_add_rx_frag(skb, 0, rxb_steal_page(rxb), offset,
  				fraglen, rxb->truesize);
  	}
+ 
+ 	return 0;
  }
  
  static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm,
@@@ -1671,7 -1687,11 +1687,11 @@@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm 
  			rx_status->boottime_ns = ktime_get_boot_ns();
  	}
  
- 	iwl_mvm_create_skb(skb, hdr, len, crypt_len, rxb);
+ 	if (iwl_mvm_create_skb(mvm, skb, hdr, len, crypt_len, rxb)) {
+ 		kfree_skb(skb);
+ 		goto out;
+ 	}
+ 
  	if (!iwl_mvm_reorder(mvm, napi, queue, sta, skb, desc))
  		iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue,
  						sta, csi);
@@@ -1679,8 -1699,8 +1699,8 @@@ out
  	rcu_read_unlock();
  }
  
 -void iwl_mvm_rx_monitor_ndp(struct iwl_mvm *mvm, struct napi_struct *napi,
 -			    struct iwl_rx_cmd_buffer *rxb, int queue)
 +void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi,
 +				struct iwl_rx_cmd_buffer *rxb, int queue)
  {
  	struct ieee80211_rx_status *rx_status;
  	struct iwl_rx_packet *pkt = rxb_addr(rxb);
@@@ -1701,6 -1721,10 +1721,6 @@@
  	if (unlikely(test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)))
  		return;
  
 -	/* Currently only NDP type is supported */
 -	if (info_type != RX_NO_DATA_INFO_TYPE_NDP)
 -		return;
 -
  	energy_a = (rssi & RX_NO_DATA_CHAIN_A_MSK) >> RX_NO_DATA_CHAIN_A_POS;
  	energy_b = (rssi & RX_NO_DATA_CHAIN_B_MSK) >> RX_NO_DATA_CHAIN_B_POS;
  	channel = (rssi & RX_NO_DATA_CHANNEL_MSK) >> RX_NO_DATA_CHANNEL_POS;
@@@ -1722,22 -1746,9 +1742,22 @@@
  
  	/* 0-length PSDU */
  	rx_status->flag |= RX_FLAG_NO_PSDU;
 -	/* currently this is the only type for which we get this notif */
 -	rx_status->zero_length_psdu_type =
 -		IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING;
 +
 +	switch (info_type) {
 +	case RX_NO_DATA_INFO_TYPE_NDP:
 +		rx_status->zero_length_psdu_type =
 +			IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING;
 +		break;
 +	case RX_NO_DATA_INFO_TYPE_MU_UNMATCHED:
 +	case RX_NO_DATA_INFO_TYPE_HE_TB_UNMATCHED:
 +		rx_status->zero_length_psdu_type =
 +			IEEE80211_RADIOTAP_ZERO_LEN_PSDU_NOT_CAPTURED;
 +		break;
 +	default:
 +		rx_status->zero_length_psdu_type =
 +			IEEE80211_RADIOTAP_ZERO_LEN_PSDU_VENDOR;
 +		break;
 +	}
  
  	/* This may be overridden by iwl_mvm_rx_he() to HE_RU */
  	switch (rate_n_flags & RATE_MCS_CHAN_WIDTH_MSK) {
diff --combined drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index c5baaae8d38e,c4375b868901..cccb8bbd7ea7
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@@ -2442,8 -2442,9 +2442,8 @@@ void iwl_pcie_dump_csr(struct iwl_tran
  #ifdef CONFIG_IWLWIFI_DEBUGFS
  /* create and remove of files */
  #define DEBUGFS_ADD_FILE(name, parent, mode) do {			\
 -	if (!debugfs_create_file(#name, mode, parent, trans,		\
 -				 &iwl_dbgfs_##name##_ops))		\
 -		goto err;						\
 +	debugfs_create_file(#name, mode, parent, trans,			\
 +			    &iwl_dbgfs_##name##_ops);			\
  } while (0)
  
  /* file operation */
@@@ -2846,7 -2847,7 +2846,7 @@@ static const struct file_operations iwl
  };
  
  /* Create the debugfs files and directories */
 -int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans)
 +void iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans)
  {
  	struct dentry *dir = trans->dbgfs_dir;
  
@@@ -2857,6 -2858,11 +2857,6 @@@
  	DEBUGFS_ADD_FILE(fh_reg, dir, 0400);
  	DEBUGFS_ADD_FILE(rfkill, dir, 0600);
  	DEBUGFS_ADD_FILE(monitor_data, dir, 0400);
 -	return 0;
 -
 -err:
 -	IWL_ERR(trans, "failed to create the trans debugfs entry\n");
 -	return -ENOMEM;
  }
  
  static void iwl_trans_pcie_debugfs_cleanup(struct iwl_trans *trans)
@@@ -3006,14 -3012,10 +3006,14 @@@ static voi
  iwl_trans_pcie_dump_pointers(struct iwl_trans *trans,
  			     struct iwl_fw_error_dump_fw_mon *fw_mon_data)
  {
 -	u32 base, write_ptr, wrap_cnt;
 +	u32 base, base_high, write_ptr, write_ptr_val, wrap_cnt;
  
 -	/* If there was a dest TLV - use the values from there */
 -	if (trans->ini_valid) {
 +	if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
 +		base = DBGC_CUR_DBGBUF_BASE_ADDR_LSB;
 +		base_high = DBGC_CUR_DBGBUF_BASE_ADDR_MSB;
 +		write_ptr = DBGC_CUR_DBGBUF_STATUS;
 +		wrap_cnt = DBGC_DBGBUF_WRAP_AROUND;
 +	} else if (trans->ini_valid) {
  		base = iwl_umac_prph(trans, MON_BUFF_BASE_ADDR_VER2);
  		write_ptr = iwl_umac_prph(trans, MON_BUFF_WRPTR_VER2);
  		wrap_cnt = iwl_umac_prph(trans, MON_BUFF_CYCLE_CNT_VER2);
@@@ -3026,18 -3028,12 +3026,18 @@@
  		write_ptr = MON_BUFF_WRPTR;
  		wrap_cnt = MON_BUFF_CYCLE_CNT;
  	}
 -	fw_mon_data->fw_mon_wr_ptr =
 -		cpu_to_le32(iwl_read_prph(trans, write_ptr));
 +
 +	write_ptr_val = iwl_read_prph(trans, write_ptr);
  	fw_mon_data->fw_mon_cycle_cnt =
  		cpu_to_le32(iwl_read_prph(trans, wrap_cnt));
  	fw_mon_data->fw_mon_base_ptr =
  		cpu_to_le32(iwl_read_prph(trans, base));
 +	if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
 +		fw_mon_data->fw_mon_base_high_ptr =
 +			cpu_to_le32(iwl_read_prph(trans, base_high));
 +		write_ptr_val &= DBGC_CUR_DBGBUF_STATUS_OFFSET_MSK;
 +	}
 +	fw_mon_data->fw_mon_wr_ptr = cpu_to_le32(write_ptr_val);
  }
  
  static u32
@@@ -3048,10 -3044,9 +3048,10 @@@ iwl_trans_pcie_dump_monitor(struct iwl_
  	u32 len = 0;
  
  	if ((trans->num_blocks &&
 -	     trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) ||
 -	     (trans->dbg_dest_tlv && !trans->ini_valid) ||
 -	     (trans->ini_valid && trans->num_blocks)) {
 +	     (trans->cfg->device_family == IWL_DEVICE_FAMILY_7000 ||
 +	      trans->cfg->device_family >= IWL_DEVICE_FAMILY_AX210 ||
 +	      trans->ini_valid)) ||
 +	    (trans->dbg_dest_tlv && !trans->ini_valid)) {
  		struct iwl_fw_error_dump_fw_mon *fw_mon_data;
  
  		(*data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_FW_MONITOR);
@@@ -3170,10 -3165,8 +3170,10 @@@ static struct iwl_trans_dump_dat
  	len = sizeof(*dump_data);
  
  	/* host commands */
 -	len += sizeof(*data) +
 -		cmdq->n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE);
 +	if (dump_mask & BIT(IWL_FW_ERROR_DUMP_TXCMD))
 +		len += sizeof(*data) +
 +			cmdq->n_window * (sizeof(*txcmd) +
 +					  TFD_MAX_PAYLOAD_SIZE);
  
  	/* FW monitor */
  	if (dump_mask & BIT(IWL_FW_ERROR_DUMP_FW_MONITOR))
@@@ -3547,9 -3540,6 +3547,9 @@@ struct iwl_trans *iwl_trans_pcie_alloc(
  		} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
  			   CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_GF)) {
  			trans->cfg = &iwlax210_2ax_cfg_so_gf_a0;
 +		} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
 +			   CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_GF4)) {
 +			trans->cfg = &iwlax210_2ax_cfg_so_gf4_a0;
  		}
  	} else if (cfg == &iwl_ax101_cfg_qu_hr) {
  		if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) ==
@@@ -3654,20 -3644,27 +3654,27 @@@ out_no_pci
  
  void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans)
  {
+ 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
  	unsigned long timeout = jiffies + IWL_TRANS_NMI_TIMEOUT;
+ 	u32 inta_addr, sw_err_bit;
+ 
+ 	if (trans_pcie->msix_enabled) {
+ 		inta_addr = CSR_MSIX_HW_INT_CAUSES_AD;
+ 		sw_err_bit = MSIX_HW_INT_CAUSES_REG_SW_ERR;
+ 	} else {
+ 		inta_addr = CSR_INT;
+ 		sw_err_bit = CSR_INT_BIT_SW_ERR;
+ 	}
  
  	iwl_disable_interrupts(trans);
  	iwl_force_nmi(trans);
  	while (time_after(timeout, jiffies)) {
- 		u32 inta_hw = iwl_read32(trans,
- 					 CSR_MSIX_HW_INT_CAUSES_AD);
+ 		u32 inta_hw = iwl_read32(trans, inta_addr);
  
  		/* Error detected by uCode */
- 		if (inta_hw & MSIX_HW_INT_CAUSES_REG_SW_ERR) {
+ 		if (inta_hw & sw_err_bit) {
  			/* Clear causes register */
- 			iwl_write32(trans, CSR_MSIX_HW_INT_CAUSES_AD,
- 				    inta_hw &
- 				    MSIX_HW_INT_CAUSES_REG_SW_ERR);
+ 			iwl_write32(trans, inta_addr, inta_hw & sw_err_bit);
  			break;
  		}
  
diff --combined fs/proc/proc_sysctl.c
index 2d61e5e8c863,7325baa8f9d4..c74570736b24
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@@ -13,7 -13,6 +13,7 @@@
  #include <linux/namei.h>
  #include <linux/mm.h>
  #include <linux/module.h>
 +#include <linux/bpf-cgroup.h>
  #include "internal.h"
  
  static const struct dentry_operations proc_sys_dentry_operations;
@@@ -570,8 -569,8 +570,8 @@@ static ssize_t proc_sys_call_handler(st
  	struct inode *inode = file_inode(filp);
  	struct ctl_table_header *head = grab_header(inode);
  	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
 +	void *new_buf = NULL;
  	ssize_t error;
 -	size_t res;
  
  	if (IS_ERR(head))
  		return PTR_ERR(head);
@@@ -589,27 -588,11 +589,27 @@@
  	if (!table->proc_handler)
  		goto out;
  
 +	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
 +					   ppos, &new_buf);
 +	if (error)
 +		goto out;
 +
  	/* careful: calling conventions are nasty here */
 -	res = count;
 -	error = table->proc_handler(table, write, buf, &res, ppos);
 +	if (new_buf) {
 +		mm_segment_t old_fs;
 +
 +		old_fs = get_fs();
 +		set_fs(KERNEL_DS);
 +		error = table->proc_handler(table, write, (void __user *)new_buf,
 +					    &count, ppos);
 +		set_fs(old_fs);
 +		kfree(new_buf);
 +	} else {
 +		error = table->proc_handler(table, write, buf, &count, ppos);
 +	}
 +
  	if (!error)
 -		error = res;
 +		error = count;
  out:
  	sysctl_head_finish(head);
  
@@@ -1643,9 -1626,11 +1643,11 @@@ static void drop_sysctl_table(struct ct
  	if (--header->nreg)
  		return;
  
- 	if (parent)
+ 	if (parent) {
  		put_links(header);
- 	start_unregistering(header);
+ 		start_unregistering(header);
+ 	}
+ 
  	if (!--header->count)
  		kfree_rcu(header, rcu);
  
diff --combined include/linux/bpf.h
index 9a21848fdb07,944ccc310201..59631dd0777c
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@@ -57,12 -57,6 +57,12 @@@ struct bpf_map_ops 
  			     const struct btf *btf,
  			     const struct btf_type *key_type,
  			     const struct btf_type *value_type);
 +
 +	/* Direct value access helpers. */
 +	int (*map_direct_value_addr)(const struct bpf_map *map,
 +				     u64 *imm, u32 off);
 +	int (*map_direct_value_meta)(const struct bpf_map *map,
 +				     u64 imm, u32 *off);
  };
  
  struct bpf_map {
@@@ -87,8 -81,7 +87,8 @@@
  	struct btf *btf;
  	u32 pages;
  	bool unpriv_array;
 -	/* 51 bytes hole */
 +	bool frozen; /* write-once */
 +	/* 48 bytes hole */
  
  	/* The 3rd and 4th cacheline with misc members to avoid false sharing
  	 * particularly with refcounting.
@@@ -184,7 -177,6 +184,7 @@@ enum bpf_arg_type 
  	ARG_PTR_TO_MAP_KEY,	/* pointer to stack used as map key */
  	ARG_PTR_TO_MAP_VALUE,	/* pointer to stack used as map value */
  	ARG_PTR_TO_UNINIT_MAP_VALUE,	/* pointer to valid memory used to store a map value */
 +	ARG_PTR_TO_MAP_VALUE_OR_NULL,	/* pointer to stack used as map value or NULL */
  
  	/* the following constraints used to prototype bpf_memcmp() and other
  	 * functions that access data on eBPF program stack
@@@ -203,9 -195,6 +203,9 @@@
  	ARG_ANYTHING,		/* any (initialized) argument is ok */
  	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
  	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
 +	ARG_PTR_TO_INT,		/* pointer to int */
 +	ARG_PTR_TO_LONG,	/* pointer to long */
 +	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock (fullsock) */
  };
  
  /* type of values returned from helper functions */
@@@ -216,7 -205,6 +216,7 @@@ enum bpf_return_type 
  	RET_PTR_TO_MAP_VALUE_OR_NULL,	/* returns a pointer to map elem value or NULL */
  	RET_PTR_TO_SOCKET_OR_NULL,	/* returns a pointer to a socket or NULL */
  	RET_PTR_TO_TCP_SOCK_OR_NULL,	/* returns a pointer to a tcp_sock or NULL */
 +	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
  };
  
  /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@@ -274,7 -262,6 +274,7 @@@ enum bpf_reg_type 
  	PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
  	PTR_TO_TCP_SOCK,	 /* reg points to struct tcp_sock */
  	PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
 +	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
  };
  
  /* The information passed from prog-specific *_is_valid_access
@@@ -364,7 -351,6 +364,7 @@@ struct bpf_prog_aux 
  	u32 used_map_cnt;
  	u32 max_ctx_offset;
  	u32 max_pkt_offset;
 +	u32 max_tp_access;
  	u32 stack_depth;
  	u32 id;
  	u32 func_cnt; /* used by non-func prog as the number of func progs */
@@@ -434,38 -420,8 +434,38 @@@ struct bpf_array 
  	};
  };
  
 +#define BPF_COMPLEXITY_LIMIT_INSNS      1000000 /* yes. 1M insns */
  #define MAX_TAIL_CALL_CNT 32
  
 +#define BPF_F_ACCESS_MASK	(BPF_F_RDONLY |		\
 +				 BPF_F_RDONLY_PROG |	\
 +				 BPF_F_WRONLY |		\
 +				 BPF_F_WRONLY_PROG)
 +
 +#define BPF_MAP_CAN_READ	BIT(0)
 +#define BPF_MAP_CAN_WRITE	BIT(1)
 +
 +static inline u32 bpf_map_flags_to_cap(struct bpf_map *map)
 +{
 +	u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
 +
 +	/* Combination of BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG is
 +	 * not possible.
 +	 */
 +	if (access_flags & BPF_F_RDONLY_PROG)
 +		return BPF_MAP_CAN_READ;
 +	else if (access_flags & BPF_F_WRONLY_PROG)
 +		return BPF_MAP_CAN_WRITE;
 +	else
 +		return BPF_MAP_CAN_READ | BPF_MAP_CAN_WRITE;
 +}
 +
 +static inline bool bpf_map_flags_access_ok(u32 access_flags)
 +{
 +	return (access_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) !=
 +	       (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
 +}
 +
  struct bpf_event_entry {
  	struct perf_event *event;
  	struct file *perf_file;
@@@ -489,6 -445,14 +489,6 @@@ typedef u32 (*bpf_convert_ctx_access_t)
  u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
  		     void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
  
 -int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 -			  union bpf_attr __user *uattr);
 -int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 -			  union bpf_attr __user *uattr);
 -int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 -				     const union bpf_attr *kattr,
 -				     union bpf_attr __user *uattr);
 -
  /* an array of programs to be executed under rcu_lock.
   *
   * Typical usage:
@@@ -546,7 -510,7 +546,7 @@@ int bpf_prog_array_copy(struct bpf_prog
  		}					\
  _out:							\
  		rcu_read_unlock();			\
- 		preempt_enable_no_resched();		\
+ 		preempt_enable();			\
  		_ret;					\
  	 })
  
@@@ -679,13 -643,6 +679,13 @@@ static inline int bpf_map_attr_numa_nod
  struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type);
  int array_map_alloc_check(union bpf_attr *attr);
  
 +int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 +			  union bpf_attr __user *uattr);
 +int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
 +			  union bpf_attr __user *uattr);
 +int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 +				     const union bpf_attr *kattr,
 +				     union bpf_attr __user *uattr);
  #else /* !CONFIG_BPF_SYSCALL */
  static inline struct bpf_prog *bpf_prog_get(u32 ufd)
  {
@@@ -797,27 -754,6 +797,27 @@@ static inline struct bpf_prog *bpf_prog
  {
  	return ERR_PTR(-EOPNOTSUPP);
  }
 +
 +static inline int bpf_prog_test_run_xdp(struct bpf_prog *prog,
 +					const union bpf_attr *kattr,
 +					union bpf_attr __user *uattr)
 +{
 +	return -ENOTSUPP;
 +}
 +
 +static inline int bpf_prog_test_run_skb(struct bpf_prog *prog,
 +					const union bpf_attr *kattr,
 +					union bpf_attr __user *uattr)
 +{
 +	return -ENOTSUPP;
 +}
 +
 +static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 +						   const union bpf_attr *kattr,
 +						   union bpf_attr __user *uattr)
 +{
 +	return -ENOTSUPP;
 +}
  #endif /* CONFIG_BPF_SYSCALL */
  
  static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
@@@ -993,8 -929,6 +993,8 @@@ extern const struct bpf_func_proto bpf_
  extern const struct bpf_func_proto bpf_spin_lock_proto;
  extern const struct bpf_func_proto bpf_spin_unlock_proto;
  extern const struct bpf_func_proto bpf_get_local_storage_proto;
 +extern const struct bpf_func_proto bpf_strtol_proto;
 +extern const struct bpf_func_proto bpf_strtoul_proto;
  
  /* Shared helpers among cBPF and eBPF. */
  void bpf_user_rnd_init_once(void);
diff --combined include/net/xfrm.h
index eb5018b1cf9c,c9b0b2b5d672..debcc5198e33
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@@ -132,17 -132,6 +132,17 @@@ struct xfrm_state_offload 
  	u8			flags;
  };
  
 +struct xfrm_mode {
 +	u8 encap;
 +	u8 family;
 +	u8 flags;
 +};
 +
 +/* Flags for xfrm_mode. */
 +enum {
 +	XFRM_MODE_FLAG_TUNNEL = 1,
 +};
 +
  /* Full description of state of transformer. */
  struct xfrm_state {
  	possible_net_t		xs_net;
@@@ -245,9 -234,9 +245,9 @@@
  	/* Reference to data common to all the instances of this
  	 * transformer. */
  	const struct xfrm_type	*type;
 -	struct xfrm_mode	*inner_mode;
 -	struct xfrm_mode	*inner_mode_iaf;
 -	struct xfrm_mode	*outer_mode;
 +	struct xfrm_mode	inner_mode;
 +	struct xfrm_mode	inner_mode_iaf;
 +	struct xfrm_mode	outer_mode;
  
  	const struct xfrm_type_offload	*type_offload;
  
@@@ -306,7 -295,8 +306,8 @@@ struct xfrm_replay 
  };
  
  struct xfrm_if_cb {
- 	struct xfrm_if	*(*decode_session)(struct sk_buff *skb);
+ 	struct xfrm_if	*(*decode_session)(struct sk_buff *skb,
+ 					   unsigned short family);
  };
  
  void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb);
@@@ -326,6 -316,13 +327,6 @@@ struct xfrm_policy_afinfo 
  					     xfrm_address_t *saddr,
  					     xfrm_address_t *daddr,
  					     u32 mark);
 -	void			(*decode_session)(struct sk_buff *skb,
 -						  struct flowi *fl,
 -						  int reverse);
 -	int			(*get_tos)(const struct flowi *fl);
 -	int			(*init_path)(struct xfrm_dst *path,
 -					     struct dst_entry *dst,
 -					     int nfheader_len);
  	int			(*fill_dst)(struct xfrm_dst *xdst,
  					    struct net_device *dev,
  					    const struct flowi *fl);
@@@ -351,6 -348,7 +352,6 @@@ struct xfrm_state_afinfo 
  	struct module			*owner;
  	const struct xfrm_type		*type_map[IPPROTO_MAX];
  	const struct xfrm_type_offload	*type_offload_map[IPPROTO_MAX];
 -	struct xfrm_mode		*mode_map[XFRM_MODE_MAX];
  
  	int			(*init_flags)(struct xfrm_state *x);
  	void			(*init_tempsel)(struct xfrm_selector *sel,
@@@ -425,6 -423,78 +426,6 @@@ struct xfrm_type_offload 
  int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family);
  int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
  
 -struct xfrm_mode {
 -	/*
 -	 * Remove encapsulation header.
 -	 *
 -	 * The IP header will be moved over the top of the encapsulation
 -	 * header.
 -	 *
 -	 * On entry, the transport header shall point to where the IP header
 -	 * should be and the network header shall be set to where the IP
 -	 * header currently is.  skb->data shall point to the start of the
 -	 * payload.
 -	 */
 -	int (*input2)(struct xfrm_state *x, struct sk_buff *skb);
 -
 -	/*
 -	 * This is the actual input entry point.
 -	 *
 -	 * For transport mode and equivalent this would be identical to
 -	 * input2 (which does not need to be set).  While tunnel mode
 -	 * and equivalent would set this to the tunnel encapsulation function
 -	 * xfrm4_prepare_input that would in turn call input2.
 -	 */
 -	int (*input)(struct xfrm_state *x, struct sk_buff *skb);
 -
 -	/*
 -	 * Add encapsulation header.
 -	 *
 -	 * On exit, the transport header will be set to the start of the
 -	 * encapsulation header to be filled in by x->type->output and
 -	 * the mac header will be set to the nextheader (protocol for
 -	 * IPv4) field of the extension header directly preceding the
 -	 * encapsulation header, or in its absence, that of the top IP
 -	 * header.  The value of the network header will always point
 -	 * to the top IP header while skb->data will point to the payload.
 -	 */
 -	int (*output2)(struct xfrm_state *x,struct sk_buff *skb);
 -
 -	/*
 -	 * This is the actual output entry point.
 -	 *
 -	 * For transport mode and equivalent this would be identical to
 -	 * output2 (which does not need to be set).  While tunnel mode
 -	 * and equivalent would set this to a tunnel encapsulation function
 -	 * (xfrm4_prepare_output or xfrm6_prepare_output) that would in turn
 -	 * call output2.
 -	 */
 -	int (*output)(struct xfrm_state *x, struct sk_buff *skb);
 -
 -	/*
 -	 * Adjust pointers into the packet and do GSO segmentation.
 -	 */
 -	struct sk_buff *(*gso_segment)(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features);
 -
 -	/*
 -	 * Adjust pointers into the packet when IPsec is done at layer2.
 -	 */
 -	void (*xmit)(struct xfrm_state *x, struct sk_buff *skb);
 -
 -	struct xfrm_state_afinfo *afinfo;
 -	struct module *owner;
 -	unsigned int encap;
 -	int flags;
 -};
 -
 -/* Flags for xfrm_mode. */
 -enum {
 -	XFRM_MODE_FLAG_TUNNEL = 1,
 -};
 -
 -int xfrm_register_mode(struct xfrm_mode *mode, int family);
 -int xfrm_unregister_mode(struct xfrm_mode *mode, int family);
 -
  static inline int xfrm_af2proto(unsigned int family)
  {
  	switch(family) {
@@@ -437,13 -507,13 +438,13 @@@
  	}
  }
  
 -static inline struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto)
 +static inline const struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto)
  {
  	if ((ipproto == IPPROTO_IPIP && x->props.family == AF_INET) ||
  	    (ipproto == IPPROTO_IPV6 && x->props.family == AF_INET6))
 -		return x->inner_mode;
 +		return &x->inner_mode;
  	else
 -		return x->inner_mode_iaf;
 +		return &x->inner_mode_iaf;
  }
  
  struct xfrm_tmpl {
@@@ -1335,6 -1405,23 +1336,23 @@@ static inline int xfrm_state_kern(cons
  	return atomic_read(&x->tunnel_users);
  }
  
+ static inline bool xfrm_id_proto_valid(u8 proto)
+ {
+ 	switch (proto) {
+ 	case IPPROTO_AH:
+ 	case IPPROTO_ESP:
+ 	case IPPROTO_COMP:
+ #if IS_ENABLED(CONFIG_IPV6)
+ 	case IPPROTO_ROUTING:
+ 	case IPPROTO_DSTOPTS:
+ #endif
+ 		return true;
+ 	default:
+ 		return false;
+ 	}
+ }
+ 
+ /* IPSEC_PROTO_ANY only matches 3 IPsec protocols, 0 could match all. */
  static inline int xfrm_id_proto_match(u8 proto, u8 userproto)
  {
  	return (!userproto || proto == userproto ||
@@@ -1536,6 -1623,7 +1554,6 @@@ int xfrm_init_replay(struct xfrm_state 
  int xfrm_state_mtu(struct xfrm_state *x, int mtu);
  int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload);
  int xfrm_init_state(struct xfrm_state *x);
 -int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb);
  int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type);
  int xfrm_input_resume(struct sk_buff *skb, int nexthdr);
  int xfrm_trans_queue(struct sk_buff *skb,
@@@ -1543,11 -1631,7 +1561,11 @@@
  				   struct sk_buff *));
  int xfrm_output_resume(struct sk_buff *skb, int err);
  int xfrm_output(struct sock *sk, struct sk_buff *skb);
 -int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb);
 +
 +#if IS_ENABLED(CONFIG_NET_PKTGEN)
 +int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
 +#endif
 +
  void xfrm_local_error(struct sk_buff *skb, int mtu);
  int xfrm4_extract_header(struct sk_buff *skb);
  int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
@@@ -1566,8 -1650,10 +1584,8 @@@ static inline int xfrm4_rcv_spi(struct 
  }
  
  int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb);
 -int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb);
  int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb);
  int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb);
 -int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err);
  int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol);
  int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char protocol);
  int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
@@@ -1583,6 -1669,7 +1601,6 @@@ int xfrm6_rcv(struct sk_buff *skb)
  int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr,
  		     xfrm_address_t *saddr, u8 proto);
  void xfrm6_local_error(struct sk_buff *skb, u32 mtu);
 -int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err);
  int xfrm6_protocol_register(struct xfrm6_protocol *handler, unsigned char protocol);
  int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, unsigned char protocol);
  int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family);
@@@ -1590,6 -1677,7 +1608,6 @@@ int xfrm6_tunnel_deregister(struct xfrm
  __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr);
  __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr);
  int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb);
 -int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb);
  int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
  int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb);
  int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
@@@ -1981,7 -2069,7 +1999,7 @@@ static inline int xfrm_tunnel_check(str
  			tunnel = true;
  		break;
  	}
 -	if (tunnel && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL))
 +	if (tunnel && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL))
  		return -EINVAL;
  
  	return 0;
diff --combined kernel/bpf/verifier.c
index 271717246af3,09d5d972c9ff..7b05e8938d5c
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@@ -176,6 -176,7 +176,6 @@@ struct bpf_verifier_stack_elem 
  	struct bpf_verifier_stack_elem *next;
  };
  
 -#define BPF_COMPLEXITY_LIMIT_INSNS	131072
  #define BPF_COMPLEXITY_LIMIT_STACK	1024
  #define BPF_COMPLEXITY_LIMIT_STATES	64
  
@@@ -376,8 -377,7 +376,8 @@@ static bool is_release_function(enum bp
  static bool is_acquire_function(enum bpf_func_id func_id)
  {
  	return func_id == BPF_FUNC_sk_lookup_tcp ||
 -		func_id == BPF_FUNC_sk_lookup_udp;
 +		func_id == BPF_FUNC_sk_lookup_udp ||
 +		func_id == BPF_FUNC_skc_lookup_tcp;
  }
  
  static bool is_ptr_cast_function(enum bpf_func_id func_id)
@@@ -405,7 -405,6 +405,7 @@@ static const char * const reg_type_str[
  	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
  	[PTR_TO_TCP_SOCK]	= "tcp_sock",
  	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 +	[PTR_TO_TP_BUFFER]	= "tp_buffer",
  };
  
  static char slot_type_char[] = {
@@@ -1092,7 -1091,7 +1092,7 @@@ static int check_subprogs(struct bpf_ve
  	 */
  	subprog[env->subprog_cnt].start = insn_cnt;
  
 -	if (env->log.level > 1)
 +	if (env->log.level & BPF_LOG_LEVEL2)
  		for (i = 0; i < env->subprog_cnt; i++)
  			verbose(env, "func#%d @%d\n", i, subprog[i].start);
  
@@@ -1139,7 -1138,6 +1139,7 @@@ static int mark_reg_read(struct bpf_ver
  			 struct bpf_reg_state *parent)
  {
  	bool writes = parent == state->parent; /* Observe write marks */
 +	int cnt = 0;
  
  	while (parent) {
  		/* if read wasn't screened by an earlier write ... */
@@@ -1151,25 -1149,12 +1151,25 @@@
  				parent->var_off.value, parent->off);
  			return -EFAULT;
  		}
 +		if (parent->live & REG_LIVE_READ)
 +			/* The parentage chain never changes and
 +			 * this parent was already marked as LIVE_READ.
 +			 * There is no need to keep walking the chain again and
 +			 * keep re-marking all parents as LIVE_READ.
 +			 * This case happens when the same register is read
 +			 * multiple times without writes into it in-between.
 +			 */
 +			break;
  		/* ... then we depend on parent's value */
  		parent->live |= REG_LIVE_READ;
  		state = parent;
  		parent = state->parent;
  		writes = true;
 +		cnt++;
  	}
 +
 +	if (env->longest_mark_read_walk < cnt)
 +		env->longest_mark_read_walk = cnt;
  	return 0;
  }
  
@@@ -1178,32 -1163,30 +1178,32 @@@ static int check_reg_arg(struct bpf_ver
  {
  	struct bpf_verifier_state *vstate = env->cur_state;
  	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 -	struct bpf_reg_state *regs = state->regs;
 +	struct bpf_reg_state *reg, *regs = state->regs;
  
  	if (regno >= MAX_BPF_REG) {
  		verbose(env, "R%d is invalid\n", regno);
  		return -EINVAL;
  	}
  
 +	reg = &regs[regno];
  	if (t == SRC_OP) {
  		/* check whether register used as source operand can be read */
 -		if (regs[regno].type == NOT_INIT) {
 +		if (reg->type == NOT_INIT) {
  			verbose(env, "R%d !read_ok\n", regno);
  			return -EACCES;
  		}
  		/* We don't need to worry about FP liveness because it's read-only */
 -		if (regno != BPF_REG_FP)
 -			return mark_reg_read(env, &regs[regno],
 -					     regs[regno].parent);
 +		if (regno == BPF_REG_FP)
 +			return 0;
 +
 +		return mark_reg_read(env, reg, reg->parent);
  	} else {
  		/* check whether register used as dest operand can be written to */
  		if (regno == BPF_REG_FP) {
  			verbose(env, "frame pointer is read only\n");
  			return -EACCES;
  		}
 -		regs[regno].live |= REG_LIVE_WRITTEN;
 +		reg->live |= REG_LIVE_WRITTEN;
  		if (t == DST_OP)
  			mark_reg_unknown(env, regs, regno);
  	}
@@@ -1429,7 -1412,7 +1429,7 @@@ static int check_stack_access(struct bp
  		char tn_buf[48];
  
  		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 -		verbose(env, "variable stack access var_off=%s off=%d size=%d",
 +		verbose(env, "variable stack access var_off=%s off=%d size=%d\n",
  			tn_buf, off, size);
  		return -EACCES;
  	}
@@@ -1442,28 -1425,6 +1442,28 @@@
  	return 0;
  }
  
 +static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
 +				 int off, int size, enum bpf_access_type type)
 +{
 +	struct bpf_reg_state *regs = cur_regs(env);
 +	struct bpf_map *map = regs[regno].map_ptr;
 +	u32 cap = bpf_map_flags_to_cap(map);
 +
 +	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
 +		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
 +			map->value_size, off, size);
 +		return -EACCES;
 +	}
 +
 +	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
 +		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
 +			map->value_size, off, size);
 +		return -EACCES;
 +	}
 +
 +	return 0;
 +}
 +
  /* check read/write into map element returned by bpf_map_lookup_elem() */
  static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
  			      int size, bool zero_size_allowed)
@@@ -1493,7 -1454,7 +1493,7 @@@ static int check_map_access(struct bpf_
  	 * need to try adding each of min_value and max_value to off
  	 * to make sure our theoretical access will be safe.
  	 */
 -	if (env->log.level)
 +	if (env->log.level & BPF_LOG_LEVEL)
  		print_verifier_state(env, state);
  
  	/* The minimum value is only important with signed
@@@ -1994,32 -1955,6 +1994,32 @@@ static int check_ctx_reg(struct bpf_ver
  	return 0;
  }
  
 +static int check_tp_buffer_access(struct bpf_verifier_env *env,
 +				  const struct bpf_reg_state *reg,
 +				  int regno, int off, int size)
 +{
 +	if (off < 0) {
 +		verbose(env,
 +			"R%d invalid tracepoint buffer access: off=%d, size=%d",
 +			regno, off, size);
 +		return -EACCES;
 +	}
 +	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
 +		char tn_buf[48];
 +
 +		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 +		verbose(env,
 +			"R%d invalid variable buffer offset: off=%d, var_off=%s",
 +			regno, off, tn_buf);
 +		return -EACCES;
 +	}
 +	if (off + size > env->prog->aux->max_tp_access)
 +		env->prog->aux->max_tp_access = off + size;
 +
 +	return 0;
 +}
 +
 +
  /* truncate register to smaller size (in bytes)
   * must be called with size < BPF_REG_SIZE
   */
@@@ -2076,9 -2011,7 +2076,9 @@@ static int check_mem_access(struct bpf_
  			verbose(env, "R%d leaks addr into map\n", value_regno);
  			return -EACCES;
  		}
 -
 +		err = check_map_access_type(env, regno, off, size, t);
 +		if (err)
 +			return err;
  		err = check_map_access(env, regno, off, size, false);
  		if (!err && t == BPF_READ && value_regno >= 0)
  			mark_reg_unknown(env, regs, value_regno);
@@@ -2164,10 -2097,6 +2164,10 @@@
  		err = check_sock_access(env, insn_idx, regno, off, size, t);
  		if (!err && value_regno >= 0)
  			mark_reg_unknown(env, regs, value_regno);
 +	} else if (reg->type == PTR_TO_TP_BUFFER) {
 +		err = check_tp_buffer_access(env, reg, regno, off, size);
 +		if (!err && t == BPF_READ && value_regno >= 0)
 +			mark_reg_unknown(env, regs, value_regno);
  	} else {
  		verbose(env, "R%d invalid mem access '%s'\n", regno,
  			reg_type_str[reg->type]);
@@@ -2228,29 -2157,6 +2228,29 @@@ static int check_xadd(struct bpf_verifi
  				BPF_SIZE(insn->code), BPF_WRITE, -1, true);
  }
  
 +static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno,
 +				  int off, int access_size,
 +				  bool zero_size_allowed)
 +{
 +	struct bpf_reg_state *reg = reg_state(env, regno);
 +
 +	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
 +	    access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
 +		if (tnum_is_const(reg->var_off)) {
 +			verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
 +				regno, off, access_size);
 +		} else {
 +			char tn_buf[48];
 +
 +			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 +			verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n",
 +				regno, tn_buf, access_size);
 +		}
 +		return -EACCES;
 +	}
 +	return 0;
 +}
 +
  /* when register 'regno' is passed into function that will read 'access_size'
   * bytes from that pointer, make sure that it's within stack boundary
   * and all elements of stack are initialized.
@@@ -2263,7 -2169,7 +2263,7 @@@ static int check_stack_boundary(struct 
  {
  	struct bpf_reg_state *reg = reg_state(env, regno);
  	struct bpf_func_state *state = func(env, reg);
 -	int off, i, slot, spi;
 +	int err, min_off, max_off, i, slot, spi;
  
  	if (reg->type != PTR_TO_STACK) {
  		/* Allow zero-byte read from NULL, regardless of pointer type */
@@@ -2277,57 -2183,21 +2277,57 @@@
  		return -EACCES;
  	}
  
 -	/* Only allow fixed-offset stack reads */
 -	if (!tnum_is_const(reg->var_off)) {
 -		char tn_buf[48];
 +	if (tnum_is_const(reg->var_off)) {
 +		min_off = max_off = reg->var_off.value + reg->off;
 +		err = __check_stack_boundary(env, regno, min_off, access_size,
 +					     zero_size_allowed);
 +		if (err)
 +			return err;
 +	} else {
 +		/* Variable offset is prohibited for unprivileged mode for
 +		 * simplicity since it requires corresponding support in
 +		 * Spectre masking for stack ALU.
 +		 * See also retrieve_ptr_limit().
 +		 */
 +		if (!env->allow_ptr_leaks) {
 +			char tn_buf[48];
  
 -		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 -		verbose(env, "invalid variable stack read R%d var_off=%s\n",
 -			regno, tn_buf);
 -		return -EACCES;
 -	}
 -	off = reg->off + reg->var_off.value;
 -	if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
 -	    access_size < 0 || (access_size == 0 && !zero_size_allowed)) {
 -		verbose(env, "invalid stack type R%d off=%d access_size=%d\n",
 -			regno, off, access_size);
 -		return -EACCES;
 +			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 +			verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n",
 +				regno, tn_buf);
 +			return -EACCES;
 +		}
 +		/* Only initialized buffer on stack is allowed to be accessed
 +		 * with variable offset. With uninitialized buffer it's hard to
 +		 * guarantee that whole memory is marked as initialized on
 +		 * helper return since specific bounds are unknown what may
 +		 * cause uninitialized stack leaking.
 +		 */
 +		if (meta && meta->raw_mode)
 +			meta = NULL;
 +
 +		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
 +		    reg->smax_value <= -BPF_MAX_VAR_OFF) {
 +			verbose(env, "R%d unbounded indirect variable offset stack access\n",
 +				regno);
 +			return -EACCES;
 +		}
 +		min_off = reg->smin_value + reg->off;
 +		max_off = reg->smax_value + reg->off;
 +		err = __check_stack_boundary(env, regno, min_off, access_size,
 +					     zero_size_allowed);
 +		if (err) {
 +			verbose(env, "R%d min value is outside of stack bound\n",
 +				regno);
 +			return err;
 +		}
 +		err = __check_stack_boundary(env, regno, max_off, access_size,
 +					     zero_size_allowed);
 +		if (err) {
 +			verbose(env, "R%d max value is outside of stack bound\n",
 +				regno);
 +			return err;
 +		}
  	}
  
  	if (meta && meta->raw_mode) {
@@@ -2336,10 -2206,10 +2336,10 @@@
  		return 0;
  	}
  
 -	for (i = 0; i < access_size; i++) {
 +	for (i = min_off; i < max_off + access_size; i++) {
  		u8 *stype;
  
 -		slot = -(off + i) - 1;
 +		slot = -i - 1;
  		spi = slot / BPF_REG_SIZE;
  		if (state->allocated_stack <= slot)
  			goto err;
@@@ -2352,16 -2222,8 +2352,16 @@@
  			goto mark;
  		}
  err:
 -		verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
 -			off, i, access_size);
 +		if (tnum_is_const(reg->var_off)) {
 +			verbose(env, "invalid indirect read from stack off %d+%d size %d\n",
 +				min_off, i - min_off, access_size);
 +		} else {
 +			char tn_buf[48];
 +
 +			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 +			verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n",
 +				tn_buf, i - min_off, access_size);
 +		}
  		return -EACCES;
  mark:
  		/* reading any byte out of 8-byte 'spill_slot' will cause
@@@ -2370,7 -2232,7 +2370,7 @@@
  		mark_reg_read(env, &state->stack[spi].spilled_ptr,
  			      state->stack[spi].spilled_ptr.parent);
  	}
 -	return update_stack_depth(env, state, off);
 +	return update_stack_depth(env, state, min_off);
  }
  
  static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
@@@ -2385,10 -2247,6 +2385,10 @@@
  		return check_packet_access(env, regno, reg->off, access_size,
  					   zero_size_allowed);
  	case PTR_TO_MAP_VALUE:
 +		if (check_map_access_type(env, regno, reg->off, access_size,
 +					  meta && meta->raw_mode ? BPF_WRITE :
 +					  BPF_READ))
 +			return -EACCES;
  		return check_map_access(env, regno, reg->off, access_size,
  					zero_size_allowed);
  	default: /* scalar_value|ptr_to_stack or invalid ptr */
@@@ -2495,22 -2353,6 +2495,22 @@@ static bool arg_type_is_mem_size(enum b
  	       type == ARG_CONST_SIZE_OR_ZERO;
  }
  
 +static bool arg_type_is_int_ptr(enum bpf_arg_type type)
 +{
 +	return type == ARG_PTR_TO_INT ||
 +	       type == ARG_PTR_TO_LONG;
 +}
 +
 +static int int_ptr_type_to_size(enum bpf_arg_type type)
 +{
 +	if (type == ARG_PTR_TO_INT)
 +		return sizeof(u32);
 +	else if (type == ARG_PTR_TO_LONG)
 +		return sizeof(u64);
 +
 +	return -EINVAL;
 +}
 +
  static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
  			  enum bpf_arg_type arg_type,
  			  struct bpf_call_arg_meta *meta)
@@@ -2543,15 -2385,10 +2543,15 @@@
  
  	if (arg_type == ARG_PTR_TO_MAP_KEY ||
  	    arg_type == ARG_PTR_TO_MAP_VALUE ||
 -	    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
 +	    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
 +	    arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
  		expected_type = PTR_TO_STACK;
 -		if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
 -		    type != expected_type)
 +		if (register_is_null(reg) &&
 +		    arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL)
 +			/* final test in check_stack_boundary() */;
 +		else if (!type_is_pkt_pointer(type) &&
 +			 type != PTR_TO_MAP_VALUE &&
 +			 type != expected_type)
  			goto err_type;
  	} else if (arg_type == ARG_CONST_SIZE ||
  		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
@@@ -2583,10 -2420,6 +2583,10 @@@
  			}
  			meta->ref_obj_id = reg->ref_obj_id;
  		}
 +	} else if (arg_type == ARG_PTR_TO_SOCKET) {
 +		expected_type = PTR_TO_SOCKET;
 +		if (type != expected_type)
 +			goto err_type;
  	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
  		if (meta->func_id == BPF_FUNC_spin_lock) {
  			if (process_spin_lock(env, regno, true))
@@@ -2612,12 -2445,6 +2612,12 @@@
  			 type != expected_type)
  			goto err_type;
  		meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
 +	} else if (arg_type_is_int_ptr(arg_type)) {
 +		expected_type = PTR_TO_STACK;
 +		if (!type_is_pkt_pointer(type) &&
 +		    type != PTR_TO_MAP_VALUE &&
 +		    type != expected_type)
 +			goto err_type;
  	} else {
  		verbose(env, "unsupported arg_type %d\n", arg_type);
  		return -EFAULT;
@@@ -2644,8 -2471,6 +2644,8 @@@
  					      meta->map_ptr->key_size, false,
  					      NULL);
  	} else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
 +		   (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
 +		    !register_is_null(reg)) ||
  		   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
  		/* bpf_map_xxx(..., map_ptr, ..., value) call:
  		 * check [value, value + map->value_size) validity
@@@ -2701,13 -2526,6 +2701,13 @@@
  		err = check_helper_mem_access(env, regno - 1,
  					      reg->umax_value,
  					      zero_size_allowed, meta);
 +	} else if (arg_type_is_int_ptr(arg_type)) {
 +		int size = int_ptr_type_to_size(arg_type);
 +
 +		err = check_helper_mem_access(env, regno, size, false, meta);
 +		if (err)
 +			return err;
 +		err = check_ptr_alignment(env, reg, 0, size, true);
  	}
  
  	return err;
@@@ -2795,11 -2613,6 +2795,11 @@@ static int check_map_func_compatibility
  		    func_id != BPF_FUNC_map_push_elem)
  			goto error;
  		break;
 +	case BPF_MAP_TYPE_SK_STORAGE:
 +		if (func_id != BPF_FUNC_sk_storage_get &&
 +		    func_id != BPF_FUNC_sk_storage_delete)
 +			goto error;
 +		break;
  	default:
  		break;
  	}
@@@ -2863,11 -2676,6 +2863,11 @@@
  		    map->map_type != BPF_MAP_TYPE_STACK)
  			goto error;
  		break;
 +	case BPF_FUNC_sk_storage_get:
 +	case BPF_FUNC_sk_storage_delete:
 +		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
 +			goto error;
 +		break;
  	default:
  		break;
  	}
@@@ -3097,7 -2905,7 +3097,7 @@@ static int check_func_call(struct bpf_v
  	/* and go analyze first insn of the callee */
  	*insn_idx = target_insn;
  
 -	if (env->log.level) {
 +	if (env->log.level & BPF_LOG_LEVEL) {
  		verbose(env, "caller:\n");
  		print_verifier_state(env, caller);
  		verbose(env, "callee:\n");
@@@ -3137,7 -2945,7 +3137,7 @@@ static int prepare_func_exit(struct bpf
  		return err;
  
  	*insn_idx = callee->callsite + 1;
 -	if (env->log.level) {
 +	if (env->log.level & BPF_LOG_LEVEL) {
  		verbose(env, "returning from callee:\n");
  		print_verifier_state(env, callee);
  		verbose(env, "to caller at %d:\n", *insn_idx);
@@@ -3171,7 -2979,6 +3171,7 @@@ record_func_map(struct bpf_verifier_en
  		int func_id, int insn_idx)
  {
  	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
 +	struct bpf_map *map = meta->map_ptr;
  
  	if (func_id != BPF_FUNC_tail_call &&
  	    func_id != BPF_FUNC_map_lookup_elem &&
@@@ -3182,24 -2989,11 +3182,24 @@@
  	    func_id != BPF_FUNC_map_peek_elem)
  		return 0;
  
 -	if (meta->map_ptr == NULL) {
 +	if (map == NULL) {
  		verbose(env, "kernel subsystem misconfigured verifier\n");
  		return -EINVAL;
  	}
  
 +	/* In case of read-only, some additional restrictions
 +	 * need to be applied in order to prevent altering the
 +	 * state of the map from program side.
 +	 */
 +	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
 +	    (func_id == BPF_FUNC_map_delete_elem ||
 +	     func_id == BPF_FUNC_map_update_elem ||
 +	     func_id == BPF_FUNC_map_push_elem ||
 +	     func_id == BPF_FUNC_map_pop_elem)) {
 +		verbose(env, "write into map forbidden\n");
 +		return -EACCES;
 +	}
 +
  	if (!BPF_MAP_PTR(aux->map_state))
  		bpf_map_ptr_store(aux, meta->map_ptr,
  				  meta->map_ptr->unpriv_array);
@@@ -3363,11 -3157,19 +3363,11 @@@ static int check_helper_call(struct bpf
  	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
  		mark_reg_known_zero(env, regs, BPF_REG_0);
  		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
 -		if (is_acquire_function(func_id)) {
 -			int id = acquire_reference_state(env, insn_idx);
 -
 -			if (id < 0)
 -				return id;
 -			/* For mark_ptr_or_null_reg() */
 -			regs[BPF_REG_0].id = id;
 -			/* For release_reference() */
 -			regs[BPF_REG_0].ref_obj_id = id;
 -		} else {
 -			/* For mark_ptr_or_null_reg() */
 -			regs[BPF_REG_0].id = ++env->id_gen;
 -		}
 +		regs[BPF_REG_0].id = ++env->id_gen;
 +	} else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
 +		mark_reg_known_zero(env, regs, BPF_REG_0);
 +		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
 +		regs[BPF_REG_0].id = ++env->id_gen;
  	} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
  		mark_reg_known_zero(env, regs, BPF_REG_0);
  		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
@@@ -3378,19 -3180,9 +3378,19 @@@
  		return -EINVAL;
  	}
  
 -	if (is_ptr_cast_function(func_id))
 +	if (is_ptr_cast_function(func_id)) {
  		/* For release_reference() */
  		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
 +	} else if (is_acquire_function(func_id)) {
 +		int id = acquire_reference_state(env, insn_idx);
 +
 +		if (id < 0)
 +			return id;
 +		/* For mark_ptr_or_null_reg() */
 +		regs[BPF_REG_0].id = id;
 +		/* For release_reference() */
 +		regs[BPF_REG_0].ref_obj_id = id;
 +	}
  
  	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
  
@@@ -3490,9 -3282,6 +3490,9 @@@ static int retrieve_ptr_limit(const str
  
  	switch (ptr_reg->type) {
  	case PTR_TO_STACK:
 +		/* Indirect variable offset stack access is prohibited in
 +		 * unprivileged mode so it's not handled here.
 +		 */
  		off = ptr_reg->off + ptr_reg->var_off.value;
  		if (mask_to_left)
  			*ptr_limit = MAX_BPF_STACK + off;
@@@ -4349,15 -4138,35 +4349,35 @@@ static int check_alu_op(struct bpf_veri
  	return 0;
  }
  
+ static void __find_good_pkt_pointers(struct bpf_func_state *state,
+ 				     struct bpf_reg_state *dst_reg,
+ 				     enum bpf_reg_type type, u16 new_range)
+ {
+ 	struct bpf_reg_state *reg;
+ 	int i;
+ 
+ 	for (i = 0; i < MAX_BPF_REG; i++) {
+ 		reg = &state->regs[i];
+ 		if (reg->type == type && reg->id == dst_reg->id)
+ 			/* keep the maximum range already checked */
+ 			reg->range = max(reg->range, new_range);
+ 	}
+ 
+ 	bpf_for_each_spilled_reg(i, state, reg) {
+ 		if (!reg)
+ 			continue;
+ 		if (reg->type == type && reg->id == dst_reg->id)
+ 			reg->range = max(reg->range, new_range);
+ 	}
+ }
+ 
  static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
  				   struct bpf_reg_state *dst_reg,
  				   enum bpf_reg_type type,
  				   bool range_right_open)
  {
- 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
- 	struct bpf_reg_state *regs = state->regs, *reg;
  	u16 new_range;
- 	int i, j;
+ 	int i;
  
  	if (dst_reg->off < 0 ||
  	    (dst_reg->off == 0 && range_right_open))
@@@ -4422,20 -4231,9 +4442,9 @@@
  	 * the range won't allow anything.
  	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
  	 */
- 	for (i = 0; i < MAX_BPF_REG; i++)
- 		if (regs[i].type == type && regs[i].id == dst_reg->id)
- 			/* keep the maximum range already checked */
- 			regs[i].range = max(regs[i].range, new_range);
- 
- 	for (j = 0; j <= vstate->curframe; j++) {
- 		state = vstate->frame[j];
- 		bpf_for_each_spilled_reg(i, state, reg) {
- 			if (!reg)
- 				continue;
- 			if (reg->type == type && reg->id == dst_reg->id)
- 				reg->range = max(reg->range, new_range);
- 		}
- 	}
+ 	for (i = 0; i <= vstate->curframe; i++)
+ 		__find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
+ 					 new_range);
  }
  
  /* compute branch direction of the expression "if (reg opcode val) goto target;"
@@@ -4909,6 -4707,22 +4918,22 @@@ static void mark_ptr_or_null_reg(struc
  	}
  }
  
+ static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
+ 				    bool is_null)
+ {
+ 	struct bpf_reg_state *reg;
+ 	int i;
+ 
+ 	for (i = 0; i < MAX_BPF_REG; i++)
+ 		mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
+ 
+ 	bpf_for_each_spilled_reg(i, state, reg) {
+ 		if (!reg)
+ 			continue;
+ 		mark_ptr_or_null_reg(state, reg, id, is_null);
+ 	}
+ }
+ 
  /* The logic is similar to find_good_pkt_pointers(), both could eventually
   * be folded together at some point.
   */
@@@ -4916,10 -4730,10 +4941,10 @@@ static void mark_ptr_or_null_regs(struc
  				  bool is_null)
  {
  	struct bpf_func_state *state = vstate->frame[vstate->curframe];
- 	struct bpf_reg_state *reg, *regs = state->regs;
+ 	struct bpf_reg_state *regs = state->regs;
  	u32 ref_obj_id = regs[regno].ref_obj_id;
  	u32 id = regs[regno].id;
- 	int i, j;
+ 	int i;
  
  	if (ref_obj_id && ref_obj_id == id && is_null)
  		/* regs[regno] is in the " == NULL" branch.
@@@ -4928,17 -4742,8 +4953,8 @@@
  		 */
  		WARN_ON_ONCE(release_reference_state(state, id));
  
- 	for (i = 0; i < MAX_BPF_REG; i++)
- 		mark_ptr_or_null_reg(state, &regs[i], id, is_null);
- 
- 	for (j = 0; j <= vstate->curframe; j++) {
- 		state = vstate->frame[j];
- 		bpf_for_each_spilled_reg(i, state, reg) {
- 			if (!reg)
- 				continue;
- 			mark_ptr_or_null_reg(state, reg, id, is_null);
- 		}
- 	}
+ 	for (i = 0; i <= vstate->curframe; i++)
+ 		__mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
  }
  
  static bool try_match_pkt_pointers(const struct bpf_insn *insn,
@@@ -5177,17 -4982,23 +5193,17 @@@ static int check_cond_jmp_op(struct bpf
  			insn->dst_reg);
  		return -EACCES;
  	}
 -	if (env->log.level)
 +	if (env->log.level & BPF_LOG_LEVEL)
  		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
  	return 0;
  }
  
 -/* return the map pointer stored inside BPF_LD_IMM64 instruction */
 -static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
 -{
 -	u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
 -
 -	return (struct bpf_map *) (unsigned long) imm64;
 -}
 -
  /* verify BPF_LD_IMM64 instruction */
  static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
  {
 +	struct bpf_insn_aux_data *aux = cur_aux(env);
  	struct bpf_reg_state *regs = cur_regs(env);
 +	struct bpf_map *map;
  	int err;
  
  	if (BPF_SIZE(insn->code) != BPF_DW) {
@@@ -5211,22 -5022,11 +5227,22 @@@
  		return 0;
  	}
  
 -	/* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
 -	BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
 +	map = env->used_maps[aux->map_index];
 +	mark_reg_known_zero(env, regs, insn->dst_reg);
 +	regs[insn->dst_reg].map_ptr = map;
 +
 +	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
 +		regs[insn->dst_reg].type = PTR_TO_MAP_VALUE;
 +		regs[insn->dst_reg].off = aux->map_off;
 +		if (map_value_has_spin_lock(map))
 +			regs[insn->dst_reg].id = ++env->id_gen;
 +	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
 +		regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
 +	} else {
 +		verbose(env, "bpf verifier is misconfigured\n");
 +		return -EINVAL;
 +	}
  
 -	regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
 -	regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
  	return 0;
  }
  
@@@ -5350,7 -5150,6 +5366,7 @@@ static int check_return_code(struct bpf
  	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
  	case BPF_PROG_TYPE_SOCK_OPS:
  	case BPF_PROG_TYPE_CGROUP_DEVICE:
 +	case BPF_PROG_TYPE_CGROUP_SYSCTL:
  		break;
  	default:
  		return 0;
@@@ -5421,6 -5220,10 +5437,6 @@@ enum 
  
  #define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
  
 -static int *insn_stack;	/* stack of insns to process */
 -static int cur_stack;	/* current stack index */
 -static int *insn_state;
 -
  /* t, w, e - match pseudo-code above:
   * t - index of current instruction
   * w - next instruction
@@@ -5428,9 -5231,6 +5444,9 @@@
   */
  static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
  {
 +	int *insn_stack = env->cfg.insn_stack;
 +	int *insn_state = env->cfg.insn_state;
 +
  	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
  		return 0;
  
@@@ -5451,9 -5251,9 +5467,9 @@@
  		/* tree-edge */
  		insn_state[t] = DISCOVERED | e;
  		insn_state[w] = DISCOVERED;
 -		if (cur_stack >= env->prog->len)
 +		if (env->cfg.cur_stack >= env->prog->len)
  			return -E2BIG;
 -		insn_stack[cur_stack++] = w;
 +		insn_stack[env->cfg.cur_stack++] = w;
  		return 1;
  	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
  		verbose_linfo(env, t, "%d: ", t);
@@@ -5477,28 -5277,27 +5493,28 @@@ static int check_cfg(struct bpf_verifie
  {
  	struct bpf_insn *insns = env->prog->insnsi;
  	int insn_cnt = env->prog->len;
 +	int *insn_stack, *insn_state;
  	int ret = 0;
  	int i, t;
  
 -	insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
 +	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
  	if (!insn_state)
  		return -ENOMEM;
  
 -	insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
 +	insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
  	if (!insn_stack) {
 -		kfree(insn_state);
 +		kvfree(insn_state);
  		return -ENOMEM;
  	}
  
  	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
  	insn_stack[0] = 0; /* 0 is the first instruction */
 -	cur_stack = 1;
 +	env->cfg.cur_stack = 1;
  
  peek_stack:
 -	if (cur_stack == 0)
 +	if (env->cfg.cur_stack == 0)
  		goto check_state;
 -	t = insn_stack[cur_stack - 1];
 +	t = insn_stack[env->cfg.cur_stack - 1];
  
  	if (BPF_CLASS(insns[t].code) == BPF_JMP ||
  	    BPF_CLASS(insns[t].code) == BPF_JMP32) {
@@@ -5567,7 -5366,7 +5583,7 @@@
  
  mark_explored:
  	insn_state[t] = EXPLORED;
 -	if (cur_stack-- <= 0) {
 +	if (env->cfg.cur_stack-- <= 0) {
  		verbose(env, "pop stack internal bug\n");
  		ret = -EFAULT;
  		goto err_free;
@@@ -5585,9 -5384,8 +5601,9 @@@ check_state
  	ret = 0; /* cfg looks good */
  
  err_free:
 -	kfree(insn_state);
 -	kfree(insn_stack);
 +	kvfree(insn_state);
 +	kvfree(insn_stack);
 +	env->cfg.insn_state = env->cfg.insn_stack = NULL;
  	return ret;
  }
  
@@@ -6276,22 -6074,6 +6292,22 @@@ static bool states_equal(struct bpf_ver
  	return true;
  }
  
 +static int propagate_liveness_reg(struct bpf_verifier_env *env,
 +				  struct bpf_reg_state *reg,
 +				  struct bpf_reg_state *parent_reg)
 +{
 +	int err;
 +
 +	if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ))
 +		return 0;
 +
 +	err = mark_reg_read(env, reg, parent_reg);
 +	if (err)
 +		return err;
 +
 +	return 0;
 +}
 +
  /* A write screens off any subsequent reads; but write marks come from the
   * straight-line code between a state and its parent.  When we arrive at an
   * equivalent state (jump target or such) we didn't arrive by the straight-line
@@@ -6303,9 -6085,8 +6319,9 @@@ static int propagate_liveness(struct bp
  			      const struct bpf_verifier_state *vstate,
  			      struct bpf_verifier_state *vparent)
  {
 -	int i, frame, err = 0;
 +	struct bpf_reg_state *state_reg, *parent_reg;
  	struct bpf_func_state *state, *parent;
 +	int i, frame, err = 0;
  
  	if (vparent->curframe != vstate->curframe) {
  		WARN(1, "propagate_live: parent frame %d current frame %d\n",
@@@ -6315,27 -6096,30 +6331,27 @@@
  	/* Propagate read liveness of registers... */
  	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
  	for (frame = 0; frame <= vstate->curframe; frame++) {
 +		parent = vparent->frame[frame];
 +		state = vstate->frame[frame];
 +		parent_reg = parent->regs;
 +		state_reg = state->regs;
  		/* We don't need to worry about FP liveness, it's read-only */
  		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
 -			if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ)
 -				continue;
 -			if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) {
 -				err = mark_reg_read(env, &vstate->frame[frame]->regs[i],
 -						    &vparent->frame[frame]->regs[i]);
 -				if (err)
 -					return err;
 -			}
 +			err = propagate_liveness_reg(env, &state_reg[i],
 +						     &parent_reg[i]);
 +			if (err)
 +				return err;
  		}
 -	}
  
 -	/* ... and stack slots */
 -	for (frame = 0; frame <= vstate->curframe; frame++) {
 -		state = vstate->frame[frame];
 -		parent = vparent->frame[frame];
 +		/* Propagate stack slots. */
  		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
  			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
 -			if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ)
 -				continue;
 -			if (state->stack[i].spilled_ptr.live & REG_LIVE_READ)
 -				mark_reg_read(env, &state->stack[i].spilled_ptr,
 -					      &parent->stack[i].spilled_ptr);
 +			parent_reg = &parent->stack[i].spilled_ptr;
 +			state_reg = &state->stack[i].spilled_ptr;
 +			err = propagate_liveness_reg(env, state_reg,
 +						     parent_reg);
 +			if (err)
 +				return err;
  		}
  	}
  	return err;
@@@ -6344,13 -6128,11 +6360,13 @@@
  static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
  {
  	struct bpf_verifier_state_list *new_sl;
 -	struct bpf_verifier_state_list *sl;
 +	struct bpf_verifier_state_list *sl, **pprev;
  	struct bpf_verifier_state *cur = env->cur_state, *new;
  	int i, j, err, states_cnt = 0;
  
 -	sl = env->explored_states[insn_idx];
 +	pprev = &env->explored_states[insn_idx];
 +	sl = *pprev;
 +
  	if (!sl)
  		/* this 'insn_idx' instruction wasn't marked, so we will not
  		 * be doing state search here
@@@ -6361,7 -6143,6 +6377,7 @@@
  
  	while (sl != STATE_LIST_MARK) {
  		if (states_equal(env, &sl->state, cur)) {
 +			sl->hit_cnt++;
  			/* reached equivalent register/stack state,
  			 * prune the search.
  			 * Registers read by the continuation are read by us.
@@@ -6377,40 -6158,10 +6393,40 @@@
  				return err;
  			return 1;
  		}
 -		sl = sl->next;
  		states_cnt++;
 +		sl->miss_cnt++;
 +		/* heuristic to determine whether this state is beneficial
 +		 * to keep checking from state equivalence point of view.
 +		 * Higher numbers increase max_states_per_insn and verification time,
 +		 * but do not meaningfully decrease insn_processed.
 +		 */
 +		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
 +			/* the state is unlikely to be useful. Remove it to
 +			 * speed up verification
 +			 */
 +			*pprev = sl->next;
 +			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
 +				free_verifier_state(&sl->state, false);
 +				kfree(sl);
 +				env->peak_states--;
 +			} else {
 +				/* cannot free this state, since parentage chain may
 +				 * walk it later. Add it for free_list instead to
 +				 * be freed at the end of verification
 +				 */
 +				sl->next = env->free_list;
 +				env->free_list = sl;
 +			}
 +			sl = *pprev;
 +			continue;
 +		}
 +		pprev = &sl->next;
 +		sl = *pprev;
  	}
  
 +	if (env->max_states_per_insn < states_cnt)
 +		env->max_states_per_insn = states_cnt;
 +
  	if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
  		return 0;
  
@@@ -6424,8 -6175,6 +6440,8 @@@
  	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
  	if (!new_sl)
  		return -ENOMEM;
 +	env->total_states++;
 +	env->peak_states++;
  
  	/* add new state to the head of linked list */
  	new = &new_sl->state;
@@@ -6510,7 -6259,8 +6526,7 @@@ static int do_check(struct bpf_verifier
  	struct bpf_verifier_state *state;
  	struct bpf_insn *insns = env->prog->insnsi;
  	struct bpf_reg_state *regs;
 -	int insn_cnt = env->prog->len, i;
 -	int insn_processed = 0;
 +	int insn_cnt = env->prog->len;
  	bool do_print_state = false;
  
  	env->prev_linfo = NULL;
@@@ -6545,10 -6295,10 +6561,10 @@@
  		insn = &insns[env->insn_idx];
  		class = BPF_CLASS(insn->code);
  
 -		if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
 +		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
  			verbose(env,
  				"BPF program is too large. Processed %d insn\n",
 -				insn_processed);
 +				env->insn_processed);
  			return -E2BIG;
  		}
  
@@@ -6557,7 -6307,7 +6573,7 @@@
  			return err;
  		if (err == 1) {
  			/* found equivalent state, can prune the search */
 -			if (env->log.level) {
 +			if (env->log.level & BPF_LOG_LEVEL) {
  				if (do_print_state)
  					verbose(env, "\nfrom %d to %d%s: safe\n",
  						env->prev_insn_idx, env->insn_idx,
@@@ -6575,9 -6325,8 +6591,9 @@@
  		if (need_resched())
  			cond_resched();
  
 -		if (env->log.level > 1 || (env->log.level && do_print_state)) {
 -			if (env->log.level > 1)
 +		if (env->log.level & BPF_LOG_LEVEL2 ||
 +		    (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
 +			if (env->log.level & BPF_LOG_LEVEL2)
  				verbose(env, "%d:", env->insn_idx);
  			else
  				verbose(env, "\nfrom %d to %d%s:",
@@@ -6588,7 -6337,7 +6604,7 @@@
  			do_print_state = false;
  		}
  
 -		if (env->log.level) {
 +		if (env->log.level & BPF_LOG_LEVEL) {
  			const struct bpf_insn_cbs cbs = {
  				.cb_print	= verbose,
  				.private_data	= env,
@@@ -6853,6 -6602,16 +6869,6 @@@ process_bpf_exit
  		env->insn_idx++;
  	}
  
 -	verbose(env, "processed %d insns (limit %d), stack depth ",
 -		insn_processed, BPF_COMPLEXITY_LIMIT_INSNS);
 -	for (i = 0; i < env->subprog_cnt; i++) {
 -		u32 depth = env->subprog_info[i].stack_depth;
 -
 -		verbose(env, "%d", depth);
 -		if (i + 1 < env->subprog_cnt)
 -			verbose(env, "+");
 -	}
 -	verbose(env, "\n");
  	env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
  	return 0;
  }
@@@ -6950,10 -6709,8 +6966,10 @@@ static int replace_map_fd_with_map_ptr(
  		}
  
  		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
 +			struct bpf_insn_aux_data *aux;
  			struct bpf_map *map;
  			struct fd f;
 +			u64 addr;
  
  			if (i == insn_cnt - 1 || insn[1].code != 0 ||
  			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
@@@ -6962,19 -6719,13 +6978,19 @@@
  				return -EINVAL;
  			}
  
 -			if (insn->src_reg == 0)
 +			if (insn[0].src_reg == 0)
  				/* valid generic load 64-bit imm */
  				goto next_insn;
  
 -			if (insn[0].src_reg != BPF_PSEUDO_MAP_FD ||
 -			    insn[1].imm != 0) {
 -				verbose(env, "unrecognized bpf_ld_imm64 insn\n");
 +			/* In final convert_pseudo_ld_imm64() step, this is
 +			 * converted into regular 64-bit imm load insn.
 +			 */
 +			if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
 +			     insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
 +			    (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
 +			     insn[1].imm != 0)) {
 +				verbose(env,
 +					"unrecognized bpf_ld_imm64 insn\n");
  				return -EINVAL;
  			}
  
@@@ -6992,47 -6743,16 +7008,47 @@@
  				return err;
  			}
  
 -			/* store map pointer inside BPF_LD_IMM64 instruction */
 -			insn[0].imm = (u32) (unsigned long) map;
 -			insn[1].imm = ((u64) (unsigned long) map) >> 32;
 +			aux = &env->insn_aux_data[i];
 +			if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
 +				addr = (unsigned long)map;
 +			} else {
 +				u32 off = insn[1].imm;
 +
 +				if (off >= BPF_MAX_VAR_OFF) {
 +					verbose(env, "direct value offset of %u is not allowed\n", off);
 +					fdput(f);
 +					return -EINVAL;
 +				}
 +
 +				if (!map->ops->map_direct_value_addr) {
 +					verbose(env, "no direct value access support for this map type\n");
 +					fdput(f);
 +					return -EINVAL;
 +				}
 +
 +				err = map->ops->map_direct_value_addr(map, &addr, off);
 +				if (err) {
 +					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
 +						map->value_size, off);
 +					fdput(f);
 +					return err;
 +				}
 +
 +				aux->map_off = off;
 +				addr += off;
 +			}
 +
 +			insn[0].imm = (u32)addr;
 +			insn[1].imm = addr >> 32;
  
  			/* check whether we recorded this map already */
 -			for (j = 0; j < env->used_map_cnt; j++)
 +			for (j = 0; j < env->used_map_cnt; j++) {
  				if (env->used_maps[j] == map) {
 +					aux->map_index = j;
  					fdput(f);
  					goto next_insn;
  				}
 +			}
  
  			if (env->used_map_cnt >= MAX_USED_MAPS) {
  				fdput(f);
@@@ -7049,8 -6769,6 +7065,8 @@@
  				fdput(f);
  				return PTR_ERR(map);
  			}
 +
 +			aux->map_index = env->used_map_cnt;
  			env->used_maps[env->used_map_cnt++] = map;
  
  			if (bpf_map_is_cgroup_storage(map) &&
@@@ -7156,13 -6874,8 +7172,13 @@@ static struct bpf_prog *bpf_patch_insn_
  	struct bpf_prog *new_prog;
  
  	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
 -	if (!new_prog)
 +	if (IS_ERR(new_prog)) {
 +		if (PTR_ERR(new_prog) == -ERANGE)
 +			verbose(env,
 +				"insn %d cannot be patched due to 16-bit range\n",
 +				env->insn_aux_data[off].orig_idx);
  		return NULL;
 +	}
  	if (adjust_insn_aux_data(env, new_prog->len, off, len))
  		return NULL;
  	adjust_subprog_starts(env, off, len);
@@@ -7700,8 -7413,9 +7716,8 @@@ static int jit_subprogs(struct bpf_veri
  			    insn->src_reg != BPF_PSEUDO_CALL)
  				continue;
  			subprog = insn->off;
 -			insn->imm = (u64 (*)(u64, u64, u64, u64, u64))
 -				func[subprog]->bpf_func -
 -				__bpf_call_base;
 +			insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
 +				    __bpf_call_base;
  		}
  
  		/* we use the aux data to keep a list of the start addresses
@@@ -8103,14 -7817,6 +8119,14 @@@ static void free_states(struct bpf_veri
  	struct bpf_verifier_state_list *sl, *sln;
  	int i;
  
 +	sl = env->free_list;
 +	while (sl) {
 +		sln = sl->next;
 +		free_verifier_state(&sl->state, false);
 +		kfree(sl);
 +		sl = sln;
 +	}
 +
  	if (!env->explored_states)
  		return;
  
@@@ -8126,37 -7832,12 +8142,37 @@@
  			}
  	}
  
 -	kfree(env->explored_states);
 +	kvfree(env->explored_states);
 +}
 +
 +static void print_verification_stats(struct bpf_verifier_env *env)
 +{
 +	int i;
 +
 +	if (env->log.level & BPF_LOG_STATS) {
 +		verbose(env, "verification time %lld usec\n",
 +			div_u64(env->verification_time, 1000));
 +		verbose(env, "stack depth ");
 +		for (i = 0; i < env->subprog_cnt; i++) {
 +			u32 depth = env->subprog_info[i].stack_depth;
 +
 +			verbose(env, "%d", depth);
 +			if (i + 1 < env->subprog_cnt)
 +				verbose(env, "+");
 +		}
 +		verbose(env, "\n");
 +	}
 +	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
 +		"total_states %d peak_states %d mark_read %d\n",
 +		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
 +		env->max_states_per_insn, env->total_states,
 +		env->peak_states, env->longest_mark_read_walk);
  }
  
  int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
  	      union bpf_attr __user *uattr)
  {
 +	u64 start_time = ktime_get_ns();
  	struct bpf_verifier_env *env;
  	struct bpf_verifier_log *log;
  	int i, len, ret = -EINVAL;
@@@ -8184,11 -7865,9 +8200,11 @@@
  		env->insn_aux_data[i].orig_idx = i;
  	env->prog = *prog;
  	env->ops = bpf_verifier_ops[env->prog->type];
 +	is_priv = capable(CAP_SYS_ADMIN);
  
  	/* grab the mutex to protect few globals used by verifier */
 -	mutex_lock(&bpf_verifier_lock);
 +	if (!is_priv)
 +		mutex_lock(&bpf_verifier_lock);
  
  	if (attr->log_level || attr->log_buf || attr->log_size) {
  		/* user requested verbose verifier output
@@@ -8200,8 -7879,8 +8216,8 @@@
  
  		ret = -EINVAL;
  		/* log attributes have to be sane */
 -		if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 ||
 -		    !log->level || !log->ubuf)
 +		if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
 +		    !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
  			goto err_unlock;
  	}
  
@@@ -8211,6 -7890,7 +8227,6 @@@
  	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
  		env->strict_alignment = false;
  
 -	is_priv = capable(CAP_SYS_ADMIN);
  	env->allow_ptr_leaks = is_priv;
  
  	ret = replace_map_fd_with_map_ptr(env);
@@@ -8223,7 -7903,7 +8239,7 @@@
  			goto skip_full_check;
  	}
  
 -	env->explored_states = kcalloc(env->prog->len,
 +	env->explored_states = kvcalloc(env->prog->len,
  				       sizeof(struct bpf_verifier_state_list *),
  				       GFP_USER);
  	ret = -ENOMEM;
@@@ -8281,9 -7961,6 +8297,9 @@@ skip_full_check
  	if (ret == 0)
  		ret = fixup_call_args(env);
  
 +	env->verification_time = ktime_get_ns() - start_time;
 +	print_verification_stats(env);
 +
  	if (log->level && bpf_verifier_log_full(log))
  		ret = -ENOSPC;
  	if (log->level && !log->ubuf) {
@@@ -8323,8 -8000,7 +8339,8 @@@ err_release_maps
  		release_maps(env);
  	*prog = env->prog;
  err_unlock:
 -	mutex_unlock(&bpf_verifier_lock);
 +	if (!is_priv)
 +		mutex_unlock(&bpf_verifier_lock);
  	vfree(env->insn_aux_data);
  err_free_env:
  	kfree(env);
diff --combined lib/Kconfig.debug
index 7117ac61174e,d5a4a4036d2f..8ed7d276fe7d
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@@ -219,14 -219,6 +219,14 @@@ config DEBUG_INFO_DWARF
  	  But it significantly improves the success of resolving
  	  variables in gdb on optimized code.
  
 +config DEBUG_INFO_BTF
 +	bool "Generate BTF typeinfo"
 +	depends on DEBUG_INFO
 +	help
 +	  Generate deduplicated BTF type information from DWARF debug info.
 +	  Turning this on expects presence of pahole tool, which will convert
 +	  DWARF type info into equivalent deduplicated BTF type info.
 +
  config GDB_SCRIPTS
  	bool "Provide GDB scripts for kernel debugging"
  	depends on DEBUG_INFO
@@@ -1937,6 -1929,7 +1937,7 @@@ config TEST_KMO
  	depends on m
  	depends on BLOCK && (64BIT || LBDAF)	  # for XFS, BTRFS
  	depends on NETDEVICES && NET_CORE && INET # for TUN
+ 	depends on BLOCK
  	select TEST_LKM
  	select XFS_FS
  	select TUN
diff --combined net/appletalk/ddp.c
index e2511027d19b,dbe8b1993be9..a2555023c654
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@@ -1806,6 -1806,12 +1806,6 @@@ static int atalk_ioctl(struct socket *s
  		rc = put_user(amount, (int __user *)argp);
  		break;
  	}
 -	case SIOCGSTAMP:
 -		rc = sock_get_timestamp(sk, argp);
 -		break;
 -	case SIOCGSTAMPNS:
 -		rc = sock_get_timestampns(sk, argp);
 -		break;
  	/* Routing */
  	case SIOCADDRT:
  	case SIOCDELRT:
@@@ -1865,7 -1871,6 +1865,7 @@@ static const struct proto_ops atalk_dgr
  	.getname	= atalk_getname,
  	.poll		= datagram_poll,
  	.ioctl		= atalk_ioctl,
 +	.gettstamp	= sock_gettstamp,
  #ifdef CONFIG_COMPAT
  	.compat_ioctl	= atalk_compat_ioctl,
  #endif
@@@ -1915,6 -1920,7 +1915,7 @@@ static int __init atalk_init(void
  	ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv);
  	if (!ddp_dl) {
  		pr_crit("Unable to register DDP with SNAP.\n");
+ 		rc = -ENOMEM;
  		goto out_sock;
  	}
  
diff --combined net/ipv4/esp4_offload.c
index b61a8ff558f9,d3170a8001b2..8edcfa66d1e5
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@@ -52,13 -52,13 +52,13 @@@ static struct sk_buff *esp4_gro_receive
  			goto out;
  
  		if (sp->len == XFRM_MAX_DEPTH)
- 			goto out;
+ 			goto out_reset;
  
  		x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
  				      (xfrm_address_t *)&ip_hdr(skb)->daddr,
  				      spi, IPPROTO_ESP, AF_INET);
  		if (!x)
- 			goto out;
+ 			goto out_reset;
  
  		sp->xvec[sp->len++] = x;
  		sp->olen++;
@@@ -66,7 -66,7 +66,7 @@@
  		xo = xfrm_offload(skb);
  		if (!xo) {
  			xfrm_state_put(x);
- 			goto out;
+ 			goto out_reset;
  		}
  	}
  
@@@ -82,6 -82,8 +82,8 @@@
  	xfrm_input(skb, IPPROTO_ESP, spi, -2);
  
  	return ERR_PTR(-EINPROGRESS);
+ out_reset:
+ 	secpath_reset(skb);
  out:
  	skb_push(skb, offset);
  	NAPI_GRO_CB(skb)->same_flow = 0;
@@@ -107,44 -109,6 +109,44 @@@ static void esp4_gso_encap(struct xfrm_
  	xo->proto = proto;
  }
  
 +static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x,
 +						struct sk_buff *skb,
 +						netdev_features_t features)
 +{
 +	__skb_push(skb, skb->mac_len);
 +	return skb_mac_gso_segment(skb, features);
 +}
 +
 +static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x,
 +						   struct sk_buff *skb,
 +						   netdev_features_t features)
 +{
 +	const struct net_offload *ops;
 +	struct sk_buff *segs = ERR_PTR(-EINVAL);
 +	struct xfrm_offload *xo = xfrm_offload(skb);
 +
 +	skb->transport_header += x->props.header_len;
 +	ops = rcu_dereference(inet_offloads[xo->proto]);
 +	if (likely(ops && ops->callbacks.gso_segment))
 +		segs = ops->callbacks.gso_segment(skb, features);
 +
 +	return segs;
 +}
 +
 +static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x,
 +						    struct sk_buff *skb,
 +						    netdev_features_t features)
 +{
 +	switch (x->outer_mode.encap) {
 +	case XFRM_MODE_TUNNEL:
 +		return xfrm4_tunnel_gso_segment(x, skb, features);
 +	case XFRM_MODE_TRANSPORT:
 +		return xfrm4_transport_gso_segment(x, skb, features);
 +	}
 +
 +	return ERR_PTR(-EOPNOTSUPP);
 +}
 +
  static struct sk_buff *esp4_gso_segment(struct sk_buff *skb,
  				        netdev_features_t features)
  {
@@@ -176,16 -140,14 +178,16 @@@
  
  	skb->encap_hdr_csum = 1;
  
 -	if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev)
 +	if ((!(skb->dev->gso_partial_features & NETIF_F_HW_ESP) &&
 +	     !(features & NETIF_F_HW_ESP)) || x->xso.dev != skb->dev)
  		esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
 -	else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
 +	else if (!(features & NETIF_F_HW_ESP_TX_CSUM) &&
 +		 !(skb->dev->gso_partial_features & NETIF_F_HW_ESP_TX_CSUM))
  		esp_features = features & ~NETIF_F_CSUM_MASK;
  
  	xo->flags |= XFRM_GSO_SEGMENT;
  
 -	return x->outer_mode->gso_segment(x, skb, esp_features);
 +	return xfrm4_outer_mode_gso_segment(x, skb, esp_features);
  }
  
  static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb)
@@@ -221,9 -183,7 +223,9 @@@ static int esp_xmit(struct xfrm_state *
  	if (!xo)
  		return -EINVAL;
  
 -	if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) {
 +	if ((!(features & NETIF_F_HW_ESP) &&
 +	     !(skb->dev->gso_partial_features & NETIF_F_HW_ESP)) ||
 +	    x->xso.dev != skb->dev) {
  		xo->flags |= CRYPTO_FALLBACK;
  		hw_offload = false;
  	}
diff --combined net/ipv4/ip_output.c
index 4e42c1974ba2,e8bb2e85c5a4..ac880beda8a7
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@@ -188,7 -188,7 +188,7 @@@ static int ip_finish_output2(struct ne
  	struct net_device *dev = dst->dev;
  	unsigned int hh_len = LL_RESERVED_SPACE(dev);
  	struct neighbour *neigh;
 -	u32 nexthop;
 +	bool is_v6gw = false;
  
  	if (rt->rt_type == RTN_MULTICAST) {
  		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
@@@ -218,13 -218,16 +218,13 @@@
  	}
  
  	rcu_read_lock_bh();
 -	nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
 -	neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
 -	if (unlikely(!neigh))
 -		neigh = __neigh_create(&arp_tbl, &nexthop, dev, false);
 +	neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
  	if (!IS_ERR(neigh)) {
  		int res;
  
  		sock_confirm_neigh(skb, neigh);
 -		res = neigh_output(neigh, skb);
 -
 +		/* if crossing protocols, can not use the cached header */
 +		res = neigh_output(neigh, skb, is_v6gw);
  		rcu_read_unlock_bh();
  		return res;
  	}
@@@ -469,7 -472,7 +469,7 @@@ int __ip_queue_xmit(struct sock *sk, st
  	skb_dst_set_noref(skb, &rt->dst);
  
  packet_routed:
 -	if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
 +	if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gw_family)
  		goto no_route;
  
  	/* OK, we know where to send it, allocate and build IP header. */
@@@ -516,6 -519,7 +516,7 @@@ static void ip_copy_metadata(struct sk_
  	to->pkt_type = from->pkt_type;
  	to->priority = from->priority;
  	to->protocol = from->protocol;
+ 	to->skb_iif = from->skb_iif;
  	skb_dst_drop(to);
  	skb_dst_copy(to, from);
  	to->dev = from->dev;
@@@ -690,8 -694,11 +691,8 @@@ int ip_do_fragment(struct net *net, str
  			return 0;
  		}
  
 -		while (frag) {
 -			skb = frag->next;
 -			kfree_skb(frag);
 -			frag = skb;
 -		}
 +		kfree_skb_list(frag);
 +
  		IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
  		return err;
  
diff --combined net/ipv4/ip_vti.c
index cc5d9c0a8a10,35d8346742e2..254a42e83ff9
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@@ -50,7 -50,7 +50,7 @@@ static unsigned int vti_net_id __read_m
  static int vti_tunnel_init(struct net_device *dev);
  
  static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
 -		     int encap_type)
 +		     int encap_type, bool update_skb_dev)
  {
  	struct ip_tunnel *tunnel;
  	const struct iphdr *iph = ip_hdr(skb);
@@@ -65,9 -65,6 +65,9 @@@
  
  		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
  
 +		if (update_skb_dev)
 +			skb->dev = tunnel->dev;
 +
  		return xfrm_input(skb, nexthdr, spi, encap_type);
  	}
  
@@@ -77,28 -74,47 +77,28 @@@ drop
  	return 0;
  }
  
 -static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi,
 -		     int encap_type)
 +static int vti_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi,
 +			   int encap_type)
  {
 -	struct ip_tunnel *tunnel;
 -	const struct iphdr *iph = ip_hdr(skb);
 -	struct net *net = dev_net(skb->dev);
 -	struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
 -
 -	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
 -				  iph->saddr, iph->daddr, 0);
 -	if (tunnel) {
 -		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
 -			goto drop;
 -
 -		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
 -
 -		skb->dev = tunnel->dev;
 -
 -		return xfrm_input(skb, nexthdr, spi, encap_type);
 -	}
 -
 -	return -EINVAL;
 -drop:
 -	kfree_skb(skb);
 -	return 0;
 +	return vti_input(skb, nexthdr, spi, encap_type, false);
  }
  
 -static int vti_rcv(struct sk_buff *skb)
 +static int vti_rcv(struct sk_buff *skb, __be32 spi, bool update_skb_dev)
  {
  	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
  	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
  
 -	return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
 +	return vti_input(skb, ip_hdr(skb)->protocol, spi, 0, update_skb_dev);
  }
  
 -static int vti_rcv_ipip(struct sk_buff *skb)
 +static int vti_rcv_proto(struct sk_buff *skb)
  {
 -	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
 -	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
 +	return vti_rcv(skb, 0, false);
 +}
  
 -	return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0);
 +static int vti_rcv_tunnel(struct sk_buff *skb)
 +{
 +	return vti_rcv(skb, ip_hdr(skb)->saddr, true);
  }
  
  static int vti_rcv_cb(struct sk_buff *skb, int err)
@@@ -107,7 -123,7 +107,7 @@@
  	struct net_device *dev;
  	struct pcpu_sw_netstats *tstats;
  	struct xfrm_state *x;
 -	struct xfrm_mode *inner_mode;
 +	const struct xfrm_mode *inner_mode;
  	struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
  	u32 orig_mark = skb->mark;
  	int ret;
@@@ -126,7 -142,7 +126,7 @@@
  
  	x = xfrm_input_state(skb);
  
 -	inner_mode = x->inner_mode;
 +	inner_mode = &x->inner_mode;
  
  	if (x->sel.family == AF_UNSPEC) {
  		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
@@@ -137,7 -153,7 +137,7 @@@
  		}
  	}
  
 -	family = inner_mode->afinfo->family;
 +	family = inner_mode->family;
  
  	skb->mark = be32_to_cpu(tunnel->parms.i_key);
  	ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
@@@ -431,31 -447,31 +431,31 @@@ static void __net_init vti_fb_tunnel_in
  }
  
  static struct xfrm4_protocol vti_esp4_protocol __read_mostly = {
 -	.handler	=	vti_rcv,
 -	.input_handler	=	vti_input,
 +	.handler	=	vti_rcv_proto,
 +	.input_handler	=	vti_input_proto,
  	.cb_handler	=	vti_rcv_cb,
  	.err_handler	=	vti4_err,
  	.priority	=	100,
  };
  
  static struct xfrm4_protocol vti_ah4_protocol __read_mostly = {
 -	.handler	=	vti_rcv,
 -	.input_handler	=	vti_input,
 +	.handler	=	vti_rcv_proto,
 +	.input_handler	=	vti_input_proto,
  	.cb_handler	=	vti_rcv_cb,
  	.err_handler	=	vti4_err,
  	.priority	=	100,
  };
  
  static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
 -	.handler	=	vti_rcv,
 -	.input_handler	=	vti_input,
 +	.handler	=	vti_rcv_proto,
 +	.input_handler	=	vti_input_proto,
  	.cb_handler	=	vti_rcv_cb,
  	.err_handler	=	vti4_err,
  	.priority	=	100,
  };
  
  static struct xfrm_tunnel ipip_handler __read_mostly = {
 -	.handler	=	vti_rcv_ipip,
 +	.handler	=	vti_rcv_tunnel,
  	.err_handler	=	vti4_err,
  	.priority	=	0,
  };
@@@ -630,10 -646,8 +630,8 @@@ static int __init vti_init(void
  
  	msg = "ipip tunnel";
  	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
- 	if (err < 0) {
- 		pr_info("%s: cant't register tunnel\n",__func__);
+ 	if (err < 0)
  		goto xfrm_tunnel_failed;
- 	}
  
  	msg = "netlink interface";
  	err = rtnl_link_register(&vti_link_ops);
@@@ -643,9 -657,9 +641,9 @@@
  	return err;
  
  rtnl_link_failed:
- 	xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
- xfrm_tunnel_failed:
  	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
+ xfrm_tunnel_failed:
+ 	xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
  xfrm_proto_comp_failed:
  	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
  xfrm_proto_ah_failed:
@@@ -660,6 -674,7 +658,7 @@@ pernet_dev_failed
  static void __exit vti_fini(void)
  {
  	rtnl_link_unregister(&vti_link_ops);
+ 	xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
  	xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
  	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
  	xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
diff --combined net/ipv4/tcp_ipv4.c
index faa6fa619f59,a2896944aa37..af81e4a6a8d8
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@@ -1673,7 -1673,9 +1673,9 @@@ bool tcp_add_backlog(struct sock *sk, s
  	if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq ||
  	    TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield ||
  	    ((TCP_SKB_CB(tail)->tcp_flags |
- 	      TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_URG) ||
+ 	      TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) ||
+ 	    !((TCP_SKB_CB(tail)->tcp_flags &
+ 	      TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) ||
  	    ((TCP_SKB_CB(tail)->tcp_flags ^
  	      TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) ||
  #ifdef CONFIG_TLS_DEVICE
@@@ -1692,6 -1694,15 +1694,15 @@@
  		if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq))
  			TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
  
+ 		/* We have to update both TCP_SKB_CB(tail)->tcp_flags and
+ 		 * thtail->fin, so that the fast path in tcp_rcv_established()
+ 		 * is not entered if we append a packet with a FIN.
+ 		 * SYN, RST, URG are not present.
+ 		 * ACK is set on both packets.
+ 		 * PSH : we do not really care in TCP stack,
+ 		 *       at least for 'GRO' packets.
+ 		 */
+ 		thtail->fin |= th->fin;
  		TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
  
  		if (TCP_SKB_CB(skb)->has_rxtstamp) {
@@@ -1774,7 -1785,6 +1785,7 @@@ static void tcp_v4_fill_cb(struct sk_bu
  int tcp_v4_rcv(struct sk_buff *skb)
  {
  	struct net *net = dev_net(skb->dev);
 +	struct sk_buff *skb_to_free;
  	int sdif = inet_sdif(skb);
  	const struct iphdr *iph;
  	const struct tcphdr *th;
@@@ -1906,17 -1916,11 +1917,17 @@@ process
  	tcp_segs_in(tcp_sk(sk), skb);
  	ret = 0;
  	if (!sock_owned_by_user(sk)) {
 +		skb_to_free = sk->sk_rx_skb_cache;
 +		sk->sk_rx_skb_cache = NULL;
  		ret = tcp_v4_do_rcv(sk, skb);
 -	} else if (tcp_add_backlog(sk, skb)) {
 -		goto discard_and_relse;
 +	} else {
 +		if (tcp_add_backlog(sk, skb))
 +			goto discard_and_relse;
 +		skb_to_free = NULL;
  	}
  	bh_unlock_sock(sk);
 +	if (skb_to_free)
 +		__kfree_skb(skb_to_free);
  
  put_and_return:
  	if (refcounted)
diff --combined net/ipv6/esp6_offload.c
index bff83279d76f,cb99f6fb79b7..d453cf417b03
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@@ -74,13 -74,13 +74,13 @@@ static struct sk_buff *esp6_gro_receive
  			goto out;
  
  		if (sp->len == XFRM_MAX_DEPTH)
- 			goto out;
+ 			goto out_reset;
  
  		x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
  				      (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
  				      spi, IPPROTO_ESP, AF_INET6);
  		if (!x)
- 			goto out;
+ 			goto out_reset;
  
  		sp->xvec[sp->len++] = x;
  		sp->olen++;
@@@ -88,7 -88,7 +88,7 @@@
  		xo = xfrm_offload(skb);
  		if (!xo) {
  			xfrm_state_put(x);
- 			goto out;
+ 			goto out_reset;
  		}
  	}
  
@@@ -109,6 -109,8 +109,8 @@@
  	xfrm_input(skb, IPPROTO_ESP, spi, -2);
  
  	return ERR_PTR(-EINPROGRESS);
+ out_reset:
+ 	secpath_reset(skb);
  out:
  	skb_push(skb, offset);
  	NAPI_GRO_CB(skb)->same_flow = 0;
@@@ -134,44 -136,6 +136,44 @@@ static void esp6_gso_encap(struct xfrm_
  	xo->proto = proto;
  }
  
 +static struct sk_buff *xfrm6_tunnel_gso_segment(struct xfrm_state *x,
 +						struct sk_buff *skb,
 +						netdev_features_t features)
 +{
 +	__skb_push(skb, skb->mac_len);
 +	return skb_mac_gso_segment(skb, features);
 +}
 +
 +static struct sk_buff *xfrm6_transport_gso_segment(struct xfrm_state *x,
 +						   struct sk_buff *skb,
 +						   netdev_features_t features)
 +{
 +	const struct net_offload *ops;
 +	struct sk_buff *segs = ERR_PTR(-EINVAL);
 +	struct xfrm_offload *xo = xfrm_offload(skb);
 +
 +	skb->transport_header += x->props.header_len;
 +	ops = rcu_dereference(inet6_offloads[xo->proto]);
 +	if (likely(ops && ops->callbacks.gso_segment))
 +		segs = ops->callbacks.gso_segment(skb, features);
 +
 +	return segs;
 +}
 +
 +static struct sk_buff *xfrm6_outer_mode_gso_segment(struct xfrm_state *x,
 +						    struct sk_buff *skb,
 +						    netdev_features_t features)
 +{
 +	switch (x->outer_mode.encap) {
 +	case XFRM_MODE_TUNNEL:
 +		return xfrm6_tunnel_gso_segment(x, skb, features);
 +	case XFRM_MODE_TRANSPORT:
 +		return xfrm6_transport_gso_segment(x, skb, features);
 +	}
 +
 +	return ERR_PTR(-EOPNOTSUPP);
 +}
 +
  static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
  				        netdev_features_t features)
  {
@@@ -210,7 -174,7 +212,7 @@@
  
  	xo->flags |= XFRM_GSO_SEGMENT;
  
 -	return x->outer_mode->gso_segment(x, skb, esp_features);
 +	return xfrm6_outer_mode_gso_segment(x, skb, esp_features);
  }
  
  static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
diff --combined net/ipv6/ip6_fib.c
index a8919c217cc2,91247a6fc67f..08e0390e001c
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@@ -162,7 -162,7 +162,7 @@@ struct fib6_info *fib6_info_alloc(gfp_
  	}
  
  	INIT_LIST_HEAD(&f6i->fib6_siblings);
 -	atomic_inc(&f6i->fib6_ref);
 +	refcount_set(&f6i->fib6_ref, 1);
  
  	return f6i;
  }
@@@ -175,7 -175,10 +175,7 @@@ void fib6_info_destroy_rcu(struct rcu_h
  	WARN_ON(f6i->fib6_node);
  
  	bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1);
 -	if (bucket) {
 -		f6i->rt6i_exception_bucket = NULL;
 -		kfree(bucket);
 -	}
 +	kfree(bucket);
  
  	if (f6i->rt6i_pcpu) {
  		int cpu;
@@@ -196,7 -199,10 +196,7 @@@
  		free_percpu(f6i->rt6i_pcpu);
  	}
  
 -	lwtstate_put(f6i->fib6_nh.nh_lwtstate);
 -
 -	if (f6i->fib6_nh.nh_dev)
 -		dev_put(f6i->fib6_nh.nh_dev);
 +	fib6_nh_release(&f6i->fib6_nh);
  
  	ip_fib_metrics_put(f6i->fib6_metrics);
  
@@@ -351,11 -357,10 +351,11 @@@ struct dst_entry *fib6_rule_lookup(stru
  }
  
  /* called with rcu lock held; no reference taken on fib6_info */
 -struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
 -			      int flags)
 +int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6,
 +		struct fib6_result *res, int flags)
  {
 -	return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, flags);
 +	return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6,
 +				 res, flags);
  }
  
  static void __net_init fib6_tables_init(struct net *net)
@@@ -846,8 -851,8 +846,8 @@@ insert_above
  
  		RCU_INIT_POINTER(in->parent, pn);
  		in->leaf = fn->leaf;
 -		atomic_inc(&rcu_dereference_protected(in->leaf,
 -				lockdep_is_held(&table->tb6_lock))->fib6_ref);
 +		fib6_info_hold(rcu_dereference_protected(in->leaf,
 +				lockdep_is_held(&table->tb6_lock)));
  
  		/* update parent pointer */
  		if (dir)
@@@ -916,9 -921,7 +916,7 @@@ static void fib6_drop_pcpu_from(struct 
  		if (pcpu_rt) {
  			struct fib6_info *from;
  
- 			from = rcu_dereference_protected(pcpu_rt->from,
- 					     lockdep_is_held(&table->tb6_lock));
- 			rcu_assign_pointer(pcpu_rt->from, NULL);
+ 			from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL);
  			fib6_info_release(from);
  		}
  	}
@@@ -929,7 -932,7 +927,7 @@@ static void fib6_purge_rt(struct fib6_i
  {
  	struct fib6_table *table = rt->fib6_table;
  
 -	if (atomic_read(&rt->fib6_ref) != 1) {
 +	if (refcount_read(&rt->fib6_ref) != 1) {
  		/* This route is used as dummy address holder in some split
  		 * nodes. It is not leaked, but it still holds other resources,
  		 * which must be released in time. So, scan ascendant nodes
@@@ -942,7 -945,7 +940,7 @@@
  			struct fib6_info *new_leaf;
  			if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) {
  				new_leaf = fib6_find_prefix(net, table, fn);
 -				atomic_inc(&new_leaf->fib6_ref);
 +				fib6_info_hold(new_leaf);
  
  				rcu_assign_pointer(fn->leaf, new_leaf);
  				fib6_info_release(rt);
@@@ -1108,7 -1111,7 +1106,7 @@@ add
  			return err;
  
  		rcu_assign_pointer(rt->fib6_next, iter);
 -		atomic_inc(&rt->fib6_ref);
 +		fib6_info_hold(rt);
  		rcu_assign_pointer(rt->fib6_node, fn);
  		rcu_assign_pointer(*ins, rt);
  		if (!info->skip_notify)
@@@ -1136,7 -1139,7 +1134,7 @@@
  		if (err)
  			return err;
  
 -		atomic_inc(&rt->fib6_ref);
 +		fib6_info_hold(rt);
  		rcu_assign_pointer(rt->fib6_node, fn);
  		rt->fib6_next = iter->fib6_next;
  		rcu_assign_pointer(*ins, rt);
@@@ -1278,7 -1281,7 +1276,7 @@@ int fib6_add(struct fib6_node *root, st
  			if (!sfn)
  				goto failure;
  
 -			atomic_inc(&info->nl_net->ipv6.fib6_null_entry->fib6_ref);
 +			fib6_info_hold(info->nl_net->ipv6.fib6_null_entry);
  			rcu_assign_pointer(sfn->leaf,
  					   info->nl_net->ipv6.fib6_null_entry);
  			sfn->fn_flags = RTN_ROOT;
@@@ -1321,7 -1324,7 +1319,7 @@@
  				rcu_assign_pointer(fn->leaf,
  					    info->nl_net->ipv6.fib6_null_entry);
  			} else {
 -				atomic_inc(&rt->fib6_ref);
 +				fib6_info_hold(rt);
  				rcu_assign_pointer(fn->leaf, rt);
  			}
  		}
@@@ -2292,7 -2295,6 +2290,7 @@@ static int ipv6_route_seq_show(struct s
  {
  	struct fib6_info *rt = v;
  	struct ipv6_route_iter *iter = seq->private;
 +	unsigned int flags = rt->fib6_flags;
  	const struct net_device *dev;
  
  	seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
@@@ -2302,17 -2304,15 +2300,17 @@@
  #else
  	seq_puts(seq, "00000000000000000000000000000000 00 ");
  #endif
 -	if (rt->fib6_flags & RTF_GATEWAY)
 -		seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw);
 -	else
 +	if (rt->fib6_nh.fib_nh_gw_family) {
 +		flags |= RTF_GATEWAY;
 +		seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6);
 +	} else {
  		seq_puts(seq, "00000000000000000000000000000000");
 +	}
  
 -	dev = rt->fib6_nh.nh_dev;
 +	dev = rt->fib6_nh.fib_nh_dev;
  	seq_printf(seq, " %08x %08x %08x %08x %8s\n",
 -		   rt->fib6_metric, atomic_read(&rt->fib6_ref), 0,
 -		   rt->fib6_flags, dev ? dev->name : "");
 +		   rt->fib6_metric, refcount_read(&rt->fib6_ref), 0,
 +		   flags, dev ? dev->name : "");
  	iter->w.leaf = NULL;
  	return 0;
  }
diff --combined net/ipv6/route.c
index b18e85cd7587,0520aca3354b..23a20d62daac
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@@ -59,7 -59,7 +59,7 @@@
  #include <net/xfrm.h>
  #include <net/netevent.h>
  #include <net/netlink.h>
 -#include <net/nexthop.h>
 +#include <net/rtnh.h>
  #include <net/lwtunnel.h>
  #include <net/ip_tunnels.h>
  #include <net/l3mdev.h>
@@@ -102,15 -102,14 +102,15 @@@ static void		ip6_rt_update_pmtu(struct 
  					   struct sk_buff *skb, u32 mtu);
  static void		rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
  					struct sk_buff *skb);
 -static int rt6_score_route(struct fib6_info *rt, int oif, int strict);
 +static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 +			   int strict);
  static size_t rt6_nlmsg_size(struct fib6_info *rt);
  static int rt6_fill_node(struct net *net, struct sk_buff *skb,
  			 struct fib6_info *rt, struct dst_entry *dst,
  			 struct in6_addr *dest, struct in6_addr *src,
  			 int iif, int type, u32 portid, u32 seq,
  			 unsigned int flags);
 -static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
 +static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
  					   struct in6_addr *daddr,
  					   struct in6_addr *saddr);
  
@@@ -296,7 -295,7 +296,7 @@@ static const struct fib6_info fib6_null
  	.fib6_flags	= (RTF_REJECT | RTF_NONEXTHOP),
  	.fib6_protocol  = RTPROT_KERNEL,
  	.fib6_metric	= ~(u32)0,
 -	.fib6_ref	= ATOMIC_INIT(1),
 +	.fib6_ref	= REFCOUNT_INIT(1),
  	.fib6_type	= RTN_UNREACHABLE,
  	.fib6_metrics	= (struct dst_metrics *)&dst_default_metrics,
  };
@@@ -380,11 -379,8 +380,8 @@@ static void ip6_dst_destroy(struct dst_
  		in6_dev_put(idev);
  	}
  
- 	rcu_read_lock();
- 	from = rcu_dereference(rt->from);
- 	rcu_assign_pointer(rt->from, NULL);
+ 	from = xchg((__force struct fib6_info **)&rt->from, NULL);
  	fib6_info_release(from);
- 	rcu_read_unlock();
  }
  
  static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@@ -428,15 -424,13 +425,15 @@@ static bool rt6_check_expired(const str
  	return false;
  }
  
 -struct fib6_info *fib6_multipath_select(const struct net *net,
 -					struct fib6_info *match,
 -					struct flowi6 *fl6, int oif,
 -					const struct sk_buff *skb,
 -					int strict)
 +void fib6_select_path(const struct net *net, struct fib6_result *res,
 +		      struct flowi6 *fl6, int oif, bool have_oif_match,
 +		      const struct sk_buff *skb, int strict)
  {
  	struct fib6_info *sibling, *next_sibling;
 +	struct fib6_info *match = res->f6i;
 +
 +	if (!match->fib6_nsiblings || have_oif_match)
 +		goto out;
  
  	/* We might have already computed the hash for ICMPv6 errors. In such
  	 * case it will always be non-zero. Otherwise now is the time to do it.
@@@ -444,89 -438,61 +441,89 @@@
  	if (!fl6->mp_hash)
  		fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
  
 -	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound))
 -		return match;
 +	if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound))
 +		goto out;
  
  	list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
  				 fib6_siblings) {
 +		const struct fib6_nh *nh = &sibling->fib6_nh;
  		int nh_upper_bound;
  
 -		nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound);
 +		nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
  		if (fl6->mp_hash > nh_upper_bound)
  			continue;
 -		if (rt6_score_route(sibling, oif, strict) < 0)
 +		if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
  			break;
  		match = sibling;
  		break;
  	}
  
 -	return match;
 +out:
 +	res->f6i = match;
 +	res->nh = &match->fib6_nh;
  }
  
  /*
   *	Route lookup. rcu_read_lock() should be held.
   */
  
 -static inline struct fib6_info *rt6_device_match(struct net *net,
 -						 struct fib6_info *rt,
 -						    const struct in6_addr *saddr,
 -						    int oif,
 -						    int flags)
 +static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
 +			       const struct in6_addr *saddr, int oif, int flags)
  {
 -	struct fib6_info *sprt;
 +	const struct net_device *dev;
  
 -	if (!oif && ipv6_addr_any(saddr) &&
 -	    !(rt->fib6_nh.nh_flags & RTNH_F_DEAD))
 -		return rt;
 +	if (nh->fib_nh_flags & RTNH_F_DEAD)
 +		return false;
  
 -	for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
 -		const struct net_device *dev = sprt->fib6_nh.nh_dev;
 +	dev = nh->fib_nh_dev;
 +	if (oif) {
 +		if (dev->ifindex == oif)
 +			return true;
 +	} else {
 +		if (ipv6_chk_addr(net, saddr, dev,
 +				  flags & RT6_LOOKUP_F_IFACE))
 +			return true;
 +	}
  
 -		if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD)
 -			continue;
 +	return false;
 +}
  
 -		if (oif) {
 -			if (dev->ifindex == oif)
 -				return sprt;
 -		} else {
 -			if (ipv6_chk_addr(net, saddr, dev,
 -					  flags & RT6_LOOKUP_F_IFACE))
 -				return sprt;
 +static void rt6_device_match(struct net *net, struct fib6_result *res,
 +			     const struct in6_addr *saddr, int oif, int flags)
 +{
 +	struct fib6_info *f6i = res->f6i;
 +	struct fib6_info *spf6i;
 +	struct fib6_nh *nh;
 +
 +	if (!oif && ipv6_addr_any(saddr)) {
 +		nh = &f6i->fib6_nh;
 +		if (!(nh->fib_nh_flags & RTNH_F_DEAD))
 +			goto out;
 +	}
 +
 +	for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) {
 +		nh = &spf6i->fib6_nh;
 +		if (__rt6_device_match(net, nh, saddr, oif, flags)) {
 +			res->f6i = spf6i;
 +			goto out;
  		}
  	}
  
 -	if (oif && flags & RT6_LOOKUP_F_IFACE)
 -		return net->ipv6.fib6_null_entry;
 +	if (oif && flags & RT6_LOOKUP_F_IFACE) {
 +		res->f6i = net->ipv6.fib6_null_entry;
 +		nh = &res->f6i->fib6_nh;
 +		goto out;
 +	}
  
 -	return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt;
 +	nh = &f6i->fib6_nh;
 +	if (nh->fib_nh_flags & RTNH_F_DEAD) {
 +		res->f6i = net->ipv6.fib6_null_entry;
 +		nh = &res->f6i->fib6_nh;
 +	}
 +out:
 +	res->nh = nh;
 +	res->fib6_type = res->f6i->fib6_type;
 +	res->fib6_flags = res->f6i->fib6_flags;
  }
  
  #ifdef CONFIG_IPV6_ROUTER_PREF
@@@ -548,7 -514,7 +545,7 @@@ static void rt6_probe_deferred(struct w
  	kfree(work);
  }
  
 -static void rt6_probe(struct fib6_info *rt)
 +static void rt6_probe(struct fib6_nh *fib6_nh)
  {
  	struct __rt6_probe_work *work = NULL;
  	const struct in6_addr *nh_gw;
@@@ -564,11 -530,11 +561,11 @@@
  	 * Router Reachability Probe MUST be rate-limited
  	 * to no more than one per minute.
  	 */
 -	if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
 +	if (!fib6_nh->fib_nh_gw_family)
  		return;
  
 -	nh_gw = &rt->fib6_nh.nh_gw;
 -	dev = rt->fib6_nh.nh_dev;
 +	nh_gw = &fib6_nh->fib_nh_gw6;
 +	dev = fib6_nh->fib_nh_dev;
  	rcu_read_lock_bh();
  	idev = __in6_dev_get(dev);
  	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
@@@ -585,13 -551,13 +582,13 @@@
  				__neigh_set_probe_once(neigh);
  		}
  		write_unlock(&neigh->lock);
 -	} else if (time_after(jiffies, rt->last_probe +
 +	} else if (time_after(jiffies, fib6_nh->last_probe +
  				       idev->cnf.rtr_probe_interval)) {
  		work = kmalloc(sizeof(*work), GFP_ATOMIC);
  	}
  
  	if (work) {
 -		rt->last_probe = jiffies;
 +		fib6_nh->last_probe = jiffies;
  		INIT_WORK(&work->work, rt6_probe_deferred);
  		work->target = *nh_gw;
  		dev_hold(dev);
@@@ -603,7 -569,7 +600,7 @@@ out
  	rcu_read_unlock_bh();
  }
  #else
 -static inline void rt6_probe(struct fib6_info *rt)
 +static inline void rt6_probe(struct fib6_nh *fib6_nh)
  {
  }
  #endif
@@@ -611,14 -577,27 +608,14 @@@
  /*
   * Default Router Selection (RFC 2461 6.3.6)
   */
 -static inline int rt6_check_dev(struct fib6_info *rt, int oif)
 -{
 -	const struct net_device *dev = rt->fib6_nh.nh_dev;
 -
 -	if (!oif || dev->ifindex == oif)
 -		return 2;
 -	return 0;
 -}
 -
 -static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
 +static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
  {
  	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
  	struct neighbour *neigh;
  
 -	if (rt->fib6_flags & RTF_NONEXTHOP ||
 -	    !(rt->fib6_flags & RTF_GATEWAY))
 -		return RT6_NUD_SUCCEED;
 -
  	rcu_read_lock_bh();
 -	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev,
 -					  &rt->fib6_nh.nh_gw);
 +	neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
 +					  &fib6_nh->fib_nh_gw6);
  	if (neigh) {
  		read_lock(&neigh->lock);
  		if (neigh->nud_state & NUD_VALID)
@@@ -639,44 -618,58 +636,44 @@@
  	return ret;
  }
  
 -static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
 +static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
 +			   int strict)
  {
 -	int m;
 +	int m = 0;
 +
 +	if (!oif || nh->fib_nh_dev->ifindex == oif)
 +		m = 2;
  
 -	m = rt6_check_dev(rt, oif);
  	if (!m && (strict & RT6_LOOKUP_F_IFACE))
  		return RT6_NUD_FAIL_HARD;
  #ifdef CONFIG_IPV6_ROUTER_PREF
 -	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2;
 +	m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
  #endif
 -	if (strict & RT6_LOOKUP_F_REACHABLE) {
 -		int n = rt6_check_neigh(rt);
 +	if ((strict & RT6_LOOKUP_F_REACHABLE) &&
 +	    !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
 +		int n = rt6_check_neigh(nh);
  		if (n < 0)
  			return n;
  	}
  	return m;
  }
  
 -/* called with rc_read_lock held */
 -static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
 +static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
 +		       int oif, int strict, int *mpri, bool *do_rr)
  {
 -	const struct net_device *dev = fib6_info_nh_dev(f6i);
 +	bool match_do_rr = false;
  	bool rc = false;
 -
 -	if (dev) {
 -		const struct inet6_dev *idev = __in6_dev_get(dev);
 -
 -		rc = !!idev->cnf.ignore_routes_with_linkdown;
 -	}
 -
 -	return rc;
 -}
 -
 -static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
 -				   int *mpri, struct fib6_info *match,
 -				   bool *do_rr)
 -{
  	int m;
 -	bool match_do_rr = false;
  
 -	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
 +	if (nh->fib_nh_flags & RTNH_F_DEAD)
  		goto out;
  
 -	if (fib6_ignore_linkdown(rt) &&
 -	    rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
 +	if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
 +	    nh->fib_nh_flags & RTNH_F_LINKDOWN &&
  	    !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
  		goto out;
  
 -	if (fib6_check_expired(rt))
 -		goto out;
 -
 -	m = rt6_score_route(rt, oif, strict);
 +	m = rt6_score_route(nh, fib6_flags, oif, strict);
  	if (m == RT6_NUD_FAIL_DO_RR) {
  		match_do_rr = true;
  		m = 0; /* lowest valid score */
@@@ -685,82 -678,67 +682,82 @@@
  	}
  
  	if (strict & RT6_LOOKUP_F_REACHABLE)
 -		rt6_probe(rt);
 +		rt6_probe(nh);
  
  	/* note that m can be RT6_NUD_FAIL_PROBE at this point */
  	if (m > *mpri) {
  		*do_rr = match_do_rr;
  		*mpri = m;
 -		match = rt;
 +		rc = true;
  	}
  out:
 -	return match;
 +	return rc;
  }
  
 -static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
 -				     struct fib6_info *leaf,
 -				     struct fib6_info *rr_head,
 -				     u32 metric, int oif, int strict,
 -				     bool *do_rr)
 +static void __find_rr_leaf(struct fib6_info *f6i_start,
 +			   struct fib6_info *nomatch, u32 metric,
 +			   struct fib6_result *res, struct fib6_info **cont,
 +			   int oif, int strict, bool *do_rr, int *mpri)
  {
 -	struct fib6_info *rt, *match, *cont;
 -	int mpri = -1;
 +	struct fib6_info *f6i;
  
 -	match = NULL;
 -	cont = NULL;
 -	for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
 -		if (rt->fib6_metric != metric) {
 -			cont = rt;
 -			break;
 +	for (f6i = f6i_start;
 +	     f6i && f6i != nomatch;
 +	     f6i = rcu_dereference(f6i->fib6_next)) {
 +		struct fib6_nh *nh;
 +
 +		if (cont && f6i->fib6_metric != metric) {
 +			*cont = f6i;
 +			return;
  		}
  
 -		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 -	}
 +		if (fib6_check_expired(f6i))
 +			continue;
  
 -	for (rt = leaf; rt && rt != rr_head;
 -	     rt = rcu_dereference(rt->fib6_next)) {
 -		if (rt->fib6_metric != metric) {
 -			cont = rt;
 -			break;
 +		nh = &f6i->fib6_nh;
 +		if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) {
 +			res->f6i = f6i;
 +			res->nh = nh;
 +			res->fib6_flags = f6i->fib6_flags;
 +			res->fib6_type = f6i->fib6_type;
  		}
 -
 -		match = find_match(rt, oif, strict, &mpri, match, do_rr);
  	}
 +}
  
 -	if (match || !cont)
 -		return match;
 +static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf,
 +			 struct fib6_info *rr_head, int oif, int strict,
 +			 bool *do_rr, struct fib6_result *res)
 +{
 +	u32 metric = rr_head->fib6_metric;
 +	struct fib6_info *cont = NULL;
 +	int mpri = -1;
  
 -	for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next))
 -		match = find_match(rt, oif, strict, &mpri, match, do_rr);
 +	__find_rr_leaf(rr_head, NULL, metric, res, &cont,
 +		       oif, strict, do_rr, &mpri);
  
 -	return match;
 +	__find_rr_leaf(leaf, rr_head, metric, res, &cont,
 +		       oif, strict, do_rr, &mpri);
 +
 +	if (res->f6i || !cont)
 +		return;
 +
 +	__find_rr_leaf(cont, NULL, metric, res, NULL,
 +		       oif, strict, do_rr, &mpri);
  }
  
 -static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn,
 -				   int oif, int strict)
 +static void rt6_select(struct net *net, struct fib6_node *fn, int oif,
 +		       struct fib6_result *res, int strict)
  {
  	struct fib6_info *leaf = rcu_dereference(fn->leaf);
 -	struct fib6_info *match, *rt0;
 +	struct fib6_info *rt0;
  	bool do_rr = false;
  	int key_plen;
  
 +	/* make sure this function or its helpers sets f6i */
 +	res->f6i = NULL;
 +
  	if (!leaf || leaf == net->ipv6.fib6_null_entry)
 -		return net->ipv6.fib6_null_entry;
 +		goto out;
  
  	rt0 = rcu_dereference(fn->rr_ptr);
  	if (!rt0)
@@@ -777,9 -755,11 +774,9 @@@
  		key_plen = rt0->fib6_src.plen;
  #endif
  	if (fn->fn_bit != key_plen)
 -		return net->ipv6.fib6_null_entry;
 -
 -	match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict,
 -			     &do_rr);
 +		goto out;
  
 +	find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res);
  	if (do_rr) {
  		struct fib6_info *next = rcu_dereference(rt0->fib6_next);
  
@@@ -796,19 -776,12 +793,19 @@@
  		}
  	}
  
 -	return match ? match : net->ipv6.fib6_null_entry;
 +out:
 +	if (!res->f6i) {
 +		res->f6i = net->ipv6.fib6_null_entry;
 +		res->nh = &res->f6i->fib6_nh;
 +		res->fib6_flags = res->f6i->fib6_flags;
 +		res->fib6_type = res->f6i->fib6_type;
 +	}
  }
  
 -static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt)
 +static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res)
  {
 -	return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY));
 +	return (res->f6i->fib6_flags & RTF_NONEXTHOP) ||
 +	       res->nh->fib_nh_gw_family;
  }
  
  #ifdef CONFIG_IPV6_ROUTE_INFO
@@@ -892,17 -865,17 +889,17 @@@ int rt6_route_rcv(struct net_device *de
   */
  
  /* called with rcu_lock held */
 -static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
 +static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res)
  {
 -	struct net_device *dev = rt->fib6_nh.nh_dev;
 +	struct net_device *dev = res->nh->fib_nh_dev;
  
 -	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
 +	if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
  		/* for copies of local routes, dst->dev needs to be the
  		 * device if it is a master device, the master device if
  		 * device is enslaved, and the loopback as the default
  		 */
  		if (netif_is_l3_slave(dev) &&
 -		    !rt6_need_strict(&rt->fib6_dst.addr))
 +		    !rt6_need_strict(&res->f6i->fib6_dst.addr))
  			dev = l3mdev_master_dev_rcu(dev);
  		else if (!netif_is_l3_master(dev))
  			dev = dev_net(dev)->loopback_dev;
@@@ -948,11 -921,11 +945,11 @@@ static unsigned short fib6_info_dst_fla
  	return flags;
  }
  
 -static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
 +static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type)
  {
 -	rt->dst.error = ip6_rt_type_to_error(ort->fib6_type);
 +	rt->dst.error = ip6_rt_type_to_error(fib6_type);
  
 -	switch (ort->fib6_type) {
 +	switch (fib6_type) {
  	case RTN_BLACKHOLE:
  		rt->dst.output = dst_discard_out;
  		rt->dst.input = dst_discard;
@@@ -970,28 -943,26 +967,28 @@@
  	}
  }
  
 -static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
 +static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res)
  {
 -	if (ort->fib6_flags & RTF_REJECT) {
 -		ip6_rt_init_dst_reject(rt, ort);
 +	struct fib6_info *f6i = res->f6i;
 +
 +	if (res->fib6_flags & RTF_REJECT) {
 +		ip6_rt_init_dst_reject(rt, res->fib6_type);
  		return;
  	}
  
  	rt->dst.error = 0;
  	rt->dst.output = ip6_output;
  
 -	if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) {
 +	if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) {
  		rt->dst.input = ip6_input;
 -	} else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
 +	} else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) {
  		rt->dst.input = ip6_mc_input;
  	} else {
  		rt->dst.input = ip6_forward;
  	}
  
 -	if (ort->fib6_nh.nh_lwtstate) {
 -		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
 +	if (res->nh->fib_nh_lws) {
 +		rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws);
  		lwtunnel_set_redirect(&rt->dst);
  	}
  
@@@ -1006,25 -977,20 +1003,25 @@@ static void rt6_set_from(struct rt6_inf
  	ip_dst_init_metrics(&rt->dst, from->fib6_metrics);
  }
  
 -/* Caller must already hold reference to @ort */
 -static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
 +/* Caller must already hold reference to f6i in result */
 +static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res)
  {
 -	struct net_device *dev = fib6_info_nh_dev(ort);
 +	const struct fib6_nh *nh = res->nh;
 +	const struct net_device *dev = nh->fib_nh_dev;
 +	struct fib6_info *f6i = res->f6i;
  
 -	ip6_rt_init_dst(rt, ort);
 +	ip6_rt_init_dst(rt, res);
  
 -	rt->rt6i_dst = ort->fib6_dst;
 +	rt->rt6i_dst = f6i->fib6_dst;
  	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
 -	rt->rt6i_gateway = ort->fib6_nh.nh_gw;
 -	rt->rt6i_flags = ort->fib6_flags;
 -	rt6_set_from(rt, ort);
 +	rt->rt6i_flags = res->fib6_flags;
 +	if (nh->fib_nh_gw_family) {
 +		rt->rt6i_gateway = nh->fib_nh_gw6;
 +		rt->rt6i_flags |= RTF_GATEWAY;
 +	}
 +	rt6_set_from(rt, f6i);
  #ifdef CONFIG_IPV6_SUBTREES
 -	rt->rt6i_src = ort->fib6_src;
 +	rt->rt6i_src = f6i->fib6_src;
  #endif
  }
  
@@@ -1046,13 -1012,14 +1043,13 @@@ static struct fib6_node* fib6_backtrack
  	}
  }
  
 -static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
 -			  bool null_fallback)
 +static bool ip6_hold_safe(struct net *net, struct rt6_info **prt)
  {
  	struct rt6_info *rt = *prt;
  
  	if (dst_hold_safe(&rt->dst))
  		return true;
 -	if (null_fallback) {
 +	if (net) {
  		rt = net->ipv6.ip6_null_entry;
  		dst_hold(&rt->dst);
  	} else {
@@@ -1063,24 -1030,22 +1060,24 @@@
  }
  
  /* called with rcu_lock held */
 -static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
 +static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res)
  {
 -	unsigned short flags = fib6_info_dst_flags(rt);
 -	struct net_device *dev = rt->fib6_nh.nh_dev;
 +	struct net_device *dev = res->nh->fib_nh_dev;
 +	struct fib6_info *f6i = res->f6i;
 +	unsigned short flags;
  	struct rt6_info *nrt;
  
 -	if (!fib6_info_hold_safe(rt))
 +	if (!fib6_info_hold_safe(f6i))
  		goto fallback;
  
 +	flags = fib6_info_dst_flags(f6i);
  	nrt = ip6_dst_alloc(dev_net(dev), dev, flags);
  	if (!nrt) {
 -		fib6_info_release(rt);
 +		fib6_info_release(f6i);
  		goto fallback;
  	}
  
 -	ip6_rt_copy_init(nrt, rt);
 +	ip6_rt_copy_init(nrt, res);
  	return nrt;
  
  fallback:
@@@ -1095,7 -1060,7 +1092,7 @@@ static struct rt6_info *ip6_pol_route_l
  					     const struct sk_buff *skb,
  					     int flags)
  {
 -	struct fib6_info *f6i;
 +	struct fib6_result res = {};
  	struct fib6_node *fn;
  	struct rt6_info *rt;
  
@@@ -1105,38 -1070,37 +1102,38 @@@
  	rcu_read_lock();
  	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
  restart:
 -	f6i = rcu_dereference(fn->leaf);
 -	if (!f6i) {
 -		f6i = net->ipv6.fib6_null_entry;
 -	} else {
 -		f6i = rt6_device_match(net, f6i, &fl6->saddr,
 -				      fl6->flowi6_oif, flags);
 -		if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
 -			f6i = fib6_multipath_select(net, f6i, fl6,
 -						    fl6->flowi6_oif, skb,
 -						    flags);
 -	}
 -	if (f6i == net->ipv6.fib6_null_entry) {
 +	res.f6i = rcu_dereference(fn->leaf);
 +	if (!res.f6i)
 +		res.f6i = net->ipv6.fib6_null_entry;
 +	else
 +		rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif,
 +				 flags);
 +
 +	if (res.f6i == net->ipv6.fib6_null_entry) {
  		fn = fib6_backtrack(fn, &fl6->saddr);
  		if (fn)
  			goto restart;
 +
 +		rt = net->ipv6.ip6_null_entry;
 +		dst_hold(&rt->dst);
 +		goto out;
  	}
  
 -	trace_fib6_table_lookup(net, f6i, table, fl6);
 +	fib6_select_path(net, &res, fl6, fl6->flowi6_oif,
 +			 fl6->flowi6_oif != 0, skb, flags);
  
  	/* Search through exception table */
 -	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
 +	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
  	if (rt) {
 -		if (ip6_hold_safe(net, &rt, true))
 +		if (ip6_hold_safe(net, &rt))
  			dst_use_noref(&rt->dst, jiffies);
 -	} else if (f6i == net->ipv6.fib6_null_entry) {
 -		rt = net->ipv6.ip6_null_entry;
 -		dst_hold(&rt->dst);
  	} else {
 -		rt = ip6_create_rt_rcu(f6i);
 +		rt = ip6_create_rt_rcu(&res);
  	}
  
 +out:
 +	trace_fib6_table_lookup(net, &res, table, fl6);
 +
  	rcu_read_unlock();
  
  	return rt;
@@@ -1202,11 -1166,10 +1199,11 @@@ int ip6_ins_rt(struct net *net, struct 
  	return __ip6_ins_rt(rt, &info, NULL);
  }
  
 -static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort,
 +static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res,
  					   const struct in6_addr *daddr,
  					   const struct in6_addr *saddr)
  {
 +	struct fib6_info *f6i = res->f6i;
  	struct net_device *dev;
  	struct rt6_info *rt;
  
@@@ -1214,25 -1177,25 +1211,25 @@@
  	 *	Clone the route.
  	 */
  
 -	if (!fib6_info_hold_safe(ort))
 +	if (!fib6_info_hold_safe(f6i))
  		return NULL;
  
 -	dev = ip6_rt_get_dev_rcu(ort);
 +	dev = ip6_rt_get_dev_rcu(res);
  	rt = ip6_dst_alloc(dev_net(dev), dev, 0);
  	if (!rt) {
 -		fib6_info_release(ort);
 +		fib6_info_release(f6i);
  		return NULL;
  	}
  
 -	ip6_rt_copy_init(rt, ort);
 +	ip6_rt_copy_init(rt, res);
  	rt->rt6i_flags |= RTF_CACHE;
  	rt->dst.flags |= DST_HOST;
  	rt->rt6i_dst.addr = *daddr;
  	rt->rt6i_dst.plen = 128;
  
 -	if (!rt6_is_gw_or_nonexthop(ort)) {
 -		if (ort->fib6_dst.plen != 128 &&
 -		    ipv6_addr_equal(&ort->fib6_dst.addr, daddr))
 +	if (!rt6_is_gw_or_nonexthop(res)) {
 +		if (f6i->fib6_dst.plen != 128 &&
 +		    ipv6_addr_equal(&f6i->fib6_dst.addr, daddr))
  			rt->rt6i_flags |= RTF_ANYCAST;
  #ifdef CONFIG_IPV6_SUBTREES
  		if (rt->rt6i_src.plen && saddr) {
@@@ -1245,56 -1208,55 +1242,56 @@@
  	return rt;
  }
  
 -static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt)
 +static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
  {
 -	unsigned short flags = fib6_info_dst_flags(rt);
 +	struct fib6_info *f6i = res->f6i;
 +	unsigned short flags = fib6_info_dst_flags(f6i);
  	struct net_device *dev;
  	struct rt6_info *pcpu_rt;
  
 -	if (!fib6_info_hold_safe(rt))
 +	if (!fib6_info_hold_safe(f6i))
  		return NULL;
  
  	rcu_read_lock();
 -	dev = ip6_rt_get_dev_rcu(rt);
 +	dev = ip6_rt_get_dev_rcu(res);
  	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
  	rcu_read_unlock();
  	if (!pcpu_rt) {
 -		fib6_info_release(rt);
 +		fib6_info_release(f6i);
  		return NULL;
  	}
 -	ip6_rt_copy_init(pcpu_rt, rt);
 +	ip6_rt_copy_init(pcpu_rt, res);
  	pcpu_rt->rt6i_flags |= RTF_PCPU;
  	return pcpu_rt;
  }
  
  /* It should be called with rcu_read_lock() acquired */
 -static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt)
 +static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res)
  {
  	struct rt6_info *pcpu_rt, **p;
  
 -	p = this_cpu_ptr(rt->rt6i_pcpu);
 +	p = this_cpu_ptr(res->f6i->rt6i_pcpu);
  	pcpu_rt = *p;
  
  	if (pcpu_rt)
 -		ip6_hold_safe(NULL, &pcpu_rt, false);
 +		ip6_hold_safe(NULL, &pcpu_rt);
  
  	return pcpu_rt;
  }
  
  static struct rt6_info *rt6_make_pcpu_route(struct net *net,
 -					    struct fib6_info *rt)
 +					    const struct fib6_result *res)
  {
  	struct rt6_info *pcpu_rt, *prev, **p;
  
 -	pcpu_rt = ip6_rt_pcpu_alloc(rt);
 +	pcpu_rt = ip6_rt_pcpu_alloc(res);
  	if (!pcpu_rt) {
  		dst_hold(&net->ipv6.ip6_null_entry->dst);
  		return net->ipv6.ip6_null_entry;
  	}
  
  	dst_hold(&pcpu_rt->dst);
 -	p = this_cpu_ptr(rt->rt6i_pcpu);
 +	p = this_cpu_ptr(res->f6i->rt6i_pcpu);
  	prev = cmpxchg(p, NULL, pcpu_rt);
  	BUG_ON(prev);
  
@@@ -1323,9 -1285,7 +1320,7 @@@ static void rt6_remove_exception(struc
  	/* purge completely the exception to allow releasing the held resources:
  	 * some [sk] cache may keep the dst around for unlimited time
  	 */
- 	from = rcu_dereference_protected(rt6_ex->rt6i->from,
- 					 lockdep_is_held(&rt6_exception_lock));
- 	rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
+ 	from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL);
  	fib6_info_release(from);
  	dst_dev_put(&rt6_ex->rt6i->dst);
  
@@@ -1437,15 -1397,14 +1432,15 @@@ __rt6_find_exception_rcu(struct rt6_exc
  	return NULL;
  }
  
 -static unsigned int fib6_mtu(const struct fib6_info *rt)
 +static unsigned int fib6_mtu(const struct fib6_result *res)
  {
 +	const struct fib6_nh *nh = res->nh;
  	unsigned int mtu;
  
 -	if (rt->fib6_pmtu) {
 -		mtu = rt->fib6_pmtu;
 +	if (res->f6i->fib6_pmtu) {
 +		mtu = res->f6i->fib6_pmtu;
  	} else {
 -		struct net_device *dev = fib6_info_nh_dev(rt);
 +		struct net_device *dev = nh->fib_nh_dev;
  		struct inet6_dev *idev;
  
  		rcu_read_lock();
@@@ -1456,27 -1415,26 +1451,27 @@@
  
  	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
  
 -	return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu);
 +	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
  }
  
  static int rt6_insert_exception(struct rt6_info *nrt,
 -				struct fib6_info *ort)
 +				const struct fib6_result *res)
  {
  	struct net *net = dev_net(nrt->dst.dev);
  	struct rt6_exception_bucket *bucket;
  	struct in6_addr *src_key = NULL;
  	struct rt6_exception *rt6_ex;
 +	struct fib6_info *f6i = res->f6i;
  	int err = 0;
  
  	spin_lock_bh(&rt6_exception_lock);
  
 -	if (ort->exception_bucket_flushed) {
 +	if (f6i->exception_bucket_flushed) {
  		err = -EINVAL;
  		goto out;
  	}
  
 -	bucket = rcu_dereference_protected(ort->rt6i_exception_bucket,
 +	bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket,
  					lockdep_is_held(&rt6_exception_lock));
  	if (!bucket) {
  		bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket),
@@@ -1485,24 -1443,24 +1480,24 @@@
  			err = -ENOMEM;
  			goto out;
  		}
 -		rcu_assign_pointer(ort->rt6i_exception_bucket, bucket);
 +		rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket);
  	}
  
  #ifdef CONFIG_IPV6_SUBTREES
 -	/* rt6i_src.plen != 0 indicates ort is in subtree
 +	/* fib6_src.plen != 0 indicates f6i is in subtree
  	 * and exception table is indexed by a hash of
 -	 * both rt6i_dst and rt6i_src.
 +	 * both fib6_dst and fib6_src.
  	 * Otherwise, the exception table is indexed by
 -	 * a hash of only rt6i_dst.
 +	 * a hash of only fib6_dst.
  	 */
 -	if (ort->fib6_src.plen)
 +	if (f6i->fib6_src.plen)
  		src_key = &nrt->rt6i_src.addr;
  #endif
 -	/* rt6_mtu_change() might lower mtu on ort.
 +	/* rt6_mtu_change() might lower mtu on f6i.
  	 * Only insert this exception route if its mtu
 -	 * is less than ort's mtu value.
 +	 * is less than f6i's mtu value.
  	 */
 -	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) {
 +	if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) {
  		err = -EINVAL;
  		goto out;
  	}
@@@ -1531,9 -1489,9 +1526,9 @@@ out
  
  	/* Update fn->fn_sernum to invalidate all cached dst */
  	if (!err) {
 -		spin_lock_bh(&ort->fib6_table->tb6_lock);
 -		fib6_update_sernum(net, ort);
 -		spin_unlock_bh(&ort->fib6_table->tb6_lock);
 +		spin_lock_bh(&f6i->fib6_table->tb6_lock);
 +		fib6_update_sernum(net, f6i);
 +		spin_unlock_bh(&f6i->fib6_table->tb6_lock);
  		fib6_force_start_gc(net);
  	}
  
@@@ -1570,33 -1528,33 +1565,33 @@@ out
  /* Find cached rt in the hash table inside passed in rt
   * Caller has to hold rcu_read_lock()
   */
 -static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt,
 +static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res,
  					   struct in6_addr *daddr,
  					   struct in6_addr *saddr)
  {
  	struct rt6_exception_bucket *bucket;
  	struct in6_addr *src_key = NULL;
  	struct rt6_exception *rt6_ex;
 -	struct rt6_info *res = NULL;
 +	struct rt6_info *ret = NULL;
  
 -	bucket = rcu_dereference(rt->rt6i_exception_bucket);
 +	bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
  
  #ifdef CONFIG_IPV6_SUBTREES
 -	/* rt6i_src.plen != 0 indicates rt is in subtree
 +	/* fib6i_src.plen != 0 indicates f6i is in subtree
  	 * and exception table is indexed by a hash of
 -	 * both rt6i_dst and rt6i_src.
 +	 * both fib6_dst and fib6_src.
  	 * Otherwise, the exception table is indexed by
 -	 * a hash of only rt6i_dst.
 +	 * a hash of only fib6_dst.
  	 */
 -	if (rt->fib6_src.plen)
 +	if (res->f6i->fib6_src.plen)
  		src_key = saddr;
  #endif
  	rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
  
  	if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i))
 -		res = rt6_ex->rt6i;
 +		ret = rt6_ex->rt6i;
  
 -	return res;
 +	return ret;
  }
  
  /* Remove the passed in cached rt from the hash table that contains it */
@@@ -1844,10 -1802,11 +1839,10 @@@ void rt6_age_exceptions(struct fib6_inf
  }
  
  /* must be called with rcu lock held */
 -struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table,
 -				    int oif, struct flowi6 *fl6, int strict)
 +int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif,
 +		      struct flowi6 *fl6, struct fib6_result *res, int strict)
  {
  	struct fib6_node *fn, *saved_fn;
 -	struct fib6_info *f6i;
  
  	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
  	saved_fn = fn;
@@@ -1856,8 -1815,8 +1851,8 @@@
  		oif = 0;
  
  redo_rt6_select:
 -	f6i = rt6_select(net, fn, oif, strict);
 -	if (f6i == net->ipv6.fib6_null_entry) {
 +	rt6_select(net, fn, oif, res, strict);
 +	if (res->f6i == net->ipv6.fib6_null_entry) {
  		fn = fib6_backtrack(fn, &fl6->saddr);
  		if (fn)
  			goto redo_rt6_select;
@@@ -1869,16 -1828,16 +1864,16 @@@
  		}
  	}
  
 -	trace_fib6_table_lookup(net, f6i, table, fl6);
 +	trace_fib6_table_lookup(net, res, table, fl6);
  
 -	return f6i;
 +	return 0;
  }
  
  struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
  			       int oif, struct flowi6 *fl6,
  			       const struct sk_buff *skb, int flags)
  {
 -	struct fib6_info *f6i;
 +	struct fib6_result res = {};
  	struct rt6_info *rt;
  	int strict = 0;
  
@@@ -1889,26 -1848,27 +1884,26 @@@
  
  	rcu_read_lock();
  
 -	f6i = fib6_table_lookup(net, table, oif, fl6, strict);
 -	if (f6i->fib6_nsiblings)
 -		f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
 -
 -	if (f6i == net->ipv6.fib6_null_entry) {
 +	fib6_table_lookup(net, table, oif, fl6, &res, strict);
 +	if (res.f6i == net->ipv6.fib6_null_entry) {
  		rt = net->ipv6.ip6_null_entry;
  		rcu_read_unlock();
  		dst_hold(&rt->dst);
  		return rt;
  	}
  
 +	fib6_select_path(net, &res, fl6, oif, false, skb, strict);
 +
  	/*Search through exception table */
 -	rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
 +	rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr);
  	if (rt) {
 -		if (ip6_hold_safe(net, &rt, true))
 +		if (ip6_hold_safe(net, &rt))
  			dst_use_noref(&rt->dst, jiffies);
  
  		rcu_read_unlock();
  		return rt;
  	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
 -			    !(f6i->fib6_flags & RTF_GATEWAY))) {
 +			    !res.nh->fib_nh_gw_family)) {
  		/* Create a RTF_CACHE clone which will not be
  		 * owned by the fib6 tree.  It is for the special case where
  		 * the daddr in the skb during the neighbor look-up is different
@@@ -1916,7 -1876,7 +1911,7 @@@
  		 */
  		struct rt6_info *uncached_rt;
  
 -		uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL);
 +		uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
  
  		rcu_read_unlock();
  
@@@ -1938,10 -1898,10 +1933,10 @@@
  		struct rt6_info *pcpu_rt;
  
  		local_bh_disable();
 -		pcpu_rt = rt6_get_pcpu_route(f6i);
 +		pcpu_rt = rt6_get_pcpu_route(&res);
  
  		if (!pcpu_rt)
 -			pcpu_rt = rt6_make_pcpu_route(net, f6i);
 +			pcpu_rt = rt6_make_pcpu_route(net, &res);
  
  		local_bh_enable();
  		rcu_read_unlock();
@@@ -2360,23 -2320,19 +2355,23 @@@ static void __ip6_rt_update_pmtu(struc
  		if (rt6->rt6i_flags & RTF_CACHE)
  			rt6_update_exception_stamp_rt(rt6);
  	} else if (daddr) {
 -		struct fib6_info *from;
 +		struct fib6_result res = {};
  		struct rt6_info *nrt6;
  
  		rcu_read_lock();
 -		from = rcu_dereference(rt6->from);
 -		if (!from) {
 +		res.f6i = rcu_dereference(rt6->from);
 +		if (!res.f6i) {
  			rcu_read_unlock();
  			return;
  		}
 -		nrt6 = ip6_rt_cache_alloc(from, daddr, saddr);
 +		res.nh = &res.f6i->fib6_nh;
 +		res.fib6_flags = res.f6i->fib6_flags;
 +		res.fib6_type = res.f6i->fib6_type;
 +
 +		nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr);
  		if (nrt6) {
  			rt6_do_update_pmtu(nrt6, mtu);
 -			if (rt6_insert_exception(nrt6, from))
 +			if (rt6_insert_exception(nrt6, &res))
  				dst_release_immediate(&nrt6->dst);
  		}
  		rcu_read_unlock();
@@@ -2449,36 -2405,6 +2444,36 @@@ void ip6_sk_dst_store_flow(struct sock 
  		      NULL);
  }
  
 +static bool ip6_redirect_nh_match(const struct fib6_result *res,
 +				  struct flowi6 *fl6,
 +				  const struct in6_addr *gw,
 +				  struct rt6_info **ret)
 +{
 +	const struct fib6_nh *nh = res->nh;
 +
 +	if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
 +	    fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
 +		return false;
 +
 +	/* rt_cache's gateway might be different from its 'parent'
 +	 * in the case of an ip redirect.
 +	 * So we keep searching in the exception table if the gateway
 +	 * is different.
 +	 */
 +	if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
 +		struct rt6_info *rt_cache;
 +
 +		rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr);
 +		if (rt_cache &&
 +		    ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
 +			*ret = rt_cache;
 +			return true;
 +		}
 +		return false;
 +	}
 +	return true;
 +}
 +
  /* Handle redirects */
  struct ip6rd_flowi {
  	struct flowi6 fl6;
@@@ -2492,8 -2418,7 +2487,8 @@@ static struct rt6_info *__ip6_route_red
  					     int flags)
  {
  	struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
 -	struct rt6_info *ret = NULL, *rt_cache;
 +	struct rt6_info *ret = NULL;
 +	struct fib6_result res = {};
  	struct fib6_info *rt;
  	struct fib6_node *fn;
  
@@@ -2511,15 -2436,34 +2506,15 @@@
  	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
  restart:
  	for_each_fib6_node_rt_rcu(fn) {
 -		if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
 -			continue;
 +		res.f6i = rt;
 +		res.nh = &rt->fib6_nh;
 +
  		if (fib6_check_expired(rt))
  			continue;
  		if (rt->fib6_flags & RTF_REJECT)
  			break;
 -		if (!(rt->fib6_flags & RTF_GATEWAY))
 -			continue;
 -		if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex)
 -			continue;
 -		/* rt_cache's gateway might be different from its 'parent'
 -		 * in the case of an ip redirect.
 -		 * So we keep searching in the exception table if the gateway
 -		 * is different.
 -		 */
 -		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) {
 -			rt_cache = rt6_find_cached_rt(rt,
 -						      &fl6->daddr,
 -						      &fl6->saddr);
 -			if (rt_cache &&
 -			    ipv6_addr_equal(&rdfl->gateway,
 -					    &rt_cache->rt6i_gateway)) {
 -				ret = rt_cache;
 -				break;
 -			}
 -			continue;
 -		}
 -		break;
 +		if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret))
 +			goto out;
  	}
  
  	if (!rt)
@@@ -2535,20 -2479,15 +2530,20 @@@
  			goto restart;
  	}
  
 +	res.f6i = rt;
 +	res.nh = &rt->fib6_nh;
  out:
 -	if (ret)
 -		ip6_hold_safe(net, &ret, true);
 -	else
 -		ret = ip6_create_rt_rcu(rt);
 +	if (ret) {
 +		ip6_hold_safe(net, &ret);
 +	} else {
 +		res.fib6_flags = res.f6i->fib6_flags;
 +		res.fib6_type = res.f6i->fib6_type;
 +		ret = ip6_create_rt_rcu(&res);
 +	}
  
  	rcu_read_unlock();
  
 -	trace_fib6_table_lookup(net, rt, table, fl6);
 +	trace_fib6_table_lookup(net, &res, table, fl6);
  	return ret;
  };
  
@@@ -2666,15 -2605,12 +2661,15 @@@ out
   * based on ip6_dst_mtu_forward and exception logic of
   * rt6_find_cached_rt; called with rcu_read_lock
   */
 -u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
 -		      struct in6_addr *saddr)
 +u32 ip6_mtu_from_fib6(const struct fib6_result *res,
 +		      const struct in6_addr *daddr,
 +		      const struct in6_addr *saddr)
  {
  	struct rt6_exception_bucket *bucket;
 +	const struct fib6_nh *nh = res->nh;
 +	struct fib6_info *f6i = res->f6i;
 +	const struct in6_addr *src_key;
  	struct rt6_exception *rt6_ex;
 -	struct in6_addr *src_key;
  	struct inet6_dev *idev;
  	u32 mtu = 0;
  
@@@ -2696,7 -2632,7 +2691,7 @@@
  		mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
  
  	if (likely(!mtu)) {
 -		struct net_device *dev = fib6_info_nh_dev(f6i);
 +		struct net_device *dev = nh->fib_nh_dev;
  
  		mtu = IPV6_MIN_MTU;
  		idev = __in6_dev_get(dev);
@@@ -2706,7 -2642,7 +2701,7 @@@
  
  	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
  out:
 -	return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu);
 +	return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu);
  }
  
  struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
@@@ -2962,143 -2898,17 +2957,143 @@@ out
  	return err;
  }
  
 +static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type)
 +{
 +	if ((flags & RTF_REJECT) ||
 +	    (dev && (dev->flags & IFF_LOOPBACK) &&
 +	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
 +	     !(flags & RTF_LOCAL)))
 +		return true;
 +
 +	return false;
 +}
 +
 +int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
 +		 struct fib6_config *cfg, gfp_t gfp_flags,
 +		 struct netlink_ext_ack *extack)
 +{
 +	struct net_device *dev = NULL;
 +	struct inet6_dev *idev = NULL;
 +	int addr_type;
 +	int err;
 +
 +	fib6_nh->fib_nh_family = AF_INET6;
 +
 +	err = -ENODEV;
 +	if (cfg->fc_ifindex) {
 +		dev = dev_get_by_index(net, cfg->fc_ifindex);
 +		if (!dev)
 +			goto out;
 +		idev = in6_dev_get(dev);
 +		if (!idev)
 +			goto out;
 +	}
 +
 +	if (cfg->fc_flags & RTNH_F_ONLINK) {
 +		if (!dev) {
 +			NL_SET_ERR_MSG(extack,
 +				       "Nexthop device required for onlink");
 +			goto out;
 +		}
 +
 +		if (!(dev->flags & IFF_UP)) {
 +			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
 +			err = -ENETDOWN;
 +			goto out;
 +		}
 +
 +		fib6_nh->fib_nh_flags |= RTNH_F_ONLINK;
 +	}
 +
 +	fib6_nh->fib_nh_weight = 1;
 +
 +	/* We cannot add true routes via loopback here,
 +	 * they would result in kernel looping; promote them to reject routes
 +	 */
 +	addr_type = ipv6_addr_type(&cfg->fc_dst);
 +	if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) {
 +		/* hold loopback dev/idev if we haven't done so. */
 +		if (dev != net->loopback_dev) {
 +			if (dev) {
 +				dev_put(dev);
 +				in6_dev_put(idev);
 +			}
 +			dev = net->loopback_dev;
 +			dev_hold(dev);
 +			idev = in6_dev_get(dev);
 +			if (!idev) {
 +				err = -ENODEV;
 +				goto out;
 +			}
 +		}
 +		goto set_dev;
 +	}
 +
 +	if (cfg->fc_flags & RTF_GATEWAY) {
 +		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
 +		if (err)
 +			goto out;
 +
 +		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
 +		fib6_nh->fib_nh_gw_family = AF_INET6;
 +	}
 +
 +	err = -ENODEV;
 +	if (!dev)
 +		goto out;
 +
 +	if (idev->cnf.disable_ipv6) {
 +		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
 +		err = -EACCES;
 +		goto out;
 +	}
 +
 +	if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) {
 +		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
 +		err = -ENETDOWN;
 +		goto out;
 +	}
 +
 +	if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
 +	    !netif_carrier_ok(dev))
 +		fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN;
 +
 +	err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap,
 +				 cfg->fc_encap_type, cfg, gfp_flags, extack);
 +	if (err)
 +		goto out;
 +set_dev:
 +	fib6_nh->fib_nh_dev = dev;
 +	fib6_nh->fib_nh_oif = dev->ifindex;
 +	err = 0;
 +out:
 +	if (idev)
 +		in6_dev_put(idev);
 +
 +	if (err) {
 +		lwtstate_put(fib6_nh->fib_nh_lws);
 +		fib6_nh->fib_nh_lws = NULL;
 +		if (dev)
 +			dev_put(dev);
 +	}
 +
 +	return err;
 +}
 +
 +void fib6_nh_release(struct fib6_nh *fib6_nh)
 +{
 +	fib_nh_common_release(&fib6_nh->nh_common);
 +}
 +
  static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg,
  					      gfp_t gfp_flags,
  					      struct netlink_ext_ack *extack)
  {
  	struct net *net = cfg->fc_nlinfo.nl_net;
  	struct fib6_info *rt = NULL;
 -	struct net_device *dev = NULL;
 -	struct inet6_dev *idev = NULL;
  	struct fib6_table *table;
 -	int addr_type;
  	int err = -EINVAL;
 +	int addr_type;
  
  	/* RTF_PCPU is an internal flag; can not be set by userspace */
  	if (cfg->fc_flags & RTF_PCPU) {
@@@ -3132,6 -2942,33 +3127,6 @@@
  		goto out;
  	}
  #endif
 -	if (cfg->fc_ifindex) {
 -		err = -ENODEV;
 -		dev = dev_get_by_index(net, cfg->fc_ifindex);
 -		if (!dev)
 -			goto out;
 -		idev = in6_dev_get(dev);
 -		if (!idev)
 -			goto out;
 -	}
 -
 -	if (cfg->fc_metric == 0)
 -		cfg->fc_metric = IP6_RT_PRIO_USER;
 -
 -	if (cfg->fc_flags & RTNH_F_ONLINK) {
 -		if (!dev) {
 -			NL_SET_ERR_MSG(extack,
 -				       "Nexthop device required for onlink");
 -			err = -ENODEV;
 -			goto out;
 -		}
 -
 -		if (!(dev->flags & IFF_UP)) {
 -			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
 -			err = -ENETDOWN;
 -			goto out;
 -		}
 -	}
  
  	err = -ENOBUFS;
  	if (cfg->fc_nlinfo.nlh &&
@@@ -3175,10 -3012,18 +3170,10 @@@
  		cfg->fc_protocol = RTPROT_BOOT;
  	rt->fib6_protocol = cfg->fc_protocol;
  
 -	addr_type = ipv6_addr_type(&cfg->fc_dst);
 -
 -	if (cfg->fc_encap) {
 -		struct lwtunnel_state *lwtstate;
 -
 -		err = lwtunnel_build_state(cfg->fc_encap_type,
 -					   cfg->fc_encap, AF_INET6, cfg,
 -					   &lwtstate, extack);
 -		if (err)
 -			goto out;
 -		rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate);
 -	}
 +	rt->fib6_table = table;
 +	rt->fib6_metric = cfg->fc_metric;
 +	rt->fib6_type = cfg->fc_type;
 +	rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
  
  	ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
  	rt->fib6_dst.plen = cfg->fc_dst_len;
@@@ -3189,20 -3034,62 +3184,20 @@@
  	ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len);
  	rt->fib6_src.plen = cfg->fc_src_len;
  #endif
 -
 -	rt->fib6_metric = cfg->fc_metric;
 -	rt->fib6_nh.nh_weight = 1;
 -
 -	rt->fib6_type = cfg->fc_type;
 +	err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack);
 +	if (err)
 +		goto out;
  
  	/* We cannot add true routes via loopback here,
 -	   they would result in kernel looping; promote them to reject routes
 +	 * they would result in kernel looping; promote them to reject routes
  	 */
 -	if ((cfg->fc_flags & RTF_REJECT) ||
 -	    (dev && (dev->flags & IFF_LOOPBACK) &&
 -	     !(addr_type & IPV6_ADDR_LOOPBACK) &&
 -	     !(cfg->fc_flags & RTF_LOCAL))) {
 -		/* hold loopback dev/idev if we haven't done so. */
 -		if (dev != net->loopback_dev) {
 -			if (dev) {
 -				dev_put(dev);
 -				in6_dev_put(idev);
 -			}
 -			dev = net->loopback_dev;
 -			dev_hold(dev);
 -			idev = in6_dev_get(dev);
 -			if (!idev) {
 -				err = -ENODEV;
 -				goto out;
 -			}
 -		}
 -		rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP;
 -		goto install_route;
 -	}
 -
 -	if (cfg->fc_flags & RTF_GATEWAY) {
 -		err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
 -		if (err)
 -			goto out;
 -
 -		rt->fib6_nh.nh_gw = cfg->fc_gateway;
 -	}
 -
 -	err = -ENODEV;
 -	if (!dev)
 -		goto out;
 -
 -	if (idev->cnf.disable_ipv6) {
 -		NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
 -		err = -EACCES;
 -		goto out;
 -	}
 -
 -	if (!(dev->flags & IFF_UP)) {
 -		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
 -		err = -ENETDOWN;
 -		goto out;
 -	}
 +	addr_type = ipv6_addr_type(&cfg->fc_dst);
 +	if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type))
 +		rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
  
  	if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
 +		struct net_device *dev = fib6_info_nh_dev(rt);
 +
  		if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
  			NL_SET_ERR_MSG(extack, "Invalid source address");
  			err = -EINVAL;
@@@ -3213,8 -3100,26 +3208,8 @@@
  	} else
  		rt->fib6_prefsrc.plen = 0;
  
 -	rt->fib6_flags = cfg->fc_flags;
 -
 -install_route:
 -	if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
 -	    !netif_carrier_ok(dev))
 -		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
 -	rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
 -	rt->fib6_nh.nh_dev = dev;
 -	rt->fib6_table = table;
 -
 -	if (idev)
 -		in6_dev_put(idev);
 -
  	return rt;
  out:
 -	if (dev)
 -		dev_put(dev);
 -	if (idev)
 -		in6_dev_put(idev);
 -
  	fib6_info_release(rt);
  	return ERR_PTR(err);
  }
@@@ -3355,16 -3260,10 +3350,16 @@@ static int ip6_route_del(struct fib6_co
  
  	if (fn) {
  		for_each_fib6_node_rt_rcu(fn) {
 +			struct fib6_nh *nh;
 +
  			if (cfg->fc_flags & RTF_CACHE) {
 +				struct fib6_result res = {
 +					.f6i = rt,
 +				};
  				int rc;
  
 -				rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst,
 +				rt_cache = rt6_find_cached_rt(&res,
 +							      &cfg->fc_dst,
  							      &cfg->fc_src);
  				if (rt_cache) {
  					rc = ip6_del_cached_rt(rt_cache, cfg);
@@@ -3375,14 -3274,12 +3370,14 @@@
  				}
  				continue;
  			}
 +
 +			nh = &rt->fib6_nh;
  			if (cfg->fc_ifindex &&
 -			    (!rt->fib6_nh.nh_dev ||
 -			     rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex))
 +			    (!nh->fib_nh_dev ||
 +			     nh->fib_nh_dev->ifindex != cfg->fc_ifindex))
  				continue;
  			if (cfg->fc_flags & RTF_GATEWAY &&
 -			    !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw))
 +			    !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6))
  				continue;
  			if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric)
  				continue;
@@@ -3408,10 -3305,10 +3403,10 @@@ static void rt6_do_redirect(struct dst_
  {
  	struct netevent_redirect netevent;
  	struct rt6_info *rt, *nrt = NULL;
 +	struct fib6_result res = {};
  	struct ndisc_options ndopts;
  	struct inet6_dev *in6_dev;
  	struct neighbour *neigh;
 -	struct fib6_info *from;
  	struct rd_msg *msg;
  	int optlen, on_link;
  	u8 *lladdr;
@@@ -3494,17 -3391,11 +3489,14 @@@
  		     NDISC_REDIRECT, &ndopts);
  
  	rcu_read_lock();
 -	from = rcu_dereference(rt->from);
 -	if (!from)
 +	res.f6i = rcu_dereference(rt->from);
- 	/* This fib6_info_hold() is safe here because we hold reference to rt
- 	 * and rt already holds reference to fib6_info.
- 	 */
- 	fib6_info_hold(res.f6i);
- 	rcu_read_unlock();
++	if (!res.f6i)
+ 		goto out;
  
 -	nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL);
 +	res.nh = &res.f6i->fib6_nh;
 +	res.fib6_flags = res.f6i->fib6_flags;
 +	res.fib6_type = res.f6i->fib6_type;
 +	nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL);
  	if (!nrt)
  		goto out;
  
@@@ -3514,11 -3405,8 +3506,8 @@@
  
  	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
  
- 	/* No need to remove rt from the exception table if rt is
- 	 * a cached route because rt6_insert_exception() will
- 	 * takes care of it
- 	 */
+ 	/* rt6_insert_exception() will take care of duplicated exceptions */
 -	if (rt6_insert_exception(nrt, from)) {
 +	if (rt6_insert_exception(nrt, &res)) {
  		dst_release_immediate(&nrt->dst);
  		goto out;
  	}
@@@ -3530,7 -3418,7 +3519,7 @@@
  	call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
  
  out:
- 	fib6_info_release(res.f6i);
+ 	rcu_read_unlock();
  	neigh_release(neigh);
  }
  
@@@ -3556,12 -3444,11 +3545,12 @@@ static struct fib6_info *rt6_get_route_
  		goto out;
  
  	for_each_fib6_node_rt_rcu(fn) {
 -		if (rt->fib6_nh.nh_dev->ifindex != ifindex)
 +		if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex)
  			continue;
 -		if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
 +		if (!(rt->fib6_flags & RTF_ROUTEINFO) ||
 +		    !rt->fib6_nh.fib_nh_gw_family)
  			continue;
 -		if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr))
 +		if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr))
  			continue;
  		if (!fib6_info_hold_safe(rt))
  			continue;
@@@ -3619,11 -3506,9 +3608,11 @@@ struct fib6_info *rt6_get_dflt_router(s
  
  	rcu_read_lock();
  	for_each_fib6_node_rt_rcu(&table->tb6_root) {
 -		if (dev == rt->fib6_nh.nh_dev &&
 +		struct fib6_nh *nh = &rt->fib6_nh;
 +
 +		if (dev == nh->fib_nh_dev &&
  		    ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
 -		    ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr))
 +		    ipv6_addr_equal(&nh->fib_nh_gw6, addr))
  			break;
  	}
  	if (rt && !fib6_info_hold_safe(rt))
@@@ -3714,7 -3599,7 +3703,7 @@@ static void rtmsg_to_fib6_config(struc
  		.fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ?
  			 : RT6_TABLE_MAIN,
  		.fc_ifindex = rtmsg->rtmsg_ifindex,
 -		.fc_metric = rtmsg->rtmsg_metric,
 +		.fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER,
  		.fc_expires = rtmsg->rtmsg_info,
  		.fc_dst_len = rtmsg->rtmsg_dst_len,
  		.fc_src_len = rtmsg->rtmsg_src_len,
@@@ -3772,23 -3657,34 +3761,34 @@@ int ipv6_route_ioctl(struct net *net, u
  
  static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes)
  {
- 	int type;
  	struct dst_entry *dst = skb_dst(skb);
+ 	struct net *net = dev_net(dst->dev);
+ 	struct inet6_dev *idev;
+ 	int type;
+ 
+ 	if (netif_is_l3_master(skb->dev) &&
+ 	    dst->dev == net->loopback_dev)
+ 		idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif));
+ 	else
+ 		idev = ip6_dst_idev(dst);
+ 
  	switch (ipstats_mib_noroutes) {
  	case IPSTATS_MIB_INNOROUTES:
  		type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
  		if (type == IPV6_ADDR_ANY) {
- 			IP6_INC_STATS(dev_net(dst->dev),
- 				      __in6_dev_get_safely(skb->dev),
- 				      IPSTATS_MIB_INADDRERRORS);
+ 			IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
  			break;
  		}
  		/* FALLTHROUGH */
  	case IPSTATS_MIB_OUTNOROUTES:
- 		IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst),
- 			      ipstats_mib_noroutes);
+ 		IP6_INC_STATS(net, idev, ipstats_mib_noroutes);
  		break;
  	}
+ 
+ 	/* Start over by dropping the dst for l3mdev case */
+ 	if (netif_is_l3_master(skb->dev))
+ 		skb_dst_drop(skb);
+ 
  	icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0);
  	kfree_skb(skb);
  	return 0;
@@@ -3825,26 -3721,36 +3825,26 @@@ struct fib6_info *addrconf_f6i_alloc(st
  				     const struct in6_addr *addr,
  				     bool anycast, gfp_t gfp_flags)
  {
 -	u32 tb_id;
 -	struct net_device *dev = idev->dev;
 -	struct fib6_info *f6i;
 -
 -	f6i = fib6_info_alloc(gfp_flags);
 -	if (!f6i)
 -		return ERR_PTR(-ENOMEM);
 +	struct fib6_config cfg = {
 +		.fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL,
 +		.fc_ifindex = idev->dev->ifindex,
 +		.fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP,
 +		.fc_dst = *addr,
 +		.fc_dst_len = 128,
 +		.fc_protocol = RTPROT_KERNEL,
 +		.fc_nlinfo.nl_net = net,
 +		.fc_ignore_dev_down = true,
 +	};
  
 -	f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0, NULL);
 -	f6i->dst_nocount = true;
 -	f6i->dst_host = true;
 -	f6i->fib6_protocol = RTPROT_KERNEL;
 -	f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP;
  	if (anycast) {
 -		f6i->fib6_type = RTN_ANYCAST;
 -		f6i->fib6_flags |= RTF_ANYCAST;
 +		cfg.fc_type = RTN_ANYCAST;
 +		cfg.fc_flags |= RTF_ANYCAST;
  	} else {
 -		f6i->fib6_type = RTN_LOCAL;
 -		f6i->fib6_flags |= RTF_LOCAL;
 +		cfg.fc_type = RTN_LOCAL;
 +		cfg.fc_flags |= RTF_LOCAL;
  	}
  
 -	f6i->fib6_nh.nh_gw = *addr;
 -	dev_hold(dev);
 -	f6i->fib6_nh.nh_dev = dev;
 -	f6i->fib6_dst.addr = *addr;
 -	f6i->fib6_dst.plen = 128;
 -	tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL;
 -	f6i->fib6_table = fib6_get_table(net, tb_id);
 -
 -	return f6i;
 +	return ip6_route_info_create(&cfg, gfp_flags, NULL);
  }
  
  /* remove deleted ip from prefsrc entries */
@@@ -3860,7 -3766,7 +3860,7 @@@ static int fib6_remove_prefsrc(struct f
  	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
  	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
  
 -	if (((void *)rt->fib6_nh.nh_dev == dev || !dev) &&
 +	if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) &&
  	    rt != net->ipv6.fib6_null_entry &&
  	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
  		spin_lock_bh(&rt6_exception_lock);
@@@ -3882,7 -3788,7 +3882,7 @@@ void rt6_remove_prefsrc(struct inet6_if
  	fib6_clean_all(net, fib6_remove_prefsrc, &adni);
  }
  
 -#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY)
 +#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT)
  
  /* Remove routers and update dst entries when gateway turn into host. */
  static int fib6_clean_tohost(struct fib6_info *rt, void *arg)
@@@ -3890,8 -3796,7 +3890,8 @@@
  	struct in6_addr *gateway = (struct in6_addr *)arg;
  
  	if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) &&
 -	    ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) {
 +	    rt->fib6_nh.fib_nh_gw_family &&
 +	    ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) {
  		return -1;
  	}
  
@@@ -3912,7 -3817,7 +3912,7 @@@ void rt6_clean_tohost(struct net *net, 
  struct arg_netdev_event {
  	const struct net_device *dev;
  	union {
 -		unsigned int nh_flags;
 +		unsigned char nh_flags;
  		unsigned long event;
  	};
  };
@@@ -3939,9 -3844,9 +3939,9 @@@ static struct fib6_info *rt6_multipath_
  
  static bool rt6_is_dead(const struct fib6_info *rt)
  {
 -	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD ||
 -	    (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN &&
 -	     fib6_ignore_linkdown(rt)))
 +	if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ||
 +	    (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN &&
 +	     ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev)))
  		return true;
  
  	return false;
@@@ -3953,11 -3858,11 +3953,11 @@@ static int rt6_multipath_total_weight(c
  	int total = 0;
  
  	if (!rt6_is_dead(rt))
 -		total += rt->fib6_nh.nh_weight;
 +		total += rt->fib6_nh.fib_nh_weight;
  
  	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
  		if (!rt6_is_dead(iter))
 -			total += iter->fib6_nh.nh_weight;
 +			total += iter->fib6_nh.fib_nh_weight;
  	}
  
  	return total;
@@@ -3968,11 -3873,11 +3968,11 @@@ static void rt6_upper_bound_set(struct 
  	int upper_bound = -1;
  
  	if (!rt6_is_dead(rt)) {
 -		*weight += rt->fib6_nh.nh_weight;
 +		*weight += rt->fib6_nh.fib_nh_weight;
  		upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
  						    total) - 1;
  	}
 -	atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound);
 +	atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound);
  }
  
  static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total)
@@@ -4015,9 -3920,8 +4015,9 @@@ static int fib6_ifup(struct fib6_info *
  	const struct arg_netdev_event *arg = p_arg;
  	struct net *net = dev_net(arg->dev);
  
 -	if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) {
 -		rt->fib6_nh.nh_flags &= ~arg->nh_flags;
 +	if (rt != net->ipv6.fib6_null_entry &&
 +	    rt->fib6_nh.fib_nh_dev == arg->dev) {
 +		rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags;
  		fib6_update_sernum_upto_root(net, rt);
  		rt6_multipath_rebalance(rt);
  	}
@@@ -4025,7 -3929,7 +4025,7 @@@
  	return 0;
  }
  
 -void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
 +void rt6_sync_up(struct net_device *dev, unsigned char nh_flags)
  {
  	struct arg_netdev_event arg = {
  		.dev = dev,
@@@ -4045,10 -3949,10 +4045,10 @@@ static bool rt6_multipath_uses_dev(cons
  {
  	struct fib6_info *iter;
  
 -	if (rt->fib6_nh.nh_dev == dev)
 +	if (rt->fib6_nh.fib_nh_dev == dev)
  		return true;
  	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
 -		if (iter->fib6_nh.nh_dev == dev)
 +		if (iter->fib6_nh.fib_nh_dev == dev)
  			return true;
  
  	return false;
@@@ -4069,12 -3973,12 +4069,12 @@@ static unsigned int rt6_multipath_dead_
  	struct fib6_info *iter;
  	unsigned int dead = 0;
  
 -	if (rt->fib6_nh.nh_dev == down_dev ||
 -	    rt->fib6_nh.nh_flags & RTNH_F_DEAD)
 +	if (rt->fib6_nh.fib_nh_dev == down_dev ||
 +	    rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
  		dead++;
  	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
 -		if (iter->fib6_nh.nh_dev == down_dev ||
 -		    iter->fib6_nh.nh_flags & RTNH_F_DEAD)
 +		if (iter->fib6_nh.fib_nh_dev == down_dev ||
 +		    iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
  			dead++;
  
  	return dead;
@@@ -4082,15 -3986,15 +4082,15 @@@
  
  static void rt6_multipath_nh_flags_set(struct fib6_info *rt,
  				       const struct net_device *dev,
 -				       unsigned int nh_flags)
 +				       unsigned char nh_flags)
  {
  	struct fib6_info *iter;
  
 -	if (rt->fib6_nh.nh_dev == dev)
 -		rt->fib6_nh.nh_flags |= nh_flags;
 +	if (rt->fib6_nh.fib_nh_dev == dev)
 +		rt->fib6_nh.fib_nh_flags |= nh_flags;
  	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings)
 -		if (iter->fib6_nh.nh_dev == dev)
 -			iter->fib6_nh.nh_flags |= nh_flags;
 +		if (iter->fib6_nh.fib_nh_dev == dev)
 +			iter->fib6_nh.fib_nh_flags |= nh_flags;
  }
  
  /* called with write lock held for table with rt */
@@@ -4105,12 -4009,12 +4105,12 @@@ static int fib6_ifdown(struct fib6_inf
  
  	switch (arg->event) {
  	case NETDEV_UNREGISTER:
 -		return rt->fib6_nh.nh_dev == dev ? -1 : 0;
 +		return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
  	case NETDEV_DOWN:
  		if (rt->should_flush)
  			return -1;
  		if (!rt->fib6_nsiblings)
 -			return rt->fib6_nh.nh_dev == dev ? -1 : 0;
 +			return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0;
  		if (rt6_multipath_uses_dev(rt, dev)) {
  			unsigned int count;
  
@@@ -4126,10 -4030,10 +4126,10 @@@
  		}
  		return -2;
  	case NETDEV_CHANGE:
 -		if (rt->fib6_nh.nh_dev != dev ||
 +		if (rt->fib6_nh.fib_nh_dev != dev ||
  		    rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
  			break;
 -		rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN;
 +		rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN;
  		rt6_multipath_rebalance(rt);
  		break;
  	}
@@@ -4185,7 -4089,7 +4185,7 @@@ static int rt6_mtu_change_route(struct 
  	   Since RFC 1981 doesn't include administrative MTU increase
  	   update PMTU increase is a MUST. (i.e. jumbo frame)
  	 */
 -	if (rt->fib6_nh.nh_dev == arg->dev &&
 +	if (rt->fib6_nh.fib_nh_dev == arg->dev &&
  	    !fib6_metric_locked(rt, RTAX_MTU)) {
  		u32 mtu = rt->fib6_pmtu;
  
@@@ -4239,8 -4143,8 +4239,8 @@@ static int rtm_to_fib6_config(struct sk
  	unsigned int pref;
  	int err;
  
 -	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
 -			  extack);
 +	err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
 +				     rtm_ipv6_policy, extack);
  	if (err < 0)
  		goto errout;
  
@@@ -4476,7 -4380,7 +4476,7 @@@ static int ip6_route_multipath_add(stru
  			goto cleanup;
  		}
  
 -		rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1;
 +		rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
  
  		err = ip6_route_info_append(info->nl_net, &rt6_nh_list,
  					    rt, &r_cfg);
@@@ -4626,9 -4530,6 +4626,9 @@@ static int inet6_rtm_newroute(struct sk
  	if (err < 0)
  		return err;
  
 +	if (cfg.fc_metric == 0)
 +		cfg.fc_metric = IP6_RT_PRIO_USER;
 +
  	if (cfg.fc_mp)
  		return ip6_route_multipath_add(&cfg, extack);
  	else
@@@ -4643,7 -4544,7 +4643,7 @@@ static size_t rt6_nlmsg_size(struct fib
  		nexthop_len = nla_total_size(0)	 /* RTA_MULTIPATH */
  			    + NLA_ALIGN(sizeof(struct rtnexthop))
  			    + nla_total_size(16) /* RTA_GATEWAY */
 -			    + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate);
 +			    + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
  
  		nexthop_len *= rt->fib6_nsiblings;
  	}
@@@ -4661,10 -4562,77 +4661,10 @@@
  	       + nla_total_size(sizeof(struct rta_cacheinfo))
  	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
  	       + nla_total_size(1) /* RTA_PREF */
 -	       + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate)
 +	       + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws)
  	       + nexthop_len;
  }
  
 -static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt,
 -			    unsigned int *flags, bool skip_oif)
 -{
 -	if (rt->fib6_nh.nh_flags & RTNH_F_DEAD)
 -		*flags |= RTNH_F_DEAD;
 -
 -	if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) {
 -		*flags |= RTNH_F_LINKDOWN;
 -
 -		rcu_read_lock();
 -		if (fib6_ignore_linkdown(rt))
 -			*flags |= RTNH_F_DEAD;
 -		rcu_read_unlock();
 -	}
 -
 -	if (rt->fib6_flags & RTF_GATEWAY) {
 -		if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0)
 -			goto nla_put_failure;
 -	}
 -
 -	*flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK);
 -	if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD)
 -		*flags |= RTNH_F_OFFLOAD;
 -
 -	/* not needed for multipath encoding b/c it has a rtnexthop struct */
 -	if (!skip_oif && rt->fib6_nh.nh_dev &&
 -	    nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex))
 -		goto nla_put_failure;
 -
 -	if (rt->fib6_nh.nh_lwtstate &&
 -	    lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0)
 -		goto nla_put_failure;
 -
 -	return 0;
 -
 -nla_put_failure:
 -	return -EMSGSIZE;
 -}
 -
 -/* add multipath next hop */
 -static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt)
 -{
 -	const struct net_device *dev = rt->fib6_nh.nh_dev;
 -	struct rtnexthop *rtnh;
 -	unsigned int flags = 0;
 -
 -	rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
 -	if (!rtnh)
 -		goto nla_put_failure;
 -
 -	rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1;
 -	rtnh->rtnh_ifindex = dev ? dev->ifindex : 0;
 -
 -	if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
 -		goto nla_put_failure;
 -
 -	rtnh->rtnh_flags = flags;
 -
 -	/* length of rtnetlink header + attributes */
 -	rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh;
 -
 -	return 0;
 -
 -nla_put_failure:
 -	return -EMSGSIZE;
 -}
 -
  static int rt6_fill_node(struct net *net, struct sk_buff *skb,
  			 struct fib6_info *rt, struct dst_entry *dst,
  			 struct in6_addr *dest, struct in6_addr *src,
@@@ -4777,30 -4745,23 +4777,30 @@@
  		struct fib6_info *sibling, *next_sibling;
  		struct nlattr *mp;
  
 -		mp = nla_nest_start(skb, RTA_MULTIPATH);
 +		mp = nla_nest_start_noflag(skb, RTA_MULTIPATH);
  		if (!mp)
  			goto nla_put_failure;
  
 -		if (rt6_add_nexthop(skb, rt) < 0)
 +		if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common,
 +				    rt->fib6_nh.fib_nh_weight) < 0)
  			goto nla_put_failure;
  
  		list_for_each_entry_safe(sibling, next_sibling,
  					 &rt->fib6_siblings, fib6_siblings) {
 -			if (rt6_add_nexthop(skb, sibling) < 0)
 +			if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common,
 +					    sibling->fib6_nh.fib_nh_weight) < 0)
  				goto nla_put_failure;
  		}
  
  		nla_nest_end(skb, mp);
  	} else {
 -		if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0)
 +		unsigned char nh_flags = 0;
 +
 +		if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common,
 +				     &nh_flags, false) < 0)
  			goto nla_put_failure;
 +
 +		rtm->rtm_flags |= nh_flags;
  	}
  
  	if (rt6_flags & RTF_EXPIRES) {
@@@ -4826,7 -4787,7 +4826,7 @@@ nla_put_failure
  static bool fib6_info_uses_dev(const struct fib6_info *f6i,
  			       const struct net_device *dev)
  {
 -	if (f6i->fib6_nh.nh_dev == dev)
 +	if (f6i->fib6_nh.fib_nh_dev == dev)
  		return true;
  
  	if (f6i->fib6_nsiblings) {
@@@ -4834,7 -4795,7 +4834,7 @@@
  
  		list_for_each_entry_safe(sibling, next_sibling,
  					 &f6i->fib6_siblings, fib6_siblings) {
 -			if (sibling->fib6_nh.nh_dev == dev)
 +			if (sibling->fib6_nh.fib_nh_dev == dev)
  				return true;
  		}
  	}
@@@ -4886,8 -4847,8 +4886,8 @@@ static int inet6_rtm_valid_getroute_req
  	}
  
  	if (!netlink_strict_get_check(skb))
 -		return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
 -				   rtm_ipv6_policy, extack);
 +		return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX,
 +					      rtm_ipv6_policy, extack);
  
  	rtm = nlmsg_data(nlh);
  	if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
@@@ -4903,8 -4864,8 +4903,8 @@@
  		return -EINVAL;
  	}
  
 -	err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
 -				 rtm_ipv6_policy, extack);
 +	err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
 +					    rtm_ipv6_policy, extack);
  	if (err)
  		return err;
  
@@@ -5056,16 -5017,20 +5056,20 @@@ static int inet6_rtm_getroute(struct sk
  
  	rcu_read_lock();
  	from = rcu_dereference(rt->from);
- 
- 	if (fibmatch)
- 		err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif,
- 				    RTM_NEWROUTE, NETLINK_CB(in_skb).portid,
- 				    nlh->nlmsg_seq, 0);
- 	else
- 		err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
- 				    &fl6.saddr, iif, RTM_NEWROUTE,
- 				    NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
- 				    0);
+ 	if (from) {
+ 		if (fibmatch)
+ 			err = rt6_fill_node(net, skb, from, NULL, NULL, NULL,
+ 					    iif, RTM_NEWROUTE,
+ 					    NETLINK_CB(in_skb).portid,
+ 					    nlh->nlmsg_seq, 0);
+ 		else
+ 			err = rt6_fill_node(net, skb, from, dst, &fl6.daddr,
+ 					    &fl6.saddr, iif, RTM_NEWROUTE,
+ 					    NETLINK_CB(in_skb).portid,
+ 					    nlh->nlmsg_seq, 0);
+ 	} else {
+ 		err = -ENETUNREACH;
+ 	}
  	rcu_read_unlock();
  
  	if (err < 0) {
@@@ -5119,7 -5084,7 +5123,7 @@@ static int ip6_route_dev_notify(struct 
  		return NOTIFY_OK;
  
  	if (event == NETDEV_REGISTER) {
 -		net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev;
 +		net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev;
  		net->ipv6.ip6_null_entry->dst.dev = dev;
  		net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
@@@ -5454,7 -5419,7 +5458,7 @@@ void __init ip6_route_init_special_entr
  	/* Registering of the loopback is done before this portion of code,
  	 * the loopback reference in rt6_info will not be taken, do it
  	 * manually for init_net */
 -	init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev;
 +	init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev;
  	init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev;
  	init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev);
  #ifdef CONFIG_IPV6_MULTIPLE_TABLES
diff --combined net/mac80211/iface.c
index 94459b2b3d2a,02d2e6f11e93..410685d38c46
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@@ -1133,7 -1133,8 +1133,7 @@@ static void ieee80211_uninit(struct net
  
  static u16 ieee80211_netdev_select_queue(struct net_device *dev,
  					 struct sk_buff *skb,
 -					 struct net_device *sb_dev,
 -					 select_queue_fallback_t fallback)
 +					 struct net_device *sb_dev)
  {
  	return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
  }
@@@ -1178,7 -1179,8 +1178,7 @@@ static const struct net_device_ops ieee
  
  static u16 ieee80211_monitor_select_queue(struct net_device *dev,
  					  struct sk_buff *skb,
 -					  struct net_device *sb_dev,
 -					  select_queue_fallback_t fallback)
 +					  struct net_device *sb_dev)
  {
  	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
  	struct ieee80211_local *local = sdata->local;
@@@ -1225,7 -1227,6 +1225,7 @@@ static void ieee80211_if_setup(struct n
  static void ieee80211_if_setup_no_queue(struct net_device *dev)
  {
  	ieee80211_if_setup(dev);
 +	dev->features |= NETIF_F_LLTX;
  	dev->priv_flags |= IFF_NO_QUEUE;
  }
  
@@@ -1763,13 -1764,13 +1763,13 @@@ int ieee80211_if_add(struct ieee80211_l
  			txq_size += sizeof(struct txq_info) +
  				    local->hw.txq_data_size;
  
 -		if (local->ops->wake_tx_queue)
 +		if (local->ops->wake_tx_queue) {
  			if_setup = ieee80211_if_setup_no_queue;
 -		else
 +		} else {
  			if_setup = ieee80211_if_setup;
 -
 -		if (local->hw.queues >= IEEE80211_NUM_ACS)
 -			txqs = IEEE80211_NUM_ACS;
 +			if (local->hw.queues >= IEEE80211_NUM_ACS)
 +				txqs = IEEE80211_NUM_ACS;
 +		}
  
  		ndev = alloc_netdev_mqs(size + txq_size,
  					name, name_assign_type,
@@@ -1907,6 -1908,9 +1907,9 @@@ void ieee80211_if_remove(struct ieee802
  	list_del_rcu(&sdata->list);
  	mutex_unlock(&sdata->local->iflist_mtx);
  
+ 	if (sdata->vif.txq)
+ 		ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq));
+ 
  	synchronize_rcu();
  
  	if (sdata->dev) {
diff --combined net/netlink/genetlink.c
index 72668759cd2b,cb69d35c8e6a..79cfa031dc7d
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@@ -362,8 -362,8 +362,8 @@@ int genl_register_family(struct genl_fa
  	} else
  		family->attrbuf = NULL;
  
- 	family->id = idr_alloc(&genl_fam_idr, family,
- 			       start, end + 1, GFP_KERNEL);
+ 	family->id = idr_alloc_cyclic(&genl_fam_idr, family,
+ 				      start, end + 1, GFP_KERNEL);
  	if (family->id < 0) {
  		err = family->id;
  		goto errout_free;
@@@ -536,24 -536,6 +536,24 @@@ static int genl_family_rcv_msg(const st
  		if (ops->dumpit == NULL)
  			return -EOPNOTSUPP;
  
 +		if (!(ops->validate & GENL_DONT_VALIDATE_DUMP)) {
 +			unsigned int validate = NL_VALIDATE_STRICT;
 +			int hdrlen = GENL_HDRLEN + family->hdrsize;
 +
 +			if (ops->validate & GENL_DONT_VALIDATE_DUMP_STRICT)
 +				validate = NL_VALIDATE_LIBERAL;
 +
 +			if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen))
 +				return -EINVAL;
 +
 +			rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen),
 +					    nlmsg_attrlen(nlh, hdrlen),
 +					    family->maxattr, family->policy,
 +					    validate, extack);
 +			if (rc)
 +				return rc;
 +		}
 +
  		if (!family->parallel_ops) {
  			struct netlink_dump_control c = {
  				.module = family->module,
@@@ -595,13 -577,8 +595,13 @@@
  		attrbuf = family->attrbuf;
  
  	if (attrbuf) {
 -		err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
 -				  ops->policy, extack);
 +		enum netlink_validation validate = NL_VALIDATE_STRICT;
 +
 +		if (ops->validate & GENL_DONT_VALIDATE_STRICT)
 +			validate = NL_VALIDATE_LIBERAL;
 +
 +		err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr,
 +				    family->policy, validate, extack);
  		if (err < 0)
  			goto out;
  	}
@@@ -688,7 -665,7 +688,7 @@@ static int ctrl_fill_info(const struct 
  		struct nlattr *nla_ops;
  		int i;
  
 -		nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS);
 +		nla_ops = nla_nest_start_noflag(skb, CTRL_ATTR_OPS);
  		if (nla_ops == NULL)
  			goto nla_put_failure;
  
@@@ -701,10 -678,10 +701,10 @@@
  				op_flags |= GENL_CMD_CAP_DUMP;
  			if (ops->doit)
  				op_flags |= GENL_CMD_CAP_DO;
 -			if (ops->policy)
 +			if (family->policy)
  				op_flags |= GENL_CMD_CAP_HASPOL;
  
 -			nest = nla_nest_start(skb, i + 1);
 +			nest = nla_nest_start_noflag(skb, i + 1);
  			if (nest == NULL)
  				goto nla_put_failure;
  
@@@ -722,7 -699,7 +722,7 @@@
  		struct nlattr *nla_grps;
  		int i;
  
 -		nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS);
 +		nla_grps = nla_nest_start_noflag(skb, CTRL_ATTR_MCAST_GROUPS);
  		if (nla_grps == NULL)
  			goto nla_put_failure;
  
@@@ -732,7 -709,7 +732,7 @@@
  
  			grp = &family->mcgrps[i];
  
 -			nest = nla_nest_start(skb, i + 1);
 +			nest = nla_nest_start_noflag(skb, i + 1);
  			if (nest == NULL)
  				goto nla_put_failure;
  
@@@ -772,11 -749,11 +772,11 @@@ static int ctrl_fill_mcgrp_info(const s
  	    nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, family->id))
  		goto nla_put_failure;
  
 -	nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS);
 +	nla_grps = nla_nest_start_noflag(skb, CTRL_ATTR_MCAST_GROUPS);
  	if (nla_grps == NULL)
  		goto nla_put_failure;
  
 -	nest = nla_nest_start(skb, 1);
 +	nest = nla_nest_start_noflag(skb, 1);
  	if (nest == NULL)
  		goto nla_put_failure;
  
@@@ -961,9 -938,9 +961,9 @@@ static int genl_ctrl_event(int event, c
  static const struct genl_ops genl_ctrl_ops[] = {
  	{
  		.cmd		= CTRL_CMD_GETFAMILY,
 +		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
  		.doit		= ctrl_getfamily,
  		.dumpit		= ctrl_dumpfamily,
 -		.policy		= ctrl_policy,
  	},
  };
  
@@@ -981,7 -958,6 +981,7 @@@ static struct genl_family genl_ctrl __r
  	.name = "nlctrl",
  	.version = 0x2,
  	.maxattr = CTRL_ATTR_MAX,
 +	.policy = ctrl_policy,
  	.netnsok = true,
  };
  
diff --combined net/packet/af_packet.c
index 5c4a118d6f96,9b81813dd16a..90d4e3ce00e5
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@@ -275,22 -275,24 +275,22 @@@ static bool packet_use_direct_xmit(cons
  	return po->xmit == packet_direct_xmit;
  }
  
 -static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb,
 -				  struct net_device *sb_dev)
 -{
 -	return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL);
 -}
 -
  static u16 packet_pick_tx_queue(struct sk_buff *skb)
  {
  	struct net_device *dev = skb->dev;
  	const struct net_device_ops *ops = dev->netdev_ops;
 +	int cpu = raw_smp_processor_id();
  	u16 queue_index;
  
 +#ifdef CONFIG_XPS
 +	skb->sender_cpu = cpu + 1;
 +#endif
 +	skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues);
  	if (ops->ndo_select_queue) {
 -		queue_index = ops->ndo_select_queue(dev, skb, NULL,
 -						    __packet_pick_tx_queue);
 +		queue_index = ops->ndo_select_queue(dev, skb, NULL);
  		queue_index = netdev_cap_txqueue(dev, queue_index);
  	} else {
 -		queue_index = __packet_pick_tx_queue(dev, skb, NULL);
 +		queue_index = netdev_pick_tx(dev, skb, NULL);
  	}
  
  	return queue_index;
@@@ -2600,8 -2602,8 +2600,8 @@@ static int tpacket_snd(struct packet_so
  	void *ph;
  	DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
  	bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
+ 	unsigned char *addr = NULL;
  	int tp_len, size_max;
- 	unsigned char *addr;
  	void *data;
  	int len_sum = 0;
  	int status = TP_STATUS_AVAILABLE;
@@@ -2612,7 -2614,6 +2612,6 @@@
  	if (likely(saddr == NULL)) {
  		dev	= packet_cached_dev_get(po);
  		proto	= po->num;
- 		addr	= NULL;
  	} else {
  		err = -EINVAL;
  		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@@ -2622,10 -2623,13 +2621,13 @@@
  						sll_addr)))
  			goto out;
  		proto	= saddr->sll_protocol;
- 		addr	= saddr->sll_halen ? saddr->sll_addr : NULL;
  		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
- 		if (addr && dev && saddr->sll_halen < dev->addr_len)
- 			goto out_put;
+ 		if (po->sk.sk_socket->type == SOCK_DGRAM) {
+ 			if (dev && msg->msg_namelen < dev->addr_len +
+ 				   offsetof(struct sockaddr_ll, sll_addr))
+ 				goto out_put;
+ 			addr = saddr->sll_addr;
+ 		}
  	}
  
  	err = -ENXIO;
@@@ -2797,7 -2801,7 +2799,7 @@@ static int packet_snd(struct socket *so
  	struct sk_buff *skb;
  	struct net_device *dev;
  	__be16 proto;
- 	unsigned char *addr;
+ 	unsigned char *addr = NULL;
  	int err, reserve = 0;
  	struct sockcm_cookie sockc;
  	struct virtio_net_hdr vnet_hdr = { 0 };
@@@ -2814,7 -2818,6 +2816,6 @@@
  	if (likely(saddr == NULL)) {
  		dev	= packet_cached_dev_get(po);
  		proto	= po->num;
- 		addr	= NULL;
  	} else {
  		err = -EINVAL;
  		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
@@@ -2822,10 -2825,13 +2823,13 @@@
  		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
  			goto out;
  		proto	= saddr->sll_protocol;
- 		addr	= saddr->sll_halen ? saddr->sll_addr : NULL;
  		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
- 		if (addr && dev && saddr->sll_halen < dev->addr_len)
- 			goto out_unlock;
+ 		if (sock->type == SOCK_DGRAM) {
+ 			if (dev && msg->msg_namelen < dev->addr_len +
+ 				   offsetof(struct sockaddr_ll, sll_addr))
+ 				goto out_unlock;
+ 			addr = saddr->sll_addr;
+ 		}
  	}
  
  	err = -ENXIO;
@@@ -3342,20 -3348,29 +3346,29 @@@ static int packet_recvmsg(struct socke
  	sock_recv_ts_and_drops(msg, sk, skb);
  
  	if (msg->msg_name) {
+ 		int copy_len;
+ 
  		/* If the address length field is there to be filled
  		 * in, we fill it in now.
  		 */
  		if (sock->type == SOCK_PACKET) {
  			__sockaddr_check_size(sizeof(struct sockaddr_pkt));
  			msg->msg_namelen = sizeof(struct sockaddr_pkt);
+ 			copy_len = msg->msg_namelen;
  		} else {
  			struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
  
  			msg->msg_namelen = sll->sll_halen +
  				offsetof(struct sockaddr_ll, sll_addr);
+ 			copy_len = msg->msg_namelen;
+ 			if (msg->msg_namelen < sizeof(struct sockaddr_ll)) {
+ 				memset(msg->msg_name +
+ 				       offsetof(struct sockaddr_ll, sll_addr),
+ 				       0, sizeof(sll->sll_addr));
+ 				msg->msg_namelen = sizeof(struct sockaddr_ll);
+ 			}
  		}
- 		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
- 		       msg->msg_namelen);
+ 		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
  	}
  
  	if (pkt_sk(sk)->auxdata) {
@@@ -4075,6 -4090,11 +4088,6 @@@ static int packet_ioctl(struct socket *
  		spin_unlock_bh(&sk->sk_receive_queue.lock);
  		return put_user(amount, (int __user *)arg);
  	}
 -	case SIOCGSTAMP:
 -		return sock_get_timestamp(sk, (struct timeval __user *)arg);
 -	case SIOCGSTAMPNS:
 -		return sock_get_timestampns(sk, (struct timespec __user *)arg);
 -
  #ifdef CONFIG_INET
  	case SIOCADDRT:
  	case SIOCDELRT:
@@@ -4450,7 -4470,6 +4463,7 @@@ static const struct proto_ops packet_op
  	.getname =	packet_getname_spkt,
  	.poll =		datagram_poll,
  	.ioctl =	packet_ioctl,
 +	.gettstamp =	sock_gettstamp,
  	.listen =	sock_no_listen,
  	.shutdown =	sock_no_shutdown,
  	.setsockopt =	sock_no_setsockopt,
@@@ -4472,7 -4491,6 +4485,7 @@@ static const struct proto_ops packet_op
  	.getname =	packet_getname,
  	.poll =		packet_poll,
  	.ioctl =	packet_ioctl,
 +	.gettstamp =	sock_gettstamp,
  	.listen =	sock_no_listen,
  	.shutdown =	sock_no_shutdown,
  	.setsockopt =	packet_setsockopt,
diff --combined net/sctp/sm_statefuns.c
index 7dfc34b28f4f,713a669d2058..e3f4abe6134e
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@@ -3824,6 -3824,29 +3824,29 @@@ enum sctp_disposition sctp_sf_do_asconf
  	return SCTP_DISPOSITION_CONSUME;
  }
  
+ static enum sctp_disposition sctp_send_next_asconf(
+ 					struct net *net,
+ 					const struct sctp_endpoint *ep,
+ 					struct sctp_association *asoc,
+ 					const union sctp_subtype type,
+ 					struct sctp_cmd_seq *commands)
+ {
+ 	struct sctp_chunk *asconf;
+ 	struct list_head *entry;
+ 
+ 	if (list_empty(&asoc->addip_chunk_list))
+ 		return SCTP_DISPOSITION_CONSUME;
+ 
+ 	entry = asoc->addip_chunk_list.next;
+ 	asconf = list_entry(entry, struct sctp_chunk, list);
+ 
+ 	list_del_init(entry);
+ 	sctp_chunk_hold(asconf);
+ 	asoc->addip_last_asconf = asconf;
+ 
+ 	return sctp_sf_do_prm_asconf(net, ep, asoc, type, asconf, commands);
+ }
+ 
  /*
   * ADDIP Section 4.3 General rules for address manipulation
   * When building TLV parameters for the ASCONF Chunk that will add or
@@@ -3915,14 -3938,10 +3938,10 @@@ enum sctp_disposition sctp_sf_do_asconf
  				SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
  
  		if (!sctp_process_asconf_ack((struct sctp_association *)asoc,
- 					     asconf_ack)) {
- 			/* Successfully processed ASCONF_ACK.  We can
- 			 * release the next asconf if we have one.
- 			 */
- 			sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF,
- 					SCTP_NULL());
- 			return SCTP_DISPOSITION_CONSUME;
- 		}
+ 					     asconf_ack))
+ 			return sctp_send_next_asconf(net, ep,
+ 					(struct sctp_association *)asoc,
+ 							type, commands);
  
  		abort = sctp_make_abort(asoc, asconf_ack,
  					sizeof(struct sctp_errhdr));
@@@ -6412,15 -6431,13 +6431,15 @@@ static int sctp_eat_data(const struct s
  	 * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our
  	 * memory usage too much
  	 */
 -	if (*sk->sk_prot_creator->memory_pressure) {
 +	if (sk_under_memory_pressure(sk)) {
  		if (sctp_tsnmap_has_gap(map) &&
  		    (sctp_tsnmap_get_ctsn(map) + 1) == tsn) {
  			pr_debug("%s: under pressure, reneging for tsn:%u\n",
  				 __func__, tsn);
  			deliver = SCTP_CMD_RENEGE;
 -		 }
 +		} else {
 +			sk_mem_reclaim(sk);
 +		}
  	}
  
  	/*
diff --combined net/tls/tls_device.c
index 26f26e71ef3f,14dedb24fa7b..e225c81e6b35
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@@ -89,6 -89,22 +89,6 @@@ static void tls_device_gc_task(struct w
  	}
  }
  
 -static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
 -			      struct net_device *netdev)
 -{
 -	if (sk->sk_destruct != tls_device_sk_destruct) {
 -		refcount_set(&ctx->refcount, 1);
 -		dev_hold(netdev);
 -		ctx->netdev = netdev;
 -		spin_lock_irq(&tls_device_lock);
 -		list_add_tail(&ctx->list, &tls_device_list);
 -		spin_unlock_irq(&tls_device_lock);
 -
 -		ctx->sk_destruct = sk->sk_destruct;
 -		sk->sk_destruct = tls_device_sk_destruct;
 -	}
 -}
 -
  static void tls_device_queue_ctx_destruction(struct tls_context *ctx)
  {
  	unsigned long flags;
@@@ -183,7 -199,7 +183,7 @@@ static void tls_icsk_clean_acked(struc
   * socket and no in-flight SKBs associated with this
   * socket, so it is safe to free all the resources.
   */
 -void tls_device_sk_destruct(struct sock *sk)
 +static void tls_device_sk_destruct(struct sock *sk)
  {
  	struct tls_context *tls_ctx = tls_get_ctx(sk);
  	struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
@@@ -201,6 -217,7 +201,6 @@@
  	if (refcount_dec_and_test(&tls_ctx->refcount))
  		tls_device_queue_ctx_destruction(tls_ctx);
  }
 -EXPORT_SYMBOL(tls_device_sk_destruct);
  
  void tls_device_free_resources_tx(struct sock *sk)
  {
@@@ -567,7 -584,7 +567,7 @@@ void handle_device_resync(struct sock *
  
  	rx_ctx = tls_offload_ctx_rx(tls_ctx);
  	resync_req = atomic64_read(&rx_ctx->resync_req);
 -	req_seq = ntohl(resync_req >> 32) - ((u32)TLS_HEADER_SIZE - 1);
 +	req_seq = (resync_req >> 32) - ((u32)TLS_HEADER_SIZE - 1);
  	is_req_pending = resync_req;
  
  	if (unlikely(is_req_pending) && req_seq == seq &&
@@@ -580,7 -597,7 +580,7 @@@
  static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb)
  {
  	struct strp_msg *rxm = strp_msg(skb);
- 	int err = 0, offset = rxm->offset, copy, nsg;
+ 	int err = 0, offset = rxm->offset, copy, nsg, data_len, pos;
  	struct sk_buff *skb_iter, *unused;
  	struct scatterlist sg[1];
  	char *orig_buf, *buf;
@@@ -611,25 -628,42 +611,42 @@@
  	else
  		err = 0;
  
- 	copy = min_t(int, skb_pagelen(skb) - offset,
- 		     rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+ 	data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE;
  
- 	if (skb->decrypted)
- 		skb_store_bits(skb, offset, buf, copy);
+ 	if (skb_pagelen(skb) > offset) {
+ 		copy = min_t(int, skb_pagelen(skb) - offset, data_len);
  
- 	offset += copy;
- 	buf += copy;
+ 		if (skb->decrypted)
+ 			skb_store_bits(skb, offset, buf, copy);
  
+ 		offset += copy;
+ 		buf += copy;
+ 	}
+ 
+ 	pos = skb_pagelen(skb);
  	skb_walk_frags(skb, skb_iter) {
- 		copy = min_t(int, skb_iter->len,
- 			     rxm->full_len - offset + rxm->offset -
- 			     TLS_CIPHER_AES_GCM_128_TAG_SIZE);
+ 		int frag_pos;
+ 
+ 		/* Practically all frags must belong to msg if reencrypt
+ 		 * is needed with current strparser and coalescing logic,
+ 		 * but strparser may "get optimized", so let's be safe.
+ 		 */
+ 		if (pos + skb_iter->len <= offset)
+ 			goto done_with_frag;
+ 		if (pos >= data_len + rxm->offset)
+ 			break;
+ 
+ 		frag_pos = offset - pos;
+ 		copy = min_t(int, skb_iter->len - frag_pos,
+ 			     data_len + rxm->offset - offset);
  
  		if (skb_iter->decrypted)
- 			skb_store_bits(skb_iter, offset, buf, copy);
+ 			skb_store_bits(skb_iter, frag_pos, buf, copy);
  
  		offset += copy;
  		buf += copy;
+ done_with_frag:
+ 		pos += skb_iter->len;
  	}
  
  free_buf:
@@@ -665,22 -699,6 +682,22 @@@ int tls_device_decrypted(struct sock *s
  		tls_device_reencrypt(sk, skb);
  }
  
 +static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
 +			      struct net_device *netdev)
 +{
 +	if (sk->sk_destruct != tls_device_sk_destruct) {
 +		refcount_set(&ctx->refcount, 1);
 +		dev_hold(netdev);
 +		ctx->netdev = netdev;
 +		spin_lock_irq(&tls_device_lock);
 +		list_add_tail(&ctx->list, &tls_device_list);
 +		spin_unlock_irq(&tls_device_lock);
 +
 +		ctx->sk_destruct = sk->sk_destruct;
 +		sk->sk_destruct = tls_device_sk_destruct;
 +	}
 +}
 +
  int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
  {
  	u16 nonce_size, tag_size, iv_size, rec_seq_size;
@@@ -864,6 -882,8 +881,6 @@@ int tls_set_device_offload_rx(struct so
  	}
  
  	if (!(netdev->features & NETIF_F_HW_TLS_RX)) {
 -		pr_err_ratelimited("%s: netdev %s with no TLS offload\n",
 -				   __func__, netdev->name);
  		rc = -ENOTSUPP;
  		goto release_netdev;
  	}
@@@ -891,8 -911,11 +908,8 @@@
  	rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX,
  					     &ctx->crypto_recv.info,
  					     tcp_sk(sk)->copied_seq);
 -	if (rc) {
 -		pr_err_ratelimited("%s: The netdev has refused to offload this socket\n",
 -				   __func__);
 +	if (rc)
  		goto free_sw_resources;
 -	}
  
  	tls_device_attach(ctx, sk, netdev);
  	goto release_netdev;
diff --combined net/wireless/reg.c
index 816425ffe05a,a6fd5ce199da..4831ad745f91
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@@ -427,10 -427,14 +427,10 @@@ static const struct ieee80211_regdomai
  reg_copy_regd(const struct ieee80211_regdomain *src_regd)
  {
  	struct ieee80211_regdomain *regd;
 -	int size_of_regd;
  	unsigned int i;
  
 -	size_of_regd =
 -		sizeof(struct ieee80211_regdomain) +
 -		src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule);
 -
 -	regd = kzalloc(size_of_regd, GFP_KERNEL);
 +	regd = kzalloc(struct_size(regd, reg_rules, src_regd->n_reg_rules),
 +		       GFP_KERNEL);
  	if (!regd)
  		return ERR_PTR(-ENOMEM);
  
@@@ -944,10 -948,12 +944,10 @@@ static int regdb_query_country(const st
  	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
  	struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
  	struct ieee80211_regdomain *regdom;
 -	unsigned int size_of_regd, i;
 -
 -	size_of_regd = sizeof(struct ieee80211_regdomain) +
 -		coll->n_rules * sizeof(struct ieee80211_reg_rule);
 +	unsigned int i;
  
 -	regdom = kzalloc(size_of_regd, GFP_KERNEL);
 +	regdom = kzalloc(struct_size(regdom, reg_rules, coll->n_rules),
 +			 GFP_KERNEL);
  	if (!regdom)
  		return -ENOMEM;
  
@@@ -1483,7 -1489,7 +1483,7 @@@ static struct ieee80211_regdomain 
  regdom_intersect(const struct ieee80211_regdomain *rd1,
  		 const struct ieee80211_regdomain *rd2)
  {
 -	int r, size_of_regd;
 +	int r;
  	unsigned int x, y;
  	unsigned int num_rules = 0;
  	const struct ieee80211_reg_rule *rule1, *rule2;
@@@ -1514,7 -1520,10 +1514,7 @@@
  	if (!num_rules)
  		return NULL;
  
 -	size_of_regd = sizeof(struct ieee80211_regdomain) +
 -		       num_rules * sizeof(struct ieee80211_reg_rule);
 -
 -	rd = kzalloc(size_of_regd, GFP_KERNEL);
 +	rd = kzalloc(struct_size(rd, reg_rules, num_rules), GFP_KERNEL);
  	if (!rd)
  		return NULL;
  
@@@ -3769,10 -3778,9 +3769,9 @@@ void wiphy_regulatory_register(struct w
  		/*
  		 * The last request may have been received before this
  		 * registration call. Call the driver notifier if
- 		 * initiator is USER and user type is CELL_BASE.
+ 		 * initiator is USER.
  		 */
- 		if (lr->initiator == NL80211_REGDOM_SET_BY_USER &&
- 		    lr->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE)
+ 		if (lr->initiator == NL80211_REGDOM_SET_BY_USER)
  			reg_call_notifier(wiphy, lr);
  	}
  
diff --combined net/xfrm/xfrm_interface.c
index b9f118530db6,85fec98676d3..ad3a2555c517
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@@ -70,17 -70,28 +70,28 @@@ static struct xfrm_if *xfrmi_lookup(str
  	return NULL;
  }
  
- static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb)
+ static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb,
+ 					    unsigned short family)
  {
  	struct xfrmi_net *xfrmn;
- 	int ifindex;
  	struct xfrm_if *xi;
+ 	int ifindex = 0;
  
  	if (!secpath_exists(skb) || !skb->dev)
  		return NULL;
  
+ 	switch (family) {
+ 	case AF_INET6:
+ 		ifindex = inet6_sdif(skb);
+ 		break;
+ 	case AF_INET:
+ 		ifindex = inet_sdif(skb);
+ 		break;
+ 	}
+ 	if (!ifindex)
+ 		ifindex = skb->dev->ifindex;
+ 
  	xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id);
- 	ifindex = skb->dev->ifindex;
  
  	for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) {
  		if (ifindex == xi->dev->ifindex &&
@@@ -244,8 -255,8 +255,8 @@@ static void xfrmi_scrub_packet(struct s
  
  static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
  {
 +	const struct xfrm_mode *inner_mode;
  	struct pcpu_sw_netstats *tstats;
 -	struct xfrm_mode *inner_mode;
  	struct net_device *dev;
  	struct xfrm_state *x;
  	struct xfrm_if *xi;
@@@ -273,7 -284,7 +284,7 @@@
  	xnet = !net_eq(xi->net, dev_net(skb->dev));
  
  	if (xnet) {
 -		inner_mode = x->inner_mode;
 +		inner_mode = &x->inner_mode;
  
  		if (x->sel.family == AF_UNSPEC) {
  			inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
@@@ -285,7 -296,7 +296,7 @@@
  		}
  
  		if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb,
 -				       inner_mode->afinfo->family))
 +				       inner_mode->family))
  			return -EPERM;
  	}
  
diff --combined net/xfrm/xfrm_policy.c
index 03b6bf85d70b,a6b58df7a70f..410233c5681e
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@@ -27,14 -27,10 +27,14 @@@
  #include <linux/cpu.h>
  #include <linux/audit.h>
  #include <linux/rhashtable.h>
 +#include <linux/if_tunnel.h>
  #include <net/dst.h>
  #include <net/flow.h>
  #include <net/xfrm.h>
  #include <net/ip.h>
 +#if IS_ENABLED(CONFIG_IPV6_MIP6)
 +#include <net/mip6.h>
 +#endif
  #ifdef CONFIG_XFRM_STATISTICS
  #include <net/snmp.h>
  #endif
@@@ -2454,10 -2450,18 +2454,10 @@@ xfrm_tmpl_resolve(struct xfrm_policy **
  
  static int xfrm_get_tos(const struct flowi *fl, int family)
  {
 -	const struct xfrm_policy_afinfo *afinfo;
 -	int tos;
 +	if (family == AF_INET)
 +		return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos;
  
 -	afinfo = xfrm_policy_get_afinfo(family);
 -	if (!afinfo)
 -		return 0;
 -
 -	tos = afinfo->get_tos(fl);
 -
 -	rcu_read_unlock();
 -
 -	return tos;
 +	return 0;
  }
  
  static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
@@@ -2495,14 -2499,21 +2495,14 @@@
  	return xdst;
  }
  
 -static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
 -				 int nfheader_len)
 +static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
 +			   int nfheader_len)
  {
 -	const struct xfrm_policy_afinfo *afinfo =
 -		xfrm_policy_get_afinfo(dst->ops->family);
 -	int err;
 -
 -	if (!afinfo)
 -		return -EINVAL;
 -
 -	err = afinfo->init_path(path, dst, nfheader_len);
 -
 -	rcu_read_unlock();
 -
 -	return err;
 +	if (dst->ops->family == AF_INET6) {
 +		struct rt6_info *rt = (struct rt6_info *)dst;
 +		path->path_cookie = rt6_get_cookie(rt);
 +		path->u.rt6.rt6i_nfheader_len = nfheader_len;
 +	}
  }
  
  static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
@@@ -2534,11 -2545,10 +2534,11 @@@ static struct dst_entry *xfrm_bundle_cr
  					    const struct flowi *fl,
  					    struct dst_entry *dst)
  {
 +	const struct xfrm_state_afinfo *afinfo;
 +	const struct xfrm_mode *inner_mode;
  	struct net *net = xp_net(policy);
  	unsigned long now = jiffies;
  	struct net_device *dev;
 -	struct xfrm_mode *inner_mode;
  	struct xfrm_dst *xdst_prev = NULL;
  	struct xfrm_dst *xdst0 = NULL;
  	int i = 0;
@@@ -2584,7 -2594,7 +2584,7 @@@
  				goto put_states;
  			}
  		} else
 -			inner_mode = xfrm[i]->inner_mode;
 +			inner_mode = &xfrm[i]->inner_mode;
  
  		xdst->route = dst;
  		dst_copy_metrics(dst1, dst);
@@@ -2612,14 -2622,7 +2612,14 @@@
  		dst1->lastuse = now;
  
  		dst1->input = dst_discard;
 -		dst1->output = inner_mode->afinfo->output;
 +
 +		rcu_read_lock();
 +		afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
 +		if (likely(afinfo))
 +			dst1->output = afinfo->output;
 +		else
 +			dst1->output = dst_discard_out;
 +		rcu_read_unlock();
  
  		xdst_prev = xdst;
  
@@@ -3260,229 -3263,20 +3260,229 @@@ xfrm_policy_ok(const struct xfrm_tmpl *
  	return start;
  }
  
 +static void
 +decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
 +{
 +	const struct iphdr *iph = ip_hdr(skb);
 +	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
 +	struct flowi4 *fl4 = &fl->u.ip4;
 +	int oif = 0;
 +
 +	if (skb_dst(skb))
 +		oif = skb_dst(skb)->dev->ifindex;
 +
 +	memset(fl4, 0, sizeof(struct flowi4));
 +	fl4->flowi4_mark = skb->mark;
 +	fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
 +
 +	if (!ip_is_fragment(iph)) {
 +		switch (iph->protocol) {
 +		case IPPROTO_UDP:
 +		case IPPROTO_UDPLITE:
 +		case IPPROTO_TCP:
 +		case IPPROTO_SCTP:
 +		case IPPROTO_DCCP:
 +			if (xprth + 4 < skb->data ||
 +			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
 +				__be16 *ports;
 +
 +				xprth = skb_network_header(skb) + iph->ihl * 4;
 +				ports = (__be16 *)xprth;
 +
 +				fl4->fl4_sport = ports[!!reverse];
 +				fl4->fl4_dport = ports[!reverse];
 +			}
 +			break;
 +		case IPPROTO_ICMP:
 +			if (xprth + 2 < skb->data ||
 +			    pskb_may_pull(skb, xprth + 2 - skb->data)) {
 +				u8 *icmp;
 +
 +				xprth = skb_network_header(skb) + iph->ihl * 4;
 +				icmp = xprth;
 +
 +				fl4->fl4_icmp_type = icmp[0];
 +				fl4->fl4_icmp_code = icmp[1];
 +			}
 +			break;
 +		case IPPROTO_ESP:
 +			if (xprth + 4 < skb->data ||
 +			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
 +				__be32 *ehdr;
 +
 +				xprth = skb_network_header(skb) + iph->ihl * 4;
 +				ehdr = (__be32 *)xprth;
 +
 +				fl4->fl4_ipsec_spi = ehdr[0];
 +			}
 +			break;
 +		case IPPROTO_AH:
 +			if (xprth + 8 < skb->data ||
 +			    pskb_may_pull(skb, xprth + 8 - skb->data)) {
 +				__be32 *ah_hdr;
 +
 +				xprth = skb_network_header(skb) + iph->ihl * 4;
 +				ah_hdr = (__be32 *)xprth;
 +
 +				fl4->fl4_ipsec_spi = ah_hdr[1];
 +			}
 +			break;
 +		case IPPROTO_COMP:
 +			if (xprth + 4 < skb->data ||
 +			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
 +				__be16 *ipcomp_hdr;
 +
 +				xprth = skb_network_header(skb) + iph->ihl * 4;
 +				ipcomp_hdr = (__be16 *)xprth;
 +
 +				fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
 +			}
 +			break;
 +		case IPPROTO_GRE:
 +			if (xprth + 12 < skb->data ||
 +			    pskb_may_pull(skb, xprth + 12 - skb->data)) {
 +				__be16 *greflags;
 +				__be32 *gre_hdr;
 +
 +				xprth = skb_network_header(skb) + iph->ihl * 4;
 +				greflags = (__be16 *)xprth;
 +				gre_hdr = (__be32 *)xprth;
 +
 +				if (greflags[0] & GRE_KEY) {
 +					if (greflags[0] & GRE_CSUM)
 +						gre_hdr++;
 +					fl4->fl4_gre_key = gre_hdr[1];
 +				}
 +			}
 +			break;
 +		default:
 +			fl4->fl4_ipsec_spi = 0;
 +			break;
 +		}
 +	}
 +	fl4->flowi4_proto = iph->protocol;
 +	fl4->daddr = reverse ? iph->saddr : iph->daddr;
 +	fl4->saddr = reverse ? iph->daddr : iph->saddr;
 +	fl4->flowi4_tos = iph->tos;
 +}
 +
 +#if IS_ENABLED(CONFIG_IPV6)
 +static void
 +decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse)
 +{
 +	struct flowi6 *fl6 = &fl->u.ip6;
 +	int onlyproto = 0;
 +	const struct ipv6hdr *hdr = ipv6_hdr(skb);
 +	u32 offset = sizeof(*hdr);
 +	struct ipv6_opt_hdr *exthdr;
 +	const unsigned char *nh = skb_network_header(skb);
 +	u16 nhoff = IP6CB(skb)->nhoff;
 +	int oif = 0;
 +	u8 nexthdr;
 +
 +	if (!nhoff)
 +		nhoff = offsetof(struct ipv6hdr, nexthdr);
 +
 +	nexthdr = nh[nhoff];
 +
 +	if (skb_dst(skb))
 +		oif = skb_dst(skb)->dev->ifindex;
 +
 +	memset(fl6, 0, sizeof(struct flowi6));
 +	fl6->flowi6_mark = skb->mark;
 +	fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
 +
 +	fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
 +	fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
 +
 +	while (nh + offset + sizeof(*exthdr) < skb->data ||
 +	       pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
 +		nh = skb_network_header(skb);
 +		exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 +
 +		switch (nexthdr) {
 +		case NEXTHDR_FRAGMENT:
 +			onlyproto = 1;
 +			/* fall through */
 +		case NEXTHDR_ROUTING:
 +		case NEXTHDR_HOP:
 +		case NEXTHDR_DEST:
 +			offset += ipv6_optlen(exthdr);
 +			nexthdr = exthdr->nexthdr;
 +			exthdr = (struct ipv6_opt_hdr *)(nh + offset);
 +			break;
 +		case IPPROTO_UDP:
 +		case IPPROTO_UDPLITE:
 +		case IPPROTO_TCP:
 +		case IPPROTO_SCTP:
 +		case IPPROTO_DCCP:
 +			if (!onlyproto && (nh + offset + 4 < skb->data ||
 +			     pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
 +				__be16 *ports;
 +
 +				nh = skb_network_header(skb);
 +				ports = (__be16 *)(nh + offset);
 +				fl6->fl6_sport = ports[!!reverse];
 +				fl6->fl6_dport = ports[!reverse];
 +			}
 +			fl6->flowi6_proto = nexthdr;
 +			return;
 +		case IPPROTO_ICMPV6:
 +			if (!onlyproto && (nh + offset + 2 < skb->data ||
 +			    pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
 +				u8 *icmp;
 +
 +				nh = skb_network_header(skb);
 +				icmp = (u8 *)(nh + offset);
 +				fl6->fl6_icmp_type = icmp[0];
 +				fl6->fl6_icmp_code = icmp[1];
 +			}
 +			fl6->flowi6_proto = nexthdr;
 +			return;
 +#if IS_ENABLED(CONFIG_IPV6_MIP6)
 +		case IPPROTO_MH:
 +			offset += ipv6_optlen(exthdr);
 +			if (!onlyproto && (nh + offset + 3 < skb->data ||
 +			    pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
 +				struct ip6_mh *mh;
 +
 +				nh = skb_network_header(skb);
 +				mh = (struct ip6_mh *)(nh + offset);
 +				fl6->fl6_mh_type = mh->ip6mh_type;
 +			}
 +			fl6->flowi6_proto = nexthdr;
 +			return;
 +#endif
 +		/* XXX Why are there these headers? */
 +		case IPPROTO_AH:
 +		case IPPROTO_ESP:
 +		case IPPROTO_COMP:
 +		default:
 +			fl6->fl6_ipsec_spi = 0;
 +			fl6->flowi6_proto = nexthdr;
 +			return;
 +		}
 +	}
 +}
 +#endif
 +
  int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
  			  unsigned int family, int reverse)
  {
 -	const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 -	int err;
 -
 -	if (unlikely(afinfo == NULL))
 +	switch (family) {
 +	case AF_INET:
 +		decode_session4(skb, fl, reverse);
 +		break;
 +#if IS_ENABLED(CONFIG_IPV6)
 +	case AF_INET6:
 +		decode_session6(skb, fl, reverse);
 +		break;
 +#endif
 +	default:
  		return -EAFNOSUPPORT;
 +	}
  
 -	afinfo->decode_session(skb, fl, reverse);
 -
 -	err = security_xfrm_decode_session(skb, &fl->flowi_secid);
 -	rcu_read_unlock();
 -	return err;
 +	return security_xfrm_decode_session(skb, &fl->flowi_secid);
  }
  EXPORT_SYMBOL(__xfrm_decode_session);
  
@@@ -3519,7 -3313,7 +3519,7 @@@ int __xfrm_policy_check(struct sock *sk
  	ifcb = xfrm_if_get_cb();
  
  	if (ifcb) {
- 		xi = ifcb->decode_session(skb);
+ 		xi = ifcb->decode_session(skb, family);
  		if (xi) {
  			if_id = xi->p.if_id;
  			net = xi->net;
diff --combined net/xfrm/xfrm_state.c
index ed25eb81aabe,178baaa037e5..3edbf4b26116
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@@ -173,7 -173,7 +173,7 @@@ static DEFINE_SPINLOCK(xfrm_state_gc_lo
  int __xfrm_state_delete(struct xfrm_state *x);
  
  int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
 -bool km_is_alive(const struct km_event *c);
 +static bool km_is_alive(const struct km_event *c);
  void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
  
  static DEFINE_SPINLOCK(xfrm_type_lock);
@@@ -330,67 -330,100 +330,67 @@@ static void xfrm_put_type_offload(cons
  	module_put(type->owner);
  }
  
 -static DEFINE_SPINLOCK(xfrm_mode_lock);
 -int xfrm_register_mode(struct xfrm_mode *mode, int family)
 -{
 -	struct xfrm_state_afinfo *afinfo;
 -	struct xfrm_mode **modemap;
 -	int err;
 -
 -	if (unlikely(mode->encap >= XFRM_MODE_MAX))
 -		return -EINVAL;
 -
 -	afinfo = xfrm_state_get_afinfo(family);
 -	if (unlikely(afinfo == NULL))
 -		return -EAFNOSUPPORT;
 -
 -	err = -EEXIST;
 -	modemap = afinfo->mode_map;
 -	spin_lock_bh(&xfrm_mode_lock);
 -	if (modemap[mode->encap])
 -		goto out;
 -
 -	err = -ENOENT;
 -	if (!try_module_get(afinfo->owner))
 -		goto out;
 -
 -	mode->afinfo = afinfo;
 -	modemap[mode->encap] = mode;
 -	err = 0;
 -
 -out:
 -	spin_unlock_bh(&xfrm_mode_lock);
 -	rcu_read_unlock();
 -	return err;
 -}
 -EXPORT_SYMBOL(xfrm_register_mode);
 -
 -int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
 -{
 -	struct xfrm_state_afinfo *afinfo;
 -	struct xfrm_mode **modemap;
 -	int err;
 -
 -	if (unlikely(mode->encap >= XFRM_MODE_MAX))
 -		return -EINVAL;
 -
 -	afinfo = xfrm_state_get_afinfo(family);
 -	if (unlikely(afinfo == NULL))
 -		return -EAFNOSUPPORT;
 -
 -	err = -ENOENT;
 -	modemap = afinfo->mode_map;
 -	spin_lock_bh(&xfrm_mode_lock);
 -	if (likely(modemap[mode->encap] == mode)) {
 -		modemap[mode->encap] = NULL;
 -		module_put(mode->afinfo->owner);
 -		err = 0;
 -	}
 -
 -	spin_unlock_bh(&xfrm_mode_lock);
 -	rcu_read_unlock();
 -	return err;
 -}
 -EXPORT_SYMBOL(xfrm_unregister_mode);
 -
 -static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
 -{
 -	struct xfrm_state_afinfo *afinfo;
 -	struct xfrm_mode *mode;
 -	int modload_attempted = 0;
 +static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = {
 +	[XFRM_MODE_BEET] = {
 +		.encap = XFRM_MODE_BEET,
 +		.flags = XFRM_MODE_FLAG_TUNNEL,
 +		.family = AF_INET,
 +	},
 +	[XFRM_MODE_TRANSPORT] = {
 +		.encap = XFRM_MODE_TRANSPORT,
 +		.family = AF_INET,
 +	},
 +	[XFRM_MODE_TUNNEL] = {
 +		.encap = XFRM_MODE_TUNNEL,
 +		.flags = XFRM_MODE_FLAG_TUNNEL,
 +		.family = AF_INET,
 +	},
 +};
 +
 +static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = {
 +	[XFRM_MODE_BEET] = {
 +		.encap = XFRM_MODE_BEET,
 +		.flags = XFRM_MODE_FLAG_TUNNEL,
 +		.family = AF_INET6,
 +	},
 +	[XFRM_MODE_ROUTEOPTIMIZATION] = {
 +		.encap = XFRM_MODE_ROUTEOPTIMIZATION,
 +		.family = AF_INET6,
 +	},
 +	[XFRM_MODE_TRANSPORT] = {
 +		.encap = XFRM_MODE_TRANSPORT,
 +		.family = AF_INET6,
 +	},
 +	[XFRM_MODE_TUNNEL] = {
 +		.encap = XFRM_MODE_TUNNEL,
 +		.flags = XFRM_MODE_FLAG_TUNNEL,
 +		.family = AF_INET6,
 +	},
 +};
 +
 +static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
 +{
 +	const struct xfrm_mode *mode;
  
  	if (unlikely(encap >= XFRM_MODE_MAX))
  		return NULL;
  
 -retry:
 -	afinfo = xfrm_state_get_afinfo(family);
 -	if (unlikely(afinfo == NULL))
 -		return NULL;
 -
 -	mode = READ_ONCE(afinfo->mode_map[encap]);
 -	if (unlikely(mode && !try_module_get(mode->owner)))
 -		mode = NULL;
 -
 -	rcu_read_unlock();
 -	if (!mode && !modload_attempted) {
 -		request_module("xfrm-mode-%d-%d", family, encap);
 -		modload_attempted = 1;
 -		goto retry;
 +	switch (family) {
 +	case AF_INET:
 +		mode = &xfrm4_mode_map[encap];
 +		if (mode->family == family)
 +			return mode;
 +		break;
 +	case AF_INET6:
 +		mode = &xfrm6_mode_map[encap];
 +		if (mode->family == family)
 +			return mode;
 +		break;
 +	default:
 +		break;
  	}
  
 -	return mode;
 -}
 -
 -static void xfrm_put_mode(struct xfrm_mode *mode)
 -{
 -	module_put(mode->owner);
 +	return NULL;
  }
  
  void xfrm_state_free(struct xfrm_state *x)
@@@ -411,6 -444,12 +411,6 @@@ static void ___xfrm_state_destroy(struc
  	kfree(x->coaddr);
  	kfree(x->replay_esn);
  	kfree(x->preplay_esn);
 -	if (x->inner_mode)
 -		xfrm_put_mode(x->inner_mode);
 -	if (x->inner_mode_iaf)
 -		xfrm_put_mode(x->inner_mode_iaf);
 -	if (x->outer_mode)
 -		xfrm_put_mode(x->outer_mode);
  	if (x->type_offload)
  		xfrm_put_type_offload(x->type_offload);
  	if (x->type) {
@@@ -551,6 -590,8 +551,6 @@@ struct xfrm_state *xfrm_state_alloc(str
  		x->lft.hard_packet_limit = XFRM_INF;
  		x->replay_maxage = 0;
  		x->replay_maxdiff = 0;
 -		x->inner_mode = NULL;
 -		x->inner_mode_iaf = NULL;
  		spin_lock_init(&x->lock);
  	}
  	return x;
@@@ -2025,7 -2066,7 +2025,7 @@@ int km_report(struct net *net, u8 proto
  }
  EXPORT_SYMBOL(km_report);
  
 -bool km_is_alive(const struct km_event *c)
 +static bool km_is_alive(const struct km_event *c)
  {
  	struct xfrm_mgr *km;
  	bool is_alive = false;
@@@ -2041,6 -2082,7 +2041,6 @@@
  
  	return is_alive;
  }
 -EXPORT_SYMBOL(km_is_alive);
  
  int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
  {
@@@ -2153,7 -2195,6 +2153,7 @@@ struct xfrm_state_afinfo *xfrm_state_af
  
  	return rcu_dereference(xfrm_state_afinfo[family]);
  }
 +EXPORT_SYMBOL_GPL(xfrm_state_afinfo_get_rcu);
  
  struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
  {
@@@ -2201,9 -2242,8 +2201,9 @@@ int xfrm_state_mtu(struct xfrm_state *x
  
  int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
  {
 -	struct xfrm_state_afinfo *afinfo;
 -	struct xfrm_mode *inner_mode;
 +	const struct xfrm_state_afinfo *afinfo;
 +	const struct xfrm_mode *inner_mode;
 +	const struct xfrm_mode *outer_mode;
  	int family = x->props.family;
  	int err;
  
@@@ -2229,22 -2269,25 +2229,22 @@@
  			goto error;
  
  		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
 -		    family != x->sel.family) {
 -			xfrm_put_mode(inner_mode);
 +		    family != x->sel.family)
  			goto error;
 -		}
  
 -		x->inner_mode = inner_mode;
 +		x->inner_mode = *inner_mode;
  	} else {
 -		struct xfrm_mode *inner_mode_iaf;
 +		const struct xfrm_mode *inner_mode_iaf;
  		int iafamily = AF_INET;
  
  		inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
  		if (inner_mode == NULL)
  			goto error;
  
 -		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
 -			xfrm_put_mode(inner_mode);
 +		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL))
  			goto error;
 -		}
 -		x->inner_mode = inner_mode;
 +
 +		x->inner_mode = *inner_mode;
  
  		if (x->props.family == AF_INET)
  			iafamily = AF_INET6;
@@@ -2252,7 -2295,9 +2252,7 @@@
  		inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
  		if (inner_mode_iaf) {
  			if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
 -				x->inner_mode_iaf = inner_mode_iaf;
 -			else
 -				xfrm_put_mode(inner_mode_iaf);
 +				x->inner_mode_iaf = *inner_mode_iaf;
  		}
  	}
  
@@@ -2266,13 -2311,12 +2266,13 @@@
  	if (err)
  		goto error;
  
 -	x->outer_mode = xfrm_get_mode(x->props.mode, family);
 -	if (x->outer_mode == NULL) {
 +	outer_mode = xfrm_get_mode(x->props.mode, family);
 +	if (!outer_mode) {
  		err = -EPROTONOSUPPORT;
  		goto error;
  	}
  
 +	x->outer_mode = *outer_mode;
  	if (init_replay) {
  		err = xfrm_init_replay(x);
  		if (err)
@@@ -2340,7 -2384,7 +2340,7 @@@ void xfrm_state_fini(struct net *net
  
  	flush_work(&net->xfrm.state_hash_work);
  	flush_work(&xfrm_state_gc_work);
- 	xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true);
+ 	xfrm_state_flush(net, 0, false, true);
  
  	WARN_ON(!list_empty(&net->xfrm.state_all));
  
diff --combined net/xfrm/xfrm_user.c
index d7cb16f0df5b,6916931b1de1..eb8d14389601
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@@ -1006,8 -1006,8 +1006,8 @@@ static int xfrm_dump_sa(struct sk_buff 
  		u8 proto = 0;
  		int err;
  
 -		err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, xfrma_policy,
 -				  cb->extack);
 +		err = nlmsg_parse_deprecated(cb->nlh, 0, attrs, XFRMA_MAX,
 +					     xfrma_policy, cb->extack);
  		if (err < 0)
  			return err;
  
@@@ -1424,7 -1424,7 +1424,7 @@@ static int verify_newpolicy_info(struc
  	ret = verify_policy_dir(p->dir);
  	if (ret)
  		return ret;
- 	if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir))
+ 	if (p->index && (xfrm_policy_id2dir(p->index) != p->dir))
  		return -EINVAL;
  
  	return 0;
@@@ -1513,20 -1513,8 +1513,8 @@@ static int validate_tmpl(int nr, struc
  			return -EINVAL;
  		}
  
- 		switch (ut[i].id.proto) {
- 		case IPPROTO_AH:
- 		case IPPROTO_ESP:
- 		case IPPROTO_COMP:
- #if IS_ENABLED(CONFIG_IPV6)
- 		case IPPROTO_ROUTING:
- 		case IPPROTO_DSTOPTS:
- #endif
- 		case IPSEC_PROTO_ANY:
- 			break;
- 		default:
+ 		if (!xfrm_id_proto_valid(ut[i].id.proto))
  			return -EINVAL;
- 		}
- 
  	}
  
  	return 0;
@@@ -2656,9 -2644,9 +2644,9 @@@ static int xfrm_user_rcv_msg(struct sk_
  		}
  	}
  
 -	err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs,
 -			  link->nla_max ? : XFRMA_MAX,
 -			  link->nla_pol ? : xfrma_policy, extack);
 +	err = nlmsg_parse_deprecated(nlh, xfrm_msg_min[type], attrs,
 +				     link->nla_max ? : XFRMA_MAX,
 +				     link->nla_pol ? : xfrma_policy, extack);
  	if (err < 0)
  		return err;
  
diff --combined tools/bpf/bpftool/map.c
index e951d45c0131,994a7e0d16fb..3ec82904ccec
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@@ -46,7 -46,6 +46,7 @@@ const char * const map_type_name[] = 
  	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]	= "percpu_cgroup_storage",
  	[BPF_MAP_TYPE_QUEUE]			= "queue",
  	[BPF_MAP_TYPE_STACK]			= "stack",
 +	[BPF_MAP_TYPE_SK_STORAGE]		= "sk_storage",
  };
  
  const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
@@@ -154,13 -153,11 +154,13 @@@ static int do_dump_btf(const struct btf
  	/* start of key-value pair */
  	jsonw_start_object(d->jw);
  
 -	jsonw_name(d->jw, "key");
 +	if (map_info->btf_key_type_id) {
 +		jsonw_name(d->jw, "key");
  
 -	ret = btf_dumper_type(d, map_info->btf_key_type_id, key);
 -	if (ret)
 -		goto err_end_obj;
 +		ret = btf_dumper_type(d, map_info->btf_key_type_id, key);
 +		if (ret)
 +			goto err_end_obj;
 +	}
  
  	if (!map_is_per_cpu(map_info->type)) {
  		jsonw_name(d->jw, "value");
@@@ -262,20 -259,20 +262,20 @@@ static void print_entry_json(struct bpf
  }
  
  static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
 -			      const char *value)
 +			      const char *error_msg)
  {
 -	int value_size = strlen(value);
 +	int msg_size = strlen(error_msg);
  	bool single_line, break_names;
  
 -	break_names = info->key_size > 16 || value_size > 16;
 -	single_line = info->key_size + value_size <= 24 && !break_names;
 +	break_names = info->key_size > 16 || msg_size > 16;
 +	single_line = info->key_size + msg_size <= 24 && !break_names;
  
  	printf("key:%c", break_names ? '\n' : ' ');
  	fprint_hex(stdout, key, info->key_size, " ");
  
  	printf(single_line ? "  " : "\n");
  
 -	printf("value:%c%s", break_names ? '\n' : ' ', value);
 +	printf("value:%c%s", break_names ? '\n' : ' ', error_msg);
  
  	printf("\n");
  }
@@@ -299,7 -296,11 +299,7 @@@ static void print_entry_plain(struct bp
  
  		if (info->value_size) {
  			printf("value:%c", break_names ? '\n' : ' ');
 -			if (value)
 -				fprint_hex(stdout, value, info->value_size,
 -					   " ");
 -			else
 -				printf("<no entry>");
 +			fprint_hex(stdout, value, info->value_size, " ");
  		}
  
  		printf("\n");
@@@ -318,8 -319,11 +318,8 @@@
  			for (i = 0; i < n; i++) {
  				printf("value (CPU %02d):%c",
  				       i, info->value_size > 16 ? '\n' : ' ');
 -				if (value)
 -					fprint_hex(stdout, value + i * step,
 -						   info->value_size, " ");
 -				else
 -					printf("<no entry>");
 +				fprint_hex(stdout, value + i * step,
 +					   info->value_size, " ");
  				printf("\n");
  			}
  		}
@@@ -532,9 -536,6 +532,9 @@@ static int show_map_close_json(int fd, 
  	}
  	close(fd);
  
 +	if (info->btf_id)
 +		jsonw_int_field(json_wtr, "btf_id", info->btf_id);
 +
  	if (!hash_empty(map_table.table)) {
  		struct pinned_obj *obj;
  
@@@ -601,19 -602,15 +601,19 @@@ static int show_map_close_plain(int fd
  	}
  	close(fd);
  
 -	printf("\n");
  	if (!hash_empty(map_table.table)) {
  		struct pinned_obj *obj;
  
  		hash_for_each_possible(map_table.table, obj, hash, info->id) {
  			if (obj->id == info->id)
 -				printf("\tpinned %s\n", obj->path);
 +				printf("\n\tpinned %s", obj->path);
  		}
  	}
 +
 +	if (info->btf_id)
 +		printf("\n\tbtf_id %d", info->btf_id);
 +
 +	printf("\n");
  	return 0;
  }
  
@@@ -723,16 -720,11 +723,16 @@@ static int dump_map_elem(int fd, void *
  		jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
  		jsonw_end_object(json_wtr);
  	} else {
 -		if (errno == ENOENT)
 -			print_entry_plain(map_info, key, NULL);
 -		else
 -			print_entry_error(map_info, key,
 -					  strerror(lookup_errno));
 +		const char *msg = NULL;
 +
 +		if (lookup_errno == ENOENT)
 +			msg = "<no entry>";
 +		else if (lookup_errno == ENOSPC &&
 +			 map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
 +			msg = "<cannot read>";
 +
 +		print_entry_error(map_info, key,
 +				  msg ? : strerror(lookup_errno));
  	}
  
  	return 0;
@@@ -786,10 -778,6 +786,10 @@@ static int do_dump(int argc, char **arg
  			}
  		}
  
 +	if (info.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
 +	    info.value_size != 8)
 +		p_info("Warning: cannot read values from %s map with value_size != 8",
 +		       map_type_name[info.type]);
  	while (true) {
  		err = bpf_map_get_next_key(fd, prev_key, key);
  		if (err) {
@@@ -1163,6 -1151,9 +1163,9 @@@ static int do_create(int argc, char **a
  				return -1;
  			}
  			NEXT_ARG();
+ 		} else {
+ 			p_err("unknown arg %s", *argv);
+ 			return -1;
  		}
  	}
  
diff --combined tools/lib/bpf/.gitignore
index 7d9e182a1f51,fecb78afea3f..d9e9dec04605
--- a/tools/lib/bpf/.gitignore
+++ b/tools/lib/bpf/.gitignore
@@@ -1,4 -1,4 +1,5 @@@
  libbpf_version.h
 +libbpf.pc
  FEATURE-DUMP.libbpf
  test_libbpf
+ libbpf.so.*

-- 
LinuxNextTracking


More information about the linux-merge mailing list