The following commit has been merged in the master branch:

commit ff24e4980a68d83090a02fda081741a410fe8eef
Merge: 26f146ed971c0e4a264ce525d7a66a71ef73690d ea9866793d1e925b4d320eaea409263b2a568f38
Author: David S. Miller <davem@davemloft.net>
Date:   Thu May 2 22:14:21 2019 -0400

    Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

    Three trivial overlapping conflicts.

    Signed-off-by: David S. Miller <davem@davemloft.net>
diff --combined Documentation/networking/ip-sysctl.txt
index c9538a30ef7e,c4ac35234f05..725b8bea58a7
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@@ -81,11 -81,6 +81,11 @@@ fib_multipath_hash_policy - INTEGE
 	0 - Layer 3
 	1 - Layer 4
 
+fib_sync_mem - UNSIGNED INTEGER
+	Amount of dirty memory from fib entries that can be backlogged before
+	synchronize_rcu is forced.
+	Default: 512kB   Minimum: 64kB   Maximum: 64MB
+
 ip_forward_update_priority - INTEGER
 	Whether to update SKB priority from "TOS" field in IPv4 header after
 	it is forwarded. The new SKB priority is mapped from TOS field value
@@@ -1342,6 -1337,7 +1342,7 @@@ tag - INTEGE
 	Default value is 0.
 
 xfrm4_gc_thresh - INTEGER
+	(Obsolete since linux-4.14)
 	The threshold at which we will start garbage collecting for IPv4
 	destination cache entries.  At twice this value the system will
 	refuse new allocations.
@@@ -1914,42 -1910,18 +1915,43 @@@ enhanced_dad - BOOLEA
 
 icmp/*:
 ratelimit - INTEGER
-	Limit the maximal rates for sending ICMPv6 packets.
+	Limit the maximal rates for sending ICMPv6 messages.
 	0 to disable any limiting,
 	otherwise the minimal space between responses in milliseconds.
 	Default: 1000
 
+ratemask - list of comma separated ranges
+	For ICMPv6 message types matching the ranges in the ratemask, limit
+	the sending of the message according to ratelimit parameter.
+
+	The format used for both input and output is a comma separated
+	list of ranges (e.g. "0-127,129" for ICMPv6 message type 0 to 127 and
+	129). Writing to the file will clear all previous ranges of ICMPv6
+	message types and update the current list with the input.
+
+	Refer to: https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xhtml
+	for numerical values of ICMPv6 message types, e.g. echo request is 128
+	and echo reply is 129.
+
+	Default: 0-1,3-127 (rate limit ICMPv6 errors except Packet Too Big)
+
 echo_ignore_all - BOOLEAN
 	If set non-zero, then the kernel will ignore all ICMP ECHO
 	requests sent to it over the IPv6 protocol.
 	Default: 0
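[Aside, not part of the diff] The ratemask entry above documents a range-list
format; a minimal C sketch of driving it from userspace, assuming a kernel
that carries this change (the file lives at the usual /proc/sys/net/ipv6/icmp/
path):

/* Sketch: extend the default ratemask "0-1,3-127" so that echo
 * replies (type 129) are rate limited as well.
 */
#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/net/ipv6/icmp/ratemask";
	FILE *f = fopen(path, "w");

	if (!f) {
		perror(path);
		return 1;
	}
	/* comma separated ranges of ICMPv6 message types, as documented above */
	fprintf(f, "0-1,3-127,129\n");
	return fclose(f) ? 1 : 0;
}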
+echo_ignore_multicast - BOOLEAN
+	If set non-zero, then the kernel will ignore all ICMP ECHO
+	requests sent to it over the IPv6 protocol via multicast.
+	Default: 0
+
+echo_ignore_anycast - BOOLEAN
+	If set non-zero, then the kernel will ignore all ICMP ECHO
+	requests sent to it over the IPv6 protocol destined to anycast address.
+	Default: 0
+
 xfrm6_gc_thresh - INTEGER
+	(Obsolete since linux-4.14)
 	The threshold at which we will start garbage collecting for IPv6
 	destination cache entries.  At twice this value the system will
 	refuse new allocations.

diff --combined Makefile
index b6e7ee4f1fc4,633d1196bf00..e1bb7345cdd1
--- a/Makefile
+++ b/Makefile
@@@ -2,7 -2,7 +2,7 @@@
 VERSION = 5
 PATCHLEVEL = 1
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Shy Crocodile
 # *DOCUMENTATION*
@@@ -401,7 -401,6 +401,7 @@@ NM		= $(CROSS_COMPILE)n
 STRIP		= $(CROSS_COMPILE)strip
 OBJCOPY		= $(CROSS_COMPILE)objcopy
 OBJDUMP		= $(CROSS_COMPILE)objdump
+PAHOLE		= pahole
 LEX		= flex
 YACC		= bison
 AWK		= awk
@@@ -456,7 -455,7 +456,7 @@@ KBUILD_LDFLAGS :
 GCC_PLUGINS_CFLAGS :=
 
 export ARCH SRCARCH CONFIG_SHELL HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS
+export CPP AR NM STRIP OBJCOPY OBJDUMP PAHOLE KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS
 export MAKE LEX YACC AWK INSTALLKERNEL PERL PYTHON PYTHON2 PYTHON3 UTS_MACHINE
 export HOSTCXX KBUILD_HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS
 
@@@ -679,6 -678,7 +679,7 @@@ KBUILD_CFLAGS += $(call cc-disable-warn
 KBUILD_CFLAGS += $(call cc-disable-warning, format-truncation)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow)
 KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context)
+KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
 
 ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
 KBUILD_CFLAGS	+= -Os
@@@ -720,7 -720,6 +721,6 @@@ ifdef CONFIG_CC_IS_CLAN
 KBUILD_CPPFLAGS += $(call cc-option,-Qunused-arguments,)
 KBUILD_CFLAGS += $(call cc-disable-warning, format-invalid-specifier)
 KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
-KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
 # Quiet clang warning: comparison of unsigned expression < 0 is always false
 KBUILD_CFLAGS += $(call cc-disable-warning, tautological-compare)
 # CLANG uses a _MergedGlobals as optimization, but this breaks modpost, as the

diff --combined drivers/infiniband/hw/mlx5/main.c
index 0845e95d2d11,d3dd290ae1b1..347e3cac254e
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@@ -1119,6 -1119,8 +1119,8 @@@ static int mlx5_ib_query_device(struct
 		if (MLX5_CAP_GEN(mdev, qp_packet_based))
 			resp.flags |=
 				MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
+
+		resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
 	}
 	if (field_avail(typeof(resp), sw_parsing_caps,
@@@ -2009,7 -2011,7 +2011,7 @@@ static phys_addr_t uar_index2pfn(struc
 
 	fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
 
-	return (pci_resource_start(dev->mdev->pdev, 0) >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
+	return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
 }
 
 static int get_command(unsigned long offset)
@@@ -2066,6 -2068,7 +2068,7 @@@ static int mlx5_ib_mmap_clock_info_page
 
 	if (vma->vm_flags & VM_WRITE)
 		return -EPERM;
+	vma->vm_flags &= ~VM_MAYWRITE;
 
 	if (!dev->mdev->clock_info_page)
 		return -EOPNOTSUPP;
@@@ -2199,7 -2202,7 +2202,7 @@@ static int dm_mmap(struct ib_ucontext *
 	    page_idx + npages)
 		return -EINVAL;
 
-	pfn = ((pci_resource_start(dev->mdev->pdev, 0) +
+	pfn = ((dev->mdev->bar_addr +
 	       MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
 	       PAGE_SHIFT) +
 	      page_idx;
@@@ -2231,19 -2234,18 +2234,18 @@@ static int mlx5_ib_mmap(struct ib_ucont
 
 		if (vma->vm_flags & VM_WRITE)
 			return -EPERM;
+		vma->vm_flags &= ~VM_MAYWRITE;
 
 		/* Don't expose to user-space information it shouldn't have */
 		if (PAGE_SIZE > 4096)
 			return -EOPNOTSUPP;
 
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 		pfn = (dev->mdev->iseg_base +
 		       offsetof(struct mlx5_init_seg, internal_timer_h)) >>
 			PAGE_SHIFT;
-		if (io_remap_pfn_range(vma, vma->vm_start, pfn,
-				       PAGE_SIZE, vma->vm_page_prot))
-			return -EAGAIN;
-		break;
+		return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
+					 PAGE_SIZE,
+					 pgprot_noncached(vma->vm_page_prot));
 	case MLX5_IB_MMAP_CLOCK_INFO:
 		return mlx5_ib_mmap_clock_info_page(dev, vma, context);
 
@@@ -2283,7 -2285,7 +2285,7 @@@ struct ib_dm *mlx5_ib_alloc_dm(struct i
 		goto err_free;
 
 	start_offset = memic_addr & ~PAGE_MASK;
-	page_idx = (memic_addr - pci_resource_start(memic->dev->pdev, 0) -
+	page_idx = (memic_addr - memic->dev->bar_addr -
 		    MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
 		   PAGE_SHIFT;
 
@@@ -2326,7 -2328,7 +2328,7 @@@ int mlx5_ib_dealloc_dm(struct ib_dm *ib
 	if (ret)
 		return ret;
 
-	page_idx = (dm->dev_addr - pci_resource_start(memic->dev->pdev, 0) -
+	page_idx = (dm->dev_addr - memic->dev->bar_addr -
 		    MLX5_CAP64_DEV_MEM(memic->dev, memic_bar_start_addr)) >>
 		   PAGE_SHIFT;
 	bitmap_clear(to_mucontext(ibdm->uobject->context)->dm_pages,

diff --combined drivers/infiniband/hw/mlx5/qp.c
index ef7d69269a88,8870c350fda0..fc67d78ca959
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@@ -1818,13 -1818,16 +1818,16 @@@ static void configure_responder_scat_cq
 	rcqe_sz = mlx5_ib_get_cqe_size(init_attr->recv_cq);
 
-	if (rcqe_sz == 128) {
-		MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+	if (init_attr->qp_type == MLX5_IB_QPT_DCT) {
+		if (rcqe_sz == 128)
+			MLX5_SET(dctc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE);
+
 		return;
 	}
 
-	if (init_attr->qp_type != MLX5_IB_QPT_DCT)
-		MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE);
+	MLX5_SET(qpc, qpc, cs_res,
+		 rcqe_sz == 128 ? MLX5_RES_SCAT_DATA64_CQE :
+				  MLX5_RES_SCAT_DATA32_CQE);
 }
 
 static void configure_requester_scat_cqe(struct mlx5_ib_dev *dev,
@@@ -5119,7 -5122,7 +5122,7 @@@ out
 		wmb();
 
 		/* currently we support only regular doorbells */
-		mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset, NULL);
+		mlx5_write64((__be32 *)ctrl, bf->bfreg->map + bf->offset);
 		/* Make sure doorbells don't leak out of SQ spinlock
 		 * and reach the HCA out of order.
 		 */

diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 526f36dcb204,52ade133b57c..a0de3c368f4a
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@@ -551,7 -551,7 +551,7 @@@ normal_tx
 	prod = NEXT_TX(prod);
 	txr->tx_prod = prod;
 
-	if (!skb->xmit_more || netif_xmit_stopped(txq))
+	if (!netdev_xmit_more() || netif_xmit_stopped(txq))
 		bnxt_db_write(bp, &txr->tx_db, prod);
 
 tx_done:
@@@ -559,7 -559,7 +559,7 @@@
 	mmiowb();
 
 	if (unlikely(bnxt_tx_avail(bp, txr) <= MAX_SKB_FRAGS + 1)) {
-		if (skb->xmit_more && !tx_buf->is_push)
+		if (netdev_xmit_more() && !tx_buf->is_push)
 			bnxt_db_write(bp, &txr->tx_db, prod);
 
 		netif_tx_stop_queue(txq);
@@@ -899,7 -899,7 +899,7 @@@ static struct sk_buff *bnxt_rx_page_skb
 				      DMA_ATTR_WEAK_ORDERING);
 
 	if (unlikely(!payload))
-		payload = eth_get_headlen(data_ptr, len);
+		payload = eth_get_headlen(bp->dev, data_ptr, len);
 
 	skb = napi_alloc_skb(&rxr->bnapi->napi, payload);
 	if (!skb) {
@@@ -1625,7 -1625,7 +1625,7 @@@ static int bnxt_rx_pkt(struct bnxt *bp
 			netdev_warn(bp->dev, "RX buffer error %x\n", rx_err);
 			bnxt_sched_reset(bp, rxr);
 		}
-		goto next_rx;
+		goto next_rx_no_len;
 	}
 
 	len = le32_to_cpu(rxcmp->rx_cmp_len_flags_type) >> RX_CMP_LEN_SHIFT;
@@@ -1706,12 -1706,13 +1706,13 @@@
 	rc = 1;
 
 next_rx:
-	rxr->rx_prod = NEXT_RX(prod);
-	rxr->rx_next_cons = NEXT_RX(cons);
-
 	cpr->rx_packets += 1;
 	cpr->rx_bytes += len;
 
+next_rx_no_len:
+	rxr->rx_prod = NEXT_RX(prod);
+	rxr->rx_next_cons = NEXT_RX(cons);
+
 next_rx_no_prod_no_len:
 	*raw_cons = tmp_raw_cons;
@@@ -5135,10 -5136,10 +5136,10 @@@ static void bnxt_hwrm_ring_free(struct for (i = 0; i < bp->tx_nr_rings; i++) { struct bnxt_tx_ring_info *txr = &bp->tx_ring[i]; struct bnxt_ring_struct *ring = &txr->tx_ring_struct; - u32 cmpl_ring_id;
- cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr); if (ring->fw_ring_id != INVALID_HW_RING_ID) { + u32 cmpl_ring_id = bnxt_cp_ring_for_tx(bp, txr); + hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_TX, close_path ? cmpl_ring_id : @@@ -5151,10 -5152,10 +5152,10 @@@ struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring = &rxr->rx_ring_struct; u32 grp_idx = rxr->bnapi->index; - u32 cmpl_ring_id;
- cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); if (ring->fw_ring_id != INVALID_HW_RING_ID) { + u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); + hwrm_ring_free_send_msg(bp, ring, RING_FREE_REQ_RING_TYPE_RX, close_path ? cmpl_ring_id : @@@ -5173,10 -5174,10 +5174,10 @@@ struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; struct bnxt_ring_struct *ring = &rxr->rx_agg_ring_struct; u32 grp_idx = rxr->bnapi->index; - u32 cmpl_ring_id;
- cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); if (ring->fw_ring_id != INVALID_HW_RING_ID) { + u32 cmpl_ring_id = bnxt_cp_ring_for_rx(bp, rxr); + hwrm_ring_free_send_msg(bp, ring, type, close_path ? cmpl_ring_id : INVALID_HW_RING_ID); @@@ -5315,17 -5316,16 +5316,16 @@@ __bnxt_hwrm_reserve_pf_rings(struct bnx req->num_tx_rings = cpu_to_le16(tx_rings); if (BNXT_NEW_RM(bp)) { enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RX_RINGS : 0; + enables |= stats ? FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; if (bp->flags & BNXT_FLAG_CHIP_P5) { enables |= cp_rings ? FUNC_CFG_REQ_ENABLES_NUM_MSIX : 0; enables |= tx_rings + ring_grps ? - FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; enables |= rx_rings ? FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; } else { enables |= cp_rings ? - FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; enables |= ring_grps ? FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS | FUNC_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; @@@ -5365,14 -5365,13 +5365,13 @@@ __bnxt_hwrm_reserve_vf_rings(struct bnx enables |= tx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_TX_RINGS : 0; enables |= rx_rings ? FUNC_VF_CFG_REQ_ENABLES_NUM_RX_RINGS | FUNC_VF_CFG_REQ_ENABLES_NUM_RSSCOS_CTXS : 0; + enables |= stats ? FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; if (bp->flags & BNXT_FLAG_CHIP_P5) { enables |= tx_rings + ring_grps ? - FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; } else { enables |= cp_rings ? - FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS | - FUNC_VF_CFG_REQ_ENABLES_NUM_STAT_CTXS : 0; + FUNC_VF_CFG_REQ_ENABLES_NUM_CMPL_RINGS : 0; enables |= ring_grps ? FUNC_VF_CFG_REQ_ENABLES_NUM_HW_RING_GRPS : 0; } @@@ -6753,6 -6752,7 +6752,7 @@@ static int bnxt_hwrm_port_qstats_ext(st struct hwrm_queue_pri2cos_qcfg_input req2 = {0}; struct hwrm_port_qstats_ext_input req = {0}; struct bnxt_pf_info *pf = &bp->pf; + u32 tx_stat_size; int rc;
if (!(bp->flags & BNXT_FLAG_PORT_STATS_EXT)) @@@ -6762,13 -6762,16 +6762,16 @@@ req.port_id = cpu_to_le16(pf->port_id); req.rx_stat_size = cpu_to_le16(sizeof(struct rx_port_stats_ext)); req.rx_stat_host_addr = cpu_to_le64(bp->hw_rx_port_stats_ext_map); - req.tx_stat_size = cpu_to_le16(sizeof(struct tx_port_stats_ext)); + tx_stat_size = bp->hw_tx_port_stats_ext ? + sizeof(*bp->hw_tx_port_stats_ext) : 0; + req.tx_stat_size = cpu_to_le16(tx_stat_size); req.tx_stat_host_addr = cpu_to_le64(bp->hw_tx_port_stats_ext_map); mutex_lock(&bp->hwrm_cmd_lock); rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (!rc) { bp->fw_rx_stats_ext_size = le16_to_cpu(resp->rx_stat_size) / 8; - bp->fw_tx_stats_ext_size = le16_to_cpu(resp->tx_stat_size) / 8; + bp->fw_tx_stats_ext_size = tx_stat_size ? + le16_to_cpu(resp->tx_stat_size) / 8 : 0; } else { bp->fw_rx_stats_ext_size = 0; bp->fw_tx_stats_ext_size = 0; @@@ -8961,8 -8964,15 +8964,15 @@@ static int bnxt_cfg_rx_mode(struct bnx
skip_uc: rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); + if (rc && vnic->mc_list_count) { + netdev_info(bp->dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n", + rc); + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; + vnic->mc_list_count = 0; + rc = bnxt_hwrm_cfa_l2_set_rx_mask(bp, 0); + } if (rc) - netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %x\n", + netdev_err(bp->dev, "HWRM cfa l2 rx mask failure rc: %d\n", rc);
return rc; @@@ -10058,6 -10068,23 +10068,6 @@@ static int bnxt_bridge_setlink(struct n return rc; }
-static int bnxt_get_phys_port_name(struct net_device *dev, char *buf, - size_t len) -{ - struct bnxt *bp = netdev_priv(dev); - int rc; - - /* The PF and it's VF-reps only support the switchdev framework */ - if (!BNXT_PF(bp)) - return -EOPNOTSUPP; - - rc = snprintf(buf, len, "p%d", bp->pf.port_id); - - if (rc >= len) - return -EOPNOTSUPP; - return 0; -} - int bnxt_get_port_parent_id(struct net_device *dev, struct netdev_phys_item_id *ppid) { @@@ -10076,13 -10103,6 +10086,13 @@@ return 0; }
+static struct devlink_port *bnxt_get_devlink_port(struct net_device *dev) +{ + struct bnxt *bp = netdev_priv(dev); + + return &bp->dl_port; +} + static const struct net_device_ops bnxt_netdev_ops = { .ndo_open = bnxt_open, .ndo_start_xmit = bnxt_start_xmit, @@@ -10114,7 -10134,8 +10124,7 @@@ .ndo_bpf = bnxt_xdp, .ndo_bridge_getlink = bnxt_bridge_getlink, .ndo_bridge_setlink = bnxt_bridge_setlink, - .ndo_get_port_parent_id = bnxt_get_port_parent_id, - .ndo_get_phys_port_name = bnxt_get_phys_port_name + .ndo_get_devlink_port = bnxt_get_devlink_port, };
static void bnxt_remove_one(struct pci_dev *pdev) @@@ -10438,26 -10459,6 +10448,26 @@@ static int bnxt_init_mac_addr(struct bn return rc; }
+static int bnxt_pcie_dsn_get(struct bnxt *bp, u8 dsn[]) +{ + struct pci_dev *pdev = bp->pdev; + int pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DSN); + u32 dw; + + if (!pos) { + netdev_info(bp->dev, "Unable do read adapter's DSN"); + return -EOPNOTSUPP; + } + + /* DSN (two dw) is at an offset of 4 from the cap pos */ + pos += 4; + pci_read_config_dword(pdev, pos, &dw); + put_unaligned_le32(dw, &dsn[0]); + pci_read_config_dword(pdev, pos + 4, &dw); + put_unaligned_le32(dw, &dsn[4]); + return 0; +} + static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { static int version_printed; @@@ -10598,11 -10599,6 +10608,11 @@@ goto init_err_pci_clean; }
+ /* Read the adapter's DSN to use as the eswitch switch_id */ + rc = bnxt_pcie_dsn_get(bp, bp->switch_id); + if (rc) + goto init_err_pci_clean; + bnxt_hwrm_func_qcfg(bp); bnxt_hwrm_vnic_qcaps(bp); bnxt_hwrm_port_led_qcaps(bp); @@@ -10699,6 -10695,7 +10709,7 @@@ init_err_cleanup_tc bnxt_clear_int_mode(bp);
init_err_pci_clean: + bnxt_free_hwrm_short_cmd_req(bp); bnxt_free_hwrm_resources(bp); bnxt_free_ctx_mem(bp); kfree(bp->ctx); diff --combined drivers/net/phy/marvell.c index a7e8c8113d97,f76c4048b978..a7796134e3be --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@@ -29,7 -29,6 +29,7 @@@ #include <linux/ethtool.h> #include <linux/phy.h> #include <linux/marvell_phy.h> +#include <linux/bitfield.h> #include <linux/of.h>
#include <linux/io.h> @@@ -92,14 -91,6 +92,14 @@@ #define MII_88E1510_TEMP_SENSOR 0x1b #define MII_88E1510_TEMP_SENSOR_MASK 0xff
+#define MII_88E1540_COPPER_CTRL3 0x1a +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_MASK GENMASK(11, 10) +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_00MS 0 +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_10MS 1 +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_20MS 2 +#define MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_40MS 3 +#define MII_88E1540_COPPER_CTRL3_FAST_LINK_DOWN BIT(9) + #define MII_88E6390_MISC_TEST 0x1b #define MII_88E6390_MISC_TEST_SAMPLE_1S 0 #define MII_88E6390_MISC_TEST_SAMPLE_10MS BIT(14) @@@ -137,7 -128,6 +137,7 @@@ #define MII_PHY_LED_CTRL 16 #define MII_88E1121_PHY_LED_DEF 0x0030 #define MII_88E1510_PHY_LED_DEF 0x1177 +#define MII_88E1510_PHY_LED0_LINK_LED1_ACTIVE 0x1040
#define MII_M1011_PHY_STATUS 0x11 #define MII_M1011_PHY_STATUS_1000 0x8000 @@@ -634,10 -624,7 +634,10 @@@ static void marvell_config_led(struct p * LED[2] .. Blink, Activity */ case MARVELL_PHY_FAMILY_ID(MARVELL_PHY_ID_88E1510): - def_config = MII_88E1510_PHY_LED_DEF; + if (phydev->dev_flags & MARVELL_PHY_LED0_LINK_LED1_ACTIVE) + def_config = MII_88E1510_PHY_LED0_LINK_LED1_ACTIVE; + else + def_config = MII_88E1510_PHY_LED_DEF; break; default: return; @@@ -1038,101 -1025,6 +1038,101 @@@ static int m88e1145_config_init(struct return 0; }
+static int m88e1540_get_fld(struct phy_device *phydev, u8 *msecs) +{ + int val; + + val = phy_read(phydev, MII_88E1540_COPPER_CTRL3); + if (val < 0) + return val; + + if (!(val & MII_88E1540_COPPER_CTRL3_FAST_LINK_DOWN)) { + *msecs = ETHTOOL_PHY_FAST_LINK_DOWN_OFF; + return 0; + } + + val = FIELD_GET(MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_MASK, val); + + switch (val) { + case MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_00MS: + *msecs = 0; + break; + case MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_10MS: + *msecs = 10; + break; + case MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_20MS: + *msecs = 20; + break; + case MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_40MS: + *msecs = 40; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int m88e1540_set_fld(struct phy_device *phydev, const u8 *msecs) +{ + struct ethtool_eee eee; + int val, ret; + + if (*msecs == ETHTOOL_PHY_FAST_LINK_DOWN_OFF) + return phy_clear_bits(phydev, MII_88E1540_COPPER_CTRL3, + MII_88E1540_COPPER_CTRL3_FAST_LINK_DOWN); + + /* According to the Marvell data sheet EEE must be disabled for + * Fast Link Down detection to work properly + */ + ret = phy_ethtool_get_eee(phydev, &eee); + if (!ret && eee.eee_enabled) { + phydev_warn(phydev, "Fast Link Down detection requires EEE to be disabled!\n"); + return -EBUSY; + } + + if (*msecs <= 5) + val = MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_00MS; + else if (*msecs <= 15) + val = MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_10MS; + else if (*msecs <= 30) + val = MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_20MS; + else + val = MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_40MS; + + val = FIELD_PREP(MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_MASK, val); + + ret = phy_modify(phydev, MII_88E1540_COPPER_CTRL3, + MII_88E1540_COPPER_CTRL3_LINK_DOWN_DELAY_MASK, val); + if (ret) + return ret; + + return phy_set_bits(phydev, MII_88E1540_COPPER_CTRL3, + MII_88E1540_COPPER_CTRL3_FAST_LINK_DOWN); +} + +static int m88e1540_get_tunable(struct phy_device *phydev, + struct ethtool_tunable *tuna, void *data) +{ + switch (tuna->id) { + case ETHTOOL_PHY_FAST_LINK_DOWN: + return m88e1540_get_fld(phydev, data); + default: + return -EOPNOTSUPP; + } +} + +static int m88e1540_set_tunable(struct phy_device *phydev, + struct ethtool_tunable *tuna, const void *data) +{ + switch (tuna->id) { + case ETHTOOL_PHY_FAST_LINK_DOWN: + return m88e1540_set_fld(phydev, data); + default: + return -EOPNOTSUPP; + } +} + /* The VOD can be out of specification on link up. Poke an * undocumented register, in an undocumented page, with a magic value * to fix this. @@@ -1597,9 -1489,10 +1597,10 @@@ static int marvell_get_sset_count(struc
static void marvell_get_strings(struct phy_device *phydev, u8 *data) { + int count = marvell_get_sset_count(phydev); int i;
- for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) { + for (i = 0; i < count; i++) { strlcpy(data + i * ETH_GSTRING_LEN, marvell_hw_stats[i].string, ETH_GSTRING_LEN); } @@@ -1627,9 -1520,10 +1628,10 @@@ static u64 marvell_get_stat(struct phy_ static void marvell_get_stats(struct phy_device *phydev, struct ethtool_stats *stats, u64 *data) { + int count = marvell_get_sset_count(phydev); int i;
- for (i = 0; i < ARRAY_SIZE(marvell_hw_stats); i++) + for (i = 0; i < count; i++) data[i] = marvell_get_stat(phydev, i); }
@@@ -2130,7 -2024,7 +2132,7 @@@ static struct phy_driver marvell_driver .phy_id = MARVELL_PHY_ID_88E1101, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1101", - .features = PHY_GBIT_FEATURES, + /* PHY_GBIT_FEATURES */ .probe = marvell_probe, .config_init = &marvell_config_init, .config_aneg = &m88e1101_config_aneg, @@@ -2148,7 -2042,7 +2150,7 @@@ .phy_id = MARVELL_PHY_ID_88E1112, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1112", - .features = PHY_GBIT_FEATURES, + /* PHY_GBIT_FEATURES */ .probe = marvell_probe, .config_init = &m88e1111_config_init, .config_aneg = &marvell_config_aneg, @@@ -2166,7 -2060,7 +2168,7 @@@ .phy_id = MARVELL_PHY_ID_88E1111, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1111", - .features = PHY_GBIT_FEATURES, + /* PHY_GBIT_FEATURES */ .probe = marvell_probe, .config_init = &m88e1111_config_init, .config_aneg = &marvell_config_aneg, @@@ -2185,7 -2079,7 +2187,7 @@@ .phy_id = MARVELL_PHY_ID_88E1118, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1118", - .features = PHY_GBIT_FEATURES, + /* PHY_GBIT_FEATURES */ .probe = marvell_probe, .config_init = &m88e1118_config_init, .config_aneg = &m88e1118_config_aneg, @@@ -2203,7 -2097,7 +2205,7 @@@ .phy_id = MARVELL_PHY_ID_88E1121R, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1121R", - .features = PHY_GBIT_FEATURES, + /* PHY_GBIT_FEATURES */ .probe = &m88e1121_probe, .config_init = &marvell_config_init, .config_aneg = &m88e1121_config_aneg, @@@ -2223,7 -2117,7 +2225,7 @@@ .phy_id = MARVELL_PHY_ID_88E1318S, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1318S", - .features = PHY_GBIT_FEATURES, + /* PHY_GBIT_FEATURES */ .probe = marvell_probe, .config_init = &m88e1318_config_init, .config_aneg = &m88e1318_config_aneg, @@@ -2245,7 -2139,7 +2247,7 @@@ .phy_id = MARVELL_PHY_ID_88E1145, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1145", - .features = PHY_GBIT_FEATURES, + /* PHY_GBIT_FEATURES */ .probe = marvell_probe, .config_init = &m88e1145_config_init, .config_aneg = &m88e1101_config_aneg, @@@ -2264,7 -2158,7 +2266,7 @@@ .phy_id = MARVELL_PHY_ID_88E1149R, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1149R", - .features = PHY_GBIT_FEATURES, + /* PHY_GBIT_FEATURES */ .probe = marvell_probe, .config_init = &m88e1149_config_init, .config_aneg = &m88e1118_config_aneg, @@@ -2282,7 -2176,7 +2284,7 @@@ .phy_id = MARVELL_PHY_ID_88E1240, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1240", - .features = PHY_GBIT_FEATURES, + /* PHY_GBIT_FEATURES */ .probe = marvell_probe, .config_init = &m88e1111_config_init, .config_aneg = &marvell_config_aneg, @@@ -2300,7 -2194,7 +2302,7 @@@ .phy_id = MARVELL_PHY_ID_88E1116R, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1116R", - .features = PHY_GBIT_FEATURES, + /* PHY_GBIT_FEATURES */ .probe = marvell_probe, .config_init = &m88e1116r_config_init, .ack_interrupt = &marvell_ack_interrupt, @@@ -2340,7 -2234,7 +2342,7 @@@ .phy_id = MARVELL_PHY_ID_88E1540, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1540", - .features = PHY_GBIT_FEATURES, + /* PHY_GBIT_FEATURES */ .probe = m88e1510_probe, .config_init = &marvell_config_init, .config_aneg = &m88e1510_config_aneg, @@@ -2355,15 -2249,13 +2357,15 @@@ .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, + .get_tunable = m88e1540_get_tunable, + .set_tunable = m88e1540_set_tunable, }, { .phy_id = MARVELL_PHY_ID_88E1545, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1545", .probe = 
m88e1510_probe, - .features = PHY_GBIT_FEATURES, + /* PHY_GBIT_FEATURES */ .config_init = &marvell_config_init, .config_aneg = &m88e1510_config_aneg, .read_status = &marvell_read_status, @@@ -2382,7 -2274,7 +2384,7 @@@ .phy_id = MARVELL_PHY_ID_88E3016, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E3016", - .features = PHY_BASIC_FEATURES, + /* PHY_BASIC_FEATURES */ .probe = marvell_probe, .config_init = &m88e3016_config_init, .aneg_done = &marvell_aneg_done, @@@ -2402,7 -2294,7 +2404,7 @@@ .phy_id = MARVELL_PHY_ID_88E6390, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E6390", - .features = PHY_GBIT_FEATURES, + /* PHY_GBIT_FEATURES */ .probe = m88e6390_probe, .config_init = &marvell_config_init, .config_aneg = &m88e6390_config_aneg, @@@ -2417,8 -2309,6 +2419,8 @@@ .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, + .get_tunable = m88e1540_get_tunable, + .set_tunable = m88e1540_set_tunable, }, };
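[Aside, not part of the diff] The get/set_tunable hooks added above expose
ETHTOOL_PHY_FAST_LINK_DOWN through the ethtool ioctl. A userspace sketch,
assuming uapi headers that carry the constants introduced alongside this
driver change; "eth0" is a placeholder interface name:

/* Sketch: request ~20 ms fast link down on the PHY behind "eth0".
 * The driver rounds the value into its 0/10/20/40 ms buckets.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <unistd.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <net/if.h>

int main(void)
{
	char buf[sizeof(struct ethtool_tunable) + sizeof(__u8)] = { 0 };
	struct ethtool_tunable *tuna = (struct ethtool_tunable *)buf;
	struct ifreq ifr = { 0 };
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;

	tuna->cmd = ETHTOOL_PHY_STUNABLE;
	tuna->id = ETHTOOL_PHY_FAST_LINK_DOWN;
	tuna->type_id = ETHTOOL_TUNABLE_U8;
	tuna->len = sizeof(__u8);
	*(__u8 *)(tuna + 1) = 20;	/* msecs */

	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	ifr.ifr_data = (void *)tuna;

	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		perror("ETHTOOL_PHY_STUNABLE");
	close(fd);
	return 0;
}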
diff --combined drivers/net/usb/qmi_wwan.c
index 18c4e5d17b05,679e404a5224..5c3ac97519b7
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@@ -63,7 -63,6 +63,7 @@@ enum qmi_wwan_flags
 
 enum qmi_wwan_quirks {
 	QMI_WWAN_QUIRK_DTR = 1 << 0,		/* needs "set DTR" request */
+	QMI_WWAN_QUIRK_QUECTEL_DYNCFG = 1 << 1,	/* check num. endpoints */
 };
 
 struct qmimux_hdr {
@@@ -846,16 -845,6 +846,16 @@@ static const struct driver_info qmi_wwa
 	.data           = QMI_WWAN_QUIRK_DTR,
 };
 
+static const struct driver_info qmi_wwan_info_quirk_quectel_dyncfg = {
+	.description    = "WWAN/QMI device",
+	.flags          = FLAG_WWAN | FLAG_SEND_ZLP,
+	.bind           = qmi_wwan_bind,
+	.unbind         = qmi_wwan_unbind,
+	.manage_power   = qmi_wwan_manage_power,
+	.rx_fixup       = qmi_wwan_rx_fixup,
+	.data           = QMI_WWAN_QUIRK_DTR | QMI_WWAN_QUIRK_QUECTEL_DYNCFG,
+};
+
 #define HUAWEI_VENDOR_ID	0x12D1
 
 /* map QMI/wwan function by a fixed interface number */
@@@ -876,15 -865,6 +876,15 @@@
 #define QMI_GOBI_DEVICE(vend, prod) \
 	QMI_FIXED_INTF(vend, prod, 0)
 
+/* Quectel does not use fixed interface numbers on at least some of their
+ * devices. We need to check the number of endpoints to ensure that we bind to
+ * the correct interface.
+ */
+#define QMI_QUIRK_QUECTEL_DYNCFG(vend, prod) \
+	USB_DEVICE_AND_INTERFACE_INFO(vend, prod, USB_CLASS_VENDOR_SPEC, \
+				      USB_SUBCLASS_VENDOR_SPEC, 0xff), \
+	.driver_info = (unsigned long)&qmi_wwan_info_quirk_quectel_dyncfg
+
 static const struct usb_device_id products[] = {
 	/* 1. CDC ECM like devices match on the control interface */
 	{	/* Huawei E392, E398 and possibly others sharing both device id and more... */
@@@ -989,9 -969,20 +989,9 @@@
 		USB_DEVICE_AND_INTERFACE_INFO(0x03f0, 0x581d, USB_CLASS_VENDOR_SPEC, 1, 7),
 		.driver_info = (unsigned long)&qmi_wwan_info,
 	},
-	{	/* Quectel EP06/EG06/EM06 */
-		USB_DEVICE_AND_INTERFACE_INFO(0x2c7c, 0x0306,
-					      USB_CLASS_VENDOR_SPEC,
-					      USB_SUBCLASS_VENDOR_SPEC,
-					      0xff),
-		.driver_info        = (unsigned long)&qmi_wwan_info_quirk_dtr,
-	},
-	{	/* Quectel EG12/EM12 */
-		USB_DEVICE_AND_INTERFACE_INFO(0x2c7c, 0x0512,
-					      USB_CLASS_VENDOR_SPEC,
-					      USB_SUBCLASS_VENDOR_SPEC,
-					      0xff),
-		.driver_info        = (unsigned long)&qmi_wwan_info_quirk_dtr,
-	},
+	{QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0125)},	/* Quectel EC25, EC20 R2.0 Mini PCIe */
+	{QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0306)},	/* Quectel EP06/EG06/EM06 */
+	{QMI_QUIRK_QUECTEL_DYNCFG(0x2c7c, 0x0512)},	/* Quectel EG12/EM12 */
 
 	/* 3. Combined interface devices matching on interface number */
 	{QMI_FIXED_INTF(0x0408, 0xea42, 4)},	/* Yota / Megafon M100-1 */
@@@ -1131,9 -1122,16 +1131,16 @@@
 	{QMI_FIXED_INTF(0x0846, 0x68d3, 8)},	/* Netgear Aircard 779S */
 	{QMI_FIXED_INTF(0x12d1, 0x140c, 1)},	/* Huawei E173 */
 	{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)},	/* Huawei E1820 */
+	{QMI_FIXED_INTF(0x1435, 0x0918, 3)},	/* Wistron NeWeb D16Q1 */
+	{QMI_FIXED_INTF(0x1435, 0x0918, 4)},	/* Wistron NeWeb D16Q1 */
+	{QMI_FIXED_INTF(0x1435, 0x0918, 5)},	/* Wistron NeWeb D16Q1 */
+	{QMI_FIXED_INTF(0x1435, 0x3185, 4)},	/* Wistron NeWeb M18Q5 */
+	{QMI_FIXED_INTF(0x1435, 0xd111, 4)},	/* M9615A DM11-1 D51QC */
 	{QMI_FIXED_INTF(0x1435, 0xd181, 3)},	/* Wistron NeWeb D18Q1 */
 	{QMI_FIXED_INTF(0x1435, 0xd181, 4)},	/* Wistron NeWeb D18Q1 */
 	{QMI_FIXED_INTF(0x1435, 0xd181, 5)},	/* Wistron NeWeb D18Q1 */
+	{QMI_FIXED_INTF(0x1435, 0xd182, 4)},	/* Wistron NeWeb D18 */
+	{QMI_FIXED_INTF(0x1435, 0xd182, 5)},	/* Wistron NeWeb D18 */
 	{QMI_FIXED_INTF(0x1435, 0xd191, 4)},	/* Wistron NeWeb D19Q1 */
 	{QMI_QUIRK_SET_DTR(0x1508, 0x1001, 4)},	/* Fibocom NL668 series */
 	{QMI_FIXED_INTF(0x16d8, 0x6003, 0)},	/* CMOTech 6003 */
@@@ -1189,6 -1187,7 +1196,7 @@@
 	{QMI_FIXED_INTF(0x19d2, 0x0265, 4)},	/* ONDA MT8205 4G LTE */
 	{QMI_FIXED_INTF(0x19d2, 0x0284, 4)},	/* ZTE MF880 */
 	{QMI_FIXED_INTF(0x19d2, 0x0326, 4)},	/* ZTE MF821D */
+	{QMI_FIXED_INTF(0x19d2, 0x0396, 3)},	/* ZTE ZM8620 */
 	{QMI_FIXED_INTF(0x19d2, 0x0412, 4)},	/* Telewell TW-LTE 4G */
 	{QMI_FIXED_INTF(0x19d2, 0x1008, 4)},	/* ZTE (Vodafone) K3570-Z */
 	{QMI_FIXED_INTF(0x19d2, 0x1010, 4)},	/* ZTE (Vodafone) K3571-Z */
@@@ -1209,7 -1208,9 +1217,9 @@@
 	{QMI_FIXED_INTF(0x19d2, 0x1425, 2)},
 	{QMI_FIXED_INTF(0x19d2, 0x1426, 2)},	/* ZTE MF91 */
 	{QMI_FIXED_INTF(0x19d2, 0x1428, 2)},	/* Telewell TW-LTE 4G v2 */
+	{QMI_FIXED_INTF(0x19d2, 0x1432, 3)},	/* ZTE ME3620 */
 	{QMI_FIXED_INTF(0x19d2, 0x2002, 4)},	/* ZTE (Vodafone) K3765-Z */
+	{QMI_FIXED_INTF(0x2001, 0x7e16, 3)},	/* D-Link DWM-221 */
 	{QMI_FIXED_INTF(0x2001, 0x7e19, 4)},	/* D-Link DWM-221 B1 */
 	{QMI_FIXED_INTF(0x2001, 0x7e35, 4)},	/* D-Link DWM-222 */
 	{QMI_FIXED_INTF(0x2020, 0x2031, 4)},	/* Olicard 600 */
@@@ -1280,6 -1281,7 +1290,6 @@@
 	{QMI_FIXED_INTF(0x03f0, 0x9d1d, 1)},	/* HP lt4120 Snapdragon X5 LTE */
 	{QMI_FIXED_INTF(0x22de, 0x9061, 3)},	/* WeTelecom WPD-600N */
 	{QMI_QUIRK_SET_DTR(0x1e0e, 0x9001, 5)},	/* SIMCom 7100E, 7230E, 7600E ++ */
-	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0125, 4)},	/* Quectel EC25, EC20 R2.0 Mini PCIe */
 	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0121, 4)},	/* Quectel EC21 Mini PCIe */
 	{QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)},	/* Quectel EG91 */
 	{QMI_FIXED_INTF(0x2c7c, 0x0296, 4)},	/* Quectel BG96 */
@@@ -1359,12 -1361,27 +1369,12 @@@ static bool quectel_ec20_detected(struc
 	return false;
 }
 
-static bool quectel_diag_detected(struct usb_interface *intf)
-{
-	struct usb_device *dev = interface_to_usbdev(intf);
-	struct usb_interface_descriptor intf_desc = intf->cur_altsetting->desc;
-	u16 id_vendor = le16_to_cpu(dev->descriptor.idVendor);
-	u16 id_product = le16_to_cpu(dev->descriptor.idProduct);
-
-	if (id_vendor != 0x2c7c || intf_desc.bNumEndpoints != 2)
-		return false;
-
-	if (id_product == 0x0306 || id_product == 0x0512)
-		return true;
-	else
-		return false;
-}
-
 static int qmi_wwan_probe(struct usb_interface *intf,
 			  const struct usb_device_id *prod)
 {
 	struct usb_device_id *id = (struct usb_device_id *)prod;
 	struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc;
+	const struct driver_info *info;
 
 	/* Workaround to enable dynamic IDs.  This disables usbnet
 	 * blacklisting functionality.  Which, if required, can be
@@@ -1398,14 -1415,10 +1408,14 @@@
 	 * we need to match on class/subclass/protocol. These values are
 	 * identical for the diagnostic- and QMI-interface, but bNumEndpoints is
 	 * different. Ignore the current interface if the number of endpoints
-	 * the number for the diag interface (two).
+	 * equals the number for the diag interface (two).
 	 */
-	if (quectel_diag_detected(intf))
-		return -ENODEV;
+	info = (void *)&id->driver_info;
+
+	if (info->data & QMI_WWAN_QUIRK_QUECTEL_DYNCFG) {
+		if (desc->bNumEndpoints == 2)
+			return -ENODEV;
+	}
return usbnet_probe(intf, id); } diff --combined drivers/net/wireless/intel/iwlwifi/cfg/22000.c index fc915ecfb06e,0a87d87fbb4f..17b34f6e4515 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@@ -89,7 -89,6 +89,7 @@@ #define IWL_22000_SO_A_HR_B_FW_PRE "iwlwifi-so-a0-hr-b0-" #define IWL_22000_SO_A_GF_A_FW_PRE "iwlwifi-so-a0-gf-a0-" #define IWL_22000_TY_A_GF_A_FW_PRE "iwlwifi-ty-a0-gf-a0-" +#define IWL_22000_SO_A_GF4_A_FW_PRE "iwlwifi-so-a0-gf4-a0-"
#define IWL_22000_HR_MODULE_FIRMWARE(api) \ IWL_22000_HR_FW_PRE __stringify(api) ".ucode" @@@ -181,11 -180,7 +181,11 @@@ static const struct iwl_ht_params iwl_2 .dbgc_supported = true, \ .min_umac_error_event_table = 0x400000, \ .d3_debug_data_base_addr = 0x401000, \ - .d3_debug_data_length = 60 * 1024 + .d3_debug_data_length = 60 * 1024, \ + .fw_mon_smem_write_ptr_addr = 0xa0c16c, \ + .fw_mon_smem_write_ptr_msk = 0xfffff, \ + .fw_mon_smem_cycle_cnt_ptr_addr = 0xa0c174, \ + .fw_mon_smem_cycle_cnt_ptr_msk = 0xfffff
#define IWL_DEVICE_AX200_COMMON \ IWL_DEVICE_22000_COMMON, \ @@@ -195,8 -190,7 +195,8 @@@ IWL_DEVICE_22000_COMMON, \ .device_family = IWL_DEVICE_FAMILY_22000, \ .base_params = &iwl_22000_base_params, \ - .csr = &iwl_csr_v1 + .csr = &iwl_csr_v1, \ + .gp2_reg_addr = 0xa02c68
#define IWL_DEVICE_22560 \ IWL_DEVICE_22000_COMMON, \ @@@ -207,11 -201,9 +207,11 @@@ #define IWL_DEVICE_AX210 \ IWL_DEVICE_AX200_COMMON, \ .device_family = IWL_DEVICE_FAMILY_AX210, \ - .base_params = &iwl_22000_base_params, \ + .base_params = &iwl_22560_base_params, \ .csr = &iwl_csr_v1, \ - .min_txq_size = 128 + .min_txq_size = 128, \ + .gp2_reg_addr = 0xd02c68, \ + .min_256_ba_txq_size = 512
const struct iwl_cfg iwl22000_2ac_cfg_hr = { .name = "Intel(R) Dual Band Wireless AC 22000", @@@ -448,20 -440,12 +448,20 @@@ const struct iwl_cfg iwlax210_2ax_cfg_s const struct iwl_cfg iwlax210_2ax_cfg_so_gf_a0 = { .name = "Intel(R) Wi-Fi 7 AX211 160MHz", .fw_name_pre = IWL_22000_SO_A_GF_A_FW_PRE, + .uhb_supported = true, IWL_DEVICE_AX210, };
const struct iwl_cfg iwlax210_2ax_cfg_ty_gf_a0 = { .name = "Intel(R) Wi-Fi 7 AX210 160MHz", .fw_name_pre = IWL_22000_TY_A_GF_A_FW_PRE, + .uhb_supported = true, + IWL_DEVICE_AX210, +}; + +const struct iwl_cfg iwlax210_2ax_cfg_so_gf4_a0 = { + .name = "Intel(R) Wi-Fi 7 AX210 160MHz", + .fw_name_pre = IWL_22000_SO_A_GF4_A_FW_PRE, IWL_DEVICE_AX210, };
diff --combined drivers/net/wireless/intel/iwlwifi/fw/file.h index abfdcabdcbf7,e06407dc088b..cd622af90077 --- a/drivers/net/wireless/intel/iwlwifi/fw/file.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h @@@ -93,7 -93,7 +93,7 @@@ struct iwl_ucode_header } u; };
- #define IWL_UCODE_INI_TLV_GROUP BIT(24) + #define IWL_UCODE_INI_TLV_GROUP 0x1000000
/* * new TLV uCode file layout @@@ -148,11 -148,14 +148,14 @@@ enum iwl_ucode_tlv_type IWL_UCODE_TLV_UMAC_DEBUG_ADDRS = 54, IWL_UCODE_TLV_LMAC_DEBUG_ADDRS = 55, IWL_UCODE_TLV_FW_RECOVERY_INFO = 57, - IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION = IWL_UCODE_INI_TLV_GROUP | 0x1, - IWL_UCODE_TLV_TYPE_HCMD = IWL_UCODE_INI_TLV_GROUP | 0x2, - IWL_UCODE_TLV_TYPE_REGIONS = IWL_UCODE_INI_TLV_GROUP | 0x3, - IWL_UCODE_TLV_TYPE_TRIGGERS = IWL_UCODE_INI_TLV_GROUP | 0x4, - IWL_UCODE_TLV_TYPE_DEBUG_FLOW = IWL_UCODE_INI_TLV_GROUP | 0x5, + + IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION = IWL_UCODE_INI_TLV_GROUP + 0x1, + IWL_UCODE_TLV_DEBUG_BASE = IWL_UCODE_TLV_TYPE_BUFFER_ALLOCATION, + IWL_UCODE_TLV_TYPE_HCMD = IWL_UCODE_INI_TLV_GROUP + 0x2, + IWL_UCODE_TLV_TYPE_REGIONS = IWL_UCODE_INI_TLV_GROUP + 0x3, + IWL_UCODE_TLV_TYPE_TRIGGERS = IWL_UCODE_INI_TLV_GROUP + 0x4, + IWL_UCODE_TLV_TYPE_DEBUG_FLOW = IWL_UCODE_INI_TLV_GROUP + 0x5, + IWL_UCODE_TLV_DEBUG_MAX = IWL_UCODE_TLV_TYPE_DEBUG_FLOW,
/* TLVs 0x1000-0x2000 are for internal driver usage */ IWL_UCODE_TLV_FW_DBG_DUMP_LST = 0x1000, @@@ -272,15 -275,8 +275,15 @@@ typedef unsigned int __bitwise iwl_ucod * version of the beacon notification. * @IWL_UCODE_TLV_API_BEACON_FILTER_V4: This ucode supports v4 of * BEACON_FILTER_CONFIG_API_S_VER_4. + * @IWL_UCODE_TLV_API_REGULATORY_NVM_INFO: This ucode supports v4 of + * REGULATORY_NVM_GET_INFO_RSP_API_S. * @IWL_UCODE_TLV_API_FTM_NEW_RANGE_REQ: This ucode supports v7 of * LOCATION_RANGE_REQ_CMD_API_S and v6 of LOCATION_RANGE_RESP_NTFY_API_S. + * @IWL_UCODE_TLV_API_SCAN_OFFLOAD_CHANS: This ucode supports v2 of + * SCAN_OFFLOAD_PROFILE_MATCH_RESULTS_S and v3 of + * SCAN_OFFLOAD_PROFILES_QUERY_RSP_S. + * @IWL_UCODE_TLV_API_MBSSID_HE: This ucode supports v2 of + * STA_CONTEXT_DOT11AX_API_S * * @NUM_IWL_UCODE_TLV_API: number of bits used */ @@@ -307,10 -303,7 +310,10 @@@ enum iwl_ucode_tlv_api IWL_UCODE_TLV_API_REDUCE_TX_POWER = (__force iwl_ucode_tlv_api_t)45, IWL_UCODE_TLV_API_SHORT_BEACON_NOTIF = (__force iwl_ucode_tlv_api_t)46, IWL_UCODE_TLV_API_BEACON_FILTER_V4 = (__force iwl_ucode_tlv_api_t)47, + IWL_UCODE_TLV_API_REGULATORY_NVM_INFO = (__force iwl_ucode_tlv_api_t)48, IWL_UCODE_TLV_API_FTM_NEW_RANGE_REQ = (__force iwl_ucode_tlv_api_t)49, + IWL_UCODE_TLV_API_SCAN_OFFLOAD_CHANS = (__force iwl_ucode_tlv_api_t)50, + IWL_UCODE_TLV_API_MBSSID_HE = (__force iwl_ucode_tlv_api_t)52,
NUM_IWL_UCODE_TLV_API #ifdef __CHECKER__ @@@ -360,7 -353,6 +363,7 @@@ typedef unsigned int __bitwise iwl_ucod * IWL_UCODE_TLV_CAPA_CHANNEL_SWITCH_CMD: firmware supports CSA command * @IWL_UCODE_TLV_CAPA_ULTRA_HB_CHANNELS: firmware supports ultra high band * (6 GHz). + * @IWL_UCODE_TLV_CAPA_CS_MODIFY: firmware supports modify action CSA command * @IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE: extended DTS measurement * @IWL_UCODE_TLV_CAPA_SHORT_PM_TIMEOUTS: supports short PM timeouts * @IWL_UCODE_TLV_CAPA_BT_MPLUT_SUPPORT: supports bt-coex Multi-priority LUT @@@ -431,7 -423,6 +434,7 @@@ enum iwl_ucode_tlv_capa IWL_UCODE_TLV_CAPA_CHANNEL_SWITCH_CMD = (__force iwl_ucode_tlv_capa_t)46, IWL_UCODE_TLV_CAPA_ULTRA_HB_CHANNELS = (__force iwl_ucode_tlv_capa_t)48, IWL_UCODE_TLV_CAPA_FTM_CALIBRATED = (__force iwl_ucode_tlv_capa_t)47, + IWL_UCODE_TLV_CAPA_CS_MODIFY = (__force iwl_ucode_tlv_capa_t)49,
/* set 2 */ IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE = (__force iwl_ucode_tlv_capa_t)64, diff --combined drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c index 9107302cc444,c7070760a10a..0e8664375298 --- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c @@@ -5,7 -5,7 +5,7 @@@ * * GPL LICENSE SUMMARY * - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@@ -28,7 -28,7 +28,7 @@@ * * BSD LICENSE * - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018 - 2019 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@@ -73,9 -73,6 +73,9 @@@ void iwl_fw_dbg_copy_tlv(struct iwl_tra int copy_size = le32_to_cpu(tlv->length) + sizeof(*tlv); int offset_size = copy_size;
+ if (le32_to_cpu(header->tlv_version) != 1) + return; + if (WARN_ONCE(apply_point >= IWL_FW_INI_APPLY_NUM, "Invalid apply point id %d\n", apply_point)) return; @@@ -129,15 -126,13 +129,16 @@@ void iwl_alloc_dbg_tlv(struct iwl_tran len -= ALIGN(tlv_len, 4); data += sizeof(*tlv) + ALIGN(tlv_len, 4);
- if (!(tlv_type & IWL_UCODE_INI_TLV_GROUP)) + if (tlv_type < IWL_UCODE_TLV_DEBUG_BASE || + tlv_type > IWL_UCODE_TLV_DEBUG_MAX) continue;
hdr = (void *)&tlv->data[0]; apply = le32_to_cpu(hdr->apply_point);
+ if (le32_to_cpu(hdr->tlv_version) != 1) + continue; + IWL_DEBUG_FW(trans, "Read TLV %x, apply point %d\n", le32_to_cpu(tlv->type), apply);
diff --combined drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 9bf2407c9b4b,6925527d8457..f043eefabb4e --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@@ -743,8 -743,9 +743,8 @@@ static ssize_t iwl_dbgfs_quota_min_read #define MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz) \ _MVM_DEBUGFS_READ_WRITE_FILE_OPS(name, bufsz, struct ieee80211_vif) #define MVM_DEBUGFS_ADD_FILE_VIF(name, parent, mode) do { \ - if (!debugfs_create_file(#name, mode, parent, vif, \ - &iwl_dbgfs_##name##_ops)) \ - goto err; \ + debugfs_create_file(#name, mode, parent, vif, \ + &iwl_dbgfs_##name##_ops); \ } while (0)
MVM_DEBUGFS_READ_FILE_OPS(mac_params); @@@ -773,6 -774,11 +773,11 @@@ void iwl_mvm_vif_dbgfs_register(struct return;
mvmvif->dbgfs_dir = debugfs_create_dir("iwlmvm", dbgfs_dir); + if (IS_ERR_OR_NULL(mvmvif->dbgfs_dir)) { + IWL_ERR(mvm, "Failed to create debugfs directory under %pd\n", + dbgfs_dir); + return; + }
if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM && ((vif->type == NL80211_IFTYPE_STATION && !vif->p2p) || @@@ -805,6 -811,12 +810,6 @@@
mvmvif->dbgfs_slink = debugfs_create_symlink(dbgfs_dir->d_name.name, mvm->debugfs_dir, buf); - if (!mvmvif->dbgfs_slink) - IWL_ERR(mvm, "Can't create debugfs symbolic link under %pd\n", - dbgfs_dir); - return; -err: - IWL_ERR(mvm, "Can't create debugfs entity\n"); }
void iwl_mvm_vif_dbgfs_clean(struct iwl_mvm *mvm, struct ieee80211_vif *vif) diff --combined drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 55d399899d1c,13681b03c10e..8da9e5572fcf --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@@ -8,7 -8,7 +8,7 @@@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@@ -31,7 -31,7 +31,7 @@@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@@ -834,7 -834,7 +834,7 @@@ iwl_op_mode_mvm_start(struct iwl_trans mutex_lock(&mvm->mutex); iwl_mvm_ref(mvm, IWL_MVM_REF_INIT_UCODE); err = iwl_run_init_mvm_ucode(mvm, true); - if (err) + if (err && err != -ERFKILL) iwl_fw_dbg_error_collect(&mvm->fwrt, FW_DBG_TRIGGER_DRIVER); if (!iwlmvm_mod_params.init_dbg || !err) iwl_mvm_stop_device(mvm); @@@ -862,7 -862,9 +862,7 @@@ min_backoff = iwl_mvm_min_backoff(mvm); iwl_mvm_thermal_initialize(mvm, min_backoff);
- err = iwl_mvm_dbgfs_register(mvm, dbgfs_dir); - if (err) - goto out_unregister; + iwl_mvm_dbgfs_register(mvm, dbgfs_dir);
if (!iwl_mvm_has_new_rx_stats_api(mvm)) memset(&mvm->rx_stats_v3, 0, @@@ -879,6 -881,14 +879,6 @@@
return op_mode;
- out_unregister: - if (iwlmvm_mod_params.init_dbg) - return op_mode; - - ieee80211_unregister_hw(mvm->hw); - mvm->hw_registered = false; - iwl_mvm_leds_exit(mvm); - iwl_mvm_thermal_exit(mvm); out_free: iwl_fw_flush_dump(&mvm->fwrt); iwl_fw_runtime_free(&mvm->fwrt); @@@ -1095,7 -1105,7 +1095,7 @@@ static void iwl_mvm_rx_mq(struct iwl_op else if (cmd == WIDE_ID(LEGACY_GROUP, FRAME_RELEASE)) iwl_mvm_rx_frame_release(mvm, napi, rxb, 0); else if (cmd == WIDE_ID(DATA_PATH_GROUP, RX_NO_DATA_NOTIF)) - iwl_mvm_rx_monitor_ndp(mvm, napi, rxb, 0); + iwl_mvm_rx_monitor_no_data(mvm, napi, rxb, 0); else iwl_mvm_rx_common(mvm, rxb, pkt); } @@@ -1281,7 -1291,8 +1281,7 @@@ void iwl_mvm_nic_restart(struct iwl_mv * can't recover this since we're already half suspended. */ if (!mvm->fw_restart && fw_error) { - iwl_fw_dbg_collect_desc(&mvm->fwrt, &iwl_dump_desc_assert, - false, 0); + iwl_fw_error_collect(&mvm->fwrt); } else if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) { struct iwl_mvm_reprobe *reprobe;
@@@ -1329,8 -1340,6 +1329,8 @@@ } }
+ iwl_fw_error_collect(&mvm->fwrt); + if (fw_error && mvm->fw_restart > 0) mvm->fw_restart--; set_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status); diff --combined drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index 0b1b208de767,b516fd1867ec..1824566d08fc --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@@ -8,7 -8,7 +8,7 @@@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@@ -31,7 -31,7 +31,7 @@@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2015 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 - 2019 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@@ -169,9 -169,9 +169,9 @@@ static inline int iwl_mvm_check_pn(stru }
/* iwl_mvm_create_skb Adds the rxb to a new skb */ - static void iwl_mvm_create_skb(struct sk_buff *skb, struct ieee80211_hdr *hdr, - u16 len, u8 crypt_len, - struct iwl_rx_cmd_buffer *rxb) + static int iwl_mvm_create_skb(struct iwl_mvm *mvm, struct sk_buff *skb, + struct ieee80211_hdr *hdr, u16 len, u8 crypt_len, + struct iwl_rx_cmd_buffer *rxb) { struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_rx_mpdu_desc *desc = (void *)pkt->data; @@@ -204,6 -204,20 +204,20 @@@ * present before copying packet data. */ hdrlen += crypt_len; + + if (WARN_ONCE(headlen < hdrlen, + "invalid packet lengths (hdrlen=%d, len=%d, crypt_len=%d)\n", + hdrlen, len, crypt_len)) { + /* + * We warn and trace because we want to be able to see + * it in trace-cmd as well. + */ + IWL_DEBUG_RX(mvm, + "invalid packet lengths (hdrlen=%d, len=%d, crypt_len=%d)\n", + hdrlen, len, crypt_len); + return -EINVAL; + } + skb_put_data(skb, hdr, hdrlen); skb_put_data(skb, (u8 *)hdr + hdrlen + pad_len, headlen - hdrlen);
@@@ -216,6 -230,8 +230,8 @@@ skb_add_rx_frag(skb, 0, rxb_steal_page(rxb), offset, fraglen, rxb->truesize); } + + return 0; }
static void iwl_mvm_add_rtap_sniffer_config(struct iwl_mvm *mvm, @@@ -1671,7 -1687,11 +1687,11 @@@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm rx_status->boottime_ns = ktime_get_boot_ns(); }
- iwl_mvm_create_skb(skb, hdr, len, crypt_len, rxb); + if (iwl_mvm_create_skb(mvm, skb, hdr, len, crypt_len, rxb)) { + kfree_skb(skb); + goto out; + } + if (!iwl_mvm_reorder(mvm, napi, queue, sta, skb, desc)) iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, queue, sta, csi); @@@ -1679,8 -1699,8 +1699,8 @@@ out rcu_read_unlock(); }
-void iwl_mvm_rx_monitor_ndp(struct iwl_mvm *mvm, struct napi_struct *napi, - struct iwl_rx_cmd_buffer *rxb, int queue) +void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi, + struct iwl_rx_cmd_buffer *rxb, int queue) { struct ieee80211_rx_status *rx_status; struct iwl_rx_packet *pkt = rxb_addr(rxb); @@@ -1701,6 -1721,10 +1721,6 @@@ if (unlikely(test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status))) return;
- /* Currently only NDP type is supported */ - if (info_type != RX_NO_DATA_INFO_TYPE_NDP) - return; - energy_a = (rssi & RX_NO_DATA_CHAIN_A_MSK) >> RX_NO_DATA_CHAIN_A_POS; energy_b = (rssi & RX_NO_DATA_CHAIN_B_MSK) >> RX_NO_DATA_CHAIN_B_POS; channel = (rssi & RX_NO_DATA_CHANNEL_MSK) >> RX_NO_DATA_CHANNEL_POS; @@@ -1722,22 -1746,9 +1742,22 @@@
/* 0-length PSDU */ rx_status->flag |= RX_FLAG_NO_PSDU; - /* currently this is the only type for which we get this notif */ - rx_status->zero_length_psdu_type = - IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING; + + switch (info_type) { + case RX_NO_DATA_INFO_TYPE_NDP: + rx_status->zero_length_psdu_type = + IEEE80211_RADIOTAP_ZERO_LEN_PSDU_SOUNDING; + break; + case RX_NO_DATA_INFO_TYPE_MU_UNMATCHED: + case RX_NO_DATA_INFO_TYPE_HE_TB_UNMATCHED: + rx_status->zero_length_psdu_type = + IEEE80211_RADIOTAP_ZERO_LEN_PSDU_NOT_CAPTURED; + break; + default: + rx_status->zero_length_psdu_type = + IEEE80211_RADIOTAP_ZERO_LEN_PSDU_VENDOR; + break; + }
/* This may be overridden by iwl_mvm_rx_he() to HE_RU */ switch (rate_n_flags & RATE_MCS_CHAN_WIDTH_MSK) { diff --combined drivers/net/wireless/intel/iwlwifi/pcie/trans.c index c5baaae8d38e,c4375b868901..cccb8bbd7ea7 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@@ -2442,8 -2442,9 +2442,8 @@@ void iwl_pcie_dump_csr(struct iwl_tran #ifdef CONFIG_IWLWIFI_DEBUGFS /* create and remove of files */ #define DEBUGFS_ADD_FILE(name, parent, mode) do { \ - if (!debugfs_create_file(#name, mode, parent, trans, \ - &iwl_dbgfs_##name##_ops)) \ - goto err; \ + debugfs_create_file(#name, mode, parent, trans, \ + &iwl_dbgfs_##name##_ops); \ } while (0)
/* file operation */ @@@ -2846,7 -2847,7 +2846,7 @@@ static const struct file_operations iwl };
/* Create the debugfs files and directories */ -int iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans) +void iwl_trans_pcie_dbgfs_register(struct iwl_trans *trans) { struct dentry *dir = trans->dbgfs_dir;
@@@ -2857,6 -2858,11 +2857,6 @@@ DEBUGFS_ADD_FILE(fh_reg, dir, 0400); DEBUGFS_ADD_FILE(rfkill, dir, 0600); DEBUGFS_ADD_FILE(monitor_data, dir, 0400); - return 0; - -err: - IWL_ERR(trans, "failed to create the trans debugfs entry\n"); - return -ENOMEM; }
static void iwl_trans_pcie_debugfs_cleanup(struct iwl_trans *trans) @@@ -3006,14 -3012,10 +3006,14 @@@ static voi iwl_trans_pcie_dump_pointers(struct iwl_trans *trans, struct iwl_fw_error_dump_fw_mon *fw_mon_data) { - u32 base, write_ptr, wrap_cnt; + u32 base, base_high, write_ptr, write_ptr_val, wrap_cnt;
- /* If there was a dest TLV - use the values from there */ - if (trans->ini_valid) { + if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_AX210) { + base = DBGC_CUR_DBGBUF_BASE_ADDR_LSB; + base_high = DBGC_CUR_DBGBUF_BASE_ADDR_MSB; + write_ptr = DBGC_CUR_DBGBUF_STATUS; + wrap_cnt = DBGC_DBGBUF_WRAP_AROUND; + } else if (trans->ini_valid) { base = iwl_umac_prph(trans, MON_BUFF_BASE_ADDR_VER2); write_ptr = iwl_umac_prph(trans, MON_BUFF_WRPTR_VER2); wrap_cnt = iwl_umac_prph(trans, MON_BUFF_CYCLE_CNT_VER2); @@@ -3026,18 -3028,12 +3026,18 @@@ write_ptr = MON_BUFF_WRPTR; wrap_cnt = MON_BUFF_CYCLE_CNT; } - fw_mon_data->fw_mon_wr_ptr = - cpu_to_le32(iwl_read_prph(trans, write_ptr)); + + write_ptr_val = iwl_read_prph(trans, write_ptr); fw_mon_data->fw_mon_cycle_cnt = cpu_to_le32(iwl_read_prph(trans, wrap_cnt)); fw_mon_data->fw_mon_base_ptr = cpu_to_le32(iwl_read_prph(trans, base)); + if (trans->cfg->device_family >= IWL_DEVICE_FAMILY_AX210) { + fw_mon_data->fw_mon_base_high_ptr = + cpu_to_le32(iwl_read_prph(trans, base_high)); + write_ptr_val &= DBGC_CUR_DBGBUF_STATUS_OFFSET_MSK; + } + fw_mon_data->fw_mon_wr_ptr = cpu_to_le32(write_ptr_val); }
static u32 @@@ -3048,10 -3044,9 +3048,10 @@@ iwl_trans_pcie_dump_monitor(struct iwl_ u32 len = 0;
if ((trans->num_blocks && - trans->cfg->device_family == IWL_DEVICE_FAMILY_7000) || - (trans->dbg_dest_tlv && !trans->ini_valid) || - (trans->ini_valid && trans->num_blocks)) { + (trans->cfg->device_family == IWL_DEVICE_FAMILY_7000 || + trans->cfg->device_family >= IWL_DEVICE_FAMILY_AX210 || + trans->ini_valid)) || + (trans->dbg_dest_tlv && !trans->ini_valid)) { struct iwl_fw_error_dump_fw_mon *fw_mon_data;
(*data)->type = cpu_to_le32(IWL_FW_ERROR_DUMP_FW_MONITOR); @@@ -3170,10 -3165,8 +3170,10 @@@ static struct iwl_trans_dump_dat len = sizeof(*dump_data);
/* host commands */ - len += sizeof(*data) + - cmdq->n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE); + if (dump_mask & BIT(IWL_FW_ERROR_DUMP_TXCMD)) + len += sizeof(*data) + + cmdq->n_window * (sizeof(*txcmd) + + TFD_MAX_PAYLOAD_SIZE);
/* FW monitor */ if (dump_mask & BIT(IWL_FW_ERROR_DUMP_FW_MONITOR)) @@@ -3547,9 -3540,6 +3547,9 @@@ struct iwl_trans *iwl_trans_pcie_alloc( } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_GF)) { trans->cfg = &iwlax210_2ax_cfg_so_gf_a0; + } else if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == + CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_GF4)) { + trans->cfg = &iwlax210_2ax_cfg_so_gf4_a0; } } else if (cfg == &iwl_ax101_cfg_qu_hr) { if (CSR_HW_RF_ID_TYPE_CHIP_ID(trans->hw_rf_id) == @@@ -3654,20 -3644,27 +3654,27 @@@ out_no_pci
void iwl_trans_pcie_sync_nmi(struct iwl_trans *trans) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); unsigned long timeout = jiffies + IWL_TRANS_NMI_TIMEOUT; + u32 inta_addr, sw_err_bit; + + if (trans_pcie->msix_enabled) { + inta_addr = CSR_MSIX_HW_INT_CAUSES_AD; + sw_err_bit = MSIX_HW_INT_CAUSES_REG_SW_ERR; + } else { + inta_addr = CSR_INT; + sw_err_bit = CSR_INT_BIT_SW_ERR; + }
iwl_disable_interrupts(trans); iwl_force_nmi(trans); while (time_after(timeout, jiffies)) { - u32 inta_hw = iwl_read32(trans, - CSR_MSIX_HW_INT_CAUSES_AD); + u32 inta_hw = iwl_read32(trans, inta_addr);
/* Error detected by uCode */ - if (inta_hw & MSIX_HW_INT_CAUSES_REG_SW_ERR) { + if (inta_hw & sw_err_bit) { /* Clear causes register */ - iwl_write32(trans, CSR_MSIX_HW_INT_CAUSES_AD, - inta_hw & - MSIX_HW_INT_CAUSES_REG_SW_ERR); + iwl_write32(trans, inta_addr, inta_hw & sw_err_bit); break; }
diff --combined fs/proc/proc_sysctl.c
index 2d61e5e8c863,7325baa8f9d4..c74570736b24
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@@ -13,7 -13,6 +13,7 @@@
 #include <linux/namei.h>
 #include <linux/mm.h>
 #include <linux/module.h>
+#include <linux/bpf-cgroup.h>
 #include "internal.h"
 
 static const struct dentry_operations proc_sys_dentry_operations;
@@@ -570,8 -569,8 +570,8 @@@ static ssize_t proc_sys_call_handler(st
 	struct inode *inode = file_inode(filp);
 	struct ctl_table_header *head = grab_header(inode);
 	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
+	void *new_buf = NULL;
 	ssize_t error;
-	size_t res;
 
 	if (IS_ERR(head))
 		return PTR_ERR(head);
@@@ -589,27 -588,11 +589,27 @@@
 	if (!table->proc_handler)
 		goto out;
 
+	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
+					   ppos, &new_buf);
+	if (error)
+		goto out;
+
 	/* careful: calling conventions are nasty here */
-	res = count;
-	error = table->proc_handler(table, write, buf, &res, ppos);
+	if (new_buf) {
+		mm_segment_t old_fs;
+
+		old_fs = get_fs();
+		set_fs(KERNEL_DS);
+		error = table->proc_handler(table, write, (void __user *)new_buf,
+					    &count, ppos);
+		set_fs(old_fs);
+		kfree(new_buf);
+	} else {
+		error = table->proc_handler(table, write, buf, &count, ppos);
+	}
+
 	if (!error)
-		error = res;
+		error = count;
 
 out:
 	sysctl_head_finish(head);
 
@@@ -1643,9 -1626,11 +1643,11 @@@ static void drop_sysctl_table(struct ct
 	if (--header->nreg)
 		return;
 
-	if (parent)
+	if (parent) {
 		put_links(header);
-	start_unregistering(header);
+		start_unregistering(header);
+	}
+
 	if (!--header->count)
 		kfree_rcu(header, rcu);
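[Aside, not part of the diff] The BPF_CGROUP_RUN_PROG_SYSCTL() hook wired
into proc_sys_call_handler() above is the kernel side of the new
BPF_PROG_TYPE_CGROUP_SYSCTL program type. A minimal program sketch, assuming
clang plus libbpf's SEC() convention:

/* Allow sysctl reads, reject writes, for tasks in the attached cgroup;
 * the hook runs this before the table's proc_handler is called.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup/sysctl")
int sysctl_read_only(struct bpf_sysctl *ctx)
{
	return ctx->write ? 0 : 1;	/* 0 rejects with -EPERM, 1 allows */
}

char _license[] SEC("license") = "GPL";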
diff --combined include/linux/bpf.h index 9a21848fdb07,944ccc310201..59631dd0777c --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@@ -57,12 -57,6 +57,12 @@@ struct bpf_map_ops const struct btf *btf, const struct btf_type *key_type, const struct btf_type *value_type); + + /* Direct value access helpers. */ + int (*map_direct_value_addr)(const struct bpf_map *map, + u64 *imm, u32 off); + int (*map_direct_value_meta)(const struct bpf_map *map, + u64 imm, u32 *off); };
struct bpf_map { @@@ -87,8 -81,7 +87,8 @@@ struct btf *btf; u32 pages; bool unpriv_array; - /* 51 bytes hole */ + bool frozen; /* write-once */ + /* 48 bytes hole */
/* The 3rd and 4th cacheline with misc members to avoid false sharing * particularly with refcounting. @@@ -184,7 -177,6 +184,7 @@@ enum bpf_arg_type ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ + ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */
/* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack @@@ -203,9 -195,6 +203,9 @@@ ARG_ANYTHING, /* any (initialized) argument is ok */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ + ARG_PTR_TO_INT, /* pointer to int */ + ARG_PTR_TO_LONG, /* pointer to long */ + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ };
/* type of values returned from helper functions */ @@@ -216,7 -205,6 +216,7 @@@ enum bpf_return_type RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ + RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ };
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs @@@ -274,7 -262,6 +274,7 @@@ enum bpf_reg_type PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ + PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ };
/* The information passed from prog-specific *_is_valid_access @@@ -364,7 -351,6 +364,7 @@@ struct bpf_prog_aux u32 used_map_cnt; u32 max_ctx_offset; u32 max_pkt_offset; + u32 max_tp_access; u32 stack_depth; u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ @@@ -434,38 -420,8 +434,38 @@@ struct bpf_array }; };
+#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */ #define MAX_TAIL_CALL_CNT 32
+#define BPF_F_ACCESS_MASK (BPF_F_RDONLY | \ + BPF_F_RDONLY_PROG | \ + BPF_F_WRONLY | \ + BPF_F_WRONLY_PROG) + +#define BPF_MAP_CAN_READ BIT(0) +#define BPF_MAP_CAN_WRITE BIT(1) + +static inline u32 bpf_map_flags_to_cap(struct bpf_map *map) +{ + u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); + + /* Combination of BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG is + * not possible. + */ + if (access_flags & BPF_F_RDONLY_PROG) + return BPF_MAP_CAN_READ; + else if (access_flags & BPF_F_WRONLY_PROG) + return BPF_MAP_CAN_WRITE; + else + return BPF_MAP_CAN_READ | BPF_MAP_CAN_WRITE; +} + +static inline bool bpf_map_flags_access_ok(u32 access_flags) +{ + return (access_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG)) != + (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG); +} + struct bpf_event_entry { struct perf_event *event; struct file *perf_file; @@@ -489,6 -445,14 +489,6 @@@ typedef u32 (*bpf_convert_ctx_access_t) u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy);
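bpf_map_flags_to_cap() above collapses the two program-side access flags into capability bits the verifier can test on every map access. As a usage illustration (plain bpf(2) syscall; assumed, not shown in this diff), a map created with BPF_F_RDONLY_PROG carries BPF_MAP_CAN_READ only, so program-side writes are rejected at load time:

#include <linux/bpf.h>
#include <sys/syscall.h>
#include <unistd.h>

int create_prog_rdonly_map(void)
{
	union bpf_attr attr = {
		.map_type    = BPF_MAP_TYPE_ARRAY,
		.key_size    = sizeof(__u32),
		.value_size  = sizeof(__u64),
		.max_entries = 1,
		.map_flags   = BPF_F_RDONLY_PROG,	/* read-only from BPF side */
	};

	/* Userspace may still update the map through the returned fd;
	 * only bpf_map_update_elem() & friends from the program side
	 * fail ("write into map forbidden", see record_func_map() below). */
	return syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
}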
-int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, - union bpf_attr __user *uattr); -int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, - union bpf_attr __user *uattr); -int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, - const union bpf_attr *kattr, - union bpf_attr __user *uattr); - /* an array of programs to be executed under rcu_lock. * * Typical usage: @@@ -546,7 -510,7 +546,7 @@@ int bpf_prog_array_copy(struct bpf_prog } \ _out: \ rcu_read_unlock(); \ - preempt_enable_no_resched(); \ + preempt_enable(); \ _ret; \ })
@@@ -679,13 -643,6 +679,13 @@@ static inline int bpf_map_attr_numa_nod struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); int array_map_alloc_check(union bpf_attr *attr);
+int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@@ -797,27 -754,6 +797,27 @@@ static inline struct bpf_prog *bpf_prog { return ERR_PTR(-EOPNOTSUPP); } + +static inline int bpf_prog_test_run_xdp(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + +static inline int bpf_prog_test_run_skb(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} + +static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, + const union bpf_attr *kattr, + union bpf_attr __user *uattr) +{ + return -ENOTSUPP; +} #endif /* CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, @@@ -993,8 -929,6 +993,8 @@@ extern const struct bpf_func_proto bpf_ extern const struct bpf_func_proto bpf_spin_lock_proto; extern const struct bpf_func_proto bpf_spin_unlock_proto; extern const struct bpf_func_proto bpf_get_local_storage_proto; +extern const struct bpf_func_proto bpf_strtol_proto; +extern const struct bpf_func_proto bpf_strtoul_proto;
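The two new helper protos pair with the ARG_PTR_TO_INT/ARG_PTR_TO_LONG handling added to the verifier later in this diff: the result pointer must point at correctly sized, aligned memory. A minimal sketch of a caller, assuming the bpf_strtol() signature from this series and the sysctl helpers it was introduced alongside:

SEC("cgroup/sysctl")
int parse_new_value(struct bpf_sysctl *ctx)
{
	char buf[16] = {};
	long val = 0;	/* ARG_PTR_TO_LONG: verifier checks the 8-byte stack slot */

	if (bpf_sysctl_get_new_value(ctx, buf, sizeof(buf)) < 0)
		return 0;
	if (bpf_strtol(buf, sizeof(buf), 0, &val) < 0)
		return 0;	/* reject unparseable writes */
	return val <= 100;	/* only allow small values */
}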
/* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --combined include/net/xfrm.h index eb5018b1cf9c,c9b0b2b5d672..debcc5198e33 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@@ -132,17 -132,6 +132,17 @@@ struct xfrm_state_offload u8 flags; };
+struct xfrm_mode { + u8 encap; + u8 family; + u8 flags; +}; + +/* Flags for xfrm_mode. */ +enum { + XFRM_MODE_FLAG_TUNNEL = 1, +}; + /* Full description of state of transformer. */ struct xfrm_state { possible_net_t xs_net; @@@ -245,9 -234,9 +245,9 @@@ /* Reference to data common to all the instances of this * transformer. */ const struct xfrm_type *type; - struct xfrm_mode *inner_mode; - struct xfrm_mode *inner_mode_iaf; - struct xfrm_mode *outer_mode; + struct xfrm_mode inner_mode; + struct xfrm_mode inner_mode_iaf; + struct xfrm_mode outer_mode;
const struct xfrm_type_offload *type_offload;
@@@ -306,7 -295,8 +306,8 @@@ struct xfrm_replay };
struct xfrm_if_cb { - struct xfrm_if *(*decode_session)(struct sk_buff *skb); + struct xfrm_if *(*decode_session)(struct sk_buff *skb, + unsigned short family); };
void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb); @@@ -326,6 -316,13 +327,6 @@@ struct xfrm_policy_afinfo xfrm_address_t *saddr, xfrm_address_t *daddr, u32 mark); - void (*decode_session)(struct sk_buff *skb, - struct flowi *fl, - int reverse); - int (*get_tos)(const struct flowi *fl); - int (*init_path)(struct xfrm_dst *path, - struct dst_entry *dst, - int nfheader_len); int (*fill_dst)(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl); @@@ -351,6 -348,7 +352,6 @@@ struct xfrm_state_afinfo struct module *owner; const struct xfrm_type *type_map[IPPROTO_MAX]; const struct xfrm_type_offload *type_offload_map[IPPROTO_MAX]; - struct xfrm_mode *mode_map[XFRM_MODE_MAX];
int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_selector *sel, @@@ -425,6 -423,78 +426,6 @@@ struct xfrm_type_offload int xfrm_register_type_offload(const struct xfrm_type_offload *type, unsigned short family); int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, unsigned short family);
-struct xfrm_mode { - /* - * Remove encapsulation header. - * - * The IP header will be moved over the top of the encapsulation - * header. - * - * On entry, the transport header shall point to where the IP header - * should be and the network header shall be set to where the IP - * header currently is. skb->data shall point to the start of the - * payload. - */ - int (*input2)(struct xfrm_state *x, struct sk_buff *skb); - - /* - * This is the actual input entry point. - * - * For transport mode and equivalent this would be identical to - * input2 (which does not need to be set). While tunnel mode - * and equivalent would set this to the tunnel encapsulation function - * xfrm4_prepare_input that would in turn call input2. - */ - int (*input)(struct xfrm_state *x, struct sk_buff *skb); - - /* - * Add encapsulation header. - * - * On exit, the transport header will be set to the start of the - * encapsulation header to be filled in by x->type->output and - * the mac header will be set to the nextheader (protocol for - * IPv4) field of the extension header directly preceding the - * encapsulation header, or in its absence, that of the top IP - * header. The value of the network header will always point - * to the top IP header while skb->data will point to the payload. - */ - int (*output2)(struct xfrm_state *x,struct sk_buff *skb); - - /* - * This is the actual output entry point. - * - * For transport mode and equivalent this would be identical to - * output2 (which does not need to be set). While tunnel mode - * and equivalent would set this to a tunnel encapsulation function - * (xfrm4_prepare_output or xfrm6_prepare_output) that would in turn - * call output2. - */ - int (*output)(struct xfrm_state *x, struct sk_buff *skb); - - /* - * Adjust pointers into the packet and do GSO segmentation. - */ - struct sk_buff *(*gso_segment)(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features); - - /* - * Adjust pointers into the packet when IPsec is done at layer2. - */ - void (*xmit)(struct xfrm_state *x, struct sk_buff *skb); - - struct xfrm_state_afinfo *afinfo; - struct module *owner; - unsigned int encap; - int flags; -}; - -/* Flags for xfrm_mode. */ -enum { - XFRM_MODE_FLAG_TUNNEL = 1, -}; - -int xfrm_register_mode(struct xfrm_mode *mode, int family); -int xfrm_unregister_mode(struct xfrm_mode *mode, int family); - static inline int xfrm_af2proto(unsigned int family) { switch(family) { @@@ -437,13 -507,13 +438,13 @@@ } }
-static inline struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto) +static inline const struct xfrm_mode *xfrm_ip2inner_mode(struct xfrm_state *x, int ipproto) { if ((ipproto == IPPROTO_IPIP && x->props.family == AF_INET) || (ipproto == IPPROTO_IPV6 && x->props.family == AF_INET6)) - return x->inner_mode; + return &x->inner_mode; else - return x->inner_mode_iaf; + return &x->inner_mode_iaf; }
struct xfrm_tmpl { @@@ -1335,6 -1405,23 +1336,23 @@@ static inline int xfrm_state_kern(cons return atomic_read(&x->tunnel_users); }
+ static inline bool xfrm_id_proto_valid(u8 proto) + { + switch (proto) { + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_COMP: + #if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ROUTING: + case IPPROTO_DSTOPTS: + #endif + return true; + default: + return false; + } + } + + /* IPSEC_PROTO_ANY only matches 3 IPsec protocols, 0 could match all. */ static inline int xfrm_id_proto_match(u8 proto, u8 userproto) { return (!userproto || proto == userproto || @@@ -1536,6 -1623,7 +1554,6 @@@ int xfrm_init_replay(struct xfrm_state int xfrm_state_mtu(struct xfrm_state *x, int mtu); int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload); int xfrm_init_state(struct xfrm_state *x); -int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb); int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type); int xfrm_input_resume(struct sk_buff *skb, int nexthdr); int xfrm_trans_queue(struct sk_buff *skb, @@@ -1543,11 -1631,7 +1561,11 @@@ struct sk_buff *)); int xfrm_output_resume(struct sk_buff *skb, int err); int xfrm_output(struct sock *sk, struct sk_buff *skb); -int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb); + +#if IS_ENABLED(CONFIG_NET_PKTGEN) +int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb); +#endif + void xfrm_local_error(struct sk_buff *skb, int mtu); int xfrm4_extract_header(struct sk_buff *skb); int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb); @@@ -1566,8 -1650,10 +1584,8 @@@ static inline int xfrm4_rcv_spi(struct }
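xfrm_id_proto_valid() above gives configuration paths a whitelist check; elsewhere in the series (assumed, not visible in this excerpt) it rejects SAs whose xfrm_id carries a protocol the stack cannot actually handle:

	/* representative call site, e.g. in newsa validation: */
	if (!xfrm_id_proto_valid(p->id.proto))
		return -EINVAL;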
int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb); -int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb); int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb); -int xfrm4_rcv_cb(struct sk_buff *skb, u8 protocol, int err); int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol); int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char protocol); int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); @@@ -1583,6 -1669,7 +1601,6 @@@ int xfrm6_rcv(struct sk_buff *skb) int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); void xfrm6_local_error(struct sk_buff *skb, u32 mtu); -int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err); int xfrm6_protocol_register(struct xfrm6_protocol *handler, unsigned char protocol); int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, unsigned char protocol); int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); @@@ -1590,6 -1677,7 +1608,6 @@@ int xfrm6_tunnel_deregister(struct xfrm __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr); int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); -int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb); int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb); int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, @@@ -1981,7 -2069,7 +1999,7 @@@ static inline int xfrm_tunnel_check(str tunnel = true; break; } - if (tunnel && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)) + if (tunnel && !(x->outer_mode.flags & XFRM_MODE_FLAG_TUNNEL)) return -EINVAL;
return 0; diff --combined kernel/bpf/verifier.c index 271717246af3,09d5d972c9ff..7b05e8938d5c --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@@ -176,6 -176,7 +176,6 @@@ struct bpf_verifier_stack_elem struct bpf_verifier_stack_elem *next; };
-#define BPF_COMPLEXITY_LIMIT_INSNS 131072 #define BPF_COMPLEXITY_LIMIT_STACK 1024 #define BPF_COMPLEXITY_LIMIT_STATES 64
@@@ -376,8 -377,7 +376,8 @@@ static bool is_release_function(enum bp static bool is_acquire_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_sk_lookup_tcp || - func_id == BPF_FUNC_sk_lookup_udp; + func_id == BPF_FUNC_sk_lookup_udp || + func_id == BPF_FUNC_skc_lookup_tcp; }
static bool is_ptr_cast_function(enum bpf_func_id func_id) @@@ -405,7 -405,6 +405,7 @@@ static const char * const reg_type_str[ [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", [PTR_TO_TCP_SOCK] = "tcp_sock", [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", + [PTR_TO_TP_BUFFER] = "tp_buffer", };
static char slot_type_char[] = { @@@ -1092,7 -1091,7 +1092,7 @@@ static int check_subprogs(struct bpf_ve */ subprog[env->subprog_cnt].start = insn_cnt;
- if (env->log.level > 1) + if (env->log.level & BPF_LOG_LEVEL2) for (i = 0; i < env->subprog_cnt; i++) verbose(env, "func#%d @%d\n", i, subprog[i].start);
@@@ -1139,7 -1138,6 +1139,7 @@@ static int mark_reg_read(struct bpf_ver struct bpf_reg_state *parent) { bool writes = parent == state->parent; /* Observe write marks */ + int cnt = 0;
while (parent) { /* if read wasn't screened by an earlier write ... */ @@@ -1151,25 -1149,12 +1151,25 @@@ parent->var_off.value, parent->off); return -EFAULT; } + if (parent->live & REG_LIVE_READ) + /* The parentage chain never changes and + * this parent was already marked as LIVE_READ. + * There is no need to keep walking the chain again and + * keep re-marking all parents as LIVE_READ. + * This case happens when the same register is read + * multiple times without writes into it in-between. + */ + break; /* ... then we depend on parent's value */ parent->live |= REG_LIVE_READ; state = parent; parent = state->parent; writes = true; + cnt++; } + + if (env->longest_mark_read_walk < cnt) + env->longest_mark_read_walk = cnt; return 0; }
@@@ -1178,32 -1163,30 +1178,32 @@@ static int check_reg_arg(struct bpf_ver { struct bpf_verifier_state *vstate = env->cur_state; struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *reg, *regs = state->regs;
if (regno >= MAX_BPF_REG) { verbose(env, "R%d is invalid\n", regno); return -EINVAL; }
+ reg = ®s[regno]; if (t == SRC_OP) { /* check whether register used as source operand can be read */ - if (regs[regno].type == NOT_INIT) { + if (reg->type == NOT_INIT) { verbose(env, "R%d !read_ok\n", regno); return -EACCES; } /* We don't need to worry about FP liveness because it's read-only */ - if (regno != BPF_REG_FP) - return mark_reg_read(env, ®s[regno], - regs[regno].parent); + if (regno == BPF_REG_FP) + return 0; + + return mark_reg_read(env, reg, reg->parent); } else { /* check whether register used as dest operand can be written to */ if (regno == BPF_REG_FP) { verbose(env, "frame pointer is read only\n"); return -EACCES; } - regs[regno].live |= REG_LIVE_WRITTEN; + reg->live |= REG_LIVE_WRITTEN; if (t == DST_OP) mark_reg_unknown(env, regs, regno); } @@@ -1429,7 -1412,7 +1429,7 @@@ static int check_stack_access(struct bp char tn_buf[48];
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "variable stack access var_off=%s off=%d size=%d", + verbose(env, "variable stack access var_off=%s off=%d size=%d\n", tn_buf, off, size); return -EACCES; } @@@ -1442,28 -1425,6 +1442,28 @@@ return 0; }
+static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, + int off, int size, enum bpf_access_type type) +{ + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_map *map = regs[regno].map_ptr; + u32 cap = bpf_map_flags_to_cap(map); + + if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) { + verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n", + map->value_size, off, size); + return -EACCES; + } + + if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) { + verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n", + map->value_size, off, size); + return -EACCES; + } + + return 0; +} + /* check read/write into map element returned by bpf_map_lookup_elem() */ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size, bool zero_size_allowed) @@@ -1493,7 -1454,7 +1493,7 @@@ static int check_map_access(struct bpf_ * need to try adding each of min_value and max_value to off * to make sure our theoretical access will be safe. */ - if (env->log.level) + if (env->log.level & BPF_LOG_LEVEL) print_verifier_state(env, state);
/* The minimum value is only important with signed @@@ -1994,32 -1955,6 +1994,32 @@@ static int check_ctx_reg(struct bpf_ver return 0; }
+static int check_tp_buffer_access(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + int regno, int off, int size) +{ + if (off < 0) { + verbose(env, + "R%d invalid tracepoint buffer access: off=%d, size=%d", + regno, off, size); + return -EACCES; + } + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, + "R%d invalid variable buffer offset: off=%d, var_off=%s", + regno, off, tn_buf); + return -EACCES; + } + if (off + size > env->prog->aux->max_tp_access) + env->prog->aux->max_tp_access = off + size; + + return 0; +} + + /* truncate register to smaller size (in bytes) * must be called with size < BPF_REG_SIZE */ @@@ -2076,9 -2011,7 +2076,9 @@@ static int check_mem_access(struct bpf_ verbose(env, "R%d leaks addr into map\n", value_regno); return -EACCES; } - + err = check_map_access_type(env, regno, off, size, t); + if (err) + return err; err = check_map_access(env, regno, off, size, false); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); @@@ -2164,10 -2097,6 +2164,10 @@@ err = check_sock_access(env, insn_idx, regno, off, size, t); if (!err && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_TP_BUFFER) { + err = check_tp_buffer_access(env, reg, regno, off, size); + if (!err && t == BPF_READ && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); @@@ -2228,29 -2157,6 +2228,29 @@@ static int check_xadd(struct bpf_verifi BPF_SIZE(insn->code), BPF_WRITE, -1, true); }
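check_tp_buffer_access() records the highest offset a program touches via max_tp_access, so attach time can compare it against the size of the buffer the writable raw tracepoint actually supplies. A minimal sketch of a consumer, assuming a hypothetical writable tracepoint and libbpf's raw_tracepoint.w section convention:

struct sample_ctx {		/* layout assumed to match the tracepoint */
	__u64 writable_field;
};

SEC("raw_tracepoint.w/sample_tp")
int tp_writer(struct sample_ctx *ctx)
{
	/* An 8-byte store at offset 0 drives max_tp_access to 8;
	 * attaching fails if the tracepoint supplies fewer bytes. */
	ctx->writable_field = 1;
	return 0;
}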
+static int __check_stack_boundary(struct bpf_verifier_env *env, u32 regno, + int off, int access_size, + bool zero_size_allowed) +{ + struct bpf_reg_state *reg = reg_state(env, regno); + + if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || + access_size < 0 || (access_size == 0 && !zero_size_allowed)) { + if (tnum_is_const(reg->var_off)) { + verbose(env, "invalid stack type R%d off=%d access_size=%d\n", + regno, off, access_size); + } else { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "invalid stack type R%d var_off=%s access_size=%d\n", + regno, tn_buf, access_size); + } + return -EACCES; + } + return 0; +} + /* when register 'regno' is passed into function that will read 'access_size' * bytes from that pointer, make sure that it's within stack boundary * and all elements of stack are initialized. @@@ -2263,7 -2169,7 +2263,7 @@@ static int check_stack_boundary(struct { struct bpf_reg_state *reg = reg_state(env, regno); struct bpf_func_state *state = func(env, reg); - int off, i, slot, spi; + int err, min_off, max_off, i, slot, spi;
if (reg->type != PTR_TO_STACK) { /* Allow zero-byte read from NULL, regardless of pointer type */ @@@ -2277,57 -2183,21 +2277,57 @@@ return -EACCES; }
- /* Only allow fixed-offset stack reads */ - if (!tnum_is_const(reg->var_off)) { - char tn_buf[48]; + if (tnum_is_const(reg->var_off)) { + min_off = max_off = reg->var_off.value + reg->off; + err = __check_stack_boundary(env, regno, min_off, access_size, + zero_size_allowed); + if (err) + return err; + } else { + /* Variable offset is prohibited for unprivileged mode for + * simplicity since it requires corresponding support in + * Spectre masking for stack ALU. + * See also retrieve_ptr_limit(). + */ + if (!env->allow_ptr_leaks) { + char tn_buf[48];
- tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose(env, "invalid variable stack read R%d var_off=%s\n", - regno, tn_buf); - return -EACCES; - } - off = reg->off + reg->var_off.value; - if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || - access_size < 0 || (access_size == 0 && !zero_size_allowed)) { - verbose(env, "invalid stack type R%d off=%d access_size=%d\n", - regno, off, access_size); - return -EACCES; + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "R%d indirect variable offset stack access prohibited for !root, var_off=%s\n", + regno, tn_buf); + return -EACCES; + } + /* Only initialized buffer on stack is allowed to be accessed + * with variable offset. With uninitialized buffer it's hard to + * guarantee that whole memory is marked as initialized on + * helper return since specific bounds are unknown what may + * cause uninitialized stack leaking. + */ + if (meta && meta->raw_mode) + meta = NULL; + + if (reg->smax_value >= BPF_MAX_VAR_OFF || + reg->smax_value <= -BPF_MAX_VAR_OFF) { + verbose(env, "R%d unbounded indirect variable offset stack access\n", + regno); + return -EACCES; + } + min_off = reg->smin_value + reg->off; + max_off = reg->smax_value + reg->off; + err = __check_stack_boundary(env, regno, min_off, access_size, + zero_size_allowed); + if (err) { + verbose(env, "R%d min value is outside of stack bound\n", + regno); + return err; + } + err = __check_stack_boundary(env, regno, max_off, access_size, + zero_size_allowed); + if (err) { + verbose(env, "R%d max value is outside of stack bound\n", + regno); + return err; + } }
if (meta && meta->raw_mode) { @@@ -2336,10 -2206,10 +2336,10 @@@ return 0; }
- for (i = 0; i < access_size; i++) { + for (i = min_off; i < max_off + access_size; i++) { u8 *stype;
- slot = -(off + i) - 1; + slot = -i - 1; spi = slot / BPF_REG_SIZE; if (state->allocated_stack <= slot) goto err; @@@ -2352,16 -2222,8 +2352,16 @@@ goto mark; } err: - verbose(env, "invalid indirect read from stack off %d+%d size %d\n", - off, i, access_size); + if (tnum_is_const(reg->var_off)) { + verbose(env, "invalid indirect read from stack off %d+%d size %d\n", + min_off, i - min_off, access_size); + } else { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "invalid indirect read from stack var_off %s+%d size %d\n", + tn_buf, i - min_off, access_size); + } return -EACCES; mark: /* reading any byte out of 8-byte 'spill_slot' will cause @@@ -2370,7 -2232,7 +2370,7 @@@ mark_reg_read(env, &state->stack[spi].spilled_ptr, state->stack[spi].spilled_ptr.parent); } - return update_stack_depth(env, state, off); + return update_stack_depth(env, state, min_off); }
static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, @@@ -2385,10 -2247,6 +2385,10 @@@ return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed); case PTR_TO_MAP_VALUE: + if (check_map_access_type(env, regno, reg->off, access_size, + meta && meta->raw_mode ? BPF_WRITE : + BPF_READ)) + return -EACCES; return check_map_access(env, regno, reg->off, access_size, zero_size_allowed); default: /* scalar_value|ptr_to_stack or invalid ptr */ @@@ -2495,22 -2353,6 +2495,22 @@@ static bool arg_type_is_mem_size(enum b type == ARG_CONST_SIZE_OR_ZERO; }
+static bool arg_type_is_int_ptr(enum bpf_arg_type type) +{ + return type == ARG_PTR_TO_INT || + type == ARG_PTR_TO_LONG; +} + +static int int_ptr_type_to_size(enum bpf_arg_type type) +{ + if (type == ARG_PTR_TO_INT) + return sizeof(u32); + else if (type == ARG_PTR_TO_LONG) + return sizeof(u64); + + return -EINVAL; +} + static int check_func_arg(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) @@@ -2543,15 -2385,10 +2543,15 @@@
if (arg_type == ARG_PTR_TO_MAP_KEY || arg_type == ARG_PTR_TO_MAP_VALUE || - arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { + arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || + arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { expected_type = PTR_TO_STACK; - if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE && - type != expected_type) + if (register_is_null(reg) && + arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) + /* final test in check_stack_boundary() */; + else if (!type_is_pkt_pointer(type) && + type != PTR_TO_MAP_VALUE && + type != expected_type) goto err_type; } else if (arg_type == ARG_CONST_SIZE || arg_type == ARG_CONST_SIZE_OR_ZERO) { @@@ -2583,10 -2420,6 +2583,10 @@@ } meta->ref_obj_id = reg->ref_obj_id; } + } else if (arg_type == ARG_PTR_TO_SOCKET) { + expected_type = PTR_TO_SOCKET; + if (type != expected_type) + goto err_type; } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { if (meta->func_id == BPF_FUNC_spin_lock) { if (process_spin_lock(env, regno, true)) @@@ -2612,12 -2445,6 +2612,12 @@@ type != expected_type) goto err_type; meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; + } else if (arg_type_is_int_ptr(arg_type)) { + expected_type = PTR_TO_STACK; + if (!type_is_pkt_pointer(type) && + type != PTR_TO_MAP_VALUE && + type != expected_type) + goto err_type; } else { verbose(env, "unsupported arg_type %d\n", arg_type); return -EFAULT; @@@ -2644,8 -2471,6 +2644,8 @@@ meta->map_ptr->key_size, false, NULL); } else if (arg_type == ARG_PTR_TO_MAP_VALUE || + (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && + !register_is_null(reg)) || arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity @@@ -2701,13 -2526,6 +2701,13 @@@ err = check_helper_mem_access(env, regno - 1, reg->umax_value, zero_size_allowed, meta); + } else if (arg_type_is_int_ptr(arg_type)) { + int size = int_ptr_type_to_size(arg_type); + + err = check_helper_mem_access(env, regno, size, false, meta); + if (err) + return err; + err = check_ptr_alignment(env, reg, 0, size, true); }
return err; @@@ -2795,11 -2613,6 +2795,11 @@@ static int check_map_func_compatibility func_id != BPF_FUNC_map_push_elem) goto error; break; + case BPF_MAP_TYPE_SK_STORAGE: + if (func_id != BPF_FUNC_sk_storage_get && + func_id != BPF_FUNC_sk_storage_delete) + goto error; + break; default: break; } @@@ -2863,11 -2676,6 +2863,11 @@@ map->map_type != BPF_MAP_TYPE_STACK) goto error; break; + case BPF_FUNC_sk_storage_get: + case BPF_FUNC_sk_storage_delete: + if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) + goto error; + break; default: break; } @@@ -3097,7 -2905,7 +3097,7 @@@ static int check_func_call(struct bpf_v /* and go analyze first insn of the callee */ *insn_idx = target_insn;
- if (env->log.level) { + if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "caller:\n"); print_verifier_state(env, caller); verbose(env, "callee:\n"); @@@ -3137,7 -2945,7 +3137,7 @@@ static int prepare_func_exit(struct bpf return err;
*insn_idx = callee->callsite + 1; - if (env->log.level) { + if (env->log.level & BPF_LOG_LEVEL) { verbose(env, "returning from callee:\n"); print_verifier_state(env, callee); verbose(env, "to caller at %d:\n", *insn_idx); @@@ -3171,7 -2979,6 +3171,7 @@@ record_func_map(struct bpf_verifier_en int func_id, int insn_idx) { struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx]; + struct bpf_map *map = meta->map_ptr;
if (func_id != BPF_FUNC_tail_call && func_id != BPF_FUNC_map_lookup_elem && @@@ -3182,24 -2989,11 +3182,24 @@@ func_id != BPF_FUNC_map_peek_elem) return 0;
- if (meta->map_ptr == NULL) { + if (map == NULL) { verbose(env, "kernel subsystem misconfigured verifier\n"); return -EINVAL; }
+	/* For read-only maps, additional restrictions are needed to
+	 * prevent the program side from altering the map's state.
+	 */
+	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
+	    (func_id == BPF_FUNC_map_delete_elem ||
+	     func_id == BPF_FUNC_map_update_elem ||
+	     func_id == BPF_FUNC_map_push_elem ||
+	     func_id == BPF_FUNC_map_pop_elem)) {
+		verbose(env, "write into map forbidden\n");
+		return -EACCES;
+	}
+
 	if (!BPF_MAP_PTR(aux->map_state))
 		bpf_map_ptr_store(aux, meta->map_ptr,
 				  meta->map_ptr->unpriv_array);
- if (is_ptr_cast_function(func_id)) + if (is_ptr_cast_function(func_id)) { /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; + } else if (is_acquire_function(func_id)) { + int id = acquire_reference_state(env, insn_idx); + + if (id < 0) + return id; + /* For mark_ptr_or_null_reg() */ + regs[BPF_REG_0].id = id; + /* For release_reference() */ + regs[BPF_REG_0].ref_obj_id = id; + }
do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
@@@ -3490,9 -3282,6 +3490,9 @@@ static int retrieve_ptr_limit(const str
switch (ptr_reg->type) { case PTR_TO_STACK: + /* Indirect variable offset stack access is prohibited in + * unprivileged mode so it's not handled here. + */ off = ptr_reg->off + ptr_reg->var_off.value; if (mask_to_left) *ptr_limit = MAX_BPF_STACK + off; @@@ -4349,15 -4138,35 +4349,35 @@@ static int check_alu_op(struct bpf_veri return 0; }
+ static void __find_good_pkt_pointers(struct bpf_func_state *state, + struct bpf_reg_state *dst_reg, + enum bpf_reg_type type, u16 new_range) + { + struct bpf_reg_state *reg; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) { + reg = &state->regs[i]; + if (reg->type == type && reg->id == dst_reg->id) + /* keep the maximum range already checked */ + reg->range = max(reg->range, new_range); + } + + bpf_for_each_spilled_reg(i, state, reg) { + if (!reg) + continue; + if (reg->type == type && reg->id == dst_reg->id) + reg->range = max(reg->range, new_range); + } + } + static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, struct bpf_reg_state *dst_reg, enum bpf_reg_type type, bool range_right_open) { - struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *regs = state->regs, *reg; u16 new_range; - int i, j; + int i;
if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) @@@ -4422,20 -4231,9 +4442,9 @@@ * the range won't allow anything. * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16. */ - for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].type == type && regs[i].id == dst_reg->id) - /* keep the maximum range already checked */ - regs[i].range = max(regs[i].range, new_range); - - for (j = 0; j <= vstate->curframe; j++) { - state = vstate->frame[j]; - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - if (reg->type == type && reg->id == dst_reg->id) - reg->range = max(reg->range, new_range); - } - } + for (i = 0; i <= vstate->curframe; i++) + __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, + new_range); }
/* compute branch direction of the expression "if (reg opcode val) goto target;" @@@ -4909,6 -4707,22 +4918,22 @@@ static void mark_ptr_or_null_reg(struc } }
+ static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, + bool is_null) + { + struct bpf_reg_state *reg; + int i; + + for (i = 0; i < MAX_BPF_REG; i++) + mark_ptr_or_null_reg(state, &state->regs[i], id, is_null); + + bpf_for_each_spilled_reg(i, state, reg) { + if (!reg) + continue; + mark_ptr_or_null_reg(state, reg, id, is_null); + } + } + /* The logic is similar to find_good_pkt_pointers(), both could eventually * be folded together at some point. */ @@@ -4916,10 -4730,10 +4941,10 @@@ static void mark_ptr_or_null_regs(struc bool is_null) { struct bpf_func_state *state = vstate->frame[vstate->curframe]; - struct bpf_reg_state *reg, *regs = state->regs; + struct bpf_reg_state *regs = state->regs; u32 ref_obj_id = regs[regno].ref_obj_id; u32 id = regs[regno].id; - int i, j; + int i;
if (ref_obj_id && ref_obj_id == id && is_null) /* regs[regno] is in the " == NULL" branch. @@@ -4928,17 -4742,8 +4953,8 @@@ */ WARN_ON_ONCE(release_reference_state(state, id));
- for (i = 0; i < MAX_BPF_REG; i++) - mark_ptr_or_null_reg(state, ®s[i], id, is_null); - - for (j = 0; j <= vstate->curframe; j++) { - state = vstate->frame[j]; - bpf_for_each_spilled_reg(i, state, reg) { - if (!reg) - continue; - mark_ptr_or_null_reg(state, reg, id, is_null); - } - } + for (i = 0; i <= vstate->curframe; i++) + __mark_ptr_or_null_regs(vstate->frame[i], id, is_null); }
static bool try_match_pkt_pointers(const struct bpf_insn *insn, @@@ -5177,17 -4982,23 +5193,17 @@@ static int check_cond_jmp_op(struct bpf insn->dst_reg); return -EACCES; } - if (env->log.level) + if (env->log.level & BPF_LOG_LEVEL) print_verifier_state(env, this_branch->frame[this_branch->curframe]); return 0; }
-/* return the map pointer stored inside BPF_LD_IMM64 instruction */ -static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn) -{ - u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32; - - return (struct bpf_map *) (unsigned long) imm64; -} - /* verify BPF_LD_IMM64 instruction */ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) { + struct bpf_insn_aux_data *aux = cur_aux(env); struct bpf_reg_state *regs = cur_regs(env); + struct bpf_map *map; int err;
if (BPF_SIZE(insn->code) != BPF_DW) { @@@ -5211,22 -5022,11 +5227,22 @@@ return 0; }
- /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */ - BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD); + map = env->used_maps[aux->map_index]; + mark_reg_known_zero(env, regs, insn->dst_reg); + regs[insn->dst_reg].map_ptr = map; + + if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) { + regs[insn->dst_reg].type = PTR_TO_MAP_VALUE; + regs[insn->dst_reg].off = aux->map_off; + if (map_value_has_spin_lock(map)) + regs[insn->dst_reg].id = ++env->id_gen; + } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) { + regs[insn->dst_reg].type = CONST_PTR_TO_MAP; + } else { + verbose(env, "bpf verifier is misconfigured\n"); + return -EINVAL; + }
- regs[insn->dst_reg].type = CONST_PTR_TO_MAP; - regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn); return 0; }
@@@ -5350,7 -5150,6 +5366,7 @@@ static int check_return_code(struct bpf case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_SOCK_OPS: case BPF_PROG_TYPE_CGROUP_DEVICE: + case BPF_PROG_TYPE_CGROUP_SYSCTL: break; default: return 0; @@@ -5421,6 -5220,10 +5437,6 @@@ enum
#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
-static int *insn_stack; /* stack of insns to process */ -static int cur_stack; /* current stack index */ -static int *insn_state; - /* t, w, e - match pseudo-code above: * t - index of current instruction * w - next instruction @@@ -5428,9 -5231,6 +5444,9 @@@ */ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) { + int *insn_stack = env->cfg.insn_stack; + int *insn_state = env->cfg.insn_state; + if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) return 0;
@@@ -5451,9 -5251,9 +5467,9 @@@ /* tree-edge */ insn_state[t] = DISCOVERED | e; insn_state[w] = DISCOVERED; - if (cur_stack >= env->prog->len) + if (env->cfg.cur_stack >= env->prog->len) return -E2BIG; - insn_stack[cur_stack++] = w; + insn_stack[env->cfg.cur_stack++] = w; return 1; } else if ((insn_state[w] & 0xF0) == DISCOVERED) { verbose_linfo(env, t, "%d: ", t); @@@ -5477,28 -5277,27 +5493,28 @@@ static int check_cfg(struct bpf_verifie { struct bpf_insn *insns = env->prog->insnsi; int insn_cnt = env->prog->len; + int *insn_stack, *insn_state; int ret = 0; int i, t;
- insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); if (!insn_state) return -ENOMEM;
- insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL); + insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL); if (!insn_stack) { - kfree(insn_state); + kvfree(insn_state); return -ENOMEM; }
insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */ insn_stack[0] = 0; /* 0 is the first instruction */ - cur_stack = 1; + env->cfg.cur_stack = 1;
peek_stack: - if (cur_stack == 0) + if (env->cfg.cur_stack == 0) goto check_state; - t = insn_stack[cur_stack - 1]; + t = insn_stack[env->cfg.cur_stack - 1];
if (BPF_CLASS(insns[t].code) == BPF_JMP || BPF_CLASS(insns[t].code) == BPF_JMP32) { @@@ -5567,7 -5366,7 +5583,7 @@@
mark_explored: insn_state[t] = EXPLORED; - if (cur_stack-- <= 0) { + if (env->cfg.cur_stack-- <= 0) { verbose(env, "pop stack internal bug\n"); ret = -EFAULT; goto err_free; @@@ -5585,9 -5384,8 +5601,9 @@@ check_state ret = 0; /* cfg looks good */
err_free: - kfree(insn_state); - kfree(insn_stack); + kvfree(insn_state); + kvfree(insn_stack); + env->cfg.insn_state = env->cfg.insn_stack = NULL; return ret; }
@@@ -6276,22 -6074,6 +6292,22 @@@ static bool states_equal(struct bpf_ver return true; }
+static int propagate_liveness_reg(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, + struct bpf_reg_state *parent_reg) +{ + int err; + + if (parent_reg->live & REG_LIVE_READ || !(reg->live & REG_LIVE_READ)) + return 0; + + err = mark_reg_read(env, reg, parent_reg); + if (err) + return err; + + return 0; +} + /* A write screens off any subsequent reads; but write marks come from the * straight-line code between a state and its parent. When we arrive at an * equivalent state (jump target or such) we didn't arrive by the straight-line @@@ -6303,9 -6085,8 +6319,9 @@@ static int propagate_liveness(struct bp const struct bpf_verifier_state *vstate, struct bpf_verifier_state *vparent) { - int i, frame, err = 0; + struct bpf_reg_state *state_reg, *parent_reg; struct bpf_func_state *state, *parent; + int i, frame, err = 0;
if (vparent->curframe != vstate->curframe) { WARN(1, "propagate_live: parent frame %d current frame %d\n", @@@ -6315,27 -6096,30 +6331,27 @@@ /* Propagate read liveness of registers... */ BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); for (frame = 0; frame <= vstate->curframe; frame++) { + parent = vparent->frame[frame]; + state = vstate->frame[frame]; + parent_reg = parent->regs; + state_reg = state->regs; /* We don't need to worry about FP liveness, it's read-only */ for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { - if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ) - continue; - if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) { - err = mark_reg_read(env, &vstate->frame[frame]->regs[i], - &vparent->frame[frame]->regs[i]); - if (err) - return err; - } + err = propagate_liveness_reg(env, &state_reg[i], + &parent_reg[i]); + if (err) + return err; } - }
- /* ... and stack slots */ - for (frame = 0; frame <= vstate->curframe; frame++) { - state = vstate->frame[frame]; - parent = vparent->frame[frame]; + /* Propagate stack slots. */ for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && i < parent->allocated_stack / BPF_REG_SIZE; i++) { - if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ) - continue; - if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) - mark_reg_read(env, &state->stack[i].spilled_ptr, - &parent->stack[i].spilled_ptr); + parent_reg = &parent->stack[i].spilled_ptr; + state_reg = &state->stack[i].spilled_ptr; + err = propagate_liveness_reg(env, state_reg, + parent_reg); + if (err) + return err; } } return err; @@@ -6344,13 -6128,11 +6360,13 @@@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { struct bpf_verifier_state_list *new_sl; - struct bpf_verifier_state_list *sl; + struct bpf_verifier_state_list *sl, **pprev; struct bpf_verifier_state *cur = env->cur_state, *new; int i, j, err, states_cnt = 0;
- sl = env->explored_states[insn_idx]; + pprev = &env->explored_states[insn_idx]; + sl = *pprev; + if (!sl) /* this 'insn_idx' instruction wasn't marked, so we will not * be doing state search here @@@ -6361,7 -6143,6 +6377,7 @@@
while (sl != STATE_LIST_MARK) { if (states_equal(env, &sl->state, cur)) { + sl->hit_cnt++; /* reached equivalent register/stack state, * prune the search. * Registers read by the continuation are read by us. @@@ -6377,40 -6158,10 +6393,40 @@@ return err; return 1; } - sl = sl->next; states_cnt++; + sl->miss_cnt++; + /* heuristic to determine whether this state is beneficial + * to keep checking from state equivalence point of view. + * Higher numbers increase max_states_per_insn and verification time, + * but do not meaningfully decrease insn_processed. + */ + if (sl->miss_cnt > sl->hit_cnt * 3 + 3) { + /* the state is unlikely to be useful. Remove it to + * speed up verification + */ + *pprev = sl->next; + if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) { + free_verifier_state(&sl->state, false); + kfree(sl); + env->peak_states--; + } else { + /* cannot free this state, since parentage chain may + * walk it later. Add it for free_list instead to + * be freed at the end of verification + */ + sl->next = env->free_list; + env->free_list = sl; + } + sl = *pprev; + continue; + } + pprev = &sl->next; + sl = *pprev; }
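To make the eviction heuristic concrete, a worked example (numbers illustrative, derived from the expression above):

	/* hit_cnt == 0: evicted on the 4th miss  (4 > 0 * 3 + 3)
	 * hit_cnt == 2: evicted on the 10th miss (10 > 2 * 3 + 3)
	 * Frequently matching states survive; cold ones are freed, or
	 * parked on env->free_list while the parentage chain may still
	 * be walked. */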
+ if (env->max_states_per_insn < states_cnt) + env->max_states_per_insn = states_cnt; + if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) return 0;
@@@ -6424,8 -6175,6 +6440,8 @@@ new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); if (!new_sl) return -ENOMEM; + env->total_states++; + env->peak_states++;
/* add new state to the head of linked list */ new = &new_sl->state; @@@ -6510,7 -6259,8 +6526,7 @@@ static int do_check(struct bpf_verifier struct bpf_verifier_state *state; struct bpf_insn *insns = env->prog->insnsi; struct bpf_reg_state *regs; - int insn_cnt = env->prog->len, i; - int insn_processed = 0; + int insn_cnt = env->prog->len; bool do_print_state = false;
env->prev_linfo = NULL; @@@ -6545,10 -6295,10 +6561,10 @@@ insn = &insns[env->insn_idx]; class = BPF_CLASS(insn->code);
- if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { + if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { verbose(env, "BPF program is too large. Processed %d insn\n", - insn_processed); + env->insn_processed); return -E2BIG; }
@@@ -6557,7 -6307,7 +6573,7 @@@ return err; if (err == 1) { /* found equivalent state, can prune the search */ - if (env->log.level) { + if (env->log.level & BPF_LOG_LEVEL) { if (do_print_state) verbose(env, "\nfrom %d to %d%s: safe\n", env->prev_insn_idx, env->insn_idx, @@@ -6575,9 -6325,8 +6591,9 @@@ if (need_resched()) cond_resched();
- if (env->log.level > 1 || (env->log.level && do_print_state)) { - if (env->log.level > 1) + if (env->log.level & BPF_LOG_LEVEL2 || + (env->log.level & BPF_LOG_LEVEL && do_print_state)) { + if (env->log.level & BPF_LOG_LEVEL2) verbose(env, "%d:", env->insn_idx); else verbose(env, "\nfrom %d to %d%s:", @@@ -6588,7 -6337,7 +6604,7 @@@ do_print_state = false; }
- if (env->log.level) { + if (env->log.level & BPF_LOG_LEVEL) { const struct bpf_insn_cbs cbs = { .cb_print = verbose, .private_data = env, @@@ -6853,6 -6602,16 +6869,6 @@@ process_bpf_exit env->insn_idx++; }
- verbose(env, "processed %d insns (limit %d), stack depth ", - insn_processed, BPF_COMPLEXITY_LIMIT_INSNS); - for (i = 0; i < env->subprog_cnt; i++) { - u32 depth = env->subprog_info[i].stack_depth; - - verbose(env, "%d", depth); - if (i + 1 < env->subprog_cnt) - verbose(env, "+"); - } - verbose(env, "\n"); env->prog->aux->stack_depth = env->subprog_info[0].stack_depth; return 0; } @@@ -6950,10 -6709,8 +6966,10 @@@ static int replace_map_fd_with_map_ptr( }
if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) { + struct bpf_insn_aux_data *aux; struct bpf_map *map; struct fd f; + u64 addr;
if (i == insn_cnt - 1 || insn[1].code != 0 || insn[1].dst_reg != 0 || insn[1].src_reg != 0 || @@@ -6962,19 -6719,13 +6978,19 @@@ return -EINVAL; }
- if (insn->src_reg == 0) + if (insn[0].src_reg == 0) /* valid generic load 64-bit imm */ goto next_insn;
- if (insn[0].src_reg != BPF_PSEUDO_MAP_FD || - insn[1].imm != 0) { - verbose(env, "unrecognized bpf_ld_imm64 insn\n"); + /* In final convert_pseudo_ld_imm64() step, this is + * converted into regular 64-bit imm load insn. + */ + if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && + insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) || + (insn[0].src_reg == BPF_PSEUDO_MAP_FD && + insn[1].imm != 0)) { + verbose(env, + "unrecognized bpf_ld_imm64 insn\n"); return -EINVAL; }
@@@ -6992,47 -6743,16 +7008,47 @@@ return err; }
- /* store map pointer inside BPF_LD_IMM64 instruction */ - insn[0].imm = (u32) (unsigned long) map; - insn[1].imm = ((u64) (unsigned long) map) >> 32; + aux = &env->insn_aux_data[i]; + if (insn->src_reg == BPF_PSEUDO_MAP_FD) { + addr = (unsigned long)map; + } else { + u32 off = insn[1].imm; + + if (off >= BPF_MAX_VAR_OFF) { + verbose(env, "direct value offset of %u is not allowed\n", off); + fdput(f); + return -EINVAL; + } + + if (!map->ops->map_direct_value_addr) { + verbose(env, "no direct value access support for this map type\n"); + fdput(f); + return -EINVAL; + } + + err = map->ops->map_direct_value_addr(map, &addr, off); + if (err) { + verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", + map->value_size, off); + fdput(f); + return err; + } + + aux->map_off = off; + addr += off; + } + + insn[0].imm = (u32)addr; + insn[1].imm = addr >> 32;
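This BPF_PSEUDO_MAP_VALUE rewrite is what BPF global data builds on: the loader emits a ld_imm64 carrying a map fd plus an offset into the map's value, and the verifier resolves it to a real address through ->map_direct_value_addr(). From the program author's point of view it is just a global variable; a sketch assuming a libbpf-style loader that places globals into a single-entry array map:

static volatile __u64 pkt_count;	/* ends up in a global-data map */

SEC("xdp")
int count_packets(struct xdp_md *ctx)
{
	pkt_count++;	/* compiles to ld_imm64(map, off) plus a memory op */
	return XDP_PASS;
}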
/* check whether we recorded this map already */ - for (j = 0; j < env->used_map_cnt; j++) + for (j = 0; j < env->used_map_cnt; j++) { if (env->used_maps[j] == map) { + aux->map_index = j; fdput(f); goto next_insn; } + }
if (env->used_map_cnt >= MAX_USED_MAPS) { fdput(f); @@@ -7049,8 -6769,6 +7065,8 @@@ fdput(f); return PTR_ERR(map); } + + aux->map_index = env->used_map_cnt; env->used_maps[env->used_map_cnt++] = map;
if (bpf_map_is_cgroup_storage(map) && @@@ -7156,13 -6874,8 +7172,13 @@@ static struct bpf_prog *bpf_patch_insn_ struct bpf_prog *new_prog;
new_prog = bpf_patch_insn_single(env->prog, off, patch, len); - if (!new_prog) + if (IS_ERR(new_prog)) { + if (PTR_ERR(new_prog) == -ERANGE) + verbose(env, + "insn %d cannot be patched due to 16-bit range\n", + env->insn_aux_data[off].orig_idx); return NULL; + } if (adjust_insn_aux_data(env, new_prog->len, off, len)) return NULL; adjust_subprog_starts(env, off, len); @@@ -7700,8 -7413,9 +7716,8 @@@ static int jit_subprogs(struct bpf_veri insn->src_reg != BPF_PSEUDO_CALL) continue; subprog = insn->off; - insn->imm = (u64 (*)(u64, u64, u64, u64, u64)) - func[subprog]->bpf_func - - __bpf_call_base; + insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) - + __bpf_call_base; }
/* we use the aux data to keep a list of the start addresses @@@ -8103,14 -7817,6 +8119,14 @@@ static void free_states(struct bpf_veri struct bpf_verifier_state_list *sl, *sln; int i;
+ sl = env->free_list; + while (sl) { + sln = sl->next; + free_verifier_state(&sl->state, false); + kfree(sl); + sl = sln; + } + if (!env->explored_states) return;
@@@ -8126,37 -7832,12 +8142,37 @@@ } }
- kfree(env->explored_states); + kvfree(env->explored_states); +} + +static void print_verification_stats(struct bpf_verifier_env *env) +{ + int i; + + if (env->log.level & BPF_LOG_STATS) { + verbose(env, "verification time %lld usec\n", + div_u64(env->verification_time, 1000)); + verbose(env, "stack depth "); + for (i = 0; i < env->subprog_cnt; i++) { + u32 depth = env->subprog_info[i].stack_depth; + + verbose(env, "%d", depth); + if (i + 1 < env->subprog_cnt) + verbose(env, "+"); + } + verbose(env, "\n"); + } + verbose(env, "processed %d insns (limit %d) max_states_per_insn %d " + "total_states %d peak_states %d mark_read %d\n", + env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS, + env->max_states_per_insn, env->total_states, + env->peak_states, env->longest_mark_read_walk); }
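With the stats bit set in log_level, the tail of the verifier log now reads like the following (format strings from the code above; values purely illustrative):

	verification time 1764 usec
	stack depth 64+0
	processed 1523 insns (limit 1000000) max_states_per_insn 4 total_states 102 peak_states 85 mark_read 23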
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, union bpf_attr __user *uattr) { + u64 start_time = ktime_get_ns(); struct bpf_verifier_env *env; struct bpf_verifier_log *log; int i, len, ret = -EINVAL; @@@ -8184,11 -7865,9 +8200,11 @@@ env->insn_aux_data[i].orig_idx = i; env->prog = *prog; env->ops = bpf_verifier_ops[env->prog->type]; + is_priv = capable(CAP_SYS_ADMIN);
/* grab the mutex to protect few globals used by verifier */ - mutex_lock(&bpf_verifier_lock); + if (!is_priv) + mutex_lock(&bpf_verifier_lock);
if (attr->log_level || attr->log_buf || attr->log_size) { /* user requested verbose verifier output @@@ -8200,8 -7879,8 +8216,8 @@@
ret = -EINVAL; /* log attributes have to be sane */ - if (log->len_total < 128 || log->len_total > UINT_MAX >> 8 || - !log->level || !log->ubuf) + if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 || + !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK) goto err_unlock; }
@@@ -8211,6 -7890,7 +8227,6 @@@ if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) env->strict_alignment = false;
- is_priv = capable(CAP_SYS_ADMIN); env->allow_ptr_leaks = is_priv;
ret = replace_map_fd_with_map_ptr(env); @@@ -8223,7 -7903,7 +8239,7 @@@ goto skip_full_check; }
- env->explored_states = kcalloc(env->prog->len, + env->explored_states = kvcalloc(env->prog->len, sizeof(struct bpf_verifier_state_list *), GFP_USER); ret = -ENOMEM; @@@ -8281,9 -7961,6 +8297,9 @@@ skip_full_check if (ret == 0) ret = fixup_call_args(env);
+ env->verification_time = ktime_get_ns() - start_time; + print_verification_stats(env); + if (log->level && bpf_verifier_log_full(log)) ret = -ENOSPC; if (log->level && !log->ubuf) { @@@ -8323,8 -8000,7 +8339,8 @@@ err_release_maps release_maps(env); *prog = env->prog; err_unlock: - mutex_unlock(&bpf_verifier_lock); + if (!is_priv) + mutex_unlock(&bpf_verifier_lock); vfree(env->insn_aux_data); err_free_env: kfree(env); diff --combined lib/Kconfig.debug index 7117ac61174e,d5a4a4036d2f..8ed7d276fe7d --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@@ -219,14 -219,6 +219,14 @@@ config DEBUG_INFO_DWARF But it significantly improves the success of resolving variables in gdb on optimized code.
+config DEBUG_INFO_BTF + bool "Generate BTF typeinfo" + depends on DEBUG_INFO + help + Generate deduplicated BTF type information from DWARF debug info. + Turning this on expects presence of pahole tool, which will convert + DWARF type info into equivalent deduplicated BTF type info. + config GDB_SCRIPTS bool "Provide GDB scripts for kernel debugging" depends on DEBUG_INFO @@@ -1937,6 -1929,7 +1937,7 @@@ config TEST_KMO depends on m depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS depends on NETDEVICES && NET_CORE && INET # for TUN + depends on BLOCK select TEST_LKM select XFS_FS select TUN diff --combined net/appletalk/ddp.c index e2511027d19b,dbe8b1993be9..a2555023c654 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@@ -1806,6 -1806,12 +1806,6 @@@ static int atalk_ioctl(struct socket *s rc = put_user(amount, (int __user *)argp); break; } - case SIOCGSTAMP: - rc = sock_get_timestamp(sk, argp); - break; - case SIOCGSTAMPNS: - rc = sock_get_timestampns(sk, argp); - break; /* Routing */ case SIOCADDRT: case SIOCDELRT: @@@ -1865,7 -1871,6 +1865,7 @@@ static const struct proto_ops atalk_dgr .getname = atalk_getname, .poll = datagram_poll, .ioctl = atalk_ioctl, + .gettstamp = sock_gettstamp, #ifdef CONFIG_COMPAT .compat_ioctl = atalk_compat_ioctl, #endif @@@ -1915,6 -1920,7 +1915,7 @@@ static int __init atalk_init(void ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); if (!ddp_dl) { pr_crit("Unable to register DDP with SNAP.\n"); + rc = -ENOMEM; goto out_sock; }
diff --combined net/ipv4/esp4_offload.c index b61a8ff558f9,d3170a8001b2..8edcfa66d1e5 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@@ -52,13 -52,13 +52,13 @@@ static struct sk_buff *esp4_gro_receive goto out;
if (sp->len == XFRM_MAX_DEPTH) - goto out; + goto out_reset;
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ip_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET); if (!x) - goto out; + goto out_reset;
sp->xvec[sp->len++] = x; sp->olen++; @@@ -66,7 -66,7 +66,7 @@@ xo = xfrm_offload(skb); if (!xo) { xfrm_state_put(x); - goto out; + goto out_reset; } }
@@@ -82,6 -82,8 +82,8 @@@ xfrm_input(skb, IPPROTO_ESP, spi, -2);
return ERR_PTR(-EINPROGRESS); + out_reset: + secpath_reset(skb); out: skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; @@@ -107,44 -109,6 +109,44 @@@ static void esp4_gso_encap(struct xfrm_ xo->proto = proto; }
+static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + __skb_push(skb, skb->mac_len); + return skb_mac_gso_segment(skb, features); +} + +static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + const struct net_offload *ops; + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct xfrm_offload *xo = xfrm_offload(skb); + + skb->transport_header += x->props.header_len; + ops = rcu_dereference(inet_offloads[xo->proto]); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); + + return segs; +} + +static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + switch (x->outer_mode.encap) { + case XFRM_MODE_TUNNEL: + return xfrm4_tunnel_gso_segment(x, skb, features); + case XFRM_MODE_TRANSPORT: + return xfrm4_transport_gso_segment(x, skb, features); + } + + return ERR_PTR(-EOPNOTSUPP); +} + static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@@ -176,16 -140,14 +178,16 @@@
skb->encap_hdr_csum = 1;
- if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) + if ((!(skb->dev->gso_partial_features & NETIF_F_HW_ESP) && + !(features & NETIF_F_HW_ESP)) || x->xso.dev != skb->dev) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK); - else if (!(features & NETIF_F_HW_ESP_TX_CSUM)) + else if (!(features & NETIF_F_HW_ESP_TX_CSUM) && + !(skb->dev->gso_partial_features & NETIF_F_HW_ESP_TX_CSUM)) esp_features = features & ~NETIF_F_CSUM_MASK;
xo->flags |= XFRM_GSO_SEGMENT;
- return x->outer_mode->gso_segment(x, skb, esp_features); + return xfrm4_outer_mode_gso_segment(x, skb, esp_features); }
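The IPv6 side of this conversion (esp6_offload.c, outside this excerpt) follows the same shape once struct xfrm_mode is reduced to plain data; a sketch under that assumption:

static struct sk_buff *xfrm6_outer_mode_gso_segment(struct xfrm_state *x,
						    struct sk_buff *skb,
						    netdev_features_t features)
{
	switch (x->outer_mode.encap) {
	case XFRM_MODE_TUNNEL:
		return xfrm6_tunnel_gso_segment(x, skb, features);
	case XFRM_MODE_TRANSPORT:
		return xfrm6_transport_gso_segment(x, skb, features);
	}

	return ERR_PTR(-EOPNOTSUPP);
}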
static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb) @@@ -221,9 -183,7 +223,9 @@@ static int esp_xmit(struct xfrm_state * if (!xo) return -EINVAL;
- if (!(features & NETIF_F_HW_ESP) || x->xso.dev != skb->dev) { + if ((!(features & NETIF_F_HW_ESP) && + !(skb->dev->gso_partial_features & NETIF_F_HW_ESP)) || + x->xso.dev != skb->dev) { xo->flags |= CRYPTO_FALLBACK; hw_offload = false; } diff --combined net/ipv4/ip_output.c index 4e42c1974ba2,e8bb2e85c5a4..ac880beda8a7 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@@ -188,7 -188,7 +188,7 @@@ static int ip_finish_output2(struct ne struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; - u32 nexthop; + bool is_v6gw = false;
if (rt->rt_type == RTN_MULTICAST) { IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len); @@@ -218,13 -218,16 +218,13 @@@ }
rcu_read_lock_bh(); - nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); - neigh = __ipv4_neigh_lookup_noref(dev, nexthop); - if (unlikely(!neigh)) - neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); + neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { int res;
sock_confirm_neigh(skb, neigh); - res = neigh_output(neigh, skb); - + /* if crossing protocols, can not use the cached header */ + res = neigh_output(neigh, skb, is_v6gw); rcu_read_unlock_bh(); return res; } @@@ -469,7 -472,7 +469,7 @@@ int __ip_queue_xmit(struct sock *sk, st skb_dst_set_noref(skb, &rt->dst);
packet_routed: - if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway) + if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gw_family) goto no_route;
/* OK, we know where to send it, allocate and build IP header. */ @@@ -516,6 -519,7 +516,7 @@@ static void ip_copy_metadata(struct sk_ to->pkt_type = from->pkt_type; to->priority = from->priority; to->protocol = from->protocol; + to->skb_iif = from->skb_iif; skb_dst_drop(to); skb_dst_copy(to, from); to->dev = from->dev; @@@ -690,8 -694,11 +691,8 @@@ int ip_do_fragment(struct net *net, str return 0; }
- while (frag) { - skb = frag->next; - kfree_skb(frag); - frag = skb; - } + kfree_skb_list(frag); + IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); return err;
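Besides the smaller cleanups in this file (kfree_skb_list() replacing an open-coded free loop, skb_iif propagated in ip_copy_metadata()), the structural change is the is_v6gw flag: an IPv4 route may now resolve to an IPv6 gateway (RFC 5549), in which case the neighbour's cached hardware header was built for IPv6 framing and must not be reused for the IPv4 packet. A sketch of the shape neigh_output() takes with the extra parameter (assumed; the neighbour.h side of the change is not part of this hunk):

	static inline int neigh_output(struct neighbour *n, struct sk_buff *skb,
				       bool skip_cache)
	{
		const struct hh_cache *hh = &n->hh;

		/* only reuse the cached L2 header when it matches the packet */
		if ((n->nud_state & NUD_CONNECTED) && hh->hh_len && !skip_cache)
			return neigh_hh_output(hh, skb);

		return n->output(n, skb);
	}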
diff --combined net/ipv4/ip_vti.c index cc5d9c0a8a10,35d8346742e2..254a42e83ff9 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@@ -50,7 -50,7 +50,7 @@@ static unsigned int vti_net_id __read_m static int vti_tunnel_init(struct net_device *dev);
static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi, - int encap_type) + int encap_type, bool update_skb_dev) { struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); @@@ -65,9 -65,6 +65,9 @@@
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
+ if (update_skb_dev) + skb->dev = tunnel->dev; + return xfrm_input(skb, nexthdr, spi, encap_type); }
@@@ -77,28 -74,47 +77,28 @@@ drop return 0; }
-static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi, - int encap_type) +static int vti_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi, + int encap_type) { - struct ip_tunnel *tunnel; - const struct iphdr *iph = ip_hdr(skb); - struct net *net = dev_net(skb->dev); - struct ip_tunnel_net *itn = net_generic(net, vti_net_id); - - tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, - iph->saddr, iph->daddr, 0); - if (tunnel) { - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto drop; - - XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; - - skb->dev = tunnel->dev; - - return xfrm_input(skb, nexthdr, spi, encap_type); - } - - return -EINVAL; -drop: - kfree_skb(skb); - return 0; + return vti_input(skb, nexthdr, spi, encap_type, false); }
-static int vti_rcv(struct sk_buff *skb) +static int vti_rcv(struct sk_buff *skb, __be32 spi, bool update_skb_dev) { XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
- return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); + return vti_input(skb, ip_hdr(skb)->protocol, spi, 0, update_skb_dev); }
-static int vti_rcv_ipip(struct sk_buff *skb) +static int vti_rcv_proto(struct sk_buff *skb) { - XFRM_SPI_SKB_CB(skb)->family = AF_INET; - XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); + return vti_rcv(skb, 0, false); +}
- return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0); +static int vti_rcv_tunnel(struct sk_buff *skb) +{ + return vti_rcv(skb, ip_hdr(skb)->saddr, true); }
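After this refactor the duplicated body of vti_input_ipip() is gone and the receive entry points differ only in the arguments they pass down. A summary of the resulting call matrix, as read from the hunks above:

	/* Receive paths after the refactor -- all funnel into the
	 * shared vti_input():
	 *
	 *   vti_rcv_proto()   -> vti_rcv(skb, 0, false)                  ESP/AH/IPCOMP
	 *   vti_rcv_tunnel()  -> vti_rcv(skb, ip_hdr(skb)->saddr, true)  ipip handler
	 *   vti_input_proto() -> vti_input(skb, nexthdr, spi, type, false)
	 *
	 * update_skb_dev == true makes vti_input() retarget skb->dev to
	 * the tunnel device before calling xfrm_input(), which is what
	 * the old vti_input_ipip() did inline.
	 */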
static int vti_rcv_cb(struct sk_buff *skb, int err) @@@ -107,7 -123,7 +107,7 @@@ struct net_device *dev; struct pcpu_sw_netstats *tstats; struct xfrm_state *x; - struct xfrm_mode *inner_mode; + const struct xfrm_mode *inner_mode; struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4; u32 orig_mark = skb->mark; int ret; @@@ -126,7 -142,7 +126,7 @@@
x = xfrm_input_state(skb);
- inner_mode = x->inner_mode; + inner_mode = &x->inner_mode;
if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); @@@ -137,7 -153,7 +137,7 @@@ } }
- family = inner_mode->afinfo->family; + family = inner_mode->family;
skb->mark = be32_to_cpu(tunnel->parms.i_key); ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); @@@ -431,31 -447,31 +431,31 @@@ static void __net_init vti_fb_tunnel_in }
static struct xfrm4_protocol vti_esp4_protocol __read_mostly = { - .handler = vti_rcv, - .input_handler = vti_input, + .handler = vti_rcv_proto, + .input_handler = vti_input_proto, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 100, };
static struct xfrm4_protocol vti_ah4_protocol __read_mostly = { - .handler = vti_rcv, - .input_handler = vti_input, + .handler = vti_rcv_proto, + .input_handler = vti_input_proto, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 100, };
static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = { - .handler = vti_rcv, - .input_handler = vti_input, + .handler = vti_rcv_proto, + .input_handler = vti_input_proto, .cb_handler = vti_rcv_cb, .err_handler = vti4_err, .priority = 100, };
static struct xfrm_tunnel ipip_handler __read_mostly = { - .handler = vti_rcv_ipip, + .handler = vti_rcv_tunnel, .err_handler = vti4_err, .priority = 0, }; @@@ -630,10 -646,8 +630,8 @@@ static int __init vti_init(void
msg = "ipip tunnel"; err = xfrm4_tunnel_register(&ipip_handler, AF_INET); - if (err < 0) { - pr_info("%s: cant't register tunnel\n",__func__); + if (err < 0) goto xfrm_tunnel_failed; - }
msg = "netlink interface"; err = rtnl_link_register(&vti_link_ops); @@@ -643,9 -657,9 +641,9 @@@ return err;
rtnl_link_failed: - xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); - xfrm_tunnel_failed: xfrm4_tunnel_deregister(&ipip_handler, AF_INET); + xfrm_tunnel_failed: + xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm_proto_comp_failed: xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: @@@ -660,6 -674,7 +658,7 @@@ pernet_dev_failed static void __exit vti_fini(void) { rtnl_link_unregister(&vti_link_ops); + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); diff --combined net/ipv4/tcp_ipv4.c index faa6fa619f59,a2896944aa37..af81e4a6a8d8 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@@ -1673,7 -1673,9 +1673,9 @@@ bool tcp_add_backlog(struct sock *sk, s if (TCP_SKB_CB(tail)->end_seq != TCP_SKB_CB(skb)->seq || TCP_SKB_CB(tail)->ip_dsfield != TCP_SKB_CB(skb)->ip_dsfield || ((TCP_SKB_CB(tail)->tcp_flags | - TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_URG) || + TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG)) || + !((TCP_SKB_CB(tail)->tcp_flags & + TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) || ((TCP_SKB_CB(tail)->tcp_flags ^ TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) || #ifdef CONFIG_TLS_DEVICE @@@ -1692,6 -1694,15 +1694,15 @@@ if (after(TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(tail)->ack_seq)) TCP_SKB_CB(tail)->ack_seq = TCP_SKB_CB(skb)->ack_seq;
+ /* We have to update both TCP_SKB_CB(tail)->tcp_flags and + * thtail->fin, so that the fast path in tcp_rcv_established() + * is not entered if we append a packet with a FIN. + * SYN, RST, URG are not present. + * ACK is set on both packets. + * PSH : the TCP stack does not really care about it, + * at least for 'GRO' packets. + */ + thtail->fin |= th->fin; TCP_SKB_CB(tail)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
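The comment above states the coalescing contract this hunk adds to tcp_add_backlog(). Distilled into a hypothetical predicate covering just the flag checks (the full condition in the diff also compares sequence continuity, ip_dsfield, and TLS decrypt state):

	/* Two backlogged segments may be coalesced only if neither
	 * carries SYN/RST/URG, both carry ACK, and their ECE/CWR
	 * bits agree.
	 */
	static bool tcp_flags_allow_coalesce(u8 tail_flags, u8 skb_flags)
	{
		if ((tail_flags | skb_flags) &
		    (TCPHDR_SYN | TCPHDR_RST | TCPHDR_URG))
			return false;
		if (!((tail_flags & skb_flags) & TCPHDR_ACK))
			return false;
		return !((tail_flags ^ skb_flags) & (TCPHDR_ECE | TCPHDR_CWR));
	}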
if (TCP_SKB_CB(skb)->has_rxtstamp) { @@@ -1774,7 -1785,6 +1785,7 @@@ static void tcp_v4_fill_cb(struct sk_bu int tcp_v4_rcv(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); + struct sk_buff *skb_to_free; int sdif = inet_sdif(skb); const struct iphdr *iph; const struct tcphdr *th; @@@ -1906,17 -1916,11 +1917,17 @@@ process tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { + skb_to_free = sk->sk_rx_skb_cache; + sk->sk_rx_skb_cache = NULL; ret = tcp_v4_do_rcv(sk, skb); - } else if (tcp_add_backlog(sk, skb)) { - goto discard_and_relse; + } else { + if (tcp_add_backlog(sk, skb)) + goto discard_and_relse; + skb_to_free = NULL; } bh_unlock_sock(sk); + if (skb_to_free) + __kfree_skb(skb_to_free);
put_and_return: if (refcounted) diff --combined net/ipv6/esp6_offload.c index bff83279d76f,cb99f6fb79b7..d453cf417b03 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@@ -74,13 -74,13 +74,13 @@@ static struct sk_buff *esp6_gro_receive goto out;
if (sp->len == XFRM_MAX_DEPTH) - goto out; + goto out_reset;
x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ipv6_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET6); if (!x) - goto out; + goto out_reset;
sp->xvec[sp->len++] = x; sp->olen++; @@@ -88,7 -88,7 +88,7 @@@ xo = xfrm_offload(skb); if (!xo) { xfrm_state_put(x); - goto out; + goto out_reset; } }
@@@ -109,6 -109,8 +109,8 @@@ xfrm_input(skb, IPPROTO_ESP, spi, -2);
return ERR_PTR(-EINPROGRESS); + out_reset: + secpath_reset(skb); out: skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; @@@ -134,44 -136,6 +136,44 @@@ static void esp6_gso_encap(struct xfrm_ xo->proto = proto; }
+static struct sk_buff *xfrm6_tunnel_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + __skb_push(skb, skb->mac_len); + return skb_mac_gso_segment(skb, features); +} + +static struct sk_buff *xfrm6_transport_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + const struct net_offload *ops; + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct xfrm_offload *xo = xfrm_offload(skb); + + skb->transport_header += x->props.header_len; + ops = rcu_dereference(inet6_offloads[xo->proto]); + if (likely(ops && ops->callbacks.gso_segment)) + segs = ops->callbacks.gso_segment(skb, features); + + return segs; +} + +static struct sk_buff *xfrm6_outer_mode_gso_segment(struct xfrm_state *x, + struct sk_buff *skb, + netdev_features_t features) +{ + switch (x->outer_mode.encap) { + case XFRM_MODE_TUNNEL: + return xfrm6_tunnel_gso_segment(x, skb, features); + case XFRM_MODE_TRANSPORT: + return xfrm6_transport_gso_segment(x, skb, features); + } + + return ERR_PTR(-EOPNOTSUPP); +} + static struct sk_buff *esp6_gso_segment(struct sk_buff *skb, netdev_features_t features) { @@@ -210,7 -174,7 +212,7 @@@
xo->flags |= XFRM_GSO_SEGMENT;
- return x->outer_mode->gso_segment(x, skb, esp_features); + return xfrm6_outer_mode_gso_segment(x, skb, esp_features); }
static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb) diff --combined net/ipv6/ip6_fib.c index a8919c217cc2,91247a6fc67f..08e0390e001c --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@@ -162,7 -162,7 +162,7 @@@ struct fib6_info *fib6_info_alloc(gfp_ }
INIT_LIST_HEAD(&f6i->fib6_siblings); - atomic_inc(&f6i->fib6_ref); + refcount_set(&f6i->fib6_ref, 1);
return f6i; } @@@ -175,7 -175,10 +175,7 @@@ void fib6_info_destroy_rcu(struct rcu_h WARN_ON(f6i->fib6_node);
bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, 1); - if (bucket) { - f6i->rt6i_exception_bucket = NULL; - kfree(bucket); - } + kfree(bucket);
if (f6i->rt6i_pcpu) { int cpu; @@@ -196,7 -199,10 +196,7 @@@ free_percpu(f6i->rt6i_pcpu); }
- lwtstate_put(f6i->fib6_nh.nh_lwtstate); - - if (f6i->fib6_nh.nh_dev) - dev_put(f6i->fib6_nh.nh_dev); + fib6_nh_release(&f6i->fib6_nh);
ip_fib_metrics_put(f6i->fib6_metrics);
@@@ -351,11 -357,10 +351,11 @@@ struct dst_entry *fib6_rule_lookup(stru }
/* called with rcu lock held; no reference taken on fib6_info */ -struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, - int flags) +int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, + struct fib6_result *res, int flags) { - return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, flags); + return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, + res, flags); }
static void __net_init fib6_tables_init(struct net *net) @@@ -846,8 -851,8 +846,8 @@@ insert_above
RCU_INIT_POINTER(in->parent, pn); in->leaf = fn->leaf; - atomic_inc(&rcu_dereference_protected(in->leaf, - lockdep_is_held(&table->tb6_lock))->fib6_ref); + fib6_info_hold(rcu_dereference_protected(in->leaf, + lockdep_is_held(&table->tb6_lock)));
/* update parent pointer */ if (dir) @@@ -916,9 -921,7 +916,7 @@@ static void fib6_drop_pcpu_from(struct if (pcpu_rt) { struct fib6_info *from;
- from = rcu_dereference_protected(pcpu_rt->from, - lockdep_is_held(&table->tb6_lock)); - rcu_assign_pointer(pcpu_rt->from, NULL); + from = xchg((__force struct fib6_info **)&pcpu_rt->from, NULL); fib6_info_release(from); } } @@@ -929,7 -932,7 +927,7 @@@ static void fib6_purge_rt(struct fib6_i { struct fib6_table *table = rt->fib6_table;
- if (atomic_read(&rt->fib6_ref) != 1) { + if (refcount_read(&rt->fib6_ref) != 1) { /* This route is used as dummy address holder in some split * nodes. It is not leaked, but it still holds other resources, * which must be released in time. So, scan ascendant nodes @@@ -942,7 -945,7 +940,7 @@@ struct fib6_info *new_leaf; if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) { new_leaf = fib6_find_prefix(net, table, fn); - atomic_inc(&new_leaf->fib6_ref); + fib6_info_hold(new_leaf);
rcu_assign_pointer(fn->leaf, new_leaf); fib6_info_release(rt); @@@ -1108,7 -1111,7 +1106,7 @@@ add return err;
rcu_assign_pointer(rt->fib6_next, iter); - atomic_inc(&rt->fib6_ref); + fib6_info_hold(rt); rcu_assign_pointer(rt->fib6_node, fn); rcu_assign_pointer(*ins, rt); if (!info->skip_notify) @@@ -1136,7 -1139,7 +1134,7 @@@ if (err) return err;
- atomic_inc(&rt->fib6_ref); + fib6_info_hold(rt); rcu_assign_pointer(rt->fib6_node, fn); rt->fib6_next = iter->fib6_next; rcu_assign_pointer(*ins, rt); @@@ -1278,7 -1281,7 +1276,7 @@@ int fib6_add(struct fib6_node *root, st if (!sfn) goto failure;
- atomic_inc(&info->nl_net->ipv6.fib6_null_entry->fib6_ref); + fib6_info_hold(info->nl_net->ipv6.fib6_null_entry); rcu_assign_pointer(sfn->leaf, info->nl_net->ipv6.fib6_null_entry); sfn->fn_flags = RTN_ROOT; @@@ -1321,7 -1324,7 +1319,7 @@@ rcu_assign_pointer(fn->leaf, info->nl_net->ipv6.fib6_null_entry); } else { - atomic_inc(&rt->fib6_ref); + fib6_info_hold(rt); rcu_assign_pointer(fn->leaf, rt); } } @@@ -2292,7 -2295,6 +2290,7 @@@ static int ipv6_route_seq_show(struct s { struct fib6_info *rt = v; struct ipv6_route_iter *iter = seq->private; + unsigned int flags = rt->fib6_flags; const struct net_device *dev;
seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen); @@@ -2302,17 -2304,15 +2300,17 @@@ #else seq_puts(seq, "00000000000000000000000000000000 00 "); #endif - if (rt->fib6_flags & RTF_GATEWAY) - seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw); - else + if (rt->fib6_nh.fib_nh_gw_family) { + flags |= RTF_GATEWAY; + seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6); + } else { seq_puts(seq, "00000000000000000000000000000000"); + }
- dev = rt->fib6_nh.nh_dev; + dev = rt->fib6_nh.fib_nh_dev; seq_printf(seq, " %08x %08x %08x %08x %8s\n", - rt->fib6_metric, atomic_read(&rt->fib6_ref), 0, - rt->fib6_flags, dev ? dev->name : ""); + rt->fib6_metric, refcount_read(&rt->fib6_ref), 0, + flags, dev ? dev->name : ""); iter->w.leaf = NULL; return 0; } diff --combined net/ipv6/route.c index b18e85cd7587,0520aca3354b..23a20d62daac --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@@ -59,7 -59,7 +59,7 @@@ #include <net/xfrm.h> #include <net/netevent.h> #include <net/netlink.h> -#include <net/nexthop.h> +#include <net/rtnh.h> #include <net/lwtunnel.h> #include <net/ip_tunnels.h> #include <net/l3mdev.h> @@@ -102,15 -102,14 +102,15 @@@ static void ip6_rt_update_pmtu(struct struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); -static int rt6_score_route(struct fib6_info *rt, int oif, int strict); +static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, + int strict); static size_t rt6_nlmsg_size(struct fib6_info *rt); static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct fib6_info *rt, struct dst_entry *dst, struct in6_addr *dest, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags); -static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, +static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, struct in6_addr *daddr, struct in6_addr *saddr);
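The ip6_fib.c hunks above convert fib6_ref from atomic_t to refcount_t, so saturation and underflow are caught at runtime, and every open-coded atomic_inc() on it becomes a named helper. The helper's assumed shape, inferred from the conversion (its definition lives in the headers, outside this diff):

	static inline void fib6_info_hold(struct fib6_info *f6i)
	{
		refcount_inc(&f6i->fib6_ref);
	}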
@@@ -296,7 -295,7 +296,7 @@@ static const struct fib6_info fib6_null .fib6_flags = (RTF_REJECT | RTF_NONEXTHOP), .fib6_protocol = RTPROT_KERNEL, .fib6_metric = ~(u32)0, - .fib6_ref = ATOMIC_INIT(1), + .fib6_ref = REFCOUNT_INIT(1), .fib6_type = RTN_UNREACHABLE, .fib6_metrics = (struct dst_metrics *)&dst_default_metrics, }; @@@ -380,11 -379,8 +380,8 @@@ static void ip6_dst_destroy(struct dst_ in6_dev_put(idev); }
- rcu_read_lock(); - from = rcu_dereference(rt->from); - rcu_assign_pointer(rt->from, NULL); + from = xchg((__force struct fib6_info **)&rt->from, NULL); fib6_info_release(from); - rcu_read_unlock(); }
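ip6_dst_destroy() above, like the earlier hunks in fib6_drop_pcpu_from() and rt6_remove_exception(), replaces a read-then-clear pair with a single xchg(): the pointer is atomically stolen, so two racing releasers cannot both observe the same non-NULL value and drop the reference twice. A sketch of the idiom (wrapper name hypothetical):

	static void rt_detach_from(struct rt6_info *rt)
	{
		struct fib6_info *from;

		/* atomically take ownership of the pointer and NULL the
		 * field in one step; only one caller can win the race
		 */
		from = xchg((__force struct fib6_info **)&rt->from, NULL);
		fib6_info_release(from);	/* tolerates NULL */
	}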
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@@ -428,15 -424,13 +425,15 @@@ static bool rt6_check_expired(const str return false; }
-struct fib6_info *fib6_multipath_select(const struct net *net, - struct fib6_info *match, - struct flowi6 *fl6, int oif, - const struct sk_buff *skb, - int strict) +void fib6_select_path(const struct net *net, struct fib6_result *res, + struct flowi6 *fl6, int oif, bool have_oif_match, + const struct sk_buff *skb, int strict) { struct fib6_info *sibling, *next_sibling; + struct fib6_info *match = res->f6i; + + if (!match->fib6_nsiblings || have_oif_match) + goto out;
/* We might have already computed the hash for ICMPv6 errors. In such * case it will always be non-zero. Otherwise now is the time to do it. @@@ -444,89 -438,61 +441,89 @@@ if (!fl6->mp_hash) fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
- if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound)) - return match; + if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound)) + goto out;
list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, fib6_siblings) { + const struct fib6_nh *nh = &sibling->fib6_nh; int nh_upper_bound;
- nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound); + nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound); if (fl6->mp_hash > nh_upper_bound) continue; - if (rt6_score_route(sibling, oif, strict) < 0) + if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0) break; match = sibling; break; }
- return match; +out: + res->f6i = match; + res->nh = &match->fib6_nh; }
/* * Route lookup. rcu_read_lock() should be held. */
-static inline struct fib6_info *rt6_device_match(struct net *net, - struct fib6_info *rt, - const struct in6_addr *saddr, - int oif, - int flags) +static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh, + const struct in6_addr *saddr, int oif, int flags) { - struct fib6_info *sprt; + const struct net_device *dev;
- if (!oif && ipv6_addr_any(saddr) && - !(rt->fib6_nh.nh_flags & RTNH_F_DEAD)) - return rt; + if (nh->fib_nh_flags & RTNH_F_DEAD) + return false;
- for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) { - const struct net_device *dev = sprt->fib6_nh.nh_dev; + dev = nh->fib_nh_dev; + if (oif) { + if (dev->ifindex == oif) + return true; + } else { + if (ipv6_chk_addr(net, saddr, dev, + flags & RT6_LOOKUP_F_IFACE)) + return true; + }
- if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD) - continue; + return false; +}
- if (oif) { - if (dev->ifindex == oif) - return sprt; - } else { - if (ipv6_chk_addr(net, saddr, dev, - flags & RT6_LOOKUP_F_IFACE)) - return sprt; +static void rt6_device_match(struct net *net, struct fib6_result *res, + const struct in6_addr *saddr, int oif, int flags) +{ + struct fib6_info *f6i = res->f6i; + struct fib6_info *spf6i; + struct fib6_nh *nh; + + if (!oif && ipv6_addr_any(saddr)) { + nh = &f6i->fib6_nh; + if (!(nh->fib_nh_flags & RTNH_F_DEAD)) + goto out; + } + + for (spf6i = f6i; spf6i; spf6i = rcu_dereference(spf6i->fib6_next)) { + nh = &spf6i->fib6_nh; + if (__rt6_device_match(net, nh, saddr, oif, flags)) { + res->f6i = spf6i; + goto out; } }
- if (oif && flags & RT6_LOOKUP_F_IFACE) - return net->ipv6.fib6_null_entry; + if (oif && flags & RT6_LOOKUP_F_IFACE) { + res->f6i = net->ipv6.fib6_null_entry; + nh = &res->f6i->fib6_nh; + goto out; + }
- return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt; + nh = &f6i->fib6_nh; + if (nh->fib_nh_flags & RTNH_F_DEAD) { + res->f6i = net->ipv6.fib6_null_entry; + nh = &res->f6i->fib6_nh; + } +out: + res->nh = nh; + res->fib6_type = res->f6i->fib6_type; + res->fib6_flags = res->f6i->fib6_flags; }
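rt6_device_match() is the first of several lookup helpers in this file converted from returning a fib6_info pointer to filling in a result structure, so callers get the matched entry, the selected nexthop, and the (possibly overridden) type/flags in one place. The layout implied by the fields assigned throughout these hunks (the actual definition is in the headers, not in this diff):

	struct fib6_result {
		struct fib6_nh	 *nh;		/* selected nexthop */
		struct fib6_info *f6i;		/* matched route entry */
		u32		  fib6_flags;	/* may gain RTF_GATEWAY */
		u8		  fib6_type;
	};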
#ifdef CONFIG_IPV6_ROUTER_PREF @@@ -548,7 -514,7 +545,7 @@@ static void rt6_probe_deferred(struct w kfree(work); }
-static void rt6_probe(struct fib6_info *rt) +static void rt6_probe(struct fib6_nh *fib6_nh) { struct __rt6_probe_work *work = NULL; const struct in6_addr *nh_gw; @@@ -564,11 -530,11 +561,11 @@@ * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - if (!rt || !(rt->fib6_flags & RTF_GATEWAY)) + if (!fib6_nh->fib_nh_gw_family) return;
- nh_gw = &rt->fib6_nh.nh_gw; - dev = rt->fib6_nh.nh_dev; + nh_gw = &fib6_nh->fib_nh_gw6; + dev = fib6_nh->fib_nh_dev; rcu_read_lock_bh(); idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); @@@ -585,13 -551,13 +582,13 @@@ __neigh_set_probe_once(neigh); } write_unlock(&neigh->lock); - } else if (time_after(jiffies, rt->last_probe + + } else if (time_after(jiffies, fib6_nh->last_probe + idev->cnf.rtr_probe_interval)) { work = kmalloc(sizeof(*work), GFP_ATOMIC); }
if (work) { - rt->last_probe = jiffies; + fib6_nh->last_probe = jiffies; INIT_WORK(&work->work, rt6_probe_deferred); work->target = *nh_gw; dev_hold(dev); @@@ -603,7 -569,7 +600,7 @@@ out rcu_read_unlock_bh(); } #else -static inline void rt6_probe(struct fib6_info *rt) +static inline void rt6_probe(struct fib6_nh *fib6_nh) { } #endif @@@ -611,14 -577,27 +608,14 @@@ /* * Default Router Selection (RFC 2461 6.3.6) */ -static inline int rt6_check_dev(struct fib6_info *rt, int oif) -{ - const struct net_device *dev = rt->fib6_nh.nh_dev; - - if (!oif || dev->ifindex == oif) - return 2; - return 0; -} - -static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt) +static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh) { enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; struct neighbour *neigh;
- if (rt->fib6_flags & RTF_NONEXTHOP || - !(rt->fib6_flags & RTF_GATEWAY)) - return RT6_NUD_SUCCEED; - rcu_read_lock_bh(); - neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev, - &rt->fib6_nh.nh_gw); + neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev, + &fib6_nh->fib_nh_gw6); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) @@@ -639,44 -618,58 +636,44 @@@ return ret; }
-static int rt6_score_route(struct fib6_info *rt, int oif, int strict) +static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, + int strict) { - int m; + int m = 0; + + if (!oif || nh->fib_nh_dev->ifindex == oif) + m = 2;
- m = rt6_check_dev(rt, oif); if (!m && (strict & RT6_LOOKUP_F_IFACE)) return RT6_NUD_FAIL_HARD; #ifdef CONFIG_IPV6_ROUTER_PREF - m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2; + m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2; #endif - if (strict & RT6_LOOKUP_F_REACHABLE) { - int n = rt6_check_neigh(rt); + if ((strict & RT6_LOOKUP_F_REACHABLE) && + !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) { + int n = rt6_check_neigh(nh); if (n < 0) return n; } return m; }
-/* called with rc_read_lock held */ -static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i) +static bool find_match(struct fib6_nh *nh, u32 fib6_flags, + int oif, int strict, int *mpri, bool *do_rr) { - const struct net_device *dev = fib6_info_nh_dev(f6i); + bool match_do_rr = false; bool rc = false; - - if (dev) { - const struct inet6_dev *idev = __in6_dev_get(dev); - - rc = !!idev->cnf.ignore_routes_with_linkdown; - } - - return rc; -} - -static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict, - int *mpri, struct fib6_info *match, - bool *do_rr) -{ int m; - bool match_do_rr = false;
- if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) + if (nh->fib_nh_flags & RTNH_F_DEAD) goto out;
- if (fib6_ignore_linkdown(rt) && - rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && + if (ip6_ignore_linkdown(nh->fib_nh_dev) && + nh->fib_nh_flags & RTNH_F_LINKDOWN && !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out;
- if (fib6_check_expired(rt)) - goto out; - - m = rt6_score_route(rt, oif, strict); + m = rt6_score_route(nh, fib6_flags, oif, strict); if (m == RT6_NUD_FAIL_DO_RR) { match_do_rr = true; m = 0; /* lowest valid score */ @@@ -685,82 -678,67 +682,82 @@@ }
if (strict & RT6_LOOKUP_F_REACHABLE) - rt6_probe(rt); + rt6_probe(nh);
/* note that m can be RT6_NUD_FAIL_PROBE at this point */ if (m > *mpri) { *do_rr = match_do_rr; *mpri = m; - match = rt; + rc = true; } out: - return match; + return rc; }
-static struct fib6_info *find_rr_leaf(struct fib6_node *fn, - struct fib6_info *leaf, - struct fib6_info *rr_head, - u32 metric, int oif, int strict, - bool *do_rr) +static void __find_rr_leaf(struct fib6_info *f6i_start, + struct fib6_info *nomatch, u32 metric, + struct fib6_result *res, struct fib6_info **cont, + int oif, int strict, bool *do_rr, int *mpri) { - struct fib6_info *rt, *match, *cont; - int mpri = -1; + struct fib6_info *f6i;
- match = NULL; - cont = NULL; - for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) { - if (rt->fib6_metric != metric) { - cont = rt; - break; + for (f6i = f6i_start; + f6i && f6i != nomatch; + f6i = rcu_dereference(f6i->fib6_next)) { + struct fib6_nh *nh; + + if (cont && f6i->fib6_metric != metric) { + *cont = f6i; + return; }
- match = find_match(rt, oif, strict, &mpri, match, do_rr); - } + if (fib6_check_expired(f6i)) + continue;
- for (rt = leaf; rt && rt != rr_head; - rt = rcu_dereference(rt->fib6_next)) { - if (rt->fib6_metric != metric) { - cont = rt; - break; + nh = &f6i->fib6_nh; + if (find_match(nh, f6i->fib6_flags, oif, strict, mpri, do_rr)) { + res->f6i = f6i; + res->nh = nh; + res->fib6_flags = f6i->fib6_flags; + res->fib6_type = f6i->fib6_type; } - - match = find_match(rt, oif, strict, &mpri, match, do_rr); } +}
- if (match || !cont) - return match; +static void find_rr_leaf(struct fib6_node *fn, struct fib6_info *leaf, + struct fib6_info *rr_head, int oif, int strict, + bool *do_rr, struct fib6_result *res) +{ + u32 metric = rr_head->fib6_metric; + struct fib6_info *cont = NULL; + int mpri = -1;
- for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next)) - match = find_match(rt, oif, strict, &mpri, match, do_rr); + __find_rr_leaf(rr_head, NULL, metric, res, &cont, + oif, strict, do_rr, &mpri);
- return match; + __find_rr_leaf(leaf, rr_head, metric, res, &cont, + oif, strict, do_rr, &mpri); + + if (res->f6i || !cont) + return; + + __find_rr_leaf(cont, NULL, metric, res, NULL, + oif, strict, do_rr, &mpri); }
-static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn, - int oif, int strict) +static void rt6_select(struct net *net, struct fib6_node *fn, int oif, + struct fib6_result *res, int strict) { struct fib6_info *leaf = rcu_dereference(fn->leaf); - struct fib6_info *match, *rt0; + struct fib6_info *rt0; bool do_rr = false; int key_plen;
+ /* make sure this function or its helpers set f6i */ + if (!leaf || leaf == net->ipv6.fib6_null_entry) - return net->ipv6.fib6_null_entry; + goto out;
rt0 = rcu_dereference(fn->rr_ptr); if (!rt0) @@@ -777,9 -755,11 +774,9 @@@ key_plen = rt0->fib6_src.plen; #endif if (fn->fn_bit != key_plen) - return net->ipv6.fib6_null_entry; - - match = find_rr_leaf(fn, leaf, rt0, rt0->fib6_metric, oif, strict, - &do_rr); + goto out;
+ find_rr_leaf(fn, leaf, rt0, oif, strict, &do_rr, res); if (do_rr) { struct fib6_info *next = rcu_dereference(rt0->fib6_next);
@@@ -796,19 -776,12 +793,19 @@@ } }
- return match ? match : net->ipv6.fib6_null_entry; +out: + if (!res->f6i) { + res->f6i = net->ipv6.fib6_null_entry; + res->nh = &res->f6i->fib6_nh; + res->fib6_flags = res->f6i->fib6_flags; + res->fib6_type = res->f6i->fib6_type; + } }
-static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt) +static bool rt6_is_gw_or_nonexthop(const struct fib6_result *res) { - return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); + return (res->f6i->fib6_flags & RTF_NONEXTHOP) || + res->nh->fib_nh_gw_family; }
#ifdef CONFIG_IPV6_ROUTE_INFO @@@ -892,17 -865,17 +889,17 @@@ int rt6_route_rcv(struct net_device *de */
/* called with rcu_lock held */ -static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt) +static struct net_device *ip6_rt_get_dev_rcu(const struct fib6_result *res) { - struct net_device *dev = rt->fib6_nh.nh_dev; + struct net_device *dev = res->nh->fib_nh_dev;
- if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { + if (res->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { /* for copies of local routes, dst->dev needs to be the * device if it is a master device, the master device if * device is enslaved, and the loopback as the default */ if (netif_is_l3_slave(dev) && - !rt6_need_strict(&rt->fib6_dst.addr)) + !rt6_need_strict(&res->f6i->fib6_dst.addr)) dev = l3mdev_master_dev_rcu(dev); else if (!netif_is_l3_master(dev)) dev = dev_net(dev)->loopback_dev; @@@ -948,11 -921,11 +945,11 @@@ static unsigned short fib6_info_dst_fla return flags; }
-static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort) +static void ip6_rt_init_dst_reject(struct rt6_info *rt, u8 fib6_type) { - rt->dst.error = ip6_rt_type_to_error(ort->fib6_type); + rt->dst.error = ip6_rt_type_to_error(fib6_type);
- switch (ort->fib6_type) { + switch (fib6_type) { case RTN_BLACKHOLE: rt->dst.output = dst_discard_out; rt->dst.input = dst_discard; @@@ -970,28 -943,26 +967,28 @@@ } }
-static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) +static void ip6_rt_init_dst(struct rt6_info *rt, const struct fib6_result *res) { - if (ort->fib6_flags & RTF_REJECT) { - ip6_rt_init_dst_reject(rt, ort); + struct fib6_info *f6i = res->f6i; + + if (res->fib6_flags & RTF_REJECT) { + ip6_rt_init_dst_reject(rt, res->fib6_type); return; }
rt->dst.error = 0; rt->dst.output = ip6_output;
- if (ort->fib6_type == RTN_LOCAL || ort->fib6_type == RTN_ANYCAST) { + if (res->fib6_type == RTN_LOCAL || res->fib6_type == RTN_ANYCAST) { rt->dst.input = ip6_input; - } else if (ipv6_addr_type(&ort->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { + } else if (ipv6_addr_type(&f6i->fib6_dst.addr) & IPV6_ADDR_MULTICAST) { rt->dst.input = ip6_mc_input; } else { rt->dst.input = ip6_forward; }
- if (ort->fib6_nh.nh_lwtstate) { - rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); + if (res->nh->fib_nh_lws) { + rt->dst.lwtstate = lwtstate_get(res->nh->fib_nh_lws); lwtunnel_set_redirect(&rt->dst); }
@@@ -1006,25 -977,20 +1003,25 @@@ static void rt6_set_from(struct rt6_inf ip_dst_init_metrics(&rt->dst, from->fib6_metrics); }
-/* Caller must already hold reference to @ort */ -static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) +/* Caller must already hold reference to f6i in result */ +static void ip6_rt_copy_init(struct rt6_info *rt, const struct fib6_result *res) { - struct net_device *dev = fib6_info_nh_dev(ort); + const struct fib6_nh *nh = res->nh; + const struct net_device *dev = nh->fib_nh_dev; + struct fib6_info *f6i = res->f6i;
- ip6_rt_init_dst(rt, ort); + ip6_rt_init_dst(rt, res);
- rt->rt6i_dst = ort->fib6_dst; + rt->rt6i_dst = f6i->fib6_dst; rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL; - rt->rt6i_gateway = ort->fib6_nh.nh_gw; - rt->rt6i_flags = ort->fib6_flags; - rt6_set_from(rt, ort); + rt->rt6i_flags = res->fib6_flags; + if (nh->fib_nh_gw_family) { + rt->rt6i_gateway = nh->fib_nh_gw6; + rt->rt6i_flags |= RTF_GATEWAY; + } + rt6_set_from(rt, f6i); #ifdef CONFIG_IPV6_SUBTREES - rt->rt6i_src = ort->fib6_src; + rt->rt6i_src = f6i->fib6_src; #endif }
@@@ -1046,13 -1012,14 +1043,13 @@@ static struct fib6_node* fib6_backtrack } }
-static bool ip6_hold_safe(struct net *net, struct rt6_info **prt, - bool null_fallback) +static bool ip6_hold_safe(struct net *net, struct rt6_info **prt) { struct rt6_info *rt = *prt;
if (dst_hold_safe(&rt->dst)) return true; - if (null_fallback) { + if (net) { rt = net->ipv6.ip6_null_entry; dst_hold(&rt->dst); } else { @@@ -1063,24 -1030,22 +1060,24 @@@ }
/* called with rcu_lock held */ -static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) +static struct rt6_info *ip6_create_rt_rcu(const struct fib6_result *res) { - unsigned short flags = fib6_info_dst_flags(rt); - struct net_device *dev = rt->fib6_nh.nh_dev; + struct net_device *dev = res->nh->fib_nh_dev; + struct fib6_info *f6i = res->f6i; + unsigned short flags; struct rt6_info *nrt;
- if (!fib6_info_hold_safe(rt)) + if (!fib6_info_hold_safe(f6i)) goto fallback;
+ flags = fib6_info_dst_flags(f6i); nrt = ip6_dst_alloc(dev_net(dev), dev, flags); if (!nrt) { - fib6_info_release(rt); + fib6_info_release(f6i); goto fallback; }
- ip6_rt_copy_init(nrt, rt); + ip6_rt_copy_init(nrt, res); return nrt;
fallback: @@@ -1095,7 -1060,7 +1092,7 @@@ static struct rt6_info *ip6_pol_route_l const struct sk_buff *skb, int flags) { - struct fib6_info *f6i; + struct fib6_result res = {}; struct fib6_node *fn; struct rt6_info *rt;
@@@ -1105,38 -1070,37 +1102,38 @@@ rcu_read_lock(); fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: - f6i = rcu_dereference(fn->leaf); - if (!f6i) { - f6i = net->ipv6.fib6_null_entry; - } else { - f6i = rt6_device_match(net, f6i, &fl6->saddr, - fl6->flowi6_oif, flags); - if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0) - f6i = fib6_multipath_select(net, f6i, fl6, - fl6->flowi6_oif, skb, - flags); - } - if (f6i == net->ipv6.fib6_null_entry) { + res.f6i = rcu_dereference(fn->leaf); + if (!res.f6i) + res.f6i = net->ipv6.fib6_null_entry; + else + rt6_device_match(net, &res, &fl6->saddr, fl6->flowi6_oif, + flags); + + if (res.f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; + + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + goto out; }
- trace_fib6_table_lookup(net, f6i, table, fl6); + fib6_select_path(net, &res, fl6, fl6->flowi6_oif, + fl6->flowi6_oif != 0, skb, flags);
/* Search through exception table */ - rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr); if (rt) { - if (ip6_hold_safe(net, &rt, true)) + if (ip6_hold_safe(net, &rt)) dst_use_noref(&rt->dst, jiffies); - } else if (f6i == net->ipv6.fib6_null_entry) { - rt = net->ipv6.ip6_null_entry; - dst_hold(&rt->dst); } else { - rt = ip6_create_rt_rcu(f6i); + rt = ip6_create_rt_rcu(&res); }
+out: + trace_fib6_table_lookup(net, &res, table, fl6); + rcu_read_unlock();
return rt; @@@ -1202,11 -1166,10 +1199,11 @@@ int ip6_ins_rt(struct net *net, struct return __ip6_ins_rt(rt, &info, NULL); }
-static struct rt6_info *ip6_rt_cache_alloc(struct fib6_info *ort, +static struct rt6_info *ip6_rt_cache_alloc(const struct fib6_result *res, const struct in6_addr *daddr, const struct in6_addr *saddr) { + struct fib6_info *f6i = res->f6i; struct net_device *dev; struct rt6_info *rt;
@@@ -1214,25 -1177,25 +1211,25 @@@ * Clone the route. */
- if (!fib6_info_hold_safe(ort)) + if (!fib6_info_hold_safe(f6i)) return NULL;
- dev = ip6_rt_get_dev_rcu(ort); + dev = ip6_rt_get_dev_rcu(res); rt = ip6_dst_alloc(dev_net(dev), dev, 0); if (!rt) { - fib6_info_release(ort); + fib6_info_release(f6i); return NULL; }
- ip6_rt_copy_init(rt, ort); + ip6_rt_copy_init(rt, res); rt->rt6i_flags |= RTF_CACHE; rt->dst.flags |= DST_HOST; rt->rt6i_dst.addr = *daddr; rt->rt6i_dst.plen = 128;
- if (!rt6_is_gw_or_nonexthop(ort)) { - if (ort->fib6_dst.plen != 128 && - ipv6_addr_equal(&ort->fib6_dst.addr, daddr)) + if (!rt6_is_gw_or_nonexthop(res)) { + if (f6i->fib6_dst.plen != 128 && + ipv6_addr_equal(&f6i->fib6_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { @@@ -1245,56 -1208,55 +1242,56 @@@ return rt; }
-static struct rt6_info *ip6_rt_pcpu_alloc(struct fib6_info *rt) +static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res) { - unsigned short flags = fib6_info_dst_flags(rt); + struct fib6_info *f6i = res->f6i; + unsigned short flags = fib6_info_dst_flags(f6i); struct net_device *dev; struct rt6_info *pcpu_rt;
- if (!fib6_info_hold_safe(rt)) + if (!fib6_info_hold_safe(f6i)) return NULL;
rcu_read_lock(); - dev = ip6_rt_get_dev_rcu(rt); + dev = ip6_rt_get_dev_rcu(res); pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags); rcu_read_unlock(); if (!pcpu_rt) { - fib6_info_release(rt); + fib6_info_release(f6i); return NULL; } - ip6_rt_copy_init(pcpu_rt, rt); + ip6_rt_copy_init(pcpu_rt, res); pcpu_rt->rt6i_flags |= RTF_PCPU; return pcpu_rt; }
/* It should be called with rcu_read_lock() acquired */ -static struct rt6_info *rt6_get_pcpu_route(struct fib6_info *rt) +static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) { struct rt6_info *pcpu_rt, **p;
- p = this_cpu_ptr(rt->rt6i_pcpu); + p = this_cpu_ptr(res->f6i->rt6i_pcpu); pcpu_rt = *p;
if (pcpu_rt) - ip6_hold_safe(NULL, &pcpu_rt, false); + ip6_hold_safe(NULL, &pcpu_rt);
return pcpu_rt; }
static struct rt6_info *rt6_make_pcpu_route(struct net *net, - struct fib6_info *rt) + const struct fib6_result *res) { struct rt6_info *pcpu_rt, *prev, **p;
- pcpu_rt = ip6_rt_pcpu_alloc(rt); + pcpu_rt = ip6_rt_pcpu_alloc(res); if (!pcpu_rt) { dst_hold(&net->ipv6.ip6_null_entry->dst); return net->ipv6.ip6_null_entry; }
dst_hold(&pcpu_rt->dst); - p = this_cpu_ptr(rt->rt6i_pcpu); + p = this_cpu_ptr(res->f6i->rt6i_pcpu); prev = cmpxchg(p, NULL, pcpu_rt); BUG_ON(prev);
@@@ -1323,9 -1285,7 +1320,7 @@@ static void rt6_remove_exception(struc /* purge completely the exception to allow releasing the held resources: * some [sk] cache may keep the dst around for unlimited time */ - from = rcu_dereference_protected(rt6_ex->rt6i->from, - lockdep_is_held(&rt6_exception_lock)); - rcu_assign_pointer(rt6_ex->rt6i->from, NULL); + from = xchg((__force struct fib6_info **)&rt6_ex->rt6i->from, NULL); fib6_info_release(from); dst_dev_put(&rt6_ex->rt6i->dst);
@@@ -1437,15 -1397,14 +1432,15 @@@ __rt6_find_exception_rcu(struct rt6_exc return NULL; }
-static unsigned int fib6_mtu(const struct fib6_info *rt) +static unsigned int fib6_mtu(const struct fib6_result *res) { + const struct fib6_nh *nh = res->nh; unsigned int mtu;
- if (rt->fib6_pmtu) { - mtu = rt->fib6_pmtu; + if (res->f6i->fib6_pmtu) { + mtu = res->f6i->fib6_pmtu; } else { - struct net_device *dev = fib6_info_nh_dev(rt); + struct net_device *dev = nh->fib_nh_dev; struct inet6_dev *idev;
rcu_read_lock(); @@@ -1456,27 -1415,26 +1451,27 @@@
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
- return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu); + return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu); }
static int rt6_insert_exception(struct rt6_info *nrt, - struct fib6_info *ort) + const struct fib6_result *res) { struct net *net = dev_net(nrt->dst.dev); struct rt6_exception_bucket *bucket; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; + struct fib6_info *f6i = res->f6i; int err = 0;
spin_lock_bh(&rt6_exception_lock);
- if (ort->exception_bucket_flushed) { + if (f6i->exception_bucket_flushed) { err = -EINVAL; goto out; }
- bucket = rcu_dereference_protected(ort->rt6i_exception_bucket, + bucket = rcu_dereference_protected(f6i->rt6i_exception_bucket, lockdep_is_held(&rt6_exception_lock)); if (!bucket) { bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket), @@@ -1485,24 -1443,24 +1480,24 @@@ err = -ENOMEM; goto out; } - rcu_assign_pointer(ort->rt6i_exception_bucket, bucket); + rcu_assign_pointer(f6i->rt6i_exception_bucket, bucket); }
#ifdef CONFIG_IPV6_SUBTREES - /* rt6i_src.plen != 0 indicates ort is in subtree + /* fib6_src.plen != 0 indicates f6i is in subtree * and exception table is indexed by a hash of - * both rt6i_dst and rt6i_src. + * both fib6_dst and fib6_src. * Otherwise, the exception table is indexed by - * a hash of only rt6i_dst. + * a hash of only fib6_dst. */ - if (ort->fib6_src.plen) + if (f6i->fib6_src.plen) src_key = &nrt->rt6i_src.addr; #endif - /* rt6_mtu_change() might lower mtu on ort. + /* rt6_mtu_change() might lower mtu on f6i. * Only insert this exception route if its mtu - * is less than ort's mtu value. + * is less than f6i's mtu value. */ - if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(ort)) { + if (dst_metric_raw(&nrt->dst, RTAX_MTU) >= fib6_mtu(res)) { err = -EINVAL; goto out; } @@@ -1531,9 -1489,9 +1526,9 @@@ out
/* Update fn->fn_sernum to invalidate all cached dst */ if (!err) { - spin_lock_bh(&ort->fib6_table->tb6_lock); - fib6_update_sernum(net, ort); - spin_unlock_bh(&ort->fib6_table->tb6_lock); + spin_lock_bh(&f6i->fib6_table->tb6_lock); + fib6_update_sernum(net, f6i); + spin_unlock_bh(&f6i->fib6_table->tb6_lock); fib6_force_start_gc(net); }
@@@ -1570,33 -1528,33 +1565,33 @@@ out /* Find cached rt in the hash table inside passed in rt * Caller has to hold rcu_read_lock() */ -static struct rt6_info *rt6_find_cached_rt(struct fib6_info *rt, +static struct rt6_info *rt6_find_cached_rt(const struct fib6_result *res, struct in6_addr *daddr, struct in6_addr *saddr) { struct rt6_exception_bucket *bucket; struct in6_addr *src_key = NULL; struct rt6_exception *rt6_ex; - struct rt6_info *res = NULL; + struct rt6_info *ret = NULL;
- bucket = rcu_dereference(rt->rt6i_exception_bucket); + bucket = rcu_dereference(res->f6i->rt6i_exception_bucket);
#ifdef CONFIG_IPV6_SUBTREES - /* rt6i_src.plen != 0 indicates rt is in subtree + /* fib6_src.plen != 0 indicates f6i is in subtree * and exception table is indexed by a hash of - * both rt6i_dst and rt6i_src. + * both fib6_dst and fib6_src. * Otherwise, the exception table is indexed by - * a hash of only rt6i_dst. + * a hash of only fib6_dst. */ - if (rt->fib6_src.plen) + if (res->f6i->fib6_src.plen) src_key = saddr; #endif rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key);
if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) - res = rt6_ex->rt6i; + ret = rt6_ex->rt6i;
- return res; + return ret; }
/* Remove the passed in cached rt from the hash table that contains it */ @@@ -1844,10 -1802,11 +1839,10 @@@ void rt6_age_exceptions(struct fib6_inf }
/* must be called with rcu lock held */ -struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table, - int oif, struct flowi6 *fl6, int strict) +int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, + struct flowi6 *fl6, struct fib6_result *res, int strict) { struct fib6_node *fn, *saved_fn; - struct fib6_info *f6i;
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; @@@ -1856,8 -1815,8 +1851,8 @@@ oif = 0;
redo_rt6_select: - f6i = rt6_select(net, fn, oif, strict); - if (f6i == net->ipv6.fib6_null_entry) { + rt6_select(net, fn, oif, res, strict); + if (res->f6i == net->ipv6.fib6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto redo_rt6_select; @@@ -1869,16 -1828,16 +1864,16 @@@ } }
- trace_fib6_table_lookup(net, f6i, table, fl6); + trace_fib6_table_lookup(net, res, table, fl6);
- return f6i; + return 0; }
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, const struct sk_buff *skb, int flags) { - struct fib6_info *f6i; + struct fib6_result res = {}; struct rt6_info *rt; int strict = 0;
@@@ -1889,26 -1848,27 +1884,26 @@@
rcu_read_lock();
- f6i = fib6_table_lookup(net, table, oif, fl6, strict); - if (f6i->fib6_nsiblings) - f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict); - - if (f6i == net->ipv6.fib6_null_entry) { + fib6_table_lookup(net, table, oif, fl6, &res, strict); + if (res.f6i == net->ipv6.fib6_null_entry) { rt = net->ipv6.ip6_null_entry; rcu_read_unlock(); dst_hold(&rt->dst); return rt; }
+ fib6_select_path(net, &res, fl6, oif, false, skb, strict); + /* Search through exception table */ - rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); + rt = rt6_find_cached_rt(&res, &fl6->daddr, &fl6->saddr); if (rt) { - if (ip6_hold_safe(net, &rt, true)) + if (ip6_hold_safe(net, &rt)) dst_use_noref(&rt->dst, jiffies);
rcu_read_unlock(); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && - !(f6i->fib6_flags & RTF_GATEWAY))) { + !res.nh->fib_nh_gw_family)) { /* Create a RTF_CACHE clone which will not be * owned by the fib6 tree. It is for the special case where * the daddr in the skb during the neighbor look-up is different @@@ -1916,7 -1876,7 +1911,7 @@@ */ struct rt6_info *uncached_rt;
- uncached_rt = ip6_rt_cache_alloc(f6i, &fl6->daddr, NULL); + uncached_rt = ip6_rt_cache_alloc(&res, &fl6->daddr, NULL);
rcu_read_unlock();
@@@ -1938,10 -1898,10 +1933,10 @@@ struct rt6_info *pcpu_rt;
local_bh_disable(); - pcpu_rt = rt6_get_pcpu_route(f6i); + pcpu_rt = rt6_get_pcpu_route(&res);
if (!pcpu_rt) - pcpu_rt = rt6_make_pcpu_route(net, f6i); + pcpu_rt = rt6_make_pcpu_route(net, &res);
local_bh_enable(); rcu_read_unlock(); @@@ -2360,23 -2320,19 +2355,23 @@@ static void __ip6_rt_update_pmtu(struc if (rt6->rt6i_flags & RTF_CACHE) rt6_update_exception_stamp_rt(rt6); } else if (daddr) { - struct fib6_info *from; + struct fib6_result res = {}; struct rt6_info *nrt6;
rcu_read_lock(); - from = rcu_dereference(rt6->from); - if (!from) { + res.f6i = rcu_dereference(rt6->from); + if (!res.f6i) { rcu_read_unlock(); return; } - nrt6 = ip6_rt_cache_alloc(from, daddr, saddr); + res.nh = &res.f6i->fib6_nh; + res.fib6_flags = res.f6i->fib6_flags; + res.fib6_type = res.f6i->fib6_type; + + nrt6 = ip6_rt_cache_alloc(&res, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); - if (rt6_insert_exception(nrt6, from)) + if (rt6_insert_exception(nrt6, &res)) dst_release_immediate(&nrt6->dst); } rcu_read_unlock(); @@@ -2449,36 -2405,6 +2444,36 @@@ void ip6_sk_dst_store_flow(struct sock NULL); }
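The same three-line populate sequence recurs in this file wherever a fib6_result must be rebuilt from a bare fib6_info (PMTU update above, redirect input and output below). A hypothetical helper capturing the pattern; the diff itself open-codes it at each site:

	static void fib6_result_from_f6i(struct fib6_result *res,
					 struct fib6_info *f6i)
	{
		res->f6i	= f6i;
		res->nh		= &f6i->fib6_nh;
		res->fib6_flags	= f6i->fib6_flags;
		res->fib6_type	= f6i->fib6_type;
	}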
+static bool ip6_redirect_nh_match(const struct fib6_result *res, + struct flowi6 *fl6, + const struct in6_addr *gw, + struct rt6_info **ret) +{ + const struct fib6_nh *nh = res->nh; + + if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family || + fl6->flowi6_oif != nh->fib_nh_dev->ifindex) + return false; + + /* rt_cache's gateway might be different from its 'parent' + * in the case of an ip redirect. + * So we keep searching in the exception table if the gateway + * is different. + */ + if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) { + struct rt6_info *rt_cache; + + rt_cache = rt6_find_cached_rt(res, &fl6->daddr, &fl6->saddr); + if (rt_cache && + ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) { + *ret = rt_cache; + return true; + } + return false; + } + return true; +} + /* Handle redirects */ struct ip6rd_flowi { struct flowi6 fl6; @@@ -2492,8 -2418,7 +2487,8 @@@ static struct rt6_info *__ip6_route_red int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *ret = NULL, *rt_cache; + struct rt6_info *ret = NULL; + struct fib6_result res = {}; struct fib6_info *rt; struct fib6_node *fn;
@@@ -2511,15 -2436,34 +2506,15 @@@ fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: for_each_fib6_node_rt_rcu(fn) { - if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) - continue; + res.f6i = rt; + res.nh = &rt->fib6_nh; + if (fib6_check_expired(rt)) continue; if (rt->fib6_flags & RTF_REJECT) break; - if (!(rt->fib6_flags & RTF_GATEWAY)) - continue; - if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex) - continue; - /* rt_cache's gateway might be different from its 'parent' - * in the case of an ip redirect. - * So we keep searching in the exception table if the gateway - * is different. - */ - if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) { - rt_cache = rt6_find_cached_rt(rt, - &fl6->daddr, - &fl6->saddr); - if (rt_cache && - ipv6_addr_equal(&rdfl->gateway, - &rt_cache->rt6i_gateway)) { - ret = rt_cache; - break; - } - continue; - } - break; + if (ip6_redirect_nh_match(&res, fl6, &rdfl->gateway, &ret)) + goto out; }
if (!rt) @@@ -2535,20 -2479,15 +2530,20 @@@ goto restart; }
+ res.f6i = rt; + res.nh = &rt->fib6_nh; out: - if (ret) - ip6_hold_safe(net, &ret, true); - else - ret = ip6_create_rt_rcu(rt); + if (ret) { + ip6_hold_safe(net, &ret); + } else { + res.fib6_flags = res.f6i->fib6_flags; + res.fib6_type = res.f6i->fib6_type; + ret = ip6_create_rt_rcu(&res); + }
rcu_read_unlock();
- trace_fib6_table_lookup(net, rt, table, fl6); + trace_fib6_table_lookup(net, &res, table, fl6); return ret; };
@@@ -2666,15 -2605,12 +2661,15 @@@ out * based on ip6_dst_mtu_forward and exception logic of * rt6_find_cached_rt; called with rcu_read_lock */ -u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, - struct in6_addr *saddr) +u32 ip6_mtu_from_fib6(const struct fib6_result *res, + const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct rt6_exception_bucket *bucket; + const struct fib6_nh *nh = res->nh; + struct fib6_info *f6i = res->f6i; + const struct in6_addr *src_key; struct rt6_exception *rt6_ex; - struct in6_addr *src_key; struct inet6_dev *idev; u32 mtu = 0;
@@@ -2696,7 -2632,7 +2691,7 @@@ mtu = dst_metric_raw(&rt6_ex->rt6i->dst, RTAX_MTU);
if (likely(!mtu)) { - struct net_device *dev = fib6_info_nh_dev(f6i); + struct net_device *dev = nh->fib_nh_dev;
mtu = IPV6_MIN_MTU; idev = __in6_dev_get(dev); @@@ -2706,7 -2642,7 +2701,7 @@@
mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); out: - return mtu - lwtunnel_headroom(fib6_info_nh_lwt(f6i), mtu); + return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu); }
struct dst_entry *icmp6_dst_alloc(struct net_device *dev, @@@ -2962,143 -2898,17 +2957,143 @@@ out return err; }
+static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type) +{ + if ((flags & RTF_REJECT) || + (dev && (dev->flags & IFF_LOOPBACK) && + !(addr_type & IPV6_ADDR_LOOPBACK) && + !(flags & RTF_LOCAL))) + return true; + + return false; +} + +int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = NULL; + struct inet6_dev *idev = NULL; + int addr_type; + int err; + + fib6_nh->fib_nh_family = AF_INET6; + + err = -ENODEV; + if (cfg->fc_ifindex) { + dev = dev_get_by_index(net, cfg->fc_ifindex); + if (!dev) + goto out; + idev = in6_dev_get(dev); + if (!idev) + goto out; + } + + if (cfg->fc_flags & RTNH_F_ONLINK) { + if (!dev) { + NL_SET_ERR_MSG(extack, + "Nexthop device required for onlink"); + goto out; + } + + if (!(dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } + + fib6_nh->fib_nh_flags |= RTNH_F_ONLINK; + } + + fib6_nh->fib_nh_weight = 1; + + /* We cannot add true routes via loopback here, + * they would result in kernel looping; promote them to reject routes + */ + addr_type = ipv6_addr_type(&cfg->fc_dst); + if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) { + /* hold loopback dev/idev if we haven't done so. */ + if (dev != net->loopback_dev) { + if (dev) { + dev_put(dev); + in6_dev_put(idev); + } + dev = net->loopback_dev; + dev_hold(dev); + idev = in6_dev_get(dev); + if (!idev) { + err = -ENODEV; + goto out; + } + } + goto set_dev; + } + + if (cfg->fc_flags & RTF_GATEWAY) { + err = ip6_validate_gw(net, cfg, &dev, &idev, extack); + if (err) + goto out; + + fib6_nh->fib_nh_gw6 = cfg->fc_gateway; + fib6_nh->fib_nh_gw_family = AF_INET6; + } + + err = -ENODEV; + if (!dev) + goto out; + + if (idev->cnf.disable_ipv6) { + NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device"); + err = -EACCES; + goto out; + } + + if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } + + if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) && + !netif_carrier_ok(dev)) + fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; + + err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap, + cfg->fc_encap_type, cfg, gfp_flags, extack); + if (err) + goto out; +set_dev: + fib6_nh->fib_nh_dev = dev; + fib6_nh->fib_nh_oif = dev->ifindex; + err = 0; +out: + if (idev) + in6_dev_put(idev); + + if (err) { + lwtstate_put(fib6_nh->fib_nh_lws); + fib6_nh->fib_nh_lws = NULL; + if (dev) + dev_put(dev); + } + + return err; +} + +void fib6_nh_release(struct fib6_nh *fib6_nh) +{ + fib_nh_common_release(&fib6_nh->nh_common); +} + static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { struct net *net = cfg->fc_nlinfo.nl_net; struct fib6_info *rt = NULL; - struct net_device *dev = NULL; - struct inet6_dev *idev = NULL; struct fib6_table *table; - int addr_type; int err = -EINVAL; + int addr_type;
/* RTF_PCPU is an internal flag; can not be set by userspace */ if (cfg->fc_flags & RTF_PCPU) { @@@ -3132,6 -2942,33 +3127,6 @@@ goto out; } #endif - if (cfg->fc_ifindex) { - err = -ENODEV; - dev = dev_get_by_index(net, cfg->fc_ifindex); - if (!dev) - goto out; - idev = in6_dev_get(dev); - if (!idev) - goto out; - } - - if (cfg->fc_metric == 0) - cfg->fc_metric = IP6_RT_PRIO_USER; - - if (cfg->fc_flags & RTNH_F_ONLINK) { - if (!dev) { - NL_SET_ERR_MSG(extack, - "Nexthop device required for onlink"); - err = -ENODEV; - goto out; - } - - if (!(dev->flags & IFF_UP)) { - NL_SET_ERR_MSG(extack, "Nexthop device is not up"); - err = -ENETDOWN; - goto out; - } - }
err = -ENOBUFS; if (cfg->fc_nlinfo.nlh && @@@ -3175,10 -3012,18 +3170,10 @@@ cfg->fc_protocol = RTPROT_BOOT; rt->fib6_protocol = cfg->fc_protocol;
- addr_type = ipv6_addr_type(&cfg->fc_dst); - - if (cfg->fc_encap) { - struct lwtunnel_state *lwtstate; - - err = lwtunnel_build_state(cfg->fc_encap_type, - cfg->fc_encap, AF_INET6, cfg, - &lwtstate, extack); - if (err) - goto out; - rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate); - } + rt->fib6_table = table; + rt->fib6_metric = cfg->fc_metric; + rt->fib6_type = cfg->fc_type; + rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY;
ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->fib6_dst.plen = cfg->fc_dst_len; @@@ -3189,20 -3034,62 +3184,20 @@@ ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->fib6_src.plen = cfg->fc_src_len; #endif - - rt->fib6_metric = cfg->fc_metric; - rt->fib6_nh.nh_weight = 1; - - rt->fib6_type = cfg->fc_type; + err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack); + if (err) + goto out;
/* We cannot add true routes via loopback here, - they would result in kernel looping; promote them to reject routes + * they would result in kernel looping; promote them to reject routes */ - if ((cfg->fc_flags & RTF_REJECT) || - (dev && (dev->flags & IFF_LOOPBACK) && - !(addr_type & IPV6_ADDR_LOOPBACK) && - !(cfg->fc_flags & RTF_LOCAL))) { - /* hold loopback dev/idev if we haven't done so. */ - if (dev != net->loopback_dev) { - if (dev) { - dev_put(dev); - in6_dev_put(idev); - } - dev = net->loopback_dev; - dev_hold(dev); - idev = in6_dev_get(dev); - if (!idev) { - err = -ENODEV; - goto out; - } - } - rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP; - goto install_route; - } - - if (cfg->fc_flags & RTF_GATEWAY) { - err = ip6_validate_gw(net, cfg, &dev, &idev, extack); - if (err) - goto out; - - rt->fib6_nh.nh_gw = cfg->fc_gateway; - } - - err = -ENODEV; - if (!dev) - goto out; - - if (idev->cnf.disable_ipv6) { - NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device"); - err = -EACCES; - goto out; - } - - if (!(dev->flags & IFF_UP)) { - NL_SET_ERR_MSG(extack, "Nexthop device is not up"); - err = -ENETDOWN; - goto out; - } + addr_type = ipv6_addr_type(&cfg->fc_dst); + if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type)) + rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP;
if (!ipv6_addr_any(&cfg->fc_prefsrc)) { + struct net_device *dev = fib6_info_nh_dev(rt); + if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { NL_SET_ERR_MSG(extack, "Invalid source address"); err = -EINVAL; @@@ -3213,8 -3100,26 +3208,8 @@@ } else rt->fib6_prefsrc.plen = 0;
- rt->fib6_flags = cfg->fc_flags; - -install_route: - if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) && - !netif_carrier_ok(dev)) - rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; - rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); - rt->fib6_nh.nh_dev = dev; - rt->fib6_table = table; - - if (idev) - in6_dev_put(idev); - return rt; out: - if (dev) - dev_put(dev); - if (idev) - in6_dev_put(idev); - fib6_info_release(rt); return ERR_PTR(err); } @@@ -3355,16 -3260,10 +3350,16 @@@ static int ip6_route_del(struct fib6_co
if (fn) { for_each_fib6_node_rt_rcu(fn) { + struct fib6_nh *nh; + if (cfg->fc_flags & RTF_CACHE) { + struct fib6_result res = { + .f6i = rt, + }; int rc;
- rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, + rt_cache = rt6_find_cached_rt(&res, + &cfg->fc_dst, &cfg->fc_src); if (rt_cache) { rc = ip6_del_cached_rt(rt_cache, cfg); @@@ -3375,14 -3274,12 +3370,14 @@@ } continue; } + + nh = &rt->fib6_nh; if (cfg->fc_ifindex && - (!rt->fib6_nh.nh_dev || - rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex)) + (!nh->fib_nh_dev || + nh->fib_nh_dev->ifindex != cfg->fc_ifindex)) continue; if (cfg->fc_flags & RTF_GATEWAY && - !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw)) + !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6)) continue; if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric) continue; @@@ -3408,10 -3305,10 +3403,10 @@@ static void rt6_do_redirect(struct dst_ { struct netevent_redirect netevent; struct rt6_info *rt, *nrt = NULL; + struct fib6_result res = {}; struct ndisc_options ndopts; struct inet6_dev *in6_dev; struct neighbour *neigh; - struct fib6_info *from; struct rd_msg *msg; int optlen, on_link; u8 *lladdr; @@@ -3494,17 -3391,11 +3489,14 @@@ NDISC_REDIRECT, &ndopts);
rcu_read_lock(); - from = rcu_dereference(rt->from); - if (!from) + res.f6i = rcu_dereference(rt->from); - /* This fib6_info_hold() is safe here because we hold reference to rt - * and rt already holds reference to fib6_info. - */ - fib6_info_hold(res.f6i); - rcu_read_unlock(); ++ if (!res.f6i) + goto out;
- nrt = ip6_rt_cache_alloc(from, &msg->dest, NULL); + res.nh = &res.f6i->fib6_nh; + res.fib6_flags = res.f6i->fib6_flags; + res.fib6_type = res.f6i->fib6_type; + nrt = ip6_rt_cache_alloc(&res, &msg->dest, NULL); if (!nrt) goto out;
@@@ -3514,11 -3405,8 +3506,8 @@@
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
- /* No need to remove rt from the exception table if rt is - * a cached route because rt6_insert_exception() will - * takes care of it - */ + /* rt6_insert_exception() will take care of duplicated exceptions */ - if (rt6_insert_exception(nrt, from)) { + if (rt6_insert_exception(nrt, &res)) { dst_release_immediate(&nrt->dst); goto out; } @@@ -3530,7 -3418,7 +3519,7 @@@ call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
out: - fib6_info_release(res.f6i); + rcu_read_unlock(); neigh_release(neigh); }
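[Note the lifetime change in rt6_do_redirect() above: instead of taking a fib6_info reference and releasing it at out:, the function now holds rcu_read_lock() from the rcu_dereference(rt->from) all the way past rt6_insert_exception(), so res.f6i stays valid without refcounting. The pattern, reduced to a sketch with the route-cache work elided:

	static void redirect_rcu_sketch(struct rt6_info *rt)
	{
		struct fib6_info *f6i;

		rcu_read_lock();
		f6i = rcu_dereference(rt->from);  /* valid only under the lock */
		if (f6i) {
			/* allocate and insert the cached route here */
		}
		rcu_read_unlock();                /* f6i unusable past this point */
	}
]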
@@@ -3556,12 -3444,11 +3545,12 @@@ static struct fib6_info *rt6_get_route_ goto out;
for_each_fib6_node_rt_rcu(fn) { - if (rt->fib6_nh.nh_dev->ifindex != ifindex) + if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex) continue; - if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) + if (!(rt->fib6_flags & RTF_ROUTEINFO) || + !rt->fib6_nh.fib_nh_gw_family) continue; - if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr)) + if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr)) continue; if (!fib6_info_hold_safe(rt)) continue; @@@ -3619,11 -3506,9 +3608,11 @@@ struct fib6_info *rt6_get_dflt_router(s
rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { - if (dev == rt->fib6_nh.nh_dev && + struct fib6_nh *nh = &rt->fib6_nh; + + if (dev == nh->fib_nh_dev && ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && - ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr)) + ipv6_addr_equal(&nh->fib_nh_gw6, addr)) break; } if (rt && !fib6_info_hold_safe(rt)) @@@ -3714,7 -3599,7 +3703,7 @@@ static void rtmsg_to_fib6_config(struc .fc_table = l3mdev_fib_table_by_index(net, rtmsg->rtmsg_ifindex) ? : RT6_TABLE_MAIN, .fc_ifindex = rtmsg->rtmsg_ifindex, - .fc_metric = rtmsg->rtmsg_metric, + .fc_metric = rtmsg->rtmsg_metric ? : IP6_RT_PRIO_USER, .fc_expires = rtmsg->rtmsg_info, .fc_dst_len = rtmsg->rtmsg_dst_len, .fc_src_len = rtmsg->rtmsg_src_len, @@@ -3772,23 -3657,34 +3761,34 @@@ int ipv6_route_ioctl(struct net *net, u
static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) { - int type; struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(dst->dev); + struct inet6_dev *idev; + int type; + + if (netif_is_l3_master(skb->dev) && + dst->dev == net->loopback_dev) + idev = __in6_dev_get_safely(dev_get_by_index_rcu(net, IP6CB(skb)->iif)); + else + idev = ip6_dst_idev(dst); + switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY) { - IP6_INC_STATS(dev_net(dst->dev), - __in6_dev_get_safely(skb->dev), - IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS); break; } /* FALLTHROUGH */ case IPSTATS_MIB_OUTNOROUTES: - IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), - ipstats_mib_noroutes); + IP6_INC_STATS(net, idev, ipstats_mib_noroutes); break; } + + /* Start over by dropping the dst for l3mdev case */ + if (netif_is_l3_master(skb->dev)) + skb_dst_drop(skb); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); kfree_skb(skb); return 0; @@@ -3825,26 -3721,36 +3825,26 @@@ struct fib6_info *addrconf_f6i_alloc(st const struct in6_addr *addr, bool anycast, gfp_t gfp_flags) { - u32 tb_id; - struct net_device *dev = idev->dev; - struct fib6_info *f6i; - - f6i = fib6_info_alloc(gfp_flags); - if (!f6i) - return ERR_PTR(-ENOMEM); + struct fib6_config cfg = { + .fc_table = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL, + .fc_ifindex = idev->dev->ifindex, + .fc_flags = RTF_UP | RTF_ADDRCONF | RTF_NONEXTHOP, + .fc_dst = *addr, + .fc_dst_len = 128, + .fc_protocol = RTPROT_KERNEL, + .fc_nlinfo.nl_net = net, + .fc_ignore_dev_down = true, + };
- f6i->fib6_metrics = ip_fib_metrics_init(net, NULL, 0, NULL); - f6i->dst_nocount = true; - f6i->dst_host = true; - f6i->fib6_protocol = RTPROT_KERNEL; - f6i->fib6_flags = RTF_UP | RTF_NONEXTHOP; if (anycast) { - f6i->fib6_type = RTN_ANYCAST; - f6i->fib6_flags |= RTF_ANYCAST; + cfg.fc_type = RTN_ANYCAST; + cfg.fc_flags |= RTF_ANYCAST; } else { - f6i->fib6_type = RTN_LOCAL; - f6i->fib6_flags |= RTF_LOCAL; + cfg.fc_type = RTN_LOCAL; + cfg.fc_flags |= RTF_LOCAL; }
- f6i->fib6_nh.nh_gw = *addr; - dev_hold(dev); - f6i->fib6_nh.nh_dev = dev; - f6i->fib6_dst.addr = *addr; - f6i->fib6_dst.plen = 128; - tb_id = l3mdev_fib_table(idev->dev) ? : RT6_TABLE_LOCAL; - f6i->fib6_table = fib6_get_table(net, tb_id); - - return f6i; + return ip6_route_info_create(&cfg, gfp_flags, NULL); }
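[Note: the .fc_table initializer above uses the GCC conditional with an omitted middle operand: "a ?: b" evaluates a once and yields it when non-zero, otherwise b. Spelled out for the table selection, as a sketch:

	static u32 pick_table_sketch(struct net_device *dev)
	{
		u32 tb_id = l3mdev_fib_table(dev);  /* 0 when not an l3mdev port */

		return tb_id ? tb_id : RT6_TABLE_LOCAL;
	}
]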
/* remove deleted ip from prefsrc entries */ @@@ -3860,7 -3766,7 +3860,7 @@@ static int fib6_remove_prefsrc(struct f struct net *net = ((struct arg_dev_net_ip *)arg)->net; struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
- if (((void *)rt->fib6_nh.nh_dev == dev || !dev) && + if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) && rt != net->ipv6.fib6_null_entry && ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) { spin_lock_bh(&rt6_exception_lock); @@@ -3882,7 -3788,7 +3882,7 @@@ void rt6_remove_prefsrc(struct inet6_if fib6_clean_all(net, fib6_remove_prefsrc, &adni); }
-#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) +#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT)
/* Remove routers and update dst entries when gateway turn into host. */ static int fib6_clean_tohost(struct fib6_info *rt, void *arg) @@@ -3890,8 -3796,7 +3890,8 @@@ struct in6_addr *gateway = (struct in6_addr *)arg;
if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && - ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) { + rt->fib6_nh.fib_nh_gw_family && + ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) { return -1; }
@@@ -3912,7 -3817,7 +3912,7 @@@ void rt6_clean_tohost(struct net *net, struct arg_netdev_event { const struct net_device *dev; union { - unsigned int nh_flags; + unsigned char nh_flags; unsigned long event; }; }; @@@ -3939,9 -3844,9 +3939,9 @@@ static struct fib6_info *rt6_multipath_
static bool rt6_is_dead(const struct fib6_info *rt) { - if (rt->fib6_nh.nh_flags & RTNH_F_DEAD || - (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && - fib6_ignore_linkdown(rt))) + if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD || + (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN && + ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev))) return true;
return false; @@@ -3953,11 -3858,11 +3953,11 @@@ static int rt6_multipath_total_weight(c int total = 0;
if (!rt6_is_dead(rt)) - total += rt->fib6_nh.nh_weight; + total += rt->fib6_nh.fib_nh_weight;
list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { if (!rt6_is_dead(iter)) - total += iter->fib6_nh.nh_weight; + total += iter->fib6_nh.fib_nh_weight; }
return total; @@@ -3968,11 -3873,11 +3968,11 @@@ static void rt6_upper_bound_set(struct int upper_bound = -1;
if (!rt6_is_dead(rt)) { - *weight += rt->fib6_nh.nh_weight; + *weight += rt->fib6_nh.fib_nh_weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, total) - 1; } - atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound); + atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound); }
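[Note: rt6_upper_bound_set() above implements hash-threshold multipath: each nexthop's cumulative weight is scaled into the 31-bit hash space and stored as an inclusive upper bound. A worked example, assuming two siblings with weights 1 and 3 (total 4):

	static void upper_bound_example(void)
	{
		u64 total = 4, acc = 0;
		int weight[2] = { 1, 3 }, i;

		for (i = 0; i < 2; i++) {
			acc += weight[i];
			/* acc=1 -> bound 0x1fffffff, acc=4 -> bound 0x7fffffff */
			pr_info("bound[%d] = %llx\n", i,
				(unsigned long long)(DIV_ROUND_CLOSEST_ULL(acc << 31, total) - 1));
		}
	}
]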
static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total) @@@ -4015,9 -3920,8 +4015,9 @@@ static int fib6_ifup(struct fib6_info * const struct arg_netdev_event *arg = p_arg; struct net *net = dev_net(arg->dev);
- if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) { - rt->fib6_nh.nh_flags &= ~arg->nh_flags; + if (rt != net->ipv6.fib6_null_entry && + rt->fib6_nh.fib_nh_dev == arg->dev) { + rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags; fib6_update_sernum_upto_root(net, rt); rt6_multipath_rebalance(rt); } @@@ -4025,7 -3929,7 +4025,7 @@@ return 0; }
-void rt6_sync_up(struct net_device *dev, unsigned int nh_flags) +void rt6_sync_up(struct net_device *dev, unsigned char nh_flags) { struct arg_netdev_event arg = { .dev = dev, @@@ -4045,10 -3949,10 +4045,10 @@@ static bool rt6_multipath_uses_dev(cons { struct fib6_info *iter;
- if (rt->fib6_nh.nh_dev == dev) + if (rt->fib6_nh.fib_nh_dev == dev) return true; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) - if (iter->fib6_nh.nh_dev == dev) + if (iter->fib6_nh.fib_nh_dev == dev) return true;
return false; @@@ -4069,12 -3973,12 +4069,12 @@@ static unsigned int rt6_multipath_dead_ struct fib6_info *iter; unsigned int dead = 0;
- if (rt->fib6_nh.nh_dev == down_dev || - rt->fib6_nh.nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.fib_nh_dev == down_dev || + rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) dead++; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) - if (iter->fib6_nh.nh_dev == down_dev || - iter->fib6_nh.nh_flags & RTNH_F_DEAD) + if (iter->fib6_nh.fib_nh_dev == down_dev || + iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD) dead++;
return dead; @@@ -4082,15 -3986,15 +4082,15 @@@
static void rt6_multipath_nh_flags_set(struct fib6_info *rt, const struct net_device *dev, - unsigned int nh_flags) + unsigned char nh_flags) { struct fib6_info *iter;
- if (rt->fib6_nh.nh_dev == dev) - rt->fib6_nh.nh_flags |= nh_flags; + if (rt->fib6_nh.fib_nh_dev == dev) + rt->fib6_nh.fib_nh_flags |= nh_flags; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) - if (iter->fib6_nh.nh_dev == dev) - iter->fib6_nh.nh_flags |= nh_flags; + if (iter->fib6_nh.fib_nh_dev == dev) + iter->fib6_nh.fib_nh_flags |= nh_flags; }
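[Note: the nh_flags parameters in this file shrink from unsigned int to unsigned char because they now mirror struct rtnexthop::rtnh_flags, which is an unsigned char in the rtnetlink ABI. A compile-time check that the RTNH_F_* bits fit in 8 bits, as a sketch (assuming the uapi flag set of this era):

	static void rtnh_flags_fit_u8_check(void)
	{
		BUILD_BUG_ON((RTNH_F_DEAD | RTNH_F_PERVASIVE | RTNH_F_ONLINK |
			      RTNH_F_OFFLOAD | RTNH_F_LINKDOWN |
			      RTNH_F_UNRESOLVED) > 0xff);
	}
]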
/* called with write lock held for table with rt */ @@@ -4105,12 -4009,12 +4105,12 @@@ static int fib6_ifdown(struct fib6_inf
switch (arg->event) { case NETDEV_UNREGISTER: - return rt->fib6_nh.nh_dev == dev ? -1 : 0; + return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0; case NETDEV_DOWN: if (rt->should_flush) return -1; if (!rt->fib6_nsiblings) - return rt->fib6_nh.nh_dev == dev ? -1 : 0; + return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0; if (rt6_multipath_uses_dev(rt, dev)) { unsigned int count;
@@@ -4126,10 -4030,10 +4126,10 @@@ } return -2; case NETDEV_CHANGE: - if (rt->fib6_nh.nh_dev != dev || + if (rt->fib6_nh.fib_nh_dev != dev || rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) break; - rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; + rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN; rt6_multipath_rebalance(rt); break; } @@@ -4185,7 -4089,7 +4185,7 @@@ static int rt6_mtu_change_route(struct Since RFC 1981 doesn't include administrative MTU increase update PMTU increase is a MUST. (i.e. jumbo frame) */ - if (rt->fib6_nh.nh_dev == arg->dev && + if (rt->fib6_nh.fib_nh_dev == arg->dev && !fib6_metric_locked(rt, RTAX_MTU)) { u32 mtu = rt->fib6_pmtu;
@@@ -4239,8 -4143,8 +4239,8 @@@ static int rtm_to_fib6_config(struct sk unsigned int pref; int err;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy, - extack); + err = nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); if (err < 0) goto errout;
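[Note: nlmsg_parse() calls throughout this merge become nlmsg_parse_deprecated(): the old permissive behaviour is kept under the _deprecated name while strict validation becomes the default spelling elsewhere. The wrapper is, in essence (sketch of the helper as introduced in this cycle):

	static inline int nlmsg_parse_deprecated_sketch(const struct nlmsghdr *nlh,
							int hdrlen,
							struct nlattr *tb[],
							int maxtype,
							const struct nla_policy *policy,
							struct netlink_ext_ack *extack)
	{
		/* liberal = accept unknown attrs and trailing data, as before */
		return __nlmsg_parse(nlh, hdrlen, tb, maxtype, policy,
				     NL_VALIDATE_LIBERAL, extack);
	}
]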
@@@ -4476,7 -4380,7 +4476,7 @@@ static int ip6_route_multipath_add(stru goto cleanup; }
- rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1; + rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1;
err = ip6_route_info_append(info->nl_net, &rt6_nh_list, rt, &r_cfg); @@@ -4626,9 -4530,6 +4626,9 @@@ static int inet6_rtm_newroute(struct sk if (err < 0) return err;
+ if (cfg.fc_metric == 0) + cfg.fc_metric = IP6_RT_PRIO_USER; + if (cfg.fc_mp) return ip6_route_multipath_add(&cfg, extack); else @@@ -4643,7 -4544,7 +4643,7 @@@ static size_t rt6_nlmsg_size(struct fib nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ + NLA_ALIGN(sizeof(struct rtnexthop)) + nla_total_size(16) /* RTA_GATEWAY */ - + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate); + + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws);
nexthop_len *= rt->fib6_nsiblings; } @@@ -4661,10 -4562,77 +4661,10 @@@ + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(1) /* RTA_PREF */ - + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate) + + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws) + nexthop_len; }
-static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt, - unsigned int *flags, bool skip_oif) -{ - if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) - *flags |= RTNH_F_DEAD; - - if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) { - *flags |= RTNH_F_LINKDOWN; - - rcu_read_lock(); - if (fib6_ignore_linkdown(rt)) - *flags |= RTNH_F_DEAD; - rcu_read_unlock(); - } - - if (rt->fib6_flags & RTF_GATEWAY) { - if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0) - goto nla_put_failure; - } - - *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK); - if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD) - *flags |= RTNH_F_OFFLOAD; - - /* not needed for multipath encoding b/c it has a rtnexthop struct */ - if (!skip_oif && rt->fib6_nh.nh_dev && - nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex)) - goto nla_put_failure; - - if (rt->fib6_nh.nh_lwtstate && - lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0) - goto nla_put_failure; - - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - -/* add multipath next hop */ -static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt) -{ - const struct net_device *dev = rt->fib6_nh.nh_dev; - struct rtnexthop *rtnh; - unsigned int flags = 0; - - rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); - if (!rtnh) - goto nla_put_failure; - - rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1; - rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; - - if (rt6_nexthop_info(skb, rt, &flags, true) < 0) - goto nla_put_failure; - - rtnh->rtnh_flags = flags; - - /* length of rtnetlink header + attributes */ - rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; - - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct fib6_info *rt, struct dst_entry *dst, struct in6_addr *dest, struct in6_addr *src, @@@ -4777,30 -4745,23 +4777,30 @@@ struct fib6_info *sibling, *next_sibling; struct nlattr *mp;
- mp = nla_nest_start(skb, RTA_MULTIPATH); + mp = nla_nest_start_noflag(skb, RTA_MULTIPATH); if (!mp) goto nla_put_failure;
- if (rt6_add_nexthop(skb, rt) < 0) + if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common, + rt->fib6_nh.fib_nh_weight) < 0) goto nla_put_failure;
list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { - if (rt6_add_nexthop(skb, sibling) < 0) + if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common, + sibling->fib6_nh.fib_nh_weight) < 0) goto nla_put_failure; }
nla_nest_end(skb, mp); } else { - if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0) + unsigned char nh_flags = 0; + + if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common, + &nh_flags, false) < 0) goto nla_put_failure; + + rtm->rtm_flags |= nh_flags; }
if (rt6_flags & RTF_EXPIRES) { @@@ -4826,7 -4787,7 +4826,7 @@@ nla_put_failure static bool fib6_info_uses_dev(const struct fib6_info *f6i, const struct net_device *dev) { - if (f6i->fib6_nh.nh_dev == dev) + if (f6i->fib6_nh.fib_nh_dev == dev) return true;
if (f6i->fib6_nsiblings) { @@@ -4834,7 -4795,7 +4834,7 @@@
list_for_each_entry_safe(sibling, next_sibling, &f6i->fib6_siblings, fib6_siblings) { - if (sibling->fib6_nh.nh_dev == dev) + if (sibling->fib6_nh.fib_nh_dev == dev) return true; } } @@@ -4886,8 -4847,8 +4886,8 @@@ static int inet6_rtm_valid_getroute_req }
if (!netlink_strict_get_check(skb)) - return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv6_policy, extack); + return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack);
rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) || @@@ -4903,8 -4864,8 +4903,8 @@@ return -EINVAL; }
- err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX, - rtm_ipv6_policy, extack); + err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, + rtm_ipv6_policy, extack); if (err) return err;
@@@ -5056,16 -5017,20 +5056,20 @@@ static int inet6_rtm_getroute(struct sk
rcu_read_lock(); from = rcu_dereference(rt->from); - - if (fibmatch) - err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0); - else - err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, - &fl6.saddr, iif, RTM_NEWROUTE, - NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - 0); + if (from) { + if (fibmatch) + err = rt6_fill_node(net, skb, from, NULL, NULL, NULL, + iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + else + err = rt6_fill_node(net, skb, from, dst, &fl6.daddr, + &fl6.saddr, iif, RTM_NEWROUTE, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); + } else { + err = -ENETUNREACH; + } rcu_read_unlock();
if (err < 0) { @@@ -5119,7 -5084,7 +5123,7 @@@ static int ip6_route_dev_notify(struct return NOTIFY_OK;
if (event == NETDEV_REGISTER) { - net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev; + net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev; net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@@ -5454,7 -5419,7 +5458,7 @@@ void __init ip6_route_init_special_entr /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ - init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev; + init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES diff --combined net/mac80211/iface.c index 94459b2b3d2a,02d2e6f11e93..410685d38c46 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@@ -1133,7 -1133,8 +1133,7 @@@ static void ieee80211_uninit(struct net
static u16 ieee80211_netdev_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb); } @@@ -1178,7 -1179,8 +1178,7 @@@ static const struct net_device_ops ieee
static u16 ieee80211_monitor_select_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev, - select_queue_fallback_t fallback) + struct net_device *sb_dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@@ -1225,7 -1227,6 +1225,7 @@@ static void ieee80211_if_setup(struct n static void ieee80211_if_setup_no_queue(struct net_device *dev) { ieee80211_if_setup(dev); + dev->features |= NETIF_F_LLTX; dev->priv_flags |= IFF_NO_QUEUE; }
@@@ -1763,13 -1764,13 +1763,13 @@@ int ieee80211_if_add(struct ieee80211_l txq_size += sizeof(struct txq_info) + local->hw.txq_data_size;
- if (local->ops->wake_tx_queue) + if (local->ops->wake_tx_queue) { if_setup = ieee80211_if_setup_no_queue; - else + } else { if_setup = ieee80211_if_setup; - - if (local->hw.queues >= IEEE80211_NUM_ACS) - txqs = IEEE80211_NUM_ACS; + if (local->hw.queues >= IEEE80211_NUM_ACS) + txqs = IEEE80211_NUM_ACS; + }
ndev = alloc_netdev_mqs(size + txq_size, name, name_assign_type, @@@ -1907,6 -1908,9 +1907,9 @@@ void ieee80211_if_remove(struct ieee802 list_del_rcu(&sdata->list); mutex_unlock(&sdata->local->iflist_mtx);
+ if (sdata->vif.txq) + ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq)); + synchronize_rcu();
if (sdata->dev) { diff --combined net/netlink/genetlink.c index 72668759cd2b,cb69d35c8e6a..79cfa031dc7d --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@@ -362,8 -362,8 +362,8 @@@ int genl_register_family(struct genl_fa } else family->attrbuf = NULL;
- family->id = idr_alloc(&genl_fam_idr, family, - start, end + 1, GFP_KERNEL); + family->id = idr_alloc_cyclic(&genl_fam_idr, family, + start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; goto errout_free; @@@ -536,24 -536,6 +536,24 @@@ static int genl_family_rcv_msg(const st if (ops->dumpit == NULL) return -EOPNOTSUPP;
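[Note on the idr change above: idr_alloc_cyclic() continues after the most recently allocated id and wraps at "end", instead of always handing out the lowest free id, so a just-unregistered generic netlink family id is not immediately reused by the next registration. Usage in isolation, as a sketch:

	static int family_id_alloc_sketch(struct idr *idr, void *family,
					  int start, int end)
	{
		/* returns an id in [start, end], preferring ids after the
		 * most recently allocated one; negative errno on failure */
		return idr_alloc_cyclic(idr, family, start, end + 1, GFP_KERNEL);
	}
]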
+ if (!(ops->validate & GENL_DONT_VALIDATE_DUMP)) { + unsigned int validate = NL_VALIDATE_STRICT; + int hdrlen = GENL_HDRLEN + family->hdrsize; + + if (ops->validate & GENL_DONT_VALIDATE_DUMP_STRICT) + validate = NL_VALIDATE_LIBERAL; + + if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) + return -EINVAL; + + rc = __nla_validate(nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), + family->maxattr, family->policy, + validate, extack); + if (rc) + return rc; + } + if (!family->parallel_ops) { struct netlink_dump_control c = { .module = family->module, @@@ -595,13 -577,8 +595,13 @@@ attrbuf = family->attrbuf;
if (attrbuf) { - err = nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, - ops->policy, extack); + enum netlink_validation validate = NL_VALIDATE_STRICT; + + if (ops->validate & GENL_DONT_VALIDATE_STRICT) + validate = NL_VALIDATE_LIBERAL; + + err = __nlmsg_parse(nlh, hdrlen, attrbuf, family->maxattr, + family->policy, validate, extack); if (err < 0) goto out; } @@@ -688,7 -665,7 +688,7 @@@ static int ctrl_fill_info(const struct struct nlattr *nla_ops; int i;
- nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); + nla_ops = nla_nest_start_noflag(skb, CTRL_ATTR_OPS); if (nla_ops == NULL) goto nla_put_failure;
@@@ -701,10 -678,10 +701,10 @@@ op_flags |= GENL_CMD_CAP_DUMP; if (ops->doit) op_flags |= GENL_CMD_CAP_DO; - if (ops->policy) + if (family->policy) op_flags |= GENL_CMD_CAP_HASPOL;
- nest = nla_nest_start(skb, i + 1); + nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure;
@@@ -722,7 -699,7 +722,7 @@@ struct nlattr *nla_grps; int i;
- nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS); + nla_grps = nla_nest_start_noflag(skb, CTRL_ATTR_MCAST_GROUPS); if (nla_grps == NULL) goto nla_put_failure;
@@@ -732,7 -709,7 +732,7 @@@
grp = &family->mcgrps[i];
- nest = nla_nest_start(skb, i + 1); + nest = nla_nest_start_noflag(skb, i + 1); if (nest == NULL) goto nla_put_failure;
@@@ -772,11 -749,11 +772,11 @@@ static int ctrl_fill_mcgrp_info(const s nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, family->id)) goto nla_put_failure;
- nla_grps = nla_nest_start(skb, CTRL_ATTR_MCAST_GROUPS); + nla_grps = nla_nest_start_noflag(skb, CTRL_ATTR_MCAST_GROUPS); if (nla_grps == NULL) goto nla_put_failure;
- nest = nla_nest_start(skb, 1); + nest = nla_nest_start_noflag(skb, 1); if (nest == NULL) goto nla_put_failure;
@@@ -961,9 -938,9 +961,9 @@@ static int genl_ctrl_event(int event, c static const struct genl_ops genl_ctrl_ops[] = { { .cmd = CTRL_CMD_GETFAMILY, + .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = ctrl_getfamily, .dumpit = ctrl_dumpfamily, - .policy = ctrl_policy, }, };
@@@ -981,7 -958,6 +981,7 @@@ static struct genl_family genl_ctrl __r .name = "nlctrl", .version = 0x2, .maxattr = CTRL_ATTR_MAX, + .policy = ctrl_policy, .netnsok = true, };
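[Note: the per-op .policy pointer is gone; the attribute policy now hangs off the family (.policy = ctrl_policy above), which lets the core validate dump requests too and keeps the CTRL_CMD_CAP_HASPOL flag derived from family->policy. A hypothetical registration under the new layout, names invented for illustration:

	static const struct genl_ops demo_ops[] = {
		{
			.cmd  = 1,     /* hypothetical command */
			.doit = NULL,  /* handler elided */
			/* no .validate flags: fully strict do and dump */
		},
	};

	static struct genl_family demo_family = {
		.name    = "demo",  /* hypothetical */
		.version = 1,
		.maxattr = 3,
		.policy  = NULL,    /* family-wide nla_policy goes here now */
		.ops     = demo_ops,
		.n_ops   = ARRAY_SIZE(demo_ops),
	};
]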
diff --combined net/packet/af_packet.c index 5c4a118d6f96,9b81813dd16a..90d4e3ce00e5 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@@ -275,22 -275,24 +275,22 @@@ static bool packet_use_direct_xmit(cons return po->xmit == packet_direct_xmit; }
-static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb, - struct net_device *sb_dev) -{ - return dev_pick_tx_cpu_id(dev, skb, sb_dev, NULL); -} - static u16 packet_pick_tx_queue(struct sk_buff *skb) { struct net_device *dev = skb->dev; const struct net_device_ops *ops = dev->netdev_ops; + int cpu = raw_smp_processor_id(); u16 queue_index;
+#ifdef CONFIG_XPS + skb->sender_cpu = cpu + 1; +#endif + skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues); if (ops->ndo_select_queue) { - queue_index = ops->ndo_select_queue(dev, skb, NULL, - __packet_pick_tx_queue); + queue_index = ops->ndo_select_queue(dev, skb, NULL); queue_index = netdev_cap_txqueue(dev, queue_index); } else { - queue_index = __packet_pick_tx_queue(dev, skb, NULL); + queue_index = netdev_pick_tx(dev, skb, NULL); }
return queue_index; @@@ -2600,8 -2602,8 +2600,8 @@@ static int tpacket_snd(struct packet_so void *ph; DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); + unsigned char *addr = NULL; int tp_len, size_max; - unsigned char *addr; void *data; int len_sum = 0; int status = TP_STATUS_AVAILABLE; @@@ -2612,7 -2614,6 +2612,6 @@@ if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@@ -2622,10 -2623,13 +2621,13 @@@ sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_put; + if (po->sk.sk_socket->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_put; + addr = saddr->sll_addr; + } }
err = -ENXIO; @@@ -2797,7 -2801,7 +2799,7 @@@ static int packet_snd(struct socket *so struct sk_buff *skb; struct net_device *dev; __be16 proto; - unsigned char *addr; + unsigned char *addr = NULL; int err, reserve = 0; struct sockcm_cookie sockc; struct virtio_net_hdr vnet_hdr = { 0 }; @@@ -2814,7 -2818,6 +2816,6 @@@ if (likely(saddr == NULL)) { dev = packet_cached_dev_get(po); proto = po->num; - addr = NULL; } else { err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) @@@ -2822,10 -2825,13 +2823,13 @@@ if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) goto out; proto = saddr->sll_protocol; - addr = saddr->sll_halen ? saddr->sll_addr : NULL; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - if (addr && dev && saddr->sll_halen < dev->addr_len) - goto out_unlock; + if (sock->type == SOCK_DGRAM) { + if (dev && msg->msg_namelen < dev->addr_len + + offsetof(struct sockaddr_ll, sll_addr)) + goto out_unlock; + addr = saddr->sll_addr; + } }
err = -ENXIO; @@@ -3342,20 -3348,29 +3346,29 @@@ static int packet_recvmsg(struct socke sock_recv_ts_and_drops(msg, sk, skb);
if (msg->msg_name) { + int copy_len; + /* If the address length field is there to be filled * in, we fill it in now. */ if (sock->type == SOCK_PACKET) { __sockaddr_check_size(sizeof(struct sockaddr_pkt)); msg->msg_namelen = sizeof(struct sockaddr_pkt); + copy_len = msg->msg_namelen; } else { struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); + copy_len = msg->msg_namelen; + if (msg->msg_namelen < sizeof(struct sockaddr_ll)) { + memset(msg->msg_name + + offsetof(struct sockaddr_ll, sll_addr), + 0, sizeof(sll->sll_addr)); + msg->msg_namelen = sizeof(struct sockaddr_ll); + } } - memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, - msg->msg_namelen); + memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); }
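[Note: the recvmsg() hunk above fixes the address returned for packet sockets: when the captured link-layer address is shorter than sll_addr, the tail of the sockaddr_ll is zeroed and msg_namelen is rounded up to sizeof(struct sockaddr_ll), so no stale kernel bytes reach userspace. A hypothetical userspace reader that relies on the padded, fixed-size address:

#include <string.h>
#include <sys/socket.h>
#include <linux/if_packet.h>

	static ssize_t read_frame(int fd, void *buf, size_t len)
	{
		struct sockaddr_ll sll;
		socklen_t alen = sizeof(sll);

		memset(&sll, 0, sizeof(sll));
		/* unused trailing sll_addr bytes now read back as zero */
		return recvfrom(fd, buf, len, 0, (struct sockaddr *)&sll, &alen);
	}
]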
if (pkt_sk(sk)->auxdata) { @@@ -4075,6 -4090,11 +4088,6 @@@ static int packet_ioctl(struct socket * spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } - case SIOCGSTAMP: - return sock_get_timestamp(sk, (struct timeval __user *)arg); - case SIOCGSTAMPNS: - return sock_get_timestampns(sk, (struct timespec __user *)arg); - #ifdef CONFIG_INET case SIOCADDRT: case SIOCDELRT: @@@ -4450,7 -4470,6 +4463,7 @@@ static const struct proto_ops packet_op .getname = packet_getname_spkt, .poll = datagram_poll, .ioctl = packet_ioctl, + .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = sock_no_setsockopt, @@@ -4472,7 -4491,6 +4485,7 @@@ static const struct proto_ops packet_op .getname = packet_getname, .poll = packet_poll, .ioctl = packet_ioctl, + .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, diff --combined net/sctp/sm_statefuns.c index 7dfc34b28f4f,713a669d2058..e3f4abe6134e --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@@ -3824,6 -3824,29 +3824,29 @@@ enum sctp_disposition sctp_sf_do_asconf return SCTP_DISPOSITION_CONSUME; }
+ static enum sctp_disposition sctp_send_next_asconf( + struct net *net, + const struct sctp_endpoint *ep, + struct sctp_association *asoc, + const union sctp_subtype type, + struct sctp_cmd_seq *commands) + { + struct sctp_chunk *asconf; + struct list_head *entry; + + if (list_empty(&asoc->addip_chunk_list)) + return SCTP_DISPOSITION_CONSUME; + + entry = asoc->addip_chunk_list.next; + asconf = list_entry(entry, struct sctp_chunk, list); + + list_del_init(entry); + sctp_chunk_hold(asconf); + asoc->addip_last_asconf = asconf; + + return sctp_sf_do_prm_asconf(net, ep, asoc, type, asconf, commands); + } + /* * ADDIP Section 4.3 General rules for address manipulation * When building TLV parameters for the ASCONF Chunk that will add or @@@ -3915,14 -3938,10 +3938,10 @@@ enum sctp_disposition sctp_sf_do_asconf SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
if (!sctp_process_asconf_ack((struct sctp_association *)asoc, - asconf_ack)) { - /* Successfully processed ASCONF_ACK. We can - * release the next asconf if we have one. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_SEND_NEXT_ASCONF, - SCTP_NULL()); - return SCTP_DISPOSITION_CONSUME; - } + asconf_ack)) + return sctp_send_next_asconf(net, ep, + (struct sctp_association *)asoc, + type, commands);
abort = sctp_make_abort(asoc, asconf_ack, sizeof(struct sctp_errhdr)); @@@ -6412,15 -6431,13 +6431,15 @@@ static int sctp_eat_data(const struct s * in sctp_ulpevent_make_rcvmsg will drop the frame if we grow our * memory usage too much */ - if (*sk->sk_prot_creator->memory_pressure) { + if (sk_under_memory_pressure(sk)) { if (sctp_tsnmap_has_gap(map) && (sctp_tsnmap_get_ctsn(map) + 1) == tsn) { pr_debug("%s: under pressure, reneging for tsn:%u\n", __func__, tsn); deliver = SCTP_CMD_RENEGE; - } + } else { + sk_mem_reclaim(sk); + } }
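[Note on the sctp_eat_data() hunk above: sk_under_memory_pressure() replaces the raw *sk_prot_creator->memory_pressure test so that memcg socket pressure is honoured too, and the new else branch hands cached forward allocations back when no renege is needed. The helper is roughly (sketch, trimmed from include/net/sock.h of this era):

	static bool under_pressure_sketch(const struct sock *sk)
	{
		if (!sk->sk_prot->memory_pressure)
			return false;
		if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
		    mem_cgroup_under_socket_pressure(sk->sk_memcg))
			return true;
		return !!*sk->sk_prot->memory_pressure;
	}
]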
/* diff --combined net/tls/tls_device.c index 26f26e71ef3f,14dedb24fa7b..e225c81e6b35 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@@ -89,6 -89,22 +89,6 @@@ static void tls_device_gc_task(struct w } }
-static void tls_device_attach(struct tls_context *ctx, struct sock *sk, - struct net_device *netdev) -{ - if (sk->sk_destruct != tls_device_sk_destruct) { - refcount_set(&ctx->refcount, 1); - dev_hold(netdev); - ctx->netdev = netdev; - spin_lock_irq(&tls_device_lock); - list_add_tail(&ctx->list, &tls_device_list); - spin_unlock_irq(&tls_device_lock); - - ctx->sk_destruct = sk->sk_destruct; - sk->sk_destruct = tls_device_sk_destruct; - } -} - static void tls_device_queue_ctx_destruction(struct tls_context *ctx) { unsigned long flags; @@@ -183,7 -199,7 +183,7 @@@ static void tls_icsk_clean_acked(struc * socket and no in-flight SKBs associated with this * socket, so it is safe to free all the resources. */ -void tls_device_sk_destruct(struct sock *sk) +static void tls_device_sk_destruct(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); @@@ -201,6 -217,7 +201,6 @@@ if (refcount_dec_and_test(&tls_ctx->refcount)) tls_device_queue_ctx_destruction(tls_ctx); } -EXPORT_SYMBOL(tls_device_sk_destruct);
void tls_device_free_resources_tx(struct sock *sk) { @@@ -567,7 -584,7 +567,7 @@@ void handle_device_resync(struct sock *
rx_ctx = tls_offload_ctx_rx(tls_ctx); resync_req = atomic64_read(&rx_ctx->resync_req); - req_seq = ntohl(resync_req >> 32) - ((u32)TLS_HEADER_SIZE - 1); + req_seq = (resync_req >> 32) - ((u32)TLS_HEADER_SIZE - 1); is_req_pending = resync_req;
if (unlikely(is_req_pending) && req_seq == seq && @@@ -580,7 -597,7 +580,7 @@@ static int tls_device_reencrypt(struct sock *sk, struct sk_buff *skb) { struct strp_msg *rxm = strp_msg(skb); - int err = 0, offset = rxm->offset, copy, nsg; + int err = 0, offset = rxm->offset, copy, nsg, data_len, pos; struct sk_buff *skb_iter, *unused; struct scatterlist sg[1]; char *orig_buf, *buf; @@@ -611,25 -628,42 +611,42 @@@ else err = 0;
- copy = min_t(int, skb_pagelen(skb) - offset, - rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE;
- if (skb->decrypted) - skb_store_bits(skb, offset, buf, copy); + if (skb_pagelen(skb) > offset) { + copy = min_t(int, skb_pagelen(skb) - offset, data_len);
- offset += copy; - buf += copy; + if (skb->decrypted) + skb_store_bits(skb, offset, buf, copy);
+ offset += copy; + buf += copy; + } + + pos = skb_pagelen(skb); skb_walk_frags(skb, skb_iter) { - copy = min_t(int, skb_iter->len, - rxm->full_len - offset + rxm->offset - - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + int frag_pos; + + /* Practically all frags must belong to msg if reencrypt + * is needed with current strparser and coalescing logic, + * but strparser may "get optimized", so let's be safe. + */ + if (pos + skb_iter->len <= offset) + goto done_with_frag; + if (pos >= data_len + rxm->offset) + break; + + frag_pos = offset - pos; + copy = min_t(int, skb_iter->len - frag_pos, + data_len + rxm->offset - offset);
if (skb_iter->decrypted) - skb_store_bits(skb_iter, offset, buf, copy); + skb_store_bits(skb_iter, frag_pos, buf, copy);
offset += copy; buf += copy; + done_with_frag: + pos += skb_iter->len; }
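[Note: bookkeeping in the frag walk above, in one place: pos is the absolute offset at which the current fragment starts, offset is the absolute copy cursor, so the in-fragment cursor and the bytes still owed follow directly. The arithmetic, isolated as a sketch:

	static int frag_copy_len_sketch(int pos, int offset, int frag_len,
					int data_len, int rxm_offset)
	{
		int frag_pos = offset - pos;  /* cursor inside this fragment */

		/* bytes owed to this frag, capped by the decrypted payload */
		return min_t(int, frag_len - frag_pos,
			     data_len + rxm_offset - offset);
	}
]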
free_buf: @@@ -665,22 -699,6 +682,22 @@@ int tls_device_decrypted(struct sock *s tls_device_reencrypt(sk, skb); }
+static void tls_device_attach(struct tls_context *ctx, struct sock *sk, + struct net_device *netdev) +{ + if (sk->sk_destruct != tls_device_sk_destruct) { + refcount_set(&ctx->refcount, 1); + dev_hold(netdev); + ctx->netdev = netdev; + spin_lock_irq(&tls_device_lock); + list_add_tail(&ctx->list, &tls_device_list); + spin_unlock_irq(&tls_device_lock); + + ctx->sk_destruct = sk->sk_destruct; + sk->sk_destruct = tls_device_sk_destruct; + } +} + int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) { u16 nonce_size, tag_size, iv_size, rec_seq_size; @@@ -864,6 -882,8 +881,6 @@@ int tls_set_device_offload_rx(struct so }
if (!(netdev->features & NETIF_F_HW_TLS_RX)) { - pr_err_ratelimited("%s: netdev %s with no TLS offload\n", - __func__, netdev->name); rc = -ENOTSUPP; goto release_netdev; } @@@ -891,8 -911,11 +908,8 @@@ rc = netdev->tlsdev_ops->tls_dev_add(netdev, sk, TLS_OFFLOAD_CTX_DIR_RX, &ctx->crypto_recv.info, tcp_sk(sk)->copied_seq); - if (rc) { - pr_err_ratelimited("%s: The netdev has refused to offload this socket\n", - __func__); + if (rc) goto free_sw_resources; - }
tls_device_attach(ctx, sk, netdev); goto release_netdev; diff --combined net/wireless/reg.c index 816425ffe05a,a6fd5ce199da..4831ad745f91 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@@ -427,10 -427,14 +427,10 @@@ static const struct ieee80211_regdomai reg_copy_regd(const struct ieee80211_regdomain *src_regd) { struct ieee80211_regdomain *regd; - int size_of_regd; unsigned int i;
- size_of_regd = - sizeof(struct ieee80211_regdomain) + - src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule); - - regd = kzalloc(size_of_regd, GFP_KERNEL); + regd = kzalloc(struct_size(regd, reg_rules, src_regd->n_reg_rules), + GFP_KERNEL); if (!regd) return ERR_PTR(-ENOMEM);
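[Note: struct_size(p, member, n) above replaces the open-coded trailing-array sizing: it computes sizeof(*p) plus n element sizes with overflow checking, saturating to SIZE_MAX so the kzalloc() fails cleanly instead of allocating a wrapped-around size. The unchecked equivalent of the call above, for comparison:

	static size_t regd_bytes_unchecked(unsigned int n_reg_rules)
	{
		/* what the deleted size_of_regd computed; struct_size() adds
		 * saturating overflow protection on top of this */
		return sizeof(struct ieee80211_regdomain) +
		       n_reg_rules * sizeof(struct ieee80211_reg_rule);
	}
]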
@@@ -944,10 -948,12 +944,10 @@@ static int regdb_query_country(const st unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2; struct fwdb_collection *coll = (void *)((u8 *)db + ptr); struct ieee80211_regdomain *regdom; - unsigned int size_of_regd, i; - - size_of_regd = sizeof(struct ieee80211_regdomain) + - coll->n_rules * sizeof(struct ieee80211_reg_rule); + unsigned int i;
- regdom = kzalloc(size_of_regd, GFP_KERNEL); + regdom = kzalloc(struct_size(regdom, reg_rules, coll->n_rules), + GFP_KERNEL); if (!regdom) return -ENOMEM;
@@@ -1483,7 -1489,7 +1483,7 @@@ static struct ieee80211_regdomain regdom_intersect(const struct ieee80211_regdomain *rd1, const struct ieee80211_regdomain *rd2) { - int r, size_of_regd; + int r; unsigned int x, y; unsigned int num_rules = 0; const struct ieee80211_reg_rule *rule1, *rule2; @@@ -1514,7 -1520,10 +1514,7 @@@ if (!num_rules) return NULL;
- size_of_regd = sizeof(struct ieee80211_regdomain) + - num_rules * sizeof(struct ieee80211_reg_rule); - - rd = kzalloc(size_of_regd, GFP_KERNEL); + rd = kzalloc(struct_size(rd, reg_rules, num_rules), GFP_KERNEL); if (!rd) return NULL;
@@@ -3769,10 -3778,9 +3769,9 @@@ void wiphy_regulatory_register(struct w /* * The last request may have been received before this * registration call. Call the driver notifier if - * initiator is USER and user type is CELL_BASE. + * initiator is USER. */ - if (lr->initiator == NL80211_REGDOM_SET_BY_USER && - lr->user_reg_hint_type == NL80211_USER_REG_HINT_CELL_BASE) + if (lr->initiator == NL80211_REGDOM_SET_BY_USER) reg_call_notifier(wiphy, lr); }
diff --combined net/xfrm/xfrm_interface.c index b9f118530db6,85fec98676d3..ad3a2555c517 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@@ -70,17 -70,28 +70,28 @@@ static struct xfrm_if *xfrmi_lookup(str return NULL; }
- static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb) + static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, + unsigned short family) { struct xfrmi_net *xfrmn; - int ifindex; struct xfrm_if *xi; + int ifindex = 0;
if (!secpath_exists(skb) || !skb->dev) return NULL;
+ switch (family) { + case AF_INET6: + ifindex = inet6_sdif(skb); + break; + case AF_INET: + ifindex = inet_sdif(skb); + break; + } + if (!ifindex) + ifindex = skb->dev->ifindex; + xfrmn = net_generic(xs_net(xfrm_input_state(skb)), xfrmi_net_id); - ifindex = skb->dev->ifindex;
for_each_xfrmi_rcu(xfrmn->xfrmi[0], xi) { if (ifindex == xi->dev->ifindex && @@@ -244,8 -255,8 +255,8 @@@ static void xfrmi_scrub_packet(struct s
static int xfrmi_rcv_cb(struct sk_buff *skb, int err) { + const struct xfrm_mode *inner_mode; struct pcpu_sw_netstats *tstats; - struct xfrm_mode *inner_mode; struct net_device *dev; struct xfrm_state *x; struct xfrm_if *xi; @@@ -273,7 -284,7 +284,7 @@@ xnet = !net_eq(xi->net, dev_net(skb->dev));
if (xnet) { - inner_mode = x->inner_mode; + inner_mode = &x->inner_mode;
if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); @@@ -285,7 -296,7 +296,7 @@@ }
if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, - inner_mode->afinfo->family)) + inner_mode->family)) return -EPERM; }
diff --combined net/xfrm/xfrm_policy.c index 03b6bf85d70b,a6b58df7a70f..410233c5681e --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@@ -27,14 -27,10 +27,14 @@@ #include <linux/cpu.h> #include <linux/audit.h> #include <linux/rhashtable.h> +#include <linux/if_tunnel.h> #include <net/dst.h> #include <net/flow.h> #include <net/xfrm.h> #include <net/ip.h> +#if IS_ENABLED(CONFIG_IPV6_MIP6) +#include <net/mip6.h> +#endif #ifdef CONFIG_XFRM_STATISTICS #include <net/snmp.h> #endif @@@ -2454,10 -2450,18 +2454,10 @@@ xfrm_tmpl_resolve(struct xfrm_policy **
static int xfrm_get_tos(const struct flowi *fl, int family) { - const struct xfrm_policy_afinfo *afinfo; - int tos; + if (family == AF_INET) + return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos;
- afinfo = xfrm_policy_get_afinfo(family); - if (!afinfo) - return 0; - - tos = afinfo->get_tos(fl); - - rcu_read_unlock(); - - return tos; + return 0; }
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) @@@ -2495,14 -2499,21 +2495,14 @@@ return xdst; }
-static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, - int nfheader_len) +static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, + int nfheader_len) { - const struct xfrm_policy_afinfo *afinfo = - xfrm_policy_get_afinfo(dst->ops->family); - int err; - - if (!afinfo) - return -EINVAL; - - err = afinfo->init_path(path, dst, nfheader_len); - - rcu_read_unlock(); - - return err; + if (dst->ops->family == AF_INET6) { + struct rt6_info *rt = (struct rt6_info *)dst; + path->path_cookie = rt6_get_cookie(rt); + path->u.rt6.rt6i_nfheader_len = nfheader_len; + } }
static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, @@@ -2534,11 -2545,10 +2534,11 @@@ static struct dst_entry *xfrm_bundle_cr const struct flowi *fl, struct dst_entry *dst) { + const struct xfrm_state_afinfo *afinfo; + const struct xfrm_mode *inner_mode; struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; - struct xfrm_mode *inner_mode; struct xfrm_dst *xdst_prev = NULL; struct xfrm_dst *xdst0 = NULL; int i = 0; @@@ -2584,7 -2594,7 +2584,7 @@@ goto put_states; } } else - inner_mode = xfrm[i]->inner_mode; + inner_mode = &xfrm[i]->inner_mode;
xdst->route = dst; dst_copy_metrics(dst1, dst); @@@ -2612,14 -2622,7 +2612,14 @@@ dst1->lastuse = now;
dst1->input = dst_discard; - dst1->output = inner_mode->afinfo->output; + + rcu_read_lock(); + afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); + if (likely(afinfo)) + dst1->output = afinfo->output; + else + dst1->output = dst_discard_out; + rcu_read_unlock();
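[Note: the dst output hook above is now resolved through the RCU-protected per-family state afinfo rather than through inner_mode->afinfo. The hook must be copied out under rcu_read_lock(), with dst_discard_out as the fallback when the family is unavailable; isolated as a sketch:

	static void set_output_sketch(struct dst_entry *dst1, int family)
	{
		const struct xfrm_state_afinfo *afinfo;

		rcu_read_lock();
		afinfo = xfrm_state_afinfo_get_rcu(family);
		dst1->output = afinfo ? afinfo->output : dst_discard_out;
		rcu_read_unlock();
	}
]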
xdst_prev = xdst;
@@@ -3260,229 -3263,20 +3260,229 @@@ xfrm_policy_ok(const struct xfrm_tmpl * return start; }
+static void +decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) +{ + const struct iphdr *iph = ip_hdr(skb); + u8 *xprth = skb_network_header(skb) + iph->ihl * 4; + struct flowi4 *fl4 = &fl->u.ip4; + int oif = 0; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; + + memset(fl4, 0, sizeof(struct flowi4)); + fl4->flowi4_mark = skb->mark; + fl4->flowi4_oif = reverse ? skb->skb_iif : oif; + + if (!ip_is_fragment(iph)) { + switch (iph->protocol) { + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_TCP: + case IPPROTO_SCTP: + case IPPROTO_DCCP: + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be16 *ports; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ports = (__be16 *)xprth; + + fl4->fl4_sport = ports[!!reverse]; + fl4->fl4_dport = ports[!reverse]; + } + break; + case IPPROTO_ICMP: + if (xprth + 2 < skb->data || + pskb_may_pull(skb, xprth + 2 - skb->data)) { + u8 *icmp; + + xprth = skb_network_header(skb) + iph->ihl * 4; + icmp = xprth; + + fl4->fl4_icmp_type = icmp[0]; + fl4->fl4_icmp_code = icmp[1]; + } + break; + case IPPROTO_ESP: + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be32 *ehdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ehdr = (__be32 *)xprth; + + fl4->fl4_ipsec_spi = ehdr[0]; + } + break; + case IPPROTO_AH: + if (xprth + 8 < skb->data || + pskb_may_pull(skb, xprth + 8 - skb->data)) { + __be32 *ah_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ah_hdr = (__be32 *)xprth; + + fl4->fl4_ipsec_spi = ah_hdr[1]; + } + break; + case IPPROTO_COMP: + if (xprth + 4 < skb->data || + pskb_may_pull(skb, xprth + 4 - skb->data)) { + __be16 *ipcomp_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + ipcomp_hdr = (__be16 *)xprth; + + fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); + } + break; + case IPPROTO_GRE: + if (xprth + 12 < skb->data || + pskb_may_pull(skb, xprth + 12 - skb->data)) { + __be16 *greflags; + __be32 *gre_hdr; + + xprth = skb_network_header(skb) + iph->ihl * 4; + greflags = (__be16 *)xprth; + gre_hdr = (__be32 *)xprth; + + if (greflags[0] & GRE_KEY) { + if (greflags[0] & GRE_CSUM) + gre_hdr++; + fl4->fl4_gre_key = gre_hdr[1]; + } + } + break; + default: + fl4->fl4_ipsec_spi = 0; + break; + } + } + fl4->flowi4_proto = iph->protocol; + fl4->daddr = reverse ? iph->saddr : iph->daddr; + fl4->saddr = reverse ? iph->daddr : iph->saddr; + fl4->flowi4_tos = iph->tos; +} + +#if IS_ENABLED(CONFIG_IPV6) +static void +decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) +{ + struct flowi6 *fl6 = &fl->u.ip6; + int onlyproto = 0; + const struct ipv6hdr *hdr = ipv6_hdr(skb); + u32 offset = sizeof(*hdr); + struct ipv6_opt_hdr *exthdr; + const unsigned char *nh = skb_network_header(skb); + u16 nhoff = IP6CB(skb)->nhoff; + int oif = 0; + u8 nexthdr; + + if (!nhoff) + nhoff = offsetof(struct ipv6hdr, nexthdr); + + nexthdr = nh[nhoff]; + + if (skb_dst(skb)) + oif = skb_dst(skb)->dev->ifindex; + + memset(fl6, 0, sizeof(struct flowi6)); + fl6->flowi6_mark = skb->mark; + fl6->flowi6_oif = reverse ? skb->skb_iif : oif; + + fl6->daddr = reverse ? hdr->saddr : hdr->daddr; + fl6->saddr = reverse ? 
hdr->daddr : hdr->saddr; + + while (nh + offset + sizeof(*exthdr) < skb->data || + pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) { + nh = skb_network_header(skb); + exthdr = (struct ipv6_opt_hdr *)(nh + offset); + + switch (nexthdr) { + case NEXTHDR_FRAGMENT: + onlyproto = 1; + /* fall through */ + case NEXTHDR_ROUTING: + case NEXTHDR_HOP: + case NEXTHDR_DEST: + offset += ipv6_optlen(exthdr); + nexthdr = exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr *)(nh + offset); + break; + case IPPROTO_UDP: + case IPPROTO_UDPLITE: + case IPPROTO_TCP: + case IPPROTO_SCTP: + case IPPROTO_DCCP: + if (!onlyproto && (nh + offset + 4 < skb->data || + pskb_may_pull(skb, nh + offset + 4 - skb->data))) { + __be16 *ports; + + nh = skb_network_header(skb); + ports = (__be16 *)(nh + offset); + fl6->fl6_sport = ports[!!reverse]; + fl6->fl6_dport = ports[!reverse]; + } + fl6->flowi6_proto = nexthdr; + return; + case IPPROTO_ICMPV6: + if (!onlyproto && (nh + offset + 2 < skb->data || + pskb_may_pull(skb, nh + offset + 2 - skb->data))) { + u8 *icmp; + + nh = skb_network_header(skb); + icmp = (u8 *)(nh + offset); + fl6->fl6_icmp_type = icmp[0]; + fl6->fl6_icmp_code = icmp[1]; + } + fl6->flowi6_proto = nexthdr; + return; +#if IS_ENABLED(CONFIG_IPV6_MIP6) + case IPPROTO_MH: + offset += ipv6_optlen(exthdr); + if (!onlyproto && (nh + offset + 3 < skb->data || + pskb_may_pull(skb, nh + offset + 3 - skb->data))) { + struct ip6_mh *mh; + + nh = skb_network_header(skb); + mh = (struct ip6_mh *)(nh + offset); + fl6->fl6_mh_type = mh->ip6mh_type; + } + fl6->flowi6_proto = nexthdr; + return; +#endif + /* XXX Why are there these headers? */ + case IPPROTO_AH: + case IPPROTO_ESP: + case IPPROTO_COMP: + default: + fl6->fl6_ipsec_spi = 0; + fl6->flowi6_proto = nexthdr; + return; + } + } +} +#endif + int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned int family, int reverse) { - const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - int err; - - if (unlikely(afinfo == NULL)) + switch (family) { + case AF_INET: + decode_session4(skb, fl, reverse); + break; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + decode_session6(skb, fl, reverse); + break; +#endif + default: return -EAFNOSUPPORT; + }
- afinfo->decode_session(skb, fl, reverse); - - err = security_xfrm_decode_session(skb, &fl->flowi_secid); - rcu_read_unlock(); - return err; + return security_xfrm_decode_session(skb, &fl->flowi_secid); } EXPORT_SYMBOL(__xfrm_decode_session);
@@@ -3519,7 -3313,7 +3519,7 @@@ int __xfrm_policy_check(struct sock *sk ifcb = xfrm_if_get_cb();
if (ifcb) { - xi = ifcb->decode_session(skb); + xi = ifcb->decode_session(skb, family); if (xi) { if_id = xi->p.if_id; net = xi->net; diff --combined net/xfrm/xfrm_state.c index ed25eb81aabe,178baaa037e5..3edbf4b26116 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@@ -173,7 -173,7 +173,7 @@@ static DEFINE_SPINLOCK(xfrm_state_gc_lo int __xfrm_state_delete(struct xfrm_state *x);
int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -bool km_is_alive(const struct km_event *c); +static bool km_is_alive(const struct km_event *c); void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
static DEFINE_SPINLOCK(xfrm_type_lock); @@@ -330,67 -330,100 +330,67 @@@ static void xfrm_put_type_offload(cons module_put(type->owner); }
-static DEFINE_SPINLOCK(xfrm_mode_lock); -int xfrm_register_mode(struct xfrm_mode *mode, int family) -{ - struct xfrm_state_afinfo *afinfo; - struct xfrm_mode **modemap; - int err; - - if (unlikely(mode->encap >= XFRM_MODE_MAX)) - return -EINVAL; - - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - err = -EEXIST; - modemap = afinfo->mode_map; - spin_lock_bh(&xfrm_mode_lock); - if (modemap[mode->encap]) - goto out; - - err = -ENOENT; - if (!try_module_get(afinfo->owner)) - goto out; - - mode->afinfo = afinfo; - modemap[mode->encap] = mode; - err = 0; - -out: - spin_unlock_bh(&xfrm_mode_lock); - rcu_read_unlock(); - return err; -} -EXPORT_SYMBOL(xfrm_register_mode); - -int xfrm_unregister_mode(struct xfrm_mode *mode, int family) -{ - struct xfrm_state_afinfo *afinfo; - struct xfrm_mode **modemap; - int err; - - if (unlikely(mode->encap >= XFRM_MODE_MAX)) - return -EINVAL; - - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - - err = -ENOENT; - modemap = afinfo->mode_map; - spin_lock_bh(&xfrm_mode_lock); - if (likely(modemap[mode->encap] == mode)) { - modemap[mode->encap] = NULL; - module_put(mode->afinfo->owner); - err = 0; - } - - spin_unlock_bh(&xfrm_mode_lock); - rcu_read_unlock(); - return err; -} -EXPORT_SYMBOL(xfrm_unregister_mode); - -static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) -{ - struct xfrm_state_afinfo *afinfo; - struct xfrm_mode *mode; - int modload_attempted = 0; +static const struct xfrm_mode xfrm4_mode_map[XFRM_MODE_MAX] = { + [XFRM_MODE_BEET] = { + .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET, + }, + [XFRM_MODE_TRANSPORT] = { + .encap = XFRM_MODE_TRANSPORT, + .family = AF_INET, + }, + [XFRM_MODE_TUNNEL] = { + .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET, + }, +}; + +static const struct xfrm_mode xfrm6_mode_map[XFRM_MODE_MAX] = { + [XFRM_MODE_BEET] = { + .encap = XFRM_MODE_BEET, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET6, + }, + [XFRM_MODE_ROUTEOPTIMIZATION] = { + .encap = XFRM_MODE_ROUTEOPTIMIZATION, + .family = AF_INET6, + }, + [XFRM_MODE_TRANSPORT] = { + .encap = XFRM_MODE_TRANSPORT, + .family = AF_INET6, + }, + [XFRM_MODE_TUNNEL] = { + .encap = XFRM_MODE_TUNNEL, + .flags = XFRM_MODE_FLAG_TUNNEL, + .family = AF_INET6, + }, +}; + +static const struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family) +{ + const struct xfrm_mode *mode;
if (unlikely(encap >= XFRM_MODE_MAX)) return NULL;
-retry: - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return NULL; - - mode = READ_ONCE(afinfo->mode_map[encap]); - if (unlikely(mode && !try_module_get(mode->owner))) - mode = NULL; - - rcu_read_unlock(); - if (!mode && !modload_attempted) { - request_module("xfrm-mode-%d-%d", family, encap); - modload_attempted = 1; - goto retry; + switch (family) { + case AF_INET: + mode = &xfrm4_mode_map[encap]; + if (mode->family == family) + return mode; + break; + case AF_INET6: + mode = &xfrm6_mode_map[encap]; + if (mode->family == family) + return mode; + break; + default: + break; }
- return mode; -} - -static void xfrm_put_mode(struct xfrm_mode *mode) -{ - module_put(mode->owner); + return NULL; }
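[Note: with the const mode maps above, xfrm mode lookup becomes a table index plus a family check; there is no module to pin or release, which is why the xfrm_put_mode() calls disappear throughout this file. Typical use of the lookup, as a sketch:

	static bool mode_is_tunnel_sketch(unsigned int encap, int family)
	{
		const struct xfrm_mode *mode = xfrm_get_mode(encap, family);

		return mode && (mode->flags & XFRM_MODE_FLAG_TUNNEL);
	}
]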
void xfrm_state_free(struct xfrm_state *x) @@@ -411,6 -444,12 +411,6 @@@ static void ___xfrm_state_destroy(struc kfree(x->coaddr); kfree(x->replay_esn); kfree(x->preplay_esn); - if (x->inner_mode) - xfrm_put_mode(x->inner_mode); - if (x->inner_mode_iaf) - xfrm_put_mode(x->inner_mode_iaf); - if (x->outer_mode) - xfrm_put_mode(x->outer_mode); if (x->type_offload) xfrm_put_type_offload(x->type_offload); if (x->type) { @@@ -551,6 -590,8 +551,6 @@@ struct xfrm_state *xfrm_state_alloc(str x->lft.hard_packet_limit = XFRM_INF; x->replay_maxage = 0; x->replay_maxdiff = 0; - x->inner_mode = NULL; - x->inner_mode_iaf = NULL; spin_lock_init(&x->lock); } return x; @@@ -2025,7 -2066,7 +2025,7 @@@ int km_report(struct net *net, u8 proto } EXPORT_SYMBOL(km_report);
-bool km_is_alive(const struct km_event *c) +static bool km_is_alive(const struct km_event *c) { struct xfrm_mgr *km; bool is_alive = false; @@@ -2041,6 -2082,7 +2041,6 @@@
return is_alive; } -EXPORT_SYMBOL(km_is_alive);
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { @@@ -2153,7 -2195,6 +2153,7 @@@ struct xfrm_state_afinfo *xfrm_state_af
return rcu_dereference(xfrm_state_afinfo[family]); } +EXPORT_SYMBOL_GPL(xfrm_state_afinfo_get_rcu);
struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family) { @@@ -2201,9 -2242,8 +2201,9 @@@ int xfrm_state_mtu(struct xfrm_state *x
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) { - struct xfrm_state_afinfo *afinfo; - struct xfrm_mode *inner_mode; + const struct xfrm_state_afinfo *afinfo; + const struct xfrm_mode *inner_mode; + const struct xfrm_mode *outer_mode; int family = x->props.family; int err;
@@@ -2229,22 -2269,25 +2229,22 @@@ goto error;
if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && - family != x->sel.family) { - xfrm_put_mode(inner_mode); + family != x->sel.family) goto error; - }
- x->inner_mode = inner_mode; + x->inner_mode = *inner_mode; } else { - struct xfrm_mode *inner_mode_iaf; + const struct xfrm_mode *inner_mode_iaf; int iafamily = AF_INET;
inner_mode = xfrm_get_mode(x->props.mode, x->props.family); if (inner_mode == NULL) goto error;
- if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) { - xfrm_put_mode(inner_mode); + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) goto error; - } - x->inner_mode = inner_mode; + + x->inner_mode = *inner_mode;
if (x->props.family == AF_INET) iafamily = AF_INET6; @@@ -2252,7 -2295,9 +2252,7 @@@ inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily); if (inner_mode_iaf) { if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL) - x->inner_mode_iaf = inner_mode_iaf; - else - xfrm_put_mode(inner_mode_iaf); + x->inner_mode_iaf = *inner_mode_iaf; } }
@@@ -2266,13 -2311,12 +2266,13 @@@ if (err) goto error;
- x->outer_mode = xfrm_get_mode(x->props.mode, family); - if (x->outer_mode == NULL) { + outer_mode = xfrm_get_mode(x->props.mode, family); + if (!outer_mode) { err = -EPROTONOSUPPORT; goto error; }
+ x->outer_mode = *outer_mode; if (init_replay) { err = xfrm_init_replay(x); if (err) @@@ -2340,7 -2384,7 +2340,7 @@@ void xfrm_state_fini(struct net *net
flush_work(&net->xfrm.state_hash_work); flush_work(&xfrm_state_gc_work); - xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true); + xfrm_state_flush(net, 0, false, true);
WARN_ON(!list_empty(&net->xfrm.state_all));
diff --combined net/xfrm/xfrm_user.c index d7cb16f0df5b,6916931b1de1..eb8d14389601 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@@ -1006,8 -1006,8 +1006,8 @@@ static int xfrm_dump_sa(struct sk_buff u8 proto = 0; int err;
- err = nlmsg_parse(cb->nlh, 0, attrs, XFRMA_MAX, xfrma_policy, - cb->extack); + err = nlmsg_parse_deprecated(cb->nlh, 0, attrs, XFRMA_MAX, + xfrma_policy, cb->extack); if (err < 0) return err;
@@@ -1424,7 -1424,7 +1424,7 @@@ static int verify_newpolicy_info(struc ret = verify_policy_dir(p->dir); if (ret) return ret; - if (p->index && ((p->index & XFRM_POLICY_MAX) != p->dir)) + if (p->index && (xfrm_policy_id2dir(p->index) != p->dir)) return -EINVAL;
return 0; @@@ -1513,20 -1513,8 +1513,8 @@@ static int validate_tmpl(int nr, struc return -EINVAL; }
- switch (ut[i].id.proto) { - case IPPROTO_AH: - case IPPROTO_ESP: - case IPPROTO_COMP: - #if IS_ENABLED(CONFIG_IPV6) - case IPPROTO_ROUTING: - case IPPROTO_DSTOPTS: - #endif - case IPSEC_PROTO_ANY: - break; - default: + if (!xfrm_id_proto_valid(ut[i].id.proto)) return -EINVAL; - } - }
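[Note: xfrm_id_proto_valid() centralizes the protocol whitelist that the removed switch open-coded, and it deliberately drops IPSEC_PROTO_ANY from what userspace may put in a template; the companion change in xfrm_state_fini() above passes 0 to xfrm_state_flush() for the flush-all case instead. At the time of this merge the helper reads essentially as:

	static inline bool xfrm_id_proto_valid(u8 proto)
	{
		switch (proto) {
		case IPPROTO_AH:
		case IPPROTO_ESP:
		case IPPROTO_COMP:
#if IS_ENABLED(CONFIG_IPV6)
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
#endif
			return true;
		default:
			return false;
		}
	}
]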
return 0; @@@ -2656,9 -2644,9 +2644,9 @@@ static int xfrm_user_rcv_msg(struct sk_ } }
- err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, - link->nla_max ? : XFRMA_MAX, - link->nla_pol ? : xfrma_policy, extack); + err = nlmsg_parse_deprecated(nlh, xfrm_msg_min[type], attrs, + link->nla_max ? : XFRMA_MAX, + link->nla_pol ? : xfrma_policy, extack); if (err < 0) return err;
diff --combined tools/bpf/bpftool/map.c
index e951d45c0131,994a7e0d16fb..3ec82904ccec
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@@ -46,7 -46,6 +46,7 @@@ const char * const map_type_name[] = 
	[BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]	= "percpu_cgroup_storage",
	[BPF_MAP_TYPE_QUEUE]			= "queue",
	[BPF_MAP_TYPE_STACK]			= "stack",
+	[BPF_MAP_TYPE_SK_STORAGE]		= "sk_storage",
};

const size_t map_type_name_size = ARRAY_SIZE(map_type_name);
@@@ -154,13 -153,11 +154,13 @@@ static int do_dump_btf(const struct btf
	/* start of key-value pair */
	jsonw_start_object(d->jw);

-	jsonw_name(d->jw, "key");
+	if (map_info->btf_key_type_id) {
+		jsonw_name(d->jw, "key");

-	ret = btf_dumper_type(d, map_info->btf_key_type_id, key);
-	if (ret)
-		goto err_end_obj;
+		ret = btf_dumper_type(d, map_info->btf_key_type_id, key);
+		if (ret)
+			goto err_end_obj;
+	}

	if (!map_is_per_cpu(map_info->type)) {
		jsonw_name(d->jw, "value");
@@@ -262,20 -259,20 +262,20 @@@ static void print_entry_json(struct bpf
}

static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
-			      const char *value)
+			      const char *error_msg)
{
-	int value_size = strlen(value);
+	int msg_size = strlen(error_msg);
	bool single_line, break_names;

-	break_names = info->key_size > 16 || value_size > 16;
-	single_line = info->key_size + value_size <= 24 && !break_names;
+	break_names = info->key_size > 16 || msg_size > 16;
+	single_line = info->key_size + msg_size <= 24 && !break_names;

	printf("key:%c", break_names ? '\n' : ' ');
	fprint_hex(stdout, key, info->key_size, " ");

	printf(single_line ? " " : "\n");

-	printf("value:%c%s", break_names ? '\n' : ' ', value);
+	printf("value:%c%s", break_names ? '\n' : ' ', error_msg);

	printf("\n");
}
@@@ -299,7 -296,11 +299,7 @@@ static void print_entry_plain(struct bp
	if (info->value_size) {
		printf("value:%c", break_names ? '\n' : ' ');
-		if (value)
-			fprint_hex(stdout, value, info->value_size,
-				   " ");
-		else
-			printf("<no entry>");
+		fprint_hex(stdout, value, info->value_size, " ");
	}

	printf("\n");
@@@ -318,8 -319,11 +318,8 @@@
		for (i = 0; i < n; i++) {
			printf("value (CPU %02d):%c",
			       i, info->value_size > 16 ? '\n' : ' ');
-			if (value)
-				fprint_hex(stdout, value + i * step,
-					   info->value_size, " ");
-			else
-				printf("<no entry>");
+			fprint_hex(stdout, value + i * step,
+				   info->value_size, " ");
			printf("\n");
		}
	}
@@@ -532,9 -536,6 +532,9 @@@ static int show_map_close_json(int fd, 
	}
	close(fd);

+	if (info->btf_id)
+		jsonw_int_field(json_wtr, "btf_id", info->btf_id);
+
	if (!hash_empty(map_table.table)) {
		struct pinned_obj *obj;
@@@ -601,19 -602,15 +601,19 @@@ static int show_map_close_plain(int fd 
	}
	close(fd);

-	printf("\n");
	if (!hash_empty(map_table.table)) {
		struct pinned_obj *obj;

		hash_for_each_possible(map_table.table, obj, hash, info->id) {
			if (obj->id == info->id)
-				printf("\tpinned %s\n", obj->path);
+				printf("\n\tpinned %s", obj->path);
		}
	}
+
+	if (info->btf_id)
+		printf("\n\tbtf_id %d", info->btf_id);
+
+	printf("\n");
	return 0;
}
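The show_map_close_plain() hunks move the record's terminating newline to the very end of the function, so the optional pinned-path and btf_id annotations print as "\n\t"-prefixed continuation lines of the same record instead of forcing a break up front. A standalone illustration of that ordering; the record text and values are made up, not real bpftool output:

	/* Defer the trailing newline so optional continuation lines can
	 * attach to the current record.
	 */
	#include <stdio.h>

	static void show_record(const char *pinned_path, int btf_id)
	{
		printf("10: hash  key 4B  value 8B");	/* base record, no '\n' yet */

		if (pinned_path)
			printf("\n\tpinned %s", pinned_path);

		if (btf_id)
			printf("\n\tbtf_id %d", btf_id);

		printf("\n");	/* single terminating newline, as in the hunk */
	}

	int main(void)
	{
		show_record("/sys/fs/bpf/testmap", 5);	/* hypothetical values */
		show_record(NULL, 0);
		return 0;
	}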
@@@ -723,16 -720,11 +723,16 @@@ static int dump_map_elem(int fd, void *
		jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
		jsonw_end_object(json_wtr);
	} else {
-		if (errno == ENOENT)
-			print_entry_plain(map_info, key, NULL);
-		else
-			print_entry_error(map_info, key,
-					  strerror(lookup_errno));
+		const char *msg = NULL;
+
+		if (lookup_errno == ENOENT)
+			msg = "<no entry>";
+		else if (lookup_errno == ENOSPC &&
+			 map_info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
+			msg = "<cannot read>";
+
+		print_entry_error(map_info, key,
+				  msg ? : strerror(lookup_errno));
	}

	return 0;
@@@ -786,10 -778,6 +786,10 @@@ static int do_dump(int argc, char **arg
		}
	}

+	if (info.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
+	    info.value_size != 8)
+		p_info("Warning: cannot read values from %s map with value_size != 8",
+		       map_type_name[info.type]);
	while (true) {
		err = bpf_map_get_next_key(fd, prev_key, key);
		if (err) {
@@@ -1163,6 -1151,9 +1163,9 @@@ static int do_create(int argc, char **a
			return -1;
		}
		NEXT_ARG();
+	} else {
+		p_err("unknown arg %s", *argv);
+		return -1;
	}
}
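The dump_map_elem() hunk above bases the message on the saved lookup_errno rather than the global errno (which an intervening call can clobber) and maps two expected failures to readable placeholders before falling back to strerror(); the do_create() hunk simply makes unrecognized arguments fail fast with p_err() instead of looping silently. A standalone sketch of just the message selection, where is_reuseport_sockarray stands in for the map-type comparison and the GNU "?:" shorthand mirrors the style used in the hunk:

	/* Pick a friendly placeholder for expected failures, else strerror().
	 * Compile with gcc/clang (the bare "?:" is a GNU extension).
	 */
	#include <errno.h>
	#include <stdio.h>
	#include <string.h>

	static const char *lookup_msg(int lookup_errno, int is_reuseport_sockarray)
	{
		const char *msg = NULL;

		if (lookup_errno == ENOENT)
			msg = "<no entry>";
		else if (lookup_errno == ENOSPC && is_reuseport_sockarray)
			msg = "<cannot read>";

		return msg ? : strerror(lookup_errno);
	}

	int main(void)
	{
		printf("%s\n", lookup_msg(ENOENT, 0));
		printf("%s\n", lookup_msg(ENOSPC, 1));
		printf("%s\n", lookup_msg(EPERM, 0));
		return 0;
	}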
diff --combined tools/lib/bpf/.gitignore
index 7d9e182a1f51,fecb78afea3f..d9e9dec04605
--- a/tools/lib/bpf/.gitignore
+++ b/tools/lib/bpf/.gitignore
@@@ -1,4 -1,4 +1,5 @@@
libbpf_version.h
+libbpf.pc
FEATURE-DUMP.libbpf
test_libbpf
+ libbpf.so.*