The following commit has been merged in the master branch: commit a655fe9f194842693258f43b5382855db1c2f654 Merge: 7499a288bf1a4a49be9d72beb0a5c7b9aa6ffec9 27b4ad621e887ce8e5eb508a0103f13d30f6b38a Author: David S. Miller davem@davemloft.net Date: Fri Feb 8 15:00:17 2019 -0800
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
An ipvlan bug fix in 'net' conflicted with the abstraction away of the IPV6 specific support in 'net-next'.
Similarly, a bug fix for mlx5 in 'net' conflicted with the flow action conversion in 'net-next'.
Signed-off-by: David S. Miller davem@davemloft.net
diff --combined MAINTAINERS index b4491132b9ce,825dc2b7453d..604bca2fc05d --- a/MAINTAINERS +++ b/MAINTAINERS @@@ -2848,6 -2848,9 +2848,9 @@@ F: include/uapi/linux/if_bonding. BPF (Safe dynamic programs and tools) M: Alexei Starovoitov ast@kernel.org M: Daniel Borkmann daniel@iogearbox.net + R: Martin KaFai Lau kafai@fb.com + R: Song Liu songliubraving@fb.com + R: Yonghong Song yhs@fb.com L: netdev@vger.kernel.org L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git @@@ -2873,6 -2876,8 +2876,8 @@@ F: samples/bpf F: tools/bpf/ F: tools/lib/bpf/ F: tools/testing/selftests/bpf/ + K: bpf + N: bpf
BPF JIT for ARM M: Shubham Bansal illusionist.neo@gmail.com @@@ -2907,12 -2912,6 +2912,12 @@@ L: netdev@vger.kernel.or S: Maintained F: arch/powerpc/net/
+BPF JIT for RISC-V (RV64G) +M: Björn Töpel bjorn.topel@gmail.com +L: netdev@vger.kernel.org +S: Maintained +F: arch/riscv/net/ + BPF JIT for S390 M: Martin Schwidefsky schwidefsky@de.ibm.com M: Heiko Carstens heiko.carstens@de.ibm.com @@@ -4129,7 -4128,7 +4134,7 @@@ S: Maintaine F: drivers/media/dvb-frontends/cxd2820r*
CXGB3 ETHERNET DRIVER (CXGB3) -M: Arjun Vynipadath arjun@chelsio.com +M: Vishal Kulkarni vishal@chelsio.com L: netdev@vger.kernel.org W: http://www.chelsio.com S: Supported @@@ -4158,7 -4157,7 +4163,7 @@@ S: Supporte F: drivers/crypto/chelsio
CXGB4 ETHERNET DRIVER (CXGB4) -M: Arjun Vynipadath arjun@chelsio.com +M: Vishal Kulkarni vishal@chelsio.com L: netdev@vger.kernel.org W: http://www.chelsio.com S: Supported @@@ -6030,12 -6029,6 +6035,12 @@@ L: linuxppc-dev@lists.ozlabs.or S: Maintained F: drivers/dma/fsldma.*
+FREESCALE ENETC ETHERNET DRIVERS +M: Claudiu Manoil claudiu.manoil@nxp.com +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/freescale/enetc/ + FREESCALE eTSEC ETHERNET DRIVER (GIANFAR) M: Claudiu Manoil claudiu.manoil@nxp.com L: netdev@vger.kernel.org @@@ -6100,7 -6093,6 +6105,7 @@@ M: Yangbo Lu <yangbo.lu@nxp.com L: netdev@vger.kernel.org S: Maintained F: drivers/ptp/ptp_qoriq.c +F: drivers/ptp/ptp_qoriq_debugfs.c F: include/linux/fsl/ptp_qoriq.h F: Documentation/devicetree/bindings/ptp/ptp-qoriq.txt
@@@ -10590,7 -10582,6 +10595,7 @@@ F: Documentation/devicetree/bindings/ne F: net/dsa/ F: include/net/dsa.h F: include/linux/dsa/ +F: include/linux/platform_data/dsa.h F: drivers/net/dsa/
NETWORKING [GENERAL] @@@ -12623,14 -12614,6 +12628,14 @@@ L: netdev@vger.kernel.or S: Maintained F: drivers/net/ethernet/qualcomm/emac/
+QUALCOMM ETHQOS ETHERNET DRIVER +M: Vinod Koul vkoul@kernel.org +M: Niklas Cassel niklas.cassel@linaro.org +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +F: Documentation/devicetree/bindings/net/qcom,dwmac.txt + QUALCOMM GENERIC INTERFACE I2C DRIVER M: Alok Chauhan alokc@codeaurora.org M: Karthikeyan Ramasubramanian kramasub@codeaurora.org @@@ -12890,6 -12873,13 +12895,13 @@@ F: Documentation/devicetree/bindings/ne F: drivers/net/dsa/realtek-smi* F: drivers/net/dsa/rtl83*
+ REDPINE WIRELESS DRIVER + M: Amitkumar Karwar amitkarwar@gmail.com + M: Siva Rebbagondla siva8118@gmail.com + L: linux-wireless@vger.kernel.org + S: Maintained + F: drivers/net/wireless/rsi/ + REGISTER MAP ABSTRACTION M: Mark Brown broonie@kernel.org L: linux-kernel@vger.kernel.org @@@ -13718,6 -13708,15 +13730,15 @@@ L: netdev@vger.kernel.or S: Supported F: drivers/net/ethernet/sfc/
+ SFF/SFP/SFP+ MODULE SUPPORT + M: Russell King linux@armlinux.org.uk + L: netdev@vger.kernel.org + S: Maintained + F: drivers/net/phy/phylink.c + F: drivers/net/phy/sfp* + F: include/linux/phylink.h + F: include/linux/sfp.h + SGI GRU DRIVER M: Dimitri Sivanich sivanich@sgi.com S: Maintained @@@ -16663,6 -16662,15 +16684,15 @@@ S: Maintaine F: drivers/platform/x86/ F: drivers/platform/olpc/
+ X86 PLATFORM DRIVERS - ARCH + R: Darren Hart dvhart@infradead.org + R: Andy Shevchenko andy@infradead.org + L: platform-driver-x86@vger.kernel.org + L: x86@kernel.org + T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core + S: Maintained + F: arch/x86/platform + X86 VDSO M: Andy Lutomirski luto@kernel.org L: linux-kernel@vger.kernel.org diff --combined arch/riscv/Kconfig index e64c657060bb,515fc3cc9687..bd149905a5b5 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@@ -49,7 -49,6 +49,7 @@@ config RISC select RISCV_TIMER select GENERIC_IRQ_MULTI_HANDLER select ARCH_HAS_PTE_SPECIAL + select HAVE_EBPF_JIT if 64BIT
config MMU def_bool y @@@ -104,7 -103,7 +104,7 @@@ choic prompt "Base ISA" default ARCH_RV64I help - This selects the base ISA that this kernel will traget and must match + This selects the base ISA that this kernel will target and must match the target platform.
config ARCH_RV32I diff --combined drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1c2987c3d708,8bc7e495b027..92d73453a318 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@@ -4973,12 -4973,18 +4973,18 @@@ static int bnxt_hwrm_ring_alloc(struct struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; struct bnxt_ring_struct *ring = &cpr->cp_ring_struct; u32 map_idx = ring->map_idx; + unsigned int vector;
+ vector = bp->irq_tbl[map_idx].vector; + disable_irq_nosync(vector); rc = hwrm_ring_alloc_send_msg(bp, ring, type, map_idx); - if (rc) + if (rc) { + enable_irq(vector); goto err_out; + } bnxt_set_db(bp, &cpr->cp_db, type, map_idx, ring->fw_ring_id); bnxt_db_nq(bp, &cpr->cp_db, cpr->cp_raw_cons); + enable_irq(vector); bp->grp_info[i].cp_fw_ring_id = ring->fw_ring_id;
if (!i) { @@@ -9981,11 -9987,8 +9987,11 @@@ static int bnxt_get_phys_port_name(stru return 0; }
-int bnxt_port_attr_get(struct bnxt *bp, struct switchdev_attr *attr) +int bnxt_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { + struct bnxt *bp = netdev_priv(dev); + if (bp->eswitch_mode != DEVLINK_ESWITCH_MODE_SWITCHDEV) return -EOPNOTSUPP;
@@@ -9993,12 -9996,27 +9999,12 @@@ if (!BNXT_PF(bp)) return -EOPNOTSUPP;
- switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(bp->switch_id); - memcpy(attr->u.ppid.id, bp->switch_id, attr->u.ppid.id_len); - break; - default: - return -EOPNOTSUPP; - } - return 0; -} + ppid->id_len = sizeof(bp->switch_id); + memcpy(ppid->id, bp->switch_id, ppid->id_len);
-static int bnxt_swdev_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) -{ - return bnxt_port_attr_get(netdev_priv(dev), attr); + return 0; }
-static const struct switchdev_ops bnxt_switchdev_ops = { - .switchdev_port_attr_get = bnxt_swdev_port_attr_get -}; - static const struct net_device_ops bnxt_netdev_ops = { .ndo_open = bnxt_open, .ndo_start_xmit = bnxt_start_xmit, @@@ -10030,7 -10048,6 +10036,7 @@@ .ndo_bpf = bnxt_xdp, .ndo_bridge_getlink = bnxt_bridge_getlink, .ndo_bridge_setlink = bnxt_bridge_setlink, + .ndo_get_port_parent_id = bnxt_get_port_parent_id, .ndo_get_phys_port_name = bnxt_get_phys_port_name };
@@@ -10389,6 -10406,7 +10395,6 @@@ static int bnxt_init_one(struct pci_de dev->netdev_ops = &bnxt_netdev_ops; dev->watchdog_timeo = BNXT_TX_TIMEOUT; dev->ethtool_ops = &bnxt_ethtool_ops; - SWITCHDEV_SET_OPS(dev, &bnxt_switchdev_ops); pci_set_drvdata(pdev, dev);
rc = bnxt_alloc_hwrm_resources(bp); diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 9e71f4d41b82,f3c7ab6faea5..bdcc5e79328d --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@@ -25,7 -25,7 +25,7 @@@ static int get_route_and_out_devs(struc /* if the egress device isn't on the same HW e-switch or * it's a LAG device, use the uplink */ - if (!switchdev_port_same_parent_id(priv->netdev, dev) || + if (!netdev_port_same_parent_id(priv->netdev, dev) || dst_is_lag_dev) { *route_dev = uplink_dev; *out_dev = *route_dev; @@@ -256,6 -256,7 +256,7 @@@ int mlx5e_tc_tun_create_header_ipv4(str e->m_neigh.family = n->ops->family; memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); e->out_dev = out_dev; + e->route_dev = route_dev;
/* It's important to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the @@@ -369,6 -370,7 +370,7 @@@ int mlx5e_tc_tun_create_header_ipv6(str e->m_neigh.family = n->ops->family; memcpy(&e->m_neigh.dst_ip, n->primary_key, n->tbl->key_len); e->out_dev = out_dev; + e->route_dev = route_dev;
/* It's importent to add the neigh to the hash table before checking * the neigh validity state. So if we'll get a notification, in case the @@@ -496,21 -498,25 +498,21 @@@ static int mlx5e_tc_tun_parse_vxlan(str void *headers_c, void *headers_v) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_PORTS, - f->mask); void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_match_ports enc_ports; + + flow_rule_match_enc_ports(rule, &enc_ports);
/* Full udp dst port must be given */ - if (!dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS) || - memchr_inv(&mask->dst, 0xff, sizeof(mask->dst))) { + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS) || + memchr_inv(&enc_ports.mask->dst, 0xff, sizeof(enc_ports.mask->dst))) { NL_SET_ERR_MSG_MOD(extack, "VXLAN decap filter must include enc_dst_port condition"); netdev_warn(priv->netdev, @@@ -519,12 -525,12 +521,12 @@@ }
/* udp dst port must be knonwn as a VXLAN port */ - if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(key->dst))) { + if (!mlx5_vxlan_lookup_port(priv->mdev->vxlan, be16_to_cpu(enc_ports.key->dst))) { NL_SET_ERR_MSG_MOD(extack, "Matched UDP port is not registered as a VXLAN port"); netdev_warn(priv->netdev, "UDP port %d is not registered as a VXLAN port\n", - be16_to_cpu(key->dst)); + be16_to_cpu(enc_ports.key->dst)); return -EOPNOTSUPP; }
@@@ -532,26 -538,26 +534,26 @@@ MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(mask->dst)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(key->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, + ntohs(enc_ports.mask->dst)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, + ntohs(enc_ports.key->dst));
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(mask->src)); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(key->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, + ntohs(enc_ports.mask->src)); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, + ntohs(enc_ports.key->src));
/* match on VNI */ - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->key); - struct flow_dissector_key_keyid *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni, - be32_to_cpu(mask->keyid)); + be32_to_cpu(enc_keyid.mask->keyid)); MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni, - be32_to_cpu(key->keyid)); + be32_to_cpu(enc_keyid.key->keyid)); } return 0; } @@@ -566,7 -572,6 +568,7 @@@ static int mlx5e_tc_tun_parse_gretap(st misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f);
if (!MLX5_CAP_ESW(priv->mdev, nvgre_encap_decap)) { NL_SET_ERR_MSG_MOD(f->common.extack, @@@ -584,14 -589,21 +586,14 @@@ MLX5_SET(fte_match_set_misc, misc_v, gre_protocol, ETH_P_TEB);
/* gre key */ - if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { - struct flow_dissector_key_keyid *mask = NULL; - struct flow_dissector_key_keyid *key = NULL; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid;
- mask = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->mask); + flow_rule_match_enc_keyid(rule, &enc_keyid); MLX5_SET(fte_match_set_misc, misc_c, - gre_key.key, be32_to_cpu(mask->keyid)); - - key = skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_KEYID, - f->key); + gre_key.key, be32_to_cpu(enc_keyid.mask->keyid)); MLX5_SET(fte_match_set_misc, misc_v, - gre_key.key, be32_to_cpu(key->keyid)); + gre_key.key, be32_to_cpu(enc_keyid.key->keyid)); }
return 0; @@@ -602,16 -614,18 +604,18 @@@ int mlx5e_tc_tun_parse(struct net_devic struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, void *headers_c, - void *headers_v) + void *headers_v, u8 *match_level) { int tunnel_type; int err = 0;
tunnel_type = mlx5e_tc_tun_get_type(filter_dev); if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) { + *match_level = MLX5_MATCH_L4; err = mlx5e_tc_tun_parse_vxlan(priv, spec, f, headers_c, headers_v); } else if (tunnel_type == MLX5E_TC_TUNNEL_TYPE_GRETAP) { + *match_level = MLX5_MATCH_L3; err = mlx5e_tc_tun_parse_gretap(priv, spec, f, headers_c, headers_v); } else { diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 0b1988b330f3,ef9e472daffb..7b5829406d95 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@@ -162,16 -162,27 +162,16 @@@ static void mlx5e_rep_update_hw_counter static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv) { struct mlx5e_sw_stats *s = &priv->stats.sw; - struct mlx5e_rq_stats *rq_stats; - struct mlx5e_sq_stats *sq_stats; - int i, j; + struct rtnl_link_stats64 stats64 = {};
memset(s, 0, sizeof(*s)); - for (i = 0; i < priv->channels.num; i++) { - struct mlx5e_channel *c = priv->channels.c[i]; - - rq_stats = c->rq.stats; - - s->rx_packets += rq_stats->packets; - s->rx_bytes += rq_stats->bytes; + mlx5e_fold_sw_stats64(priv, &stats64);
- for (j = 0; j < priv->channels.params.num_tc; j++) { - sq_stats = c->sq[j].stats; - - s->tx_packets += sq_stats->packets; - s->tx_bytes += sq_stats->bytes; - s->tx_queue_dropped += sq_stats->dropped; - } - } + s->rx_packets = stats64.rx_packets; + s->rx_bytes = stats64.rx_bytes; + s->tx_packets = stats64.tx_packets; + s->tx_bytes = stats64.tx_bytes; + s->tx_queue_dropped = stats64.tx_dropped; }
static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, @@@ -184,7 -195,8 +184,7 @@@ return;
mutex_lock(&priv->state_lock); - if (test_bit(MLX5E_STATE_OPENED, &priv->state)) - mlx5e_rep_update_sw_counters(priv); + mlx5e_rep_update_sw_counters(priv); mlx5e_rep_update_hw_counters(priv); mutex_unlock(&priv->state_lock);
@@@ -381,8 -393,7 +381,8 @@@ static const struct ethtool_ops mlx5e_u .set_pauseparam = mlx5e_uplink_rep_set_pauseparam, };
-static int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) +static int mlx5e_rep_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@@ -399,14 -410,20 +399,14 @@@ uplink_priv = netdev_priv(uplink_dev); }
- switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = ETH_ALEN; - if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { - ether_addr_copy(attr->u.ppid.id, uplink_upper->dev_addr); - } else { - struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct mlx5_eswitch_rep *rep = rpriv->rep; + ppid->id_len = ETH_ALEN; + if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) { + ether_addr_copy(ppid->id, uplink_upper->dev_addr); + } else { + struct mlx5e_rep_priv *rpriv = priv->ppriv; + struct mlx5_eswitch_rep *rep = rpriv->rep;
- ether_addr_copy(attr->u.ppid.id, rep->hw_id); - } - break; - default: - return -EOPNOTSUPP; + ether_addr_copy(ppid->id, rep->hw_id); }
return 0; @@@ -579,6 -596,10 +579,10 @@@ static void mlx5e_rep_update_flows(stru if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) { ether_addr_copy(e->h_dest, ha); ether_addr_copy(eth->h_dest, ha); + /* Update the encap source mac, in case that we delete + * the flows when encap source mac changed. + */ + ether_addr_copy(eth->h_source, e->route_dev->dev_addr);
mlx5e_tc_encap_flows_add(priv, e); } @@@ -1220,8 -1241,17 +1224,8 @@@ mlx5e_get_sw_stats64(const struct net_d struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); - struct mlx5e_sw_stats *sstats = &priv->stats.sw; - - mlx5e_rep_update_sw_counters(priv); - - stats->rx_packets = sstats->rx_packets; - stats->rx_bytes = sstats->rx_bytes; - stats->tx_packets = sstats->tx_packets; - stats->tx_bytes = sstats->tx_bytes; - - stats->tx_dropped = sstats->tx_queue_dropped;
+ mlx5e_fold_sw_stats64(priv, stats); return 0; }
@@@ -1279,6 -1309,10 +1283,6 @@@ static int mlx5e_uplink_rep_set_vf_vlan return 0; }
-static const struct switchdev_ops mlx5e_rep_switchdev_ops = { - .switchdev_port_attr_get = mlx5e_attr_get, -}; - static const struct net_device_ops mlx5e_netdev_ops_vf_rep = { .ndo_open = mlx5e_vf_rep_open, .ndo_stop = mlx5e_vf_rep_close, @@@ -1289,7 -1323,6 +1293,7 @@@ .ndo_has_offload_stats = mlx5e_rep_has_offload_stats, .ndo_get_offload_stats = mlx5e_rep_get_offload_stats, .ndo_change_mtu = mlx5e_vf_rep_change_mtu, + .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, };
static const struct net_device_ops mlx5e_netdev_ops_uplink_rep = { @@@ -1311,7 -1344,6 +1315,7 @@@ .ndo_get_vf_config = mlx5e_get_vf_config, .ndo_get_vf_stats = mlx5e_get_vf_stats, .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, + .ndo_get_port_parent_id = mlx5e_rep_get_port_parent_id, };
bool mlx5e_eswitch_rep(struct net_device *netdev) @@@ -1386,6 -1418,8 +1390,6 @@@ static void mlx5e_build_rep_netdev(stru netdev->watchdog_timeo = 15 * HZ;
- netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; - netdev->features |= NETIF_F_HW_TC | NETIF_F_NETNS_LOCAL; netdev->hw_features |= NETIF_F_HW_TC;
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 85c5dd7fc2c7,b5c1b039375a..e9437757cad7 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@@ -38,6 -38,7 +38,6 @@@ #include <linux/mlx5/fs.h> #include <linux/mlx5/device.h> #include <linux/rhashtable.h> -#include <net/switchdev.h> #include <net/tc_act/tc_mirred.h> #include <net/tc_act/tc_vlan.h> #include <net/tc_act/tc_tunnel_key.h> @@@ -127,6 -128,7 +127,7 @@@ struct mlx5e_tc_flow_parse_attr struct net_device *filter_dev; struct mlx5_flow_spec spec; int num_mod_hdr_actions; + int max_mod_hdr_actions; void *mod_hdr_actions; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; }; @@@ -1301,89 -1303,101 +1302,89 @@@ static void mlx5e_tc_del_flow(struct ml static int parse_tunnel_attr(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, - struct net_device *filter_dev) + struct net_device *filter_dev, u8 *match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers); void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); - - struct flow_dissector_key_control *enc_control = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_CONTROL, - f->key); - int err = 0; + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_match_control enc_control; + int err;
err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f, - headers_c, headers_v); + headers_c, headers_v, match_level); if (err) { NL_SET_ERR_MSG_MOD(extack, "failed to parse tunnel attributes"); return err; }
- if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, - f->mask); + flow_rule_match_enc_control(rule, &enc_control); + + if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); MLX5_SET(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(mask->src)); + ntohl(match.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4, - ntohl(key->src)); + ntohl(match.key->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(mask->dst)); + ntohl(match.mask->dst)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4, - ntohl(key->dst)); + ntohl(match.key->dst));
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); - } else if (enc_control->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, - f->mask); + } else if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match;
+ flow_rule_match_enc_ipv6_addrs(rule, &match); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.mask->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.key->src, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.mask->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6)); + &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6));
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6); }
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IP)) { - struct flow_dissector_key_ip *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IP, - f->key); - struct flow_dissector_key_ip *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IP, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3); + flow_rule_match_enc_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl);
- if (mask->ttl && + if (match.mask->ttl && !MLX5_CAP_ESW_FLOWTABLE_FDB (priv->mdev, ft_field_support.outer_ipv4_ttl)) { @@@ -1413,7 -1427,7 +1414,7 @@@ static int __parse_cls_flower(struct ml struct mlx5_flow_spec *spec, struct tc_cls_flower_offload *f, struct net_device *filter_dev, - u8 *match_level) + u8 *match_level, u8 *tunnel_match_level) { struct netlink_ext_ack *extack = f->common.extack; void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, @@@ -1424,14 -1438,12 +1425,14 @@@ misc_parameters); void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); + struct flow_dissector *dissector = rule->match.dissector; u16 addr_type = 0; u8 ip_proto = 0;
*match_level = MLX5_MATCH_NONE;
- if (f->dissector->used_keys & + if (dissector->used_keys & ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | @@@ -1450,21 -1462,23 +1451,21 @@@ BIT(FLOW_DISSECTOR_KEY_ENC_IP))) { NL_SET_ERR_MSG_MOD(extack, "Unsupported key"); netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n", - f->dissector->used_keys); + dissector->used_keys); return -EOPNOTSUPP; }
- if ((dissector_uses_key(f->dissector, - FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || - dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) || - dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) && - dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ENC_CONTROL, - f->key); - switch (key->addr_type) { + if ((flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID) || + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) && + flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_enc_control(rule, &match); + switch (match.key->addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: case FLOW_DISSECTOR_KEY_IPV6_ADDRS: - if (parse_tunnel_attr(priv, spec, f, filter_dev)) + if (parse_tunnel_attr(priv, spec, f, filter_dev, tunnel_match_level)) return -EOPNOTSUPP; break; default: @@@ -1480,27 -1494,35 +1481,27 @@@ inner_headers); }
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, - ntohs(mask->n_proto)); + ntohs(match.mask->n_proto)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, - ntohs(key->n_proto)); + ntohs(match.key->n_proto));
- if (mask->n_proto) + if (match.mask->n_proto) *match_level = MLX5_MATCH_L2; }
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_VLAN, - f->mask); - if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) { - if (key->vlan_tpid == htons(ETH_P_8021AD)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_id || + match.mask->vlan_priority || + match.mask->vlan_tpid) { + if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, svlan_tag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, @@@ -1512,15 -1534,11 +1513,15 @@@ cvlan_tag, 1); }
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, + match.mask->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, + match.key->vlan_id);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, mask->vlan_priority); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, key->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, + match.mask->vlan_priority); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, + match.key->vlan_priority);
*match_level = MLX5_MATCH_L2; } @@@ -1530,14 -1548,17 +1531,14 @@@ *match_level = MLX5_MATCH_L2; }
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CVLAN)) { - struct flow_dissector_key_vlan *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CVLAN, - f->key); - struct flow_dissector_key_vlan *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CVLAN, - f->mask); - if (mask->vlan_id || mask->vlan_priority || mask->vlan_tpid) { - if (key->vlan_tpid == htons(ETH_P_8021AD)) { + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) { + struct flow_match_vlan match; + + flow_rule_match_vlan(rule, &match); + if (match.mask->vlan_id || + match.mask->vlan_priority || + match.mask->vlan_tpid) { + if (match.key->vlan_tpid == htons(ETH_P_8021AD)) { MLX5_SET(fte_match_set_misc, misc_c, outer_second_svlan_tag, 1); MLX5_SET(fte_match_set_misc, misc_v, @@@ -1550,58 -1571,69 +1551,58 @@@ }
MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid, - mask->vlan_id); + match.mask->vlan_id); MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid, - key->vlan_id); + match.key->vlan_id); MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio, - mask->vlan_priority); + match.mask->vlan_priority); MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio, - key->vlan_priority); + match.key->vlan_priority);
*match_level = MLX5_MATCH_L2; } }
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { - struct flow_dissector_key_eth_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->key); - struct flow_dissector_key_eth_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_ETH_ADDRS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match;
+ flow_rule_match_eth_addrs(rule, &match); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dmac_47_16), - mask->dst); + match.mask->dst); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dmac_47_16), - key->dst); + match.key->dst);
ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, smac_47_16), - mask->src); + match.mask->src); ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, smac_47_16), - key->src); + match.key->src);
- if (!is_zero_ether_addr(mask->src) || !is_zero_ether_addr(mask->dst)) + if (!is_zero_ether_addr(match.mask->src) || + !is_zero_ether_addr(match.mask->dst)) *match_level = MLX5_MATCH_L2; }
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) { - struct flow_dissector_key_control *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->key); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match;
- struct flow_dissector_key_control *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_CONTROL, - f->mask); - addr_type = key->addr_type; + flow_rule_match_control(rule, &match); + addr_type = match.key->addr_type;
/* the HW doesn't support frag first/later */ - if (mask->flags & FLOW_DIS_FIRST_FRAG) + if (match.mask->flags & FLOW_DIS_FIRST_FRAG) return -EOPNOTSUPP;
- if (mask->flags & FLOW_DIS_IS_FRAGMENT) { + if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) { MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1); MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, - key->flags & FLOW_DIS_IS_FRAGMENT); + match.key->flags & FLOW_DIS_IS_FRAGMENT);
/* the HW doesn't need L3 inline to match on frag=no */ - if (!(key->flags & FLOW_DIS_IS_FRAGMENT)) + if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT)) *match_level = MLX5_MATCH_L2; /* *** L2 attributes parsing up to here *** */ else @@@ -1609,85 -1641,102 +1610,85 @@@ } }
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) { - struct flow_dissector_key_basic *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->key); - struct flow_dissector_key_basic *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_BASIC, - f->mask); - ip_proto = key->ip_proto; + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + ip_proto = match.key->ip_proto;
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, - mask->ip_proto); + match.mask->ip_proto); MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, - key->ip_proto); + match.key->ip_proto);
- if (mask->ip_proto) + if (match.mask->ip_proto) *match_level = MLX5_MATCH_L3; }
if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { - struct flow_dissector_key_ipv4_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->key); - struct flow_dissector_key_ipv4_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, - f->mask); + struct flow_match_ipv4_addrs match;
+ flow_rule_match_ipv4_addrs(rule, &match); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4), - &mask->src, sizeof(mask->src)); + &match.mask->src, sizeof(match.mask->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4), - &key->src, sizeof(key->src)); + &match.key->src, sizeof(match.key->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &mask->dst, sizeof(mask->dst)); + &match.mask->dst, sizeof(match.mask->dst)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), - &key->dst, sizeof(key->dst)); + &match.key->dst, sizeof(match.key->dst));
- if (mask->src || mask->dst) + if (match.mask->src || match.mask->dst) *match_level = MLX5_MATCH_L3; }
if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { - struct flow_dissector_key_ipv6_addrs *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->key); - struct flow_dissector_key_ipv6_addrs *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, - f->mask); + struct flow_match_ipv6_addrs match;
+ flow_rule_match_ipv6_addrs(rule, &match); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &mask->src, sizeof(mask->src)); + &match.mask->src, sizeof(match.mask->src)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), - &key->src, sizeof(key->src)); + &match.key->src, sizeof(match.key->src));
memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &mask->dst, sizeof(mask->dst)); + &match.mask->dst, sizeof(match.mask->dst)); memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), - &key->dst, sizeof(key->dst)); + &match.key->dst, sizeof(match.key->dst));
- if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY || - ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY) + if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY || + ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY) *match_level = MLX5_MATCH_L3; }
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_IP)) { - struct flow_dissector_key_ip *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->key); - struct flow_dissector_key_ip *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_IP, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) { + struct flow_match_ip match;
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, mask->tos & 0x3); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, key->tos & 0x3); + flow_rule_match_ip(rule, &match); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn, + match.mask->tos & 0x3); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn, + match.key->tos & 0x3);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, mask->tos >> 2); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, key->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp, + match.mask->tos >> 2); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp, + match.key->tos >> 2);
- MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, mask->ttl); - MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, key->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit, + match.mask->ttl); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit, + match.key->ttl);
- if (mask->ttl && + if (match.mask->ttl && !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, ft_field_support.outer_ipv4_ttl)) { NL_SET_ERR_MSG_MOD(extack, @@@ -1695,39 -1744,44 +1696,39 @@@ return -EOPNOTSUPP; }
- if (mask->tos || mask->ttl) + if (match.mask->tos || match.mask->ttl) *match_level = MLX5_MATCH_L3; }
/* *** L3 attributes parsing up to here *** */
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) { - struct flow_dissector_key_ports *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->key); - struct flow_dissector_key_ports *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_PORTS, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); switch (ip_proto) { case IPPROTO_TCP: MLX5_SET(fte_match_set_lyr_2_4, headers_c, - tcp_sport, ntohs(mask->src)); + tcp_sport, ntohs(match.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - tcp_sport, ntohs(key->src)); + tcp_sport, ntohs(match.key->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_c, - tcp_dport, ntohs(mask->dst)); + tcp_dport, ntohs(match.mask->dst)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - tcp_dport, ntohs(key->dst)); + tcp_dport, ntohs(match.key->dst)); break;
case IPPROTO_UDP: MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_sport, ntohs(mask->src)); + udp_sport, ntohs(match.mask->src)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_sport, ntohs(key->src)); + udp_sport, ntohs(match.key->src));
MLX5_SET(fte_match_set_lyr_2_4, headers_c, - udp_dport, ntohs(mask->dst)); + udp_dport, ntohs(match.mask->dst)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, - udp_dport, ntohs(key->dst)); + udp_dport, ntohs(match.key->dst)); break; default: NL_SET_ERR_MSG_MOD(extack, @@@ -1737,20 -1791,26 +1738,20 @@@ return -EINVAL; }
- if (mask->src || mask->dst) + if (match.mask->src || match.mask->dst) *match_level = MLX5_MATCH_L4; }
- if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_TCP)) { - struct flow_dissector_key_tcp *key = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_TCP, - f->key); - struct flow_dissector_key_tcp *mask = - skb_flow_dissector_target(f->dissector, - FLOW_DISSECTOR_KEY_TCP, - f->mask); + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) { + struct flow_match_tcp match;
+ flow_rule_match_tcp(rule, &match); MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags, - ntohs(mask->flags)); + ntohs(match.mask->flags)); MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags, - ntohs(key->flags)); + ntohs(match.key->flags));
- if (mask->flags) + if (match.mask->flags) *match_level = MLX5_MATCH_L4; }
@@@ -1767,11 -1827,11 +1768,11 @@@ static int parse_cls_flower(struct mlx5 struct mlx5_core_dev *dev = priv->mdev; struct mlx5_eswitch *esw = dev->priv.eswitch; struct mlx5e_rep_priv *rpriv = priv->ppriv; + u8 match_level, tunnel_match_level = MLX5_MATCH_NONE; struct mlx5_eswitch_rep *rep; - u8 match_level; int err;
- err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level); + err = __parse_cls_flower(priv, spec, f, filter_dev, &match_level, &tunnel_match_level);
if (!err && (flow->flags & MLX5E_TC_FLOW_ESWITCH)) { rep = rpriv->rep; @@@ -1787,10 -1847,12 +1788,12 @@@ } }
- if (flow->flags & MLX5E_TC_FLOW_ESWITCH) + if (flow->flags & MLX5E_TC_FLOW_ESWITCH) { flow->esw_attr->match_level = match_level; - else + flow->esw_attr->tunnel_match_level = tunnel_match_level; + } else { flow->nic_attr->match_level = match_level; + }
return err; } @@@ -1803,32 -1865,27 +1806,32 @@@ struct pedit_headers struct udphdr udp; };
+struct pedit_headers_action { + struct pedit_headers vals; + struct pedit_headers masks; + u32 pedits; +}; + static int pedit_header_offsets[] = { - [TCA_PEDIT_KEY_EX_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), - [TCA_PEDIT_KEY_EX_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), - [TCA_PEDIT_KEY_EX_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), - [TCA_PEDIT_KEY_EX_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), - [TCA_PEDIT_KEY_EX_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), + [FLOW_ACT_MANGLE_HDR_TYPE_ETH] = offsetof(struct pedit_headers, eth), + [FLOW_ACT_MANGLE_HDR_TYPE_IP4] = offsetof(struct pedit_headers, ip4), + [FLOW_ACT_MANGLE_HDR_TYPE_IP6] = offsetof(struct pedit_headers, ip6), + [FLOW_ACT_MANGLE_HDR_TYPE_TCP] = offsetof(struct pedit_headers, tcp), + [FLOW_ACT_MANGLE_HDR_TYPE_UDP] = offsetof(struct pedit_headers, udp), };
#define pedit_header(_ph, _htype) ((void *)(_ph) + pedit_header_offsets[_htype])
static int set_pedit_val(u8 hdr_type, u32 mask, u32 val, u32 offset, - struct pedit_headers *masks, - struct pedit_headers *vals) + struct pedit_headers_action *hdrs) { u32 *curr_pmask, *curr_pval;
- if (hdr_type >= __PEDIT_HDR_TYPE_MAX) + if (hdr_type >= 2) goto out_err;
- curr_pmask = (u32 *)(pedit_header(masks, hdr_type) + offset); - curr_pval = (u32 *)(pedit_header(vals, hdr_type) + offset); + curr_pmask = (u32 *)(pedit_header(&hdrs->masks, hdr_type) + offset); + curr_pval = (u32 *)(pedit_header(&hdrs->vals, hdr_type) + offset);
if (*curr_pmask & mask) /* disallow acting twice on the same location */ goto out_err; @@@ -1880,11 -1937,12 +1883,11 @@@ static struct mlx5_fields fields[] = OFFLOAD(UDP_DPORT, 2, udp.dest, 0), };
- /* On input attr->num_mod_hdr_actions tells how many HW actions can be parsed at - * max from the SW pedit action. On success, it says how many HW actions were - * actually parsed. + /* On input attr->max_mod_hdr_actions tells how many HW actions can be parsed at + * max from the SW pedit action. On success, attr->num_mod_hdr_actions + * says how many HW actions were actually parsed. */ -static int offload_pedit_fields(struct pedit_headers *masks, - struct pedit_headers *vals, +static int offload_pedit_fields(struct pedit_headers_action *hdrs, struct mlx5e_tc_flow_parse_attr *parse_attr, struct netlink_ext_ack *extack) { @@@ -1899,15 -1957,17 +1902,17 @@@ __be16 mask_be16; void *action;
- set_masks = &masks[TCA_PEDIT_KEY_EX_CMD_SET]; - add_masks = &masks[TCA_PEDIT_KEY_EX_CMD_ADD]; - set_vals = &vals[TCA_PEDIT_KEY_EX_CMD_SET]; - add_vals = &vals[TCA_PEDIT_KEY_EX_CMD_ADD]; + set_masks = &hdrs[0].masks; + add_masks = &hdrs[1].masks; + set_vals = &hdrs[0].vals; + add_vals = &hdrs[1].vals;
action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto); - action = parse_attr->mod_hdr_actions; - max_actions = parse_attr->num_mod_hdr_actions; - nactions = 0; + action = parse_attr->mod_hdr_actions + + parse_attr->num_mod_hdr_actions * action_size; + + max_actions = parse_attr->max_mod_hdr_actions; + nactions = parse_attr->num_mod_hdr_actions;
for (i = 0; i < ARRAY_SIZE(fields); i++) { f = &fields[i]; @@@ -1998,14 -2058,12 +2003,14 @@@ }
static int alloc_mod_hdr_actions(struct mlx5e_priv *priv, - const struct tc_action *a, int namespace, + struct pedit_headers_action *hdrs, + int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr) { int nkeys, action_size, max_actions;
- nkeys = tcf_pedit_nkeys(a); + nkeys = hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits + + hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits; action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
if (namespace == MLX5_FLOW_NAMESPACE_FDB) /* FDB offloading */ @@@ -2020,65 -2078,64 +2025,67 @@@ if (!parse_attr->mod_hdr_actions) return -ENOMEM;
- parse_attr->num_mod_hdr_actions = max_actions; + parse_attr->max_mod_hdr_actions = max_actions; return 0; }
static const struct pedit_headers zero_masks = {};
static int parse_tc_pedit_action(struct mlx5e_priv *priv, - const struct tc_action *a, int namespace, + const struct flow_action_entry *act, int namespace, struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, struct netlink_ext_ack *extack) { - struct pedit_headers masks[__PEDIT_CMD_MAX], vals[__PEDIT_CMD_MAX], *cmd_masks; - int nkeys, i, err = -EOPNOTSUPP; + u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1; + int err = -EOPNOTSUPP; u32 mask, val, offset; - u8 cmd, htype; + u8 htype;
- nkeys = tcf_pedit_nkeys(a); + htype = act->mangle.htype; + err = -EOPNOTSUPP; /* can't be all optimistic */
- memset(masks, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); - memset(vals, 0, sizeof(struct pedit_headers) * __PEDIT_CMD_MAX); + if (htype == FLOW_ACT_MANGLE_UNSPEC) { + NL_SET_ERR_MSG_MOD(extack, "legacy pedit isn't offloaded"); + goto out_err; + }
- for (i = 0; i < nkeys; i++) { - htype = tcf_pedit_htype(a, i); - cmd = tcf_pedit_cmd(a, i); - err = -EOPNOTSUPP; /* can't be all optimistic */ + mask = act->mangle.mask; + val = act->mangle.val; + offset = act->mangle.offset;
- if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK) { - NL_SET_ERR_MSG_MOD(extack, - "legacy pedit isn't offloaded"); - goto out_err; - } + err = set_pedit_val(htype, ~mask, val, offset, &hdrs[cmd]); + if (err) + goto out_err;
- if (cmd != TCA_PEDIT_KEY_EX_CMD_SET && cmd != TCA_PEDIT_KEY_EX_CMD_ADD) { - NL_SET_ERR_MSG_MOD(extack, "pedit cmd isn't offloaded"); - goto out_err; - } + hdrs[cmd].pedits++;
- mask = tcf_pedit_mask(a, i); - val = tcf_pedit_val(a, i); - offset = tcf_pedit_offset(a, i); + return 0; +out_err: + return err; +}
- err = set_pedit_val(htype, ~mask, val, offset, &masks[cmd], &vals[cmd]); - if (err) - goto out_err; - } +static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace, + struct mlx5e_tc_flow_parse_attr *parse_attr, + struct pedit_headers_action *hdrs, + struct netlink_ext_ack *extack) +{ + struct pedit_headers *cmd_masks; + int err; + u8 cmd;
- err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr); - if (err) - goto out_err; + if (!parse_attr->mod_hdr_actions) { - err = alloc_mod_hdr_actions(priv, a, namespace, parse_attr); ++ err = alloc_mod_hdr_actions(priv, hdrs, namespace, parse_attr); + if (err) + goto out_err; + }
- err = offload_pedit_fields(masks, vals, parse_attr, extack); + err = offload_pedit_fields(hdrs, parse_attr, extack); if (err < 0) goto out_dealloc_parsed_actions;
for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) { - cmd_masks = &masks[cmd]; + cmd_masks = &hdrs[cmd].masks; if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) { NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field"); @@@ -2128,17 -2185,22 +2135,22 @@@ static bool csum_offload_supported(stru }
static bool modify_header_match_supported(struct mlx5_flow_spec *spec, - struct tcf_exts *exts, + struct flow_action *flow_action, + u32 actions, struct netlink_ext_ack *extack) { - const struct tc_action *a; + const struct flow_action_entry *act; bool modify_ip_header; u8 htype, ip_proto; void *headers_v; u16 ethertype; - int nkeys, i; + int i;
- headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + if (actions & MLX5_FLOW_CONTEXT_ACTION_DECAP) + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers); + else + headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers); + ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
/* for non-IP we only re-write MACs, so we're okay */ @@@ -2146,16 -2208,20 +2158,16 @@@ goto out_ok;
modify_ip_header = false; - tcf_exts_for_each_action(i, a, exts) { - int k; - - if (!is_tcf_pedit(a)) + flow_action_for_each(i, act, flow_action) { + if (act->id != FLOW_ACTION_MANGLE && + act->id != FLOW_ACTION_ADD) continue;
- nkeys = tcf_pedit_nkeys(a); - for (k = 0; k < nkeys; k++) { - htype = tcf_pedit_htype(a, k); - if (htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP4 || - htype == TCA_PEDIT_KEY_EX_HDR_TYPE_IP6) { - modify_ip_header = true; - break; - } + htype = act->mangle.htype; + if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4 || + htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) { + modify_ip_header = true; + break; } }
@@@ -2173,7 -2239,7 +2185,7 @@@ out_ok }
static bool actions_match_supported(struct mlx5e_priv *priv, - struct tcf_exts *exts, + struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) @@@ -2190,9 -2256,8 +2202,9 @@@ return false;
if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) - return modify_header_match_supported(&parse_attr->spec, exts, - actions, extack); + return modify_header_match_supported(&parse_attr->spec, - flow_action, ++ flow_action, actions, + extack);
return true; } @@@ -2211,50 -2276,52 +2223,50 @@@ static bool same_hw_devs(struct mlx5e_p return (fsystem_guid == psystem_guid); }
-static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, +static int parse_tc_nic_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { struct mlx5_nic_flow_attr *attr = flow->nic_attr; - const struct tc_action *a; + struct pedit_headers_action hdrs[2] = {}; + const struct flow_action_entry *act; u32 action = 0; int err, i;
- if (!tcf_exts_has_actions(exts)) + if (!flow_action_has_entries(flow_action)) return -EINVAL;
attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
- tcf_exts_for_each_action(i, a, exts) { - if (is_tcf_gact_shot(a)) { + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP; if (MLX5_CAP_FLOWTABLE(priv->mdev, flow_table_properties_nic_receive.flow_counter)) action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; - continue; - } - - if (is_tcf_pedit(a)) { - err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_KERNEL, - parse_attr, extack); + break; + case FLOW_ACTION_MANGLE: + case FLOW_ACTION_ADD: + err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr, hdrs, extack); if (err) return err;
action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - continue; - } - - if (is_tcf_csum(a)) { + break; + case FLOW_ACTION_CSUM: if (csum_offload_supported(priv, action, - tcf_csum_update_flags(a), + act->csum_flags, extack)) - continue; + break;
return -EOPNOTSUPP; - } - - if (is_tcf_mirred_egress_redirect(a)) { - struct net_device *peer_dev = tcf_mirred_dev(a); + case FLOW_ACTION_REDIRECT: { + struct net_device *peer_dev = act->dev;
if (priv->netdev->netdev_ops == peer_dev->netdev_ops && same_hw_devs(priv, netdev_priv(peer_dev))) { @@@ -2269,10 -2336,11 +2281,10 @@@ peer_dev->name); return -EINVAL; } - continue; - } - - if (is_tcf_skbedit_mark(a)) { - u32 mark = tcf_skbedit_mark(a); + } + break; + case FLOW_ACTION_MARK: { + u32 mark = act->mark;
if (mark & ~MLX5E_TC_FLOW_ID_MASK) { NL_SET_ERR_MSG_MOD(extack, @@@ -2282,23 -2350,14 +2294,23 @@@
attr->flow_tag = mark; action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - continue; + } + break; + default: + return -EINVAL; } + }
- return -EINVAL; + if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || + hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { + err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr, hdrs, extack); + if (err) + return err; }
attr->action = action; - if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) + if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) return -EOPNOTSUPP;
return 0; @@@ -2403,7 -2462,7 +2415,7 @@@ out_err }
static int parse_tc_vlan_action(struct mlx5e_priv *priv, - const struct tc_action *a, + const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, u32 *action) { @@@ -2412,8 -2471,7 +2424,8 @@@ if (vlan_idx >= MLX5_FS_VLAN_DEPTH) return -EOPNOTSUPP;
- if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { + switch (act->id) { + case FLOW_ACTION_VLAN_POP: if (vlan_idx) { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, MLX5_FS_VLAN_DEPTH)) @@@ -2423,11 -2481,10 +2435,11 @@@ } else { *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; } - } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) { - attr->vlan_vid[vlan_idx] = tcf_vlan_push_vid(a); - attr->vlan_prio[vlan_idx] = tcf_vlan_push_prio(a); - attr->vlan_proto[vlan_idx] = tcf_vlan_push_proto(a); + break; + case FLOW_ACTION_VLAN_PUSH: + attr->vlan_vid[vlan_idx] = act->vlan.vid; + attr->vlan_prio[vlan_idx] = act->vlan.prio; + attr->vlan_proto[vlan_idx] = act->vlan.proto; if (!attr->vlan_proto[vlan_idx]) attr->vlan_proto[vlan_idx] = htons(ETH_P_8021Q);
@@@ -2439,15 -2496,13 +2451,15 @@@ *action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2; } else { if (!mlx5_eswitch_vlan_actions_supported(priv->mdev, 1) && - (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) || - tcf_vlan_push_prio(a))) + (act->vlan.proto != htons(ETH_P_8021Q) || + act->vlan.prio)) return -EOPNOTSUPP;
*action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; } - } else { /* action is TCA_VLAN_ACT_MODIFY */ + break; + default: + /* action is FLOW_ACT_VLAN_MANGLE */ return -EOPNOTSUPP; }
@@@ -2456,56 -2511,58 +2468,56 @@@ return 0; }
-static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, +static int parse_tc_fdb_actions(struct mlx5e_priv *priv, + struct flow_action *flow_action, struct mlx5e_tc_flow_parse_attr *parse_attr, struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack) { + struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_esw_flow_attr *attr = flow->esw_attr; struct mlx5e_rep_priv *rpriv = priv->ppriv; - struct ip_tunnel_info *info = NULL; - const struct tc_action *a; + const struct ip_tunnel_info *info = NULL; + const struct flow_action_entry *act; bool encap = false; u32 action = 0; int err, i;
- if (!tcf_exts_has_actions(exts)) + if (!flow_action_has_entries(flow_action)) return -EINVAL;
attr->in_rep = rpriv->rep; attr->in_mdev = priv->mdev;
- tcf_exts_for_each_action(i, a, exts) { - if (is_tcf_gact_shot(a)) { + flow_action_for_each(i, act, flow_action) { + switch (act->id) { + case FLOW_ACTION_DROP: action |= MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT; - continue; - } - - if (is_tcf_pedit(a)) { - err = parse_tc_pedit_action(priv, a, MLX5_FLOW_NAMESPACE_FDB, - parse_attr, extack); + break; + case FLOW_ACTION_MANGLE: + case FLOW_ACTION_ADD: + err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, + parse_attr, hdrs, extack); if (err) return err;
action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; attr->split_count = attr->out_count; - continue; - } - - if (is_tcf_csum(a)) { + break; + case FLOW_ACTION_CSUM: if (csum_offload_supported(priv, action, - tcf_csum_update_flags(a), - extack)) - continue; + act->csum_flags, extack)) + break;
return -EOPNOTSUPP; - } - - if (is_tcf_mirred_egress_redirect(a) || is_tcf_mirred_egress_mirror(a)) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_MIRRED: { struct mlx5e_priv *out_priv; struct net_device *out_dev;
- out_dev = tcf_mirred_dev(a); + out_dev = act->dev; if (!out_dev) { /* out_dev is NULL when filters with * non-existing mirred device are replayed to @@@ -2524,8 -2581,8 +2536,8 @@@
action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_COUNT; - if (switchdev_port_same_parent_id(priv->netdev, - out_dev) || + if (netdev_port_same_parent_id(priv->netdev, + out_dev) || is_merged_eswitch_dev(priv, out_dev)) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH); @@@ -2570,29 -2627,35 +2582,29 @@@ priv->netdev->name, out_dev->name); return -EINVAL; } - continue; - } - - if (is_tcf_tunnel_set(a)) { - info = tcf_tunnel_info(a); + } + break; + case FLOW_ACTION_TUNNEL_ENCAP: + info = act->tunnel; if (info) encap = true; else return -EOPNOTSUPP; - continue; - } - - if (is_tcf_vlan(a)) { - err = parse_tc_vlan_action(priv, a, attr, &action);
+ break; + case FLOW_ACTION_VLAN_PUSH: + case FLOW_ACTION_VLAN_POP: + err = parse_tc_vlan_action(priv, act, attr, &action); if (err) return err;
attr->split_count = attr->out_count; - continue; - } - - if (is_tcf_tunnel_release(a)) { + break; + case FLOW_ACTION_TUNNEL_DECAP: action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; - continue; - } - - if (is_tcf_gact_goto_chain(a)) { - u32 dest_chain = tcf_gact_goto_chain_index(a); + break; + case FLOW_ACTION_GOTO: { + u32 dest_chain = act->chain_index; u32 max_chain = mlx5_eswitch_get_chain_range(esw);
if (dest_chain <= attr->chain) { @@@ -2605,23 -2668,15 +2617,23 @@@ } action |= MLX5_FLOW_CONTEXT_ACTION_COUNT; attr->dest_chain = dest_chain; - - continue; + break; + } + default: + return -EINVAL; } + }
- return -EINVAL; + if (hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits || + hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits) { + err = alloc_tc_pedit_action(priv, MLX5_FLOW_NAMESPACE_KERNEL, + parse_attr, hdrs, extack); + if (err) + return err; }
attr->action = action; - if (!actions_match_supported(priv, exts, parse_attr, flow, extack)) + if (!actions_match_supported(priv, flow_action, parse_attr, flow, extack)) return -EOPNOTSUPP;
if (attr->dest_chain) { @@@ -2724,15 -2779,15 +2736,15 @@@ err_free return err; }
-static int +static struct mlx5e_tc_flow * __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, struct tc_cls_flower_offload *f, u16 flow_flags, struct net_device *filter_dev, struct mlx5_eswitch_rep *in_rep, - struct mlx5_core_dev *in_mdev, - struct mlx5e_tc_flow **__flow) + struct mlx5_core_dev *in_mdev) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_flow_parse_attr *parse_attr; @@@ -2754,7 -2809,7 +2766,7 @@@
flow->esw_attr->chain = f->common.chain_index; flow->esw_attr->prio = TC_H_MAJ(f->common.prio) >> 16; - err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow, extack); + err = parse_tc_fdb_actions(priv, &rule->action, parse_attr, flow, extack); if (err) goto err_free;
@@@ -2771,13 -2826,15 +2783,13 @@@ if (err) goto err_free;
- *__flow = flow; - - return 0; + return flow;
err_free: kfree(flow); kvfree(parse_attr); out: - return err; + return ERR_PTR(err); }
static int mlx5e_tc_add_fdb_peer_flow(struct tc_cls_flower_offload *f, @@@ -2810,13 -2867,11 +2822,13 @@@ in_mdev = priv->mdev;
parse_attr = flow->esw_attr->parse_attr; - err = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags, - parse_attr->filter_dev, - flow->esw_attr->in_rep, in_mdev, &peer_flow); - if (err) + peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow->flags, + parse_attr->filter_dev, + flow->esw_attr->in_rep, in_mdev); + if (IS_ERR(peer_flow)) { + err = PTR_ERR(peer_flow); goto out; + }
flow->peer_flow = peer_flow; flow->flags |= MLX5E_TC_FLOW_DUP; @@@ -2842,10 -2897,10 +2854,10 @@@ mlx5e_add_fdb_flow(struct mlx5e_priv *p struct mlx5e_tc_flow *flow; int err;
- err = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, - in_mdev, &flow); - if (err) - goto out; + flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep, + in_mdev); + if (IS_ERR(flow)) + return PTR_ERR(flow);
if (is_peer_flow_needed(flow)) { err = mlx5e_tc_add_fdb_peer_flow(f, flow); @@@ -2870,7 -2925,6 +2882,7 @@@ mlx5e_add_nic_flow(struct mlx5e_priv *p struct net_device *filter_dev, struct mlx5e_tc_flow **__flow) { + struct flow_rule *rule = tc_cls_flower_offload_flow_rule(f); struct netlink_ext_ack *extack = f->common.extack; struct mlx5e_tc_flow_parse_attr *parse_attr; struct mlx5e_tc_flow *flow; @@@ -2893,7 -2947,7 +2905,7 @@@ if (err) goto err_free;
- err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow, extack); + err = parse_tc_nic_actions(priv, &rule->action, parse_attr, flow, extack); if (err) goto err_free;
@@@ -3051,7 -3105,7 +3063,7 @@@ int mlx5e_stats_flower(struct net_devic mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
out: - tcf_exts_stats_update(f->exts, bytes, packets, lastuse); + flow_stats_update(&f->stats, bytes, packets, lastuse);
return 0; } diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 189211295e48,0e55cd1f2e98..c1334a8ac8f3 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@@ -387,8 -387,14 +387,14 @@@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_ num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi); if (unlikely(contig_wqebbs_room < num_wqebbs)) { + #ifdef CONFIG_MLX5_EN_IPSEC + struct mlx5_wqe_eth_seg cur_eth = wqe->eth; + #endif mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room); mlx5e_sq_fetch_wqe(sq, &wqe, &pi); + #ifdef CONFIG_MLX5_EN_IPSEC + wqe->eth = cur_eth; + #endif }
/* fill wqe */ @@@ -513,9 -519,8 +519,9 @@@ bool mlx5e_poll_tx_cq(struct mlx5e_cq * &sq->state)) { mlx5e_dump_error_cqe(sq, (struct mlx5_err_cqe *)cqe); - queue_work(cq->channel->priv->wq, - &sq->recover.recover_work); + if (!IS_ERR_OR_NULL(cq->channel->priv->tx_reporter)) + queue_work(cq->channel->priv->wq, + &sq->recover_work); } stats->cqe_err++; } diff --combined drivers/net/ethernet/qlogic/qed/qed.h index 3b0955d34716,2d8a77cc156b..2d21c94f19c2 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@@ -53,7 -53,7 +53,7 @@@ extern const struct qed_common_ops qed_common_ops_pass;
#define QED_MAJOR_VERSION 8 - #define QED_MINOR_VERSION 33 + #define QED_MINOR_VERSION 37 #define QED_REVISION_VERSION 0 #define QED_ENGINEERING_VERSION 20
@@@ -554,6 -554,7 +554,6 @@@ struct qed_hwfn u8 dp_level; char name[NAME_SIZE];
- bool first_on_engine; bool hw_init_done;
u8 num_funcs_on_engine; @@@ -804,9 -805,6 +804,9 @@@ struct qed_dev
u32 mcp_nvm_resp;
+ /* Recovery */ + bool recov_in_prog; + /* Linux specific here */ struct qede_dev *edev; struct pci_dev *pdev; @@@ -946,7 -944,6 +946,7 @@@ void qed_link_update(struct qed_hwfn *h u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, u8 *input_buf, u32 max_size, u8 *unzip_buf); +void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn); void qed_get_protocol_stats(struct qed_dev *cdev, enum qed_mcp_protocol_type type, union qed_mcp_protocol_stats *stats); diff --combined drivers/net/ethernet/qlogic/qed/qed_spq.c index 3e0f7c46bb1b,ba64ff9bedbd..79b311b86f66 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@@ -397,6 -397,11 +397,11 @@@ int qed_eq_completion(struct qed_hwfn *
qed_eq_prod_update(p_hwfn, qed_chain_get_prod_idx(p_chain));
+ /* Attempt to post pending requests */ + spin_lock_bh(&p_hwfn->p_spq->lock); + rc = qed_spq_pend_post(p_hwfn); + spin_unlock_bh(&p_hwfn->p_spq->lock); + return rc; }
@@@ -767,7 -772,7 +772,7 @@@ static int qed_spq_post_list(struct qed return 0; }
- static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) + int qed_spq_pend_post(struct qed_hwfn *p_hwfn) { struct qed_spq *p_spq = p_hwfn->p_spq; struct qed_spq_entry *p_ent = NULL; @@@ -790,17 -795,6 +795,17 @@@ SPQ_HIGH_PRI_RESERVE_DEFAULT); }
+static void qed_spq_recov_set_ret_code(struct qed_spq_entry *p_ent, + u8 *fw_return_code) +{ + if (!fw_return_code) + return; + + if (p_ent->elem.hdr.protocol_id == PROTOCOLID_ROCE || + p_ent->elem.hdr.protocol_id == PROTOCOLID_IWARP) + *fw_return_code = RDMA_RETURN_OK; +} + /* Avoid overriding of SPQ entries when getting out-of-order completions, by * marking the completions in a bitmap and increasing the chain consumer only * for the first successive completed entries. @@@ -836,17 -830,6 +841,17 @@@ int qed_spq_post(struct qed_hwfn *p_hwf return -EINVAL; }
+ if (p_hwfn->cdev->recov_in_prog) { + DP_VERBOSE(p_hwfn, + QED_MSG_SPQ, + "Recovery is in progress. Skip spq post [cmd %02x protocol %02x]\n", + p_ent->elem.hdr.cmd_id, p_ent->elem.hdr.protocol_id); + + /* Let the flow complete w/o any error handling */ + qed_spq_recov_set_ret_code(p_ent, fw_return_code); + return 0; + } + /* Complete the entry */ rc = qed_spq_fill_entry(p_hwfn, p_ent);
@@@ -927,7 -910,6 +932,6 @@@ int qed_spq_completion(struct qed_hwfn struct qed_spq_entry *p_ent = NULL; struct qed_spq_entry *tmp; struct qed_spq_entry *found = NULL; - int rc;
if (!p_hwfn) return -EINVAL; @@@ -985,12 -967,7 +989,7 @@@ */ qed_spq_return_entry(p_hwfn, found);
- /* Attempt to post pending requests */ - spin_lock_bh(&p_spq->lock); - rc = qed_spq_pend_post(p_hwfn); - spin_unlock_bh(&p_spq->lock); - - return rc; + return 0; }
int qed_consq_alloc(struct qed_hwfn *p_hwfn) diff --combined drivers/net/ethernet/qlogic/qede/qede.h index 843416404aeb,730997b13747..63a78162cfaf --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@@ -56,7 -56,7 +56,7 @@@ #include <net/tc_act/tc_gact.h>
#define QEDE_MAJOR_VERSION 8 - #define QEDE_MINOR_VERSION 33 + #define QEDE_MINOR_VERSION 37 #define QEDE_REVISION_VERSION 0 #define QEDE_ENGINEERING_VERSION 20 #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "." \ @@@ -162,7 -162,6 +162,7 @@@ struct qede_rdma_dev struct list_head entry; struct list_head rdma_event_list; struct workqueue_struct *rdma_wq; + bool exp_recovery; };
struct qede_ptp; @@@ -265,7 -264,6 +265,7 @@@ struct qede_dev enum QEDE_STATE { QEDE_STATE_CLOSED, QEDE_STATE_OPEN, + QEDE_STATE_RECOVERY, };
#define HILO_U64(hi, lo) ((((u64)(hi)) << 32) + (lo)) @@@ -464,7 -462,6 +464,7 @@@ struct qede_fastpath #define QEDE_CSUM_UNNECESSARY BIT(1) #define QEDE_TUNN_CSUM_UNNECESSARY BIT(2)
+#define QEDE_SP_RECOVERY 0 #define QEDE_SP_RX_MODE 1
#ifdef CONFIG_RFS_ACCEL @@@ -497,6 -494,9 +497,9 @@@ struct qede_reload_args
/* Datapath functions definition */ netdev_tx_t qede_start_xmit(struct sk_buff *skb, struct net_device *ndev); + u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev, + select_queue_fallback_t fallback); netdev_features_t qede_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); diff --combined drivers/net/ethernet/qlogic/qede/qede_main.c index 6b4d96635238,9790f26d17c4..02a97c659e29 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@@ -133,12 -133,23 +133,12 @@@ static int qede_probe(struct pci_dev *p static void qede_remove(struct pci_dev *pdev); static void qede_shutdown(struct pci_dev *pdev); static void qede_link_update(void *dev, struct qed_link_output *link); +static void qede_schedule_recovery_handler(void *dev); +static void qede_recovery_handler(struct qede_dev *edev); static void qede_get_eth_tlv_data(void *edev, void *data); static void qede_get_generic_tlv_data(void *edev, struct qed_generic_tlvs *data);
-/* The qede lock is used to protect driver state change and driver flows that - * are not reentrant. - */ -void __qede_lock(struct qede_dev *edev) -{ - mutex_lock(&edev->qede_lock); -} - -void __qede_unlock(struct qede_dev *edev) -{ - mutex_unlock(&edev->qede_lock); -} - #ifdef CONFIG_QED_SRIOV static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos, __be16 vlan_proto) @@@ -220,7 -231,6 +220,7 @@@ static struct qed_eth_cb_ops qede_ll_op .arfs_filter_op = qede_arfs_filter_op, #endif .link_update = qede_link_update, + .schedule_recovery_handler = qede_schedule_recovery_handler, .get_generic_tlv_data = qede_get_generic_tlv_data, .get_protocol_tlv_data = qede_get_eth_tlv_data, }, @@@ -621,6 -631,7 +621,7 @@@ static const struct net_device_ops qede .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, @@@ -656,6 -667,7 +657,7 @@@ static const struct net_device_ops qede .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, @@@ -674,6 -686,7 +676,7 @@@ static const struct net_device_ops qede .ndo_open = qede_open, .ndo_stop = qede_close, .ndo_start_xmit = qede_start_xmit, + .ndo_select_queue = qede_select_queue, .ndo_set_rx_mode = qede_set_rx_mode, .ndo_set_mac_address = qede_set_mac_addr, .ndo_validate_addr = eth_validate_addr, @@@ -940,57 -953,11 +943,57 @@@ err return -ENOMEM; }
+/* The qede lock is used to protect driver state change and driver flows that + * are not reentrant. + */ +void __qede_lock(struct qede_dev *edev) +{ + mutex_lock(&edev->qede_lock); +} + +void __qede_unlock(struct qede_dev *edev) +{ + mutex_unlock(&edev->qede_lock); +} + +/* This version of the lock should be used when acquiring the RTNL lock is also + * needed in addition to the internal qede lock. + */ +void qede_lock(struct qede_dev *edev) +{ + rtnl_lock(); + __qede_lock(edev); +} + +void qede_unlock(struct qede_dev *edev) +{ + __qede_unlock(edev); + rtnl_unlock(); +} + static void qede_sp_task(struct work_struct *work) { struct qede_dev *edev = container_of(work, struct qede_dev, sp_task.work);
+ /* The locking scheme depends on the specific flag: + * In case of QEDE_SP_RECOVERY, acquiring the RTNL lock is required to + * ensure that ongoing flows are ended and new ones are not started. + * In other cases - only the internal qede lock should be acquired. + */ + + if (test_and_clear_bit(QEDE_SP_RECOVERY, &edev->sp_flags)) { +#ifdef CONFIG_QED_SRIOV + /* SRIOV must be disabled outside the lock to avoid a deadlock. + * The recovery of the active VFs is currently not supported. + */ + qede_sriov_configure(edev->pdev, 0); +#endif + qede_lock(edev); + qede_recovery_handler(edev); + qede_unlock(edev); + } + __qede_lock(edev);
if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags)) @@@ -1067,7 -1034,6 +1070,7 @@@ static void qede_log_probe(struct qede_
enum qede_probe_mode { QEDE_PROBE_NORMAL, + QEDE_PROBE_RECOVERY, };
static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, @@@ -1088,7 -1054,6 +1091,7 @@@ probe_params.dp_module = dp_module; probe_params.dp_level = dp_level; probe_params.is_vf = is_vf; + probe_params.recov_in_prog = (mode == QEDE_PROBE_RECOVERY); cdev = qed_ops->common->probe(pdev, &probe_params); if (!cdev) { rc = -ENODEV; @@@ -1116,20 -1081,11 +1119,20 @@@ if (rc) goto err2;
- edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module, - dp_level); - if (!edev) { - rc = -ENOMEM; - goto err2; + if (mode != QEDE_PROBE_RECOVERY) { + edev = qede_alloc_etherdev(cdev, pdev, &dev_info, dp_module, + dp_level); + if (!edev) { + rc = -ENOMEM; + goto err2; + } + } else { + struct net_device *ndev = pci_get_drvdata(pdev); + + edev = netdev_priv(ndev); + edev->cdev = cdev; + memset(&edev->stats, 0, sizeof(edev->stats)); + memcpy(&edev->dev_info, &dev_info, sizeof(dev_info)); }
if (is_vf) @@@ -1137,31 -1093,28 +1140,31 @@@
qede_init_ndev(edev);
- rc = qede_rdma_dev_add(edev); + rc = qede_rdma_dev_add(edev, (mode == QEDE_PROBE_RECOVERY)); if (rc) goto err3;
- /* Prepare the lock prior to the registration of the netdev, - * as once it's registered we might reach flows requiring it - * [it's even possible to reach a flow needing it directly - * from there, although it's unlikely]. - */ - INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task); - mutex_init(&edev->qede_lock); - rc = register_netdev(edev->ndev); - if (rc) { - DP_NOTICE(edev, "Cannot register net-device\n"); - goto err4; + if (mode != QEDE_PROBE_RECOVERY) { + /* Prepare the lock prior to the registration of the netdev, + * as once it's registered we might reach flows requiring it + * [it's even possible to reach a flow needing it directly + * from there, although it's unlikely]. + */ + INIT_DELAYED_WORK(&edev->sp_task, qede_sp_task); + mutex_init(&edev->qede_lock); + + rc = register_netdev(edev->ndev); + if (rc) { + DP_NOTICE(edev, "Cannot register net-device\n"); + goto err4; + } }
edev->ops->common->set_name(cdev, edev->ndev->name);
/* PTP not supported on VFs */ if (!is_vf) - qede_ptp_enable(edev, true); + qede_ptp_enable(edev, (mode == QEDE_PROBE_NORMAL));
edev->ops->register_ops(cdev, &qede_ll_ops, edev);
@@@ -1176,7 -1129,7 +1179,7 @@@ return 0;
err4: - qede_rdma_dev_remove(edev); + qede_rdma_dev_remove(edev, (mode == QEDE_PROBE_RECOVERY)); err3: free_netdev(edev->ndev); err2: @@@ -1212,7 -1165,6 +1215,7 @@@ static int qede_probe(struct pci_dev *p
enum qede_remove_mode { QEDE_REMOVE_NORMAL, + QEDE_REMOVE_RECOVERY, };
static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) @@@ -1223,19 -1175,15 +1226,19 @@@
DP_INFO(edev, "Starting qede_remove\n");
- qede_rdma_dev_remove(edev); - unregister_netdev(ndev); - cancel_delayed_work_sync(&edev->sp_task); + qede_rdma_dev_remove(edev, (mode == QEDE_REMOVE_RECOVERY));
- qede_ptp_disable(edev); + if (mode != QEDE_REMOVE_RECOVERY) { + unregister_netdev(ndev);
- edev->ops->common->set_power_state(cdev, PCI_D0); + cancel_delayed_work_sync(&edev->sp_task);
- pci_set_drvdata(pdev, NULL); + edev->ops->common->set_power_state(cdev, PCI_D0); + + pci_set_drvdata(pdev, NULL); + } + + qede_ptp_disable(edev);
/* Use global ops since we've freed edev */ qed_ops->common->slowpath_stop(cdev); @@@ -1249,8 -1197,7 +1252,8 @@@ * [e.g., QED register callbacks] won't break anything when * accessing the netdevice. */ - free_netdev(ndev); + if (mode != QEDE_REMOVE_RECOVERY) + free_netdev(ndev);
dev_info(&pdev->dev, "Ending qede_remove successfully\n"); } @@@ -1595,58 -1542,6 +1598,58 @@@ static int qede_alloc_mem_load(struct q return 0; }
+static void qede_empty_tx_queue(struct qede_dev *edev, + struct qede_tx_queue *txq) +{ + unsigned int pkts_compl = 0, bytes_compl = 0; + struct netdev_queue *netdev_txq; + int rc, len = 0; + + netdev_txq = netdev_get_tx_queue(edev->ndev, txq->ndev_txq_id); + + while (qed_chain_get_cons_idx(&txq->tx_pbl) != + qed_chain_get_prod_idx(&txq->tx_pbl)) { + DP_VERBOSE(edev, NETIF_MSG_IFDOWN, + "Freeing a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n", + txq->index, qed_chain_get_cons_idx(&txq->tx_pbl), + qed_chain_get_prod_idx(&txq->tx_pbl)); + + rc = qede_free_tx_pkt(edev, txq, &len); + if (rc) { + DP_NOTICE(edev, + "Failed to free a packet on tx queue[%d]: chain_cons 0x%x, chain_prod 0x%x\n", + txq->index, + qed_chain_get_cons_idx(&txq->tx_pbl), + qed_chain_get_prod_idx(&txq->tx_pbl)); + break; + } + + bytes_compl += len; + pkts_compl++; + txq->sw_tx_cons++; + } + + netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl); +} + +static void qede_empty_tx_queues(struct qede_dev *edev) +{ + int i; + + for_each_queue(i) + if (edev->fp_array[i].type & QEDE_FASTPATH_TX) { + int cos; + + for_each_cos_in_txq(edev, cos) { + struct qede_fastpath *fp; + + fp = &edev->fp_array[i]; + qede_empty_tx_queue(edev, + &fp->txq[cos]); + } + } +} + /* This function inits fp content and resets the SB, RXQ and TXQ structures */ static void qede_init_fp(struct qede_dev *edev) { @@@ -2161,7 -2056,6 +2164,7 @@@ out
enum qede_unload_mode { QEDE_UNLOAD_NORMAL, + QEDE_UNLOAD_RECOVERY, };
static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode, @@@ -2177,8 -2071,7 +2180,8 @@@
clear_bit(QEDE_FLAGS_LINK_REQUESTED, &edev->flags);
- edev->state = QEDE_STATE_CLOSED; + if (mode != QEDE_UNLOAD_RECOVERY) + edev->state = QEDE_STATE_CLOSED;
qede_rdma_dev_event_close(edev);
@@@ -2186,20 -2079,17 +2189,20 @@@ netif_tx_disable(edev->ndev); netif_carrier_off(edev->ndev);
- /* Reset the link */ - memset(&link_params, 0, sizeof(link_params)); - link_params.link_up = false; - edev->ops->common->set_link(edev->cdev, &link_params); - rc = qede_stop_queues(edev); - if (rc) { - qede_sync_free_irqs(edev); - goto out; - } + if (mode != QEDE_UNLOAD_RECOVERY) { + /* Reset the link */ + memset(&link_params, 0, sizeof(link_params)); + link_params.link_up = false; + edev->ops->common->set_link(edev->cdev, &link_params);
- DP_INFO(edev, "Stopped Queues\n"); + rc = qede_stop_queues(edev); + if (rc) { + qede_sync_free_irqs(edev); + goto out; + } + + DP_INFO(edev, "Stopped Queues\n"); + }
qede_vlan_mark_nonconfigured(edev); edev->ops->fastpath_stop(edev->cdev); @@@ -2215,26 -2105,18 +2218,26 @@@
qede_napi_disable_remove(edev);
+ if (mode == QEDE_UNLOAD_RECOVERY) + qede_empty_tx_queues(edev); + qede_free_mem_load(edev); qede_free_fp_array(edev);
out: if (!is_locked) __qede_unlock(edev); + + if (mode != QEDE_UNLOAD_RECOVERY) + DP_NOTICE(edev, "Link is down\n"); + DP_INFO(edev, "Ending qede unload\n"); }
enum qede_load_mode { QEDE_LOAD_NORMAL, QEDE_LOAD_RELOAD, + QEDE_LOAD_RECOVERY, };
static int qede_load(struct qede_dev *edev, enum qede_load_mode mode, @@@ -2414,77 -2296,6 +2417,77 @@@ static void qede_link_update(void *dev } }
+static void qede_schedule_recovery_handler(void *dev) +{ + struct qede_dev *edev = dev; + + if (edev->state == QEDE_STATE_RECOVERY) { + DP_NOTICE(edev, + "Avoid scheduling a recovery handling since already in recovery state\n"); + return; + } + + set_bit(QEDE_SP_RECOVERY, &edev->sp_flags); + schedule_delayed_work(&edev->sp_task, 0); + + DP_INFO(edev, "Scheduled a recovery handler\n"); +} + +static void qede_recovery_failed(struct qede_dev *edev) +{ + netdev_err(edev->ndev, "Recovery handling has failed. Power cycle is needed.\n"); + + netif_device_detach(edev->ndev); + + if (edev->cdev) + edev->ops->common->set_power_state(edev->cdev, PCI_D3hot); +} + +static void qede_recovery_handler(struct qede_dev *edev) +{ + u32 curr_state = edev->state; + int rc; + + DP_NOTICE(edev, "Starting a recovery process\n"); + + /* No need to acquire first the qede_lock since is done by qede_sp_task + * before calling this function. + */ + edev->state = QEDE_STATE_RECOVERY; + + edev->ops->common->recovery_prolog(edev->cdev); + + if (curr_state == QEDE_STATE_OPEN) + qede_unload(edev, QEDE_UNLOAD_RECOVERY, true); + + __qede_remove(edev->pdev, QEDE_REMOVE_RECOVERY); + + rc = __qede_probe(edev->pdev, edev->dp_module, edev->dp_level, + IS_VF(edev), QEDE_PROBE_RECOVERY); + if (rc) { + edev->cdev = NULL; + goto err; + } + + if (curr_state == QEDE_STATE_OPEN) { + rc = qede_load(edev, QEDE_LOAD_RECOVERY, true); + if (rc) + goto err; + + qede_config_rx_mode(edev->ndev); + udp_tunnel_get_rx_info(edev->ndev); + } + + edev->state = curr_state; + + DP_NOTICE(edev, "Recovery handling is done\n"); + + return; + +err: + qede_recovery_failed(edev); +} + static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq) { struct netdev_queue *netdev_txq; diff --combined drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index effff171d250,685d20472358..f12dd59c85cf --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@@ -534,7 -534,7 +534,7 @@@ static void stmmac_get_rx_hwtstamp(stru }
/** - * stmmac_hwtstamp_ioctl - control hardware timestamping. + * stmmac_hwtstamp_set - control hardware timestamping. * @dev: device pointer. * @ifr: An IOCTL specific structure, that can contain a pointer to * a proprietary structure used to pass information to the driver. @@@ -544,7 -544,7 +544,7 @@@ * Return Value: * 0 on success and an appropriate -ve integer on failure. */ -static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr) +static int stmmac_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) { struct stmmac_priv *priv = netdev_priv(dev); struct hwtstamp_config config; @@@ -573,7 -573,7 +573,7 @@@ }
if (copy_from_user(&config, ifr->ifr_data, - sizeof(struct hwtstamp_config))) + sizeof(config))) return -EFAULT;
netdev_dbg(priv->dev, "%s config flags:0x%x, tx_type:0x%x, rx_filter:0x%x\n", @@@ -597,13 -597,12 +597,13 @@@ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: /* PTP v1, UDP, any kind of event packet */ config.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; - /* take time stamp for all event messages */ - if (xmac) - snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1; - else - snap_type_sel = PTP_TCR_SNAPTYPSEL_1; - + /* 'xmac' hardware can support Sync, Pdelay_Req and + * Pdelay_resp by setting bit14 and bits17/16 to 01 + * This leaves Delay_Req timestamps out. + * Enable all events *and* general purpose message + * timestamping + */ + snap_type_sel = PTP_TCR_SNAPTYPSEL_1; ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; break; @@@ -634,7 -633,10 +634,7 @@@ config.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; ptp_v2 = PTP_TCR_TSVER2ENA; /* take time stamp for all event messages */ - if (xmac) - snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1; - else - snap_type_sel = PTP_TCR_SNAPTYPSEL_1; + snap_type_sel = PTP_TCR_SNAPTYPSEL_1;
ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; @@@ -667,7 -669,12 +667,7 @@@ /* PTP v2/802.AS1 any layer, any kind of event packet */ config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; ptp_v2 = PTP_TCR_TSVER2ENA; - /* take time stamp for all event messages */ - if (xmac) - snap_type_sel = PTP_GMAC4_TCR_SNAPTYPSEL_1; - else - snap_type_sel = PTP_TCR_SNAPTYPSEL_1; - + snap_type_sel = PTP_TCR_SNAPTYPSEL_1; ptp_over_ipv4_udp = PTP_TCR_TSIPV4ENA; ptp_over_ipv6_udp = PTP_TCR_TSIPV6ENA; ptp_over_ethernet = PTP_TCR_TSIPENA; @@@ -758,31 -765,8 +758,31 @@@ (u32)now.tv_sec, now.tv_nsec); }
+ memcpy(&priv->tstamp_config, &config, sizeof(config)); + return copy_to_user(ifr->ifr_data, &config, - sizeof(struct hwtstamp_config)) ? -EFAULT : 0; + sizeof(config)) ? -EFAULT : 0; +} + +/** + * stmmac_hwtstamp_get - read hardware timestamping. + * @dev: device pointer. + * @ifr: An IOCTL specific structure, that can contain a pointer to + * a proprietary structure used to pass information to the driver. + * Description: + * This function obtain the current hardware timestamping settings + as requested. + */ +static int stmmac_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) +{ + struct stmmac_priv *priv = netdev_priv(dev); + struct hwtstamp_config *config = &priv->tstamp_config; + + if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) + return -EOPNOTSUPP; + + return copy_to_user(ifr->ifr_data, config, + sizeof(*config)) ? -EFAULT : 0; }
/** @@@ -3039,10 -3023,22 +3039,22 @@@ static netdev_tx_t stmmac_xmit(struct s
tx_q = &priv->tx_queue[queue];
+ if (priv->tx_path_in_lpi_mode) + stmmac_disable_eee_mode(priv); + /* Manage oversized TCP frames for GMAC4 device */ if (skb_is_gso(skb) && priv->tso) { - if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) + if (skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { + /* + * There is no way to determine the number of TSO + * capable Queues. Let's use always the Queue 0 + * because if TSO is supported then at least this + * one will be capable. + */ + skb_set_queue_mapping(skb, 0); + return stmmac_tso_xmit(skb, dev); + } }
if (unlikely(stmmac_tx_avail(priv, queue) < nfrags + 1)) { @@@ -3057,9 -3053,6 +3069,6 @@@ return NETDEV_TX_BUSY; }
- if (priv->tx_path_in_lpi_mode) - stmmac_disable_eee_mode(priv); - entry = tx_q->cur_tx; first_entry = entry; WARN_ON(tx_q->tx_skbuff[first_entry]); @@@ -3783,10 -3776,7 +3792,10 @@@ static int stmmac_ioctl(struct net_devi ret = phy_mii_ioctl(dev->phydev, rq, cmd); break; case SIOCSHWTSTAMP: - ret = stmmac_hwtstamp_ioctl(dev, rq); + ret = stmmac_hwtstamp_set(dev, rq); + break; + case SIOCGHWTSTAMP: + ret = stmmac_hwtstamp_get(dev, rq); break; default: break; diff --combined drivers/net/ipvlan/ipvlan_l3s.c index 9a2f24078a54,000000000000..d17480a911a3 mode 100644,000000..100644 --- a/drivers/net/ipvlan/ipvlan_l3s.c +++ b/drivers/net/ipvlan/ipvlan_l3s.c @@@ -1,227 -1,0 +1,227 @@@ +/* Copyright (c) 2014 Mahesh Bandewar maheshb@google.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + */ + +#include "ipvlan.h" + +static unsigned int ipvlan_netid __read_mostly; + +struct ipvlan_netns { + unsigned int ipvl_nf_hook_refcnt; +}; + +static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, + struct net_device *dev) +{ + struct ipvl_addr *addr = NULL; + struct ipvl_port *port; + int addr_type; + void *lyr3h; + + if (!dev || !netif_is_ipvlan_port(dev)) + goto out; + + port = ipvlan_port_get_rcu(dev); + if (!port || port->mode != IPVLAN_MODE_L3S) + goto out; + + lyr3h = ipvlan_get_L3_hdr(port, skb, &addr_type); + if (!lyr3h) + goto out; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); +out: + return addr; +} + +static struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, + struct sk_buff *skb, u16 proto) +{ + struct ipvl_addr *addr; + struct net_device *sdev; + + addr = ipvlan_skb_to_addr(skb, dev); + if (!addr) + goto out; + + sdev = addr->master->dev; + switch (proto) { + case AF_INET: + { + struct iphdr *ip4h = ip_hdr(skb); + int err; + + err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, + ip4h->tos, sdev); + if (unlikely(err)) + goto out; + break; + } +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + { + struct dst_entry *dst; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct flowi6 fl6 = { + .flowi6_iif = sdev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + skb_dst_drop(skb); + dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, + skb, flags); + skb_dst_set(skb, dst); + break; + } +#endif + default: + break; + } +out: + return skb; +} + +static const struct l3mdev_ops ipvl_l3mdev_ops = { + .l3mdev_l3_rcv = ipvlan_l3_rcv, +}; + +static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct ipvl_addr *addr; + unsigned int len; + + addr = ipvlan_skb_to_addr(skb, skb->dev); + if (!addr) + goto out; + + skb->dev = addr->master->dev; + len = skb->len + ETH_HLEN; + ipvlan_count_rx(addr->master, len, true, false); +out: + return NF_ACCEPT; +} + +static const struct nf_hook_ops ipvl_nfops[] = { + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, +#if IS_ENABLED(CONFIG_IPV6) + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, +#endif +}; + +static int ipvlan_register_nf_hook(struct net *net) +{ + struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); + int err = 0; + + if (!vnet->ipvl_nf_hook_refcnt) { + err = nf_register_net_hooks(net, ipvl_nfops, + ARRAY_SIZE(ipvl_nfops)); + if (!err) + vnet->ipvl_nf_hook_refcnt = 1; + } else { + vnet->ipvl_nf_hook_refcnt++; + } + + return err; +} + +static void ipvlan_unregister_nf_hook(struct net *net) +{ + struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); + + if (WARN_ON(!vnet->ipvl_nf_hook_refcnt)) + return; + + vnet->ipvl_nf_hook_refcnt--; + if (!vnet->ipvl_nf_hook_refcnt) + nf_unregister_net_hooks(net, ipvl_nfops, + ARRAY_SIZE(ipvl_nfops)); +} + +void ipvlan_migrate_l3s_hook(struct net *oldnet, struct net *newnet) +{ + struct ipvlan_netns *old_vnet; + + ASSERT_RTNL(); + + old_vnet = net_generic(oldnet, ipvlan_netid); + if (!old_vnet->ipvl_nf_hook_refcnt) + return; + + ipvlan_register_nf_hook(newnet); + ipvlan_unregister_nf_hook(oldnet); +} + +static void ipvlan_ns_exit(struct net *net) +{ + struct ipvlan_netns *vnet = net_generic(net, ipvlan_netid); + + if (WARN_ON_ONCE(vnet->ipvl_nf_hook_refcnt)) { + vnet->ipvl_nf_hook_refcnt = 0; + nf_unregister_net_hooks(net, ipvl_nfops, + ARRAY_SIZE(ipvl_nfops)); + } +} + +static struct pernet_operations ipvlan_net_ops = { + .id = &ipvlan_netid, + .size = sizeof(struct ipvlan_netns), + .exit = ipvlan_ns_exit, +}; + +int ipvlan_l3s_init(void) +{ + return register_pernet_subsys(&ipvlan_net_ops); +} + +void ipvlan_l3s_cleanup(void) +{ + unregister_pernet_subsys(&ipvlan_net_ops); +} + +int ipvlan_l3s_register(struct ipvl_port *port) +{ + struct net_device *dev = port->dev; + int ret; + + ASSERT_RTNL(); + + ret = ipvlan_register_nf_hook(read_pnet(&port->pnet)); + if (!ret) { + dev->l3mdev_ops = &ipvl_l3mdev_ops; - dev->priv_flags |= IFF_L3MDEV_MASTER; ++ dev->priv_flags |= IFF_L3MDEV_RX_HANDLER; + } + + return ret; +} + +void ipvlan_l3s_unregister(struct ipvl_port *port) +{ + struct net_device *dev = port->dev; + + ASSERT_RTNL(); + - dev->priv_flags &= ~IFF_L3MDEV_MASTER; ++ dev->priv_flags &= ~IFF_L3MDEV_RX_HANDLER; + ipvlan_unregister_nf_hook(read_pnet(&port->pnet)); + dev->l3mdev_ops = NULL; +} diff --combined drivers/net/phy/dp83640.c index 25ef483bcc24,6e8807212aa3..2fe2ebaf62d1 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@@ -1,8 -1,21 +1,8 @@@ +// SPDX-License-Identifier: GPL-2.0+ /* * Driver for the National Semiconductor DP83640 PHYTER * * Copyright (C) 2010 OMICRON electronics GmbH - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@@ -885,14 -898,14 +885,14 @@@ static void decode_txts(struct dp83640_ struct phy_txts *phy_txts) { struct skb_shared_hwtstamps shhwtstamps; + struct dp83640_skb_info *skb_info; struct sk_buff *skb; - u64 ns; u8 overflow; + u64 ns;
/* We must already have the skb that triggered this. */ - + again: skb = skb_dequeue(&dp83640->tx_queue); - if (!skb) { pr_debug("have timestamp but tx_queue empty\n"); return; @@@ -907,6 -920,11 +907,11 @@@ } return; } + skb_info = (struct dp83640_skb_info *)skb->cb; + if (time_after(jiffies, skb_info->tmo)) { + kfree_skb(skb); + goto again; + }
ns = phy2txts(phy_txts); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); @@@ -1459,6 -1477,7 +1464,7 @@@ static bool dp83640_rxtstamp(struct phy static void dp83640_txtstamp(struct phy_device *phydev, struct sk_buff *skb, int type) { + struct dp83640_skb_info *skb_info = (struct dp83640_skb_info *)skb->cb; struct dp83640_private *dp83640 = phydev->priv;
switch (dp83640->hwts_tx_en) { @@@ -1471,6 -1490,7 +1477,7 @@@ /* fall through */ case HWTSTAMP_TX_ON: skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + skb_info->tmo = jiffies + SKB_TIMESTAMP_TIMEOUT; skb_queue_tail(&dp83640->tx_queue, skb); break;
diff --combined drivers/net/phy/marvell.c index 90f44ba8aca7,abb7876a8776..3ccba37bd6dd --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@@ -1,4 -1,3 +1,4 @@@ +// SPDX-License-Identifier: GPL-2.0+ /* * drivers/net/phy/marvell.c * @@@ -9,6 -8,12 +9,6 @@@ * Copyright (c) 2004 Freescale Semiconductor, Inc. * * Copyright (c) 2013 Michael Stapelberg michael@stapelberg.de - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * */ #include <linux/kernel.h> #include <linux/string.h> @@@ -842,7 -847,6 +842,6 @@@ static int m88e1510_config_init(struct
/* SGMII-to-Copper mode initialization */ if (phydev->interface == PHY_INTERFACE_MODE_SGMII) { - /* Select page 18 */ err = marvell_set_page(phydev, 18); if (err < 0) @@@ -865,21 -869,6 +864,6 @@@ err = marvell_set_page(phydev, MII_MARVELL_COPPER_PAGE); if (err < 0) return err; - - /* There appears to be a bug in the 88e1512 when used in - * SGMII to copper mode, where the AN advertisement register - * clears the pause bits each time a negotiation occurs. - * This means we can never be truely sure what was advertised, - * so disable Pause support. - */ - linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->supported); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, - phydev->advertising); - linkmode_clear_bit(ETHTOOL_LINK_MODE_Pause_BIT, - phydev->advertising); }
return m88e1318_config_init(phydev); diff --combined drivers/net/virtio_net.c index 2a0edd4653e3,4cfceb789eea..7eb38ea9ba56 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@@ -57,6 -57,8 +57,8 @@@ module_param(napi_tx, bool, 0644) #define VIRTIO_XDP_TX BIT(0) #define VIRTIO_XDP_REDIR BIT(1)
+ #define VIRTIO_XDP_FLAG BIT(0) + /* RX packet size EWMA. The average packet size is used to determine the packet * buffer size when refilling RX rings. As the entire RX ring may be refilled * at once, the weight is chosen so that the EWMA will be insensitive to short- @@@ -252,6 -254,21 +254,21 @@@ struct padded_vnet_hdr char padding[4]; };
+ static bool is_xdp_frame(void *ptr) + { + return (unsigned long)ptr & VIRTIO_XDP_FLAG; + } + + static void *xdp_to_ptr(struct xdp_frame *ptr) + { + return (void *)((unsigned long)ptr | VIRTIO_XDP_FLAG); + } + + static struct xdp_frame *ptr_to_xdp(void *ptr) + { + return (struct xdp_frame *)((unsigned long)ptr & ~VIRTIO_XDP_FLAG); + } + /* Converting between virtqueue no. and kernel tx/rx queue no. * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq */ @@@ -462,7 -479,8 +479,8 @@@ static int __virtnet_xdp_xmit_one(struc
sg_init_one(sq->sg, xdpf->data, xdpf->len);
- err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdpf, GFP_ATOMIC); + err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp_to_ptr(xdpf), + GFP_ATOMIC); if (unlikely(err)) return -ENOSPC; /* Caller handle free/refcnt */
@@@ -482,36 -500,47 +500,47 @@@ static int virtnet_xdp_xmit(struct net_ { struct virtnet_info *vi = netdev_priv(dev); struct receive_queue *rq = vi->rq; - struct xdp_frame *xdpf_sent; struct bpf_prog *xdp_prog; struct send_queue *sq; unsigned int len; + int packets = 0; + int bytes = 0; int drops = 0; int kicks = 0; int ret, err; + void *ptr; int i;
- sq = virtnet_xdp_sq(vi); - - if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { - ret = -EINVAL; - drops = n; - goto out; - } - /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this * indicate XDP resources have been successfully allocated. */ xdp_prog = rcu_dereference(rq->xdp_prog); - if (!xdp_prog) { - ret = -ENXIO; + if (!xdp_prog) + return -ENXIO; + + sq = virtnet_xdp_sq(vi); + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) { + ret = -EINVAL; drops = n; goto out; }
/* Free up any pending old buffers before queueing new ones. */ - while ((xdpf_sent = virtqueue_get_buf(sq->vq, &len)) != NULL) - xdp_return_frame(xdpf_sent); + while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { + if (likely(is_xdp_frame(ptr))) { + struct xdp_frame *frame = ptr_to_xdp(ptr); + + bytes += frame->len; + xdp_return_frame(frame); + } else { + struct sk_buff *skb = ptr; + + bytes += skb->len; + napi_consume_skb(skb, false); + } + packets++; + }
for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; @@@ -530,6 -559,8 +559,8 @@@ } out: u64_stats_update_begin(&sq->stats.syncp); + sq->stats.bytes += bytes; + sq->stats.packets += packets; sq->stats.xdp_tx += n; sq->stats.xdp_tx_drops += drops; sq->stats.kicks += kicks; @@@ -1035,7 -1066,6 +1066,7 @@@ static void receive_buf(struct virtnet_ goto frame_err; }
+ skb_record_rx_queue(skb, vq2rxq(rq->vq)); skb->protocol = eth_type_trans(skb, dev); pr_debug("Receiving skb proto 0x%04x len %i type %i\n", ntohs(skb->protocol), skb->len, skb->pkt_type); @@@ -1333,18 -1363,26 +1364,26 @@@ static int virtnet_receive(struct recei
static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi) { - struct sk_buff *skb; unsigned int len; unsigned int packets = 0; unsigned int bytes = 0; + void *ptr;
- while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) { - pr_debug("Sent skb %p\n", skb); + while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) { + if (likely(!is_xdp_frame(ptr))) { + struct sk_buff *skb = ptr;
- bytes += skb->len; - packets++; + pr_debug("Sent skb %p\n", skb); + + bytes += skb->len; + napi_consume_skb(skb, in_napi); + } else { + struct xdp_frame *frame = ptr_to_xdp(ptr);
- napi_consume_skb(skb, in_napi); + bytes += frame->len; + xdp_return_frame(frame); + } + packets++; }
/* Avoid overhead when no packets have been processed @@@ -1359,6 -1397,16 +1398,16 @@@ u64_stats_update_end(&sq->stats.syncp); }
+ static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) + { + if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) + return false; + else if (q < vi->curr_queue_pairs) + return true; + else + return false; + } + static void virtnet_poll_cleantx(struct receive_queue *rq) { struct virtnet_info *vi = rq->vq->vdev->priv; @@@ -1366,7 -1414,7 +1415,7 @@@ struct send_queue *sq = &vi->sq[index]; struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, index);
- if (!sq->napi.weight) + if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index)) return;
if (__netif_tx_trylock(txq)) { @@@ -1443,8 -1491,16 +1492,16 @@@ static int virtnet_poll_tx(struct napi_ { struct send_queue *sq = container_of(napi, struct send_queue, napi); struct virtnet_info *vi = sq->vq->vdev->priv; - struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq)); + unsigned int index = vq2txq(sq->vq); + struct netdev_queue *txq;
+ if (unlikely(is_xdp_raw_buffer_queue(vi, index))) { + /* We don't need to enable cb for XDP */ + napi_complete_done(napi, 0); + return 0; + } + + txq = netdev_get_tx_queue(vi->dev, index); __netif_tx_lock(txq, raw_smp_processor_id()); free_old_xmit_skbs(sq, true); __netif_tx_unlock(txq); @@@ -2396,6 -2452,10 +2453,10 @@@ static int virtnet_xdp_set(struct net_d return -ENOMEM; }
+ old_prog = rtnl_dereference(vi->rq[0].xdp_prog); + if (!prog && !old_prog) + return 0; + if (prog) { prog = bpf_prog_add(prog, vi->max_queue_pairs - 1); if (IS_ERR(prog)) @@@ -2403,36 -2463,62 +2464,62 @@@ }
/* Make sure NAPI is not using any XDP TX queues for RX. */ - if (netif_running(dev)) - for (i = 0; i < vi->max_queue_pairs; i++) + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) { napi_disable(&vi->rq[i].napi); + virtnet_napi_tx_disable(&vi->sq[i].napi); + } + } + + if (!prog) { + for (i = 0; i < vi->max_queue_pairs; i++) { + rcu_assign_pointer(vi->rq[i].xdp_prog, prog); + if (i == 0) + virtnet_restore_guest_offloads(vi); + } + synchronize_net(); + }
- netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); err = _virtnet_set_queues(vi, curr_qp + xdp_qp); if (err) goto err; + netif_set_real_num_rx_queues(dev, curr_qp + xdp_qp); vi->xdp_queue_pairs = xdp_qp;
- for (i = 0; i < vi->max_queue_pairs; i++) { - old_prog = rtnl_dereference(vi->rq[i].xdp_prog); - rcu_assign_pointer(vi->rq[i].xdp_prog, prog); - if (i == 0) { - if (!old_prog) + if (prog) { + for (i = 0; i < vi->max_queue_pairs; i++) { + rcu_assign_pointer(vi->rq[i].xdp_prog, prog); + if (i == 0 && !old_prog) virtnet_clear_guest_offloads(vi); - if (!prog) - virtnet_restore_guest_offloads(vi); } + } + + for (i = 0; i < vi->max_queue_pairs; i++) { if (old_prog) bpf_prog_put(old_prog); - if (netif_running(dev)) + if (netif_running(dev)) { virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + virtnet_napi_tx_enable(vi, vi->sq[i].vq, + &vi->sq[i].napi); + } }
return 0;
err: - for (i = 0; i < vi->max_queue_pairs; i++) - virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + if (!prog) { + virtnet_clear_guest_offloads(vi); + for (i = 0; i < vi->max_queue_pairs; i++) + rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); + } + + if (netif_running(dev)) { + for (i = 0; i < vi->max_queue_pairs; i++) { + virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi); + virtnet_napi_tx_enable(vi, vi->sq[i].vq, + &vi->sq[i].napi); + } + } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1); return err; @@@ -2614,16 -2700,6 +2701,6 @@@ static void free_receive_page_frags(str put_page(vi->rq[i].alloc_frag.page); }
- static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q) - { - if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs)) - return false; - else if (q < vi->curr_queue_pairs) - return true; - else - return false; - } - static void free_unused_bufs(struct virtnet_info *vi) { void *buf; @@@ -2632,10 -2708,10 +2709,10 @@@ for (i = 0; i < vi->max_queue_pairs; i++) { struct virtqueue *vq = vi->sq[i].vq; while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { - if (!is_xdp_raw_buffer_queue(vi, i)) + if (!is_xdp_frame(buf)) dev_kfree_skb(buf); else - put_page(virt_to_head_page(buf)); + xdp_return_frame(ptr_to_xdp(buf)); } }
diff --combined drivers/net/wan/dscc4.c index d5dc823f781e,27decf8ae840..fa78d2b14136 --- a/drivers/net/wan/dscc4.c +++ b/drivers/net/wan/dscc4.c @@@ -456,16 -456,16 +456,16 @@@ static int state_check(u32 state, struc int ret = 0;
if (debug > 1) { - if (SOURCE_ID(state) != dpriv->dev_id) { - printk(KERN_DEBUG "%s (%s): Source Id=%d, state=%08x\n", - dev->name, msg, SOURCE_ID(state), state ); + if (SOURCE_ID(state) != dpriv->dev_id) { + printk(KERN_DEBUG "%s (%s): Source Id=%d, state=%08x\n", + dev->name, msg, SOURCE_ID(state), state); ret = -1; - } - if (state & 0x0df80c00) { - printk(KERN_DEBUG "%s (%s): state=%08x (UFO alert)\n", - dev->name, msg, state); + } + if (state & 0x0df80c00) { + printk(KERN_DEBUG "%s (%s): state=%08x (UFO alert)\n", + dev->name, msg, state); ret = -1; - } + } } return ret; } @@@ -1575,7 -1575,7 +1575,7 @@@ try dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; } - dev_kfree_skb_irq(skb); + dev_consume_skb_irq(skb); dpriv->tx_skbuff[cur] = NULL; ++dpriv->tx_dirty; } else { @@@ -1760,25 -1760,25 +1760,25 @@@ try } else { /* SccEvt */ if (debug > 1) { //FIXME: verifier la presence de tous les evenements - static struct { - u32 mask; - const char *irq_name; - } evts[] = { - { 0x00008000, "TIN"}, - { 0x00000020, "RSC"}, - { 0x00000010, "PCE"}, - { 0x00000008, "PLLA"}, - { 0, NULL} - }, *evt; - - for (evt = evts; evt->irq_name; evt++) { - if (state & evt->mask) { + static struct { + u32 mask; + const char *irq_name; + } evts[] = { + { 0x00008000, "TIN"}, + { 0x00000020, "RSC"}, + { 0x00000010, "PCE"}, + { 0x00000008, "PLLA"}, + { 0, NULL} + }, *evt; + + for (evt = evts; evt->irq_name; evt++) { + if (state & evt->mask) { printk(KERN_DEBUG "%s: %s\n", - dev->name, evt->irq_name); - if (!(state &= ~evt->mask)) - goto try; + dev->name, evt->irq_name); + if (!(state &= ~evt->mask)) + goto try; + } } - } } else { if (!(state &= ~0x0000c03c)) goto try; diff --combined drivers/net/wireless/mediatek/mt76/mt76x0/phy.c index 5a4c6f34267e,b6166703ad76..1117cdc15b04 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/phy.c @@@ -845,17 -845,17 +845,17 @@@ static void mt76x0_phy_tssi_calibrate(s void mt76x0_phy_set_txpower(struct mt76x02_dev *dev) { struct mt76_rate_power *t = &dev->mt76.rate_power; - u8 info[2]; + s8 info;
mt76x0_get_tx_power_per_rate(dev); - mt76x0_get_power_info(dev, info); + mt76x0_get_power_info(dev, &info);
- mt76x02_add_rate_power_offset(t, info[0]); + mt76x02_add_rate_power_offset(t, info); mt76x02_limit_rate_power(t, dev->mt76.txpower_conf); dev->mt76.txpower_cur = mt76x02_get_max_rate_power(t); - mt76x02_add_rate_power_offset(t, -info[0]); + mt76x02_add_rate_power_offset(t, -info);
- mt76x02_phy_set_txpower(dev, info[0], info[1]); + mt76x02_phy_set_txpower(dev, info, info); }
void mt76x0_phy_calibrate(struct mt76x02_dev *dev, bool power_on) @@@ -1013,8 -1013,6 +1013,8 @@@ int mt76x0_phy_set_channel(struct mt76x mt76x0_phy_calibrate(dev, false); mt76x0_phy_set_txpower(dev);
+ mt76x02_edcca_init(dev); + ieee80211_queue_delayed_work(dev->mt76.hw, &dev->cal_work, MT_CALIBRATE_INTERVAL);
@@@ -1077,9 -1075,7 +1077,9 @@@ mt76x0_phy_update_channel_gain(struct m u8 gain_delta; int low_gain;
- dev->cal.avg_rssi_all = mt76x02_phy_get_min_avg_rssi(dev); + dev->cal.avg_rssi_all = mt76_get_min_avg_rssi(&dev->mt76); + if (!dev->cal.avg_rssi_all) + dev->cal.avg_rssi_all = -75;
low_gain = (dev->cal.avg_rssi_all > mt76x02_get_rssi_gain_thresh(dev)) + (dev->cal.avg_rssi_all > mt76x02_get_low_rssi_gain_thresh(dev)); diff --combined drivers/s390/net/qeth_core.h index d65650ef6b41,122059ecad84..71d27a804920 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@@ -22,6 -22,7 +22,7 @@@ #include <linux/hashtable.h> #include <linux/ip.h> #include <linux/refcount.h> + #include <linux/workqueue.h>
#include <net/ipv6.h> #include <net/if_inet6.h> @@@ -741,6 -742,11 +742,6 @@@ struct qeth_discipline struct qeth_ipa_cmd *cmd); };
-struct qeth_vlan_vid { - struct list_head list; - unsigned short vid; -}; - enum qeth_addr_disposition { QETH_DISP_ADDR_DELETE = 0, QETH_DISP_ADDR_DO_NOTHING = 1, @@@ -784,9 -790,12 +785,10 @@@ struct qeth_card struct qeth_seqno seqno; struct qeth_card_options options;
+ struct workqueue_struct *event_wq; wait_queue_head_t wait_q; spinlock_t mclock; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; - struct mutex vid_list_mutex; /* vid_list */ - struct list_head vid_list; DECLARE_HASHTABLE(mac_htable, 4); DECLARE_HASHTABLE(ip_htable, 4); DECLARE_HASHTABLE(ip_mc_htable, 4); @@@ -795,6 -804,7 +797,6 @@@ unsigned long thread_start_mask; unsigned long thread_allowed_mask; unsigned long thread_running_mask; - struct task_struct *recovery_task; spinlock_t ip_lock; struct qeth_ipato ipato; struct list_head cmd_waiter_list; @@@ -954,7 -964,6 +956,6 @@@ extern const struct attribute_group *qe extern const struct attribute_group qeth_device_attr_group; extern const struct attribute_group qeth_device_blkt_group; extern const struct device_type qeth_generic_devtype; - extern struct workqueue_struct *qeth_wq;
int qeth_card_hw_is_reachable(struct qeth_card *); const char *qeth_get_cardname_short(struct qeth_card *); @@@ -968,8 -977,11 +969,8 @@@ extern struct qeth_dbf_info qeth_dbf[QE
struct net_device *qeth_clone_netdev(struct net_device *orig); struct qeth_card *qeth_get_card_by_busid(char *bus_id); -void qeth_set_recovery_task(struct qeth_card *); -void qeth_clear_recovery_task(struct qeth_card *); void qeth_set_allowed_threads(struct qeth_card *, unsigned long , int); int qeth_threads_running(struct qeth_card *, unsigned long); -int qeth_wait_for_threads(struct qeth_card *, unsigned long); int qeth_do_run_thread(struct qeth_card *, unsigned long); void qeth_clear_thread_start_bit(struct qeth_card *, unsigned long); void qeth_clear_thread_running_bit(struct qeth_card *, unsigned long); @@@ -1036,9 -1048,6 +1037,9 @@@ netdev_features_t qeth_fix_features(str netdev_features_t qeth_features_check(struct sk_buff *skb, struct net_device *dev, netdev_features_t features); +int qeth_open(struct net_device *dev); +int qeth_stop(struct net_device *dev); + int qeth_vm_request_mac(struct qeth_card *card); int qeth_xmit(struct qeth_card *card, struct sk_buff *skb, struct qeth_qdio_out_q *queue, int ipv, int cast_type, diff --combined drivers/s390/net/qeth_core_main.c index dcc06e48b70b,89f912213e62..6ef0c89370b5 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@@ -74,8 -74,7 +74,7 @@@ static void qeth_notify_skbs(struct qet static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf); static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *, int);
- struct workqueue_struct *qeth_wq; - EXPORT_SYMBOL_GPL(qeth_wq); + static struct workqueue_struct *qeth_wq;
int qeth_card_hw_is_reachable(struct qeth_card *card) { @@@ -193,6 -192,23 +192,6 @@@ const char *qeth_get_cardname_short(str return "n/a"; }
-void qeth_set_recovery_task(struct qeth_card *card) -{ - card->recovery_task = current; -} -EXPORT_SYMBOL_GPL(qeth_set_recovery_task); - -void qeth_clear_recovery_task(struct qeth_card *card) -{ - card->recovery_task = NULL; -} -EXPORT_SYMBOL_GPL(qeth_clear_recovery_task); - -static bool qeth_is_recovery_task(const struct qeth_card *card) -{ - return card->recovery_task == current; -} - void qeth_set_allowed_threads(struct qeth_card *card, unsigned long threads, int clear_start_mask) { @@@ -219,6 -235,15 +218,6 @@@ int qeth_threads_running(struct qeth_ca } EXPORT_SYMBOL_GPL(qeth_threads_running);
-int qeth_wait_for_threads(struct qeth_card *card, unsigned long threads) -{ - if (qeth_is_recovery_task(card)) - return 0; - return wait_event_interruptible(card->wait_q, - qeth_threads_running(card, threads) == 0); -} -EXPORT_SYMBOL_GPL(qeth_wait_for_threads); - void qeth_clear_working_pool_list(struct qeth_card *card) { struct qeth_buffer_pool_entry *pool_entry, *tmp; @@@ -540,6 -565,7 +539,7 @@@ static int __qeth_issue_next_read(struc QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n", rc, CARD_DEVID(card)); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); card->read_or_write_problem = 1; qeth_schedule_recovery(card); wake_up(&card->wait_q); @@@ -1101,6 -1127,8 +1101,8 @@@ static void qeth_irq(struct ccw_device rc = qeth_get_problem(card, cdev, irb); if (rc) { card->read_or_write_problem = 1; + if (iob) + qeth_release_buffer(iob->channel, iob); qeth_clear_ipacmd_list(card); qeth_schedule_recovery(card); goto out; @@@ -1404,6 -1432,7 +1406,6 @@@ static void qeth_setup_card(struct qeth spin_lock_init(&card->thread_mask_lock); mutex_init(&card->conf_mutex); mutex_init(&card->discipline_mutex); - mutex_init(&card->vid_list_mutex); INIT_WORK(&card->kernel_thread_starter, qeth_start_kernel_thread); INIT_LIST_HEAD(&card->cmd_waiter_list); init_waitqueue_head(&card->wait_q); @@@ -1439,6 -1468,10 +1441,10 @@@ static struct qeth_card *qeth_alloc_car CARD_RDEV(card) = gdev->cdev[0]; CARD_WDEV(card) = gdev->cdev[1]; CARD_DDEV(card) = gdev->cdev[2]; + + card->event_wq = alloc_ordered_workqueue("%s", 0, dev_name(&gdev->dev)); + if (!card->event_wq) + goto out_wq; if (qeth_setup_channel(&card->read, true)) goto out_ip; if (qeth_setup_channel(&card->write, true)) @@@ -1454,6 -1487,8 +1460,8 @@@ out_data out_channel: qeth_clean_channel(&card->read); out_ip: + destroy_workqueue(card->event_wq); + out_wq: dev_set_drvdata(&gdev->dev, NULL); kfree(card); out: @@@ -1782,6 -1817,7 +1790,7 @@@ static int qeth_idx_activate_get_answer QETH_DBF_MESSAGE(2, "Error2 in activating channel rc=%d\n", rc); QETH_DBF_TEXT_(SETUP, 2, "2err%d", rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); wake_up(&card->wait_q); return rc; } @@@ -1851,6 -1887,7 +1860,7 @@@ static int qeth_idx_activate_channel(st rc); QETH_DBF_TEXT_(SETUP, 2, "1err%d", rc); atomic_set(&channel->irq_pending, 0); + qeth_release_buffer(channel, iob); wake_up(&card->wait_q); return rc; } @@@ -2031,6 -2068,7 +2041,7 @@@ int qeth_send_control_data(struct qeth_ } reply = qeth_alloc_reply(card); if (!reply) { + qeth_release_buffer(channel, iob); return -ENOMEM; } reply->callback = reply_cb; @@@ -2362,11 -2400,12 +2373,12 @@@ static int qeth_init_qdio_out_buf(struc return 0; }
- static void qeth_free_qdio_out_buf(struct qeth_qdio_out_q *q) + static void qeth_free_output_queue(struct qeth_qdio_out_q *q) { if (!q) return;
+ qeth_clear_outq_buffers(q, 1); qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); kfree(q); } @@@ -2440,10 -2479,8 +2452,8 @@@ out_freeoutqbufs card->qdio.out_qs[i]->bufs[j] = NULL; } out_freeoutq: - while (i > 0) { - qeth_free_qdio_out_buf(card->qdio.out_qs[--i]); - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - } + while (i > 0) + qeth_free_output_queue(card->qdio.out_qs[--i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; out_freepool: @@@ -2476,10 -2513,8 +2486,8 @@@ static void qeth_free_qdio_buffers(stru qeth_free_buffer_pool(card); /* free outbound qdio_qs */ if (card->qdio.out_qs) { - for (i = 0; i < card->qdio.no_out_queues; ++i) { - qeth_clear_outq_buffers(card->qdio.out_qs[i], 1); - qeth_free_qdio_out_buf(card->qdio.out_qs[i]); - } + for (i = 0; i < card->qdio.no_out_queues; i++) + qeth_free_output_queue(card->qdio.out_qs[i]); kfree(card->qdio.out_qs); card->qdio.out_qs = NULL; } @@@ -3531,6 -3566,8 +3539,6 @@@ static void qeth_qdio_cq_handler(struc card->qdio.c_q->next_buf_to_init = (card->qdio.c_q->next_buf_to_init + count) % QDIO_MAX_BUFFERS_PER_Q;
- netif_wake_queue(card->dev); - if (card->options.performance_stats) { int delta_t = qeth_get_micros(); delta_t -= card->perf_stats.cq_start_time; @@@ -3899,6 -3936,7 +3907,6 @@@ static int qeth_fill_buffer(struct qeth { struct qdio_buffer *buffer = buf->buffer; bool is_first_elem = true; - int flush_cnt = 0;
__skb_queue_tail(&buf->skb_list, skb);
@@@ -3919,22 -3957,24 +3927,22 @@@
if (!queue->do_pack) { QETH_CARD_TEXT(queue->card, 6, "fillbfnp"); - /* set state to PRIMED -> will be flushed */ - atomic_set(&buf->state, QETH_QDIO_BUF_PRIMED); - flush_cnt = 1; } else { QETH_CARD_TEXT(queue->card, 6, "fillbfpa"); if (queue->card->options.performance_stats) queue->card->perf_stats.skbs_sent_pack++; - if (buf->next_element_to_fill >= - QETH_MAX_BUFFER_ELEMENTS(queue->card)) { - /* - * packed buffer if full -> set state PRIMED - * -> will be flushed - */ - atomic_set(&buf->state, QETH_QDIO_BUF_PRIMED); - flush_cnt = 1; - } + + /* If the buffer still has free elements, keep using it. */ + if (buf->next_element_to_fill < + QETH_MAX_BUFFER_ELEMENTS(queue->card)) + return 0; } - return flush_cnt; + + /* flush out the buffer */ + atomic_set(&buf->state, QETH_QDIO_BUF_PRIMED); + queue->next_buf_to_fill = (queue->next_buf_to_fill + 1) % + QDIO_MAX_BUFFERS_PER_Q; + return 1; }
static int qeth_do_send_packet_fast(struct qeth_qdio_out_q *queue, @@@ -3950,6 -3990,7 +3958,6 @@@ */ if (atomic_read(&buffer->state) != QETH_QDIO_BUF_EMPTY) return -EBUSY; - queue->next_buf_to_fill = (index + 1) % QDIO_MAX_BUFFERS_PER_Q; qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len); qeth_flush_buffers(queue, index, 1); return 0; @@@ -4007,9 -4048,10 +4015,9 @@@ int qeth_do_send_packet(struct qeth_car } } } - tmp = qeth_fill_buffer(queue, buffer, skb, hdr, offset, hd_len); - queue->next_buf_to_fill = (queue->next_buf_to_fill + tmp) % - QDIO_MAX_BUFFERS_PER_Q; - flush_count += tmp; + + flush_count += qeth_fill_buffer(queue, buffer, skb, hdr, offset, + hd_len); if (flush_count) qeth_flush_buffers(queue, start_index, flush_count); else if (!atomic_read(&queue->set_pci_flags_count)) @@@ -4994,6 -5036,7 +5002,7 @@@ static void qeth_core_free_card(struct qeth_clean_channel(&card->read); qeth_clean_channel(&card->write); qeth_clean_channel(&card->data); + destroy_workqueue(card->event_wq); qeth_free_qdio_buffers(card); unregister_service_level(&card->qeth_service_level); dev_set_drvdata(&card->gdev->dev, NULL); @@@ -5120,6 -5163,13 +5129,6 @@@ retriable *carrier_ok = true; }
- if (qeth_netdev_is_registered(card->dev)) { - if (*carrier_ok) - netif_carrier_on(card->dev); - else - netif_carrier_off(card->dev); - } - card->options.ipa4.supported_funcs = 0; card->options.ipa6.supported_funcs = 0; card->options.adp.supported_funcs = 0; @@@ -5896,6 -5946,9 +5905,6 @@@ int qeth_do_ioctl(struct net_device *de if (!card) return -ENODEV;
- if (!qeth_card_hw_is_reachable(card)) - return -ENODEV; - if (card->info.type == QETH_CARD_TYPE_OSN) return -EPERM;
@@@ -6206,6 -6259,8 +6215,6 @@@ int qeth_core_ethtool_get_link_ksetting /* Check if we can obtain more accurate information. */ /* If QUERY_CARD_INFO command is not supported or fails, */ /* just return the heuristics that was filled above. */ - if (!qeth_card_hw_is_reachable(card)) - return -ENODEV; rc = qeth_query_card_info(card, &carrier_info); if (rc == -EOPNOTSUPP) /* for old hardware, return heuristic */ return 0; @@@ -6488,6 -6543,8 +6497,6 @@@ static int qeth_set_ipa_rx_csum(struct return (rc_ipv6) ? rc_ipv6 : rc_ipv4; }
-#define QETH_HW_FEATURES (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_TSO | \ - NETIF_F_IPV6_CSUM | NETIF_F_TSO6) /** * qeth_enable_hw_features() - (Re-)Enable HW functions for device features * @dev: a net_device @@@ -6497,20 -6554,17 +6506,20 @@@ void qeth_enable_hw_features(struct net struct qeth_card *card = dev->ml_priv; netdev_features_t features;
- rtnl_lock(); features = dev->features; - /* force-off any feature that needs an IPA sequence. + /* force-off any feature that might need an IPA sequence. * netdev_update_features() will restart them. */ - dev->features &= ~QETH_HW_FEATURES; + dev->features &= ~dev->hw_features; + /* toggle VLAN filter, so that VIDs are re-programmed: */ + if (IS_LAYER2(card) && IS_VM_NIC(card)) { + dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + dev->wanted_features |= NETIF_F_HW_VLAN_CTAG_FILTER; + } netdev_update_features(dev); if (features != dev->features) dev_warn(&card->gdev->dev, "Device recovery failed to restore all offload features\n"); - rtnl_unlock(); } EXPORT_SYMBOL_GPL(qeth_enable_hw_features);
@@@ -6579,7 -6633,10 +6588,7 @@@ netdev_features_t qeth_fix_features(str features &= ~NETIF_F_TSO; if (!qeth_is_supported6(card, IPA_OUTBOUND_TSO)) features &= ~NETIF_F_TSO6; - /* if the card isn't up, remove features that require hw changes */ - if (card->state == CARD_STATE_DOWN || - card->state == CARD_STATE_RECOVER) - features &= ~QETH_HW_FEATURES; + QETH_DBF_HEX(SETUP, 2, &features, sizeof(features)); return features; } @@@ -6611,46 -6668,6 +6620,46 @@@ netdev_features_t qeth_features_check(s } EXPORT_SYMBOL_GPL(qeth_features_check);
+int qeth_open(struct net_device *dev) +{ + struct qeth_card *card = dev->ml_priv; + + QETH_CARD_TEXT(card, 4, "qethopen"); + if (card->state == CARD_STATE_UP) + return 0; + if (card->state != CARD_STATE_SOFTSETUP) + return -ENODEV; + + if (qdio_stop_irq(CARD_DDEV(card), 0) < 0) + return -EIO; + + card->data.state = CH_STATE_UP; + card->state = CARD_STATE_UP; + netif_start_queue(dev); + + napi_enable(&card->napi); + local_bh_disable(); + napi_schedule(&card->napi); + /* kick-start the NAPI softirq: */ + local_bh_enable(); + return 0; +} +EXPORT_SYMBOL_GPL(qeth_open); + +int qeth_stop(struct net_device *dev) +{ + struct qeth_card *card = dev->ml_priv; + + QETH_CARD_TEXT(card, 4, "qethstop"); + netif_tx_disable(dev); + if (card->state == CARD_STATE_UP) { + card->state = CARD_STATE_SOFTSETUP; + napi_disable(&card->napi); + } + return 0; +} +EXPORT_SYMBOL_GPL(qeth_stop); + static int __init qeth_core_init(void) { int rc; diff --combined drivers/s390/net/qeth_l2_main.c index 82f50cc30b0a,a43de2f9bcac..ef0b5eaf2532 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@@ -25,6 -25,7 +25,6 @@@ #include "qeth_l2.h"
static int qeth_l2_set_offline(struct ccwgroup_device *); -static int qeth_l2_stop(struct net_device *); static void qeth_bridgeport_query_support(struct qeth_card *card); static void qeth_bridge_state_change(struct qeth_card *card, struct qeth_ipa_cmd *cmd); @@@ -97,7 -98,8 +97,7 @@@ static int qeth_l2_send_setmac(struct q rc = qeth_l2_send_setdelmac(card, mac, IPA_CMD_SETVMAC); if (rc == 0) { dev_info(&card->gdev->dev, - "MAC address %pM successfully registered on device %s\n", - mac, card->dev->name); + "MAC address %pM successfully registered\n", mac); } else { switch (rc) { case -EEXIST: @@@ -261,28 -263,75 +261,28 @@@ static int qeth_l2_send_setdelvlan(stru qeth_l2_send_setdelvlan_cb, NULL)); }
-static void qeth_l2_process_vlans(struct qeth_card *card) -{ - struct qeth_vlan_vid *id; - - QETH_CARD_TEXT(card, 3, "L2prcvln"); - mutex_lock(&card->vid_list_mutex); - list_for_each_entry(id, &card->vid_list, list) { - qeth_l2_send_setdelvlan(card, id->vid, IPA_CMD_SETVLAN); - } - mutex_unlock(&card->vid_list_mutex); -} - static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct qeth_card *card = dev->ml_priv; - struct qeth_vlan_vid *id; - int rc;
QETH_CARD_TEXT_(card, 4, "aid:%d", vid); if (!vid) return 0; - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "aidREC"); - return 0; - } - id = kmalloc(sizeof(*id), GFP_KERNEL); - if (id) { - id->vid = vid; - rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_SETVLAN); - if (rc) { - kfree(id); - return rc; - } - mutex_lock(&card->vid_list_mutex); - list_add_tail(&id->list, &card->vid_list); - mutex_unlock(&card->vid_list_mutex); - } else { - return -ENOMEM; - } - return 0; + + return qeth_l2_send_setdelvlan(card, vid, IPA_CMD_SETVLAN); }
static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { - struct qeth_vlan_vid *id, *tmpid = NULL; struct qeth_card *card = dev->ml_priv; - int rc = 0;
QETH_CARD_TEXT_(card, 4, "kid:%d", vid); - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "kidREC"); + if (!vid) return 0; - } - mutex_lock(&card->vid_list_mutex); - list_for_each_entry(id, &card->vid_list, list) { - if (id->vid == vid) { - list_del(&id->list); - tmpid = id; - break; - } - } - mutex_unlock(&card->vid_list_mutex); - if (tmpid) { - rc = qeth_l2_send_setdelvlan(card, vid, IPA_CMD_DELVLAN); - kfree(tmpid); - } - return rc; + + return qeth_l2_send_setdelvlan(card, vid, IPA_CMD_DELVLAN); }
static void qeth_l2_stop_card(struct qeth_card *card, int recovery_mode) @@@ -294,8 -343,9 +294,8 @@@ if (card->read.state == CH_STATE_UP && card->write.state == CH_STATE_UP && (card->state == CARD_STATE_UP)) { - if (recovery_mode && - card->info.type != QETH_CARD_TYPE_OSN) { - qeth_l2_stop(card->dev); + if (recovery_mode && !IS_OSN(card)) { + qeth_stop(card->dev); } else { rtnl_lock(); dev_close(card->dev); @@@ -319,6 -369,8 +319,8 @@@ qeth_clear_cmd_buffers(&card->read); qeth_clear_cmd_buffers(&card->write); } + + flush_workqueue(card->event_wq); }
static int qeth_l2_process_inbound_buffer(struct qeth_card *card, @@@ -410,26 -462,6 +412,26 @@@ out return 0; }
+static void qeth_l2_register_dev_addr(struct qeth_card *card) +{ + if (!is_valid_ether_addr(card->dev->dev_addr)) + qeth_l2_request_initial_mac(card); + + if (!IS_OSN(card) && !qeth_l2_send_setmac(card, card->dev->dev_addr)) + card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; +} + +static int qeth_l2_validate_addr(struct net_device *dev) +{ + struct qeth_card *card = dev->ml_priv; + + if (IS_OSN(card) || (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) + return eth_validate_addr(dev); + + QETH_CARD_TEXT(card, 4, "nomacadr"); + return -EPERM; +} + static int qeth_l2_set_mac_address(struct net_device *dev, void *p) { struct sockaddr *addr = p; @@@ -449,22 -481,39 +451,22 @@@ if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL;
- if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "setmcREC"); - return -ERESTARTSYS; - } - - /* avoid racing against concurrent state change: */ - if (!mutex_trylock(&card->conf_mutex)) - return -EAGAIN; - - if (!qeth_card_hw_is_reachable(card)) { - ether_addr_copy(dev->dev_addr, addr->sa_data); - goto out_unlock; - } - /* don't register the same address twice */ if (ether_addr_equal_64bits(dev->dev_addr, addr->sa_data) && (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED)) - goto out_unlock; + return 0;
/* add the new address, switch over, drop the old */ rc = qeth_l2_send_setmac(card, addr->sa_data); if (rc) - goto out_unlock; + return rc; ether_addr_copy(old_addr, dev->dev_addr); ether_addr_copy(dev->dev_addr, addr->sa_data);
if (card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED) qeth_l2_remove_mac(card, old_addr); card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; - -out_unlock: - mutex_unlock(&card->conf_mutex); - return rc; + return 0; }
static void qeth_promisc_to_bridge(struct qeth_card *card) @@@ -539,6 -588,9 +541,6 @@@ static void qeth_l2_set_rx_mode(struct return;
QETH_CARD_TEXT(card, 3, "setmulti"); - if (qeth_threads_running(card, QETH_RECOVER_THREAD) && - (card->state != CARD_STATE_UP)) - return;
spin_lock_bh(&card->mclock);
@@@ -662,6 -714,62 +664,6 @@@ tx_drop return NETDEV_TX_OK; }
-static int __qeth_l2_open(struct net_device *dev) -{ - struct qeth_card *card = dev->ml_priv; - int rc = 0; - - QETH_CARD_TEXT(card, 4, "qethopen"); - if (card->state == CARD_STATE_UP) - return rc; - if (card->state != CARD_STATE_SOFTSETUP) - return -ENODEV; - - if ((card->info.type != QETH_CARD_TYPE_OSN) && - (!(card->info.mac_bits & QETH_LAYER2_MAC_REGISTERED))) { - QETH_CARD_TEXT(card, 4, "nomacadr"); - return -EPERM; - } - card->data.state = CH_STATE_UP; - card->state = CARD_STATE_UP; - netif_start_queue(dev); - - if (qdio_stop_irq(card->data.ccwdev, 0) >= 0) { - napi_enable(&card->napi); - local_bh_disable(); - napi_schedule(&card->napi); - /* kick-start the NAPI softirq: */ - local_bh_enable(); - } else - rc = -EIO; - return rc; -} - -static int qeth_l2_open(struct net_device *dev) -{ - struct qeth_card *card = dev->ml_priv; - - QETH_CARD_TEXT(card, 5, "qethope_"); - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "openREC"); - return -ERESTARTSYS; - } - return __qeth_l2_open(dev); -} - -static int qeth_l2_stop(struct net_device *dev) -{ - struct qeth_card *card = dev->ml_priv; - - QETH_CARD_TEXT(card, 4, "qethstop"); - netif_tx_disable(dev); - if (card->state == CARD_STATE_UP) { - card->state = CARD_STATE_SOFTSETUP; - napi_disable(&card->napi); - } - return 0; -} - static const struct device_type qeth_l2_devtype = { .name = "qeth_layer2", .groups = qeth_l2_attr_groups, @@@ -677,7 -785,7 +679,7 @@@ static int qeth_l2_probe_device(struct if (rc) return rc; } - INIT_LIST_HEAD(&card->vid_list); + hash_init(card->mac_htable); card->info.hwtrap = 0; qeth_l2_vnicc_set_defaults(card); @@@ -695,6 -803,8 +697,8 @@@ static void qeth_l2_remove_device(struc
if (cgdev->state == CCWGROUP_ONLINE) qeth_l2_set_offline(cgdev); + + cancel_work_sync(&card->close_dev_work); if (qeth_netdev_is_registered(card->dev)) unregister_netdev(card->dev); } @@@ -716,12 -826,12 +720,12 @@@ static const struct ethtool_ops qeth_l2 };
static const struct net_device_ops qeth_l2_netdev_ops = { - .ndo_open = qeth_l2_open, - .ndo_stop = qeth_l2_stop, + .ndo_open = qeth_open, + .ndo_stop = qeth_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l2_hard_start_xmit, .ndo_features_check = qeth_features_check, - .ndo_validate_addr = eth_validate_addr, + .ndo_validate_addr = qeth_l2_validate_addr, .ndo_set_rx_mode = qeth_l2_set_rx_mode, .ndo_do_ioctl = qeth_do_ioctl, .ndo_set_mac_address = qeth_l2_set_mac_address, @@@ -736,6 -846,9 +740,6 @@@ static int qeth_l2_setup_netdev(struct { int rc;
- if (qeth_netdev_is_registered(card->dev)) - return 0; - card->dev->priv_flags |= IFF_UNICAST_FLT; card->dev->netdev_ops = &qeth_l2_netdev_ops; if (card->info.type == QETH_CARD_TYPE_OSN) { @@@ -746,13 -859,10 +750,13 @@@ card->dev->needed_headroom = sizeof(struct qeth_hdr); }
- if (card->info.type == QETH_CARD_TYPE_OSM) + if (IS_OSM(card)) { card->dev->features |= NETIF_F_VLAN_CHALLENGED; - else + } else { + if (!IS_VM_NIC(card)) + card->dev->hw_features |= NETIF_F_HW_VLAN_CTAG_FILTER; card->dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; + }
if (card->info.type == QETH_CARD_TYPE_OSD && !card->info.guestlan) { card->dev->features |= NETIF_F_SG; @@@ -786,6 -896,8 +790,6 @@@ PAGE_SIZE * (QDIO_MAX_ELEMENTS_PER_BUFFER - 1)); }
- if (!is_valid_ether_addr(card->dev->dev_addr)) - qeth_l2_request_initial_mac(card); netif_napi_add(card->dev, &card->napi, qeth_poll, QETH_NAPI_WEIGHT); rc = register_netdev(card->dev); if (!rc && carrier_ok) @@@ -819,7 -931,6 +823,7 @@@ static void qeth_l2_trace_features(stru static int __qeth_l2_set_online(struct ccwgroup_device *gdev, int recovery_mode) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + struct net_device *dev = card->dev; int rc = 0; enum qeth_card_states recover_flag; bool carrier_ok; @@@ -841,7 -952,13 +845,7 @@@ dev_info(&card->gdev->dev, "The device represents a Bridge Capable Port\n");
- rc = qeth_l2_setup_netdev(card, carrier_ok); - if (rc) - goto out_remove; - - if (card->info.type != QETH_CARD_TYPE_OSN && - !qeth_l2_send_setmac(card, card->dev->dev_addr)) - card->info.mac_bits |= QETH_LAYER2_MAC_REGISTERED; + qeth_l2_register_dev_addr(card);
if (qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP)) { if (card->info.hwtrap && @@@ -871,6 -988,11 +875,6 @@@ goto out_remove; }
- if (card->info.type != QETH_CARD_TYPE_OSN) - qeth_l2_process_vlans(card); - - netif_tx_disable(card->dev); - rc = qeth_init_qdio_queues(card); if (rc) { QETH_DBF_TEXT_(SETUP, 2, "6err%d", rc); @@@ -881,31 -1003,17 +885,31 @@@
qeth_set_allowed_threads(card, 0xffffffff, 0);
- qeth_enable_hw_features(card->dev); - if (recover_flag == CARD_STATE_RECOVER) { - if (recovery_mode && - card->info.type != QETH_CARD_TYPE_OSN) { - __qeth_l2_open(card->dev); - qeth_l2_set_rx_mode(card->dev); - } else { - rtnl_lock(); - dev_open(card->dev, NULL); - rtnl_unlock(); + if (!qeth_netdev_is_registered(dev)) { + rc = qeth_l2_setup_netdev(card, carrier_ok); + if (rc) + goto out_remove; + } else { + rtnl_lock(); + if (carrier_ok) + netif_carrier_on(dev); + else + netif_carrier_off(dev); + + netif_device_attach(dev); + qeth_enable_hw_features(dev); + + if (recover_flag == CARD_STATE_RECOVER) { + if (recovery_mode && !IS_OSN(card)) { + if (!qeth_l2_validate_addr(dev)) { + qeth_open(dev); + qeth_l2_set_rx_mode(dev); + } + } else { + dev_open(dev, NULL); + } } + rtnl_unlock(); } /* let user_space know that device is online */ kobject_uevent(&gdev->dev.kobj, KOBJ_CHANGE); @@@ -945,11 -1053,7 +949,11 @@@ static int __qeth_l2_set_offline(struc QETH_DBF_TEXT(SETUP, 3, "setoffl"); QETH_DBF_HEX(SETUP, 3, &card, sizeof(void *));
+ rtnl_lock(); + netif_device_detach(card->dev); netif_carrier_off(card->dev); + rtnl_unlock(); + recover_flag = card->state; if ((!recovery_mode && card->info.hwtrap) || card->info.hwtrap == 2) { qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM); @@@ -990,6 -1094,7 +994,6 @@@ static int qeth_l2_recover(void *ptr QETH_CARD_TEXT(card, 2, "recover2"); dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); - qeth_set_recovery_task(card); __qeth_l2_set_offline(card->gdev, 1); rc = __qeth_l2_set_online(card->gdev, 1); if (!rc) @@@ -1000,6 -1105,7 +1004,6 @@@ dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); } - qeth_clear_recovery_task(card); qeth_clear_thread_start_bit(card, QETH_RECOVER_THREAD); qeth_clear_thread_running_bit(card, QETH_RECOVER_THREAD); return 0; @@@ -1020,6 -1126,7 +1024,6 @@@ static int qeth_l2_pm_suspend(struct cc { struct qeth_card *card = dev_get_drvdata(&gdev->dev);
- netif_device_detach(card->dev); qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); if (gdev->state == CCWGROUP_OFFLINE) @@@ -1049,6 -1156,7 +1053,6 @@@ static int qeth_l2_pm_resume(struct ccw rc = __qeth_l2_set_online(card->gdev, 0);
qeth_set_allowed_threads(card, 0xffffffff, 0); - netif_device_attach(card->dev); if (rc) dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); @@@ -1330,7 -1438,7 +1334,7 @@@ static void qeth_bridge_state_change(st data->card = card; memcpy(&data->qports, qports, sizeof(struct qeth_sbp_state_change) + extrasize); - queue_work(qeth_wq, &data->worker); + queue_work(card->event_wq, &data->worker); }
struct qeth_bridge_host_data { @@@ -1402,7 -1510,7 +1406,7 @@@ static void qeth_bridge_host_event(stru data->card = card; memcpy(&data->hostevs, hostevs, sizeof(struct qeth_ipacmd_addr_change) + extrasize); - queue_work(qeth_wq, &data->worker); + queue_work(card->event_wq, &data->worker); }
/* SETBRIDGEPORT support; sending commands */ diff --combined drivers/s390/net/qeth_l3_main.c index 59535ecb1487,df34bff4ac31..f7d0623999ba --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@@ -40,6 -40,7 +40,6 @@@
static int qeth_l3_set_offline(struct ccwgroup_device *); -static int qeth_l3_stop(struct net_device *); static void qeth_l3_set_rx_mode(struct net_device *dev); static int qeth_l3_register_addr_entry(struct qeth_card *, struct qeth_ipaddr *); @@@ -1280,6 -1281,10 +1280,6 @@@ static int qeth_l3_vlan_rx_kill_vid(str
QETH_CARD_TEXT_(card, 4, "kid:%d", vid);
- if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "kidREC"); - return 0; - } clear_bit(vid, card->active_vlans); qeth_l3_set_rx_mode(dev); return 0; @@@ -1405,7 -1410,7 +1405,7 @@@ static void qeth_l3_stop_card(struct qe card->write.state == CH_STATE_UP && (card->state == CARD_STATE_UP)) { if (recovery_mode) - qeth_l3_stop(card->dev); + qeth_stop(card->dev); else { rtnl_lock(); dev_close(card->dev); @@@ -1428,6 -1433,8 +1428,8 @@@ qeth_clear_cmd_buffers(&card->read); qeth_clear_cmd_buffers(&card->write); } + + flush_workqueue(card->event_wq); }
/* @@@ -1468,7 -1475,9 +1470,7 @@@ static void qeth_l3_set_rx_mode(struct int i, rc;
QETH_CARD_TEXT(card, 3, "setmulti"); - if (qeth_threads_running(card, QETH_RECOVER_THREAD) && - (card->state != CARD_STATE_UP)) - return; + if (!card->options.sniffer) { spin_lock_bh(&card->mclock);
@@@ -2093,6 -2102,56 +2095,6 @@@ tx_drop return NETDEV_TX_OK; }
-static int __qeth_l3_open(struct net_device *dev) -{ - struct qeth_card *card = dev->ml_priv; - int rc = 0; - - QETH_CARD_TEXT(card, 4, "qethopen"); - if (card->state == CARD_STATE_UP) - return rc; - if (card->state != CARD_STATE_SOFTSETUP) - return -ENODEV; - card->data.state = CH_STATE_UP; - card->state = CARD_STATE_UP; - netif_start_queue(dev); - - if (qdio_stop_irq(card->data.ccwdev, 0) >= 0) { - napi_enable(&card->napi); - local_bh_disable(); - napi_schedule(&card->napi); - /* kick-start the NAPI softirq: */ - local_bh_enable(); - } else - rc = -EIO; - return rc; -} - -static int qeth_l3_open(struct net_device *dev) -{ - struct qeth_card *card = dev->ml_priv; - - QETH_CARD_TEXT(card, 5, "qethope_"); - if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) { - QETH_CARD_TEXT(card, 3, "openREC"); - return -ERESTARTSYS; - } - return __qeth_l3_open(dev); -} - -static int qeth_l3_stop(struct net_device *dev) -{ - struct qeth_card *card = dev->ml_priv; - - QETH_CARD_TEXT(card, 4, "qethstop"); - netif_tx_disable(dev); - if (card->state == CARD_STATE_UP) { - card->state = CARD_STATE_SOFTSETUP; - napi_disable(&card->napi); - } - return 0; -} - static const struct ethtool_ops qeth_l3_ethtool_ops = { .get_link = ethtool_op_get_link, .get_strings = qeth_core_get_strings, @@@ -2136,8 -2195,8 +2138,8 @@@ static netdev_features_t qeth_l3_osa_fe }
static const struct net_device_ops qeth_l3_netdev_ops = { - .ndo_open = qeth_l3_open, - .ndo_stop = qeth_l3_stop, + .ndo_open = qeth_open, + .ndo_stop = qeth_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l3_hard_start_xmit, .ndo_validate_addr = eth_validate_addr, @@@ -2151,8 -2210,8 +2153,8 @@@ };
static const struct net_device_ops qeth_l3_osa_netdev_ops = { - .ndo_open = qeth_l3_open, - .ndo_stop = qeth_l3_stop, + .ndo_open = qeth_open, + .ndo_stop = qeth_stop, .ndo_get_stats = qeth_get_stats, .ndo_start_xmit = qeth_l3_hard_start_xmit, .ndo_features_check = qeth_l3_osa_features_check, @@@ -2172,6 -2231,9 +2174,6 @@@ static int qeth_l3_setup_netdev(struct unsigned int headroom; int rc;
- if (qeth_netdev_is_registered(card->dev)) - return 0; - if (card->info.type == QETH_CARD_TYPE_OSD || card->info.type == QETH_CARD_TYPE_OSX) { if ((card->info.link_type == QETH_LINK_TYPE_LANE_TR) || @@@ -2278,6 -2340,7 +2280,7 @@@ static void qeth_l3_remove_device(struc if (cgdev->state == CCWGROUP_ONLINE) qeth_l3_set_offline(cgdev);
+ cancel_work_sync(&card->close_dev_work); if (qeth_netdev_is_registered(card->dev)) unregister_netdev(card->dev); qeth_l3_clear_ip_htable(card, 0); @@@ -2287,7 -2350,6 +2290,7 @@@ static int __qeth_l3_set_online(struct ccwgroup_device *gdev, int recovery_mode) { struct qeth_card *card = dev_get_drvdata(&gdev->dev); + struct net_device *dev = card->dev; int rc = 0; enum qeth_card_states recover_flag; bool carrier_ok; @@@ -2305,6 -2367,10 +2308,6 @@@ goto out_remove; }
- rc = qeth_l3_setup_netdev(card, carrier_ok); - if (rc) - goto out_remove; - if (qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP)) { if (card->info.hwtrap && qeth_hw_trap(card, QETH_DIAGS_TRAP_ARM)) @@@ -2334,6 -2400,7 +2337,6 @@@ if (rc) QETH_DBF_TEXT_(SETUP, 2, "5err%04x", rc); } - netif_tx_disable(card->dev);
rc = qeth_init_qdio_queues(card); if (rc) { @@@ -2346,27 -2413,14 +2349,27 @@@ qeth_set_allowed_threads(card, 0xffffffff, 0); qeth_l3_recover_ip(card);
- qeth_enable_hw_features(card->dev); - if (recover_flag == CARD_STATE_RECOVER) { + if (!qeth_netdev_is_registered(dev)) { + rc = qeth_l3_setup_netdev(card, carrier_ok); + if (rc) + goto out_remove; + } else { rtnl_lock(); - if (recovery_mode) { - __qeth_l3_open(card->dev); - qeth_l3_set_rx_mode(card->dev); - } else { - dev_open(card->dev, NULL); + if (carrier_ok) + netif_carrier_on(dev); + else + netif_carrier_off(dev); + + netif_device_attach(dev); + qeth_enable_hw_features(dev); + + if (recover_flag == CARD_STATE_RECOVER) { + if (recovery_mode) { + qeth_open(dev); + qeth_l3_set_rx_mode(dev); + } else { + dev_open(dev, NULL); + } } rtnl_unlock(); } @@@ -2408,11 -2462,7 +2411,11 @@@ static int __qeth_l3_set_offline(struc QETH_DBF_TEXT(SETUP, 3, "setoffl"); QETH_DBF_HEX(SETUP, 3, &card, sizeof(void *));
+ rtnl_lock(); + netif_device_detach(card->dev); netif_carrier_off(card->dev); + rtnl_unlock(); + recover_flag = card->state; if ((!recovery_mode && card->info.hwtrap) || card->info.hwtrap == 2) { qeth_hw_trap(card, QETH_DIAGS_TRAP_DISARM); @@@ -2459,6 -2509,7 +2462,6 @@@ static int qeth_l3_recover(void *ptr QETH_CARD_TEXT(card, 2, "recover2"); dev_warn(&card->gdev->dev, "A recovery process has been started for the device\n"); - qeth_set_recovery_task(card); __qeth_l3_set_offline(card->gdev, 1); rc = __qeth_l3_set_online(card->gdev, 1); if (!rc) @@@ -2469,6 -2520,7 +2472,6 @@@ dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); } - qeth_clear_recovery_task(card); qeth_clear_thread_start_bit(card, QETH_RECOVER_THREAD); qeth_clear_thread_running_bit(card, QETH_RECOVER_THREAD); return 0; @@@ -2478,6 -2530,7 +2481,6 @@@ static int qeth_l3_pm_suspend(struct cc { struct qeth_card *card = dev_get_drvdata(&gdev->dev);
- netif_device_detach(card->dev); qeth_set_allowed_threads(card, 0, 1); wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0); if (gdev->state == CCWGROUP_OFFLINE) @@@ -2507,6 -2560,7 +2510,6 @@@ static int qeth_l3_pm_resume(struct ccw rc = __qeth_l3_set_online(card->gdev, 0);
qeth_set_allowed_threads(card, 0xffffffff, 0); - netif_device_attach(card->dev); if (rc) dev_warn(&card->gdev->dev, "The qeth device driver " "failed to recover an error on the device\n"); diff --combined include/linux/filter.h index 7317376734f7,e532fcc6e4b5..95e2d7ebdf21 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@@ -277,26 -277,6 +277,26 @@@ struct sock_reuseport .off = OFF, \ .imm = IMM })
+/* Like BPF_JMP_REG, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_REG(OP, DST, SRC, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_X, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = OFF, \ + .imm = 0 }) + +/* Like BPF_JMP_IMM, but with 32-bit wide operands for comparison. */ + +#define BPF_JMP32_IMM(OP, DST, IMM, OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP32 | BPF_OP(OP) | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = IMM }) + /* Unconditional jumps, goto pc + off16 */
#define BPF_JMP_A(OFF) \ @@@ -611,8 -591,8 +611,8 @@@ static inline u8 *bpf_skb_cb(struct sk_ return qdisc_skb_cb(skb)->data; }
- static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, - struct sk_buff *skb) + static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); u8 cb_saved[BPF_SKB_CB_LEN]; @@@ -631,15 -611,30 +631,30 @@@ return res; }
+ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) + { + u32 res; + + preempt_disable(); + res = __bpf_prog_run_save_cb(prog, skb); + preempt_enable(); + return res; + } + static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, struct sk_buff *skb) { u8 *cb_data = bpf_skb_cb(skb); + u32 res;
if (unlikely(prog->cb_access)) memset(cb_data, 0, BPF_SKB_CB_LEN);
- return BPF_PROG_RUN(prog, skb); + preempt_disable(); + res = BPF_PROG_RUN(prog, skb); + preempt_enable(); + return res; }
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, @@@ -798,7 -793,6 +813,7 @@@ static inline bool bpf_dump_raw_ok(void
struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +int bpf_remove_insns(struct bpf_prog *prog, u32 off, u32 cnt);
void bpf_clear_redirect_map(struct bpf_map *map);
@@@ -880,9 -874,7 +895,9 @@@ bpf_jit_binary_alloc(unsigned int progl unsigned int alignment, bpf_jit_fill_hole_t bpf_fill_ill_insns); void bpf_jit_binary_free(struct bpf_binary_header *hdr); - +u64 bpf_jit_alloc_exec_limit(void); +void *bpf_jit_alloc_exec(unsigned long size); +void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp);
int bpf_jit_get_func_addr(const struct bpf_prog *prog, diff --combined include/linux/netdevice.h index 1d95e634f3fe,86dbb3e29139..1fb733f38a47 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@@ -630,7 -630,6 +630,7 @@@ struct netdev_queue } ____cacheline_aligned_in_smp;
extern int sysctl_fb_tunnels_only_for_init_net; +extern int sysctl_devconf_inherit_init_net;
static inline bool net_has_fallback_tunnels(const struct net *net) { @@@ -1153,8 -1152,7 +1153,8 @@@ struct dev_ifalias * * int (*ndo_fdb_add)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, - * const unsigned char *addr, u16 vid, u16 flags) + * const unsigned char *addr, u16 vid, u16 flags, + * struct netlink_ext_ack *extack); * Adds an FDB entry to dev for addr. * int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], * struct net_device *dev, @@@ -1188,10 -1186,6 +1188,10 @@@ * not implement this, it is assumed that the hw is not able to have * multiple net devices on single physical port. * + * int (*ndo_get_port_parent_id)(struct net_device *dev, + * struct netdev_phys_item_id *ppid) + * Called to get the parent ID of the physical port of this device. + * * void (*ndo_udp_tunnel_add)(struct net_device *dev, * struct udp_tunnel_info *ti); * Called by UDP tunnel to notify a driver about the UDP port and socket @@@ -1382,8 -1376,7 +1382,8 @@@ struct net_device_ops struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags); + u16 flags, + struct netlink_ext_ack *extack); int (*ndo_fdb_del)(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, @@@ -1416,8 -1409,6 +1416,8 @@@ bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, struct netdev_phys_item_id *ppid); + int (*ndo_get_port_parent_id)(struct net_device *dev, + struct netdev_phys_item_id *ppid); int (*ndo_get_phys_port_name)(struct net_device *dev, char *name, size_t len); void (*ndo_udp_tunnel_add)(struct net_device *dev, @@@ -1492,6 -1483,7 +1492,7 @@@ * @IFF_NO_RX_HANDLER: device doesn't support the rx_handler hook * @IFF_FAILOVER: device is a failover master device * @IFF_FAILOVER_SLAVE: device is lower dev of a failover master device + * @IFF_L3MDEV_RX_HANDLER: only invoke the rx handler of L3 master device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@@ -1523,6 -1515,7 +1524,7 @@@ IFF_NO_RX_HANDLER = 1<<26, IFF_FAILOVER = 1<<27, IFF_FAILOVER_SLAVE = 1<<28, + IFF_L3MDEV_RX_HANDLER = 1<<29, };
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@@ -1553,6 -1546,7 +1555,7 @@@ #define IFF_NO_RX_HANDLER IFF_NO_RX_HANDLER #define IFF_FAILOVER IFF_FAILOVER #define IFF_FAILOVER_SLAVE IFF_FAILOVER_SLAVE + #define IFF_L3MDEV_RX_HANDLER IFF_L3MDEV_RX_HANDLER
/** * struct net_device - The DEVICE structure. @@@ -3657,9 -3651,6 +3660,9 @@@ int dev_get_phys_port_id(struct net_dev struct netdev_phys_item_id *ppid); int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); +int dev_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid, bool recurse); +bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b); int dev_change_proto_down(struct net_device *dev, bool proto_down); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev, bool *again); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, @@@ -4561,6 -4552,11 +4564,11 @@@ static inline bool netif_supports_nofcs return dev->priv_flags & IFF_SUPP_NOFCS; }
+ static inline bool netif_has_l3_rx_handler(const struct net_device *dev) + { + return dev->priv_flags & IFF_L3MDEV_RX_HANDLER; + } + static inline bool netif_is_l3_master(const struct net_device *dev) { return dev->priv_flags & IFF_L3MDEV_MASTER; @@@ -4672,22 -4668,22 +4680,22 @@@ static inline const char *netdev_reg_st return " (unknown)"; }
-__printf(3, 4) +__printf(3, 4) __cold void netdev_printk(const char *level, const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_emerg(const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_alert(const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_crit(const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_err(const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_warn(const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_notice(const struct net_device *dev, const char *format, ...); -__printf(2, 3) +__printf(2, 3) __cold void netdev_info(const struct net_device *dev, const char *format, ...);
#define netdev_level_once(level, dev, fmt, ...) \ diff --combined include/net/netfilter/nf_tables.h index 45eba7d7ab38,b4984bbbe157..a66fcd316734 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@@ -469,9 -469,7 +469,7 @@@ struct nft_set_binding int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding); - void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding); + struct nft_set_binding *binding, bool commit); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set);
/** @@@ -721,6 -719,13 +719,13 @@@ struct nft_expr_type #define NFT_EXPR_STATEFUL 0x1 #define NFT_EXPR_GC 0x2
+ enum nft_trans_phase { + NFT_TRANS_PREPARE, + NFT_TRANS_ABORT, + NFT_TRANS_COMMIT, + NFT_TRANS_RELEASE + }; + /** * struct nft_expr_ops - nf_tables expression operations * @@@ -750,7 -755,8 +755,8 @@@ struct nft_expr_ops void (*activate)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*deactivate)(const struct nft_ctx *ctx, - const struct nft_expr *expr); + const struct nft_expr *expr, + enum nft_trans_phase phase); void (*destroy)(const struct nft_ctx *ctx, const struct nft_expr *expr); void (*destroy_clone)(const struct nft_ctx *ctx, @@@ -1012,32 -1018,21 +1018,32 @@@ int nft_verdict_dump(struct sk_buff *sk const struct nft_verdict *v);
/** + * struct nft_object_hash_key - key to lookup nft_object + * + * @name: name of the stateful object to look up + * @table: table the object belongs to + */ +struct nft_object_hash_key { + const char *name; + const struct nft_table *table; +}; + +/** * struct nft_object - nf_tables stateful object * * @list: table stateful object list node - * @table: table this object belongs to - * @name: name of this stateful object + * @key: keys that identify this object + * @rhlhead: nft_objname_ht node * @genmask: generation mask * @use: number of references to this stateful object * @handle: unique object handle * @ops: object operations - * @data: object data, layout depends on type + * @data: object data, layout depends on type */ struct nft_object { struct list_head list; - char *name; - struct nft_table *table; + struct rhlist_head rhlhead; + struct nft_object_hash_key key; u32 genmask:2, use:30; u64 handle; @@@ -1054,12 -1049,11 +1060,12 @@@ static inline void *nft_obj_data(const
#define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr))
-struct nft_object *nft_obj_lookup(const struct nft_table *table, +struct nft_object *nft_obj_lookup(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask);
-void nft_obj_notify(struct net *net, struct nft_table *table, +void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, int family, int report, gfp_t gfp);
@@@ -1335,12 -1329,15 +1341,15 @@@ struct nft_trans_rule struct nft_trans_set { struct nft_set *set; u32 set_id; + bool bound; };
#define nft_trans_set(trans) \ (((struct nft_trans_set *)trans->data)->set) #define nft_trans_set_id(trans) \ (((struct nft_trans_set *)trans->data)->set_id) + #define nft_trans_set_bound(trans) \ + (((struct nft_trans_set *)trans->data)->bound)
struct nft_trans_chain { bool update; diff --combined kernel/bpf/btf.c index 7019c1f05cab,c57bd10340ed..bd3921b1514b --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@@ -157,7 -157,7 +157,7 @@@ * */
-#define BITS_PER_U64 (sizeof(u64) * BITS_PER_BYTE) +#define BITS_PER_U128 (sizeof(u64) * BITS_PER_BYTE * 2) #define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1) #define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK) #define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3) @@@ -355,11 -355,6 +355,11 @@@ static bool btf_type_is_struct(const st return kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION; }
+static bool __btf_type_is_struct(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT; +} + static bool btf_type_is_array(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; @@@ -530,7 -525,7 +530,7 @@@ const struct btf_type *btf_type_by_id(c
/* * Regular int is not a bit field and it must be either - * u8/u16/u32/u64. + * u8/u16/u32/u64 or __int128. */ static bool btf_type_int_is_regular(const struct btf_type *t) { @@@ -543,8 -538,7 +543,8 @@@ if (BITS_PER_BYTE_MASKED(nr_bits) || BTF_INT_OFFSET(int_data) || (nr_bytes != sizeof(u8) && nr_bytes != sizeof(u16) && - nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64))) { + nr_bytes != sizeof(u32) && nr_bytes != sizeof(u64) && + nr_bytes != (2 * sizeof(u64)))) { return false; }
@@@ -1069,9 -1063,9 +1069,9 @@@ static int btf_int_check_member(struct nr_copy_bits = BTF_INT_BITS(int_data) + BITS_PER_BYTE_MASKED(struct_bits_off);
- if (nr_copy_bits > BITS_PER_U64) { + if (nr_copy_bits > BITS_PER_U128) { btf_verifier_log_member(env, struct_type, member, - "nr_copy_bits exceeds 64"); + "nr_copy_bits exceeds 128"); return -EINVAL; }
@@@ -1125,9 -1119,9 +1125,9 @@@ static int btf_int_check_kflag_member(s
bytes_offset = BITS_ROUNDDOWN_BYTES(struct_bits_off); nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED(struct_bits_off); - if (nr_copy_bits > BITS_PER_U64) { + if (nr_copy_bits > BITS_PER_U128) { btf_verifier_log_member(env, struct_type, member, - "nr_copy_bits exceeds 64"); + "nr_copy_bits exceeds 128"); return -EINVAL; }
@@@ -1174,9 -1168,9 +1174,9 @@@ static s32 btf_int_check_meta(struct bt
nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);
- if (nr_bits > BITS_PER_U64) { + if (nr_bits > BITS_PER_U128) { btf_verifier_log_type(env, t, "nr_bits exceeds %zu", - BITS_PER_U64); + BITS_PER_U128); return -EINVAL; }
@@@ -1217,93 -1211,31 +1217,93 @@@ static void btf_int_log(struct btf_veri btf_int_encoding_str(BTF_INT_ENCODING(int_data))); }
+static void btf_int128_print(struct seq_file *m, void *data) +{ + /* data points to a __int128 number. + * Suppose + * int128_num = *(__int128 *)data; + * The below formulas shows what upper_num and lower_num represents: + * upper_num = int128_num >> 64; + * lower_num = int128_num & 0xffffffffFFFFFFFFULL; + */ + u64 upper_num, lower_num; + +#ifdef __BIG_ENDIAN_BITFIELD + upper_num = *(u64 *)data; + lower_num = *(u64 *)(data + 8); +#else + upper_num = *(u64 *)(data + 8); + lower_num = *(u64 *)data; +#endif + if (upper_num == 0) + seq_printf(m, "0x%llx", lower_num); + else + seq_printf(m, "0x%llx%016llx", upper_num, lower_num); +} + +static void btf_int128_shift(u64 *print_num, u16 left_shift_bits, + u16 right_shift_bits) +{ + u64 upper_num, lower_num; + +#ifdef __BIG_ENDIAN_BITFIELD + upper_num = print_num[0]; + lower_num = print_num[1]; +#else + upper_num = print_num[1]; + lower_num = print_num[0]; +#endif + + /* shake out un-needed bits by shift/or operations */ + if (left_shift_bits >= 64) { + upper_num = lower_num << (left_shift_bits - 64); + lower_num = 0; + } else { + upper_num = (upper_num << left_shift_bits) | + (lower_num >> (64 - left_shift_bits)); + lower_num = lower_num << left_shift_bits; + } + + if (right_shift_bits >= 64) { + lower_num = upper_num >> (right_shift_bits - 64); + upper_num = 0; + } else { + lower_num = (lower_num >> right_shift_bits) | + (upper_num << (64 - right_shift_bits)); + upper_num = upper_num >> right_shift_bits; + } + +#ifdef __BIG_ENDIAN_BITFIELD + print_num[0] = upper_num; + print_num[1] = lower_num; +#else + print_num[0] = lower_num; + print_num[1] = upper_num; +#endif +} + static void btf_bitfield_seq_show(void *data, u8 bits_offset, u8 nr_bits, struct seq_file *m) { u16 left_shift_bits, right_shift_bits; u8 nr_copy_bytes; u8 nr_copy_bits; - u64 print_num; + u64 print_num[2] = {};
nr_copy_bits = nr_bits + bits_offset; nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);
- print_num = 0; - memcpy(&print_num, data, nr_copy_bytes); + memcpy(print_num, data, nr_copy_bytes);
#ifdef __BIG_ENDIAN_BITFIELD left_shift_bits = bits_offset; #else - left_shift_bits = BITS_PER_U64 - nr_copy_bits; + left_shift_bits = BITS_PER_U128 - nr_copy_bits; #endif - right_shift_bits = BITS_PER_U64 - nr_bits; + right_shift_bits = BITS_PER_U128 - nr_bits;
- print_num <<= left_shift_bits; - print_num >>= right_shift_bits; - - seq_printf(m, "0x%llx", print_num); + btf_int128_shift(print_num, left_shift_bits, right_shift_bits); + btf_int128_print(m, print_num); }
@@@ -1318,7 -1250,7 +1318,7 @@@ static void btf_int_bits_seq_show(cons
/* * bits_offset is at most 7. - * BTF_INT_OFFSET() cannot exceed 64 bits. + * BTF_INT_OFFSET() cannot exceed 128 bits. */ total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data); data += BITS_ROUNDDOWN_BYTES(total_bits_offset); @@@ -1342,9 -1274,6 +1342,9 @@@ static void btf_int_seq_show(const stru }
switch (nr_bits) { + case 128: + btf_int128_print(m, data); + break; case 64: if (sign) seq_printf(m, "%lld", *(s64 *)data); @@@ -1530,7 -1459,8 +1530,8 @@@ static int btf_modifier_resolve(struct
/* "typedef void new_void", "const void"...etc */ if (!btf_type_is_void(next_type) && - !btf_type_is_fwd(next_type)) { + !btf_type_is_fwd(next_type) && + !btf_type_is_func_proto(next_type)) { btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL; } @@@ -2050,43 -1980,6 +2051,43 @@@ static void btf_struct_log(struct btf_v btf_verifier_log(env, "size=%u vlen=%u", t->size, btf_type_vlen(t)); }
+/* find 'struct bpf_spin_lock' in map value. + * return >= 0 offset if found + * and < 0 in case of error + */ +int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t) +{ + const struct btf_member *member; + u32 i, off = -ENOENT; + + if (!__btf_type_is_struct(t)) + return -EINVAL; + + for_each_member(i, t, member) { + const struct btf_type *member_type = btf_type_by_id(btf, + member->type); + if (!__btf_type_is_struct(member_type)) + continue; + if (member_type->size != sizeof(struct bpf_spin_lock)) + continue; + if (strcmp(__btf_name_by_offset(btf, member_type->name_off), + "bpf_spin_lock")) + continue; + if (off != -ENOENT) + /* only one 'struct bpf_spin_lock' is allowed */ + return -E2BIG; + off = btf_member_bit_offset(t, member); + if (off % 8) + /* valid C code cannot generate such BTF */ + return -EINVAL; + off /= 8; + if (off % __alignof__(struct bpf_spin_lock)) + /* valid struct bpf_spin_lock will be 4 byte aligned */ + return -EINVAL; + } + return off; +} + static void btf_struct_seq_show(const struct btf *btf, const struct btf_type *t, u32 type_id, void *data, u8 bits_offset, struct seq_file *m) diff --combined kernel/bpf/cgroup.c index d78cfec5807d,d17d05570a3f..4e807973aa80 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@@ -230,7 -230,6 +230,7 @@@ cleanup * @cgrp: The cgroup which descendants to traverse * @prog: A program to attach * @type: Type of attach operation + * @flags: Option flags * * Must be called with cgroup_mutex held. */ @@@ -364,7 -363,7 +364,7 @@@ cleanup * Must be called with cgroup_mutex held. */ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 unused_flags) + enum bpf_attach_type type) { struct list_head *progs = &cgrp->bpf.progs[type]; enum bpf_cgroup_storage_type stype; @@@ -573,7 -572,7 +573,7 @@@ int __cgroup_bpf_run_filter_skb(struct bpf_compute_and_save_data_end(skb, &saved_data_end);
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, - bpf_prog_run_save_cb); + __bpf_prog_run_save_cb); bpf_restore_data_end(skb, saved_data_end); __skb_pull(skb, offset); skb->sk = save_sk; diff --combined kernel/bpf/hashtab.c index 937776531998,f9274114c88d..fed15cf94dca --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@@ -686,7 -686,7 +686,7 @@@ static void free_htab_elem(struct bpf_h }
if (htab_is_prealloc(htab)) { - pcpu_freelist_push(&htab->freelist, &l->fnode); + __pcpu_freelist_push(&htab->freelist, &l->fnode); } else { atomic_dec(&htab->count); l->htab = htab; @@@ -718,12 -718,21 +718,12 @@@ static bool fd_htab_map_needs_adjust(co BITS_PER_LONG == 64; }
-static u32 htab_size_value(const struct bpf_htab *htab, bool percpu) -{ - u32 size = htab->map.value_size; - - if (percpu || fd_htab_map_needs_adjust(htab)) - size = round_up(size, 8); - return size; -} - static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, void *value, u32 key_size, u32 hash, bool percpu, bool onallcpus, struct htab_elem *old_elem) { - u32 size = htab_size_value(htab, percpu); + u32 size = htab->map.value_size; bool prealloc = htab_is_prealloc(htab); struct htab_elem *l_new, **pl_new; void __percpu *pptr; @@@ -739,7 -748,7 +739,7 @@@ } else { struct pcpu_freelist_node *l;
- l = pcpu_freelist_pop(&htab->freelist); + l = __pcpu_freelist_pop(&htab->freelist); if (!l) return ERR_PTR(-E2BIG); l_new = container_of(l, struct htab_elem, fnode); @@@ -761,13 -770,10 +761,13 @@@ l_new = ERR_PTR(-ENOMEM); goto dec_count; } + check_and_init_map_lock(&htab->map, + l_new->key + round_up(key_size, 8)); }
memcpy(l_new->key, key, key_size); if (percpu) { + size = round_up(size, 8); if (prealloc) { pptr = htab_elem_get_ptr(l_new, key_size); } else { @@@ -785,13 -791,8 +785,13 @@@
if (!prealloc) htab_elem_set_ptr(l_new, key_size, pptr); - } else { + } else if (fd_htab_map_needs_adjust(htab)) { + size = round_up(size, 8); memcpy(l_new->key + round_up(key_size, 8), value, size); + } else { + copy_map_value(&htab->map, + l_new->key + round_up(key_size, 8), + value); }
l_new->hash = hash; @@@ -804,11 -805,11 +804,11 @@@ dec_count static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old, u64 map_flags) { - if (l_old && map_flags == BPF_NOEXIST) + if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) /* elem already exists */ return -EEXIST;
- if (!l_old && map_flags == BPF_EXIST) + if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) /* elem doesn't exist, cannot update it */ return -ENOENT;
@@@ -827,7 -828,7 +827,7 @@@ static int htab_map_update_elem(struct u32 key_size, hash; int ret;
- if (unlikely(map_flags > BPF_EXIST)) + if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST)) /* unknown flags */ return -EINVAL;
@@@ -840,28 -841,6 +840,28 @@@ b = __select_bucket(htab, hash); head = &b->head;
+ if (unlikely(map_flags & BPF_F_LOCK)) { + if (unlikely(!map_value_has_spin_lock(map))) + return -EINVAL; + /* find an element without taking the bucket lock */ + l_old = lookup_nulls_elem_raw(head, hash, key, key_size, + htab->n_buckets); + ret = check_flags(htab, l_old, map_flags); + if (ret) + return ret; + if (l_old) { + /* grab the element lock and update value in place */ + copy_map_value_locked(map, + l_old->key + round_up(key_size, 8), + value, false); + return 0; + } + /* fall through, grab the bucket lock and lookup again. + * 99.9% chance that the element won't be found, + * but second lookup under lock has to be done. + */ + } + /* bpf_map_update_elem() can be called in_irq() */ raw_spin_lock_irqsave(&b->lock, flags);
@@@ -871,20 -850,6 +871,20 @@@ if (ret) goto err;
+ if (unlikely(l_old && (map_flags & BPF_F_LOCK))) { + /* first lookup without the bucket lock didn't find the element, + * but second lookup with the bucket lock found it. + * This case is highly unlikely, but has to be dealt with: + * grab the element lock in addition to the bucket lock + * and update element in place + */ + copy_map_value_locked(map, + l_old->key + round_up(key_size, 8), + value, false); + ret = 0; + goto err; + } + l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, l_old); if (IS_ERR(l_new)) { diff --combined kernel/bpf/syscall.c index 0834958f1dc4,8577bb7f8be6..ec7c552af76b --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@@ -463,7 -463,7 +463,7 @@@ int map_check_no_btf(const struct bpf_m return -ENOTSUPP; }
-static int map_check_btf(const struct bpf_map *map, const struct btf *btf, +static int map_check_btf(struct bpf_map *map, const struct btf *btf, u32 btf_key_id, u32 btf_value_id) { const struct btf_type *key_type, *value_type; @@@ -478,22 -478,6 +478,22 @@@ if (!value_type || value_size != map->value_size) return -EINVAL;
+ map->spin_lock_off = btf_find_spin_lock(btf, value_type); + + if (map_value_has_spin_lock(map)) { + if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_ARRAY && + map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE) + return -ENOTSUPP; + if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > + map->value_size) { + WARN_ONCE(1, + "verifier bug spin_lock_off %d value_size %d\n", + map->spin_lock_off, map->value_size); + return -EFAULT; + } + } + if (map->ops->map_check_btf) ret = map->ops->map_check_btf(map, btf, key_type, value_type);
@@@ -558,8 -542,6 +558,8 @@@ static int map_create(union bpf_attr *a map->btf = btf; map->btf_key_type_id = attr->btf_key_type_id; map->btf_value_type_id = attr->btf_value_type_id; + } else { + map->spin_lock_off = -EINVAL; }
err = security_bpf_map_alloc(map); @@@ -682,7 -664,7 +682,7 @@@ static void *__bpf_copy_key(void __use }
/* last field in 'union bpf_attr' used by this command */ -#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value +#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags
static int map_lookup_elem(union bpf_attr *attr) { @@@ -698,9 -680,6 +698,9 @@@ if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) return -EINVAL;
+ if (attr->flags & ~BPF_F_LOCK) + return -EINVAL; + f = fdget(ufd); map = __bpf_map_get(f); if (IS_ERR(map)) @@@ -711,12 -690,6 +711,12 @@@ goto err_put; }
+ if ((attr->flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) { + err = -EINVAL; + goto err_put; + } + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); @@@ -740,8 -713,13 +740,13 @@@
if (bpf_map_is_dev_bound(map)) { err = bpf_map_offload_lookup_elem(map, key, value); - } else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + goto done; + } + + preempt_disable(); + this_cpu_inc(bpf_prog_active); + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { err = bpf_percpu_hash_copy(map, key, value); } else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { err = bpf_percpu_array_copy(map, key, value); @@@ -767,17 -745,14 +772,20 @@@ err = -ENOENT; } else { err = 0; - memcpy(value, ptr, value_size); + if (attr->flags & BPF_F_LOCK) + /* lock 'ptr' and copy everything but lock */ + copy_map_value_locked(map, value, ptr, true); + else + copy_map_value(map, value, ptr); + /* mask lock, since value wasn't zero inited */ + check_and_init_map_lock(map, value); } rcu_read_unlock(); } + this_cpu_dec(bpf_prog_active); + preempt_enable();
+ done: if (err) goto free_value;
@@@ -833,12 -808,6 +841,12 @@@ static int map_update_elem(union bpf_at goto err_put; }
+ if ((attr->flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) { + err = -EINVAL; + goto err_put; + } + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); diff --combined net/batman-adv/bat_v_elp.c index 7b80f6f8d4dc,ef0dec20c7d8..a9b7919c9de5 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@@ -1,5 -1,5 +1,5 @@@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2011-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2011-2019 B.A.T.M.A.N. contributors: * * Linus Lüssing, Marek Lindner * @@@ -104,6 -104,9 +104,9 @@@ static u32 batadv_v_elp_get_throughput(
ret = cfg80211_get_station(real_netdev, neigh->addr, &sinfo);
+ /* free the TID stats immediately */ + cfg80211_sinfo_release_content(&sinfo); + dev_put(real_netdev); if (ret == -ENOENT) { /* Node is not associated anymore! It would be diff --combined net/batman-adv/hard-interface.c index 28c1fb8d1af0,415d494cbe22..96ef7c70b4d9 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@@ -1,5 -1,5 +1,5 @@@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@@ -20,7 -20,6 +20,6 @@@ #include "main.h"
#include <linux/atomic.h> - #include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/errno.h> #include <linux/gfp.h> @@@ -179,8 -178,10 +178,10 @@@ static bool batadv_is_on_batman_iface(c parent_dev = __dev_get_by_index((struct net *)parent_net, dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ - if (WARN(!parent_dev, "Cannot find parent device")) + if (!parent_dev) { + pr_err("Cannot find parent device\n"); return false; + }
if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; diff --combined net/batman-adv/soft-interface.c index b14fb3462af7,b85ca809e509..93a5975c21a4 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@@ -1,5 -1,5 +1,5 @@@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright (C) 2007-2018 B.A.T.M.A.N. contributors: +/* Copyright (C) 2007-2019 B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich * @@@ -212,7 -212,6 +212,7 @@@ static netdev_tx_t batadv_interface_tx( enum batadv_forw_mode forw_mode; struct batadv_orig_node *mcast_single_orig = NULL; int network_offset = ETH_HLEN; + __be16 proto;
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) goto dropped; @@@ -222,17 -221,16 +222,19 @@@
netif_trans_update(soft_iface); vid = batadv_get_vid(skb, 0); + + skb_reset_mac_header(skb); ethhdr = eth_hdr(skb);
- switch (ntohs(ethhdr->h_proto)) { + proto = ethhdr->h_proto; + + switch (ntohs(proto)) { case ETH_P_8021Q: vhdr = vlan_eth_hdr(skb); + proto = vhdr->h_vlan_encapsulated_proto;
/* drop batman-in-batman packets to prevent loops */ - if (vhdr->h_vlan_encapsulated_proto != htons(ETH_P_BATMAN)) { + if (proto != htons(ETH_P_BATMAN)) { network_offset += VLAN_HLEN; break; } @@@ -260,9 -258,6 +262,9 @@@ goto dropped; }
+ /* Snoop address candidates from DHCPACKs for early DAT filling */ + batadv_dat_snoop_outgoing_dhcp_ack(bat_priv, skb, proto, vid); + /* don't accept stp packets. STP does not help in meshes. * better use the bridge loop avoidance ... * diff --combined net/core/filter.c index 3a49f68eda10,7a54dc11ac2d..b5a002d7b263 --- a/net/core/filter.c +++ b/net/core/filter.c @@@ -4112,10 -4112,12 +4112,12 @@@ BPF_CALL_5(bpf_setsockopt, struct bpf_s /* Only some socketops are supported */ switch (optname) { case SO_RCVBUF: + val = min_t(u32, val, sysctl_rmem_max); sk->sk_userlocks |= SOCK_RCVBUF_LOCK; sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF); break; case SO_SNDBUF: + val = min_t(u32, val, sysctl_wmem_max); sk->sk_userlocks |= SOCK_SNDBUF_LOCK; sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF); break; @@@ -5314,20 -5316,10 +5316,20 @@@ bpf_base_func_proto(enum bpf_func_id fu return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + default: + break; + } + + if (!capable(CAP_SYS_ADMIN)) + return NULL; + + switch (func_id) { + case BPF_FUNC_spin_lock: + return &bpf_spin_lock_proto; + case BPF_FUNC_spin_unlock: + return &bpf_spin_unlock_proto; case BPF_FUNC_trace_printk: - if (capable(CAP_SYS_ADMIN)) - return bpf_get_trace_printk_proto(); - /* else, fall through */ + return bpf_get_trace_printk_proto(); default: return NULL; } @@@ -6718,27 -6710,6 +6720,27 @@@ static u32 bpf_convert_ctx_access(enum target_size)); break;
+ case offsetof(struct __sk_buff, gso_segs): + /* si->dst_reg = skb_shinfo(SKB); */ +#ifdef NET_SKBUFF_DATA_USES_OFFSET + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, head)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + BPF_REG_AX, si->src_reg, + offsetof(struct sk_buff, end)); + *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); +#else + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, end)); +#endif + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs), + si->dst_reg, si->dst_reg, + bpf_target_off(struct skb_shared_info, + gso_segs, 2, + target_size)); + break; case offsetof(struct __sk_buff, wire_len): BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
@@@ -7729,7 -7700,6 +7731,7 @@@ const struct bpf_verifier_ops flow_diss };
const struct bpf_prog_ops flow_dissector_prog_ops = { + .test_run = bpf_prog_test_run_flow_dissector, };
int sk_detach_filter(struct sock *sk) diff --combined net/core/skmsg.c index e76ed8df9f13,8c826603bf36..ae6f06e45737 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@@ -78,9 -78,11 +78,9 @@@ int sk_msg_clone(struct sock *sk, struc { int i = src->sg.start; struct scatterlist *sge = sk_msg_elem(src, i); + struct scatterlist *sgd = NULL; u32 sge_len, sge_off;
- if (sk_msg_full(dst)) - return -ENOSPC; - while (off) { if (sge->length > off) break; @@@ -92,27 -94,16 +92,27 @@@ }
while (len) { - if (sk_msg_full(dst)) - return -ENOSPC; - sge_len = sge->length - off; - sge_off = sge->offset + off; if (sge_len > len) sge_len = len; + + if (dst->sg.end) + sgd = sk_msg_elem(dst, dst->sg.end - 1); + + if (sgd && + (sg_page(sge) == sg_page(sgd)) && + (sg_virt(sge) + off == sg_virt(sgd) + sgd->length)) { + sgd->length += sge_len; + dst->sg.size += sge_len; + } else if (!sk_msg_full(dst)) { + sge_off = sge->offset + off; + sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off); + } else { + return -ENOSPC; + } + off = 0; len -= sge_len; - sk_msg_page_add(dst, sg_page(sge), sge_len, sge_off); sk_mem_charge(sk, sge_len); sk_msg_iter_var_next(i); if (i == src->sg.end && len) @@@ -554,8 -545,7 +554,7 @@@ static void sk_psock_destroy_deferred(s struct sk_psock *psock = container_of(gc, struct sk_psock, gc);
/* No sk_callback_lock since already detached. */ - if (psock->parser.enabled) - strp_done(&psock->parser.strp); + strp_done(&psock->parser.strp);
cancel_work_sync(&psock->work);
diff --combined net/dsa/master.c index 79e97d2f2d9b,54f5551fb799..c58f33931be1 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@@ -126,17 -126,6 +126,17 @@@ static void dsa_master_get_strings(stru } }
+static int dsa_master_get_phys_port_name(struct net_device *dev, + char *name, size_t len) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + + if (snprintf(name, len, "p%d", cpu_dp->index) >= len) + return -EINVAL; + + return 0; +} + static int dsa_master_ethtool_setup(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; @@@ -169,38 -158,6 +169,38 @@@ static void dsa_master_ethtool_teardown cpu_dp->orig_ethtool_ops = NULL; }
+static int dsa_master_ndo_setup(struct net_device *dev) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_switch *ds = cpu_dp->ds; + struct net_device_ops *ops; + + if (dev->netdev_ops->ndo_get_phys_port_name) + return 0; + + ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); + if (!ops) + return -ENOMEM; + + cpu_dp->orig_ndo_ops = dev->netdev_ops; + if (cpu_dp->orig_ndo_ops) + memcpy(ops, cpu_dp->orig_ndo_ops, sizeof(*ops)); + + ops->ndo_get_phys_port_name = dsa_master_get_phys_port_name; + + dev->netdev_ops = ops; + + return 0; +} + +static void dsa_master_ndo_teardown(struct net_device *dev) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + + dev->netdev_ops = cpu_dp->orig_ndo_ops; + cpu_dp->orig_ndo_ops = NULL; +} + static ssize_t tagging_show(struct device *d, struct device_attribute *attr, char *buf) { @@@ -248,6 -205,8 +248,8 @@@ static void dsa_master_reset_mtu(struc rtnl_unlock(); }
+ static struct lock_class_key dsa_master_addr_list_lock_key; + int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { int ret; @@@ -261,32 -220,23 +263,34 @@@ wmb();
dev->dsa_ptr = cpu_dp; + lockdep_set_class(&dev->addr_list_lock, + &dsa_master_addr_list_lock_key);
ret = dsa_master_ethtool_setup(dev); if (ret) return ret;
+ ret = dsa_master_ndo_setup(dev); + if (ret) + goto out_err_ethtool_teardown; + ret = sysfs_create_group(&dev->dev.kobj, &dsa_group); if (ret) - dsa_master_ethtool_teardown(dev); + goto out_err_ndo_teardown; + + return ret;
+out_err_ndo_teardown: + dsa_master_ndo_teardown(dev); +out_err_ethtool_teardown: + dsa_master_ethtool_teardown(dev); return ret; }
void dsa_master_teardown(struct net_device *dev) { sysfs_remove_group(&dev->dev.kobj, &dsa_group); + dsa_master_ndo_teardown(dev); dsa_master_ethtool_teardown(dev); dsa_master_reset_mtu(dev);
diff --combined net/dsa/slave.c index 70395a0ae52e,a1c9fe155057..2e5e7c04821b --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@@ -140,11 -140,14 +140,14 @@@ static int dsa_slave_close(struct net_d static void dsa_slave_change_rx_flags(struct net_device *dev, int change) { struct net_device *master = dsa_slave_to_master(dev); - - if (change & IFF_ALLMULTI) - dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); - if (change & IFF_PROMISC) - dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1); + if (dev->flags & IFF_UP) { + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, + dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, + dev->flags & IFF_PROMISC ? 1 : -1); + } }
static void dsa_slave_set_rx_mode(struct net_device *dev) @@@ -362,23 -365,18 +365,23 @@@ static int dsa_slave_port_obj_del(struc return err; }
-static int dsa_slave_port_attr_get(struct net_device *dev, - struct switchdev_attr *attr) +static int dsa_slave_get_port_parent_id(struct net_device *dev, + struct netdev_phys_item_id *ppid) { struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_switch *ds = dp->ds; struct dsa_switch_tree *dst = ds->dst;
+ ppid->id_len = sizeof(dst->index); + memcpy(&ppid->id, &dst->index, ppid->id_len); + + return 0; +} + +static int dsa_slave_port_attr_get(struct net_device *dev, + struct switchdev_attr *attr) +{ switch (attr->id) { - case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(dst->index); - memcpy(&attr->u.ppid.id, &dst->index, attr->u.ppid.id_len); - break; case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: attr->u.brport_flags_support = 0; break; @@@ -644,7 -642,7 +647,7 @@@ static int dsa_slave_set_eee(struct net int ret;
/* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev && !dp->pl) + if (!dev->phydev || !dp->pl) return -ENODEV;
if (!ds->ops->set_mac_eee) @@@ -664,7 -662,7 +667,7 @@@ static int dsa_slave_get_eee(struct net int ret;
/* Port's PHY and MAC both need to be EEE capable */ - if (!dev->phydev && !dp->pl) + if (!dev->phydev || !dp->pl) return -ENODEV;
if (!ds->ops->get_mac_eee) @@@ -1014,8 -1012,7 +1017,8 @@@ static const struct ethtool_ops dsa_sla int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, - u16 flags) + u16 flags, + struct netlink_ext_ack *extack) { struct dsa_port *dp = dsa_slave_to_port(dev);
@@@ -1051,7 -1048,6 +1054,7 @@@ static const struct net_device_ops dsa_ .ndo_get_phys_port_name = dsa_slave_get_phys_port_name, .ndo_setup_tc = dsa_slave_setup_tc, .ndo_get_stats64 = dsa_slave_get_stats64, + .ndo_get_port_parent_id = dsa_slave_get_port_parent_id, };
static const struct switchdev_ops dsa_slave_switchdev_ops = { @@@ -1457,7 -1453,7 +1460,7 @@@ static void dsa_slave_switchdev_event_w } fdb_info->offloaded = true; call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev, - &fdb_info->info); + &fdb_info->info, NULL); break;
case SWITCHDEV_FDB_DEL_TO_DEVICE: diff --combined net/ipv4/ip_gre.c index d1cef66820d3,3978f807fa8b..ccee9411dae1 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@@ -449,14 -449,81 +449,14 @@@ static int gre_handle_offloads(struct s return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); }
-static struct rtable *gre_get_rt(struct sk_buff *skb, - struct net_device *dev, - struct flowi4 *fl, - const struct ip_tunnel_key *key) -{ - struct net *net = dev_net(dev); - - memset(fl, 0, sizeof(*fl)); - fl->daddr = key->u.ipv4.dst; - fl->saddr = key->u.ipv4.src; - fl->flowi4_tos = RT_TOS(key->tos); - fl->flowi4_mark = skb->mark; - fl->flowi4_proto = IPPROTO_GRE; - - return ip_route_output_key(net, fl); -} - -static struct rtable *prepare_fb_xmit(struct sk_buff *skb, - struct net_device *dev, - struct flowi4 *fl, - int tunnel_hlen) -{ - struct ip_tunnel_info *tun_info; - const struct ip_tunnel_key *key; - struct rtable *rt = NULL; - int min_headroom; - bool use_cache; - int err; - - tun_info = skb_tunnel_info(skb); - key = &tun_info->key; - use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); - - if (use_cache) - rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr); - if (!rt) { - rt = gre_get_rt(skb, dev, fl, key); - if (IS_ERR(rt)) - goto err_free_skb; - if (use_cache) - dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, - fl->saddr); - } - - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len - + tunnel_hlen + sizeof(struct iphdr); - if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { - int head_delta = SKB_DATA_ALIGN(min_headroom - - skb_headroom(skb) + - 16); - err = pskb_expand_head(skb, max_t(int, head_delta, 0), - 0, GFP_ATOMIC); - if (unlikely(err)) - goto err_free_rt; - } - return rt; - -err_free_rt: - ip_rt_put(rt); -err_free_skb: - kfree_skb(skb); - dev->stats.tx_dropped++; - return NULL; -} - static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, __be16 proto) { struct ip_tunnel *tunnel = netdev_priv(dev); struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; - struct rtable *rt = NULL; - struct flowi4 fl; int tunnel_hlen; - __be16 df, flags; + __be16 flags;
tun_info = skb_tunnel_info(skb); if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || @@@ -466,12 -533,13 +466,12 @@@ key = &tun_info->key; tunnel_hlen = gre_calc_hlen(key->tun_flags);
- rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); - if (!rt) - return; + if (skb_cow_head(skb, dev->needed_headroom)) + goto err_free_skb;
/* Push Tunnel header. */ if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) - goto err_free_rt; + goto err_free_skb;
flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ); @@@ -479,10 -547,14 +479,10 @@@ tunnel_id_to_key32(tun_info->key.tun_id), (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
- df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
- iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, - key->tos, key->ttl, df, false); return;
-err_free_rt: - ip_rt_put(rt); err_free_skb: kfree_skb(skb); dev->stats.tx_dropped++; @@@ -494,8 -566,10 +494,8 @@@ static void erspan_fb_xmit(struct sk_bu struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; struct erspan_metadata *md; - struct rtable *rt = NULL; bool truncate = false; - __be16 df, proto; - struct flowi4 fl; + __be16 proto; int tunnel_hlen; int version; int nhoff; @@@ -508,20 -582,21 +508,20 @@@
key = &tun_info->key; if (!(tun_info->key.tun_flags & TUNNEL_ERSPAN_OPT)) - goto err_free_rt; + goto err_free_skb; md = ip_tunnel_info_opts(tun_info); if (!md) - goto err_free_rt; + goto err_free_skb;
/* ERSPAN has fixed 8 byte GRE header */ version = md->version; tunnel_hlen = 8 + erspan_hdr_len(version);
- rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); - if (!rt) - return; + if (skb_cow_head(skb, dev->needed_headroom)) + goto err_free_skb;
if (gre_handle_offloads(skb, false)) - goto err_free_rt; + goto err_free_skb;
if (skb->len > dev->mtu + dev->hard_header_len) { pskb_trim(skb, dev->mtu + dev->hard_header_len); @@@ -550,16 -625,20 +550,16 @@@ truncate, true); proto = htons(ETH_P_ERSPAN2); } else { - goto err_free_rt; + goto err_free_skb; }
gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(tunnel->o_seqno++));
- df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
- iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, - key->tos, key->ttl, df, false); return;
-err_free_rt: - ip_rt_put(rt); err_free_skb: kfree_skb(skb); dev->stats.tx_dropped++; @@@ -568,18 -647,13 +568,18 @@@ static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); + const struct ip_tunnel_key *key; struct rtable *rt; struct flowi4 fl4;
if (ip_tunnel_info_af(info) != AF_INET) return -EINVAL;
- rt = gre_get_rt(skb, dev, &fl4, &info->key); + key = &info->key; + ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src, + tunnel_id_to_key32(key->tun_id), key->tos, 0, + skb->mark); + rt = ip_route_output_key(dev_net(dev), &fl4); if (IS_ERR(rt)) return PTR_ERR(rt);
@@@ -1381,12 -1455,17 +1381,17 @@@ static int ipgre_fill_info(struct sk_bu { struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((t->erspan_ver == 1 || t->erspan_ver == 2) && + !t->collect_md) + o_flags |= TUNNEL_KEY;
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) || diff --combined net/ipv6/ip6_gre.c index e081e69d534e,801a9a0c217e..65a4f96dc462 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@@ -524,7 -524,7 +524,7 @@@ static int ip6gre_rcv(struct sk_buff *s return PACKET_REJECT; }
-static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len, +static int ip6erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi) { struct erspan_base_hdr *ershdr; @@@ -607,7 -607,7 +607,7 @@@ static int gre_rcv(struct sk_buff *skb
if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) || tpi.proto == htons(ETH_P_ERSPAN2))) { - if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD) + if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD) return 0; goto out; } @@@ -2098,12 -2098,17 +2098,17 @@@ static int ip6gre_fill_info(struct sk_b { struct ip6_tnl *t = netdev_priv(dev); struct __ip6_tnl_parm *p = &t->parms; + __be16 o_flags = p->o_flags; + + if ((p->erspan_ver == 1 || p->erspan_ver == 2) && + !p->collect_md) + o_flags |= TUNNEL_KEY;
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || nla_put_be16(skb, IFLA_GRE_IFLAGS, gre_tnl_flags_to_gre_flags(p->i_flags)) || nla_put_be16(skb, IFLA_GRE_OFLAGS, - gre_tnl_flags_to_gre_flags(p->o_flags)) || + gre_tnl_flags_to_gre_flags(o_flags)) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || diff --combined net/mac80211/tx.c index 61c7ea9de2cc,928f13a208b0..8a49a74c0a37 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@@ -1449,7 -1449,6 +1449,7 @@@ void ieee80211_txq_init(struct ieee8021 codel_vars_init(&txqi->def_cvars); codel_stats_init(&txqi->cstats); __skb_queue_head_init(&txqi->frags); + INIT_LIST_HEAD(&txqi->schedule_order);
txqi->txq.vif = &sdata->vif;
@@@ -1488,14 -1487,8 +1488,14 @@@ void ieee80211_txq_purge(struct ieee802 struct fq *fq = &local->fq; struct fq_tin *tin = &txqi->tin;
+ spin_lock_bh(&fq->lock); fq_tin_reset(fq, tin, fq_skb_free_func); ieee80211_purge_tx_queue(&local->hw, &txqi->frags); + spin_unlock_bh(&fq->lock); + + spin_lock_bh(&local->active_txq_lock[txqi->txq.ac]); + list_del_init(&txqi->schedule_order); + spin_unlock_bh(&local->active_txq_lock[txqi->txq.ac]); }
void ieee80211_txq_set_params(struct ieee80211_local *local) @@@ -1612,7 -1605,7 +1612,7 @@@ static bool ieee80211_queue_skb(struct ieee80211_txq_enqueue(local, txqi, skb); spin_unlock_bh(&fq->lock);
- drv_wake_tx_queue(local, txqi); + schedule_and_wake_txq(local, txqi);
return true; } @@@ -1945,9 -1938,16 +1945,16 @@@ static int ieee80211_skb_resize(struct int head_need, bool may_encrypt) { struct ieee80211_local *local = sdata->local; + struct ieee80211_hdr *hdr; + bool enc_tailroom; int tail_need = 0;
- if (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt) { + hdr = (struct ieee80211_hdr *) skb->data; + enc_tailroom = may_encrypt && + (sdata->crypto_tx_tailroom_needed_cnt || + ieee80211_is_mgmt(hdr->frame_control)); + + if (enc_tailroom) { tail_need = IEEE80211_ENCRYPT_TAILROOM; tail_need -= skb_tailroom(skb); tail_need = max_t(int, tail_need, 0); @@@ -1955,8 -1955,7 +1962,7 @@@
if (skb_cloned(skb) && (!ieee80211_hw_check(&local->hw, SUPPORTS_CLONED_SKBS) || - !skb_clone_writable(skb, ETH_HLEN) || - (may_encrypt && sdata->crypto_tx_tailroom_needed_cnt))) + !skb_clone_writable(skb, ETH_HLEN) || enc_tailroom)) I802_DEBUG_INC(local->tx_expand_skb_head_cloned); else if (head_need || tail_need) I802_DEBUG_INC(local->tx_expand_skb_head); @@@ -3637,151 -3636,6 +3643,151 @@@ out } EXPORT_SYMBOL(ieee80211_tx_dequeue);
+struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *txqi = NULL; + + lockdep_assert_held(&local->active_txq_lock[ac]); + + begin: + txqi = list_first_entry_or_null(&local->active_txqs[ac], + struct txq_info, + schedule_order); + if (!txqi) + return NULL; + + if (txqi->txq.sta) { + struct sta_info *sta = container_of(txqi->txq.sta, + struct sta_info, sta); + + if (sta->airtime[txqi->txq.ac].deficit < 0) { + sta->airtime[txqi->txq.ac].deficit += + sta->airtime_weight; + list_move_tail(&txqi->schedule_order, + &local->active_txqs[txqi->txq.ac]); + goto begin; + } + } + + + if (txqi->schedule_round == local->schedule_round[ac]) + return NULL; + + list_del_init(&txqi->schedule_order); + txqi->schedule_round = local->schedule_round[ac]; + return &txqi->txq; +} +EXPORT_SYMBOL(ieee80211_next_txq); + +void ieee80211_return_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *txqi = to_txq_info(txq); + + lockdep_assert_held(&local->active_txq_lock[txq->ac]); + + if (list_empty(&txqi->schedule_order) && + (!skb_queue_empty(&txqi->frags) || txqi->tin.backlog_packets)) { + /* If airtime accounting is active, always enqueue STAs at the + * head of the list to ensure that they only get moved to the + * back by the airtime DRR scheduler once they have a negative + * deficit. A station that already has a negative deficit will + * get immediately moved to the back of the list on the next + * call to ieee80211_next_txq(). + */ + if (txqi->txq.sta && + wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_AIRTIME_FAIRNESS)) + list_add(&txqi->schedule_order, + &local->active_txqs[txq->ac]); + else + list_add_tail(&txqi->schedule_order, + &local->active_txqs[txq->ac]); + } +} +EXPORT_SYMBOL(ieee80211_return_txq); + +void ieee80211_schedule_txq(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) + __acquires(txq_lock) __releases(txq_lock) +{ + struct ieee80211_local *local = hw_to_local(hw); + + spin_lock_bh(&local->active_txq_lock[txq->ac]); + ieee80211_return_txq(hw, txq); + spin_unlock_bh(&local->active_txq_lock[txq->ac]); +} +EXPORT_SYMBOL(ieee80211_schedule_txq); + +bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *iter, *tmp, *txqi = to_txq_info(txq); + struct sta_info *sta; + u8 ac = txq->ac; + + lockdep_assert_held(&local->active_txq_lock[ac]); + + if (!txqi->txq.sta) + goto out; + + if (list_empty(&txqi->schedule_order)) + goto out; + + list_for_each_entry_safe(iter, tmp, &local->active_txqs[ac], + schedule_order) { + if (iter == txqi) + break; + + if (!iter->txq.sta) { + list_move_tail(&iter->schedule_order, + &local->active_txqs[ac]); + continue; + } + sta = container_of(iter->txq.sta, struct sta_info, sta); + if (sta->airtime[ac].deficit < 0) + sta->airtime[ac].deficit += sta->airtime_weight; + list_move_tail(&iter->schedule_order, &local->active_txqs[ac]); + } + + sta = container_of(txqi->txq.sta, struct sta_info, sta); + if (sta->airtime[ac].deficit >= 0) + goto out; + + sta->airtime[ac].deficit += sta->airtime_weight; + list_move_tail(&txqi->schedule_order, &local->active_txqs[ac]); + + return false; +out: + if (!list_empty(&txqi->schedule_order)) + list_del_init(&txqi->schedule_order); + + return true; +} +EXPORT_SYMBOL(ieee80211_txq_may_transmit); + +void ieee80211_txq_schedule_start(struct ieee80211_hw *hw, u8 ac) + __acquires(txq_lock) +{ + struct ieee80211_local *local = hw_to_local(hw); + + spin_lock_bh(&local->active_txq_lock[ac]); + local->schedule_round[ac]++; +} +EXPORT_SYMBOL(ieee80211_txq_schedule_start); + +void ieee80211_txq_schedule_end(struct ieee80211_hw *hw, u8 ac) + __releases(txq_lock) +{ + struct ieee80211_local *local = hw_to_local(hw); + + spin_unlock_bh(&local->active_txq_lock[ac]); +} +EXPORT_SYMBOL(ieee80211_txq_schedule_end); + void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags) diff --combined net/netfilter/nf_conntrack_core.c index 815956ac5a76,db4d46332e86..08ee03407ace --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@@ -222,24 -222,6 +222,24 @@@ static u32 hash_conntrack(const struct return scale_hash(hash_conntrack_raw(tuple, net)); }
+static bool nf_ct_get_tuple_ports(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ struct { + __be16 sport; + __be16 dport; + } _inet_hdr, *inet_hdr; + + /* Actually only need first 4 bytes to get ports. */ + inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); + if (!inet_hdr) + return false; + + tuple->src.u.udp.port = inet_hdr->sport; + tuple->dst.u.udp.port = inet_hdr->dport; + return true; +} + static bool nf_ct_get_tuple(const struct sk_buff *skb, unsigned int nhoff, @@@ -247,11 -229,16 +247,11 @@@ u_int16_t l3num, u_int8_t protonum, struct net *net, - struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_l4proto *l4proto) + struct nf_conntrack_tuple *tuple) { unsigned int size; const __be32 *ap; __be32 _addrs[8]; - struct { - __be16 sport; - __be16 dport; - } _inet_hdr, *inet_hdr;
memset(tuple, 0, sizeof(*tuple));
@@@ -287,36 -274,16 +287,36 @@@ tuple->dst.protonum = protonum; tuple->dst.dir = IP_CT_DIR_ORIGINAL;
- if (unlikely(l4proto->pkt_to_tuple)) - return l4proto->pkt_to_tuple(skb, dataoff, net, tuple); - - /* Actually only need first 4 bytes to get ports. */ - inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr); - if (!inet_hdr) - return false; + switch (protonum) { +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ICMPV6: + return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple); +#endif + case IPPROTO_ICMP: + return icmp_pkt_to_tuple(skb, dataoff, net, tuple); +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + return gre_pkt_to_tuple(skb, dataoff, net, tuple); +#endif + case IPPROTO_TCP: + case IPPROTO_UDP: /* fallthrough */ + return nf_ct_get_tuple_ports(skb, dataoff, tuple); +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + case IPPROTO_UDPLITE: + return nf_ct_get_tuple_ports(skb, dataoff, tuple); +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + case IPPROTO_SCTP: + return nf_ct_get_tuple_ports(skb, dataoff, tuple); +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP + case IPPROTO_DCCP: + return nf_ct_get_tuple_ports(skb, dataoff, tuple); +#endif + default: + break; + }
- tuple->src.u.udp.port = inet_hdr->sport; - tuple->dst.u.udp.port = inet_hdr->dport; return true; }
@@@ -399,20 -366,33 +399,20 @@@ bool nf_ct_get_tuplepr(const struct sk_ u_int16_t l3num, struct net *net, struct nf_conntrack_tuple *tuple) { - const struct nf_conntrack_l4proto *l4proto; u8 protonum; int protoff; - int ret; - - rcu_read_lock();
protoff = get_l4proto(skb, nhoff, l3num, &protonum); - if (protoff <= 0) { - rcu_read_unlock(); + if (protoff <= 0) return false; - } - - l4proto = __nf_ct_l4proto_find(protonum);
- ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple, - l4proto); - - rcu_read_unlock(); - return ret; + return nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple); } EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig, - const struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_tuple *orig) { memset(inverse, 0, sizeof(*inverse));
@@@ -435,14 -415,8 +435,14 @@@
inverse->dst.protonum = orig->dst.protonum;
- if (unlikely(l4proto->invert_tuple)) - return l4proto->invert_tuple(inverse, orig); + switch (orig->dst.protonum) { + case IPPROTO_ICMP: + return nf_conntrack_invert_icmp_tuple(inverse, orig); +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ICMPV6: + return nf_conntrack_invert_icmpv6_tuple(inverse, orig); +#endif + }
inverse->src.u.all = orig->dst.u.all; inverse->dst.u.all = orig->src.u.all; @@@ -552,20 -526,11 +552,20 @@@ void nf_ct_tmpl_free(struct nf_conn *tm } EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
+static void destroy_gre_conntrack(struct nf_conn *ct) +{ +#ifdef CONFIG_NF_CT_PROTO_GRE + struct nf_conn *master = ct->master; + + if (master) + nf_ct_gre_keymap_destroy(master); +#endif +} + static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - const struct nf_conntrack_l4proto *l4proto;
pr_debug("destroy_conntrack(%p)\n", ct); WARN_ON(atomic_read(&nfct->use) != 0); @@@ -574,9 -539,9 +574,9 @@@ nf_ct_tmpl_free(ct); return; } - l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); - if (l4proto->destroy) - l4proto->destroy(ct); + + if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE)) + destroy_gre_conntrack(ct);
local_bh_disable(); /* Expectations will have been removed in clean_from_lists, @@@ -875,7 -840,7 +875,7 @@@ static int nf_ct_resolve_clash(struct n enum ip_conntrack_info oldinfo; struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
- l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (l4proto->allow_clash && !nf_ct_is_dying(ct) && atomic_inc_not_zero(&ct->ct_general.use)) { @@@ -1042,6 -1007,22 +1042,22 @@@ nf_conntrack_tuple_taken(const struct n }
if (nf_ct_key_equal(h, tuple, zone, net)) { + /* Tuple is taken already, so caller will need to find + * a new source port to use. + * + * Only exception: + * If the *original tuples* are identical, then both + * conntracks refer to the same flow. + * This is a rare situation, it can occur e.g. when + * more than one UDP packet is sent from same socket + * in different threads. + * + * Let nf_ct_resolve_clash() deal with this later. + */ + if (nf_ct_tuple_equal(&ignored_conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)) + continue; + NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; @@@ -1147,7 -1128,7 +1163,7 @@@ static bool gc_worker_can_early_drop(co if (!test_bit(IPS_ASSURED_BIT, &ct->status)) return true;
- l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct)); + l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct)); if (l4proto->can_early_drop && l4proto->can_early_drop(ct)) return true;
@@@ -1377,6 -1358,7 +1393,6 @@@ EXPORT_SYMBOL_GPL(nf_conntrack_free) static noinline struct nf_conntrack_tuple_hash * init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, unsigned int dataoff, u32 hash) { @@@ -1389,7 -1371,7 +1405,7 @@@ struct nf_conn_timeout *timeout_ext; struct nf_conntrack_zone tmp;
- if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) { + if (!nf_ct_invert_tuple(&repl_tuple, tuple)) { pr_debug("Can't invert tuple.\n"); return NULL; } @@@ -1471,6 -1453,7 +1487,6 @@@ resolve_normal_ct(struct nf_conn *tmpl struct sk_buff *skb, unsigned int dataoff, u_int8_t protonum, - const struct nf_conntrack_l4proto *l4proto, const struct nf_hook_state *state) { const struct nf_conntrack_zone *zone; @@@ -1483,7 -1466,7 +1499,7 @@@
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, state->pf, protonum, state->net, - &tuple, l4proto)) { + &tuple)) { pr_debug("Can't get tuple\n"); return 0; } @@@ -1493,7 -1476,7 +1509,7 @@@ hash = hash_conntrack_raw(&tuple, state->net); h = __nf_conntrack_find_get(state->net, zone, &tuple, hash); if (!h) { - h = init_conntrack(state->net, tmpl, &tuple, l4proto, + h = init_conntrack(state->net, tmpl, &tuple, skb, dataoff, hash); if (!h) return 0; @@@ -1555,66 -1538,10 +1571,66 @@@ nf_conntrack_handle_icmp(struct nf_con return ret; }
+static int generic_packet(struct nf_conn *ct, struct sk_buff *skb, + enum ip_conntrack_info ctinfo) +{ + const unsigned int *timeout = nf_ct_timeout_lookup(ct); + + if (!timeout) + timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout; + + nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); + return NF_ACCEPT; +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int nf_conntrack_handle_packet(struct nf_conn *ct, + struct sk_buff *skb, + unsigned int dataoff, + enum ip_conntrack_info ctinfo, + const struct nf_hook_state *state) +{ + switch (nf_ct_protonum(ct)) { + case IPPROTO_TCP: + return nf_conntrack_tcp_packet(ct, skb, dataoff, + ctinfo, state); + case IPPROTO_UDP: + return nf_conntrack_udp_packet(ct, skb, dataoff, + ctinfo, state); + case IPPROTO_ICMP: + return nf_conntrack_icmp_packet(ct, skb, ctinfo, state); +#if IS_ENABLED(CONFIG_IPV6) + case IPPROTO_ICMPV6: + return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state); +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE + case IPPROTO_UDPLITE: + return nf_conntrack_udplite_packet(ct, skb, dataoff, + ctinfo, state); +#endif +#ifdef CONFIG_NF_CT_PROTO_SCTP + case IPPROTO_SCTP: + return nf_conntrack_sctp_packet(ct, skb, dataoff, + ctinfo, state); +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP + case IPPROTO_DCCP: + return nf_conntrack_dccp_packet(ct, skb, dataoff, + ctinfo, state); +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE + case IPPROTO_GRE: + return nf_conntrack_gre_packet(ct, skb, dataoff, + ctinfo, state); +#endif + } + + return generic_packet(ct, skb, ctinfo); +} + unsigned int nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state) { - const struct nf_conntrack_l4proto *l4proto; enum ip_conntrack_info ctinfo; struct nf_conn *ct, *tmpl; u_int8_t protonum; @@@ -1641,6 -1568,8 +1657,6 @@@ goto out; }
- l4proto = __nf_ct_l4proto_find(protonum); - if (protonum == IPPROTO_ICMP || protonum == IPPROTO_ICMPV6) { ret = nf_conntrack_handle_icmp(tmpl, skb, dataoff, protonum, state); @@@ -1654,7 -1583,7 +1670,7 @@@ } repeat: ret = resolve_normal_ct(tmpl, skb, dataoff, - protonum, l4proto, state); + protonum, state); if (ret < 0) { /* Too stressed to deal. */ NF_CT_STAT_INC_ATOMIC(state->net, drop); @@@ -1670,7 -1599,7 +1686,7 @@@ goto out; }
- ret = l4proto->packet(ct, skb, dataoff, ctinfo, state); + ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state); if (ret <= 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ @@@ -1701,6 -1630,19 +1717,6 @@@ out } EXPORT_SYMBOL_GPL(nf_conntrack_in);
-bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, - const struct nf_conntrack_tuple *orig) -{ - bool ret; - - rcu_read_lock(); - ret = nf_ct_invert_tuple(inverse, orig, - __nf_ct_l4proto_find(orig->dst.protonum)); - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr); - /* Alter reply tuple (maybe alter helper). This is for NAT, and is implicitly racy: see __nf_conntrack_confirm */ void nf_conntrack_alter_reply(struct nf_conn *ct, @@@ -1831,6 -1773,7 +1847,6 @@@ static void nf_conntrack_attach(struct
static int nf_conntrack_update(struct net *net, struct sk_buff *skb) { - const struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; enum ip_conntrack_info ctinfo; @@@ -1851,8 -1794,10 +1867,8 @@@ if (dataoff <= 0) return -1;
- l4proto = nf_ct_l4proto_find_get(l4num); - if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num, - l4num, net, &tuple, l4proto)) + l4num, net, &tuple)) return -1;
if (ct->status & IPS_SRC_NAT) { @@@ -2484,10 -2429,15 +2500,10 @@@ int nf_conntrack_init_net(struct net *n nf_conntrack_tstamp_pernet_init(net); nf_conntrack_ecache_pernet_init(net); nf_conntrack_helper_pernet_init(net); + nf_conntrack_proto_pernet_init(net);
- ret = nf_conntrack_proto_pernet_init(net); - if (ret < 0) - goto err_proto; return 0;
-err_proto: - nf_conntrack_ecache_pernet_fini(net); - nf_conntrack_expect_pernet_fini(net); err_expect: free_percpu(net->ct.stat); err_pcpu_lists: diff --combined net/netfilter/nf_tables_api.c index e92bedd09cde,5a92f23f179f..5ca5ec8f3cf0 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@@ -37,16 -37,10 +37,16 @@@ enum NFT_VALIDATE_DO, };
+static struct rhltable nft_objname_ht; + static u32 nft_chain_hash(const void *data, u32 len, u32 seed); static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed); static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *);
+static u32 nft_objname_hash(const void *data, u32 len, u32 seed); +static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed); +static int nft_objname_hash_cmp(struct rhashtable_compare_arg *, const void *); + static const struct rhashtable_params nft_chain_ht_params = { .head_offset = offsetof(struct nft_chain, rhlhead), .key_offset = offsetof(struct nft_chain, name), @@@ -57,15 -51,6 +57,15 @@@ .automatic_shrinking = true, };
+static const struct rhashtable_params nft_objname_ht_params = { + .head_offset = offsetof(struct nft_object, rhlhead), + .key_offset = offsetof(struct nft_object, key), + .hashfn = nft_objname_hash, + .obj_hashfn = nft_objname_hash_obj, + .obj_cmpfn = nft_objname_hash_cmp, + .automatic_shrinking = true, +}; + static void nft_validate_state_update(struct net *net, u8 new_validate_state) { switch (net->nft.validate_state) { @@@ -131,6 -116,23 +131,23 @@@ static void nft_trans_destroy(struct nf kfree(trans); }
+ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) + { + struct net *net = ctx->net; + struct nft_trans *trans; + + if (!nft_set_is_anonymous(set)) + return; + + list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { + if (trans->msg_type == NFT_MSG_NEWSET && + nft_trans_set(trans) == set) { + nft_trans_set_bound(trans) = true; + break; + } + } + } + static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) @@@ -226,18 -228,6 +243,6 @@@ static int nft_delchain(struct nft_ctx return err; }
- /* either expr ops provide both activate/deactivate, or neither */ - static bool nft_expr_check_ops(const struct nft_expr_ops *ops) - { - if (!ops) - return true; - - if (WARN_ON_ONCE((!ops->activate ^ !ops->deactivate))) - return false; - - return true; - } - static void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule) { @@@ -253,14 -243,15 +258,15 @@@ }
static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, - struct nft_rule *rule) + struct nft_rule *rule, + enum nft_trans_phase phase) { struct nft_expr *expr;
expr = nft_expr_first(rule); while (expr != nft_expr_last(rule) && expr->ops) { if (expr->ops->deactivate) - expr->ops->deactivate(ctx, expr); + expr->ops->deactivate(ctx, expr, phase);
expr = nft_expr_next(expr); } @@@ -311,7 -302,7 +317,7 @@@ static int nft_delrule(struct nft_ctx * nft_trans_destroy(trans); return err; } - nft_rule_expr_deactivate(ctx, rule); + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_PREPARE);
return 0; } @@@ -829,34 -820,6 +835,34 @@@ static int nft_chain_hash_cmp(struct rh return strcmp(chain->name, name); }
+static u32 nft_objname_hash(const void *data, u32 len, u32 seed) +{ + const struct nft_object_hash_key *k = data; + + seed ^= hash_ptr(k->table, 32); + + return jhash(k->name, strlen(k->name), seed); +} + +static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct nft_object *obj = data; + + return nft_objname_hash(&obj->key, 0, seed); +} + +static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct nft_object_hash_key *k = arg->key; + const struct nft_object *obj = ptr; + + if (obj->key.table != k->table) + return -1; + + return strcmp(obj->key.name, k->name); +} + static int nf_tables_newtable(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@@ -1113,7 -1076,7 +1119,7 @@@ nft_chain_lookup_byhandle(const struct return ERR_PTR(-ENOENT); }
-static bool lockdep_commit_lock_is_held(struct net *net) +static bool lockdep_commit_lock_is_held(const struct net *net) { #ifdef CONFIG_PROVE_LOCKING return lockdep_is_held(&net->nft.commit_mutex); @@@ -1972,9 -1935,6 +1978,6 @@@ static int nf_tables_delchain(struct ne */ int nft_register_expr(struct nft_expr_type *type) { - if (!nft_expr_check_ops(type->ops)) - return -EINVAL; - nfnl_lock(NFNL_SUBSYS_NFTABLES); if (type->family == NFPROTO_UNSPEC) list_add_tail_rcu(&type->list, &nf_tables_expressions); @@@ -2122,10 -2082,6 +2125,6 @@@ static int nf_tables_expr_parse(const s err = PTR_ERR(ops); goto err1; } - if (!nft_expr_check_ops(ops)) { - err = -EINVAL; - goto err1; - } } else ops = type->ops;
@@@ -2554,7 -2510,7 +2553,7 @@@ static void nf_tables_rule_destroy(cons static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule) { - nft_rule_expr_deactivate(ctx, rule); + nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE); nf_tables_rule_destroy(ctx, rule); }
@@@ -2608,9 -2564,6 +2607,9 @@@ static int nft_table_validate(struct ne return 0; }
+static struct nft_rule *nft_rule_lookup_byid(const struct net *net, + const struct nlattr *nla); + #define NFT_RULE_MAXEXPRS 128
static int nf_tables_newrule(struct net *net, struct sock *nlsk, @@@ -2680,12 -2633,6 +2679,12 @@@ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]); return PTR_ERR(old_rule); } + } else if (nla[NFTA_RULE_POSITION_ID]) { + old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]); + if (IS_ERR(old_rule)) { + NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]); + return PTR_ERR(old_rule); + } } }
@@@ -3760,39 -3707,30 +3759,30 @@@ int nf_tables_bind_set(const struct nft bind: binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); + nft_set_trans_bind(ctx, set); + return 0; } EXPORT_SYMBOL_GPL(nf_tables_bind_set);
- void nf_tables_rebind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding) - { - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) - list_add_tail_rcu(&set->list, &ctx->table->sets); - - list_add_tail_rcu(&binding->list, &set->bindings); - } - EXPORT_SYMBOL_GPL(nf_tables_rebind_set); - void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, - struct nft_set_binding *binding) + struct nft_set_binding *binding, bool event) { list_del_rcu(&binding->list);
- if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) { list_del_rcu(&set->list); + if (event) + nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, + GFP_KERNEL); + } } EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { - if (list_empty(&set->bindings) && nft_set_is_anonymous(set) && - nft_is_active(ctx->net, set)) { - nf_tables_set_notify(ctx, set, NFT_MSG_DELSET, GFP_ATOMIC); + if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) nft_set_destroy(set); - } } EXPORT_SYMBOL_GPL(nf_tables_destroy_set);
@@@ -3903,7 -3841,7 +3893,7 @@@ static int nf_tables_fill_setelem(struc
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) && nla_put_string(skb, NFTA_SET_ELEM_OBJREF, - (*nft_set_ext_obj(ext))->name) < 0) + (*nft_set_ext_obj(ext))->key.name) < 0) goto nla_put_failure;
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && @@@ -4436,8 -4374,7 +4426,8 @@@ static int nft_add_set_elem(struct nft_ err = -EINVAL; goto err2; } - obj = nft_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF], + obj = nft_obj_lookup(ctx->net, ctx->table, + nla[NFTA_SET_ELEM_OBJREF], set->objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); @@@ -4872,36 -4809,18 +4862,36 @@@ void nft_unregister_obj(struct nft_obje } EXPORT_SYMBOL_GPL(nft_unregister_obj);
-struct nft_object *nft_obj_lookup(const struct nft_table *table, +struct nft_object *nft_obj_lookup(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u32 objtype, u8 genmask) { + struct nft_object_hash_key k = { .table = table }; + char search[NFT_OBJ_MAXNAMELEN]; + struct rhlist_head *tmp, *list; struct nft_object *obj;
- list_for_each_entry_rcu(obj, &table->objects, list) { - if (!nla_strcmp(nla, obj->name) && - objtype == obj->ops->type->type && - nft_active_genmask(obj, genmask)) + nla_strlcpy(search, nla, sizeof(search)); + k.name = search; + + WARN_ON_ONCE(!rcu_read_lock_held() && + !lockdep_commit_lock_is_held(net)); + + rcu_read_lock(); + list = rhltable_lookup(&nft_objname_ht, &k, nft_objname_ht_params); + if (!list) + goto out; + + rhl_for_each_entry_rcu(obj, tmp, list, rhlhead) { + if (objtype == obj->ops->type->type && + nft_active_genmask(obj, genmask)) { + rcu_read_unlock(); return obj; + } } +out: + rcu_read_unlock(); return ERR_PTR(-ENOENT); } EXPORT_SYMBOL_GPL(nft_obj_lookup); @@@ -5059,7 -4978,7 +5049,7 @@@ static int nf_tables_newobj(struct net }
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { err = PTR_ERR(obj); if (err != -ENOENT) { @@@ -5085,11 -5004,11 +5075,11 @@@ err = PTR_ERR(obj); goto err1; } - obj->table = table; + obj->key.table = table; obj->handle = nf_tables_alloc_handle(table);
- obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); - if (!obj->name) { + obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); + if (!obj->key.name) { err = -ENOMEM; goto err2; } @@@ -5098,20 -5017,11 +5088,20 @@@ if (err < 0) goto err3;
+ err = rhltable_insert(&nft_objname_ht, &obj->rhlhead, + nft_objname_ht_params); + if (err < 0) + goto err4; + list_add_tail_rcu(&obj->list, &table->objects); table->use++; return 0; +err4: + /* queued in transaction log */ + INIT_LIST_HEAD(&obj->list); + return err; err3: - kfree(obj->name); + kfree(obj->key.name); err2: if (obj->ops->destroy) obj->ops->destroy(&ctx, obj); @@@ -5140,7 -5050,7 +5130,7 @@@ static int nf_tables_fill_obj_info(stru nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || - nla_put_string(skb, NFTA_OBJ_NAME, obj->name) || + nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) || nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) || @@@ -5295,7 -5205,7 +5285,7 @@@ static int nf_tables_getobj(struct net }
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask); + obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); return PTR_ERR(obj); @@@ -5326,7 -5236,7 +5316,7 @@@ static void nft_obj_destroy(const struc obj->ops->destroy(ctx, obj);
module_put(obj->ops->type->owner); - kfree(obj->name); + kfree(obj->key.name); kfree(obj); }
@@@ -5360,7 -5270,7 +5350,7 @@@ static int nf_tables_delobj(struct net obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask); } else { attr = nla[NFTA_OBJ_NAME]; - obj = nft_obj_lookup(table, attr, objtype, genmask); + obj = nft_obj_lookup(net, table, attr, objtype, genmask); }
if (IS_ERR(obj)) { @@@ -5377,7 -5287,7 +5367,7 @@@ return nft_delobj(&ctx, obj); }
-void nft_obj_notify(struct net *net, struct nft_table *table, +void nft_obj_notify(struct net *net, const struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, int event, int family, int report, gfp_t gfp) { @@@ -6484,12 -6394,6 +6474,12 @@@ static void nf_tables_commit_chain(stru nf_tables_commit_chain_free_rules_old(g0); }
+static void nft_obj_del(struct nft_object *obj) +{ + rhltable_remove(&nft_objname_ht, &obj->rhlhead, nft_objname_ht_params); + list_del_rcu(&obj->list); +} + static void nft_chain_del(struct nft_chain *chain) { struct nft_table *table = chain->table; @@@ -6621,6 -6525,9 +6611,9 @@@ static int nf_tables_commit(struct net nf_tables_rule_notify(&trans->ctx, nft_trans_rule(trans), NFT_MSG_DELRULE); + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), + NFT_TRANS_COMMIT); break; case NFT_MSG_NEWSET: nft_clear(net, nft_trans_set(trans)); @@@ -6666,7 -6573,7 +6659,7 @@@ nft_trans_destroy(trans); break; case NFT_MSG_DELOBJ: - list_del_rcu(&nft_trans_obj(trans)->list); + nft_obj_del(nft_trans_obj(trans)); nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), NFT_MSG_DELOBJ); break; @@@ -6707,7 -6614,8 +6700,8 @@@ static void nf_tables_abort_release(str nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - nft_set_destroy(nft_trans_set(trans)); + if (!nft_trans_set_bound(trans)) + nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), @@@ -6768,7 -6676,9 +6762,9 @@@ static int __nf_tables_abort(struct ne case NFT_MSG_NEWRULE: trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); - nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans)); + nft_rule_expr_deactivate(&trans->ctx, + nft_trans_rule(trans), + NFT_TRANS_ABORT); break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; @@@ -6778,7 -6688,8 +6774,8 @@@ break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - list_del_rcu(&nft_trans_set(trans)->list); + if (!nft_trans_set_bound(trans)) + list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: trans->ctx.table->use++; @@@ -6802,7 -6713,7 +6799,7 @@@ break; case NFT_MSG_NEWOBJ: trans->ctx.table->use--; - list_del_rcu(&nft_trans_obj(trans)->list); + nft_obj_del(nft_trans_obj(trans)); break; case NFT_MSG_DELOBJ: trans->ctx.table->use++; @@@ -7416,7 -7327,7 +7413,7 @@@ static void __nft_release_tables(struc nft_set_destroy(set); } list_for_each_entry_safe(obj, ne, &table->objects, list) { - list_del(&obj->list); + nft_obj_del(obj); table->use--; nft_obj_destroy(&ctx, obj); } @@@ -7478,18 -7389,12 +7475,18 @@@ static int __init nf_tables_module_init if (err < 0) goto err3;
+ err = rhltable_init(&nft_objname_ht, &nft_objname_ht_params); + if (err < 0) + goto err4; + /* must be last */ err = nfnetlink_subsys_register(&nf_tables_subsys); if (err < 0) - goto err4; + goto err5;
return err; +err5: + rhltable_destroy(&nft_objname_ht); err4: unregister_netdevice_notifier(&nf_tables_flowtable_notifier); err3: @@@ -7509,7 -7414,6 +7506,7 @@@ static void __exit nf_tables_module_exi unregister_pernet_subsys(&nf_tables_net_ops); cancel_work_sync(&trans_destroy_work); rcu_barrier(); + rhltable_destroy(&nft_objname_ht); nf_tables_core_module_exit(); }
diff --combined net/netfilter/nft_dynset.c index 9658493d37d4,f1172f99752b..a8a74a16f9c4 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@@ -62,8 -62,9 +62,8 @@@ err1 return NULL; }
-static void nft_dynset_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_dynset_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_dynset *priv = nft_expr_priv(expr); struct nft_set *set = priv->set; @@@ -234,20 -235,17 +234,17 @@@ err1 return err; }
- static void nft_dynset_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) - { - struct nft_dynset *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); - } - static void nft_dynset_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_dynset *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); }
static void nft_dynset_destroy(const struct nft_ctx *ctx, @@@ -295,7 -293,6 +292,6 @@@ static const struct nft_expr_ops nft_dy .eval = nft_dynset_eval, .init = nft_dynset_init, .destroy = nft_dynset_destroy, - .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, }; diff --combined net/netfilter/nft_immediate.c index 3e5ed787b1d4,3f6d1d2a6281..5ec43124cbca --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@@ -17,9 -17,9 +17,9 @@@ #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h>
-static void nft_immediate_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +void nft_immediate_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { const struct nft_immediate_expr *priv = nft_expr_priv(expr);
@@@ -72,10 -72,14 +72,14 @@@ static void nft_immediate_activate(cons }
static void nft_immediate_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ if (phase == NFT_TRANS_COMMIT) + return; + return nft_data_release(&priv->data, nft_dreg_to_type(priv->dreg)); }
diff --combined net/netfilter/nft_objref.c index c1f2adf198a0,ae178e914486..79ef074c18ca --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@@ -38,8 -38,7 +38,8 @@@ static int nft_objref_init(const struc return -EINVAL;
objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE])); - obj = nft_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype, + obj = nft_obj_lookup(ctx->net, ctx->table, + tb[NFTA_OBJREF_IMM_NAME], objtype, genmask); if (IS_ERR(obj)) return -ENOENT; @@@ -54,7 -53,7 +54,7 @@@ static int nft_objref_dump(struct sk_bu { const struct nft_object *obj = nft_objref_priv(expr);
- if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) || + if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->key.name) || nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->ops->type->type))) goto nla_put_failure; @@@ -156,20 -155,17 +156,17 @@@ nla_put_failure return -1; }
- static void nft_objref_map_activate(const struct nft_ctx *ctx, - const struct nft_expr *expr) - { - struct nft_objref_map *priv = nft_expr_priv(expr); - - nf_tables_rebind_set(ctx, priv->set, &priv->binding); - } - static void nft_objref_map_deactivate(const struct nft_ctx *ctx, - const struct nft_expr *expr) + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_objref_map *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (phase == NFT_TRANS_PREPARE) + return; + + nf_tables_unbind_set(ctx, priv->set, &priv->binding, + phase == NFT_TRANS_COMMIT); }
static void nft_objref_map_destroy(const struct nft_ctx *ctx, @@@ -186,7 -182,6 +183,6 @@@ static const struct nft_expr_ops nft_ob .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), .eval = nft_objref_map_eval, .init = nft_objref_map_init, - .activate = nft_objref_map_activate, .deactivate = nft_objref_map_deactivate, .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, diff --combined net/sched/cls_flower.c index c5d1db3a3db7,12ca9d13db83..6a341287a527 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@@ -381,27 -381,16 +381,27 @@@ static int fl_hw_replace_filter(struct bool skip_sw = tc_skip_sw(f->flags); int err;
+ cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts)); + if (!cls_flower.rule) + return -ENOMEM; + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = TC_CLSFLOWER_REPLACE; cls_flower.cookie = (unsigned long) f; - cls_flower.dissector = &f->mask->dissector; - cls_flower.mask = &f->mask->key; - cls_flower.key = &f->mkey; - cls_flower.exts = &f->exts; + cls_flower.rule->match.dissector = &f->mask->dissector; + cls_flower.rule->match.mask = &f->mask->key; + cls_flower.rule->match.key = &f->mkey; cls_flower.classid = f->res.classid;
+ err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + if (err) { + kfree(cls_flower.rule); + return err; + } + err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); + kfree(cls_flower.rule); + if (err < 0) { fl_hw_destroy_filter(tp, f, NULL); return err; @@@ -424,13 -413,10 +424,13 @@@ static void fl_hw_update_stats(struct t tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); cls_flower.command = TC_CLSFLOWER_STATS; cls_flower.cookie = (unsigned long) f; - cls_flower.exts = &f->exts; cls_flower.classid = f->res.classid;
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + + tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, + cls_flower.stats.pkts, + cls_flower.stats.lastused); }
static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f, @@@ -1385,7 -1371,7 +1385,7 @@@ static int fl_change(struct net *net, s if (!tc_skip_hw(fnew->flags)) { err = fl_hw_replace_filter(tp, fnew, extack); if (err) - goto errout_mask; + goto errout_mask_ht; }
if (!tc_in_hw(fnew->flags)) @@@ -1415,6 -1401,10 +1415,10 @@@ kfree(mask); return 0;
+ errout_mask_ht: + rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node, + fnew->mask->filter_ht_params); + errout_mask: fl_mask_put(head, fnew->mask, false);
@@@ -1477,32 -1467,18 +1481,32 @@@ static int fl_reoffload(struct tcf_prot if (tc_skip_hw(f->flags)) continue;
+ cls_flower.rule = + flow_rule_alloc(tcf_exts_num_actions(&f->exts)); + if (!cls_flower.rule) + return -ENOMEM; + tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = add ? TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY; cls_flower.cookie = (unsigned long)f; - cls_flower.dissector = &mask->dissector; - cls_flower.mask = &mask->key; - cls_flower.key = &f->mkey; - cls_flower.exts = &f->exts; + cls_flower.rule->match.dissector = &mask->dissector; + cls_flower.rule->match.mask = &mask->key; + cls_flower.rule->match.key = &f->mkey; + + err = tc_setup_flow_action(&cls_flower.rule->action, + &f->exts); + if (err) { + kfree(cls_flower.rule); + return err; + } + cls_flower.classid = f->res.classid;
err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); + kfree(cls_flower.rule); + if (err) { if (add && tc_skip_sw(f->flags)) return err; @@@ -1517,30 -1493,25 +1521,30 @@@ return 0; }
-static void fl_hw_create_tmplt(struct tcf_chain *chain, - struct fl_flow_tmplt *tmplt) +static int fl_hw_create_tmplt(struct tcf_chain *chain, + struct fl_flow_tmplt *tmplt) { struct tc_cls_flower_offload cls_flower = {}; struct tcf_block *block = chain->block; - struct tcf_exts dummy_exts = { 0, }; + + cls_flower.rule = flow_rule_alloc(0); + if (!cls_flower.rule) + return -ENOMEM;
cls_flower.common.chain_index = chain->index; cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE; cls_flower.cookie = (unsigned long) tmplt; - cls_flower.dissector = &tmplt->dissector; - cls_flower.mask = &tmplt->mask; - cls_flower.key = &tmplt->dummy_key; - cls_flower.exts = &dummy_exts; + cls_flower.rule->match.dissector = &tmplt->dissector; + cls_flower.rule->match.mask = &tmplt->mask; + cls_flower.rule->match.key = &tmplt->dummy_key;
/* We don't care if driver (any of them) fails to handle this * call. It serves just as a hint for it. */ tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + kfree(cls_flower.rule); + + return 0; }
static void fl_hw_destroy_tmplt(struct tcf_chain *chain, @@@ -1584,14 -1555,12 +1588,14 @@@ static void *fl_tmplt_create(struct ne err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack); if (err) goto errout_tmplt; - kfree(tb);
fl_init_dissector(&tmplt->dissector, &tmplt->mask);
- fl_hw_create_tmplt(chain, tmplt); + err = fl_hw_create_tmplt(chain, tmplt); + if (err) + goto errout_tmplt;
+ kfree(tb); return tmplt;
errout_tmplt: diff --combined net/sctp/socket.c index 9644bdc8e85c,65d6d04546ae..a78e55a1bb9c --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@@ -248,7 -248,7 +248,7 @@@ struct sctp_association *sctp_id2assoc( }
/* Otherwise this is a UDP-style socket. */ - if (!id || (id == (sctp_assoc_t)-1)) + if (id <= SCTP_ALL_ASSOC) return NULL;
spin_lock_bh(&sctp_assocs_id_lock); @@@ -2027,7 -2027,7 +2027,7 @@@ static int sctp_sendmsg(struct sock *sk struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_transport *transport = NULL; struct sctp_sndrcvinfo _sinfo, *sinfo; - struct sctp_association *asoc; + struct sctp_association *asoc, *tmp; struct sctp_cmsgs cmsgs; union sctp_addr *daddr; bool new = false; @@@ -2053,7 -2053,7 +2053,7 @@@
/* SCTP_SENDALL process */ if ((sflags & SCTP_SENDALL) && sctp_style(sk, UDP)) { - list_for_each_entry(asoc, &ep->asocs, asocs) { + list_for_each_entry_safe(asoc, tmp, &ep->asocs, asocs) { err = sctp_sendmsg_check_sflags(asoc, sflags, msg, msg_len); if (err == 0) @@@ -2750,13 -2750,12 +2750,13 @@@ static int sctp_setsockopt_peer_addr_pa return -EINVAL; }
- /* Get association, if assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.spp_assoc_id); - if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.spp_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
/* Heartbeat demand can only be sent on a transport or @@@ -2798,43 -2797,6 +2798,43 @@@ static inline __u32 sctp_spp_sackdelay_ return (param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_DISABLE; }
+static void sctp_apply_asoc_delayed_ack(struct sctp_sack_info *params, + struct sctp_association *asoc) +{ + struct sctp_transport *trans; + + if (params->sack_delay) { + asoc->sackdelay = msecs_to_jiffies(params->sack_delay); + asoc->param_flags = + sctp_spp_sackdelay_enable(asoc->param_flags); + } + if (params->sack_freq == 1) { + asoc->param_flags = + sctp_spp_sackdelay_disable(asoc->param_flags); + } else if (params->sack_freq > 1) { + asoc->sackfreq = params->sack_freq; + asoc->param_flags = + sctp_spp_sackdelay_enable(asoc->param_flags); + } + + list_for_each_entry(trans, &asoc->peer.transport_addr_list, + transports) { + if (params->sack_delay) { + trans->sackdelay = msecs_to_jiffies(params->sack_delay); + trans->param_flags = + sctp_spp_sackdelay_enable(trans->param_flags); + } + if (params->sack_freq == 1) { + trans->param_flags = + sctp_spp_sackdelay_disable(trans->param_flags); + } else if (params->sack_freq > 1) { + trans->sackfreq = params->sack_freq; + trans->param_flags = + sctp_spp_sackdelay_enable(trans->param_flags); + } + } +} + /* * 7.1.23. Get or set delayed ack timer (SCTP_DELAYED_SACK) * @@@ -2874,9 -2836,10 +2874,9 @@@ static int sctp_setsockopt_delayed_ack(struct sock *sk, char __user *optval, unsigned int optlen) { - struct sctp_sack_info params; - struct sctp_transport *trans = NULL; - struct sctp_association *asoc = NULL; - struct sctp_sock *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); + struct sctp_association *asoc; + struct sctp_sack_info params;
if (optlen == sizeof(struct sctp_sack_info)) { if (copy_from_user(¶ms, optval, optlen)) @@@ -2904,42 -2867,67 +2904,42 @@@ if (params.sack_delay > 500) return -EINVAL;
- /* Get association, if sack_assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.sack_assoc_id); - if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.sack_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
- if (params.sack_delay) { - if (asoc) { - asoc->sackdelay = - msecs_to_jiffies(params.sack_delay); - asoc->param_flags = - sctp_spp_sackdelay_enable(asoc->param_flags); - } else { + if (asoc) { + sctp_apply_asoc_delayed_ack(¶ms, asoc); + + return 0; + } + + if (params.sack_assoc_id == SCTP_FUTURE_ASSOC || + params.sack_assoc_id == SCTP_ALL_ASSOC) { + if (params.sack_delay) { sp->sackdelay = params.sack_delay; sp->param_flags = sctp_spp_sackdelay_enable(sp->param_flags); } - } - - if (params.sack_freq == 1) { - if (asoc) { - asoc->param_flags = - sctp_spp_sackdelay_disable(asoc->param_flags); - } else { + if (params.sack_freq == 1) { sp->param_flags = sctp_spp_sackdelay_disable(sp->param_flags); - } - } else if (params.sack_freq > 1) { - if (asoc) { - asoc->sackfreq = params.sack_freq; - asoc->param_flags = - sctp_spp_sackdelay_enable(asoc->param_flags); - } else { + } else if (params.sack_freq > 1) { sp->sackfreq = params.sack_freq; sp->param_flags = sctp_spp_sackdelay_enable(sp->param_flags); } }
- /* If change is for association, also apply to each transport. */ - if (asoc) { - list_for_each_entry(trans, &asoc->peer.transport_addr_list, - transports) { - if (params.sack_delay) { - trans->sackdelay = - msecs_to_jiffies(params.sack_delay); - trans->param_flags = - sctp_spp_sackdelay_enable(trans->param_flags); - } - if (params.sack_freq == 1) { - trans->param_flags = - sctp_spp_sackdelay_disable(trans->param_flags); - } else if (params.sack_freq > 1) { - trans->sackfreq = params.sack_freq; - trans->param_flags = - sctp_spp_sackdelay_enable(trans->param_flags); - } - } - } + if (params.sack_assoc_id == SCTP_CURRENT_ASSOC || + params.sack_assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &sp->ep->asocs, asocs) + sctp_apply_asoc_delayed_ack(¶ms, asoc);
return 0; } @@@ -3009,22 -2997,15 +3009,22 @@@ static int sctp_setsockopt_default_send return -EINVAL;
asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); - if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.sinfo_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { asoc->default_stream = info.sinfo_stream; asoc->default_flags = info.sinfo_flags; asoc->default_ppid = info.sinfo_ppid; asoc->default_context = info.sinfo_context; asoc->default_timetolive = info.sinfo_timetolive; - } else { + + return 0; + } + + if (info.sinfo_assoc_id == SCTP_FUTURE_ASSOC || + info.sinfo_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.sinfo_stream; sp->default_flags = info.sinfo_flags; sp->default_ppid = info.sinfo_ppid; @@@ -3032,17 -3013,6 +3032,17 @@@ sp->default_timetolive = info.sinfo_timetolive; }
+ if (info.sinfo_assoc_id == SCTP_CURRENT_ASSOC || + info.sinfo_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + asoc->default_stream = info.sinfo_stream; + asoc->default_flags = info.sinfo_flags; + asoc->default_ppid = info.sinfo_ppid; + asoc->default_context = info.sinfo_context; + asoc->default_timetolive = info.sinfo_timetolive; + } + } + return 0; }
@@@ -3067,37 -3037,20 +3067,37 @@@ static int sctp_setsockopt_default_sndi return -EINVAL;
asoc = sctp_id2assoc(sk, info.snd_assoc_id); - if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.snd_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { asoc->default_stream = info.snd_sid; asoc->default_flags = info.snd_flags; asoc->default_ppid = info.snd_ppid; asoc->default_context = info.snd_context; - } else { + + return 0; + } + + if (info.snd_assoc_id == SCTP_FUTURE_ASSOC || + info.snd_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.snd_sid; sp->default_flags = info.snd_flags; sp->default_ppid = info.snd_ppid; sp->default_context = info.snd_context; }
+ if (info.snd_assoc_id == SCTP_CURRENT_ASSOC || + info.snd_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + asoc->default_stream = info.snd_sid; + asoc->default_flags = info.snd_flags; + asoc->default_ppid = info.snd_ppid; + asoc->default_context = info.snd_context; + } + } + return 0; }
@@@ -3191,8 -3144,7 +3191,8 @@@ static int sctp_setsockopt_rtoinfo(stru asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id);
/* Set the values to the specific association */ - if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP)) + if (!asoc && rtoinfo.srto_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
rto_max = rtoinfo.srto_max; @@@ -3254,8 -3206,7 +3254,8 @@@ static int sctp_setsockopt_associnfo(st
asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id);
- if (!asoc && assocparams.sasoc_assoc_id && sctp_style(sk, UDP)) + if (!asoc && assocparams.sasoc_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
/* Set the values to the specific association */ @@@ -3368,7 -3319,7 +3368,7 @@@ static int sctp_setsockopt_maxseg(struc current->comm, task_pid_nr(current)); if (copy_from_user(&val, optval, optlen)) return -EFAULT; - params.assoc_id = 0; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (optlen == sizeof(struct sctp_assoc_value)) { if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@@ -3378,9 -3329,6 +3378,9 @@@ }
asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
if (val) { int min_len, max_len; @@@ -3398,6 -3346,8 +3398,6 @@@ asoc->user_frag = val; sctp_assoc_update_frag_point(asoc); } else { - if (params.assoc_id && sctp_style(sk, UDP)) - return -EINVAL; sp->user_frag = val; }
@@@ -3510,8 -3460,8 +3510,8 @@@ static int sctp_setsockopt_adaptation_l static int sctp_setsockopt_context(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc;
if (optlen != sizeof(struct sctp_assoc_value)) @@@ -3519,26 -3469,17 +3519,26 @@@ if (copy_from_user(¶ms, optval, optlen)) return -EFAULT;
- sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
- if (params.assoc_id != 0) { - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - return -EINVAL; + if (asoc) { asoc->default_rcv_context = params.assoc_value; - } else { - sp->default_rcv_context = params.assoc_value; + + return 0; }
+ if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + sp->default_rcv_context = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &sp->ep->asocs, asocs) + asoc->default_rcv_context = params.assoc_value; + return 0; }
@@@ -3639,9 -3580,11 +3639,9 @@@ static int sctp_setsockopt_maxburst(str char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc; - int val; - int assoc_id = 0;
if (optlen == sizeof(int)) { pr_warn_ratelimited(DEPRECATED @@@ -3649,34 -3592,25 +3649,34 @@@ "Use of int in max_burst socket option deprecated.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - if (copy_from_user(&val, optval, optlen)) + if (copy_from_user(¶ms.assoc_value, optval, optlen)) return -EFAULT; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (optlen == sizeof(struct sctp_assoc_value)) { if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; - val = params.assoc_value; - assoc_id = params.assoc_id; } else return -EINVAL;
- sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
- if (assoc_id != 0) { - asoc = sctp_id2assoc(sk, assoc_id); - if (!asoc) - return -EINVAL; - asoc->max_burst = val; - } else - sp->max_burst = val; + if (asoc) { + asoc->max_burst = params.assoc_value; + + return 0; + } + + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + sp->max_burst = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &sp->ep->asocs, asocs) + asoc->max_burst = params.assoc_value;
return 0; } @@@ -3768,7 -3702,7 +3768,7 @@@ static int sctp_setsockopt_auth_key(str struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authkey *authkey; struct sctp_association *asoc; - int ret; + int ret = -EINVAL;
if (!ep->auth_enable) return -EACCES; @@@ -3778,44 -3712,25 +3778,44 @@@ /* authkey->sca_keylength is u16, so optlen can't be bigger than * this. */ - optlen = min_t(unsigned int, optlen, USHRT_MAX + - sizeof(struct sctp_authkey)); + optlen = min_t(unsigned int, optlen, USHRT_MAX + sizeof(*authkey));
authkey = memdup_user(optval, optlen); if (IS_ERR(authkey)) return PTR_ERR(authkey);
- if (authkey->sca_keylength > optlen - sizeof(struct sctp_authkey)) { - ret = -EINVAL; + if (authkey->sca_keylength > optlen - sizeof(*authkey)) goto out; - }
asoc = sctp_id2assoc(sk, authkey->sca_assoc_id); - if (!asoc && authkey->sca_assoc_id && sctp_style(sk, UDP)) { - ret = -EINVAL; + if (!asoc && authkey->sca_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + goto out; + + if (asoc) { + ret = sctp_auth_set_key(ep, asoc, authkey); goto out; }
- ret = sctp_auth_set_key(ep, asoc, authkey); + if (authkey->sca_assoc_id == SCTP_FUTURE_ASSOC || + authkey->sca_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_set_key(ep, asoc, authkey); + if (ret) + goto out; + } + + ret = 0; + + if (authkey->sca_assoc_id == SCTP_CURRENT_ASSOC || + authkey->sca_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_set_key(ep, asoc, authkey); + + if (res && !ret) + ret = res; + } + } + out: kzfree(authkey); return ret; @@@ -3832,9 -3747,8 +3832,9 @@@ static int sctp_setsockopt_active_key(s unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authkeyid val; struct sctp_association *asoc; + struct sctp_authkeyid val; + int ret = 0;
if (!ep->auth_enable) return -EACCES; @@@ -3845,32 -3759,10 +3845,32 @@@ return -EFAULT;
asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
- return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + if (asoc) + return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + if (ret) + return ret; + } + + if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_set_active_key(ep, asoc, + val.scact_keynumber); + + if (res && !ret) + ret = res; + } + } + + return ret; }
/* @@@ -3883,9 -3775,8 +3883,9 @@@ static int sctp_setsockopt_del_key(stru unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authkeyid val; struct sctp_association *asoc; + struct sctp_authkeyid val; + int ret = 0;
if (!ep->auth_enable) return -EACCES; @@@ -3896,32 -3787,11 +3896,32 @@@ return -EFAULT;
asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
- return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + if (asoc) + return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + if (ret) + return ret; + } + + if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_del_key_id(ep, asoc, + val.scact_keynumber); + + if (res && !ret) + ret = res; + } + }
+ return ret; }
/* @@@ -3933,9 -3803,8 +3933,9 @@@ static int sctp_setsockopt_deactivate_k unsigned int optlen) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct sctp_authkeyid val; struct sctp_association *asoc; + struct sctp_authkeyid val; + int ret = 0;
if (!ep->auth_enable) return -EACCES; @@@ -3946,32 -3815,10 +3946,32 @@@ return -EFAULT;
asoc = sctp_id2assoc(sk, val.scact_assoc_id); - if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.scact_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
- return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + if (asoc) + return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + ret = sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + if (ret) + return ret; + } + + if (val.scact_assoc_id == SCTP_CURRENT_ASSOC || + val.scact_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &ep->asocs, asocs) { + int res = sctp_auth_deact_key_id(ep, asoc, + val.scact_keynumber); + + if (res && !ret) + ret = res; + } + } + + return ret; }
/* @@@ -4037,25 -3884,11 +4037,25 @@@ static int sctp_setsockopt_paddr_thresh sizeof(struct sctp_paddrthlds))) return -EFAULT;
- - if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { - asoc = sctp_id2assoc(sk, val.spt_assoc_id); - if (!asoc) + if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { + trans = sctp_addr_id2transport(sk, &val.spt_address, + val.spt_assoc_id); + if (!trans) return -ENOENT; + + if (val.spt_pathmaxrxt) + trans->pathmaxrxt = val.spt_pathmaxrxt; + trans->pf_retrans = val.spt_pathpfthld; + + return 0; + } + + asoc = sctp_id2assoc(sk, val.spt_assoc_id); + if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) { list_for_each_entry(trans, &asoc->peer.transport_addr_list, transports) { if (val.spt_pathmaxrxt) @@@ -4067,11 -3900,14 +4067,11 @@@ asoc->pathmaxrxt = val.spt_pathmaxrxt; asoc->pf_retrans = val.spt_pathpfthld; } else { - trans = sctp_addr_id2transport(sk, &val.spt_address, - val.spt_assoc_id); - if (!trans) - return -ENOENT; + struct sctp_sock *sp = sctp_sk(sk);
if (val.spt_pathmaxrxt) - trans->pathmaxrxt = val.spt_pathmaxrxt; - trans->pf_retrans = val.spt_pathpfthld; + sp->pathmaxrxt = val.spt_pathmaxrxt; + sp->pf_retrans = val.spt_pathpfthld; }
return 0; @@@ -4114,7 -3950,6 +4114,7 @@@ static int sctp_setsockopt_pr_supported unsigned int optlen) { struct sctp_assoc_value params; + struct sctp_association *asoc;
if (optlen != sizeof(params)) return -EINVAL; @@@ -4122,11 -3957,6 +4122,11 @@@ if (copy_from_user(¶ms, optval, optlen)) return -EFAULT;
+ asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + sctp_sk(sk)->ep->prsctp_enable = !!params.assoc_value;
return 0; @@@ -4136,7 -3966,6 +4136,7 @@@ static int sctp_setsockopt_default_prin char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_default_prinfo info; struct sctp_association *asoc; int retval = -EINVAL; @@@ -4156,31 -3985,19 +4156,31 @@@ info.pr_value = 0;
asoc = sctp_id2assoc(sk, info.pr_assoc_id); + if (!asoc && info.pr_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + goto out; + + retval = 0; + if (asoc) { SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy); asoc->default_timetolive = info.pr_value; - } else if (!info.pr_assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); + goto out; + }
+ if (info.pr_assoc_id == SCTP_FUTURE_ASSOC || + info.pr_assoc_id == SCTP_ALL_ASSOC) { SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy); sp->default_timetolive = info.pr_value; - } else { - goto out; }
- retval = 0; + if (info.pr_assoc_id == SCTP_CURRENT_ASSOC || + info.pr_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + SCTP_PR_SET_POLICY(asoc->default_flags, info.pr_policy); + asoc->default_timetolive = info.pr_value; + } + }
out: return retval; @@@ -4203,14 -4020,15 +4203,14 @@@ static int sctp_setsockopt_reconfig_sup }
asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - asoc->reconf_enable = !!params.assoc_value; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - sp->ep->reconf_enable = !!params.assoc_value; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) goto out; - } + + if (asoc) + asoc->reconf_enable = !!params.assoc_value; + else + sctp_sk(sk)->ep->reconf_enable = !!params.assoc_value;
retval = 0;
@@@ -4222,7 -4040,6 +4222,7 @@@ static int sctp_setsockopt_enable_strre char __user *optval, unsigned int optlen) { + struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_assoc_value params; struct sctp_association *asoc; int retval = -EINVAL; @@@ -4239,25 -4056,17 +4239,25 @@@ goto out;
asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + goto out; + + retval = 0; + if (asoc) { asoc->strreset_enable = params.assoc_value; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - sp->ep->strreset_enable = params.assoc_value; - } else { goto out; }
- retval = 0; + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + ep->strreset_enable = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + list_for_each_entry(asoc, &ep->asocs, asocs) + asoc->strreset_enable = params.assoc_value;
out: return retval; @@@ -4352,44 -4161,29 +4352,44 @@@ static int sctp_setsockopt_scheduler(st char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; struct sctp_assoc_value params; - int retval = -EINVAL; + int retval = 0;
if (optlen < sizeof(params)) - goto out; + return -EINVAL;
optlen = sizeof(params); - if (copy_from_user(¶ms, optval, optlen)) { - retval = -EFAULT; - goto out; - } + if (copy_from_user(¶ms, optval, optlen)) + return -EFAULT;
if (params.assoc_value > SCTP_SS_MAX) - goto out; + return -EINVAL;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - goto out; + if (!asoc && params.assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
- retval = sctp_sched_set_sched(asoc, params.assoc_value); + if (asoc) + return sctp_sched_set_sched(asoc, params.assoc_value); + + if (params.assoc_id == SCTP_FUTURE_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) + sp->default_ss = params.assoc_value; + + if (params.assoc_id == SCTP_CURRENT_ASSOC || + params.assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + int ret = sctp_sched_set_sched(asoc, + params.assoc_value); + + if (ret && !retval) + retval = ret; + } + }
-out: return retval; }
@@@ -4397,8 -4191,8 +4397,8 @@@ static int sctp_setsockopt_scheduler_va char __user *optval, unsigned int optlen) { - struct sctp_association *asoc; struct sctp_stream_value params; + struct sctp_association *asoc; int retval = -EINVAL;
if (optlen < sizeof(params)) @@@ -4411,24 -4205,11 +4411,24 @@@ }
asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) + if (!asoc && params.assoc_id != SCTP_CURRENT_ASSOC && + sctp_style(sk, UDP)) goto out;
- retval = sctp_sched_set_value(asoc, params.stream_id, - params.stream_value, GFP_KERNEL); + if (asoc) { + retval = sctp_sched_set_value(asoc, params.stream_id, + params.stream_value, GFP_KERNEL); + goto out; + } + + retval = 0; + + list_for_each_entry(asoc, &sctp_sk(sk)->ep->asocs, asocs) { + int ret = sctp_sched_set_value(asoc, params.stream_id, + params.stream_value, GFP_KERNEL); + if (ret && !retval) /* try to return the 1st error. */ + retval = ret; + }
out: return retval; @@@ -4439,8 -4220,8 +4439,8 @@@ static int sctp_setsockopt_interleaving unsigned int optlen) { struct sctp_sock *sp = sctp_sk(sk); - struct net *net = sock_net(sk); struct sctp_assoc_value params; + struct sctp_association *asoc; int retval = -EINVAL;
if (optlen < sizeof(params)) @@@ -4452,12 -4233,10 +4452,12 @@@ goto out; }
- if (params.assoc_id) + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) goto out;
- if (!net->sctp.intl_enable || !sp->frag_interleave) { + if (!sock_net(sk)->sctp.intl_enable || !sp->frag_interleave) { retval = -EPERM; goto out; } @@@ -4492,69 -4271,54 +4492,69 @@@ static int sctp_setsockopt_reuse_port(s return 0; }
+static int sctp_assoc_ulpevent_type_set(struct sctp_event *param, + struct sctp_association *asoc) +{ + struct sctp_ulpevent *event; + + sctp_ulpevent_type_set(&asoc->subscribe, param->se_type, param->se_on); + + if (param->se_type == SCTP_SENDER_DRY_EVENT && param->se_on) { + if (sctp_outq_is_empty(&asoc->outqueue)) { + event = sctp_ulpevent_make_sender_dry_event(asoc, + GFP_USER | __GFP_NOWARN); + if (!event) + return -ENOMEM; + + asoc->stream.si->enqueue_event(&asoc->ulpq, event); + } + } + + return 0; +} + static int sctp_setsockopt_event(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_association *asoc; - struct sctp_ulpevent *event; struct sctp_event param; int retval = 0;
- if (optlen < sizeof(param)) { - retval = -EINVAL; - goto out; - } + if (optlen < sizeof(param)) + return -EINVAL;
optlen = sizeof(param); - if (copy_from_user(¶m, optval, optlen)) { - retval = -EFAULT; - goto out; - } + if (copy_from_user(¶m, optval, optlen)) + return -EFAULT;
if (param.se_type < SCTP_SN_TYPE_BASE || - param.se_type > SCTP_SN_TYPE_MAX) { - retval = -EINVAL; - goto out; - } + param.se_type > SCTP_SN_TYPE_MAX) + return -EINVAL;
asoc = sctp_id2assoc(sk, param.se_assoc_id); - if (!asoc) { - sctp_ulpevent_type_set(&sctp_sk(sk)->subscribe, - param.se_type, param.se_on); - goto out; - } + if (!asoc && param.se_assoc_id > SCTP_ALL_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
- sctp_ulpevent_type_set(&asoc->subscribe, param.se_type, param.se_on); + if (asoc) + return sctp_assoc_ulpevent_type_set(¶m, asoc);
- if (param.se_type == SCTP_SENDER_DRY_EVENT && param.se_on) { - if (sctp_outq_is_empty(&asoc->outqueue)) { - event = sctp_ulpevent_make_sender_dry_event(asoc, - GFP_USER | __GFP_NOWARN); - if (!event) { - retval = -ENOMEM; - goto out; - } + if (param.se_assoc_id == SCTP_FUTURE_ASSOC || + param.se_assoc_id == SCTP_ALL_ASSOC) + sctp_ulpevent_type_set(&sp->subscribe, + param.se_type, param.se_on);
- asoc->stream.si->enqueue_event(&asoc->ulpq, event); + if (param.se_assoc_id == SCTP_CURRENT_ASSOC || + param.se_assoc_id == SCTP_ALL_ASSOC) { + list_for_each_entry(asoc, &sp->ep->asocs, asocs) { + int ret = sctp_assoc_ulpevent_type_set(¶m, asoc); + + if (ret && !retval) + retval = ret; } }
-out: return retval; }
@@@ -5013,14 -4777,12 +5013,14 @@@ static int sctp_init_sock(struct sock * */ sp->hbinterval = net->sctp.hb_interval; sp->pathmaxrxt = net->sctp.max_retrans_path; + sp->pf_retrans = net->sctp.pf_retrans; sp->pathmtu = 0; /* allow default discovery */ sp->sackdelay = net->sctp.sack_timeout; sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; + sp->default_ss = SCTP_SS_DEFAULT;
/* If enabled no SCTP message fragmentation will be performed. * Configure through SCTP_DISABLE_FRAGMENTS socket option. @@@ -5914,13 -5676,12 +5914,13 @@@ static int sctp_getsockopt_peer_addr_pa } }
- /* Get association, if assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.spp_assoc_id); - if (!asoc && params.spp_assoc_id && sctp_style(sk, UDP)) { + if (!asoc && params.spp_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { pr_debug("%s: failed no association\n", __func__); return -EINVAL; } @@@ -6049,19 -5810,19 +6049,19 @@@ static int sctp_getsockopt_delayed_ack( } else return -EINVAL;
- /* Get association, if sack_assoc_id != 0 and the socket is a one - * to many style socket, and an association was not found, then - * the id was invalid. + /* Get association, if sack_assoc_id != SCTP_FUTURE_ASSOC and the + * socket is a one to many style socket, and an association + * was not found, then the id was invalid. */ asoc = sctp_id2assoc(sk, params.sack_assoc_id); - if (!asoc && params.sack_assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.sack_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
if (asoc) { /* Fetch association values. */ if (asoc->param_flags & SPP_SACKDELAY_ENABLE) { - params.sack_delay = jiffies_to_msecs( - asoc->sackdelay); + params.sack_delay = jiffies_to_msecs(asoc->sackdelay); params.sack_freq = asoc->sackfreq;
} else { @@@ -6414,10 -6175,8 +6414,10 @@@ static int sctp_getsockopt_default_send return -EFAULT;
asoc = sctp_id2assoc(sk, info.sinfo_assoc_id); - if (!asoc && info.sinfo_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.sinfo_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { info.sinfo_stream = asoc->default_stream; info.sinfo_flags = asoc->default_flags; @@@ -6460,10 -6219,8 +6460,10 @@@ static int sctp_getsockopt_default_sndi return -EFAULT;
asoc = sctp_id2assoc(sk, info.snd_assoc_id); - if (!asoc && info.snd_assoc_id && sctp_style(sk, UDP)) + if (!asoc && info.snd_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL; + if (asoc) { info.snd_sid = asoc->default_stream; info.snd_flags = asoc->default_flags; @@@ -6539,8 -6296,7 +6539,8 @@@ static int sctp_getsockopt_rtoinfo(stru
asoc = sctp_id2assoc(sk, rtoinfo.srto_assoc_id);
- if (!asoc && rtoinfo.srto_assoc_id && sctp_style(sk, UDP)) + if (!asoc && rtoinfo.srto_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
/* Values corresponding to the specific association. */ @@@ -6597,8 -6353,7 +6597,8 @@@ static int sctp_getsockopt_associnfo(st
asoc = sctp_id2assoc(sk, assocparams.sasoc_assoc_id);
- if (!asoc && assocparams.sasoc_assoc_id && sctp_style(sk, UDP)) + if (!asoc && assocparams.sasoc_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
/* Values correspoinding to the specific association */ @@@ -6673,6 -6428,7 +6673,6 @@@ static int sctp_getsockopt_context(stru char __user *optval, int __user *optlen) { struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc;
if (len < sizeof(struct sctp_assoc_value)) @@@ -6683,13 -6439,16 +6683,13 @@@ if (copy_from_user(¶ms, optval, len)) return -EFAULT;
- sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
- if (params.assoc_id != 0) { - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - return -EINVAL; - params.assoc_value = asoc->default_rcv_context; - } else { - params.assoc_value = sp->default_rcv_context; - } + params.assoc_value = asoc ? asoc->default_rcv_context + : sctp_sk(sk)->default_rcv_context;
if (put_user(len, optlen)) return -EFAULT; @@@ -6738,7 -6497,7 +6738,7 @@@ static int sctp_getsockopt_maxseg(struc "Use of int in maxseg socket option.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - params.assoc_id = 0; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); if (copy_from_user(¶ms, optval, len)) @@@ -6747,8 -6506,7 +6747,8 @@@ return -EINVAL;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id && sctp_style(sk, UDP)) + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
if (asoc) @@@ -6825,6 -6583,7 +6825,6 @@@ static int sctp_getsockopt_maxburst(str int __user *optlen) { struct sctp_assoc_value params; - struct sctp_sock *sp; struct sctp_association *asoc;
if (len == sizeof(int)) { @@@ -6833,7 -6592,7 +6833,7 @@@ "Use of int in max_burst socket option.\n" "Use struct sctp_assoc_value instead\n", current->comm, task_pid_nr(current)); - params.assoc_id = 0; + params.assoc_id = SCTP_FUTURE_ASSOC; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); if (copy_from_user(¶ms, optval, len)) @@@ -6841,12 -6600,15 +6841,12 @@@ } else return -EINVAL;
- sp = sctp_sk(sk); + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL;
- if (params.assoc_id != 0) { - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) - return -EINVAL; - params.assoc_value = asoc->max_burst; - } else - params.assoc_value = sp->max_burst; + params.assoc_value = asoc ? asoc->max_burst : sctp_sk(sk)->max_burst;
if (len == sizeof(int)) { if (copy_to_user(optval, ¶ms.assoc_value, len)) @@@ -6997,12 -6759,14 +6997,12 @@@ static int sctp_getsockopt_local_auth_c
to = p->gauth_chunks; asoc = sctp_id2assoc(sk, val.gauth_assoc_id); - if (!asoc && val.gauth_assoc_id && sctp_style(sk, UDP)) + if (!asoc && val.gauth_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) return -EINVAL;
- if (asoc) - ch = (struct sctp_chunks_param *)asoc->c.auth_chunks; - else - ch = ep->auth_chunk_list; - + ch = asoc ? (struct sctp_chunks_param *)asoc->c.auth_chunks + : ep->auth_chunk_list; if (!ch) goto num;
@@@ -7147,7 -6911,14 +7147,7 @@@ static int sctp_getsockopt_paddr_thresh if (copy_from_user(&val, (struct sctp_paddrthlds __user *)optval, len)) return -EFAULT;
- if (sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { - asoc = sctp_id2assoc(sk, val.spt_assoc_id); - if (!asoc) - return -ENOENT; - - val.spt_pathpfthld = asoc->pf_retrans; - val.spt_pathmaxrxt = asoc->pathmaxrxt; - } else { + if (!sctp_is_any(sk, (const union sctp_addr *)&val.spt_address)) { trans = sctp_addr_id2transport(sk, &val.spt_address, val.spt_assoc_id); if (!trans) @@@ -7155,23 -6926,6 +7155,23 @@@
val.spt_pathmaxrxt = trans->pathmaxrxt; val.spt_pathpfthld = trans->pf_retrans; + + return 0; + } + + asoc = sctp_id2assoc(sk, val.spt_assoc_id); + if (!asoc && val.spt_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + + if (asoc) { + val.spt_pathpfthld = asoc->pf_retrans; + val.spt_pathmaxrxt = asoc->pathmaxrxt; + } else { + struct sctp_sock *sp = sctp_sk(sk); + + val.spt_pathpfthld = sp->pf_retrans; + val.spt_pathmaxrxt = sp->pathmaxrxt; }
if (put_user(len, optlen) || copy_to_user(optval, &val, len)) @@@ -7302,15 -7056,17 +7302,15 @@@ static int sctp_getsockopt_pr_supported goto out;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->prsctp_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->ep->prsctp_enable; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; }
+ params.assoc_value = asoc ? asoc->prsctp_enable + : sctp_sk(sk)->ep->prsctp_enable; + if (put_user(len, optlen)) goto out;
@@@ -7341,20 -7097,17 +7341,20 @@@ static int sctp_getsockopt_default_prin goto out;
asoc = sctp_id2assoc(sk, info.pr_assoc_id); + if (!asoc && info.pr_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + retval = -EINVAL; + goto out; + } + if (asoc) { info.pr_policy = SCTP_PR_POLICY(asoc->default_flags); info.pr_value = asoc->default_timetolive; - } else if (!info.pr_assoc_id) { + } else { struct sctp_sock *sp = sctp_sk(sk);
info.pr_policy = SCTP_PR_POLICY(sp->default_flags); info.pr_value = sp->default_timetolive; - } else { - retval = -EINVAL; - goto out; }
if (put_user(len, optlen)) @@@ -7510,15 -7263,17 +7510,15 @@@ static int sctp_getsockopt_reconfig_sup goto out;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->reconf_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->ep->reconf_enable; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; }
+ params.assoc_value = asoc ? asoc->reconf_enable + : sctp_sk(sk)->ep->reconf_enable; + if (put_user(len, optlen)) goto out;
@@@ -7549,15 -7304,17 +7549,15 @@@ static int sctp_getsockopt_enable_strre goto out;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->strreset_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->ep->strreset_enable; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; }
+ params.assoc_value = asoc ? asoc->strreset_enable + : sctp_sk(sk)->ep->strreset_enable; + if (put_user(len, optlen)) goto out;
@@@ -7588,14 -7345,12 +7588,14 @@@ static int sctp_getsockopt_scheduler(st goto out;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc) { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; }
- params.assoc_value = sctp_sched_get_sched(asoc); + params.assoc_value = asoc ? sctp_sched_get_sched(asoc) + : sctp_sk(sk)->default_ss;
if (put_user(len, optlen)) goto out; @@@ -7669,15 -7424,17 +7669,15 @@@ static int sctp_getsockopt_interleaving goto out;
asoc = sctp_id2assoc(sk, params.assoc_id); - if (asoc) { - params.assoc_value = asoc->intl_enable; - } else if (!params.assoc_id) { - struct sctp_sock *sp = sctp_sk(sk); - - params.assoc_value = sp->strm_interleave; - } else { + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { retval = -EINVAL; goto out; }
+ params.assoc_value = asoc ? asoc->intl_enable + : sctp_sk(sk)->strm_interleave; + if (put_user(len, optlen)) goto out;
@@@ -7729,10 -7486,6 +7729,10 @@@ static int sctp_getsockopt_event(struc return -EINVAL;
asoc = sctp_id2assoc(sk, param.se_assoc_id); + if (!asoc && param.se_assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + return -EINVAL; + subscribe = asoc ? asoc->subscribe : sctp_sk(sk)->subscribe; param.se_on = sctp_ulpevent_type_enabled(subscribe, param.se_type);
diff --combined net/smc/af_smc.c index 48ea7669161f,b04a813fc865..46fa9f3016cc --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@@ -42,11 -42,8 +42,11 @@@ #include "smc_rx.h" #include "smc_close.h"
-static DEFINE_MUTEX(smc_create_lgr_pending); /* serialize link group - * creation +static DEFINE_MUTEX(smc_server_lgr_pending); /* serialize link group + * creation on server + */ +static DEFINE_MUTEX(smc_client_lgr_pending); /* serialize link group + * creation on client */
static void smc_tcp_listen_work(struct work_struct *); @@@ -148,33 -145,32 +148,33 @@@ static int smc_release(struct socket *s rc = smc_close_active(smc); sock_set_flag(sk, SOCK_DEAD); sk->sk_shutdown |= SHUTDOWN_MASK; - } - - sk->sk_prot->unhash(sk); - - if (smc->clcsock) { - if (smc->use_fallback && sk->sk_state == SMC_LISTEN) { + } else { + if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) + sock_put(sk); /* passive closing */ + if (sk->sk_state == SMC_LISTEN) { /* wake up clcsock accept */ rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR); } - mutex_lock(&smc->clcsock_release_lock); - sock_release(smc->clcsock); - smc->clcsock = NULL; - mutex_unlock(&smc->clcsock_release_lock); - } - if (smc->use_fallback) { - if (sk->sk_state != SMC_LISTEN && sk->sk_state != SMC_INIT) - sock_put(sk); /* passive closing */ sk->sk_state = SMC_CLOSED; sk->sk_state_change(sk); }
+ sk->sk_prot->unhash(sk); + + if (sk->sk_state == SMC_CLOSED) { + if (smc->clcsock) { + mutex_lock(&smc->clcsock_release_lock); + sock_release(smc->clcsock); + smc->clcsock = NULL; + mutex_unlock(&smc->clcsock_release_lock); + } + if (!smc->use_fallback) + smc_conn_free(&smc->conn); + } + /* detach socket */ sock_orphan(sk); sock->sk = NULL; - if (!smc->use_fallback && sk->sk_state == SMC_CLOSED) - smc_conn_free(&smc->conn); release_sock(sk);
sock_put(sk); /* final sock_put */ @@@ -295,8 -291,7 +295,8 @@@ static void smc_copy_sock_settings(stru (1UL << SOCK_RXQ_OVFL) | \ (1UL << SOCK_WIFI_STATUS) | \ (1UL << SOCK_NOFCS) | \ - (1UL << SOCK_FILTER_LOCKED)) + (1UL << SOCK_FILTER_LOCKED) | \ + (1UL << SOCK_TSTAMP_NEW)) /* copy only relevant settings and flags of SOL_SOCKET level from smc to * clc socket (since smc is not called for these options from net/core) */ @@@ -480,12 -475,7 +480,12 @@@ static int smc_connect_abort(struct smc { if (local_contact == SMC_FIRST_CONTACT) smc_lgr_forget(smc->conn.lgr); - mutex_unlock(&smc_create_lgr_pending); + if (smc->conn.lgr->is_smcd) + /* there is only one lgr role for SMC-D; use server lock */ + mutex_unlock(&smc_server_lgr_pending); + else + mutex_unlock(&smc_client_lgr_pending); + smc_conn_free(&smc->conn); return reason_code; } @@@ -570,7 -560,7 +570,7 @@@ static int smc_connect_rdma(struct smc_ struct smc_link *link; int reason_code = 0;
- mutex_lock(&smc_create_lgr_pending); + mutex_lock(&smc_client_lgr_pending); local_contact = smc_conn_create(smc, false, aclc->hdr.flag, ibdev, ibport, ntoh24(aclc->qpn), &aclc->lcl, NULL, 0); @@@ -581,8 -571,7 +581,8 @@@ reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */ else reason_code = SMC_CLC_DECL_INTERR; /* other error */ - return smc_connect_abort(smc, reason_code, 0); + mutex_unlock(&smc_client_lgr_pending); + return reason_code; } link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
@@@ -626,7 -615,7 +626,7 @@@ return smc_connect_abort(smc, reason_code, local_contact); } - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_client_lgr_pending);
smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@@ -643,14 -632,11 +643,14 @@@ static int smc_connect_ism(struct smc_s int local_contact = SMC_FIRST_CONTACT; int rc = 0;
- mutex_lock(&smc_create_lgr_pending); + /* there is only one lgr role for SMC-D; use server lock */ + mutex_lock(&smc_server_lgr_pending); local_contact = smc_conn_create(smc, true, aclc->hdr.flag, NULL, 0, 0, NULL, ismdev, aclc->gid); - if (local_contact < 0) - return smc_connect_abort(smc, SMC_CLC_DECL_MEM, 0); + if (local_contact < 0) { + mutex_unlock(&smc_server_lgr_pending); + return SMC_CLC_DECL_MEM; + }
/* Create send and receive buffers */ if (smc_buf_create(smc, true)) @@@ -664,7 -650,7 +664,7 @@@ rc = smc_clc_send_confirm(smc); if (rc) return smc_connect_abort(smc, rc, local_contact); - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending);
smc_copy_sock_settings_to_clc(smc); if (smc->sk.sk_state == SMC_INIT) @@@ -1263,7 -1249,7 +1263,7 @@@ static void smc_listen_work(struct work return; }
- mutex_lock(&smc_create_lgr_pending); + mutex_lock(&smc_server_lgr_pending); smc_close_init(new_smc); smc_rx_init(new_smc); smc_tx_init(new_smc); @@@ -1285,7 -1271,7 +1285,7 @@@ &local_contact) || smc_listen_rdma_reg(new_smc, local_contact))) { /* SMC not supported, decline */ - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP, local_contact); return; @@@ -1294,33 -1280,29 +1294,33 @@@ /* send SMC Accept CLC message */ rc = smc_clc_send_accept(new_smc, local_contact); if (rc) { - mutex_unlock(&smc_create_lgr_pending); + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, rc, local_contact); return; }
+ /* SMC-D does not need this lock any more */ + if (ism_supported) + mutex_unlock(&smc_server_lgr_pending); + /* receive SMC Confirm CLC message */ reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), SMC_CLC_CONFIRM, CLC_WAIT_TIME); if (reason_code) { - mutex_unlock(&smc_create_lgr_pending); + if (!ism_supported) + mutex_unlock(&smc_server_lgr_pending); smc_listen_decline(new_smc, reason_code, local_contact); return; }
/* finish worker */ if (!ism_supported) { - if (smc_listen_rdma_finish(new_smc, &cclc, local_contact)) { - mutex_unlock(&smc_create_lgr_pending); + rc = smc_listen_rdma_finish(new_smc, &cclc, local_contact); + mutex_unlock(&smc_server_lgr_pending); + if (rc) return; - } } smc_conn_save_peer_info(new_smc, &cclc); - mutex_unlock(&smc_create_lgr_pending); smc_listen_out_connected(new_smc); }
@@@ -1523,6 -1505,11 +1523,11 @@@ static int smc_recvmsg(struct socket *s
smc = smc_sk(sk); lock_sock(sk); + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if ((sk->sk_state == SMC_INIT) || (sk->sk_state == SMC_LISTEN) || (sk->sk_state == SMC_CLOSED)) @@@ -1858,7 -1845,11 +1863,11 @@@ static ssize_t smc_splice_read(struct s
smc = smc_sk(sk); lock_sock(sk); - + if (sk->sk_state == SMC_CLOSED && (sk->sk_shutdown & RCV_SHUTDOWN)) { + /* socket was connected before, no more data to read */ + rc = 0; + goto out; + } if (sk->sk_state == SMC_INIT || sk->sk_state == SMC_LISTEN || sk->sk_state == SMC_CLOSED) diff --combined net/smc/smc_cdc.h index e8c214b992b6,271e2524dc8f..e3b6b367f3b6 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@@ -160,7 -160,9 +160,9 @@@ static inline void smcd_curs_copy(unio #endif }
- /* calculate cursor difference between old and new, where old <= new */ + /* calculate cursor difference between old and new, where old <= new and + * difference cannot exceed size + */ static inline int smc_curs_diff(unsigned int size, union smc_host_cursor *old, union smc_host_cursor *new) @@@ -185,6 -187,28 +187,28 @@@ static inline int smc_curs_comp(unsigne return smc_curs_diff(size, old, new); }
+ /* calculate cursor difference between old and new, where old <= new and + * difference may exceed size + */ + static inline int smc_curs_diff_large(unsigned int size, + union smc_host_cursor *old, + union smc_host_cursor *new) + { + if (old->wrap < new->wrap) + return min_t(int, + (size - old->count) + new->count + + (new->wrap - old->wrap - 1) * size, + size); + + if (old->wrap > new->wrap) /* wrap has switched from 0xffff to 0x0000 */ + return min_t(int, + (size - old->count) + new->count + + (new->wrap + 0xffff - old->wrap) * size, + size); + + return max_t(int, 0, (new->count - old->count)); + } + static inline void smc_host_cursor_to_cdc(union smc_cdc_cursor *peer, union smc_host_cursor *local, struct smc_connection *conn) @@@ -245,18 -269,17 +269,18 @@@ static inline void smcr_cdc_msg_to_host }
static inline void smcd_cdc_msg_to_host(struct smc_host_cdc_msg *local, - struct smcd_cdc_msg *peer) + struct smcd_cdc_msg *peer, + struct smc_connection *conn) { union smc_host_cursor temp;
temp.wrap = peer->prod.wrap; temp.count = peer->prod.count; - atomic64_set(&local->prod.acurs, atomic64_read(&temp.acurs)); + smc_curs_copy(&local->prod, &temp, conn);
temp.wrap = peer->cons.wrap; temp.count = peer->cons.count; - atomic64_set(&local->cons.acurs, atomic64_read(&temp.acurs)); + smc_curs_copy(&local->cons, &temp, conn); local->prod_flags = peer->cons.prod_flags; local->conn_state_flags = peer->cons.conn_state_flags; } @@@ -266,15 -289,21 +290,21 @@@ static inline void smc_cdc_msg_to_host( struct smc_connection *conn) { if (conn->lgr->is_smcd) - smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer); + smcd_cdc_msg_to_host(local, (struct smcd_cdc_msg *)peer, conn); else smcr_cdc_msg_to_host(local, peer, conn); }
- struct smc_cdc_tx_pend; + struct smc_cdc_tx_pend { + struct smc_connection *conn; /* socket connection */ + union smc_host_cursor cursor; /* tx sndbuf cursor sent */ + union smc_host_cursor p_cursor; /* rx RMBE cursor produced */ + u16 ctrl_seq; /* conn. tx sequence # */ + };
int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, + struct smc_rdma_wr **wr_rdma_buf, struct smc_cdc_tx_pend **pend); void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, diff --combined net/smc/smc_close.c index 0e60dd741698,e39cadda1bf5..2ad37e998509 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@@ -345,14 -345,7 +345,7 @@@ static void smc_close_passive_work(stru
switch (sk->sk_state) { case SMC_INIT: - if (atomic_read(&conn->bytes_to_rcv) || - (rxflags->peer_done_writing && - !smc_cdc_rxed_any_close(conn))) { - sk->sk_state = SMC_APPCLOSEWAIT1; - } else { - sk->sk_state = SMC_CLOSED; - sock_put(sk); /* passive closing */ - } + sk->sk_state = SMC_APPCLOSEWAIT1; break; case SMC_ACTIVE: sk->sk_state = SMC_APPCLOSEWAIT1; @@@ -405,13 -398,8 +398,13 @@@ wakeup if (old_state != sk->sk_state) { sk->sk_state_change(sk); if ((sk->sk_state == SMC_CLOSED) && - (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) + (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) { smc_conn_free(conn); + if (smc->clcsock) { + sock_release(smc->clcsock); + smc->clcsock = NULL; + } + } } release_sock(sk); sock_put(sk); /* sock_hold done by schedulers of close_work */ diff --combined net/smc/smc_core.c index a1a6d351ae1b,aa1c551cee81..349d789a9728 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@@ -118,6 -118,7 +118,6 @@@ static void __smc_lgr_unregister_conn(s rb_erase(&conn->alert_node, &lgr->conns_all); lgr->conns_num--; conn->alert_token_local = 0; - conn->lgr = NULL; sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */ }
@@@ -127,6 -128,8 +127,8 @@@ static void smc_lgr_unregister_conn(str { struct smc_link_group *lgr = conn->lgr;
+ if (!lgr) + return; write_lock_bh(&lgr->conns_lock); if (conn->alert_token_local) { __smc_lgr_unregister_conn(conn); @@@ -299,13 -302,13 +301,13 @@@ static void smc_buf_unuse(struct smc_co conn->sndbuf_desc->used = 0; if (conn->rmb_desc) { if (!conn->rmb_desc->regerr) { - conn->rmb_desc->used = 0; if (!lgr->is_smcd) { /* unregister rmb with peer */ smc_llc_do_delete_rkey( &lgr->lnk[SMC_SINGLE_LINK], conn->rmb_desc); } + conn->rmb_desc->used = 0; } else { /* buf registration failed, reuse not possible */ write_lock_bh(&lgr->rmbs_lock); @@@ -330,9 -333,8 +332,9 @@@ void smc_conn_free(struct smc_connectio } else { smc_cdc_tx_dismiss_slots(conn); } - smc_lgr_unregister_conn(conn); /* unsets conn->lgr */ + smc_lgr_unregister_conn(conn); smc_buf_unuse(conn, lgr); /* allow buffer reuse */ + conn->lgr = NULL;
if (!lgr->conns_num) smc_lgr_schedule_free_work(lgr); @@@ -462,7 -464,6 +464,7 @@@ static void __smc_lgr_terminate(struct sock_hold(&smc->sk); /* sock_put in close work */ conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; __smc_lgr_unregister_conn(conn); + conn->lgr = NULL; write_unlock_bh(&lgr->conns_lock); if (!schedule_work(&conn->close_work)) sock_put(&smc->sk); @@@ -629,6 -630,8 +631,8 @@@ int smc_conn_create(struct smc_sock *sm local_contact = SMC_REUSE_CONTACT; conn->lgr = lgr; smc_lgr_register_conn(conn); /* add smc conn to lgr */ + if (delayed_work_pending(&lgr->free_work)) + cancel_delayed_work(&lgr->free_work); write_unlock_bh(&lgr->conns_lock); break; } diff --combined net/socket.c index d51930689b98,d80d87a395ea..643a1648fcc2 --- a/net/socket.c +++ b/net/socket.c @@@ -669,7 -669,7 +669,7 @@@ static bool skb_is_err_queue(const stru * before the software timestamp is received, a hardware TX timestamp may be * returned only if there is no software TX timestamp. Ignore false software * timestamps, which may be made in the __sock_recv_timestamp() call when the - * option SO_TIMESTAMP(NS) is enabled on the socket, even when the skb has a + * option SO_TIMESTAMP_OLD(NS) is enabled on the socket, even when the skb has a * hardware timestamp. */ static bool skb_is_swtx_tstamp(const struct sk_buff *skb, int false_tstamp) @@@ -705,9 -705,7 +705,9 @@@ void __sock_recv_timestamp(struct msghd struct sk_buff *skb) { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); - struct scm_timestamping tss; + int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); + struct scm_timestamping_internal tss; + int empty = 1, false_tstamp = 0; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); @@@ -721,54 -719,34 +721,54 @@@
if (need_software_tstamp) { if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { - struct timeval tv; - skb_get_timestamp(skb, &tv); - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, - sizeof(tv), &tv); + if (new_tstamp) { + struct __kernel_sock_timeval tv; + + skb_get_new_timestamp(skb, &tv); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, + sizeof(tv), &tv); + } else { + struct __kernel_old_timeval tv; + + skb_get_timestamp(skb, &tv); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, + sizeof(tv), &tv); + } } else { - struct timespec ts; - skb_get_timestampns(skb, &ts); - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, - sizeof(ts), &ts); + if (new_tstamp) { + struct __kernel_timespec ts; + + skb_get_new_timestampns(skb, &ts); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW, + sizeof(ts), &ts); + } else { + struct timespec ts; + + skb_get_timestampns(skb, &ts); + put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, + sizeof(ts), &ts); + } } }
memset(&tss, 0, sizeof(tss)); if ((sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) && - ktime_to_timespec_cond(skb->tstamp, tss.ts + 0)) + ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0)) empty = 0; if (shhwtstamps && (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) && !skb_is_swtx_tstamp(skb, false_tstamp) && - ktime_to_timespec_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { + ktime_to_timespec64_cond(shhwtstamps->hwtstamp, tss.ts + 2)) { empty = 0; if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && !skb_is_err_queue(skb)) put_ts_pktinfo(msg, skb); } if (!empty) { - put_cmsg(msg, SOL_SOCKET, - SCM_TIMESTAMPING, sizeof(tss), &tss); + if (sock_flag(sk, SOCK_TSTAMP_NEW)) + put_cmsg_scm_timestamping64(msg, &tss); + else + put_cmsg_scm_timestamping(msg, &tss);
if (skb_is_err_queue(skb) && skb->len && SKB_EXT_ERR(skb)->opt_stats) @@@ -963,8 -941,7 +963,7 @@@ void dlci_ioctl_set(int (*hook) (unsign EXPORT_SYMBOL(dlci_ioctl_set);
static long sock_do_ioctl(struct net *net, struct socket *sock, - unsigned int cmd, unsigned long arg, - unsigned int ifreq_size) + unsigned int cmd, unsigned long arg) { int err; void __user *argp = (void __user *)arg; @@@ -990,11 -967,11 +989,11 @@@ } else { struct ifreq ifr; bool need_copyout; - if (copy_from_user(&ifr, argp, ifreq_size)) + if (copy_from_user(&ifr, argp, sizeof(struct ifreq))) return -EFAULT; err = dev_ioctl(net, cmd, &ifr, &need_copyout); if (!err && need_copyout) - if (copy_to_user(argp, &ifr, ifreq_size)) + if (copy_to_user(argp, &ifr, sizeof(struct ifreq))) return -EFAULT; } return err; @@@ -1093,8 -1070,7 +1092,7 @@@ static long sock_ioctl(struct file *fil err = open_related_ns(&net->ns, get_net_ns); break; default: - err = sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct ifreq)); + err = sock_do_ioctl(net, sock, cmd, arg); break; } return err; @@@ -2802,8 -2778,7 +2800,7 @@@ static int do_siocgstamp(struct net *ne int err;
set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); set_fs(old_fs); if (!err) err = compat_put_timeval(&ktv, up); @@@ -2819,8 -2794,7 +2816,7 @@@ static int do_siocgstampns(struct net * int err;
set_fs(KERNEL_DS); - err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts, - sizeof(struct compat_ifreq)); + err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); set_fs(old_fs); if (!err) err = compat_put_timespec(&kts, up); @@@ -3016,6 -2990,54 +3012,54 @@@ static int compat_ifr_data_ioctl(struc return dev_ioctl(net, cmd, &ifreq, NULL); }
+ static int compat_ifreq_ioctl(struct net *net, struct socket *sock, + unsigned int cmd, + struct compat_ifreq __user *uifr32) + { + struct ifreq __user *uifr; + int err; + + /* Handle the fact that while struct ifreq has the same *layout* on + * 32/64 for everything but ifreq::ifru_ifmap and ifreq::ifru_data, + * which are handled elsewhere, it still has different *size* due to + * ifreq::ifru_ifmap (which is 16 bytes on 32 bit, 24 bytes on 64-bit, + * resulting in struct ifreq being 32 and 40 bytes respectively). + * As a result, if the struct happens to be at the end of a page and + * the next page isn't readable/writable, we get a fault. To prevent + * that, copy back and forth to the full size. + */ + + uifr = compat_alloc_user_space(sizeof(*uifr)); + if (copy_in_user(uifr, uifr32, sizeof(*uifr32))) + return -EFAULT; + + err = sock_do_ioctl(net, sock, cmd, (unsigned long)uifr); + + if (!err) { + switch (cmd) { + case SIOCGIFFLAGS: + case SIOCGIFMETRIC: + case SIOCGIFMTU: + case SIOCGIFMEM: + case SIOCGIFHWADDR: + case SIOCGIFINDEX: + case SIOCGIFADDR: + case SIOCGIFBRDADDR: + case SIOCGIFDSTADDR: + case SIOCGIFNETMASK: + case SIOCGIFPFLAGS: + case SIOCGIFTXQLEN: + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCGIFNAME: + if (copy_in_user(uifr32, uifr, sizeof(*uifr32))) + err = -EFAULT; + break; + } + } + return err; + } + static int compat_sioc_ifmap(struct net *net, unsigned int cmd, struct compat_ifreq __user *uifr32) { @@@ -3131,8 -3153,7 +3175,7 @@@ static int routing_ioctl(struct net *ne }
set_fs(KERNEL_DS); - ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r, - sizeof(struct compat_ifreq)); + ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); set_fs(old_fs);
out: @@@ -3232,21 -3253,22 +3275,22 @@@ static int compat_sock_ioctl_trans(stru case SIOCSIFTXQLEN: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCGIFNAME: case SIOCSIFNAME: case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - case SIOCSARP: - case SIOCGARP: - case SIOCDARP: - case SIOCATMARK: case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: case SIOCBONDCHANGEACTIVE: - case SIOCGIFNAME: - return sock_do_ioctl(net, sock, cmd, arg, - sizeof(struct compat_ifreq)); + return compat_ifreq_ioctl(net, sock, cmd, argp); + + case SIOCSARP: + case SIOCGARP: + case SIOCDARP: + case SIOCATMARK: + return sock_do_ioctl(net, sock, cmd, arg); }
return -ENOIOCTLCMD; diff --combined tools/bpf/bpftool/map.c index 2160a8ef17e5,1ef1ee2280a2..e0c650d91784 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@@ -21,7 -21,7 +21,7 @@@ #include "json_writer.h" #include "main.h"
-static const char * const map_type_name[] = { +const char * const map_type_name[] = { [BPF_MAP_TYPE_UNSPEC] = "unspec", [BPF_MAP_TYPE_HASH] = "hash", [BPF_MAP_TYPE_ARRAY] = "array", @@@ -48,8 -48,6 +48,8 @@@ [BPF_MAP_TYPE_STACK] = "stack", };
+const size_t map_type_name_size = ARRAY_SIZE(map_type_name); + static bool map_is_per_cpu(__u32 type) { return type == BPF_MAP_TYPE_PERCPU_HASH || @@@ -287,21 -285,16 +287,21 @@@ static void print_entry_plain(struct bp single_line = info->key_size + info->value_size <= 24 && !break_names;
- printf("key:%c", break_names ? '\n' : ' '); - fprint_hex(stdout, key, info->key_size, " "); + if (info->key_size) { + printf("key:%c", break_names ? '\n' : ' '); + fprint_hex(stdout, key, info->key_size, " ");
- printf(single_line ? " " : "\n"); + printf(single_line ? " " : "\n"); + }
- printf("value:%c", break_names ? '\n' : ' '); - if (value) - fprint_hex(stdout, value, info->value_size, " "); - else - printf("<no entry>"); + if (info->value_size) { + printf("value:%c", break_names ? '\n' : ' '); + if (value) + fprint_hex(stdout, value, info->value_size, + " "); + else + printf("<no entry>"); + }
printf("\n"); } else { @@@ -310,23 -303,19 +310,23 @@@ n = get_possible_cpus(); step = round_up(info->value_size, 8);
- printf("key:\n"); - fprint_hex(stdout, key, info->key_size, " "); - printf("\n"); - for (i = 0; i < n; i++) { - printf("value (CPU %02d):%c", - i, info->value_size > 16 ? '\n' : ' '); - if (value) - fprint_hex(stdout, value + i * step, - info->value_size, " "); - else - printf("<no entry>"); + if (info->key_size) { + printf("key:\n"); + fprint_hex(stdout, key, info->key_size, " "); printf("\n"); } + if (info->value_size) { + for (i = 0; i < n; i++) { + printf("value (CPU %02d):%c", + i, info->value_size > 16 ? '\n' : ' '); + if (value) + fprint_hex(stdout, value + i * step, + info->value_size, " "); + else + printf("<no entry>"); + printf("\n"); + } + } } }
@@@ -358,6 -347,20 +358,20 @@@ static char **parse_bytes(char **argv, return argv + i; }
+ /* on per cpu maps we must copy the provided value on all value instances */ + static void fill_per_cpu_value(struct bpf_map_info *info, void *value) + { + unsigned int i, n, step; + + if (!map_is_per_cpu(info->type)) + return; + + n = get_possible_cpus(); + step = round_up(info->value_size, 8); + for (i = 1; i < n; i++) + memcpy(value + i * step, value, info->value_size); + } + static int parse_elem(char **argv, struct bpf_map_info *info, void *key, void *value, __u32 key_size, __u32 value_size, __u32 *flags, __u32 **value_fd) @@@ -426,9 -429,6 +440,9 @@@ p_err("not enough value arguments for map of progs"); return -1; } + if (is_prefix(*argv, "id")) + p_info("Warning: updating program array via MAP_ID, make sure this map is kept open\n" + " by some process or pinned otherwise update will be lost");
fd = prog_parse_fd(&argc, &argv); if (fd < 0) @@@ -440,6 -440,8 +454,8 @@@ argv = parse_bytes(argv, "value", value, value_size); if (!argv) return -1; + + fill_per_cpu_value(info, value); }
return parse_elem(argv, info, key, NULL, key_size, value_size, @@@ -511,10 -513,9 +527,9 @@@ static int show_map_close_json(int fd, jsonw_uint_field(json_wtr, "owner_prog_type", prog_type); } - if (atoi(owner_jited)) - jsonw_bool_field(json_wtr, "owner_jited", true); - else - jsonw_bool_field(json_wtr, "owner_jited", false); + if (owner_jited) + jsonw_bool_field(json_wtr, "owner_jited", + !!atoi(owner_jited));
free(owner_prog_type); free(owner_jited); @@@ -567,7 -568,8 +582,8 @@@ static int show_map_close_plain(int fd char *owner_prog_type = get_fdinfo(fd, "owner_prog_type"); char *owner_jited = get_fdinfo(fd, "owner_jited");
- printf("\n\t"); + if (owner_prog_type || owner_jited) + printf("\n\t"); if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type);
@@@ -577,10 -579,9 +593,9 @@@ else printf("owner_prog_type %d ", prog_type); } - if (atoi(owner_jited)) - printf("owner jited"); - else - printf("owner not jited"); + if (owner_jited) + printf("owner%s jited", + atoi(owner_jited) ? "" : " not");
free(owner_prog_type); free(owner_jited); @@@ -793,32 -794,6 +808,32 @@@ exit_free return err; }
+static int alloc_key_value(struct bpf_map_info *info, void **key, void **value) +{ + *key = NULL; + *value = NULL; + + if (info->key_size) { + *key = malloc(info->key_size); + if (!*key) { + p_err("key mem alloc failed"); + return -1; + } + } + + if (info->value_size) { + *value = alloc_value(info); + if (!*value) { + p_err("value mem alloc failed"); + free(*key); + *key = NULL; + return -1; + } + } + + return 0; +} + static int do_update(int argc, char **argv) { struct bpf_map_info info = {}; @@@ -835,9 -810,13 +850,9 @@@ if (fd < 0) return -1;
- key = malloc(info.key_size); - value = alloc_value(&info); - if (!key || !value) { - p_err("mem alloc failed"); - err = -1; + err = alloc_key_value(&info, &key, &value); + if (err) goto exit_free; - }
err = parse_elem(argv, &info, key, value, info.key_size, info.value_size, &flags, &value_fd); @@@ -862,51 -841,12 +877,51 @@@ exit_free return err; }
+static void print_key_value(struct bpf_map_info *info, void *key, + void *value) +{ + json_writer_t *btf_wtr; + struct btf *btf = NULL; + int err; + + err = btf__get_from_id(info->btf_id, &btf); + if (err) { + p_err("failed to get btf"); + return; + } + + if (json_output) { + print_entry_json(info, key, value, btf); + } else if (btf) { + /* if here json_wtr wouldn't have been initialised, + * so let's create separate writer for btf + */ + btf_wtr = get_btf_writer(); + if (!btf_wtr) { + p_info("failed to create json writer for btf. falling back to plain output"); + btf__free(btf); + btf = NULL; + print_entry_plain(info, key, value); + } else { + struct btf_dumper d = { + .btf = btf, + .jw = btf_wtr, + .is_plain_text = true, + }; + + do_dump_btf(&d, info, key, value); + jsonw_destroy(&btf_wtr); + } + } else { + print_entry_plain(info, key, value); + } + btf__free(btf); +} + static int do_lookup(int argc, char **argv) { struct bpf_map_info info = {}; __u32 len = sizeof(info); - json_writer_t *btf_wtr; - struct btf *btf = NULL; void *key, *value; int err; int fd; @@@ -918,9 -858,13 +933,9 @@@ if (fd < 0) return -1;
- key = malloc(info.key_size); - value = alloc_value(&info); - if (!key || !value) { - p_err("mem alloc failed"); - err = -1; + err = alloc_key_value(&info, &key, &value); + if (err) goto exit_free; - }
err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL); if (err) @@@ -944,12 -888,43 +959,12 @@@ }
/* here means bpf_map_lookup_elem() succeeded */ - err = btf__get_from_id(info.btf_id, &btf); - if (err) { - p_err("failed to get btf"); - goto exit_free; - } - - if (json_output) { - print_entry_json(&info, key, value, btf); - } else if (btf) { - /* if here json_wtr wouldn't have been initialised, - * so let's create separate writer for btf - */ - btf_wtr = get_btf_writer(); - if (!btf_wtr) { - p_info("failed to create json writer for btf. falling back to plain output"); - btf__free(btf); - btf = NULL; - print_entry_plain(&info, key, value); - } else { - struct btf_dumper d = { - .btf = btf, - .jw = btf_wtr, - .is_plain_text = true, - }; - - do_dump_btf(&d, &info, key, value); - jsonw_destroy(&btf_wtr); - } - } else { - print_entry_plain(&info, key, value); - } + print_key_value(&info, key, value);
exit_free: free(key); free(value); close(fd); - btf__free(btf);
return err; } @@@ -1162,49 -1137,6 +1177,49 @@@ static int do_create(int argc, char **a return 0; }
+static int do_pop_dequeue(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + void *key, *value; + int err; + int fd; + + if (argc < 2) + usage(); + + fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + if (fd < 0) + return -1; + + err = alloc_key_value(&info, &key, &value); + if (err) + goto exit_free; + + err = bpf_map_lookup_and_delete_elem(fd, key, value); + if (err) { + if (errno == ENOENT) { + if (json_output) + jsonw_null(json_wtr); + else + printf("Error: empty map\n"); + } else { + p_err("pop failed: %s", strerror(errno)); + } + + goto exit_free; + } + + print_key_value(&info, key, value); + +exit_free: + free(key); + free(value); + close(fd); + + return err; +} + static int do_help(int argc, char **argv) { if (json_output) { @@@ -1218,17 -1150,12 +1233,17 @@@ " entries MAX_ENTRIES name NAME [flags FLAGS] \\n" " [dev NAME]\n" " %s %s dump MAP\n" - " %s %s update MAP key DATA value VALUE [UPDATE_FLAGS]\n" - " %s %s lookup MAP key DATA\n" + " %s %s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n" + " %s %s lookup MAP [key DATA]\n" " %s %s getnext MAP [key DATA]\n" " %s %s delete MAP key DATA\n" " %s %s pin MAP FILE\n" " %s %s event_pipe MAP [cpu N index M]\n" + " %s %s peek MAP\n" + " %s %s push MAP value VALUE\n" + " %s %s pop MAP\n" + " %s %s enqueue MAP value VALUE\n" + " %s %s dequeue MAP\n" " %s %s help\n" "\n" " " HELP_SPEC_MAP "\n" @@@ -1246,8 -1173,7 +1261,8 @@@ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], - bin_name, argv[-2]); + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2], + bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
return 0; } @@@ -1264,11 -1190,6 +1279,11 @@@ static const struct cmd cmds[] = { "pin", do_pin }, { "event_pipe", do_event_pipe }, { "create", do_create }, + { "peek", do_lookup }, + { "push", do_update }, + { "enqueue", do_update }, + { "pop", do_pop_dequeue }, + { "dequeue", do_pop_dequeue }, { 0 } };
diff --combined tools/bpf/bpftool/prog.c index 0640e9bc0ada,b54ed82b9589..33ed0806ccc0 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@@ -78,13 -78,14 +78,14 @@@ static void print_boot_time(__u64 nsecs
static int prog_fd_by_tag(unsigned char *tag) { - struct bpf_prog_info info = {}; - __u32 len = sizeof(info); unsigned int id = 0; int err; int fd;
while (true) { + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + err = bpf_prog_get_next_id(id, &id); if (err) { p_err("%s", strerror(errno)); @@@ -930,9 -931,10 +931,9 @@@ static int load_with_options(int argc, err = libbpf_prog_type_by_name(type, &attr.prog_type, &expected_attach_type); free(type); - if (err < 0) { - p_err("unknown program type '%s'", *argv); + if (err < 0) goto err_free_reuse_maps; - } + NEXT_ARG(); } else if (is_prefix(*argv, "map")) { void *new_map_replace; @@@ -1027,8 -1029,11 +1028,8 @@@
err = libbpf_prog_type_by_name(sec_name, &prog_type, &expected_attach_type); - if (err < 0) { - p_err("failed to guess program type based on section name %s\n", - sec_name); + if (err < 0) goto err_close_obj; - } }
bpf_program__set_ifindex(pos, ifindex); diff --combined tools/testing/selftests/bpf/test_btf.c index 447acc34db94,91420fa83b08..ee723774015a --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@@ -18,7 -18,6 +18,7 @@@ #include <unistd.h> #include <fcntl.h> #include <errno.h> +#include <assert.h> #include <bpf/libbpf.h> #include <bpf/btf.h>
@@@ -52,10 -51,18 +52,10 @@@ static int count_result(int err return err; }
-#define __printf(a, b) __attribute__((format(printf, a, b))) - -__printf(1, 2) -static int __base_pr(const char *format, ...) +static int __base_pr(enum libbpf_print_level level __attribute__((unused)), + const char *format, va_list args) { - va_list args; - int err; - - va_start(args, format); - err = vfprintf(stderr, format, args); - va_end(args); - return err; + return vfprintf(stderr, format, args); }
#define BTF_INFO_ENC(kind, kind_flag, vlen) \ @@@ -70,21 -77,12 +70,21 @@@ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ BTF_INT_ENC(encoding, bits_offset, bits)
+#define BTF_FWD_ENC(name, kind_flag) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FWD, kind_flag, 0), 0) + #define BTF_ARRAY_ENC(type, index_type, nr_elems) \ (type), (index_type), (nr_elems) #define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \ BTF_ARRAY_ENC(type, index_type, nr_elems)
+#define BTF_STRUCT_ENC(name, nr_elems, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, nr_elems), sz) + +#define BTF_UNION_ENC(name, nr_elems, sz) \ + BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_UNION, 0, nr_elems), sz) + #define BTF_MEMBER_ENC(name, type, bits_offset) \ (name), (type), (bits_offset) #define BTF_ENUM_ENC(name, val) (name), (val) @@@ -100,12 -98,6 +100,12 @@@ #define BTF_CONST_ENC(type) \ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), type)
+#define BTF_VOLATILE_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), type) + +#define BTF_RESTRICT_ENC(type) \ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), type) + #define BTF_FUNC_PROTO_ENC(ret_type, nargs) \ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, nargs), ret_type)
@@@ -118,10 -110,6 +118,10 @@@ #define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f
+#define NAME_NTH(N) (0xffff0000 | N) +#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000) +#define GET_NAME_NTH_IDX(X) (X & 0x0000ffff) + #define MAX_NR_RAW_U32 1024 #define BTF_LOG_BUF_SIZE 65535
@@@ -130,14 -118,12 +130,14 @@@ static struct args unsigned int file_test_num; unsigned int get_info_test_num; unsigned int info_raw_test_num; + unsigned int dedup_test_num; bool raw_test; bool file_test; bool get_info_test; bool pprint_test; bool always_log; bool info_raw_test; + bool dedup_test; } args;
static char btf_log_buf[BTF_LOG_BUF_SIZE]; @@@ -148,12 -134,6 +148,12 @@@ static struct btf_header hdr_tmpl = .hdr_len = sizeof(struct btf_header), };
+/* several different mapv kinds(types) supported by pprint */ +enum pprint_mapv_kind_t { + PPRINT_MAPV_KIND_BASIC = 0, + PPRINT_MAPV_KIND_INT128, +}; + struct btf_raw_test { const char *descr; const char *str_sec; @@@ -176,7 -156,6 +176,7 @@@ int type_off_delta; int str_off_delta; int str_len_delta; + enum pprint_mapv_kind_t mapv_kind; };
#define BTF_STR_SEC(str) \ @@@ -1902,13 -1881,12 +1902,12 @@@ static struct btf_raw_test raw_tests[] },
{ - .descr = "func proto (CONST=>TYPEDEF=>FUNC_PROTO)", + .descr = "func proto (TYPEDEF=>FUNC_PROTO)", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4), /* [2] */ - BTF_CONST_ENC(4), /* [3] */ - BTF_TYPEDEF_ENC(NAME_TBD, 5), /* [4] */ - BTF_FUNC_PROTO_ENC(0, 2), /* [5] */ + BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */ + BTF_FUNC_PROTO_ENC(0, 2), /* [4] */ BTF_FUNC_PROTO_ARG_ENC(0, 1), BTF_FUNC_PROTO_ARG_ENC(0, 2), BTF_END_RAW, @@@ -1922,8 -1900,6 +1921,6 @@@ .key_type_id = 1, .value_type_id = 1, .max_entries = 4, - .btf_load_err = true, - .err_str = "Invalid type_id", },
{ @@@ -1978,7 -1954,7 +1975,7 @@@ /* void (*)(int a, unsigned int <bad_name_off>) */ BTF_FUNC_PROTO_ENC(0, 2), /* [3] */ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), - BTF_FUNC_PROTO_ARG_ENC(0xffffffff, 2), + BTF_FUNC_PROTO_ARG_ENC(0x0fffffff, 2), BTF_END_RAW, }, .str_sec = "\0a", @@@ -2728,99 -2704,6 +2725,99 @@@ .err_str = "Invalid member offset", },
+{ + .descr = "128-bit int", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "int_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct, 128-bit int member", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct, 120-bit int member bitfield", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 120, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, 0), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct, kind_flag, 128-bit int member", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + +{ + .descr = "struct, kind_flag, 120-bit int member bitfield", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 128, 16), /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 16), /* [3] */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(120, 0)), + BTF_END_RAW, + }, + BTF_STR_SEC("\0A"), + .map_type = BPF_MAP_TYPE_ARRAY, + .map_name = "struct_type_check_btf", + .key_size = sizeof(int), + .value_size = sizeof(int), + .key_type_id = 1, + .value_type_id = 1, + .max_entries = 4, +}, + }; /* struct btf_raw_test raw_tests[] */
static const char *get_next_str(const char *start, const char *end) @@@ -2848,13 -2731,11 +2845,13 @@@ static void *btf_raw_create(const struc const char **ret_next_str) { const char *next_str = str, *end_str = str + str_sec_size; + const char **strs_idx = NULL, **tmp_strs_idx; + int strs_cap = 0, strs_cnt = 0, next_str_idx = 0; unsigned int size_needed, offset; struct btf_header *ret_hdr; - int i, type_sec_size; + int i, type_sec_size, err = 0; uint32_t *ret_types; - void *raw_btf; + void *raw_btf = NULL;
type_sec_size = get_raw_sec_size(raw_types); if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types")) @@@ -2869,44 -2750,17 +2866,44 @@@ memcpy(raw_btf, hdr, sizeof(*hdr)); offset = sizeof(*hdr);
+ /* Index strings */ + while ((next_str = get_next_str(next_str, end_str))) { + if (strs_cnt == strs_cap) { + strs_cap += max(16, strs_cap / 2); + tmp_strs_idx = realloc(strs_idx, + sizeof(*strs_idx) * strs_cap); + if (CHECK(!tmp_strs_idx, + "Cannot allocate memory for strs_idx")) { + err = -1; + goto done; + } + strs_idx = tmp_strs_idx; + } + strs_idx[strs_cnt++] = next_str; + next_str += strlen(next_str); + } + /* Copy type section */ ret_types = raw_btf + offset; for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) { if (raw_types[i] == NAME_TBD) { - next_str = get_next_str(next_str, end_str); - if (CHECK(!next_str, "Error in getting next_str")) { - free(raw_btf); - return NULL; + if (CHECK(next_str_idx == strs_cnt, + "Error in getting next_str #%d", + next_str_idx)) { + err = -1; + goto done; } - ret_types[i] = next_str - str; - next_str += strlen(next_str); + ret_types[i] = strs_idx[next_str_idx++] - str; + } else if (IS_NAME_NTH(raw_types[i])) { + int idx = GET_NAME_NTH_IDX(raw_types[i]); + + if (CHECK(idx <= 0 || idx > strs_cnt, + "Error getting string #%d, strs_cnt:%d", + idx, strs_cnt)) { + err = -1; + goto done; + } + ret_types[i] = strs_idx[idx-1] - str; } else { ret_types[i] = raw_types[i]; } @@@ -2923,17 -2777,8 +2920,17 @@@
*btf_size = size_needed; if (ret_next_str) - *ret_next_str = next_str; + *ret_next_str = + next_str_idx < strs_cnt ? strs_idx[next_str_idx] : NULL;
+done: + if (err) { + if (raw_btf) + free(raw_btf); + if (strs_idx) + free(strs_idx); + return NULL; + } return raw_btf; }
@@@ -3682,16 -3527,6 +3679,16 @@@ struct pprint_mapv uint32_t bits2c:2; };
+#ifdef __SIZEOF_INT128__ +struct pprint_mapv_int128 { + __int128 si128a; + __int128 si128b; + unsigned __int128 bits3:3; + unsigned __int128 bits80:80; + unsigned __int128 ui128; +}; +#endif + static struct btf_raw_test pprint_test_template[] = { { .raw_types = { @@@ -3883,35 -3718,6 +3880,35 @@@ .max_entries = 128 * 1024, },
+#ifdef __SIZEOF_INT128__ +{ + /* test int128 */ + .raw_types = { + /* unsigned int */ /* [1] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4), + /* __int128 */ /* [2] */ + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 128, 16), + /* unsigned __int128 */ /* [3] */ + BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 128, 16), + /* struct pprint_mapv_int128 */ /* [4] */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 5), 64), + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 0)), /* si128a */ + BTF_MEMBER_ENC(NAME_TBD, 2, BTF_MEMBER_OFFSET(0, 128)), /* si128b */ + BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(3, 256)), /* bits3 */ + BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(80, 259)), /* bits80 */ + BTF_MEMBER_ENC(NAME_TBD, 3, BTF_MEMBER_OFFSET(0, 384)), /* ui128 */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0unsigned int\0__int128\0unsigned __int128\0pprint_mapv_int128\0si128a\0si128b\0bits3\0bits80\0ui128"), + .key_size = sizeof(unsigned int), + .value_size = sizeof(struct pprint_mapv_int128), + .key_type_id = 1, + .value_type_id = 4, + .max_entries = 128 * 1024, + .mapv_kind = PPRINT_MAPV_KIND_INT128, +}, +#endif + };
static struct btf_pprint_test_meta { @@@ -3978,108 -3784,24 +3975,108 @@@
};
+static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind) +{ + if (mapv_kind == PPRINT_MAPV_KIND_BASIC) + return sizeof(struct pprint_mapv); + +#ifdef __SIZEOF_INT128__ + if (mapv_kind == PPRINT_MAPV_KIND_INT128) + return sizeof(struct pprint_mapv_int128); +#endif + + assert(0); +}
-static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i, +static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind, + void *mapv, uint32_t i, int num_cpus, int rounded_value_size) { int cpu;
- for (cpu = 0; cpu < num_cpus; cpu++) { - v->ui32 = i + cpu; - v->si32 = -i; - v->unused_bits2a = 3; - v->bits28 = i; - v->unused_bits2b = 3; - v->ui64 = i; - v->aenum = i & 0x03; - v->ui32b = 4; - v->bits2c = 1; - v = (void *)v + rounded_value_size; + if (mapv_kind == PPRINT_MAPV_KIND_BASIC) { + struct pprint_mapv *v = mapv; + + for (cpu = 0; cpu < num_cpus; cpu++) { + v->ui32 = i + cpu; + v->si32 = -i; + v->unused_bits2a = 3; + v->bits28 = i; + v->unused_bits2b = 3; + v->ui64 = i; + v->aenum = i & 0x03; + v->ui32b = 4; + v->bits2c = 1; + v = (void *)v + rounded_value_size; + } } + +#ifdef __SIZEOF_INT128__ + if (mapv_kind == PPRINT_MAPV_KIND_INT128) { + struct pprint_mapv_int128 *v = mapv; + + for (cpu = 0; cpu < num_cpus; cpu++) { + v->si128a = i; + v->si128b = -i; + v->bits3 = i & 0x07; + v->bits80 = (((unsigned __int128)1) << 64) + i; + v->ui128 = (((unsigned __int128)2) << 64) + i; + v = (void *)v + rounded_value_size; + } + } +#endif +} + +ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind, + char *expected_line, ssize_t line_size, + bool percpu_map, unsigned int next_key, + int cpu, void *mapv) +{ + ssize_t nexpected_line = -1; + + if (mapv_kind == PPRINT_MAPV_KIND_BASIC) { + struct pprint_mapv *v = mapv; + + nexpected_line = snprintf(expected_line, line_size, + "%s%u: {%u,0,%d,0x%x,0x%x,0x%x," + "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s," + "%u,0x%x}\n", + percpu_map ? "\tcpu" : "", + percpu_map ? cpu : next_key, + v->ui32, v->si32, + v->unused_bits2a, + v->bits28, + v->unused_bits2b, + v->ui64, + v->ui8a[0], v->ui8a[1], + v->ui8a[2], v->ui8a[3], + v->ui8a[4], v->ui8a[5], + v->ui8a[6], v->ui8a[7], + pprint_enum_str[v->aenum], + v->ui32b, + v->bits2c); + } + +#ifdef __SIZEOF_INT128__ + if (mapv_kind == PPRINT_MAPV_KIND_INT128) { + struct pprint_mapv_int128 *v = mapv; + + nexpected_line = snprintf(expected_line, line_size, + "%s%u: {0x%lx,0x%lx,0x%lx," + "0x%lx%016lx,0x%lx%016lx}\n", + percpu_map ? "\tcpu" : "", + percpu_map ? cpu : next_key, + (uint64_t)v->si128a, + (uint64_t)v->si128b, + (uint64_t)v->bits3, + (uint64_t)(v->bits80 >> 64), + (uint64_t)v->bits80, + (uint64_t)(v->ui128 >> 64), + (uint64_t)v->ui128); + } +#endif + + return nexpected_line; }
static int check_line(const char *expected_line, int nexpected_line, @@@ -4103,10 -3825,10 +4100,10 @@@ static int do_test_pprint(int test_num) { const struct btf_raw_test *test = &pprint_test_template[test_num]; + enum pprint_mapv_kind_t mapv_kind = test->mapv_kind; struct bpf_create_map_attr create_attr = {}; bool ordered_map, lossless_map, percpu_map; int err, ret, num_cpus, rounded_value_size; - struct pprint_mapv *mapv = NULL; unsigned int key, nr_read_elems; int map_fd = -1, btf_fd = -1; unsigned int raw_btf_size; @@@ -4115,7 -3837,6 +4112,7 @@@ char pin_path[255]; size_t line_len = 0; char *line = NULL; + void *mapv = NULL; uint8_t *raw_btf; ssize_t nread;
@@@ -4168,7 -3889,7 +4165,7 @@@
percpu_map = test->percpu_map; num_cpus = percpu_map ? bpf_num_possible_cpus() : 1; - rounded_value_size = round_up(sizeof(struct pprint_mapv), 8); + rounded_value_size = round_up(get_pprint_mapv_size(mapv_kind), 8); mapv = calloc(num_cpus, rounded_value_size); if (CHECK(!mapv, "mapv allocation failure")) { err = -1; @@@ -4176,7 -3897,7 +4173,7 @@@ }
for (key = 0; key < test->max_entries; key++) { - set_pprint_mapv(mapv, key, num_cpus, rounded_value_size); + set_pprint_mapv(mapv_kind, mapv, key, num_cpus, rounded_value_size); bpf_map_update_elem(map_fd, &key, mapv, 0); }
@@@ -4200,13 -3921,13 +4197,13 @@@ ordered_map = test->ordered_map; lossless_map = test->lossless_map; do { - struct pprint_mapv *cmapv; ssize_t nexpected_line; unsigned int next_key; + void *cmapv; int cpu;
next_key = ordered_map ? nr_read_elems : atoi(line); - set_pprint_mapv(mapv, next_key, num_cpus, rounded_value_size); + set_pprint_mapv(mapv_kind, mapv, next_key, num_cpus, rounded_value_size); cmapv = mapv;
for (cpu = 0; cpu < num_cpus; cpu++) { @@@ -4239,16 -3960,31 +4236,16 @@@ break; }
- nexpected_line = snprintf(expected_line, sizeof(expected_line), - "%s%u: {%u,0,%d,0x%x,0x%x,0x%x," - "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s," - "%u,0x%x}\n", - percpu_map ? "\tcpu" : "", - percpu_map ? cpu : next_key, - cmapv->ui32, cmapv->si32, - cmapv->unused_bits2a, - cmapv->bits28, - cmapv->unused_bits2b, - cmapv->ui64, - cmapv->ui8a[0], cmapv->ui8a[1], - cmapv->ui8a[2], cmapv->ui8a[3], - cmapv->ui8a[4], cmapv->ui8a[5], - cmapv->ui8a[6], cmapv->ui8a[7], - pprint_enum_str[cmapv->aenum], - cmapv->ui32b, - cmapv->bits2c); - + nexpected_line = get_pprint_expected_line(mapv_kind, expected_line, + sizeof(expected_line), + percpu_map, next_key, + cpu, cmapv); err = check_line(expected_line, nexpected_line, sizeof(expected_line), line); if (err == -1) goto done;
- cmapv = (void *)cmapv + rounded_value_size; + cmapv = cmapv + rounded_value_size; }
if (percpu_map) { @@@ -4344,10 -4080,6 +4341,10 @@@ static struct prog_info_raw_test __u32 line_info_rec_size; __u32 nr_jited_ksyms; bool expected_prog_load_failure; + __u32 dead_code_cnt; + __u32 dead_code_mask; + __u32 dead_func_cnt; + __u32 dead_func_mask; } info_raw_tests[] = { { .descr = "func_type (main func + one sub)", @@@ -4774,369 -4506,6 +4771,369 @@@ .expected_prog_load_failure = true, },
+{ + .descr = "line_info (dead start)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0/* dead jmp */\0int a=1;\0int b=2;\0return a + b;\0return a + b;"), + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 7), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 5, 6), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .dead_code_cnt = 1, + .dead_code_mask = 0x01, +}, + +{ + .descr = "line_info (dead end)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0int a=1;\0int b=2;\0return a + b;\0/* dead jmp */\0return a + b;\0/* dead exit */"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, 1), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 0, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 12), + BPF_LINE_INFO_ENC(1, 0, NAME_TBD, 2, 11), + BPF_LINE_INFO_ENC(2, 0, NAME_TBD, 3, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 4, 9), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 5, 8), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 6, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .dead_code_cnt = 2, + .dead_code_mask = 0x28, +}, + +{ + .descr = "line_info (dead code + subprog + func_info)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0sub\0main\0int a=1+1;\0/* dead jmp */" + "\0/* dead */\0/* dead */\0/* dead */\0/* dead */" + "\0/* dead */\0/* dead */\0/* dead */\0/* dead */" + "\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 8), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_1), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {14, 3} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(14, 0, NAME_TBD, 3, 8), + BPF_LINE_INFO_ENC(16, 0, NAME_TBD, 4, 7), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .dead_code_cnt = 9, + .dead_code_mask = 0x3fe, +}, + +{ + .descr = "line_info (dead subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* live call */" + "\0return 0;\0return 0;\0/* dead */\0/* dead */" + "\0/* dead */\0return bla + 1;\0return bla + 1;" + "\0return bla + 1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_CALL_REL(3), + BPF_CALL_REL(5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_0, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 3, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {6, 3}, {9, 5} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .dead_code_cnt = 3, + .dead_code_mask = 0x70, + .dead_func_cnt = 1, + .dead_func_mask = 0x2, +}, + +{ + .descr = "line_info (dead last subprog)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0dead\0main\0int a=1+1;\0/* live call */" + "\0return 0;\0/* dead */\0/* dead */"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_CALL_REL(2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {5, 3} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 1, + .dead_code_cnt = 2, + .dead_code_mask = 0x18, + .dead_func_cnt = 1, + .dead_func_mask = 0x2, +}, + +{ + .descr = "line_info (dead subprog + dead start)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* dead */" + "\0return 0;\0return 0;\0return 0;" + "\0/* dead */\0/* dead */\0/* dead */\0/* dead */" + "\0return b + 1;\0return b + 1;\0return b + 1;"), + .insns = { + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_CALL_REL(3), + BPF_CALL_REL(5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_0, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 3, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {7, 3}, {10, 5} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(10, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9), + BPF_LINE_INFO_ENC(13, 0, NAME_TBD, 2, 9), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .dead_code_cnt = 5, + .dead_code_mask = 0x1e2, + .dead_func_cnt = 1, + .dead_func_mask = 0x2, +}, + +{ + .descr = "line_info (dead subprog + dead start w/ move)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [5] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0dead\0main\0func\0int a=1+1;\0/* live call */" + "\0return 0;\0return 0;\0/* dead */\0/* dead */" + "\0/* dead */\0return bla + 1;\0return bla + 1;" + "\0return bla + 1;\0return func(a);\0b+=1;\0return b;"), + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 1), + BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1), + BPF_CALL_REL(3), + BPF_CALL_REL(5), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_CALL_REL(1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_MOV64_REG(BPF_REG_0, 2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 3, + .func_info_rec_size = 8, + .func_info = { {0, 4}, {6, 3}, {9, 5} }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(3, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(4, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(5, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(7, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(8, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(9, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(11, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(12, 0, NAME_TBD, 2, 9), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, + .dead_code_cnt = 3, + .dead_code_mask = 0x70, + .dead_func_cnt = 1, + .dead_func_mask = 0x2, +}, + +{ + .descr = "line_info (dead end + subprog start w/ no linfo)", + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_FUNC_PROTO_ENC(1, 1), /* [2] */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */ + BTF_FUNC_ENC(NAME_TBD, 2), /* [4] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0x\0main\0func\0/* main linfo */\0/* func linfo */"), + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 1, 3), + BPF_CALL_REL(3), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .func_info_cnt = 2, + .func_info_rec_size = 8, + .func_info = { {0, 3}, {6, 4}, }, + .line_info = { + BPF_LINE_INFO_ENC(0, 0, NAME_TBD, 1, 10), + BPF_LINE_INFO_ENC(6, 0, NAME_TBD, 1, 10), + BTF_END_RAW, + }, + .line_info_rec_size = sizeof(struct bpf_line_info), + .nr_jited_ksyms = 2, +}, + };
static size_t probe_prog_length(const struct bpf_insn *fp) @@@ -5196,7 -4565,6 +5193,7 @@@ static int test_get_finfo(const struct struct bpf_func_info *finfo; __u32 info_len, rec_size, i; void *func_info = NULL; + __u32 nr_func_info; int err;
/* get necessary lens */ @@@ -5206,8 -4574,7 +5203,8 @@@ fprintf(stderr, "%s\n", btf_log_buf); return -1; } - if (CHECK(info.nr_func_info != test->func_info_cnt, + nr_func_info = test->func_info_cnt - test->dead_func_cnt; + if (CHECK(info.nr_func_info != nr_func_info, "incorrect info.nr_func_info (1st) %d", info.nr_func_info)) { return -1; @@@ -5228,7 -4595,7 +5225,7 @@@
/* reset info to only retrieve func_info related data */ memset(&info, 0, sizeof(info)); - info.nr_func_info = test->func_info_cnt; + info.nr_func_info = nr_func_info; info.func_info_rec_size = rec_size; info.func_info = ptr_to_u64(func_info); err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len); @@@ -5237,7 -4604,7 +5234,7 @@@ err = -1; goto done; } - if (CHECK(info.nr_func_info != test->func_info_cnt, + if (CHECK(info.nr_func_info != nr_func_info, "incorrect info.nr_func_info (2nd) %d", info.nr_func_info)) { err = -1; @@@ -5251,9 -4618,7 +5248,9 @@@ }
finfo = func_info; - for (i = 0; i < test->func_info_cnt; i++) { + for (i = 0; i < nr_func_info; i++) { + if (test->dead_func_mask & (1 << i)) + continue; if (CHECK(finfo->type_id != test->func_info[i][1], "incorrect func_type %u expected %u", finfo->type_id, test->func_info[i][1])) { @@@ -5282,7 -4647,6 +5279,7 @@@ static int test_get_linfo(const struct struct bpf_prog_info info = {}; __u32 *jited_func_lens = NULL; __u64 cur_func_ksyms; + __u32 dead_insns; int err;
jited_cnt = cnt; @@@ -5291,7 -4655,7 +5288,7 @@@ if (test->nr_jited_ksyms) nr_jited_ksyms = test->nr_jited_ksyms; else - nr_jited_ksyms = test->func_info_cnt; + nr_jited_ksyms = test->func_info_cnt - test->dead_func_cnt; nr_jited_func_lens = nr_jited_ksyms;
info_len = sizeof(struct bpf_prog_info); @@@ -5393,20 -4757,12 +5390,20 @@@ goto done; }
+ dead_insns = 0; + while (test->dead_code_mask & (1 << dead_insns)) + dead_insns++; + CHECK(linfo[0].insn_off, "linfo[0].insn_off:%u", linfo[0].insn_off); for (i = 1; i < cnt; i++) { const struct bpf_line_info *expected_linfo;
- expected_linfo = patched_linfo + (i * test->line_info_rec_size); + while (test->dead_code_mask & (1 << (i + dead_insns))) + dead_insns++; + + expected_linfo = patched_linfo + + ((i + dead_insns) * test->line_info_rec_size); if (CHECK(linfo[i].insn_off <= linfo[i - 1].insn_off, "linfo[%u].insn_off:%u <= linfo[%u].insn_off:%u", i, linfo[i].insn_off, @@@ -5564,9 -4920,7 +5561,9 @@@ static int do_test_info_raw(unsigned in if (err) goto done;
- err = test_get_linfo(test, patched_linfo, attr.line_info_cnt, prog_fd); + err = test_get_linfo(test, patched_linfo, + attr.line_info_cnt - test->dead_code_cnt, + prog_fd); if (err) goto done;
@@@ -5602,450 -4956,20 +5599,450 @@@ static int test_info_raw(void return err; }
+struct btf_raw_data { + __u32 raw_types[MAX_NR_RAW_U32]; + const char *str_sec; + __u32 str_sec_size; +}; + +struct btf_dedup_test { + const char *descr; + struct btf_raw_data input; + struct btf_raw_data expect; + struct btf_dedup_opts opts; +}; + +const struct btf_dedup_test dedup_tests[] = { + +{ + .descr = "dedup: unused strings filtering", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 64, 8), + BTF_END_RAW, + }, + BTF_STR_SEC("\0unused\0int\0foo\0bar\0long"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0long"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: strings deduplication", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), + BTF_TYPE_INT_ENC(NAME_NTH(3), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 64, 8), + BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0long int\0int\0long int\0int"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 64, 8), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0long int"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: struct example #1", + /* + * struct s { + * struct s *next; + * const int *a; + * int b[16]; + * int c; + * } + */ + .input = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* int[16] */ + BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */ + /* struct s { */ + BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [3] */ + BTF_MEMBER_ENC(NAME_NTH(3), 4, 0), /* struct s *next; */ + BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */ + BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */ + BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */ + /* ptr -> [3] struct s */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> [6] const int */ + BTF_PTR_ENC(6), /* [5] */ + /* const -> [1] int */ + BTF_CONST_ENC(1), /* [6] */ + + /* full copy of the above */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */ + BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */ + BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [9] */ + BTF_MEMBER_ENC(NAME_NTH(3), 10, 0), + BTF_MEMBER_ENC(NAME_NTH(4), 11, 64), + BTF_MEMBER_ENC(NAME_NTH(5), 8, 128), + BTF_MEMBER_ENC(NAME_NTH(6), 7, 640), + BTF_PTR_ENC(9), /* [10] */ + BTF_PTR_ENC(12), /* [11] */ + BTF_CONST_ENC(7), /* [12] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"), + }, + .expect = { + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* int[16] */ + BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */ + /* struct s { */ + BTF_STRUCT_ENC(NAME_NTH(6), 4, 84), /* [3] */ + BTF_MEMBER_ENC(NAME_NTH(5), 4, 0), /* struct s *next; */ + BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */ + BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */ + BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */ + /* ptr -> [3] struct s */ + BTF_PTR_ENC(3), /* [4] */ + /* ptr -> [6] const int */ + BTF_PTR_ENC(6), /* [5] */ + /* const -> [1] int */ + BTF_CONST_ENC(1), /* [6] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: all possible kinds (no duplicates)", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 8), /* [1] int */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 4), /* [2] enum */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_FWD_ENC(NAME_TBD, 1 /* union kind_flag */), /* [3] fwd */ + BTF_TYPE_ARRAY_ENC(2, 1, 7), /* [4] array */ + BTF_STRUCT_ENC(NAME_TBD, 1, 4), /* [5] struct */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_UNION_ENC(NAME_TBD, 1, 4), /* [6] union */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [7] typedef */ + BTF_PTR_ENC(0), /* [8] ptr */ + BTF_CONST_ENC(8), /* [9] const */ + BTF_VOLATILE_ENC(8), /* [10] volatile */ + BTF_RESTRICT_ENC(8), /* [11] restrict */ + BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), + BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 8), /* [1] int */ + BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), 4), /* [2] enum */ + BTF_ENUM_ENC(NAME_TBD, 0), + BTF_ENUM_ENC(NAME_TBD, 1), + BTF_FWD_ENC(NAME_TBD, 1 /* union kind_flag */), /* [3] fwd */ + BTF_TYPE_ARRAY_ENC(2, 1, 7), /* [4] array */ + BTF_STRUCT_ENC(NAME_TBD, 1, 4), /* [5] struct */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_UNION_ENC(NAME_TBD, 1, 4), /* [6] union */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [7] typedef */ + BTF_PTR_ENC(0), /* [8] ptr */ + BTF_CONST_ENC(8), /* [9] const */ + BTF_VOLATILE_ENC(8), /* [10] volatile */ + BTF_RESTRICT_ENC(8), /* [11] restrict */ + BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */ + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1), + BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8), + BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, +{ + .descr = "dedup: no int duplicates", + .input = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8), + /* different name */ + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 8), + /* different encoding */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_CHAR, 0, 32, 8), + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_BOOL, 0, 32, 8), + /* different bit offset */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 8, 32, 8), + /* different bit size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8), + /* different byte size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0some other int"), + }, + .expect = { + .raw_types = { + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8), + /* different name */ + BTF_TYPE_INT_ENC(NAME_NTH(2), BTF_INT_SIGNED, 0, 32, 8), + /* different encoding */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_CHAR, 0, 32, 8), + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_BOOL, 0, 32, 8), + /* different bit offset */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 8, 32, 8), + /* different bit size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8), + /* different byte size */ + BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0int\0some other int"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, + +}; + +static int btf_type_size(const struct btf_type *t) +{ + int base_size = sizeof(struct btf_type); + __u16 vlen = BTF_INFO_VLEN(t->info); + __u16 kind = BTF_INFO_KIND(t->info); + + switch (kind) { + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + return base_size; + case BTF_KIND_INT: + return base_size + sizeof(__u32); + case BTF_KIND_ENUM: + return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ARRAY: + return base_size + sizeof(struct btf_array); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return base_size + vlen * sizeof(struct btf_member); + case BTF_KIND_FUNC_PROTO: + return base_size + vlen * sizeof(struct btf_param); + default: + fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind); + return -EINVAL; + } +} + +static void dump_btf_strings(const char *strs, __u32 len) +{ + const char *cur = strs; + int i = 0; + + while (cur < strs + len) { + fprintf(stderr, "string #%d: '%s'\n", i, cur); + cur += strlen(cur) + 1; + i++; + } +} + +static int do_test_dedup(unsigned int test_num) +{ + const struct btf_dedup_test *test = &dedup_tests[test_num - 1]; + int err = 0, i; + __u32 test_nr_types, expect_nr_types, test_str_len, expect_str_len; + void *raw_btf; + unsigned int raw_btf_size; + struct btf *test_btf = NULL, *expect_btf = NULL; + const char *ret_test_next_str, *ret_expect_next_str; + const char *test_strs, *expect_strs; + const char *test_str_cur, *test_str_end; + const char *expect_str_cur, *expect_str_end; + + fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr); + + raw_btf = btf_raw_create(&hdr_tmpl, test->input.raw_types, + test->input.str_sec, test->input.str_sec_size, + &raw_btf_size, &ret_test_next_str); + if (!raw_btf) + return -1; + test_btf = btf__new((__u8 *)raw_btf, raw_btf_size); + free(raw_btf); + if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld", + PTR_ERR(test_btf))) { + err = -1; + goto done; + } + + raw_btf = btf_raw_create(&hdr_tmpl, test->expect.raw_types, + test->expect.str_sec, + test->expect.str_sec_size, + &raw_btf_size, &ret_expect_next_str); + if (!raw_btf) + return -1; + expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size); + free(raw_btf); + if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld", + PTR_ERR(expect_btf))) { + err = -1; + goto done; + } + + err = btf__dedup(test_btf, NULL, &test->opts); + if (CHECK(err, "btf_dedup failed errno:%d", err)) { + err = -1; + goto done; + } + + btf__get_strings(test_btf, &test_strs, &test_str_len); + btf__get_strings(expect_btf, &expect_strs, &expect_str_len); + if (CHECK(test_str_len != expect_str_len, + "test_str_len:%u != expect_str_len:%u", + test_str_len, expect_str_len)) { + fprintf(stderr, "\ntest strings:\n"); + dump_btf_strings(test_strs, test_str_len); + fprintf(stderr, "\nexpected strings:\n"); + dump_btf_strings(expect_strs, expect_str_len); + err = -1; + goto done; + } + + test_str_cur = test_strs; + test_str_end = test_strs + test_str_len; + expect_str_cur = expect_strs; + expect_str_end = expect_strs + expect_str_len; + while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) { + size_t test_len, expect_len; + + test_len = strlen(test_str_cur); + expect_len = strlen(expect_str_cur); + if (CHECK(test_len != expect_len, + "test_len:%zu != expect_len:%zu " + "(test_str:%s, expect_str:%s)", + test_len, expect_len, test_str_cur, expect_str_cur)) { + err = -1; + goto done; + } + if (CHECK(strcmp(test_str_cur, expect_str_cur), + "test_str:%s != expect_str:%s", + test_str_cur, expect_str_cur)) { + err = -1; + goto done; + } + test_str_cur += test_len + 1; + expect_str_cur += expect_len + 1; + } + if (CHECK(test_str_cur != test_str_end, + "test_str_cur:%p != test_str_end:%p", + test_str_cur, test_str_end)) { + err = -1; + goto done; + } + + test_nr_types = btf__get_nr_types(test_btf); + expect_nr_types = btf__get_nr_types(expect_btf); + if (CHECK(test_nr_types != expect_nr_types, + "test_nr_types:%u != expect_nr_types:%u", + test_nr_types, expect_nr_types)) { + err = -1; + goto done; + } + + for (i = 1; i <= test_nr_types; i++) { + const struct btf_type *test_type, *expect_type; + int test_size, expect_size; + + test_type = btf__type_by_id(test_btf, i); + expect_type = btf__type_by_id(expect_btf, i); + test_size = btf_type_size(test_type); + expect_size = btf_type_size(expect_type); + + if (CHECK(test_size != expect_size, + "type #%d: test_size:%d != expect_size:%u", + i, test_size, expect_size)) { + err = -1; + goto done; + } + if (CHECK(memcmp((void *)test_type, + (void *)expect_type, + test_size), + "type #%d: contents differ", i)) { + err = -1; + goto done; + } + } + +done: + if (!err) + fprintf(stderr, "OK"); + if (!IS_ERR(test_btf)) + btf__free(test_btf); + if (!IS_ERR(expect_btf)) + btf__free(expect_btf); + + return err; +} + +static int test_dedup(void) +{ + unsigned int i; + int err = 0; + + if (args.dedup_test_num) + return count_result(do_test_dedup(args.dedup_test_num)); + + for (i = 1; i <= ARRAY_SIZE(dedup_tests); i++) + err |= count_result(do_test_dedup(i)); + + return err; +} + static void usage(const char *cmd) { fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n" "\t[-g btf_get_info_test_num (1 - %zu)] |\n" "\t[-f btf_file_test_num (1 - %zu)] |\n" "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n" - "\t[-p (pretty print test)]]\n", + "\t[-p (pretty print test)] |\n" + "\t[-d btf_dedup_test_num (1 - %zu)]]\n", cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests), - ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests)); + ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests), + ARRAY_SIZE(dedup_tests)); }
static int parse_args(int argc, char **argv) { - const char *optstr = "lpk:f:r:g:"; + const char *optstr = "hlpk:f:r:g:d:"; int opt;
while ((opt = getopt(argc, argv, optstr)) != -1) { @@@ -6072,16 -4996,12 +6069,16 @@@ args.info_raw_test_num = atoi(optarg); args.info_raw_test = true; break; + case 'd': + args.dedup_test_num = atoi(optarg); + args.dedup_test = true; + break; case 'h': usage(argv[0]); exit(0); default: - usage(argv[0]); - return -1; + usage(argv[0]); + return -1; } }
@@@ -6117,14 -5037,6 +6114,14 @@@ return -1; }
+ if (args.dedup_test_num && + (args.dedup_test_num < 1 || + args.dedup_test_num > ARRAY_SIZE(dedup_tests))) { + fprintf(stderr, "BTF dedup test number must be [1 - %zu]\n", + ARRAY_SIZE(dedup_tests)); + return -1; + } + return 0; }
@@@ -6143,7 -5055,7 +6140,7 @@@ int main(int argc, char **argv return err;
if (args.always_log) - libbpf_set_print(__base_pr, __base_pr, __base_pr); + libbpf_set_print(__base_pr);
if (args.raw_test) err |= test_raw(); @@@ -6160,18 -5072,14 +6157,18 @@@ if (args.info_raw_test) err |= test_info_raw();
+ if (args.dedup_test) + err |= test_dedup(); + if (args.raw_test || args.get_info_test || args.file_test || - args.pprint_test || args.info_raw_test) + args.pprint_test || args.info_raw_test || args.dedup_test) goto done;
err |= test_raw(); err |= test_get_info(); err |= test_file(); err |= test_info_raw(); + err |= test_dedup();
done: print_summary();
linux-merge@lists.open-mesh.org