The following commit has been merged in the master branch:

commit 573a8095f68c7a73b362b5c0f65bb6a0a8a53a03
Merge: 748d1c8a425ec529d541f082ee7a81f6a51fa120 5af75c747e2a868abbf8611494b50ed5e076fca7
Author: David S. Miller <davem@davemloft.net>
Date:   Tue Sep 22 17:44:59 2020 -0700
Merge tag 'mlx5-updates-2020-09-21' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says:
==================== mlx5-updates-2020-09-21
Multi packet TX descriptor support for SKBs.
This series refactors the regular TX data path in mlx5 and adds support for the Enhanced TX MPWQE feature. MPWQE stands for multi-packet work queue element: a single TX descriptor that serves multiple packets, reducing the PCI bandwidth spent on control traffic. It should improve performance in scenarios where PCI is the bottleneck and xmit_more is signaled by the kernel. The refactoring done in this series also improves the packet rate on its own.
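In rough terms, the driver keeps an open "session" WQE and appends one 16-byte data segment per packet while the kernel signals xmit_more. The sketch below is illustrative only: the mpwqe_session_*() helpers are hypothetical stand-ins, while the session fields and mlx5e_tx_mpwqe_is_full() mirror what this series adds (see the diff below).

    /* Illustrative sketch, not driver code: mpwqe_session_*() are
     * hypothetical; sq->mpwqe mirrors struct mlx5e_tx_mpwqe below. */
    static void mpwqe_xmit_one(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                               bool xmit_more)
    {
        if (!sq->mpwqe.wqe)                 /* no open session */
            mpwqe_session_start(sq);        /* fetch a WQE, reset counters */

        mpwqe_session_add_dseg(sq, skb);    /* one 16-byte data segment */
        sq->mpwqe.pkt_count++;
        sq->mpwqe.bytes_count += skb->len;

        /* The control segment is finalized and the doorbell rung only
         * when the WQE fills up or no more packets are coming. */
        if (mlx5e_tx_mpwqe_is_full(&sq->mpwqe) || !xmit_more)
            mpwqe_session_complete(sq);
    }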
MPWQE is already implemented in the XDP TX path; this series adds MPWQE support to the regular kernel SKB TX path.
MPWQE is supported on ConnectX-5 and onward. Older devices keep using the regular (single-packet) WQE descriptor, so backward compatibility is preserved.
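Concretely, the feature is gated behind a private flag whose default follows the device capability bit (quoted from the en_main.c hunk below), so older NICs transparently stay on the legacy path:

    /* From mlx5e_build_nic_params() in this series: */
    MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE,
                    MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe));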
MPWQE is not compatible with certain offloads and features, such as TLS offload, TSO, and nonlinear SKBs. If an incompatible feature is in use for a given SKB, the driver gracefully falls back to a regular non-MPWQE descriptor for that SKB.
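The per-SKB fallback can be pictured as an eligibility test in the xmit path. The helper below is a hypothetical sketch (the real check lands in the final patch of the series); its conditions just restate the list above:

    /* Hypothetical sketch of the per-SKB eligibility test. */
    static bool skb_fits_mpwqe(struct mlx5e_txqsq *sq, struct sk_buff *skb)
    {
        if (!test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state))
            return false;   /* feature disabled or device too old */
        if (skb_is_gso(skb))
            return false;   /* TSO gets a dedicated WQE */
        if (skb_shinfo(skb)->nr_frags)
            return false;   /* nonlinear SKBs are not batched */
        return true;        /* a TLS-offloaded SKB would bail out here too */
    }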
Prior to the final patch, "net/mlx5e: Enhanced TX MPWQE for SKBs", which adds the actual support, Maxim refactored the TX data path into stages and smaller helper functions that are shared by the legacy path and the new MPWQE feature.
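The resulting dispatch can be summarized as follows. mlx5e_sq_xmit_simple() and mlx5e_tx_mpwqe_ensure_complete() are real helpers declared in the txrx.h hunk below; mlx5e_sq_xmit_mpwqe() and skb_fits_mpwqe() (sketched above) stand in for code added by the final patch:

    /* Simplified view of the refactored TX dispatch, not verbatim code. */
    static void sq_xmit_dispatch(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                                 bool xmit_more)
    {
        if (skb_fits_mpwqe(sq, skb)) {
            mlx5e_sq_xmit_mpwqe(sq, skb, xmit_more);    /* batched path */
            return;
        }

        /* Close any open MPWQE session first, then fall back to the
         * classic one-WQE-per-SKB path. */
        mlx5e_tx_mpwqe_ensure_complete(sq);
        mlx5e_sq_xmit_simple(sq, skb, xmit_more);
    }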
Performance testing:
UDP performance is improved in a single stream pktgen test:
  Packet rate: 16.86 Mpps (±0.15 Mpps) -> 20.94 Mpps (±0.33 Mpps)
  Instructions per packet: 434 -> 329
  Cycles per packet: 158 -> 123
  Instructions per cycle: 2.75 -> 2.67
TCP and XDP_TX single stream tests show no performance difference.
MPWQE can reduce PCI bandwidth:
  PCI Gen2, pktgen at fixed rate of 36864000 pps on 24 CPU cores:
    Inbound PCI utilization with MPWQE off: 80.3%
    Inbound PCI utilization with MPWQE on:  59.0%
  PCI Gen3, pktgen at fixed rate of 56064000 pps on 24 CPU cores:
    Inbound PCI utilization with MPWQE off: 65.4%
    Inbound PCI utilization with MPWQE on:  49.3%

MPWQE can also reduce CPU load, increasing the packet rate in case of CPU bottleneck:
  PCI Gen2, pktgen at full rate on 24 CPU cores:
    Packet rate with MPWQE off: 37.5 Mpps
    Packet rate with MPWQE on:  49.0 Mpps
  PCI Gen3, pktgen at full rate on 24 CPU cores:
    Packet rate with MPWQE off: 57.0 Mpps
    Packet rate with MPWQE on:  66.8 Mpps
Burst size in all pktgen tests is 32.
CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz (x86_64)
NIC: Mellanox ConnectX-6 Dx
GCC 10.2.0
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en.h
index e9010ceb5ea9,0df40d24acb0..5368e06cd71c
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@@ -221,6 -221,7 +221,7 @@@ enum mlx5e_priv_flag
      MLX5E_PFLAG_RX_STRIDING_RQ,
      MLX5E_PFLAG_RX_NO_CSUM_COMPLETE,
      MLX5E_PFLAG_XDP_TX_MPWQE,
+     MLX5E_PFLAG_SKB_TX_MPWQE,
      MLX5E_NUM_PFLAGS, /* Keep last */
  };

@@@ -305,6 -306,7 +306,7 @@@ struct mlx5e_sq_dma

  enum {
      MLX5E_SQ_STATE_ENABLED,
+     MLX5E_SQ_STATE_MPWQE,
      MLX5E_SQ_STATE_RECOVERING,
      MLX5E_SQ_STATE_IPSEC,
      MLX5E_SQ_STATE_AM,
@@@ -313,26 -315,40 +315,40 @@@
      MLX5E_SQ_STATE_PENDING_XSK_TX,
  };

+ struct mlx5e_tx_mpwqe {
+     /* Current MPWQE session */
+     struct mlx5e_tx_wqe *wqe;
+     u32 bytes_count;
+     u8  ds_count;
+     u8  pkt_count;
+     u8  inline_on;
+ };
+
  struct mlx5e_txqsq {
      /* data path */

      /* dirtied @completion */
      u16                        cc;
+     u16                        skb_fifo_cc;
      u32                        dma_fifo_cc;
      struct dim                 dim; /* Adaptive Moderation */

      /* dirtied @xmit */
      u16                        pc ____cacheline_aligned_in_smp;
+     u16                        skb_fifo_pc;
      u32                        dma_fifo_pc;
+     struct mlx5e_tx_mpwqe      mpwqe;

      struct mlx5e_cq            cq;

      /* read only */
      struct mlx5_wq_cyc         wq;
      u32                        dma_fifo_mask;
+     u16                        skb_fifo_mask;
      struct mlx5e_sq_stats     *stats;
      struct {
          struct mlx5e_sq_dma       *dma_fifo;
+         struct sk_buff           **skb_fifo;
          struct mlx5e_tx_wqe_info  *wqe_info;
      } db;
      void __iomem              *uar_map;
@@@ -399,7 -415,7 +415,7 @@@ struct mlx5e_xdp_info
      };
  };

- struct mlx5e_xdp_xmit_data {
+ struct mlx5e_xmit_data {
      dma_addr_t  dma_addr;
      void       *data;
      u32         len;
@@@ -412,18 -428,10 +428,10 @@@ struct mlx5e_xdp_info_fifo
      u32 mask;
  };

- struct mlx5e_xdp_mpwqe {
-     /* Current MPWQE session */
-     struct mlx5e_tx_wqe *wqe;
-     u8                   ds_count;
-     u8                   pkt_count;
-     u8                   inline_on;
- };
-
  struct mlx5e_xdpsq;

  typedef int (*mlx5e_fp_xmit_xdp_frame_check)(struct mlx5e_xdpsq *);
  typedef bool (*mlx5e_fp_xmit_xdp_frame)(struct mlx5e_xdpsq *,
-                                         struct mlx5e_xdp_xmit_data *,
+                                         struct mlx5e_xmit_data *,
                                          struct mlx5e_xdp_info *,
                                          int);

@@@ -438,7 -446,7 +446,7 @@@ struct mlx5e_xdpsq
      u32 xdpi_fifo_pc ____cacheline_aligned_in_smp;
      u16 pc;
      struct mlx5_wqe_ctrl_seg *doorbell_cseg;
-     struct mlx5e_xdp_mpwqe mpwqe;
+     struct mlx5e_tx_mpwqe mpwqe;

      struct mlx5e_cq cq;

@@@ -601,7 -609,7 +609,7 @@@ struct mlx5e_rq
      struct dim         dim; /* Dynamic Interrupt Moderation */

      /* XDP */
-     struct bpf_prog       *xdp_prog;
+     struct bpf_prog __rcu *xdp_prog;
      struct mlx5e_xdpsq    *xdpsq;
      DECLARE_BITMAP(flags, 8);
      struct page_pool      *page_pool;
@@@ -1007,6 -1015,7 +1015,6 @@@ int mlx5e_update_nic_rx(struct mlx5e_pr
  void mlx5e_update_carrier(struct mlx5e_priv *priv);
  int mlx5e_close(struct net_device *netdev);
  int mlx5e_open(struct net_device *netdev);
- void mlx5e_update_ndo_stats(struct mlx5e_priv *priv);

  void mlx5e_queue_update_stats(struct mlx5e_priv *priv);
  int mlx5e_bits_invert(unsigned long a, int size);

diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 24336c60123a,8ccd0b661a7f..07ee1d236ab3
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@@ -7,6 -7,21 +7,21 @@@
  #include "en.h"
  #include <linux/indirect_call_wrapper.h>

+ #define MLX5E_TX_WQE_EMPTY_DS_COUNT (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
+
+ /* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
+  * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
+  * We use a bound lower than MLX5_SEND_WQE_MAX_WQEBBS to let a
+  * full-session WQE be cache-aligned.
+  */
+ #if L1_CACHE_BYTES < 128
+ #define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
+ #else
+ #define MLX5E_TX_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
+ #endif
+
+ #define MLX5E_TX_MPW_MAX_NUM_DS (MLX5E_TX_MPW_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS)
+
  #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
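To spell out the arithmetic in the comment above: one WQEBB (64 bytes) holds four 16-byte data segments (DS), and a WQE may span at most 16 WQEBBs, so a full WQE would need 16 * 4 = 64 DS entries, one more than the 6-bit DS-count field can encode (max 63). Capping a session at 15 WQEBBs (960 bytes, a multiple of 64-byte cache lines) or 14 WQEBBs (896 bytes, a multiple of 128-byte cache lines) keeps the count encodable and a full-session WQE cache-aligned. A hypothetical compile-time restatement, assuming the constants from linux/mlx5/qp.h and en.h:

    /* Not part of the patch; a hypothetical restatement of the bound. */
    _Static_assert(MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS == 64,
                   "a full 16-WQEBB WQE needs 64 DS, overflowing the 6-bit field");
    _Static_assert(MLX5E_TX_MPW_MAX_NUM_DS <= 63,
                   "the lowered bound fits the ctrl segment's DS-count field");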
  enum mlx5e_icosq_wqe_type {
@@@ -20,11 -35,6 +35,11 @@@
  };

  /* General */
+ static inline bool mlx5e_skb_is_multicast(struct sk_buff *skb)
+ {
+     return skb->pkt_type == PACKET_MULTICAST || skb->pkt_type == PACKET_BROADCAST;
+ }
+
  void mlx5e_trigger_irq(struct mlx5e_icosq *sq);
  void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe);
  void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
@@@ -46,8 -56,6 +61,6 @@@ void mlx5e_free_rx_in_progress_descs(st
  u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
                         struct net_device *sb_dev);
  netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
- void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
-                    struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);
  bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
  void mlx5e_free_txqsq_descs(struct mlx5e_txqsq *sq);

@@@ -110,6 -118,7 +123,7 @@@ struct mlx5e_tx_wqe_info
      u32 num_bytes;
      u8  num_wqebbs;
      u8  num_dma;
+     u8  num_fifo_pkts;
  #ifdef CONFIG_MLX5_EN_TLS
      struct page *resync_dump_frag_page;
  #endif
@@@ -193,23 -202,6 +207,6 @@@ static inline u16 mlx5e_icosq_get_next_
      return pi;
  }

- static inline void
- mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
-                         u16 pi, u16 nnops)
- {
-     struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
-     edge_wi = wi + nnops;
-
-     /* fill sq frag edge with nops to avoid wqe wrapping two pages */
-     for (; wi < edge_wi; wi++) {
-         memset(wi, 0, sizeof(*wi));
-         wi->num_wqebbs = 1;
-         mlx5e_post_nop(wq, sq->sqn, &sq->pc);
-     }
-     sq->stats->nop += nnops;
- }
-
  static inline void
  mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
                  struct mlx5_wqe_ctrl_seg *ctrl)
@@@ -228,29 -220,6 +225,6 @@@
      mlx5_write64((__be32 *)ctrl, uar_map);
  }

- static inline bool mlx5e_transport_inline_tx_wqe(struct mlx5_wqe_ctrl_seg *cseg)
- {
-     return cseg && !!cseg->tis_tir_num;
- }
-
- static inline u8
- mlx5e_tx_wqe_inline_mode(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg,
-                          struct sk_buff *skb)
- {
-     u8 mode;
-
-     if (mlx5e_transport_inline_tx_wqe(cseg))
-         return MLX5_INLINE_MODE_TCP_UDP;
-
-     mode = sq->min_inline_mode;
-
-     if (skb_vlan_tag_present(skb) &&
-         test_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state))
-         mode = max_t(u8, MLX5_INLINE_MODE_L2, mode);
-
-     return mode;
- }
-
  static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
  {
      struct mlx5_core_cq *mcq;
@@@ -276,6 -245,23 +250,23 @@@ mlx5e_dma_push(struct mlx5e_txqsq *sq,
      dma->type = map_type;
  }

+ static inline struct sk_buff **mlx5e_skb_fifo_get(struct mlx5e_txqsq *sq, u16 i)
+ {
+     return &sq->db.skb_fifo[i & sq->skb_fifo_mask];
+ }
+
+ static inline void mlx5e_skb_fifo_push(struct mlx5e_txqsq *sq, struct sk_buff *skb)
+ {
+     struct sk_buff **skb_item = mlx5e_skb_fifo_get(sq, sq->skb_fifo_pc++);
+
+     *skb_item = skb;
+ }
+
+ static inline struct sk_buff *mlx5e_skb_fifo_pop(struct mlx5e_txqsq *sq)
+ {
+     return *mlx5e_skb_fifo_get(sq, sq->skb_fifo_cc++);
+ }
+
  static inline void
  mlx5e_tx_dma_unmap(struct device *pdev, struct mlx5e_sq_dma *dma)
  {
@@@ -291,6 -277,14 +282,14 @@@
      }
  }

+ void mlx5e_sq_xmit_simple(struct mlx5e_txqsq *sq, struct sk_buff *skb, bool xmit_more);
+ void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq);
+
+ static inline bool mlx5e_tx_mpwqe_is_full(struct mlx5e_tx_mpwqe *session)
+ {
+     return session->ds_count == MLX5E_TX_MPW_MAX_NUM_DS;
+ }
+
  static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
  {
      if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) {

diff --combined drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 562bc465f82b,0b201e66f191..ae90d533a350
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@@ -59,7 -59,7 +59,7 @@@ static inline boo
  mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
                      struct mlx5e_dma_info *di, struct xdp_buff *xdp)
  {
-     struct mlx5e_xdp_xmit_data xdptxd;
+     struct mlx5e_xmit_data xdptxd;
      struct mlx5e_xdp_info xdpi;
      struct xdp_frame *xdpf;
      dma_addr_t dma_addr;
@@@ -122,7 -122,7 +122,7 @@@
  bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
                        u32 *len, struct xdp_buff *xdp)
  {
-     struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
+     struct bpf_prog *prog = rcu_dereference(rq->xdp_prog);
      u32 act;
      int err;
@@@ -194,18 -194,22 +194,22 @@@ static u16 mlx5e_xdpsq_get_next_pi(stru

  static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
  {
-     struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+     struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
      struct mlx5e_xdpsq_stats *stats = sq->stats;
+     struct mlx5e_tx_wqe *wqe;
      u16 pi;

-     pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
-     session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+     pi = mlx5e_xdpsq_get_next_pi(sq, MLX5E_TX_MPW_MAX_WQEBBS);
+     wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+     net_prefetchw(wqe->data);

-     net_prefetchw(session->wqe->data);
-     session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
-     session->pkt_count = 0;
-
-     mlx5e_xdp_update_inline_state(sq);
+     *session = (struct mlx5e_tx_mpwqe) {
+         .wqe = wqe,
+         .bytes_count = 0,
+         .ds_count = MLX5E_TX_WQE_EMPTY_DS_COUNT,
+         .pkt_count = 0,
+         .inline_on = mlx5e_xdp_get_inline_state(sq, session->inline_on),
+     };

      stats->mpwqe++;
  }

@@@ -213,7 -217,7 +217,7 @@@
  void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq)
  {
      struct mlx5_wq_cyc *wq = &sq->wq;
-     struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+     struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
      struct mlx5_wqe_ctrl_seg *cseg = &session->wqe->ctrl;
      u16 ds_count = session->ds_count;
      u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
@@@ -258,10 -262,10 +262,10 @@@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_
  }

  INDIRECT_CALLABLE_SCOPE bool
- mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
+ mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
                             struct mlx5e_xdp_info *xdpi, int check_result)
  {
-     struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+     struct mlx5e_tx_mpwqe *session = &sq->mpwqe;
      struct mlx5e_xdpsq_stats *stats = sq->stats;

      if (unlikely(xdptxd->len > sq->hw_mtu)) {
@@@ -284,8 -288,7 +288,7 @@@

      mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);

-     if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) ||
-                  session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS))
+     if (unlikely(mlx5e_xdp_mpqwe_is_full(session)))
          mlx5e_xdp_mpwqe_complete(sq);

      mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);
@@@ -306,7 -309,7 +309,7 @@@ INDIRECT_CALLABLE_SCOPE int mlx5e_xmit_
  }

  INDIRECT_CALLABLE_SCOPE bool
- mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_xmit_data *xdptxd,
+ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
                       struct mlx5e_xdp_info *xdpi, int check_result)
  {
      struct mlx5_wq_cyc *wq = &sq->wq;
@@@ -503,7 -506,7 +506,7 @@@ int mlx5e_xdp_xmit(struct net_device *d

      for (i = 0; i < n; i++) {
          struct xdp_frame *xdpf = frames[i];
-         struct mlx5e_xdp_xmit_data xdptxd;
+         struct mlx5e_xmit_data xdptxd;
          struct mlx5e_xdp_info xdpi;
          bool ret;
diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 4d30a81cbadc,472252ea67a1..961cdce37cc4
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@@ -158,6 -158,16 +158,6 @@@ static void mlx5e_update_carrier_work(s
      mutex_unlock(&priv->state_lock);
  }

- void mlx5e_update_ndo_stats(struct mlx5e_priv *priv)
- {
-     int i;
-
-     for (i = mlx5e_nic_stats_grps_num(priv) - 1; i >= 0; i--)
-         if (mlx5e_nic_stats_grps[i]->update_stats_mask &
-             MLX5E_NDO_UPDATE_STATS)
-             mlx5e_nic_stats_grps[i]->update_stats(priv);
- }
-
  static void mlx5e_update_stats_work(struct work_struct *work)
  {
      struct mlx5e_priv *priv = container_of(work, struct mlx5e_priv,
@@@ -389,7 -399,7 +389,7 @@@ static int mlx5e_alloc_rq(struct mlx5e_

      if (params->xdp_prog)
          bpf_prog_inc(params->xdp_prog);
-     rq->xdp_prog = params->xdp_prog;
+     RCU_INIT_POINTER(rq->xdp_prog, params->xdp_prog);

      rq_xdp_ix = rq->ix;
      if (xsk)
@@@ -398,7 -408,7 +398,7 @@@
      if (err < 0)
          goto err_rq_wq_destroy;

-     rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+     rq->buff.map_dir = params->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
      rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
      pool_size = 1 << params->log_rq_mtu_frames;

@@@ -554,8 -564,8 +554,8 @@@ err_free
  }

  err_rq_wq_destroy:
-     if (rq->xdp_prog)
-         bpf_prog_put(rq->xdp_prog);
+     if (params->xdp_prog)
+         bpf_prog_put(params->xdp_prog);
      xdp_rxq_info_unreg(&rq->xdp_rxq);
      page_pool_destroy(rq->page_pool);
      mlx5_wq_destroy(&rq->wq_ctrl);
@@@ -565,16 -575,10 +565,16 @@@

  static void mlx5e_free_rq(struct mlx5e_rq *rq)
  {
+     struct mlx5e_channel *c = rq->channel;
+     struct bpf_prog *old_prog = NULL;
      int i;

-     if (rq->xdp_prog)
-         bpf_prog_put(rq->xdp_prog);
+     /* drop_rq has neither channel nor xdp_prog. */
+     if (c)
+         old_prog = rcu_dereference_protected(rq->xdp_prog,
+                                              lockdep_is_held(&c->priv->state_lock));
+     if (old_prog)
+         bpf_prog_put(old_prog);

      switch (rq->wq_type) {
      case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
@@@ -870,7 -874,7 +870,7 @@@ void mlx5e_activate_rq(struct mlx5e_rq
  void mlx5e_deactivate_rq(struct mlx5e_rq *rq)
  {
      clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
-     napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
+     synchronize_rcu(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */
  }

  void mlx5e_close_rq(struct mlx5e_rq *rq)
@@@ -1043,6 -1047,7 +1043,7 @@@ static void mlx5e_free_icosq(struct mlx
  static void mlx5e_free_txqsq_db(struct mlx5e_txqsq *sq)
  {
      kvfree(sq->db.wqe_info);
+     kvfree(sq->db.skb_fifo);
      kvfree(sq->db.dma_fifo);
  }

@@@ -1054,15 -1059,19 +1055,19 @@@ static int mlx5e_alloc_txqsq_db(struct
      sq->db.dma_fifo = kvzalloc_node(array_size(df_sz,
                                                 sizeof(*sq->db.dma_fifo)),
                                      GFP_KERNEL, numa);
+     sq->db.skb_fifo = kvzalloc_node(array_size(df_sz,
+                                                sizeof(*sq->db.skb_fifo)),
+                                     GFP_KERNEL, numa);
      sq->db.wqe_info = kvzalloc_node(array_size(wq_sz,
                                                 sizeof(*sq->db.wqe_info)),
                                      GFP_KERNEL, numa);
-     if (!sq->db.dma_fifo || !sq->db.wqe_info) {
+     if (!sq->db.dma_fifo || !sq->db.skb_fifo || !sq->db.wqe_info) {
          mlx5e_free_txqsq_db(sq);
          return -ENOMEM;
      }

      sq->dma_fifo_mask = df_sz - 1;
+     sq->skb_fifo_mask = df_sz - 1;
      return 0;
  }

@@@ -1073,6 -1082,12 +1078,12 @@@ static int mlx5e_calc_sq_stop_room(stru

      sq->stop_room  = mlx5e_tls_get_stop_room(sq);
      sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+     if (test_bit(MLX5E_SQ_STATE_MPWQE, &sq->state))
+         /* A MPWQE can take up to the maximum-sized WQE + all the normal
+          * stop room can be taken if a new packet breaks the active
+          * MPWQE session and allocates its WQEs right away.
+          */
+         sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);

      if (WARN_ON(sq->stop_room >= sq_size)) {
          netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n",
@@@ -1114,6 -1129,8 +1125,8 @@@ static int mlx5e_alloc_txqsq(struct mlx
          set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
      if (mlx5_accel_is_tls_device(c->priv->mdev))
          set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
+     if (param->is_mpw)
+         set_bit(MLX5E_SQ_STATE_MPWQE, &sq->state);
      err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size);
      if (err)
          return err;
@@@ -1315,10 -1332,12 +1328,10 @@@ void mlx5e_tx_disable_queue(struct netd

  static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
  {
-     struct mlx5e_channel *c = sq->channel;
      struct mlx5_wq_cyc *wq = &sq->wq;

      clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
-     /* prevent netif_tx_wake_queue */
-     napi_synchronize(&c->napi);
+     synchronize_rcu(); /* Sync with NAPI to prevent netif_tx_wake_queue. */

      mlx5e_tx_disable_queue(sq->txq);

@@@ -1393,8 -1412,10 +1406,8 @@@ void mlx5e_activate_icosq(struct mlx5e_

  void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq)
  {
-     struct mlx5e_channel *c = icosq->channel;
-
      clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state);
-     napi_synchronize(&c->napi);
+     synchronize_rcu(); /* Sync with NAPI. */
  }

  void mlx5e_close_icosq(struct mlx5e_icosq *sq)
@@@ -1473,7 -1494,7 +1486,7 @@@ void mlx5e_close_xdpsq(struct mlx5e_xdp
      struct mlx5e_channel *c = sq->channel;

      clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
-     napi_synchronize(&c->napi);
+     synchronize_rcu(); /* Sync with NAPI. */

      mlx5e_destroy_sq(c->mdev, sq->sqn);
      mlx5e_free_xdpsq_descs(sq);
@@@ -2162,6 -2183,7 +2175,7 @@@ static void mlx5e_build_sq_param(struc
      mlx5e_build_sq_param_common(priv, param);
      MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size);
      MLX5_SET(sqc, sqc, allow_swp, allow_swp);
+     param->is_mpw = MLX5E_GET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE);
      mlx5e_build_tx_cq_param(priv, params, &param->cqp);
  }
@@@ -3569,7 -3591,6 +3583,7 @@@ void mlx5e_fold_sw_stats64(struct mlx5e

          s->rx_packets += rq_stats->packets + xskrq_stats->packets;
          s->rx_bytes   += rq_stats->bytes + xskrq_stats->bytes;
+         s->multicast  += rq_stats->mcast_packets + xskrq_stats->mcast_packets;

          for (j = 0; j < priv->max_opened_tc; j++) {
              struct mlx5e_sq_stats *sq_stats = &channel_stats->sq[j];
@@@ -3585,6 -3606,7 +3599,6 @@@ voi
  mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
  {
      struct mlx5e_priv *priv = netdev_priv(dev);
-     struct mlx5e_vport_stats *vstats = &priv->stats.vport;
      struct mlx5e_pport_stats *pstats = &priv->stats.pport;

      /* In switchdev mode, monitor counters doesn't monitor
@@@ -3619,6 -3641,12 +3633,6 @@@
      stats->rx_errors = stats->rx_length_errors + stats->rx_crc_errors +
                         stats->rx_frame_errors;
      stats->tx_errors = stats->tx_aborted_errors + stats->tx_carrier_errors;
-
-     /* vport multicast also counts packets that are dropped due to steering
-      * or rx out of buffer
-      */
-     stats->multicast =
-         VPORT_COUNTER_GET(vstats, received_eth_multicast.packets);
  }

  static void mlx5e_set_rx_mode(struct net_device *dev)
@@@ -4327,16 -4355,6 +4341,16 @@@ static int mlx5e_xdp_allowed(struct mlx
      return 0;
  }

+ static void mlx5e_rq_replace_xdp_prog(struct mlx5e_rq *rq, struct bpf_prog *prog)
+ {
+     struct bpf_prog *old_prog;
+
+     old_prog = rcu_replace_pointer(rq->xdp_prog, prog,
+                                    lockdep_is_held(&rq->channel->priv->state_lock));
+     if (old_prog)
+         bpf_prog_put(old_prog);
+ }
+
  static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
  {
      struct mlx5e_priv *priv = netdev_priv(netdev);
@@@ -4395,10 -4413,29 +4409,10 @@@
       */
      for (i = 0; i < priv->channels.num; i++) {
          struct mlx5e_channel *c = priv->channels.c[i];
-         bool xsk_open = test_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
-
-         clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
-         if (xsk_open)
-             clear_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
-         napi_synchronize(&c->napi);
-         /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
-
-         old_prog = xchg(&c->rq.xdp_prog, prog);
-         if (old_prog)
-             bpf_prog_put(old_prog);
-
-         if (xsk_open) {
-             old_prog = xchg(&c->xskrq.xdp_prog, prog);
-             if (old_prog)
-                 bpf_prog_put(old_prog);
-         }
-
-         set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
-         if (xsk_open)
-             set_bit(MLX5E_RQ_STATE_ENABLED, &c->xskrq.state);
-         /* napi_schedule in case we have missed anything */
-         napi_schedule(&c->napi);
+         mlx5e_rq_replace_xdp_prog(&c->rq, prog);
+         if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+             mlx5e_rq_replace_xdp_prog(&c->xskrq, prog);
      }

  unlock:
@@@ -4703,6 -4740,8 +4717,8 @@@ void mlx5e_build_nic_params(struct mlx5
      params->log_sq_size = is_kdump_kernel() ?
          MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
          MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
+     MLX5E_SET_PFLAG(params, MLX5E_PFLAG_SKB_TX_MPWQE,
+                     MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe));

      /* XDP SQ */
      MLX5E_SET_PFLAG(params, MLX5E_PFLAG_XDP_TX_MPWQE,
@@@ -5188,7 -5227,7 +5204,7 @@@ static const struct mlx5e_profile mlx5e
      .enable            = mlx5e_nic_enable,
      .disable           = mlx5e_nic_disable,
      .update_rx         = mlx5e_update_nic_rx,
-     .update_stats      = mlx5e_update_ndo_stats,
+     .update_stats      = mlx5e_stats_update_ndo_stats,
      .update_carrier    = mlx5e_update_carrier,
      .rx_handlers       = &mlx5e_rx_handlers_nic,
      .max_tc            = MLX5E_MAX_NUM_TC,

diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 0d34befc5761,c580f8b8c242..78f6a6f0a7e0
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@@ -54,18 -54,6 +54,18 @@@ unsigned int mlx5e_stats_total_num(stru
      return total;
  }

+ void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv)
+ {
+     mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps;
+     const unsigned int num_stats_grps = stats_grps_num(priv);
+     int i;
+
+     for (i = num_stats_grps - 1; i >= 0; i--)
+         if (stats_grps[i]->update_stats &&
+             stats_grps[i]->update_stats_mask & MLX5E_NDO_UPDATE_STATS)
+             stats_grps[i]->update_stats(priv);
+ }
+
  void mlx5e_stats_update(struct mlx5e_priv *priv)
  {
      mlx5e_stats_grp_t *stats_grps = priv->profile->stats_grps;
@@@ -110,6 -98,8 +110,8 @@@ static const struct counter_desc sw_sta
      { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) },
      { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_added_vlan_packets) },
      { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_nop) },
+     { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_blks) },
+     { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_mpwqe_pkts) },
  #ifdef CONFIG_MLX5_EN_TLS
      { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tls_encrypted_packets) },
@@@ -365,6 -355,8 +367,8 @@@ static MLX5E_DECLARE_STATS_GRP_OP_UPDAT
      s->tx_tso_inner_bytes    += sq_stats->tso_inner_bytes;
      s->tx_added_vlan_packets += sq_stats->added_vlan_packets;
      s->tx_nop                += sq_stats->nop;
+     s->tx_mpwqe_blks         += sq_stats->mpwqe_blks;
+     s->tx_mpwqe_pkts         += sq_stats->mpwqe_pkts;
      s->tx_queue_stopped      += sq_stats->stopped;
      s->tx_queue_wake         += sq_stats->wake;
      s->tx_queue_dropped      += sq_stats->dropped;
@@@ -1568,6 -1560,8 +1572,8 @@@ static const struct counter_desc sq_sta
      { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) },
      { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, added_vlan_packets) },
      { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) },
+     { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_blks) },
+     { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, mpwqe_pkts) },
  #ifdef CONFIG_MLX5_EN_TLS
      { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_packets) },
      { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tls_encrypted_bytes) },

diff --combined drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 771f30cb0700,1e46e0ed04aa..162daaadb0d8
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@@ -103,7 -103,6 +103,7 @@@ unsigned int mlx5e_stats_total_num(stru
  void mlx5e_stats_update(struct mlx5e_priv *priv);
  void mlx5e_stats_fill(struct mlx5e_priv *priv, u64 *data, int idx);
  void mlx5e_stats_fill_strings(struct mlx5e_priv *priv, u8 *data);
+ void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv);

  void mlx5e_stats_pause_get(struct mlx5e_priv *priv,
                             struct ethtool_pause_stats *pause_stats);
@@@ -121,9 -120,10 +121,11 @@@ struct mlx5e_sw_stats
      u64 tx_tso_inner_bytes;
      u64 tx_added_vlan_packets;
      u64 tx_nop;
+     u64 tx_mpwqe_blks;
+     u64 tx_mpwqe_pkts;
      u64 rx_lro_packets;
      u64 rx_lro_bytes;
+     u64 rx_mcast_packets;
      u64 rx_ecn_mark;
      u64 rx_removed_vlan_packets;
      u64 rx_csum_unnecessary;
@@@ -303,7 -303,6 +305,7 @@@ struct mlx5e_rq_stats
      u64 csum_none;
      u64 lro_packets;
      u64 lro_bytes;
+     u64 mcast_packets;
      u64 ecn_mark;
      u64 removed_vlan_packets;
      u64 xdp_drop;
@@@ -351,6 -350,8 +353,8 @@@ struct mlx5e_sq_stats
      u64 csum_partial_inner;
      u64 added_vlan_packets;
      u64 nop;
+     u64 mpwqe_blks;
+     u64 mpwqe_pkts;
  #ifdef CONFIG_MLX5_EN_TLS
      u64 tls_encrypted_packets;
      u64 tls_encrypted_bytes;