Skip to content

Commit 758191c

Browse files
zackyoray authored and kuba-moo committed
net/mlx5e: SHAMPO, Use KSMs instead of KLMs
A KSM Mkey is a KLM Mkey with a fixed buffer size. Because of this, it is a faster mechanism than KLM.

The SHAMPO feature used KLM Mkeys for the memory mappings of its headers buffer. Since it used KLMs with the same buffer size for each entry, we can use KSMs instead.

This commit changes the Mkeys that map the SHAMPO headers buffer from KLMs to KSMs.

Signed-off-by: Yoray Zack <[email protected]>
Signed-off-by: Tariq Toukan <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent e95c5b9 commit 758191c

File tree

6 files changed

+71
-67
lines changed

6 files changed

+71
-67
lines changed

drivers/net/ethernet/mellanox/mlx5/core/en.h

Lines changed: 1 addition & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ struct page_pool;
8080
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
8181

8282
#define MLX5E_RX_MAX_HEAD (256)
83+
#define MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE (8)
8384
#define MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE (9)
8485
#define MLX5E_SHAMPO_WQ_HEADER_PER_PAGE (PAGE_SIZE >> MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE)
8586
#define MLX5E_SHAMPO_WQ_BASE_HEAD_ENTRY_SIZE (64)
@@ -146,25 +147,6 @@ struct page_pool;
146147
#define MLX5E_TX_XSK_POLL_BUDGET 64
147148
#define MLX5E_SQ_RECOVER_MIN_INTERVAL 500 /* msecs */
148149

149-
#define MLX5E_KLM_UMR_WQE_SZ(sgl_len)\
150-
(sizeof(struct mlx5e_umr_wqe) +\
151-
(sizeof(struct mlx5_klm) * (sgl_len)))
152-
153-
#define MLX5E_KLM_UMR_WQEBBS(klm_entries) \
154-
(DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_BB))
155-
156-
#define MLX5E_KLM_UMR_DS_CNT(klm_entries)\
157-
(DIV_ROUND_UP(MLX5E_KLM_UMR_WQE_SZ(klm_entries), MLX5_SEND_WQE_DS))
158-
159-
#define MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size)\
160-
(((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_klm))
161-
162-
#define MLX5E_KLM_ENTRIES_PER_WQE(wqe_size)\
163-
ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT)
164-
165-
#define MLX5E_MAX_KLM_PER_WQE(mdev) \
166-
MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev))
167-
168150
#define mlx5e_state_dereference(priv, p) \
169151
rcu_dereference_protected((p), lockdep_is_held(&(priv)->state_lock))
170152

drivers/net/ethernet/mellanox/mlx5/core/en/params.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,18 +1071,18 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev,
10711071
struct mlx5e_params *params,
10721072
struct mlx5e_rq_param *rq_param)
10731073
{
1074-
int max_num_of_umr_per_wqe, max_hd_per_wqe, max_klm_per_umr, rest;
1074+
int max_num_of_umr_per_wqe, max_hd_per_wqe, max_ksm_per_umr, rest;
10751075
void *wqc = MLX5_ADDR_OF(rqc, rq_param->rqc, wq);
10761076
int wq_size = BIT(MLX5_GET(wq, wqc, log_wq_sz));
10771077
u32 wqebbs;
10781078

1079-
max_klm_per_umr = MLX5E_MAX_KLM_PER_WQE(mdev);
1079+
max_ksm_per_umr = MLX5E_MAX_KSM_PER_WQE(mdev);
10801080
max_hd_per_wqe = mlx5e_shampo_hd_per_wqe(mdev, params, rq_param);
1081-
max_num_of_umr_per_wqe = max_hd_per_wqe / max_klm_per_umr;
1082-
rest = max_hd_per_wqe % max_klm_per_umr;
1083-
wqebbs = MLX5E_KLM_UMR_WQEBBS(max_klm_per_umr) * max_num_of_umr_per_wqe;
1081+
max_num_of_umr_per_wqe = max_hd_per_wqe / max_ksm_per_umr;
1082+
rest = max_hd_per_wqe % max_ksm_per_umr;
1083+
wqebbs = MLX5E_KSM_UMR_WQEBBS(max_ksm_per_umr) * max_num_of_umr_per_wqe;
10841084
if (rest)
1085-
wqebbs += MLX5E_KLM_UMR_WQEBBS(rest);
1085+
wqebbs += MLX5E_KSM_UMR_WQEBBS(rest);
10861086
wqebbs *= wq_size;
10871087
return wqebbs;
10881088
}

drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,25 @@
3434

3535
#define MLX5E_RX_ERR_CQE(cqe) (get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)
3636

37+
#define MLX5E_KSM_UMR_WQE_SZ(sgl_len)\
38+
(sizeof(struct mlx5e_umr_wqe) +\
39+
(sizeof(struct mlx5_ksm) * (sgl_len)))
40+
41+
#define MLX5E_KSM_UMR_WQEBBS(ksm_entries) \
42+
(DIV_ROUND_UP(MLX5E_KSM_UMR_WQE_SZ(ksm_entries), MLX5_SEND_WQE_BB))
43+
44+
#define MLX5E_KSM_UMR_DS_CNT(ksm_entries)\
45+
(DIV_ROUND_UP(MLX5E_KSM_UMR_WQE_SZ(ksm_entries), MLX5_SEND_WQE_DS))
46+
47+
#define MLX5E_KSM_MAX_ENTRIES_PER_WQE(wqe_size)\
48+
(((wqe_size) - sizeof(struct mlx5e_umr_wqe)) / sizeof(struct mlx5_ksm))
49+
50+
#define MLX5E_KSM_ENTRIES_PER_WQE(wqe_size)\
51+
ALIGN_DOWN(MLX5E_KSM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT)
52+
53+
#define MLX5E_MAX_KSM_PER_WQE(mdev) \
54+
MLX5E_KSM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * mlx5e_get_max_sq_aligned_wqebbs(mdev))
55+
3756
static inline
3857
ktime_t mlx5e_cqe_ts_to_ns(cqe_ts_to_ns func, struct mlx5_clock *clock, u64 cqe_ts)
3958
{

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -504,8 +504,8 @@ static int mlx5e_create_umr_mkey(struct mlx5_core_dev *mdev,
504504
return err;
505505
}
506506

507-
static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
508-
u64 nentries,
507+
static int mlx5e_create_umr_ksm_mkey(struct mlx5_core_dev *mdev,
508+
u64 nentries, u8 log_entry_size,
509509
u32 *umr_mkey)
510510
{
511511
int inlen;
@@ -525,12 +525,13 @@ static int mlx5e_create_umr_klm_mkey(struct mlx5_core_dev *mdev,
525525
MLX5_SET(mkc, mkc, umr_en, 1);
526526
MLX5_SET(mkc, mkc, lw, 1);
527527
MLX5_SET(mkc, mkc, lr, 1);
528-
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
528+
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KSM);
529529
mlx5e_mkey_set_relaxed_ordering(mdev, mkc);
530530
MLX5_SET(mkc, mkc, qpn, 0xffffff);
531531
MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.hw_objs.pdn);
532532
MLX5_SET(mkc, mkc, translations_octword_size, nentries);
533-
MLX5_SET(mkc, mkc, length64, 1);
533+
MLX5_SET(mkc, mkc, log_page_size, log_entry_size);
534+
MLX5_SET64(mkc, mkc, len, nentries << log_entry_size);
534535
err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
535536

536537
kvfree(in);
@@ -565,14 +566,16 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq
565566
static int mlx5e_create_rq_hd_umr_mkey(struct mlx5_core_dev *mdev,
566567
struct mlx5e_rq *rq)
567568
{
568-
u32 max_klm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size));
569+
u32 max_ksm_size = BIT(MLX5_CAP_GEN(mdev, log_max_klm_list_size));
569570

570-
if (max_klm_size < rq->mpwqe.shampo->hd_per_wq) {
571-
mlx5_core_err(mdev, "max klm list size 0x%x is smaller than shampo header buffer list size 0x%x\n",
572-
max_klm_size, rq->mpwqe.shampo->hd_per_wq);
571+
if (max_ksm_size < rq->mpwqe.shampo->hd_per_wq) {
572+
mlx5_core_err(mdev, "max ksm list size 0x%x is smaller than shampo header buffer list size 0x%x\n",
573+
max_ksm_size, rq->mpwqe.shampo->hd_per_wq);
573574
return -EINVAL;
574575
}
575-
return mlx5e_create_umr_klm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq,
576+
577+
return mlx5e_create_umr_ksm_mkey(mdev, rq->mpwqe.shampo->hd_per_wq,
578+
MLX5E_SHAMPO_LOG_HEADER_ENTRY_SIZE,
576579
&rq->mpwqe.shampo->mkey);
577580
}
578581

drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

Lines changed: 32 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -619,25 +619,25 @@ static int bitmap_find_window(unsigned long *bitmap, int len,
619619
return min(len, count);
620620
}
621621

622-
static void build_klm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
623-
__be32 key, u16 offset, u16 klm_len, u16 wqe_bbs)
622+
static void build_ksm_umr(struct mlx5e_icosq *sq, struct mlx5e_umr_wqe *umr_wqe,
623+
__be32 key, u16 offset, u16 ksm_len)
624624
{
625-
memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_klms));
625+
memset(umr_wqe, 0, offsetof(struct mlx5e_umr_wqe, inline_ksms));
626626
umr_wqe->ctrl.opmod_idx_opcode =
627627
cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
628628
MLX5_OPCODE_UMR);
629629
umr_wqe->ctrl.umr_mkey = key;
630630
umr_wqe->ctrl.qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT)
631-
| MLX5E_KLM_UMR_DS_CNT(klm_len));
631+
| MLX5E_KSM_UMR_DS_CNT(ksm_len));
632632
umr_wqe->uctrl.flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
633633
umr_wqe->uctrl.xlt_offset = cpu_to_be16(offset);
634-
umr_wqe->uctrl.xlt_octowords = cpu_to_be16(klm_len);
634+
umr_wqe->uctrl.xlt_octowords = cpu_to_be16(ksm_len);
635635
umr_wqe->uctrl.mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE);
636636
}
637637

638638
static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
639639
struct mlx5e_icosq *sq,
640-
u16 klm_entries, u16 index)
640+
u16 ksm_entries, u16 index)
641641
{
642642
struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
643643
u16 entries, pi, header_offset, err, wqe_bbs, new_entries;
@@ -650,20 +650,20 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
650650
int headroom, i;
651651

652652
headroom = rq->buff.headroom;
653-
new_entries = klm_entries - (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1));
654-
entries = ALIGN(klm_entries, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT);
655-
wqe_bbs = MLX5E_KLM_UMR_WQEBBS(entries);
653+
new_entries = ksm_entries - (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1));
654+
entries = ALIGN(ksm_entries, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT);
655+
wqe_bbs = MLX5E_KSM_UMR_WQEBBS(entries);
656656
pi = mlx5e_icosq_get_next_pi(sq, wqe_bbs);
657657
umr_wqe = mlx5_wq_cyc_get_wqe(&sq->wq, pi);
658-
build_klm_umr(sq, umr_wqe, shampo->key, index, entries, wqe_bbs);
658+
build_ksm_umr(sq, umr_wqe, shampo->key, index, entries);
659659

660660
frag_page = &shampo->pages[page_index];
661661

662662
for (i = 0; i < entries; i++, index++) {
663663
dma_info = &shampo->info[index];
664-
if (i >= klm_entries || (index < shampo->pi && shampo->pi - index <
665-
MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT))
666-
goto update_klm;
664+
if (i >= ksm_entries || (index < shampo->pi && shampo->pi - index <
665+
MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT))
666+
goto update_ksm;
667667
header_offset = (index & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1)) <<
668668
MLX5E_SHAMPO_LOG_MAX_HEADER_ENTRY_SIZE;
669669
if (!(header_offset & (PAGE_SIZE - 1))) {
@@ -683,12 +683,11 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
683683
dma_info->frag_page = frag_page;
684684
}
685685

686-
update_klm:
687-
umr_wqe->inline_klms[i].bcount =
688-
cpu_to_be32(MLX5E_RX_MAX_HEAD);
689-
umr_wqe->inline_klms[i].key = cpu_to_be32(lkey);
690-
umr_wqe->inline_klms[i].va =
691-
cpu_to_be64(dma_info->addr + headroom);
686+
update_ksm:
687+
umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
688+
.key = cpu_to_be32(lkey),
689+
.va = cpu_to_be64(dma_info->addr + headroom),
690+
};
692691
}
693692

694693
sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
@@ -720,37 +719,37 @@ static int mlx5e_build_shampo_hd_umr(struct mlx5e_rq *rq,
720719
static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq)
721720
{
722721
struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo;
723-
u16 klm_entries, num_wqe, index, entries_before;
722+
u16 ksm_entries, num_wqe, index, entries_before;
724723
struct mlx5e_icosq *sq = rq->icosq;
725-
int i, err, max_klm_entries, len;
724+
int i, err, max_ksm_entries, len;
726725

727-
max_klm_entries = MLX5E_MAX_KLM_PER_WQE(rq->mdev);
728-
klm_entries = bitmap_find_window(shampo->bitmap,
726+
max_ksm_entries = MLX5E_MAX_KSM_PER_WQE(rq->mdev);
727+
ksm_entries = bitmap_find_window(shampo->bitmap,
729728
shampo->hd_per_wqe,
730729
shampo->hd_per_wq, shampo->pi);
731-
if (!klm_entries)
730+
if (!ksm_entries)
732731
return 0;
733732

734-
klm_entries += (shampo->pi & (MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT - 1));
735-
index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT);
733+
ksm_entries += (shampo->pi & (MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT - 1));
734+
index = ALIGN_DOWN(shampo->pi, MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT);
736735
entries_before = shampo->hd_per_wq - index;
737736

738-
if (unlikely(entries_before < klm_entries))
739-
num_wqe = DIV_ROUND_UP(entries_before, max_klm_entries) +
740-
DIV_ROUND_UP(klm_entries - entries_before, max_klm_entries);
737+
if (unlikely(entries_before < ksm_entries))
738+
num_wqe = DIV_ROUND_UP(entries_before, max_ksm_entries) +
739+
DIV_ROUND_UP(ksm_entries - entries_before, max_ksm_entries);
741740
else
742-
num_wqe = DIV_ROUND_UP(klm_entries, max_klm_entries);
741+
num_wqe = DIV_ROUND_UP(ksm_entries, max_ksm_entries);
743742

744743
for (i = 0; i < num_wqe; i++) {
745-
len = (klm_entries > max_klm_entries) ? max_klm_entries :
746-
klm_entries;
744+
len = (ksm_entries > max_ksm_entries) ? max_ksm_entries :
745+
ksm_entries;
747746
if (unlikely(index + len > shampo->hd_per_wq))
748747
len = shampo->hd_per_wq - index;
749748
err = mlx5e_build_shampo_hd_umr(rq, sq, len, index);
750749
if (unlikely(err))
751750
return err;
752751
index = (index + len) & (rq->mpwqe.shampo->hd_per_wq - 1);
753-
klm_entries -= len;
752+
ksm_entries -= len;
754753
}
755754

756755
return 0;

include/linux/mlx5/device.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,7 @@ enum {
294294
#define MLX5_UMR_FLEX_ALIGNMENT 0x40
295295
#define MLX5_UMR_MTT_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_mtt))
296296
#define MLX5_UMR_KLM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_klm))
297+
#define MLX5_UMR_KSM_NUM_ENTRIES_ALIGNMENT (MLX5_UMR_FLEX_ALIGNMENT / sizeof(struct mlx5_ksm))
297298

298299
#define MLX5_USER_INDEX_LEN (MLX5_FLD_SZ_BYTES(qpc, user_index) * 8)
299300

0 commit comments

Comments
 (0)