Skip to content

Commit 0d66840

Browse files
authored
Merge pull request #1731 from amzn/req-id-gen
efa: Add QP generation to device request ID
2 parents f862cc2 + 039586a commit 0d66840

3 files changed

Lines changed: 89 additions & 45 deletions

File tree

providers/efa/efa.c

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -117,6 +117,7 @@ static void efa_free_context(struct ibv_context *ibvctx)
117117
struct efa_context *ctx = to_efa_context(ibvctx);
118118

119119
free(ctx->qp_table);
120+
free(ctx->qp_gen_table);
120121
pthread_spin_destroy(&ctx->qp_table_lock);
121122
verbs_uninit_context(&ctx->ibvctx);
122123
free(ctx);

providers/efa/efa.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,7 @@ struct efa_context {
4545
size_t cqe_size;
4646
size_t ex_cqe_size;
4747
struct efa_qp **qp_table;
48+
uint16_t *qp_gen_table;
4849
unsigned int qp_table_sz_m1;
4950
pthread_spinlock_t qp_table_lock;
5051
};
@@ -119,6 +120,8 @@ struct efa_wq {
119120
uint16_t desc_mask;
120121
/* wrid_idx_pool_next: Index of the next entry to use in wrid_idx_pool. */
121122
uint16_t wrid_idx_pool_next;
123+
uint16_t gen_mask;
124+
uint16_t shifted_gen;
122125
int max_sge;
123126
int phase;
124127
pthread_spinlock_t wqlock;
@@ -179,6 +182,7 @@ struct efa_qp {
179182
int wr_session_err;
180183
struct ibv_device *dev;
181184
struct efa_parent_domain *parent_domain;
185+
uint16_t gen;
182186
};
183187

184188
struct efa_mr {

providers/efa/verbs.c

Lines changed: 84 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,7 @@ struct efa_wq_init_attr {
6060
int pgsz;
6161
uint16_t sub_cq_idx;
6262
bool need_lock;
63+
uint16_t gen;
6364
};
6465

6566
int efa_query_port(struct ibv_context *ibvctx, uint8_t port,
@@ -140,9 +141,17 @@ int efa_query_device_ctx(struct efa_context *ctx)
140141
ctx->max_wr_rdma_sge = attr.orig_attr.max_sge_rd;
141142
qp_table_sz = roundup_pow_of_two(attr.orig_attr.max_qp);
142143
ctx->qp_table_sz_m1 = qp_table_sz - 1;
144+
143145
ctx->qp_table = calloc(qp_table_sz, sizeof(*ctx->qp_table));
144146
if (!ctx->qp_table)
145147
return ENOMEM;
148+
149+
ctx->qp_gen_table = calloc(qp_table_sz, sizeof(*ctx->qp_gen_table));
150+
if (!ctx->qp_gen_table) {
151+
free(ctx->qp_table);
152+
return ENOMEM;
153+
}
154+
146155
return 0;
147156
}
148157

@@ -465,6 +474,21 @@ static void efa_wq_put_wrid_idx_unlocked(struct efa_wq *wq, uint32_t wrid_idx)
465474
pthread_spin_unlock(&wq->wqlock);
466475
}
467476

477+
/*
 * Build the request ID that is written into a work request descriptor.
 *
 * Takes the next wrid index from efa_wq_get_next_wrid_idx_locked() for the
 * given wr_id and ORs it with wq->shifted_gen, so the returned value carries
 * both the wrid index and the work queue's generation bits.
 */
static uint32_t efa_wq_get_dev_req_id_locked(struct efa_wq *wq, uint64_t wr_id)
478+
{
479+
return efa_wq_get_next_wrid_idx_locked(wq, wr_id) | wq->shifted_gen;
480+
}
481+
482+
/*
 * Release the wrid index encoded in a device request ID.
 *
 * Clears the bits selected by wq->gen_mask from dev_req_id and passes the
 * remaining wrid index to efa_wq_put_wrid_idx_unlocked().
 */
static void efa_wq_put_dev_req_id_unlocked(struct efa_wq *wq, uint32_t dev_req_id)
483+
{
484+
efa_wq_put_wrid_idx_unlocked(wq, dev_req_id & ~wq->gen_mask);
485+
}
486+
487+
/*
 * Look up the wr_id stored for a device request ID.
 *
 * Clears the bits selected by wq->gen_mask from dev_req_id and uses the
 * result as an index into wq->wrid. No bounds check is performed here.
 *
 * NOTE(review): dev_req_id is uint16_t here but uint32_t in
 * efa_wq_put_dev_req_id_unlocked() - confirm the width difference is intended.
 */
static uint64_t efa_wq_get_wrid_by_dev_req_id(struct efa_wq *wq, uint16_t dev_req_id)
488+
{
489+
return wq->wrid[dev_req_id & ~wq->gen_mask];
490+
}
491+
468492
static uint32_t efa_sub_cq_get_current_index(struct efa_sub_cq *sub_cq)
469493
{
470494
return sub_cq->consumed_cnt & sub_cq->qmask;
@@ -693,26 +717,29 @@ static int efa_wc_read_sgid(struct efadv_cq *efadv_cq, union ibv_gid *sgid)
693717
return 0;
694718
}
695719

720+
/*
 * Return whether the EFA_IO_CDESC_COMMON_UNSOLICITED field is set in the
 * flags of the given completion descriptor.
 */
static bool efa_cqe_is_unsolicited(struct efa_io_cdesc_common *cqe)
721+
{
722+
return EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_UNSOLICITED);
723+
}
724+
696725
static bool efa_wc_is_unsolicited(struct efadv_cq *efadv_cq)
697726
{
698727
struct efa_cq *cq = efadv_cq_to_efa_cq(efadv_cq);
699728

700-
return EFA_GET(&cq->cur_cqe->flags, EFA_IO_CDESC_COMMON_UNSOLICITED);
729+
return efa_cqe_is_unsolicited(cq->cur_cqe);
701730
}
702731

703732
static void efa_process_cqe(struct efa_cq *cq, struct ibv_wc *wc,
704733
struct efa_qp *qp)
705734
{
706735
struct efa_io_cdesc_common *cqe = cq->cur_cqe;
707736
enum efa_io_send_op_type op_type;
708-
uint32_t wrid_idx;
709737

710738
wc->status = to_ibv_status(cqe->status);
711739
wc->vendor_err = cqe->status;
712740
wc->wc_flags = 0;
713741
wc->qp_num = cqe->qp_num;
714742

715-
wrid_idx = cqe->req_id;
716743
op_type = EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_OP_TYPE);
717744
if (EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_Q_TYPE) == EFA_IO_SEND_QUEUE) {
718745
cq->cur_wq = &qp->sq.wq;
@@ -725,7 +752,7 @@ static void efa_process_cqe(struct efa_cq *cq, struct ibv_wc *wc,
725752
* because this wrid index has not been freed yet,
726753
* so there is no contention on this index.
727754
*/
728-
wc->wr_id = cq->cur_wq->wrid[wrid_idx];
755+
wc->wr_id = efa_wq_get_wrid_by_dev_req_id(cq->cur_wq, cqe->req_id);
729756

730757
rdma_tracepoint(rdma_core_efa, process_completion, cq->dev->name, wc->wr_id,
731758
wc->status, wc->opcode, wc->qp_num, UINT32_MAX, UINT16_MAX,
@@ -734,12 +761,12 @@ static void efa_process_cqe(struct efa_cq *cq, struct ibv_wc *wc,
734761
struct efa_io_rx_cdesc_ex *rcqe =
735762
container_of(cqe, struct efa_io_rx_cdesc_ex, base.common);
736763

737-
if (EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_UNSOLICITED)) {
764+
if (efa_cqe_is_unsolicited(cqe)) {
738765
cq->cur_wq = NULL;
739766
wc->wr_id = 0;
740767
} else {
741768
cq->cur_wq = &qp->rq.wq;
742-
wc->wr_id = cq->cur_wq->wrid[wrid_idx];
769+
wc->wr_id = efa_wq_get_wrid_by_dev_req_id(cq->cur_wq, cqe->req_id);
743770
}
744771

745772
wc->byte_len = rcqe->base.length;
@@ -770,25 +797,22 @@ static void efa_process_ex_cqe(struct efa_cq *cq, struct efa_qp *qp)
770797
{
771798
struct ibv_cq_ex *ibvcqx = &cq->verbs_cq.cq_ex;
772799
struct efa_io_cdesc_common *cqe = cq->cur_cqe;
773-
uint32_t wrid_idx;
774-
775-
wrid_idx = cqe->req_id;
776800

777801
if (EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_Q_TYPE) == EFA_IO_SEND_QUEUE) {
778802
cq->cur_wq = &qp->sq.wq;
779-
ibvcqx->wr_id = cq->cur_wq->wrid[wrid_idx];
803+
ibvcqx->wr_id = efa_wq_get_wrid_by_dev_req_id(cq->cur_wq, cqe->req_id);
780804
ibvcqx->status = to_ibv_status(cqe->status);
781805

782806
rdma_tracepoint(rdma_core_efa, process_completion, cq->dev->name, ibvcqx->wr_id,
783807
ibvcqx->status, efa_wc_read_opcode(ibvcqx), cqe->qp_num,
784808
UINT32_MAX, UINT16_MAX, efa_wc_read_byte_len(ibvcqx));
785809
} else {
786-
if (EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_UNSOLICITED)) {
810+
if (efa_cqe_is_unsolicited(cqe)) {
787811
cq->cur_wq = NULL;
788812
ibvcqx->wr_id = 0;
789813
} else {
790814
cq->cur_wq = &qp->rq.wq;
791-
ibvcqx->wr_id = cq->cur_wq->wrid[wrid_idx];
815+
ibvcqx->wr_id = efa_wq_get_wrid_by_dev_req_id(cq->cur_wq, cqe->req_id);
792816
}
793817

794818
ibvcqx->status = to_ibv_status(cqe->status);
@@ -800,42 +824,50 @@ static void efa_process_ex_cqe(struct efa_cq *cq, struct efa_qp *qp)
800824
}
801825
}
802826

827+
/*
 * Check that the generation bits of a completion's req_id match the QP.
 *
 * Picks the QP's send work queue when the completion's Q_TYPE flag equals
 * EFA_IO_SEND_QUEUE, and the receive work queue otherwise. Returns true when
 * the bits of cqe->req_id selected by wq->gen_mask equal wq->shifted_gen.
 */
static bool efa_cqe_is_valid_req_id_qp_gen(struct efa_io_cdesc_common *cqe, struct efa_qp *qp)
828+
{
829+
struct efa_wq *wq;
830+
831+
wq = EFA_GET(&cqe->flags, EFA_IO_CDESC_COMMON_Q_TYPE) == EFA_IO_SEND_QUEUE ?
832+
&qp->sq.wq : &qp->rq.wq;
833+
834+
return (cqe->req_id & wq->gen_mask) == wq->shifted_gen;
835+
}
836+
803837
static inline int efa_poll_sub_cq(struct efa_cq *cq, struct efa_sub_cq *sub_cq,
804-
struct efa_qp **cur_qp, struct ibv_wc *wc,
838+
struct ibv_wc *wc,
805839
bool extended) ALWAYS_INLINE;
806840
static inline int efa_poll_sub_cq(struct efa_cq *cq, struct efa_sub_cq *sub_cq,
807-
struct efa_qp **cur_qp, struct ibv_wc *wc,
808-
bool extended)
841+
struct ibv_wc *wc, bool extended)
809842
{
810843
struct efa_context *ctx = to_efa_context(cq->verbs_cq.cq.context);
844+
struct efa_qp *qp;
811845
uint32_t qpn;
812846

813847
cq->cur_cqe = cq_next_sub_cqe_get(sub_cq);
814848
if (!cq->cur_cqe)
815849
return ENOENT;
816850

817851
qpn = cq->cur_cqe->qp_num;
818-
if (!*cur_qp || qpn != (*cur_qp)->verbs_qp.qp.qp_num) {
819-
/* We do not have to take the QP table lock here,
820-
* because CQs will be locked while QPs are removed
821-
* from the table.
822-
*/
823-
*cur_qp = ctx->qp_table[qpn & ctx->qp_table_sz_m1];
824-
if (!*cur_qp || qpn != (*cur_qp)->verbs_qp.qp.qp_num) {
825-
cq->cur_wq = NULL;
826-
verbs_err(&ctx->ibvctx,
827-
"QP[%u] does not exist in QP table\n",
828-
qpn);
829-
return EINVAL;
830-
}
852+
/* We do not have to take the QP table lock here,
853+
* because CQs will be locked while QPs are removed
854+
* from the table.
855+
*/
856+
qp = ctx->qp_table[qpn & ctx->qp_table_sz_m1];
857+
if (!qp || qpn != qp->verbs_qp.qp.qp_num ||
858+
(!efa_cqe_is_unsolicited(cq->cur_cqe) &&
859+
!efa_cqe_is_valid_req_id_qp_gen(cq->cur_cqe, qp))) {
860+
cq->cur_wq = NULL;
861+
verbs_err(&ctx->ibvctx, "Invalid QP[%u]\n", qpn);
862+
return EINVAL;
831863
}
832864

833865
if (extended) {
834-
efa_process_ex_cqe(cq, *cur_qp);
866+
efa_process_ex_cqe(cq, qp);
835867
} else {
836-
efa_process_cqe(cq, wc, *cur_qp);
868+
efa_process_cqe(cq, wc, qp);
837869
if (cq->cur_wq)
838-
efa_wq_put_wrid_idx_unlocked(cq->cur_wq, cq->cur_cqe->req_id);
870+
efa_wq_put_dev_req_id_unlocked(cq->cur_wq, cq->cur_cqe->req_id);
839871
}
840872

841873
return 0;
@@ -848,15 +880,14 @@ static inline int efa_poll_sub_cqs(struct efa_cq *cq, struct ibv_wc *wc,
848880
{
849881
uint16_t num_sub_cqs = cq->num_sub_cqs;
850882
struct efa_sub_cq *sub_cq;
851-
struct efa_qp *qp = NULL;
852883
uint16_t sub_cq_idx;
853884
int err = ENOENT;
854885

855886
for (sub_cq_idx = 0; sub_cq_idx < num_sub_cqs; sub_cq_idx++) {
856887
sub_cq = &cq->sub_cq_arr[cq->next_poll_idx++];
857888
cq->next_poll_idx %= num_sub_cqs;
858889

859-
err = efa_poll_sub_cq(cq, sub_cq, &qp, wc, extended);
890+
err = efa_poll_sub_cq(cq, sub_cq, wc, extended);
860891
if (err != ENOENT) {
861892
cq->cc++;
862893
break;
@@ -909,7 +940,7 @@ static inline void efa_end_poll_common(struct efa_cq *cq)
909940
{
910941
if (cq->cur_cqe) {
911942
if (cq->cur_wq)
912-
efa_wq_put_wrid_idx_unlocked(cq->cur_wq, cq->cur_cqe->req_id);
943+
efa_wq_put_dev_req_id_unlocked(cq->cur_wq, cq->cur_cqe->req_id);
913944
if (cq->db)
914945
efa_update_cq_doorbell(cq, false);
915946
}
@@ -939,7 +970,7 @@ static int efa_next_poll(struct ibv_cq_ex *ibvcqx)
939970
int ret;
940971

941972
if (cq->cur_wq)
942-
efa_wq_put_wrid_idx_unlocked(cq->cur_wq, cq->cur_cqe->req_id);
973+
efa_wq_put_dev_req_id_unlocked(cq->cur_wq, cq->cur_cqe->req_id);
943974
ret = efa_poll_sub_cqs(cq, NULL, true);
944975

945976
return ret;
@@ -957,14 +988,13 @@ static int efa_start_poll_single_sub_cq(struct ibv_cq_ex *ibvcqx,
957988
struct ibv_poll_cq_attr *attr)
958989
{
959990
struct efa_cq *cq = to_efa_cq_ex(ibvcqx);
960-
struct efa_qp *qp = NULL;
961991
int ret;
962992

963993
if (efa_start_poll_comp_check(ibvcqx, attr))
964994
return EINVAL;
965995

966996
pthread_spin_lock(&cq->lock);
967-
ret = efa_poll_sub_cq(cq, cq->sub_cq_arr, &qp, NULL, true);
997+
ret = efa_poll_sub_cq(cq, cq->sub_cq_arr, NULL, true);
968998
if (ret != ENOENT)
969999
cq->cc++;
9701000

@@ -977,13 +1007,12 @@ static int efa_start_poll_single_sub_cq(struct ibv_cq_ex *ibvcqx,
9771007
static int efa_next_poll_single_sub_cq(struct ibv_cq_ex *ibvcqx)
9781008
{
9791009
struct efa_cq *cq = to_efa_cq_ex(ibvcqx);
980-
struct efa_qp *qp = NULL;
9811010
int ret;
9821011

9831012
if (cq->cur_wq)
984-
efa_wq_put_wrid_idx_unlocked(cq->cur_wq, cq->cur_cqe->req_id);
1013+
efa_wq_put_dev_req_id_unlocked(cq->cur_wq, cq->cur_cqe->req_id);
9851014

986-
ret = efa_poll_sub_cq(cq, cq->sub_cq_arr, &qp, NULL, true);
1015+
ret = efa_poll_sub_cq(cq, cq->sub_cq_arr, NULL, true);
9871016
if (ret != ENOENT)
9881017
cq->cc++;
9891018

@@ -1012,13 +1041,12 @@ static int efa_start_poll_single_sub_cq_single_thread(struct ibv_cq_ex *ibvcqx,
10121041
struct ibv_poll_cq_attr *attr)
10131042
{
10141043
struct efa_cq *cq = to_efa_cq_ex(ibvcqx);
1015-
struct efa_qp *qp = NULL;
10161044
int ret;
10171045

10181046
if (efa_start_poll_comp_check(ibvcqx, attr))
10191047
return EINVAL;
10201048

1021-
ret = efa_poll_sub_cq(cq, cq->sub_cq_arr, &qp, NULL, true);
1049+
ret = efa_poll_sub_cq(cq, cq->sub_cq_arr, NULL, true);
10221050
if (ret != ENOENT)
10231051
cq->cc++;
10241052

@@ -1386,6 +1414,7 @@ static void efa_wq_terminate(struct efa_wq *wq, int pgsz)
13861414

13871415
static int efa_wq_initialize(struct efa_wq *wq, struct efa_wq_init_attr *attr)
13881416
{
1417+
uint16_t wrid_idx_mask;
13891418
uint8_t *db_base;
13901419
int err;
13911420
int i;
@@ -1400,6 +1429,10 @@ static int efa_wq_initialize(struct efa_wq *wq, struct efa_wq_init_attr *attr)
14001429
goto err_free_wrid;
14011430
}
14021431

1432+
wrid_idx_mask = roundup_pow_of_two(wq->wqe_cnt) - 1;
1433+
wq->gen_mask = ~wrid_idx_mask;
1434+
wq->shifted_gen = attr->gen << __bf_shf(wrid_idx_mask + 1);
1435+
14031436
db_base = mmap(NULL, attr->pgsz, PROT_WRITE, MAP_SHARED, attr->cmd_fd,
14041437
attr->db_mmap_key);
14051438
if (db_base == MAP_FAILED) {
@@ -1482,6 +1515,7 @@ static int efa_sq_initialize(struct efa_qp *qp,
14821515
.pgsz = qp->page_size,
14831516
.sub_cq_idx = resp->send_sub_cq_idx,
14841517
.need_lock = need_lock,
1518+
.gen = qp->gen,
14851519
};
14861520

14871521
err = efa_wq_initialize(&qp->sq.wq, &wq_attr);
@@ -1568,6 +1602,7 @@ static int efa_rq_initialize(struct efa_qp *qp,
15681602
.pgsz = qp->page_size,
15691603
.sub_cq_idx = resp->recv_sub_cq_idx,
15701604
.need_lock = need_lock,
1605+
.gen = qp->gen,
15711606
};
15721607

15731608
err = efa_wq_initialize(&qp->rq.wq, &wq_attr);
@@ -1931,6 +1966,10 @@ static struct ibv_qp *create_qp(struct ibv_context *ibvctx,
19311966
qp->sq_sig_all = attr->sq_sig_all;
19321967
qp->dev = ibvctx->device;
19331968

1969+
pthread_spin_lock(&ctx->qp_table_lock);
1970+
qp->gen = ++ctx->qp_gen_table[ibvqp->qp_num & ctx->qp_table_sz_m1];
1971+
pthread_spin_unlock(&ctx->qp_table_lock);
1972+
19341973
err = efa_rq_initialize(qp, attr, &resp);
19351974
if (err)
19361975
goto err_destroy_qp;
@@ -2449,7 +2488,7 @@ int efa_post_send(struct ibv_qp *ibvqp, struct ibv_send_wr *wr,
24492488

24502489
/* Set rest of the descriptor fields */
24512490
efa_set_common_ctrl_flags(md, sq, EFA_IO_SEND);
2452-
md->req_id = efa_wq_get_next_wrid_idx_locked(wq, wr->wr_id);
2491+
md->req_id = efa_wq_get_dev_req_id_locked(wq, wr->wr_id);
24532492
md->dest_qp_num = wr->wr.ud.remote_qpn;
24542493
md->ah = ah->efa_ah;
24552494
md->qkey = wr->wr.ud.remote_qkey;
@@ -2521,7 +2560,7 @@ static void efa_send_wr_init(struct efa_qp *qp, struct ibv_qp_ex *ibvqpx,
25212560

25222561
sq->curr_tx_wqe.md = md;
25232562
efa_set_common_ctrl_flags(sq->curr_tx_wqe.md, sq, op_type);
2524-
sq->curr_tx_wqe.md->req_id = efa_wq_get_next_wrid_idx_locked(&sq->wq, ibvqpx->wr_id);
2563+
sq->curr_tx_wqe.md->req_id = efa_wq_get_dev_req_id_locked(&sq->wq, ibvqpx->wr_id);
25252564

25262565
/* advance index and change phase */
25272566
efa_sq_advance_post_idx(sq);
@@ -3061,7 +3100,7 @@ int efa_post_recv(struct ibv_qp *ibvqp, struct ibv_recv_wr *wr,
30613100

30623101
memset(&rx_buf, 0, sizeof(rx_buf));
30633102

3064-
rx_buf.req_id = efa_wq_get_next_wrid_idx_locked(wq, wr->wr_id);
3103+
rx_buf.req_id = efa_wq_get_dev_req_id_locked(wq, wr->wr_id);
30653104
wq->wqe_posted++;
30663105

30673106
/* Default init of the rx buffer */

0 commit comments

Comments
 (0)