@@ -49,6 +49,8 @@ enum rds_ib_fr_state {
 	MR_IS_STALE,		/* mr is possibly corrupt, marked if failure */
 };
 
+#define RDS_MR_INV_WR_ID	((u64)0xefefefefefefefefULL)
+
 /*
  * This is stored as mr->r_trans_private.
  */
@@ -120,7 +122,6 @@ static void rds_ib_mr_pool_flush_worker(struct work_struct *work);
 static int rds_ib_map_fastreg_mr(struct rds_ib_device *rds_ibdev,
 				 struct rds_ib_mr *ibmr,
 				 struct scatterlist *sg, unsigned int sg_len);
-static int rds_ib_fastreg_inv(struct rds_ib_mr *ibmr);
 
 static struct rds_ib_device *rds_ib_get_device(struct in6_addr *ipaddr)
 {
@@ -869,20 +870,18 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 		ret = ib_unmap_fmr(&fmr_list);
 		if (ret)
 			pr_warn("RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret);
-	} else {
-		list_for_each_entry(ibmr, &unmap_list, unmap_list) {
-			ret = rds_ib_fastreg_inv(ibmr);
-			if (ret)
-				pr_warn_ratelimited(
-					"RDS/IB: rds_ib_fastreg_inv failed (err=%d)\n",
-					ret);
-		}
 	}
 
 	/* Now we can destroy the DMA mapping and unpin any pages */
 	list_for_each_entry_safe(ibmr, next, &unmap_list, unmap_list) {
+		/* Teardown only FMRs here, teardown fastreg MRs later after
+		 * invalidating. However, increment 'unpinned' for both, since
+		 * it is used to trigger flush.
+		 */
 		unpinned += ibmr->sg_len;
-		__rds_ib_teardown_mr(ibmr);
+		if (!pool->use_fastreg)
+			__rds_ib_teardown_mr(ibmr);
+
 		if (nfreed < free_goal ||
 		    (!pool->use_fastreg &&
 		     ibmr->remap_count >= pool->fmr_attr.max_maps) ||
@@ -893,6 +892,7 @@ static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
 			rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
 			list_del(&ibmr->unmap_list);
 			if (pool->use_fastreg) {
+				__rds_ib_teardown_mr(ibmr);
 				if (ibmr->page_list)
 					ib_free_fast_reg_page_list(ibmr->page_list);
 				if (ibmr->mr)
@@ -1089,15 +1089,16 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
 /* Fastreg related functions */
 
 static int rds_ib_map_scatterlist(struct rds_ib_device *rds_ibdev,
-				  struct rds_ib_mr *ibmr)
+				  struct rds_ib_mr *ibmr,
+				  struct scatterlist *sg, unsigned int sg_len)
 {
 	struct ib_device *dev = rds_ibdev->dev;
 	int i, j, ret, page_cnt;
 	u32 len;
+	int sg_dma_len;
 
-	ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len,
-					 DMA_BIDIRECTIONAL);
-	if (unlikely(!ibmr->sg_dma_len)) {
+	sg_dma_len = ib_dma_map_sg(dev, sg, sg_len, DMA_BIDIRECTIONAL);
+	if (unlikely(!sg_dma_len)) {
 		pr_warn("RDS/IB: dma_map_sg failed!\n");
 		return -EBUSY;
 	}
@@ -1107,9 +1108,9 @@ static int rds_ib_map_scatterlist(struct rds_ib_device *rds_ibdev,
 	len = 0;
 
 	ret = -EINVAL;
-	for (i = 0; i < ibmr->sg_dma_len; ++i) {
-		unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]);
-		u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]);
+	for (i = 0; i < sg_dma_len; ++i) {
+		unsigned int dma_len = ib_sg_dma_len(dev, &sg[i]);
+		u64 dma_addr = ib_sg_dma_address(dev, &sg[i]);
 
 		ibmr->sg_byte_len += dma_len;
 		if (dma_addr & ~PAGE_MASK) {
@@ -1120,7 +1121,7 @@ static int rds_ib_map_scatterlist(struct rds_ib_device *rds_ibdev,
 		}
 
 		if ((dma_addr + dma_len) & ~PAGE_MASK) {
-			if (i < ibmr->sg_dma_len - 1)
+			if (i < sg_dma_len - 1)
 				goto out_unmap;
 			else
 				++ibmr->dma_npages;
@@ -1137,31 +1138,38 @@ static int rds_ib_map_scatterlist(struct rds_ib_device *rds_ibdev,
 	}
 
 	page_cnt = 0;
-	for (i = 0; i < ibmr->sg_dma_len; ++i) {
-		unsigned int dma_len = ib_sg_dma_len(dev, &ibmr->sg[i]);
-		u64 dma_addr = ib_sg_dma_address(dev, &ibmr->sg[i]);
+	for (i = 0; i < sg_dma_len; ++i) {
+		unsigned int dma_len = ib_sg_dma_len(dev, &sg[i]);
+		u64 dma_addr = ib_sg_dma_address(dev, &sg[i]);
 
 		for (j = 0; j < dma_len; j += PAGE_SIZE)
 			ibmr->page_list->page_list[page_cnt++] =
 				(dma_addr & PAGE_MASK) + j;
 	}
 
 	ibmr->dma_npages = page_cnt;
-	return 0;
+	return sg_dma_len;
 
 out_unmap:
+	if (sg_dma_len)
+		ib_dma_unmap_sg(rds_ibdev->dev, sg, sg_len, DMA_BIDIRECTIONAL);
 	return ret;
 }
 
 static int rds_ib_rdma_build_fastreg(struct rds_ib_device *rds_ibdev,
 				     struct rds_ib_mr *ibmr)
 {
-	struct ib_fast_reg_wr f_wr;
-	struct ib_send_wr *failed_wr;
+	struct ib_fast_reg_wr fastreg_wr;
+	struct ib_send_wr inv_wr, *failed_wr, *first_wr = NULL;
 	struct ib_qp *qp;
 	atomic_t *n_wrs;
 	int ret = 0;
 
+	if (ibmr->fr_state == MR_IS_STALE) {
+		WARN_ON(true);
+		return -EAGAIN;
+	}
+
 	if (ibmr->ic) {
 		n_wrs = &ibmr->ic->i_fastreg_wrs;
 		qp = ibmr->ic->i_cm_id->qp;
@@ -1171,38 +1179,48 @@ static int rds_ib_rdma_build_fastreg(struct rds_ib_device *rds_ibdev,
 		qp = rds_ibdev->fastreg_qp;
 	}
 
-	while (atomic_dec_return(n_wrs) <= 0) {
-		atomic_inc(n_wrs);
+	while (atomic_sub_return(2, n_wrs) <= 0) {
+		atomic_add(2, n_wrs);
 		/* Depending on how many times schedule() is called,
 		 * we could replace it with wait_event() in future.
 		 */
 		schedule();
 	}
 
+	if (ibmr->fr_state == MR_IS_VALID) {
+		memset(&inv_wr, 0, sizeof(inv_wr));
+		inv_wr.wr_id = RDS_MR_INV_WR_ID;
+		inv_wr.opcode = IB_WR_LOCAL_INV;
+		inv_wr.ex.invalidate_rkey = ibmr->mr->rkey;
+		first_wr = &inv_wr;
+	} else
+		ibmr->fr_state = MR_IS_VALID;
+
 	ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++);
-	WARN_ON(ibmr->fr_state != MR_IS_INVALID);
-	ibmr->fr_state = MR_IS_VALID;
-
-	memset(&f_wr, 0, sizeof(f_wr));
-	f_wr.wr.wr_id = (u64)ibmr;
-	f_wr.wr.opcode = IB_WR_FAST_REG_MR;
-	f_wr.length = ibmr->sg_byte_len;
-	f_wr.rkey = ibmr->mr->rkey;
-	f_wr.page_list = ibmr->page_list;
-	f_wr.page_list_len = ibmr->dma_npages;
-	f_wr.page_shift = PAGE_SHIFT;
-	f_wr.access_flags = IB_ACCESS_LOCAL_WRITE |
-			    IB_ACCESS_REMOTE_READ |
-			    IB_ACCESS_REMOTE_WRITE;
-	f_wr.iova_start = 0;
-	f_wr.wr.send_flags = IB_SEND_SIGNALED;
-
-	failed_wr = &f_wr.wr;
-	ret = ib_post_send(qp, &f_wr.wr, &failed_wr);
-	BUG_ON(failed_wr != &f_wr.wr);
+
+	memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+	fastreg_wr.wr.wr_id = (u64)ibmr;
+	fastreg_wr.wr.opcode = IB_WR_FAST_REG_MR;
+	fastreg_wr.length = ibmr->sg_byte_len;
+	fastreg_wr.rkey = ibmr->mr->rkey;
+	fastreg_wr.page_list = ibmr->page_list;
+	fastreg_wr.page_list_len = ibmr->dma_npages;
+	fastreg_wr.page_shift = PAGE_SHIFT;
+	fastreg_wr.access_flags = IB_ACCESS_LOCAL_WRITE |
+				  IB_ACCESS_REMOTE_READ |
+				  IB_ACCESS_REMOTE_WRITE;
+	fastreg_wr.iova_start = 0;
+	fastreg_wr.wr.send_flags = IB_SEND_SIGNALED;
+
+	if (!first_wr)
+		first_wr = &fastreg_wr.wr;
+	else
+		first_wr->next = &fastreg_wr.wr;
+
+	ret = ib_post_send(qp, first_wr, &failed_wr);
 	if (ret) {
-		atomic_inc(n_wrs);
-		ibmr->fr_state = MR_IS_INVALID;
+		atomic_add(2, n_wrs);
+		ibmr->fr_state = MR_IS_STALE;
 		pr_warn_ratelimited("RDS/IB: %s:%d ib_post_send returned %d\n",
 				    __func__, __LINE__, ret);
 		goto out;
@@ -1225,23 +1243,26 @@ static int rds_ib_map_fastreg_mr(struct rds_ib_device *rds_ibdev,
 				 struct scatterlist *sg, unsigned int sg_len)
 {
 	int ret = 0;
+	int sg_dma_len = 0;
 
-	/* We want to teardown old ibmr values here and fill it up with
-	 * new sg values
-	 */
-	rds_ib_teardown_mr(ibmr);
-
-	ibmr->sg = sg;
-	ibmr->sg_len = sg_len;
-
-	ret = rds_ib_map_scatterlist(rds_ibdev, ibmr);
-	if (ret)
+	ret = rds_ib_map_scatterlist(rds_ibdev, ibmr, sg, sg_len);
+	if (ret < 0)
 		goto out;
+	sg_dma_len = ret;
 
 	ret = rds_ib_rdma_build_fastreg(rds_ibdev, ibmr);
 	if (ret)
 		goto out;
 
+	/* Teardown previous values here since we
+	 * finished invalidating the previous key
+	 */
+	__rds_ib_teardown_mr(ibmr);
+
+	ibmr->sg = sg;
+	ibmr->sg_len = sg_len;
+	ibmr->sg_dma_len = sg_dma_len;
+
 	if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
 		rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
 	else
@@ -1250,56 +1271,21 @@ static int rds_ib_map_fastreg_mr(struct rds_ib_device *rds_ibdev,
 	return ret;
 
 out:
-	if (ibmr->sg_dma_len) {
-		ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len,
-				DMA_BIDIRECTIONAL);
-		ibmr->sg_dma_len = 0;
-	}
-	ibmr->sg = NULL;
-	ibmr->sg_len = 0;
-	return ret;
-}
-
-static int rds_ib_fastreg_inv(struct rds_ib_mr *ibmr)
-{
-	struct ib_send_wr s_wr, *failed_wr;
-	int ret = 0;
-
-	down_read(&ibmr->device->fastreg_lock);
-
-	if (ibmr->fr_state != MR_IS_VALID)
-		goto out;
-
-	ibmr->fr_state = MR_IS_INVALID;
-
-	memset(&s_wr, 0, sizeof(s_wr));
-	s_wr.wr_id = (u64)ibmr;
-	s_wr.opcode = IB_WR_LOCAL_INV;
-	s_wr.ex.invalidate_rkey = ibmr->mr->rkey;
-	s_wr.send_flags = IB_SEND_SIGNALED;
-
-	failed_wr = &s_wr;
-	ret = ib_post_send(ibmr->device->fastreg_qp, &s_wr, &failed_wr);
-	BUG_ON(failed_wr != &s_wr);
-	if (ret) {
-		ibmr->fr_state = MR_IS_STALE;
-		pr_warn_ratelimited("RDS/IB: %s:%d ib_post_send returned %d\n",
-				    __func__, __LINE__, ret);
-		goto out;
-	}
-
-	wait_for_completion(&ibmr->wr_comp);
-out:
-	up_read(&ibmr->device->fastreg_lock);
+	if (sg_dma_len)
+		ib_dma_unmap_sg(rds_ibdev->dev, sg, sg_len, DMA_BIDIRECTIONAL);
 	return ret;
 }
 
 void rds_ib_fcq_handler(struct rds_ib_device *rds_ibdev, struct ib_wc *wc)
 {
-	struct rds_ib_mr *ibmr = (struct rds_ib_mr *)wc->wr_id;
-	enum rds_ib_fr_state fr_state = ibmr->fr_state;
+	struct rds_ib_mr *ibmr;
+
+	if (wc->wr_id == RDS_MR_INV_WR_ID)
+		return;
+	ibmr = (struct rds_ib_mr *)wc->wr_id;
 
 	WARN_ON(ibmr->fr_state == MR_IS_STALE);
+	WARN_ON(ibmr->fr_state == MR_IS_INVALID);
 
 	if (wc->status != IB_WC_SUCCESS) {
 		pr_warn("RDS: IB: MR completion on fastreg qp status %u vendor_err %u\n",
@@ -1308,20 +1294,20 @@ void rds_ib_fcq_handler(struct rds_ib_device *rds_ibdev, struct ib_wc *wc)
 		queue_work(rds_wq, &rds_ibdev->fastreg_reset_w);
 	}
 
-	if (fr_state == MR_IS_INVALID) {
-		complete(&ibmr->wr_comp);
-	} else if (fr_state == MR_IS_VALID) {
-		atomic_inc(&rds_ibdev->fastreg_wrs);
-		complete(&ibmr->wr_comp);
-	}
+	atomic_add(2, &rds_ibdev->fastreg_wrs);
+	complete(&ibmr->wr_comp);
 }
 
 void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
 {
-	struct rds_ib_mr *ibmr = (struct rds_ib_mr *)wc->wr_id;
-	enum rds_ib_fr_state fr_state = ibmr->fr_state;
+	struct rds_ib_mr *ibmr;
+
+	if (wc->wr_id == RDS_MR_INV_WR_ID)
+		return;
+	ibmr = (struct rds_ib_mr *)wc->wr_id;
 
 	WARN_ON(ibmr->fr_state == MR_IS_STALE);
+	WARN_ON(ibmr->fr_state == MR_IS_INVALID);
 
 	if (wc->status != IB_WC_SUCCESS) {
 		if (rds_conn_up(ic->conn)) {
@@ -1333,10 +1319,6 @@ void rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
 		ibmr->fr_state = MR_IS_STALE;
 	}
 
-	if (fr_state == MR_IS_INVALID) {
-		complete(&ibmr->wr_comp);
-	} else if (fr_state == MR_IS_VALID) {
-		atomic_inc(&ic->i_fastreg_wrs);
-		complete(&ibmr->wr_comp);
-	}
+	atomic_add(2, &ic->i_fastreg_wrs);
+	complete(&ibmr->wr_comp);
 }
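The core accounting change above is that each fastreg mapping now posts a LOCAL_INV chained in front of the FAST_REG_MR, so it reserves two send-queue slots up front and the single signaled completion (or an ib_post_send() failure) returns both at once via atomic_add(2, ...). Below is a minimal, userspace-only sketch of that two-slot budget, not kernel code: C11 atomics stand in for the kernel's atomic_t, sched_yield() for schedule(), and the names (fastreg_wrs, reserve_wr_slots, release_wr_slots) are illustrative rather than part of the patch.

/* Userspace sketch of the two-slot work-request budget used by the patch. */
#include <stdatomic.h>
#include <stdio.h>
#include <sched.h>

#define WR_SLOTS_PER_MAP 2			/* one LOCAL_INV + one FAST_REG_MR */

static atomic_int fastreg_wrs = 64;		/* send-queue budget, like i_fastreg_wrs */

/* Mirrors: while (atomic_sub_return(2, n_wrs) <= 0) { atomic_add(2, n_wrs); schedule(); } */
static void reserve_wr_slots(void)
{
	while (atomic_fetch_sub(&fastreg_wrs, WR_SLOTS_PER_MAP) - WR_SLOTS_PER_MAP <= 0) {
		atomic_fetch_add(&fastreg_wrs, WR_SLOTS_PER_MAP);	/* undo the reservation and retry */
		sched_yield();
	}
}

/* Mirrors atomic_add(2, ...) in the completion handlers and on the post failure path. */
static void release_wr_slots(void)
{
	atomic_fetch_add(&fastreg_wrs, WR_SLOTS_PER_MAP);
}

int main(void)
{
	reserve_wr_slots();	/* ...post LOCAL_INV chained to FAST_REG_MR here... */
	release_wr_slots();	/* the one signaled completion gives both slots back */
	printf("budget back to %d\n", atomic_load(&fastreg_wrs));
	return 0;
}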