Skip to content

Commit 32277db

Browse files
committed
Add support for asynchronous progress in the TCP BTL.
All BTL-only operations (basically all data movements with the exception of the matching operation) can now be handled for the TCP BTL by a progress thread.
1 parent 4b38b6b commit 32277db

File tree

10 files changed

+737
-217
lines changed

10 files changed

+737
-217
lines changed

contrib/build-mca-comps-outside-of-tree/btl_tcp2.c

Lines changed: 74 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@
3232
#include "opal/datatype/opal_convertor.h"
3333
#include "ompi/mca/mpool/base/base.h"
3434
#include "ompi/mca/mpool/mpool.h"
35-
#include "ompi/proc/proc.h"
35+
#include "btl_tcp.h"
36+
#include "btl_tcp_frag.h"
37+
#include "btl_tcp_proc.h"
38+
#include "btl_tcp_endpoint.h"
3639

3740
mca_btl_tcp2_module_t mca_btl_tcp2_module = {
3841
{
@@ -57,9 +60,9 @@ mca_btl_tcp2_module_t mca_btl_tcp2_module = {
5760
mca_btl_tcp2_prepare_dst,
5861
mca_btl_tcp2_send,
5962
NULL, /* send immediate */
60-
mca_btl_tcp2_put,
61-
NULL, /* get */
62-
mca_btl_base_dump,
63+
mca_btl_tcp_put,
64+
NULL, /* get */
65+
mca_btl_tcp_dump,
6366
NULL, /* mpool */
6467
NULL, /* register error */
6568
mca_btl_tcp2_ft_event
@@ -134,7 +137,9 @@ int mca_btl_tcp2_add_procs( struct mca_btl_base_module_t* btl,
134137
/* we increase the count of MPI users of the event library
135138
once per peer, so that we are used until we aren't
136139
connected to a peer */
140+
#if !MCA_BTL_TCP_USES_PROGRESS_THREAD
137141
opal_progress_event_users_increment();
142+
#endif /* !MCA_BTL_TCP_USES_PROGRESS_THREAD */
138143
}
139144

140145
return OMPI_SUCCESS;
@@ -153,7 +158,9 @@ int mca_btl_tcp2_del_procs(struct mca_btl_base_module_t* btl,
153158
opal_list_remove_item(&tcp_btl->tcp_endpoints, (opal_list_item_t*)tcp_endpoint);
154159
OBJ_RELEASE(tcp_endpoint);
155160
}
161+
#if !MCA_BTL_TCP_USES_PROGRESS_THREAD
156162
opal_progress_event_users_decrement();
163+
#endif /* !MCA_BTL_TCP_USES_PROGRESS_THREAD */
157164
}
158165
return OMPI_SUCCESS;
159166
}
@@ -183,7 +190,11 @@ mca_btl_base_descriptor_t* mca_btl_tcp2_alloc(
183190
if( OPAL_UNLIKELY(NULL == frag) ) {
184191
return NULL;
185192
}
186-
193+
194+
#define GB_DEFINED 0
195+
#if GB_DEFINED
196+
opal_output(0, "alloc_frag( size = %lu )\n", size);
197+
#endif /* GB_DEFINED */
187198
frag->segments[0].seg_len = size;
188199
frag->segments[0].seg_addr.pval = frag+1;
189200

@@ -193,7 +204,8 @@ mca_btl_base_descriptor_t* mca_btl_tcp2_alloc(
193204
frag->base.des_dst_cnt = 0;
194205
frag->base.des_flags = flags;
195206
frag->base.order = MCA_BTL_NO_ORDER;
196-
frag->btl = (mca_btl_tcp2_module_t*)btl;
207+
frag->btl = (mca_btl_tcp_module_t*)btl;
208+
frag->endpoint = endpoint;
197209
return (mca_btl_base_descriptor_t*)frag;
198210
}
199211

@@ -296,6 +308,10 @@ mca_btl_base_descriptor_t* mca_btl_tcp2_prepare_src(
296308
frag->base.des_flags = flags;
297309
frag->base.order = MCA_BTL_NO_ORDER;
298310
*size = max_data;
311+
#if GB_DEFINED
312+
opal_output(0, "prepare_src( bConverted = %lu, size = %lu\n",
313+
convertor->bConverted, *size);
314+
#endif /* GB_DEFINED */
299315
return &frag->base;
300316
}
301317

@@ -343,6 +359,10 @@ mca_btl_base_descriptor_t* mca_btl_tcp2_prepare_dst(
343359
frag->base.des_dst_cnt = 1;
344360
frag->base.des_flags = flags;
345361
frag->base.order = MCA_BTL_NO_ORDER;
362+
#if GB_DEFINED
363+
opal_output(0, " prepare_dst( bConverted = %lu, size = %lu\n",
364+
convertor->bConverted, *size);
365+
#endif /* GB_DEFINED */
346366
return &frag->base;
347367
}
348368

@@ -384,7 +404,10 @@ int mca_btl_tcp2_send( struct mca_btl_base_module_t* btl,
384404
frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_SEND;
385405
frag->hdr.count = 0;
386406
if (endpoint->endpoint_nbo) MCA_BTL_TCP_HDR_HTON(frag->hdr);
387-
return mca_btl_tcp2_endpoint_send(endpoint,frag);
407+
#if GB_DEFINED
408+
opal_output(0, "frag_send( size = %u )\n", frag->hdr.size );
409+
#endif /* GB_DEFINED */
410+
return mca_btl_tcp_endpoint_send(endpoint,frag);
388411
}
389412

390413

@@ -425,7 +448,10 @@ int mca_btl_tcp2_put( mca_btl_base_module_t* btl,
425448
frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_PUT;
426449
frag->hdr.count = frag->base.des_dst_cnt;
427450
if (endpoint->endpoint_nbo) MCA_BTL_TCP_HDR_HTON(frag->hdr);
428-
return ((i = mca_btl_tcp2_endpoint_send(endpoint,frag)) >= 0 ? OMPI_SUCCESS : i);
451+
#if GB_DEFINED
452+
opal_output(0, "frag_put( size = %u )\n", frag->hdr.size );
453+
#endif /* GB_DEFINED */
454+
return ((i = mca_btl_tcp_endpoint_send(endpoint,frag)) >= 0 ? OMPI_SUCCESS : i);
429455
}
430456

431457

@@ -462,12 +488,16 @@ int mca_btl_tcp2_get(
462488
frag->hdr.type = MCA_BTL_TCP_HDR_TYPE_GET;
463489
frag->hdr.count = frag->base.des_src_cnt;
464490
if (endpoint->endpoint_nbo) MCA_BTL_TCP_HDR_HTON(frag->hdr);
465-
return ((rc = mca_btl_tcp2_endpoint_send(endpoint,frag)) >= 0 ? OMPI_SUCCESS : rc);
491+
#if GB_DEFINED
492+
opal_output(0, "frag_get( size = %u )\n", frag->hdr.size );
493+
#endif /* GB_DEFINED */
494+
return ((rc = mca_btl_tcp_endpoint_send(endpoint,frag)) >= 0 ? OMPI_SUCCESS : rc);
466495
}
467496

468497

469498
/*
470-
* Cleanup/release module resources.
499+
* Cleanup/release module resources. This function should only be called once,
500+
* so there is no need to protect it.
471501
*/
472502

473503
int mca_btl_tcp2_finalize(struct mca_btl_base_module_t* btl)
@@ -479,8 +509,42 @@ int mca_btl_tcp2_finalize(struct mca_btl_base_module_t* btl)
479509
item = opal_list_remove_first(&tcp_btl->tcp_endpoints)) {
480510
mca_btl_tcp2_endpoint_t *endpoint = (mca_btl_tcp2_endpoint_t*)item;
481511
OBJ_RELEASE(endpoint);
512+
#if !MCA_BTL_TCP_USES_PROGRESS_THREAD
482513
opal_progress_event_users_decrement();
514+
#endif /* !MCA_BTL_TCP_USES_PROGRESS_THREAD */
483515
}
484516
free(tcp_btl);
485517
return OMPI_SUCCESS;
486518
}
519+
520+
/**
521+
* Dump the state of a TCP BTL module through mca_btl_base_err().
*
* Always emits one summary record containing the local process name,
* the module address, the module's tcp_ifkindex (labelled "kernel_id"),
* and the latency/bandwidth values stored in the base module (btl->super).
* When MCA_BTL_TCP_STATISTICS is non-zero the record also includes the
* sent/received byte counters.
*
* After the summary, endpoint information is dumped with
* mca_btl_tcp_endpoint_dump():
*   - if endpoint is non-NULL, only that endpoint is dumped;
*   - otherwise, if verbose is non-zero, every item on
*     btl->tcp_endpoints is dumped;
*   - otherwise nothing more is printed.
*
* @param base_btl  Base module pointer; cast to mca_btl_tcp_module_t.
* @param endpoint  Endpoint to dump, or NULL to fall back on the verbose
*                  behavior described above.
* @param verbose   Non-zero requests a dump of all endpoints when no
*                  specific endpoint is given.
*
* NOTE(review): the statistics branch reads btl->btl_bytes_recv while the
* matching "sent" counter is btl->tcp_bytes_sent. The differing prefix
* looks like a typo for tcp_bytes_recv, which would only surface when
* MCA_BTL_TCP_STATISTICS is enabled -- confirm against the module struct.
* NOTE(review): "%lu" assumes both counters are unsigned long -- confirm
* their declared type (a size_t would call for "%zu").
* NOTE(review): no locking is visible around the walk of tcp_endpoints;
* verify whether callers must hold a lock.
522+
*/
523+
void mca_btl_tcp_dump(struct mca_btl_base_module_t* base_btl,
524+
struct mca_btl_base_endpoint_t* endpoint,
525+
int verbose)
526+
{
527+
mca_btl_tcp_module_t* btl = (mca_btl_tcp_module_t*)base_btl;
528+
mca_btl_base_err("%s TCP %p kernel_id %d\n"
529+
#if MCA_BTL_TCP_STATISTICS
530+
" | statistics: sent %lu recv %lu\n"
531+
#endif /* MCA_BTL_TCP_STATISTICS */
532+
" | latency %u bandwidth %u\n",
533+
ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), (void*)btl, btl->tcp_ifkindex,
534+
#if MCA_BTL_TCP_STATISTICS
535+
btl->tcp_bytes_sent, btl->btl_bytes_recv,
536+
#endif /* MCA_BTL_TCP_STATISTICS */
537+
btl->super.btl_latency, btl->super.btl_bandwidth);
538+
/* An explicit endpoint takes precedence; the full endpoint list is only
 * walked when no endpoint was supplied and verbose is non-zero. */
if( NULL != endpoint ) {
539+
mca_btl_tcp_endpoint_dump( endpoint, "TCP" );
540+
} else if( verbose ) {
541+
opal_list_item_t *item;
542+
543+
for(item = opal_list_get_first(&btl->tcp_endpoints);
544+
item != opal_list_get_end(&btl->tcp_endpoints);
545+
item = opal_list_get_next(item)) {
546+
mca_btl_tcp_endpoint_dump( (mca_btl_base_endpoint_t*)item, "TCP" );
547+
}
548+
}
549+
}
550+

0 commit comments

Comments
 (0)