Skip to content

Commit 7cd3aba

Browse files
committed
Optimize the pack/unpack of contiguous with gaps datatype.
Signed-off-by: George Bosilca <[email protected]>
1 parent 31a20f5 commit 7cd3aba

File tree

2 files changed

+48
-68
lines changed

2 files changed

+48
-68
lines changed

opal/datatype/opal_datatype_pack.c

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -129,22 +129,20 @@ opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv,
129129
}
130130
/* We can provide directly the pointers in the user buffers (like the convertor_raw) */
131131
if( NULL == iov[0].iov_base ) {
132-
user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp;
132+
user_memory = pConv->pBaseBuf + pData->true_lb;
133+
133134
for( idx = 0; (idx < (*out_size)) && stack[0].count; idx++ ) {
134-
iov[idx].iov_base = user_memory;
135+
iov[idx].iov_base = user_memory + stack[0].disp + stack[1].disp;
135136
iov[idx].iov_len = stack[1].count;
136137
COMPUTE_CSUM( iov[idx].iov_base, iov[idx].iov_len, pConv );
137138

138-
user_memory += extent;
139139
pConv->bConverted += stack[1].count;
140140

141141
stack[0].disp += extent;
142142
stack[0].count--;
143-
stack[1].disp = 0;
143+
stack[1].disp = 0;
144144
stack[1].count = pData->size; /* we might need this to update the partial
145145
* length for the first iteration */
146-
147-
user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp;
148146
}
149147
goto update_status_and_return;
150148
}

opal/datatype/opal_datatype_unpack.c

Lines changed: 44 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -70,97 +70,79 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv,
7070
{
7171
const opal_datatype_t *pData = pConv->pDesc;
7272
unsigned char *user_memory, *packed_buffer;
73-
uint32_t iov_count, i;
74-
size_t remaining, length, initial_bytes_converted = pConv->bConverted;
73+
uint32_t iov_idx, i;
74+
size_t remaining, initial_bytes_converted = pConv->bConverted;
7575
dt_stack_t* stack = pConv->pStack;
7676
ptrdiff_t extent = pData->ub - pData->lb;
7777

78-
DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n",
78+
DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( pBaseBuf %p, iov count %d )\n",
7979
(void*)pConv->pBaseBuf, *out_size ); );
8080
if( stack[1].type != opal_datatype_uint1.id ) {
8181
stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size;
8282
stack[1].type = opal_datatype_uint1.id;
8383
}
8484

85-
for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
86-
remaining = pConv->local_size - pConv->bConverted;
87-
if( 0 == remaining ) break; /* we're done this time */
88-
if( remaining > iov[iov_count].iov_len )
89-
remaining = iov[iov_count].iov_len;
90-
packed_buffer = (unsigned char*)iov[iov_count].iov_base;
91-
pConv->bConverted += remaining; /* how much will get unpacked this time */
92-
user_memory = pConv->pBaseBuf + pData->true_lb;
93-
94-
if( (ptrdiff_t)pData->size == extent ) {
95-
user_memory += pConv->bConverted;
96-
DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( user_memory %p, packed_buffer %p length %" PRIsize_t "\n",
97-
(void*)user_memory, (void*)packed_buffer, remaining ); );
85+
if( (ptrdiff_t)pData->size == extent ) {
86+
for( iov_idx = 0; iov_idx < (*out_size); iov_idx++ ) {
87+
remaining = pConv->local_size - pConv->bConverted;
88+
if( 0 == remaining ) break; /* we're done this time */
89+
if( remaining > iov[iov_idx].iov_len )
90+
remaining = iov[iov_idx].iov_len;
91+
92+
packed_buffer = (unsigned char*)iov[iov_idx].iov_base;
93+
user_memory = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;
9894

9995
/* contiguous data or basic datatype with count */
10096
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining,
10197
pConv->pBaseBuf, pData, pConv->count );
102-
DO_DEBUG( opal_output( 0, "1. unpack contig dest %p src %p length %" PRIsize_t "\n",
103-
(void*)user_memory, (void*)packed_buffer, remaining ); );
98+
DO_DEBUG( opal_output( 0, "unpack contig [%d] dest %p src %p length %" PRIsize_t "\n",
99+
iov_idx, (void*)user_memory, (void*)packed_buffer, remaining ); );
104100
MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv );
105-
} else {
106-
user_memory += stack[0].disp + stack[1].disp;
101+
pConv->bConverted += remaining; /* how much will get unpacked this time */
102+
}
103+
} else {
104+
for( iov_idx = 0; iov_idx < (*out_size); iov_idx++ ) {
105+
remaining = pConv->local_size - pConv->bConverted;
106+
if( 0 == remaining ) break; /* we're done this time */
107+
if( remaining > iov[iov_idx].iov_len )
108+
remaining = iov[iov_idx].iov_len;
109+
110+
packed_buffer = (unsigned char*)iov[iov_idx].iov_base;
111+
user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp;
112+
pConv->bConverted += remaining; /* how much will get unpacked this time */
113+
114+
for( i = 0; stack[1].count <= remaining; i++ ) { /* partial or full data */
115+
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, stack[1].count, pConv->pBaseBuf,
116+
pData, pConv->count );
117+
DO_DEBUG( opal_output( 0, "unpack gaps [%d] dest %p src %p length %" PRIsize_t " [%d]\n",
118+
iov_idx, (void*)user_memory, (void*)packed_buffer, stack[1].count, i ); );
119+
MEMCPY_CSUM( user_memory, packed_buffer, stack[1].count, pConv );
107120

108-
DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( user_memory %p, packed_buffer %p length %" PRIsize_t "\n",
109-
(void*)user_memory, (void*)packed_buffer, remaining ); );
121+
packed_buffer += stack[1].count;
122+
remaining -= stack[1].count;
110123

111-
length = (0 == pConv->stack_pos ? 0 : stack[1].count); /* left over from the last unpack */
112-
/* complete the last copy */
113-
if( (pData->size != length) && (length <= remaining) ) {
114-
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf,
115-
pData, pConv->count );
116-
DO_DEBUG( opal_output( 0, "2. unpack dest %p src %p length %" PRIsize_t "\n",
117-
(void*)user_memory, (void*)packed_buffer, length ); );
118-
MEMCPY_CSUM( user_memory, packed_buffer, length, pConv );
119-
packed_buffer += length;
120-
remaining -= length;
121-
stack[1].count -= length;
122-
stack[1].disp += length; /* just in case, we overwrite this below */
123-
if( 0 == stack[1].count) { /* one completed element */
124-
stack[0].count--;
125-
stack[0].disp += extent;
126-
if( 0 == stack[0].count )
127-
break;
128-
stack[1].count = pData->size;
129-
stack[1].disp = 0;
130-
}
131-
user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp;
132-
}
124+
stack[0].count--;
125+
stack[0].disp += extent;
126+
stack[1].count = pData->size;
127+
stack[1].disp = 0;
133128

134-
for( i = 0; pData->size <= remaining; i++ ) {
135-
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf,
136-
pData, pConv->count );
137-
DO_DEBUG( opal_output( 0, "3. unpack dest %p src %p length %" PRIsize_t "\n",
138-
(void*)user_memory, (void*)packed_buffer, pData->size ); );
139-
MEMCPY_CSUM( user_memory, packed_buffer, pData->size, pConv );
140-
packed_buffer += pData->size;
141-
user_memory += extent;
142-
remaining -= pData->size;
129+
user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp;
143130
}
144-
stack[0].count -= i;
145-
stack[0].disp += (i * extent);
146131

147132
/* Copy the last bits */
148133
if( 0 != remaining ) {
149134
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf,
150135
pData, pConv->count );
151-
DO_DEBUG( opal_output( 0, "4. unpack dest %p src %p length %" PRIsize_t "\n",
152-
(void*)user_memory, (void*)packed_buffer, remaining ); );
136+
DO_DEBUG( opal_output( 0, "unpack gaps [%d] dest %p src %p length %" PRIsize_t " [epilog]\n",
137+
iov_idx, (void*)user_memory, (void*)packed_buffer, remaining ); );
153138
MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv );
154139
stack[1].count -= remaining;
155140
stack[1].disp += remaining; /* keep the += in case we are copying less than the datatype size */
156-
if( 0 == stack[1].count ) { /* prepare for the next element */
157-
stack[1].count = pData->size;
158-
stack[1].disp = 0;
159-
}
141+
assert( stack[1].count );
160142
}
161143
}
162144
}
163-
*out_size = iov_count; /* we only reach this line after the for loop succesfully complete */
145+
*out_size = iov_idx; /* we only reach this line after the for loop successfully completes */
164146
*max_data = pConv->bConverted - initial_bytes_converted;
165147
if( pConv->bConverted == pConv->local_size ) pConv->flags |= CONVERTOR_COMPLETED;
166148
return !!(pConv->flags & CONVERTOR_COMPLETED); /* done or not */

0 commit comments

Comments
 (0)