Fix issue with unmatched messages in ph5diff (#3719)

This commit is contained in:
jhendersonHDF 2023-10-19 08:14:20 -05:00 committed by GitHub
parent 29c1c02300
commit c4a146efc4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 17 additions and 7 deletions

View File

@ -791,6 +791,19 @@ Bug Fixes since HDF5-1.14.0 release
Tools
-----
- Fixed an issue with unmatched MPI messages in ph5diff
The "manager" MPI rank in ph5diff was unintentionally sending "program end"
messages to its workers twice, leading to an error from MPICH similar to the
following:
Abort(810645519) on node 1 (rank 1 in comm 0): Fatal error in internal_Finalize: Other MPI error, error stack:
internal_Finalize(50)...........: MPI_Finalize failed
MPII_Finalize(394)..............:
MPIR_Comm_delete_internal(1224).: Communicator (handle=44000000) being freed has 1 unmatched message(s)
MPIR_Comm_release_always(1250)..:
MPIR_finalize_builtin_comms(154):
- Fixed an issue in h5repack for variable-length typed datasets
When repacking datasets into a new file, h5repack tries to determine whether

View File

@ -1485,9 +1485,6 @@ diff_match(hid_t file1_id, const char *grp1, trav_info_t *info1, hid_t file2_id,
} /* end else */
} /* end while */
for (i = 1; (int)i < g_nTasks; i++)
MPI_Send(NULL, 0, MPI_BYTE, (int)i, MPI_TAG_END, MPI_COMM_WORLD);
/* Print any final data waiting in our queue */
print_incoming_data();
} /* end if */

View File

@ -127,7 +127,7 @@ ph5diff_worker(int nID)
char filenames[2][MAX_FILENAME];
/* Retrieve filenames */
MPI_Recv(filenames, MAX_FILENAME * 2, MPI_CHAR, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &Status);
MPI_Recv(filenames, MAX_FILENAME * 2, MPI_CHAR, 0, MPI_TAG_PARALLEL, MPI_COMM_WORLD, &Status);
/* disable error reporting */
H5E_BEGIN_TRY
@ -173,7 +173,7 @@ ph5diff_worker(int nID)
/* When we get the token, send all of our output to the manager task and then return the token */
for (i = 0; i < outBuffOffset; i += PRINT_DATA_MAX_SIZE)
MPI_Send(outBuff + i, PRINT_DATA_MAX_SIZE, MPI_BYTE, 0, MPI_TAG_PRINT_DATA,
MPI_Send(outBuff + i, PRINT_DATA_MAX_SIZE, MPI_CHAR, 0, MPI_TAG_PRINT_DATA,
MPI_COMM_WORLD);
/* An overflow file exists, so we send its output to the manager too and then delete it */
@ -188,7 +188,7 @@ ph5diff_worker(int nID)
while ((tmp = getc(overflow_file)) >= 0) {
*(out_data + i++) = (char)tmp;
if (i == PRINT_DATA_MAX_SIZE) {
MPI_Send(out_data, PRINT_DATA_MAX_SIZE, MPI_BYTE, 0, MPI_TAG_PRINT_DATA,
MPI_Send(out_data, PRINT_DATA_MAX_SIZE, MPI_CHAR, 0, MPI_TAG_PRINT_DATA,
MPI_COMM_WORLD);
i = 0;
memset(out_data, 0, PRINT_DATA_MAX_SIZE);
@ -196,7 +196,7 @@ ph5diff_worker(int nID)
}
if (i > 0)
MPI_Send(out_data, PRINT_DATA_MAX_SIZE, MPI_BYTE, 0, MPI_TAG_PRINT_DATA,
MPI_Send(out_data, PRINT_DATA_MAX_SIZE, MPI_CHAR, 0, MPI_TAG_PRINT_DATA,
MPI_COMM_WORLD);
fclose(overflow_file);