mirror of https://github.com/HDFGroup/hdf5.git (synced 2025-01-30 15:32:37 +08:00)

Merge pull request #1523 in HDFFV/hdf5 from ~JHENDERSON/hdf5:develop to develop

* commit '8f447bd3564a88738b9a26b1b0ac3bba785efd39':
  Add RELEASE.txt note for collective metadata read fixes
  Fix some collective metadata read issues

commit 8f42566f7a
RELEASE.txt

@@ -264,6 +264,22 @@ Bug Fixes since HDF5-1.10.3 release
 
     Library
     -------
+    - Fix hangs with collective metadata reads during chunked dataset I/O
+
+      In the parallel library, it was discovered that when a particular
+      sequence of operations following a pattern of:
+
+      "write to chunked dataset" -> "flush file" -> "read from dataset"
+
+      occurred with collective metadata reads enabled, hangs could be
+      observed due to certain MPI ranks not participating in the collective
+      metadata reads.
+
+      To fix the issue, collective metadata reads are now disabled during
+      chunked dataset raw data I/O.
+
+      (JTH - 2019/02/11, HDFFV-10563, HDFFV-10688)
+
     - Performance issue when closing an object
 
       The slow down is due to the search of the "tag_list" to find
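For context, the following is a minimal, self-contained sketch of the access pattern the release note above describes, with collective metadata reads enabled on the file access property list. It is not part of this change set; the file name, dataset shape, chunk size, and per-rank selection are illustrative, and error checking is omitted for brevity.

    /* Sketch (not from this change set) of the pattern
     * "write to chunked dataset" -> "flush file" -> "read from dataset"
     * with collective metadata reads enabled. */
    #include <mpi.h>
    #include <hdf5.h>

    int main(int argc, char **argv)
    {
        int     mpi_rank, mpi_size;
        hid_t   fapl_id, dcpl_id, dxpl_id, fspace_id, mspace_id, file_id, dset_id;
        hsize_t chunk_dims[1] = {8};
        hsize_t dims[1], start[1], count[1];
        int     buf[8] = {0};

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
        MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);

        /* Enable collective metadata reads on the file access property list */
        fapl_id = H5Pcreate(H5P_FILE_ACCESS);
        H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL);
        H5Pset_all_coll_metadata_ops(fapl_id, 1);

        file_id = H5Fcreate("coll_md_read_pattern.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);

        /* Create a chunked dataset with one chunk per rank */
        dims[0]   = (hsize_t)mpi_size * chunk_dims[0];
        fspace_id = H5Screate_simple(1, dims, NULL);
        dcpl_id   = H5Pcreate(H5P_DATASET_CREATE);
        H5Pset_chunk(dcpl_id, 1, chunk_dims);
        dset_id   = H5Dcreate2(file_id, "dset", H5T_NATIVE_INT, fspace_id,
                               H5P_DEFAULT, dcpl_id, H5P_DEFAULT);

        /* Each rank selects its own chunk and uses collective raw data I/O */
        start[0] = (hsize_t)mpi_rank * chunk_dims[0];
        count[0] = chunk_dims[0];
        H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, start, NULL, count, NULL);
        mspace_id = H5Screate_simple(1, count, NULL);

        dxpl_id = H5Pcreate(H5P_DATASET_XFER);
        H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE);

        /* "write to chunked dataset" */
        H5Dwrite(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, dxpl_id, buf);

        /* "flush file" */
        H5Fflush(file_id, H5F_SCOPE_GLOBAL);

        /* "read from dataset" -- the step where the reported hangs occurred */
        H5Dread(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, dxpl_id, buf);

        H5Pclose(dxpl_id);
        H5Pclose(dcpl_id);
        H5Sclose(mspace_id);
        H5Sclose(fspace_id);
        H5Dclose(dset_id);
        H5Pclose(fapl_id);
        H5Fclose(file_id);

        MPI_Finalize();
        return 0;
    }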
src/H5C.c (55 changed lines)

@@ -1483,31 +1483,17 @@ H5C_insert_entry(H5F_t * f,
     H5C__UPDATE_STATS_FOR_INSERTION(cache_ptr, entry_ptr)
 
 #ifdef H5_HAVE_PARALLEL
-    if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
-        coll_access = (H5P_USER_TRUE == f->coll_md_read ? TRUE : FALSE);
-
-        /* If not explicitly disabled, get the cmdr setting from the API context */
-        if(!coll_access && H5P_FORCE_FALSE != f->coll_md_read)
-            coll_access = H5CX_get_coll_metadata_read();
-    } /* end if */
+    if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI))
+        coll_access = H5CX_get_coll_metadata_read();
 
     entry_ptr->coll_access = coll_access;
     if(coll_access) {
         H5C__INSERT_IN_COLL_LIST(cache_ptr, entry_ptr, FAIL)
 
         /* Make sure the size of the collective entries in the cache remain in check */
-        if(H5P_USER_TRUE == f->coll_md_read) {
-            if(cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100) {
-                if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
-                    HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear collective metadata entries")
-            } /* end if */
-        } /* end if */
-        else {
-            if(cache_ptr->max_cache_size * 40 < cache_ptr->coll_list_size * 100) {
-                if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
-                    HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear collective metadata entries")
-            } /* end if */
-        } /* end else */
+        if(cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100)
+            if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
+                HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL, "can't clear collective metadata entries")
     } /* end if */
 #endif

@@ -2243,13 +2229,8 @@ H5C_protect(H5F_t * f,
     ring = H5CX_get_ring();
 
 #ifdef H5_HAVE_PARALLEL
-    if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
-        coll_access = (H5P_USER_TRUE == f->coll_md_read ? TRUE : FALSE);
-
-        /* If not explicitly disabled, get the cmdr setting from the API context */
-        if(!coll_access && H5P_FORCE_FALSE != f->coll_md_read)
-            coll_access = H5CX_get_coll_metadata_read();
-    } /* end if */
+    if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI))
+        coll_access = H5CX_get_coll_metadata_read();
 #endif /* H5_HAVE_PARALLEL */
 
     /* first check to see if the target is in cache */

@@ -2286,7 +2267,7 @@ H5C_protect(H5F_t * f,
            the entry in their cache still have to participate in the
            bcast. */
 #ifdef H5_HAVE_PARALLEL
-        if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI) && coll_access) {
+        if(coll_access) {
             if(!(entry_ptr->is_dirty) && !(entry_ptr->coll_access)) {
                 MPI_Comm comm;      /* File MPI Communicator */
                 int mpi_code;       /* MPI error code */

@@ -2601,21 +2582,11 @@ H5C_protect(H5F_t * f,
     } /* end if */
 
 #ifdef H5_HAVE_PARALLEL
-    if(H5F_HAS_FEATURE(f, H5FD_FEAT_HAS_MPI)) {
-        /* Make sure the size of the collective entries in the cache remain in check */
-        if(coll_access) {
-            if(H5P_USER_TRUE == f->coll_md_read) {
-                if(cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100)
-                    if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
-                        HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, "can't clear collective metadata entries")
-            } /* end if */
-            else {
-                if(cache_ptr->max_cache_size * 40 < cache_ptr->coll_list_size * 100)
-                    if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
-                        HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, "can't clear collective metadata entries")
-            } /* end else */
-        } /* end if */
-    } /* end if */
+    /* Make sure the size of the collective entries in the cache remain in check */
+    if(coll_access)
+        if(cache_ptr->max_cache_size * 80 < cache_ptr->coll_list_size * 100)
+            if(H5C_clear_coll_entries(cache_ptr, TRUE) < 0)
+                HGOTO_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL, "can't clear collective metadata entries")
 #endif /* H5_HAVE_PARALLEL */
 
 done:
src/H5CX.c

@@ -815,8 +815,8 @@ H5CX_set_apl(hid_t *acspl_id, const H5P_libclass_t *libclass,
 
 #ifdef H5_HAVE_PARALLEL
     /* If this routine is not guaranteed to be collective (i.e. it doesn't
-     * modify the structural metadata in a file), check if we should use
-     * a collective metadata read.
+     * modify the structural metadata in a file), check if the application
+     * specified a collective metadata read for just this operation.
      */
     if(!is_collective) {
         H5P_genplist_t *plist;      /* Property list pointer */
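The reworded comment above refers to the fact that, besides the file-wide FAPL setting, an application can request collective metadata reads for a single operation by setting the property on that operation's access property list. A brief sketch of that usage follows; the file handle, dataset name, and property list names are illustrative and not taken from this change set.

    /* Assumes file_id is a file opened by all ranks through an MPI-IO FAPL */
    hid_t dapl_id = H5Pcreate(H5P_DATASET_ACCESS);

    /* Request collective metadata reads for operations on this dataset only */
    H5Pset_all_coll_metadata_ops(dapl_id, 1);

    /* Every rank must now participate in opening (and later reading) it */
    hid_t dset_id = H5Dopen2(file_id, "dset", dapl_id);

    H5Pclose(dapl_id);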
src/H5Cimage.c

@@ -1065,9 +1065,9 @@ H5C__read_cache_image(H5F_t *f, H5C_t *cache_ptr)
     H5C__UPDATE_STATS_FOR_CACHE_IMAGE_READ(cache_ptr)
 
 #ifdef H5_HAVE_PARALLEL
-    if ( aux_ptr ) {
+    if ( aux_ptr ) {
 
-        /* Broadcast cache image */
+        /* Broadcast cache image */
         if ( MPI_SUCCESS !=
              (mpi_result = MPI_Bcast(cache_ptr->image_buffer,
                                      (int)cache_ptr->image_len, MPI_BYTE,
src/H5Dchunk.c

@@ -2914,9 +2914,6 @@ H5D__chunk_lookup(const H5D_t *dset, const hsize_t *scaled,
     /* Check for cached information */
     if(!H5D__chunk_cinfo_cache_found(&dset->shared->cache.chunk.last, udata)) {
         H5D_chk_idx_info_t idx_info;        /* Chunked index info */
-#ifdef H5_HAVE_PARALLEL
-        H5P_coll_md_read_flag_t temp_cmr;   /* Temp value to hold the coll metadata read setting */
-#endif /* H5_HAVE_PARALLEL */
 
         /* Compose chunked index info struct */
         idx_info.f = dset->oloc.file;

@@ -2925,25 +2922,18 @@ H5D__chunk_lookup(const H5D_t *dset, const hsize_t *scaled,
         idx_info.storage = &dset->shared->layout.storage.u.chunk;
 
 #ifdef H5_HAVE_PARALLEL
-        if(H5F_HAS_FEATURE(idx_info.f, H5FD_FEAT_HAS_MPI)) {
-            /* disable collective metadata read for chunk indexes
-               as it is highly unlikely that users would read the
-               same chunks from all processes. MSC - might turn on
-               for root node? */
-            temp_cmr = H5F_COLL_MD_READ(idx_info.f);
-            H5F_set_coll_md_read(idx_info.f, H5P_FORCE_FALSE);
-        } /* end if */
+        /* Disable collective metadata read for chunk indexes as it is
+         * highly unlikely that users would read the same chunks from all
+         * processes.
+         */
+        if(H5F_HAS_FEATURE(idx_info.f, H5FD_FEAT_HAS_MPI))
+            H5CX_set_coll_metadata_read(FALSE);
 #endif /* H5_HAVE_PARALLEL */
 
         /* Go get the chunk information */
         if((dset->shared->layout.storage.u.chunk.ops->get_addr)(&idx_info, udata) < 0)
             HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't query chunk address")
 
-#ifdef H5_HAVE_PARALLEL
-        if(H5F_HAS_FEATURE(idx_info.f, H5FD_FEAT_HAS_MPI))
-            H5F_set_coll_md_read(idx_info.f, temp_cmr);
-#endif /* H5_HAVE_PARALLEL */
-
         /*
          * Cache the information retrieved.
         *
src/H5Dmpio.c

@@ -800,6 +800,10 @@ H5D__chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *type_info,
     HDassert(type_info);
     HDassert(fm);
 
+    /* Disable collective metadata reads for chunked dataset I/O operations
+     * in order to prevent potential hangs */
+    H5CX_set_coll_metadata_read(FALSE);
+
     /* Check the optional property list for the collective chunk IO optimization option */
     if(H5CX_get_mpio_chunk_opt_mode(&chunk_opt_mode) < 0)
         HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "couldn't get chunk optimization option")

@@ -2313,7 +2317,7 @@ if(H5DEBUG(D))
 
         /* Broadcasting the MPI_IO option info. and chunk address info. */
         if(MPI_SUCCESS != (mpi_code = MPI_Bcast(total_chunk_addr_array, (int)(sizeof(haddr_t) * fm->layout->u.chunk.nchunks), MPI_BYTE, (int)0, io_info->comm)))
-            HMPI_GOTO_ERROR(FAIL, "MPI_BCast failed", mpi_code)
+            HMPI_GOTO_ERROR(FAIL, "MPI_BCast failed", mpi_code)
     } /* end if */
 
     /* Start at first node in chunk skip list */
src/H5Z.c

@@ -611,7 +611,7 @@ H5Z__flush_file_cb(void *obj_ptr, hid_t H5_ATTR_UNUSED obj_id, void *key)
         /* Sanity check for collectively calling H5Zunregister, if requested */
         /* (Sanity check assumes that a barrier on one file's comm
          * is sufficient (i.e. that there aren't different comms for
-         * different files). -QAK, 2018/02/14
+         * different files). -QAK, 2018/02/14)
          */
         if(H5_coll_api_sanity_check_g && !object->sanity_checked) {
             MPI_Comm mpi_comm;      /* File's communicator */
testpar/t_cache.c

@@ -7225,7 +7225,7 @@ smoke_check_6(int metadata_write_strategy)
             /* some error occured in the server -- report failure */
             nerrors++;
             if ( verbose ) {
-                HDfprintf(stdout, "%d:%s: server_main() failed.\n",
+                HDfprintf(stdout, "%d:%s: server_main() failed.\n",
                           world_mpi_rank, FUNC);
             }
         }

@@ -7241,7 +7241,7 @@ smoke_check_6(int metadata_write_strategy)
             fid = -1;
             cache_ptr = NULL;
             if ( verbose ) {
-                HDfprintf(stdout, "%d:%s: setup_cache_for_test() failed.\n",
+                HDfprintf(stdout, "%d:%s: setup_cache_for_test() failed.\n",
                           world_mpi_rank, FUNC);
             }
         }

@@ -7250,7 +7250,7 @@ smoke_check_6(int metadata_write_strategy)
         virt_num_data_entries = NUM_DATA_ENTRIES;
 
         /* insert the first half collectively */
-        file_ptr->coll_md_read = H5P_USER_TRUE;
+        H5CX_set_coll_metadata_read(TRUE);
         for ( i = 0; i < virt_num_data_entries/2; i++ )
         {
             struct datum * entry_ptr;

@@ -7271,7 +7271,7 @@ smoke_check_6(int metadata_write_strategy)
         }
 
         /* insert the other half independently */
-        file_ptr->coll_md_read = H5P_USER_FALSE;
+        H5CX_set_coll_metadata_read(FALSE);
         for ( i = virt_num_data_entries/2; i < virt_num_data_entries; i++ )
         {
             struct datum * entry_ptr;

@@ -7291,7 +7291,7 @@ smoke_check_6(int metadata_write_strategy)
             HDassert(cache_ptr->max_cache_size*0.8 > cache_ptr->coll_list_size);
         }
 
-        /* flush the file */
+        /* flush the file */
         if ( H5Fflush(fid, H5F_SCOPE_GLOBAL) < 0 ) {
             nerrors++;
             if ( verbose ) {

@@ -7301,7 +7301,7 @@ smoke_check_6(int metadata_write_strategy)
         }
 
         /* Protect the first half of the entries collectively */
-        file_ptr->coll_md_read = H5P_USER_TRUE;
+        H5CX_set_coll_metadata_read(TRUE);
         for ( i = 0; i < (virt_num_data_entries / 2); i++ )
         {
             struct datum * entry_ptr;

@@ -7322,13 +7322,13 @@ smoke_check_6(int metadata_write_strategy)
         }
 
         /* protect the other half independently */
-        file_ptr->coll_md_read = H5P_USER_FALSE;
+        H5CX_set_coll_metadata_read(FALSE);
         for ( i = virt_num_data_entries/2; i < virt_num_data_entries; i++ )
        {
            struct datum * entry_ptr;
            entry_ptr = &(data[i]);
 
-            lock_entry(file_ptr, i);
+            lock_entry(file_ptr, i);
 
            if(FALSE != entry_ptr->header.coll_access) {
                nerrors++;
testpar/t_coll_md_read.c

@@ -32,6 +32,14 @@
 #define PARTIAL_NO_SELECTION_Y_DIM_SCALE 5
 #define PARTIAL_NO_SELECTION_X_DIM_SCALE 5
 
+#define MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS 2
+
+#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_NO_SEL_PROCESS (mpi_rank == mpi_size - 1)
+#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DATASET_NAME "linked_chunk_io_sort_chunk_issue"
+#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_Y_DIM_SCALE 20000
+#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE 1
+#define LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS 1
+
 /*
  * A test for issue HDFFV-10501. A parallel hang was reported which occurred
  * in linked-chunk I/O when collective metadata reads are enabled and some ranks
@@ -57,13 +65,13 @@ void test_partial_no_selection_coll_md_read(void)
     hsize_t stride[PARTIAL_NO_SELECTION_DATASET_NDIMS];
     hsize_t count[PARTIAL_NO_SELECTION_DATASET_NDIMS];
     hsize_t block[PARTIAL_NO_SELECTION_DATASET_NDIMS];
-    hid_t file_id = -1;
-    hid_t fapl_id = -1;
-    hid_t dset_id = -1;
-    hid_t dcpl_id = -1;
-    hid_t dxpl_id = -1;
-    hid_t fspace_id = -1;
-    hid_t mspace_id = -1;
+    hid_t file_id = H5I_INVALID_HID;
+    hid_t fapl_id = H5I_INVALID_HID;
+    hid_t dset_id = H5I_INVALID_HID;
+    hid_t dcpl_id = H5I_INVALID_HID;
+    hid_t dxpl_id = H5I_INVALID_HID;
+    hid_t fspace_id = H5I_INVALID_HID;
+    hid_t mspace_id = H5I_INVALID_HID;
     int mpi_rank, mpi_size;
     void *data = NULL;
     void *read_buf = NULL;
@@ -86,7 +94,7 @@ void test_partial_no_selection_coll_md_read(void)
     file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
     VRFY((file_id >= 0), "H5Fcreate succeeded");
 
-    dataset_dims = malloc(PARTIAL_NO_SELECTION_DATASET_NDIMS * sizeof(*dataset_dims));
+    dataset_dims = HDmalloc(PARTIAL_NO_SELECTION_DATASET_NDIMS * sizeof(*dataset_dims));
     VRFY((dataset_dims != NULL), "malloc succeeded");
 
     dataset_dims[0] = PARTIAL_NO_SELECTION_Y_DIM_SCALE * mpi_size;

@@ -129,7 +137,7 @@ void test_partial_no_selection_coll_md_read(void)
     mspace_id = H5Screate_simple(1, sel_dims, NULL);
     VRFY((mspace_id >= 0), "H5Screate_simple succeeded");
 
-    data = calloc(1, count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int));
+    data = HDcalloc(1, count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int));
     VRFY((data != NULL), "calloc succeeded");
 
     dxpl_id = H5Pcreate(H5P_DATASET_XFER);

@@ -151,7 +159,7 @@ void test_partial_no_selection_coll_md_read(void)
      */
     VRFY((H5Pset_dxpl_mpio_chunk_opt(dxpl_id, H5FD_MPIO_CHUNK_ONE_IO) >= 0), "H5Pset_dxpl_mpio_chunk_opt succeeded");
 
-    read_buf = malloc(count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int));
+    read_buf = HDmalloc(count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int));
     VRFY((read_buf != NULL), "malloc succeeded");
 
     /*
@@ -171,21 +179,321 @@ void test_partial_no_selection_coll_md_read(void)
      * Check data integrity just to be sure.
      */
     if (!PARTIAL_NO_SELECTION_NO_SEL_PROCESS) {
-        VRFY((!memcmp(data, read_buf, count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int))), "memcmp succeeded");
+        VRFY((!HDmemcmp(data, read_buf, count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int))), "memcmp succeeded");
     }
 
     if (dataset_dims) {
-        free(dataset_dims);
+        HDfree(dataset_dims);
         dataset_dims = NULL;
     }
 
     if (data) {
-        free(data);
+        HDfree(data);
         data = NULL;
     }
 
     if (read_buf) {
-        free(read_buf);
+        HDfree(read_buf);
         read_buf = NULL;
     }
 
     VRFY((H5Sclose(fspace_id) >= 0), "H5Sclose succeeded");
     VRFY((H5Sclose(mspace_id) >= 0), "H5Sclose succeeded");
     VRFY((H5Pclose(dcpl_id) >= 0), "H5Pclose succeeded");
     VRFY((H5Pclose(dxpl_id) >= 0), "H5Pclose succeeded");
     VRFY((H5Dclose(dset_id) >= 0), "H5Dclose succeeded");
     VRFY((H5Pclose(fapl_id) >= 0), "H5Pclose succeeded");
     VRFY((H5Fclose(file_id) >= 0), "H5Fclose succeeded");
 }
 
+/*
+ * A test for HDFFV-10562 which attempts to verify that using multi-chunk
+ * I/O with collective metadata reads enabled doesn't causes issues due to
+ * collective metadata reads being made only by process 0 in H5D__chunk_addrmap().
+ *
+ * Failure in this test may either cause a hang, or, due to how the MPI calls
+ * pertaining to this issue might mistakenly match up, may cause an MPI error
+ * message similar to:
+ *
+ *  #008: H5Dmpio.c line 2546 in H5D__obtain_mpio_mode(): MPI_BCast failed
+ *    major: Internal error (too specific to document in detail)
+ *    minor: Some MPI function failed
+ *  #009: H5Dmpio.c line 2546 in H5D__obtain_mpio_mode(): Message truncated, error stack:
+ *PMPI_Bcast(1600)..................: MPI_Bcast(buf=0x1df98e0, count=18, MPI_BYTE, root=0, comm=0x84000006) failed
+ *MPIR_Bcast_impl(1452).............:
+ *MPIR_Bcast(1476)..................:
+ *MPIR_Bcast_intra(1249)............:
+ *MPIR_SMP_Bcast(1088)..............:
+ *MPIR_Bcast_binomial(239)..........:
+ *MPIDI_CH3U_Receive_data_found(131): Message from rank 0 and tag 2 truncated; 2616 bytes received but buffer size is 18
+ *    major: Internal error (too specific to document in detail)
+ *    minor: MPI Error String
+ *
+ */
+void test_multi_chunk_io_addrmap_issue(void)
+{
+    const char *filename;
+    hsize_t start[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS];
+    hsize_t stride[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS];
+    hsize_t count[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS];
+    hsize_t block[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS];
+    hsize_t dims[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS] = {10, 5};
+    hsize_t chunk_dims[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS] = {5, 5};
+    hsize_t max_dims[MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS] = {H5S_UNLIMITED, H5S_UNLIMITED};
+    hid_t file_id = H5I_INVALID_HID;
+    hid_t fapl_id = H5I_INVALID_HID;
+    hid_t dset_id = H5I_INVALID_HID;
+    hid_t dcpl_id = H5I_INVALID_HID;
+    hid_t dxpl_id = H5I_INVALID_HID;
+    hid_t space_id = H5I_INVALID_HID;
+    void *read_buf = NULL;
+    int mpi_rank;
+    int data[5][5] = { {0, 1, 2, 3, 4},
+                       {0, 1, 2, 3, 4},
+                       {0, 1, 2, 3, 4},
+                       {0, 1, 2, 3, 4},
+                       {0, 1, 2, 3, 4} };
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+
+    filename = GetTestParameters();
+
+    fapl_id = create_faccess_plist(MPI_COMM_WORLD, MPI_INFO_NULL, facc_type);
+    VRFY((fapl_id >= 0), "create_faccess_plist succeeded");
+
+    /*
+     * Even though the testphdf5 framework currently sets collective metadata reads
+     * on the FAPL, we call it here just to be sure this is futureproof, since
+     * demonstrating this issue relies upon it.
+     */
+    VRFY((H5Pset_all_coll_metadata_ops(fapl_id, true) >= 0), "Set collective metadata reads succeeded");
+
+    file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
+    VRFY((file_id >= 0), "H5Fcreate succeeded");
+
+    space_id = H5Screate_simple(MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS, dims, max_dims);
+    VRFY((space_id >= 0), "H5Screate_simple succeeded");
+
+    dcpl_id = H5Pcreate(H5P_DATASET_CREATE);
+    VRFY((dcpl_id >= 0), "H5Pcreate succeeded");
+
+    VRFY((H5Pset_chunk(dcpl_id, MULTI_CHUNK_IO_ADDRMAP_ISSUE_DIMS, chunk_dims) >= 0), "H5Pset_chunk succeeded");
+
+    dset_id = H5Dcreate2(file_id, "dset", H5T_NATIVE_INT, space_id, H5P_DEFAULT, dcpl_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "H5Dcreate2 succeeded");
+
+    dxpl_id = H5Pcreate(H5P_DATASET_XFER);
+    VRFY((dxpl_id >= 0), "H5Pcreate succeeded");
+
+    VRFY((H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE) >= 0), "H5Pset_dxpl_mpio succeeded");
+    VRFY((H5Pset_dxpl_mpio_chunk_opt(dxpl_id, H5FD_MPIO_CHUNK_MULTI_IO) >= 0), "H5Pset_dxpl_mpio_chunk_opt succeeded");
+
+    start[1] = 0;
+    stride[0] = stride[1] = 1;
+    count[0] = count[1] = 5;
+    block[0] = block[1] = 1;
+
+    if (mpi_rank == 0)
+        start[0] = 0;
+    else
+        start[0] = 5;
+
+    VRFY((H5Sselect_hyperslab(space_id, H5S_SELECT_SET, start, stride, count, block) >= 0), "H5Sselect_hyperslab succeeded");
+    if (mpi_rank != 0)
+        VRFY((H5Sselect_none(space_id) >= 0), "H5Sselect_none succeeded");
+
+    VRFY((H5Dwrite(dset_id, H5T_NATIVE_INT, H5S_ALL, space_id, dxpl_id, data) >= 0), "H5Dwrite succeeded");
+
+    VRFY((H5Fflush(file_id, H5F_SCOPE_GLOBAL) >= 0), "H5Fflush succeeded");
+
+    read_buf = HDmalloc(50 * sizeof(int));
+    VRFY((read_buf != NULL), "malloc succeeded");
+
+    VRFY((H5Dread(dset_id, H5T_NATIVE_INT, H5S_ALL, H5S_ALL, dxpl_id, read_buf) >= 0), "H5Dread succeeded");
+
+    if (read_buf) {
+        HDfree(read_buf);
+        read_buf = NULL;
+    }
+
+    VRFY((H5Sclose(space_id) >= 0), "H5Sclose succeeded");
+    VRFY((H5Pclose(dcpl_id) >= 0), "H5Pclose succeeded");
+    VRFY((H5Pclose(dxpl_id) >= 0), "H5Pclose succeeded");
+    VRFY((H5Dclose(dset_id) >= 0), "H5Dclose succeeded");
+    VRFY((H5Pclose(fapl_id) >= 0), "H5Pclose succeeded");
+    VRFY((H5Fclose(file_id) >= 0), "H5Fclose succeeded");
+}
+
+/*
+ * A test for HDFFV-10562 which attempts to verify that using linked-chunk
+ * I/O with collective metadata reads enabled doesn't cause issues due to
+ * collective metadata reads being made only by process 0 in H5D__sort_chunk().
+ *
+ * NOTE: Due to the way that the threshold value which pertains to this test
+ * is currently calculated within HDF5, there are several conditions that this
+ * test must maintain. Refer to the function H5D__sort_chunk in H5Dmpio.c for
+ * a better idea of why.
+ *
+ * Condition 1: We need to make sure that the test always selects every single
+ * chunk in the dataset. It is fine if the selection is split up among multiple
+ * ranks, but their combined selection must cover the whole dataset.
+ *
+ * Condition 2: The number of chunks in the dataset divided by the number of MPI
+ * ranks must exceed or equal 10000. In other words, each MPI rank must be
+ * responsible for 10000 or more unique chunks.
+ *
+ * Condition 3: This test will currently only be reliably reproducable for 2 or 3
+ * MPI ranks. The threshold value calculated reduces to a constant 100 / mpi_size,
+ * and is compared against a default value of 30%.
+ *
+ * Failure in this test may either cause a hang, or, due to how the MPI calls
+ * pertaining to this issue might mistakenly match up, may cause an MPI error
+ * message similar to:
+ *
+ *  #008: H5Dmpio.c line 2338 in H5D__sort_chunk(): MPI_BCast failed
+ *    major: Internal error (too specific to document in detail)
+ *    minor: Some MPI function failed
+ *  #009: H5Dmpio.c line 2338 in H5D__sort_chunk(): Other MPI error, error stack:
+ *PMPI_Bcast(1600)........: MPI_Bcast(buf=0x7eae610, count=320000, MPI_BYTE, root=0, comm=0x84000006) failed
+ *MPIR_Bcast_impl(1452)...:
+ *MPIR_Bcast(1476)........:
+ *MPIR_Bcast_intra(1249)..:
+ *MPIR_SMP_Bcast(1088)....:
+ *MPIR_Bcast_binomial(250): message sizes do not match across processes in the collective routine: Received 2096 but expected 320000
+ *    major: Internal error (too specific to document in detail)
+ *    minor: MPI Error String
+ */
+void test_link_chunk_io_sort_chunk_issue(void)
+{
+    const char *filename;
+    hsize_t *dataset_dims = NULL;
+    hsize_t max_dataset_dims[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS];
+    hsize_t sel_dims[1];
+    hsize_t chunk_dims[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS] = { LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS };
+    hsize_t start[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS];
+    hsize_t stride[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS];
+    hsize_t count[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS];
+    hsize_t block[LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS];
+    hid_t file_id = H5I_INVALID_HID;
+    hid_t fapl_id = H5I_INVALID_HID;
+    hid_t dset_id = H5I_INVALID_HID;
+    hid_t dcpl_id = H5I_INVALID_HID;
+    hid_t dxpl_id = H5I_INVALID_HID;
+    hid_t fspace_id = H5I_INVALID_HID;
+    hid_t mspace_id = H5I_INVALID_HID;
+    int mpi_rank, mpi_size;
+    void *data = NULL;
+    void *read_buf = NULL;
+
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
+
+    filename = GetTestParameters();
+
+    fapl_id = create_faccess_plist(MPI_COMM_WORLD, MPI_INFO_NULL, facc_type);
+    VRFY((fapl_id >= 0), "create_faccess_plist succeeded");
+
+    /*
+     * Even though the testphdf5 framework currently sets collective metadata reads
+     * on the FAPL, we call it here just to be sure this is futureproof, since
+     * demonstrating this issue relies upon it.
+     */
+    VRFY((H5Pset_all_coll_metadata_ops(fapl_id, true) >= 0), "Set collective metadata reads succeeded");
+
+    file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
+    VRFY((file_id >= 0), "H5Fcreate succeeded");
+
+    dataset_dims = HDmalloc(LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS * sizeof(*dataset_dims));
+    VRFY((dataset_dims != NULL), "malloc succeeded");
+
+    dataset_dims[0] = LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE * mpi_size * LINK_CHUNK_IO_SORT_CHUNK_ISSUE_Y_DIM_SCALE;
+    max_dataset_dims[0] = H5S_UNLIMITED;
+
+    fspace_id = H5Screate_simple(LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS, dataset_dims, max_dataset_dims);
+    VRFY((fspace_id >= 0), "H5Screate_simple succeeded");
+
+    /*
+     * Set up chunking on the dataset in order to reproduce the problem.
+     */
+    dcpl_id = H5Pcreate(H5P_DATASET_CREATE);
+    VRFY((dcpl_id >= 0), "H5Pcreate succeeded");
+
+    VRFY((H5Pset_chunk(dcpl_id, LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DIMS, chunk_dims) >= 0), "H5Pset_chunk succeeded");
+
+    dset_id = H5Dcreate2(file_id, LINK_CHUNK_IO_SORT_CHUNK_ISSUE_DATASET_NAME, H5T_NATIVE_INT, fspace_id, H5P_DEFAULT, dcpl_id, H5P_DEFAULT);
+    VRFY((dset_id >= 0), "H5Dcreate2 succeeded");
+
+    /*
+     * Setup hyperslab selection to split the dataset among the ranks.
+     *
+     * The ranks will write rows across the dataset.
+     */
+    stride[0] = LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE;
+    count[0] = (dataset_dims[0] / LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE) / mpi_size;
+    start[0] = count[0] * mpi_rank;
+    block[0] = LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE;
+
+    VRFY((H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, start, stride, count, block) >= 0), "H5Sselect_hyperslab succeeded");
+
+    sel_dims[0] = count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE);
+
+    mspace_id = H5Screate_simple(1, sel_dims, NULL);
+    VRFY((mspace_id >= 0), "H5Screate_simple succeeded");
+
+    data = HDcalloc(1, count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE) * sizeof(int));
+    VRFY((data != NULL), "calloc succeeded");
+
+    dxpl_id = H5Pcreate(H5P_DATASET_XFER);
+    VRFY((dxpl_id >= 0), "H5Pcreate succeeded");
+
+    /*
+     * Enable collective access for the data transfer.
+     */
+    VRFY((H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE) >= 0), "H5Pset_dxpl_mpio succeeded");
+
+    VRFY((H5Dwrite(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, dxpl_id, data) >= 0), "H5Dwrite succeeded");
+
+    VRFY((H5Fflush(file_id, H5F_SCOPE_GLOBAL) >= 0), "H5Fflush succeeded");
+
+    /*
+     * Ensure that linked-chunk I/O is performed since this is
+     * the particular code path where the issue lies and we don't
+     * want the library doing multi-chunk I/O behind our backs.
+     */
+    VRFY((H5Pset_dxpl_mpio_chunk_opt(dxpl_id, H5FD_MPIO_CHUNK_ONE_IO) >= 0), "H5Pset_dxpl_mpio_chunk_opt succeeded");
+
+    read_buf = HDmalloc(count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE) * sizeof(int));
+    VRFY((read_buf != NULL), "malloc succeeded");
+
+    VRFY((H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, start, stride, count, block) >= 0), "H5Sselect_hyperslab succeeded");
+
+    sel_dims[0] = count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE);
+
+    VRFY((H5Sclose(mspace_id) >= 0), "H5Sclose succeeded");
+
+    mspace_id = H5Screate_simple(1, sel_dims, NULL);
+    VRFY((mspace_id >= 0), "H5Screate_simple succeeded");
+
+    read_buf = HDrealloc(read_buf, count[0] * (LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE) * sizeof(int));
+    VRFY((read_buf != NULL), "realloc succeeded");
+
+    /*
+     * Finally have each rank read their section of data back from the dataset.
+     */
+    VRFY((H5Dread(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, dxpl_id, read_buf) >= 0), "H5Dread succeeded");
+
+    if (dataset_dims) {
+        HDfree(dataset_dims);
+        dataset_dims = NULL;
+    }
+
+    if (data) {
+        HDfree(data);
+        data = NULL;
+    }
+
+    if (read_buf) {
+        HDfree(read_buf);
+        read_buf = NULL;
+    }
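As a worked check of Condition 2 and Condition 3 in the comment block above test_link_chunk_io_sort_chunk_issue: with LINK_CHUNK_IO_SORT_CHUNK_ISSUE_CHUNK_SIZE = 1 and LINK_CHUNK_IO_SORT_CHUNK_ISSUE_Y_DIM_SCALE = 20000, the dataset holds 20000 * mpi_size chunks, so each rank is responsible for 20000 chunks (at least 10000), and the 100 / mpi_size figure is 50% for 2 ranks and about 33% for 3 ranks (both above the default 30%), but only 25% for 4 ranks. The small standalone sketch below is not part of the change set; the 30% default and the 100 / mpi_size reduction are quoted from that comment, everything else is illustrative.

    #include <stdio.h>

    int main(void)
    {
        const double default_threshold = 30.0;  /* default percentage quoted in the test comment */

        for (int mpi_size = 2; mpi_size <= 4; mpi_size++) {
            long   chunks_per_rank = 1L * 20000;        /* CHUNK_SIZE * Y_DIM_SCALE */
            double percent         = 100.0 / mpi_size;  /* every chunk is selected */

            printf("mpi_size=%d: %ld chunks per rank, 100/mpi_size = %.1f%% -> %s\n",
                   mpi_size, chunks_per_rank, percent,
                   percent >= default_threshold ? "meets the 30% threshold"
                                                : "below the 30% threshold");
        }
        return 0;
    }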
testpar/testphdf5.c

@@ -549,7 +549,10 @@ int main(int argc, char **argv)
 
     AddTest("noselcollmdread", test_partial_no_selection_coll_md_read, NULL,
             "Collective Metadata read with some ranks having no selection", PARATESTFILE);
+    AddTest("MC coll MD read", test_multi_chunk_io_addrmap_issue, NULL,
+            "Collective MD read with multi chunk I/O (H5D__chunk_addrmap)", PARATESTFILE);
+    AddTest("LC coll MD read", test_link_chunk_io_sort_chunk_issue, NULL,
+            "Collective MD read with link chunk I/O (H5D__sort_chunk)", PARATESTFILE);
 
     /* Display testing information */
     TestInfo(argv[0]);
testpar/testphdf5.h

@@ -295,6 +295,8 @@ void compress_readAll(void);
 #endif /* H5_HAVE_FILTER_DEFLATE */
 void test_dense_attr(void);
 void test_partial_no_selection_coll_md_read(void);
+void test_multi_chunk_io_addrmap_issue(void);
+void test_link_chunk_io_sort_chunk_issue(void);
 
 /* commonly used prototypes */
 hid_t create_faccess_plist(MPI_Comm comm, MPI_Info info, int l_facc_type);