Merge pull request #1224 in HDFFV/hdf5 from ~JHENDERSON/hdf5:develop to develop

* commit '763b95c6b0f027025a5a5bfb203b77317ffa2368':
  Update RELEASE.txt with suggested changes
  Update MANIFEST file for new t_coll_md_read.c file
  Remove now-unused local variable
  Add fix for HDFFV-10501
This commit is contained in:
Larry Knox 2018-08-27 16:24:20 -05:00
commit 553b1a01f8
8 changed files with 270 additions and 18 deletions

View File

@ -1248,6 +1248,7 @@
./testpar/t_cache_image.c
./testpar/t_chunk_alloc.c
./testpar/t_coll_chunk.c
./testpar/t_coll_md_read.c
./testpar/t_dset.c
./testpar/t_file.c
./testpar/t_file_image.c

View File

@ -34,6 +34,7 @@ CONTENTS
- New Features
- Support for new platforms and languages
- Bug Fixes since HDF5-1.10.3
- Bug Fixes since HDF5-1.10.2
- Supported Platforms
- Tested Configuration Features Summary
@ -194,6 +195,68 @@ Support for new platforms, languages and compilers.
=======================================
-
Bug Fixes since HDF5-1.10.3 release
==================================
Library
-------
- A bug was discovered in the parallel library where an application
would hang if a collective read/write of a chunked dataset occurred
when collective metadata reads were enabled and some of the ranks
had no selection in the dataset's dataspace. The ranks which had no
selection in the dataset's dataspace called H5D__chunk_addrmap() to
retrieve the lowest chunk address in the dataset. This is because we
require reads/writes to be performed in strictly non-decreasing order
of chunk address in the file.
When the chunk index used was a version 1 or 2 B-tree, these
non-participating ranks would issue a collective MPI_Bcast() call
that the participating ranks would not issue, causing the hang. Since
the non-participating ranks are not actually reading/writing anything,
the H5D__chunk_addrmap() call can be safely removed and the address used
for the read/write can be set to an arbitrary number (0 was chosen).
(JTH - 2018/08/25, HDFFV-10501)
Configuration
-------------
-
Performance
-------------
-
Fortran
--------
-
Tools
-----
-
High-Level APIs:
------
-
Fortran High-Level APIs:
------
-
Documentation
-------------
-
F90 APIs
--------
-
C++ APIs
--------
-
Testing
-------
Bug Fixes since HDF5-1.10.2 release
==================================

View File

@ -925,7 +925,6 @@ H5D__link_chunk_collective_io(H5D_io_info_t *io_info, const H5D_type_info_t *typ
hbool_t chunk_final_ftype_is_derived = FALSE;
H5D_storage_t ctg_store; /* Storage info for "fake" contiguous dataset */
size_t total_chunks;
haddr_t *total_chunk_addr_array = NULL;
MPI_Datatype *chunk_mtype = NULL;
MPI_Datatype *chunk_ftype = NULL;
MPI_Aint *chunk_disp_array = NULL;
@ -1146,20 +1145,7 @@ if(H5DEBUG(D))
mpi_buf_count = (hsize_t)1;
} /* end if */
else { /* no selection at all for this process */
/* Allocate chunking information */
if(NULL == (total_chunk_addr_array = (haddr_t *)H5MM_malloc(sizeof(haddr_t) * total_chunks)))
HGOTO_ERROR(H5E_DATASET, H5E_CANTALLOC, FAIL, "couldn't allocate total chunk address arraybuffer")
/* Retrieve chunk address map */
if(H5D__chunk_addrmap(io_info, total_chunk_addr_array) < 0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't get chunk address")
/* Get chunk with lowest address */
ctg_store.contig.dset_addr = HADDR_MAX;
for(u = 0; u < total_chunks; u++)
if(total_chunk_addr_array[u] < ctg_store.contig.dset_addr)
ctg_store.contig.dset_addr = total_chunk_addr_array[u];
HDassert(ctg_store.contig.dset_addr != HADDR_MAX);
ctg_store.contig.dset_addr = 0;
/* Set the MPI datatype */
chunk_final_ftype = MPI_BYTE;
@ -1187,8 +1173,6 @@ if(H5DEBUG(D))
HDfprintf(H5DEBUG(D),"before freeing memory inside H5D_link_collective_io ret_value = %d\n", ret_value);
#endif
/* Release resources */
if(total_chunk_addr_array)
H5MM_xfree(total_chunk_addr_array);
if(chunk_addr_info_array)
H5MM_xfree(chunk_addr_info_array);
if(chunk_mtype)

View File

@ -17,6 +17,7 @@ set (testphdf5_SOURCES
${HDF5_TEST_PAR_SOURCE_DIR}/t_chunk_alloc.c
${HDF5_TEST_PAR_SOURCE_DIR}/t_filter_read.c
${HDF5_TEST_PAR_SOURCE_DIR}/t_prop.c
${HDF5_TEST_PAR_SOURCE_DIR}/t_coll_md_read.c
)
#-- Adding test for testhdf5

View File

@ -29,7 +29,7 @@ check_PROGRAMS = $(TEST_PROG_PARA)
testphdf5_SOURCES=testphdf5.c t_dset.c t_file.c t_file_image.c t_mdset.c \
t_ph5basic.c t_coll_chunk.c t_span_tree.c t_chunk_alloc.c t_filter_read.c \
t_prop.c
t_prop.c t_coll_md_read.c
# The tests all depend on the hdf5 library and the test library
LDADD = $(LIBH5TEST) $(LIBHDF5)

199
testpar/t_coll_md_read.c Normal file
View File

@ -0,0 +1,199 @@
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Copyright by The HDF Group. *
* All rights reserved. *
* *
* This file is part of HDF5. The full HDF5 copyright notice, including *
* terms governing use, modification, and redistribution, is contained in *
* the COPYING file, which can be found at the root of the source code *
* distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases. *
* If you do not have access to either file, you may request a copy from *
* help@hdfgroup.org. *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* A test suite to test HDF5's collective metadata read capabilities, as enabled
* by making a call to H5Pset_all_coll_metadata_ops().
*/
#include "testphdf5.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
* Define the non-participating process as the "last"
* rank to avoid any weirdness potentially caused by
* an if (mpi_rank == 0) check.
*/
#define PARTIAL_NO_SELECTION_NO_SEL_PROCESS (mpi_rank == mpi_size - 1)
#define PARTIAL_NO_SELECTION_DATASET_NAME "partial_no_selection_dset"
#define PARTIAL_NO_SELECTION_DATASET_NDIMS 2
#define PARTIAL_NO_SELECTION_Y_DIM_SCALE 5
#define PARTIAL_NO_SELECTION_X_DIM_SCALE 5
/*
* A test for issue HDFFV-10501. A parallel hang was reported which occurred
* in linked-chunk I/O when collective metadata reads are enabled and some ranks
* do not have any selection in a dataset's dataspace, while others do. The ranks
* which have no selection during the read/write operation called H5D__chunk_addrmap()
* to retrieve the lowest chunk address, since we require that the read/write be done
* in strictly non-decreasing order of chunk address. For version 1 and 2 B-trees,
* this caused the non-participating ranks to issue a collective MPI_Bcast() call
* which the other ranks did not issue, thus causing a hang.
*
* However, since these ranks are not actually reading/writing anything, this call
* can simply be removed and the address used for the read/write can be set to an
* arbitrary number (0 was chosen).
*/
void test_partial_no_selection_coll_md_read(void)
{
const char *filename;
hsize_t *dataset_dims = NULL;
hsize_t max_dataset_dims[PARTIAL_NO_SELECTION_DATASET_NDIMS];
hsize_t sel_dims[1];
hsize_t chunk_dims[PARTIAL_NO_SELECTION_DATASET_NDIMS] = { PARTIAL_NO_SELECTION_Y_DIM_SCALE, PARTIAL_NO_SELECTION_X_DIM_SCALE };
hsize_t start[PARTIAL_NO_SELECTION_DATASET_NDIMS];
hsize_t stride[PARTIAL_NO_SELECTION_DATASET_NDIMS];
hsize_t count[PARTIAL_NO_SELECTION_DATASET_NDIMS];
hsize_t block[PARTIAL_NO_SELECTION_DATASET_NDIMS];
hid_t file_id = -1;
hid_t fapl_id = -1;
hid_t dset_id = -1;
hid_t dcpl_id = -1;
hid_t dxpl_id = -1;
hid_t fspace_id = -1;
hid_t mspace_id = -1;
int mpi_rank, mpi_size;
void *data = NULL;
void *read_buf = NULL;
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
filename = GetTestParameters();
fapl_id = create_faccess_plist(MPI_COMM_WORLD, MPI_INFO_NULL, facc_type);
VRFY((fapl_id >= 0), "create_faccess_plist succeeded");
/*
* Even though the testphdf5 framework currently sets collective metadata reads
* on the FAPL, we call it here just to be sure this is futureproof, since
* demonstrating this issue relies upon it.
*/
VRFY((H5Pset_all_coll_metadata_ops(fapl_id, true) >= 0), "Set collective metadata reads succeeded");
file_id = H5Fcreate(filename, H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
VRFY((file_id >= 0), "H5Fcreate succeeded");
dataset_dims = malloc(PARTIAL_NO_SELECTION_DATASET_NDIMS * sizeof(*dataset_dims));
VRFY((dataset_dims != NULL), "malloc succeeded");
dataset_dims[0] = PARTIAL_NO_SELECTION_Y_DIM_SCALE * mpi_size;
dataset_dims[1] = PARTIAL_NO_SELECTION_X_DIM_SCALE * mpi_size;
max_dataset_dims[0] = H5S_UNLIMITED;
max_dataset_dims[1] = H5S_UNLIMITED;
fspace_id = H5Screate_simple(PARTIAL_NO_SELECTION_DATASET_NDIMS, dataset_dims, max_dataset_dims);
VRFY((fspace_id >= 0), "H5Screate_simple succeeded");
/*
* Set up chunking on the dataset in order to reproduce the problem.
*/
dcpl_id = H5Pcreate(H5P_DATASET_CREATE);
VRFY((dcpl_id >= 0), "H5Pcreate succeeded");
VRFY((H5Pset_chunk(dcpl_id, PARTIAL_NO_SELECTION_DATASET_NDIMS, chunk_dims) >= 0), "H5Pset_chunk succeeded");
dset_id = H5Dcreate2(file_id, PARTIAL_NO_SELECTION_DATASET_NAME, H5T_NATIVE_INT, fspace_id, H5P_DEFAULT, dcpl_id, H5P_DEFAULT);
VRFY((dset_id >= 0), "H5Dcreate2 succeeded");
/*
* Setup hyperslab selection to split the dataset among the ranks.
*
* The ranks will write rows across the dataset.
*/
start[0] = PARTIAL_NO_SELECTION_Y_DIM_SCALE * mpi_rank;
start[1] = 0;
stride[0] = PARTIAL_NO_SELECTION_Y_DIM_SCALE;
stride[1] = PARTIAL_NO_SELECTION_X_DIM_SCALE;
count[0] = 1;
count[1] = mpi_size;
block[0] = PARTIAL_NO_SELECTION_Y_DIM_SCALE;
block[1] = PARTIAL_NO_SELECTION_X_DIM_SCALE;
VRFY((H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, start, stride, count, block) >= 0), "H5Sselect_hyperslab succeeded");
sel_dims[0] = count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE);
mspace_id = H5Screate_simple(1, sel_dims, NULL);
VRFY((mspace_id >= 0), "H5Screate_simple succeeded");
data = calloc(1, count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int));
VRFY((data != NULL), "calloc succeeded");
dxpl_id = H5Pcreate(H5P_DATASET_XFER);
VRFY((dxpl_id >= 0), "H5Pcreate succeeded");
/*
* Enable collective access for the data transfer.
*/
VRFY((H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE) >= 0), "H5Pset_dxpl_mpio succeeded");
VRFY((H5Dwrite(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, dxpl_id, data) >= 0), "H5Dwrite succeeded");
VRFY((H5Fflush(file_id, H5F_SCOPE_GLOBAL) >= 0), "H5Fflush succeeded");
/*
* Ensure that linked-chunk I/O is performed since this is
* the particular code path where the issue lies and we don't
* want the library doing multi-chunk I/O behind our backs.
*/
VRFY((H5Pset_dxpl_mpio_chunk_opt(dxpl_id, H5FD_MPIO_CHUNK_ONE_IO) >= 0), "H5Pset_dxpl_mpio_chunk_opt succeeded");
read_buf = malloc(count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int));
VRFY((read_buf != NULL), "malloc succeeded");
/*
* Make sure to call H5Sselect_none() on the non-participating process.
*/
if (PARTIAL_NO_SELECTION_NO_SEL_PROCESS) {
VRFY((H5Sselect_none(fspace_id) >= 0), "H5Sselect_none succeeded");
VRFY((H5Sselect_none(mspace_id) >= 0), "H5Sselect_none succeeded");
}
/*
* Finally have each rank read their section of data back from the dataset.
*/
VRFY((H5Dread(dset_id, H5T_NATIVE_INT, mspace_id, fspace_id, dxpl_id, read_buf) >= 0), "H5Dread succeeded");
/*
* Check data integrity just to be sure.
*/
if (!PARTIAL_NO_SELECTION_NO_SEL_PROCESS) {
VRFY((!memcmp(data, read_buf, count[1] * (PARTIAL_NO_SELECTION_Y_DIM_SCALE * PARTIAL_NO_SELECTION_X_DIM_SCALE) * sizeof(int))), "memcmp succeeded");
}
if (dataset_dims) {
free(dataset_dims);
dataset_dims = NULL;
}
if (data) {
free(data);
data = NULL;
}
if (read_buf) {
free(read_buf);
read_buf = NULL;
}
VRFY((H5Sclose(fspace_id) >= 0), "H5Sclose succeeded");
VRFY((H5Sclose(mspace_id) >= 0), "H5Sclose succeeded");
VRFY((H5Pclose(dcpl_id) >= 0), "H5Pclose succeeded");
VRFY((H5Pclose(dxpl_id) >= 0), "H5Pclose succeeded");
VRFY((H5Dclose(dset_id) >= 0), "H5Dclose succeeded");
VRFY((H5Pclose(fapl_id) >= 0), "H5Pclose succeeded");
VRFY((H5Fclose(file_id) >= 0), "H5Fclose succeeded");
}

View File

@ -547,6 +547,9 @@ int main(int argc, char **argv)
AddTest("denseattr", test_dense_attr, NULL,
"Store Dense Attributes", PARATESTFILE);
AddTest("noselcollmdread", test_partial_no_selection_coll_md_read, NULL,
"Collective Metadata read with some ranks having no selection", PARATESTFILE);
/* Display testing information */
TestInfo(argv[0]);

View File

@ -294,6 +294,7 @@ void file_image_daisy_chain_test(void);
void compress_readAll(void);
#endif /* H5_HAVE_FILTER_DEFLATE */
void test_dense_attr(void);
void test_partial_no_selection_coll_md_read(void);
/* commonly used prototypes */
hid_t create_faccess_plist(MPI_Comm comm, MPI_Info info, int l_facc_type);