Allow usage of page buffering for serial file access from parallel HDF5 builds (#4568)

This commit is contained in:
jhendersonHDF 2024-06-13 14:18:31 -05:00 committed by GitHub
parent 539d1756e5
commit 4a0b1b66a5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 60 additions and 155 deletions

View File

@ -790,6 +790,16 @@ Bug Fixes since HDF5-1.14.0 release
===================================
Library
-------
- Fixed library to allow usage of page buffering feature for serial file
access with parallel builds of HDF5
When HDF5 is built with parallel support enabled, the library would previously
disallow any usage of page buffering, even if a file was not opened with
parallel access. The library now allows usage of page buffering for serial
file access with parallel builds of HDF5. Usage of page buffering is still
disabled for any form of parallel file access, even if only 1 MPI process
is used.
- Fixed a leak of datatype IDs created internally during datatype conversion
Fixed an issue where the library could leak IDs that it creates internally

View File

@ -2008,15 +2008,6 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id)
if (H5P_get(a_plist, H5F_ACS_PAGE_BUFFER_SIZE_NAME, &page_buf_size) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get page buffer size");
if (page_buf_size) {
#ifdef H5_HAVE_PARALLEL
/* Collective metadata writes are not supported with page buffering */
if (file->shared->coll_md_write)
HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL,
"collective metadata writes are not supported with page buffering");
/* Temporary: fail file create when page buffering feature is enabled for parallel */
HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "page buffering is disabled for parallel");
#endif /* H5_HAVE_PARALLEL */
/* Query for other page buffer cache properties */
if (H5P_get(a_plist, H5F_ACS_PAGE_BUFFER_MIN_META_PERC_NAME, &page_buf_min_meta_perc) < 0)
HGOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get minimum metadata fraction of page buffer");
@ -2029,14 +2020,30 @@ H5F_open(const char *name, unsigned flags, hid_t fcpl_id, hid_t fapl_id)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get evict on close value");
#ifdef H5_HAVE_PARALLEL
/* Check for evict on close in parallel (currently unsupported) */
/* Check for unsupported settings in parallel */
assert(file->shared);
if (H5F_SHARED_HAS_FEATURE(file->shared, H5FD_FEAT_HAS_MPI)) {
int mpi_size = H5F_shared_mpi_get_size(file->shared);
if ((mpi_size > 1) && evict_on_close)
HGOTO_ERROR(H5E_FILE, H5E_UNSUPPORTED, NULL,
"evict on close is currently not supported in parallel HDF5");
/*
* While there shouldn't be any problems in general with using page buffering
* with only 1 MPI process, there are still some testing issues to be fixed.
* Until then, page buffering is disabled for any form of parallel access.
*/
if (page_buf_size) {
/* Collective metadata writes are not supported with page buffering */
if (file->shared->coll_md_write)
HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL,
"collective metadata writes are not supported with page buffering");
HGOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "page buffering is disabled for parallel");
}
if (mpi_size > 1) {
if (evict_on_close)
HGOTO_ERROR(H5E_FILE, H5E_UNSUPPORTED, NULL,
"evict on close is currently not supported in parallel HDF5");
}
}
#endif

View File

@ -38,9 +38,6 @@
#define FILENAME_LEN 1024
/* test routines */
#ifdef H5_HAVE_PARALLEL
static unsigned verify_page_buffering_disabled(hid_t orig_fapl, const char *driver_name);
#else
#define NUM_DSETS 5
#define NX 100
#define NY 50
@ -54,12 +51,9 @@ static unsigned test_stats_collection(hid_t orig_fapl, const char *driver_name);
/* helper routines */
static unsigned create_file(char *filename, hid_t fcpl, hid_t fapl);
static unsigned open_file(char *filename, hid_t fapl, hsize_t page_size, size_t page_buffer_size);
#endif /* H5_HAVE_PARALLEL */
static const char *FILENAME[] = {"filepaged", NULL};
#ifndef H5_HAVE_PARALLEL
/*-------------------------------------------------------------------------
* Function: create_file()
*
@ -289,7 +283,6 @@ error:
H5E_END_TRY
return 1;
}
#endif /* H5_HAVE_PARALLEL */
/*
*
@ -353,8 +346,6 @@ error:
} /* set_multi_split() */
#ifndef H5_HAVE_PARALLEL
/*-------------------------------------------------------------------------
* Function: test_args()
*
@ -2043,121 +2034,6 @@ error:
return 1;
} /* test_stats_collection */
#endif /* #ifndef H5_HAVE_PARALLEL */
/*-------------------------------------------------------------------------
* Function: verify_page_buffering_disabled()
*
* Purpose: This function should only be called in parallel
* builds.
*
* At present, page buffering should be disabled in parallel
* builds. Verify this.
*
* Return: 0 if test is successful
* 1 if test fails
*
*-------------------------------------------------------------------------
*/
#ifdef H5_HAVE_PARALLEL
static unsigned
verify_page_buffering_disabled(hid_t orig_fapl, const char *driver_name)
{
char filename[FILENAME_LEN]; /* Filename to use */
hid_t file_id = H5I_INVALID_HID; /* File ID */
hid_t fcpl = H5I_INVALID_HID;
hid_t fapl = H5I_INVALID_HID;
TESTING("Page Buffering Disabled");
h5_fixname(FILENAME[0], orig_fapl, filename, sizeof(filename));
/* first, try to create a file with page buffering enabled */
if ((fapl = H5Pcopy(orig_fapl)) < 0)
TEST_ERROR;
if (set_multi_split(driver_name, fapl, 4096) != 0)
TEST_ERROR;
if ((fcpl = H5Pcreate(H5P_FILE_CREATE)) < 0)
FAIL_STACK_ERROR;
if (H5Pset_file_space_strategy(fcpl, H5F_FSPACE_STRATEGY_PAGE, 0, (hsize_t)1) < 0)
FAIL_STACK_ERROR;
if (H5Pset_file_space_page_size(fcpl, 4096) < 0)
FAIL_STACK_ERROR;
if (H5Pset_page_buffer_size(fapl, 4096 * 8, 0, 0) < 0)
FAIL_STACK_ERROR;
/* try to create the file -- should fail */
H5E_BEGIN_TRY
{
file_id = H5Fcreate(filename, H5F_ACC_TRUNC, fcpl, fapl);
}
H5E_END_TRY
if (file_id >= 0)
TEST_ERROR;
/* now, create a file, close it, and then try to open it with page
* buffering enabled.
*/
if ((fcpl = H5Pcreate(H5P_FILE_CREATE)) < 0)
FAIL_STACK_ERROR;
if (H5Pset_file_space_strategy(fcpl, H5F_FSPACE_STRATEGY_PAGE, 0, (hsize_t)1) < 0)
FAIL_STACK_ERROR;
if (H5Pset_file_space_page_size(fcpl, 4096) < 0)
FAIL_STACK_ERROR;
/* create the file */
if ((file_id = H5Fcreate(filename, H5F_ACC_TRUNC, fcpl, H5P_DEFAULT)) < 0)
FAIL_STACK_ERROR;
/* close the file */
if (H5Fclose(file_id) < 0)
FAIL_STACK_ERROR;
/* try to open the file using the fapl prepared above which enables
* page buffering. Should fail.
*/
H5E_BEGIN_TRY
{
file_id = H5Fopen(filename, H5F_ACC_RDWR, fapl);
}
H5E_END_TRY
if (file_id >= 0)
TEST_ERROR;
if (H5Pclose(fcpl) < 0)
FAIL_STACK_ERROR;
if (H5Pclose(fapl) < 0)
FAIL_STACK_ERROR;
PASSED();
return 0;
error:
H5E_BEGIN_TRY
{
H5Pclose(fapl);
H5Pclose(fcpl);
H5Fclose(file_id);
}
H5E_END_TRY
return 1;
} /* verify_page_buffering_disabled() */
#endif /* H5_HAVE_PARALLEL */
/*-------------------------------------------------------------------------
* Function: main()
@ -2203,13 +2079,6 @@ main(void)
FAIL_STACK_ERROR;
api_ctx_pushed = true;
#ifdef H5_HAVE_PARALLEL
puts("Page Buffering is disabled for parallel.");
nerrors += verify_page_buffering_disabled(fapl, driver_name);
#else /* H5_HAVE_PARALLEL */
nerrors += test_args(fapl, driver_name);
nerrors += test_raw_data_handling(fapl, driver_name);
nerrors += test_lru_processing(fapl, driver_name);
@ -2217,8 +2086,6 @@ main(void)
nerrors += test_stats_collection(fapl, driver_name);
nerrors += test_pb_fapl_tolerance_at_open();
#endif /* H5_HAVE_PARALLEL */
h5_clean_files(FILENAME, fapl);
if (nerrors)

View File

@ -33,13 +33,17 @@
#define H5MF_FRIEND /*suppress error about including H5MFpkg */
#include "H5MFpkg.h"
#ifdef PB_OUT
#define NUM_DSETS 5
#endif
int mpi_size, mpi_rank;
#ifdef PB_OUT
static int create_file(const char *filename, hid_t fcpl, hid_t fapl, int metadata_write_strategy);
static int open_file(const char *filename, hid_t fapl, int metadata_write_strategy, hsize_t page_size,
size_t page_buffer_size);
#endif
/*
* test file access by communicator besides COMM_WORLD.
@ -132,22 +136,29 @@ test_split_comm_access(void)
void
test_page_buffer_access(void)
{
const char *filename;
hid_t file_id = H5I_INVALID_HID; /* File ID */
hid_t fcpl, fapl;
size_t page_count = 0;
int i, num_elements = 200;
haddr_t raw_addr, meta_addr;
int *data;
H5F_t *f = NULL;
herr_t ret; /* generic return value */
const char *filename;
bool api_ctx_pushed = false; /* Whether API context pushed */
#ifdef PB_OUT
size_t page_count = 0;
int i, num_elements = 200;
haddr_t raw_addr, meta_addr;
int *data;
H5F_t *f = NULL;
bool api_ctx_pushed = false; /* Whether API context pushed */
#endif
MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
filename = (const char *)GetTestParameters();
/* Until page buffering is supported in parallel in some form (even if
* just for a single MPI process), this test just will just check to
* make sure that an error is thrown when page buffering is enabled
* with parallel access.
*/
if (VERBOSE_MED)
printf("Page Buffer Usage in Parallel %s\n", filename);
@ -175,6 +186,15 @@ test_page_buffer_access(void)
ret = H5Pset_coll_metadata_write(fapl, false);
VRFY((ret >= 0), "");
/* This should fail due to page buffering not being supported in parallel */
H5E_BEGIN_TRY
{
file_id = H5Fcreate(filename, H5F_ACC_TRUNC, fcpl, fapl);
}
H5E_END_TRY
VRFY((file_id < 0), "H5Fcreate failed");
#ifdef PB_OUT
ret = create_file(filename, fcpl, fapl, H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED);
VRFY((ret == 0), "");
ret = open_file(filename, fapl, H5AC_METADATA_WRITE_STRATEGY__DISTRIBUTED, sizeof(int) * 100,
@ -442,8 +462,10 @@ test_page_buffer_access(void)
free(data);
data = NULL;
MPI_Barrier(MPI_COMM_WORLD);
#endif
}
#ifdef PB_OUT
static int
create_file(const char *filename, hid_t fcpl, hid_t fapl, int metadata_write_strategy)
{
@ -760,6 +782,7 @@ open_file(const char *filename, hid_t fapl, int metadata_write_strategy, hsize_t
return nerrors;
}
#endif
/*
* NOTE: See HDFFV-10894 and add tests later to verify MPI-specific properties in the

View File

@ -402,9 +402,7 @@ main(int argc, char **argv)
AddTest("split", test_split_comm_access, NULL, "dataset using split communicators", PARATESTFILE);
AddTest("h5oflusherror", test_oflush, NULL, "H5Oflush failure", PARATESTFILE);
#ifdef PB_OUT /* temporary: disable page buffering when parallel */
AddTest("page_buffer", test_page_buffer_access, NULL, "page buffer usage in parallel", PARATESTFILE);
#endif
AddTest("props", test_file_properties, NULL, "Coll Metadata file property settings", PARATESTFILE);