Fix issue with Subfiling VFD and multiple opens of same file (#4194)

* Fix issue with Subfiling VFD and multiple opens of same file

* Update H5_subfile_fid_to_context to return error value instead of ID

* Add helper routine to initialize open file mapping
This commit is contained in:
jhendersonHDF 2024-03-21 19:39:13 -05:00 committed by GitHub
parent 23d3d63323
commit c4cc33abe1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 264 additions and 200 deletions

View File

@ -693,6 +693,16 @@ Bug Fixes since HDF5-1.14.0 release
Library Library
------- -------
- Fixed an issue with the Subfiling VFD and multiple opens of a
file
An issue with the way the Subfiling VFD handles multiple opens
of the same file caused the file structures for the extra opens
to occasionally get mapped to an incorrect subfiling context
object. The VFD now correctly maps the file structures for
additional opens of an already open file to the same context
object.
- Fixed a bug that causes the library to incorrectly identify - Fixed a bug that causes the library to incorrectly identify
the endian-ness of 16-bit and smaller C floating-point datatypes the endian-ness of 16-bit and smaller C floating-point datatypes

View File

@ -843,12 +843,17 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open subfiles for file '%s'", H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open subfiles for file '%s'",
name); name);
/* Initialize I/O concentrator threads if this MPI rank is an I/O concentrator */ /*
* Initialize I/O concentrator threads if this MPI rank is an I/O
* concentrator and the threads haven't already been initialized by
* a different open of this file
*/
sf_context = H5_get_subfiling_object(file_ptr->context_id); sf_context = H5_get_subfiling_object(file_ptr->context_id);
if (sf_context && sf_context->topology->rank_is_ioc) { if (sf_context && sf_context->topology->rank_is_ioc && !sf_context->threads_inited) {
if (initialize_ioc_threads(sf_context) < 0) if (initialize_ioc_threads(sf_context) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL, H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL,
"unable to initialize I/O concentrator threads"); "unable to initialize I/O concentrator threads");
sf_context->threads_inited = true;
} }
ret_value = (H5FD_t *)file_ptr; ret_value = (H5FD_t *)file_ptr;
@ -917,14 +922,22 @@ H5FD__ioc_close_int(H5FD_ioc_t *file_ptr)
if (MPI_SUCCESS != (mpi_code = MPI_Barrier(file_ptr->comm))) if (MPI_SUCCESS != (mpi_code = MPI_Barrier(file_ptr->comm)))
H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code); H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
if (sf_context && sf_context->topology->rank_is_ioc) { /* Only finalize IOC threads and close subfiles if this is
if (finalize_ioc_threads(sf_context) < 0) * the last file holding a reference to the context
/* Note that closing of subfiles is collective */ */
H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL, "unable to finalize IOC threads"); if (sf_context && sf_context->file_ref == 1) {
if (sf_context->topology->rank_is_ioc && sf_context->threads_inited) {
if (finalize_ioc_threads(sf_context) < 0)
/* Note that closing of subfiles is collective */
H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL,
"unable to finalize IOC threads");
}
if (H5_close_subfiles(file_ptr->context_id, file_ptr->comm) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL,
"unable to close subfiling file(s)");
} }
if (H5_close_subfiles(file_ptr->context_id, file_ptr->comm) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL, "unable to close subfiling file(s)");
file_ptr->context_id = -1; file_ptr->context_id = -1;
} }

View File

@ -1244,7 +1244,9 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma
if (driver->value == H5_VFD_IOC) { if (driver->value == H5_VFD_IOC) {
/* Get a copy of the context ID for later use */ /* Get a copy of the context ID for later use */
file_ptr->context_id = H5_subfile_fid_to_context(file_ptr->file_id); if (H5_subfile_fid_to_context(file_ptr->file_id, &file_ptr->context_id) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, NULL,
"unable to retrieve subfiling context ID for this file");
file_ptr->fa.require_ioc = true; file_ptr->fa.require_ioc = true;
} }
else if (driver->value == H5_VFD_SEC2) { else if (driver->value == H5_VFD_SEC2) {

View File

@ -38,7 +38,7 @@ static sf_topology_t **sf_topology_cache = NULL;
static size_t sf_context_cache_size = 0; static size_t sf_context_cache_size = 0;
static size_t sf_topology_cache_size = 0; static size_t sf_topology_cache_size = 0;
static size_t sf_context_cache_num_entries = 0; static size_t sf_context_cache_next_index = 0;
static size_t sf_topology_cache_num_entries = 0; static size_t sf_topology_cache_num_entries = 0;
static file_map_to_context_t *sf_open_file_map = NULL; static file_map_to_context_t *sf_open_file_map = NULL;
@ -67,9 +67,9 @@ static herr_t identify_ioc_ranks(int64_t sf_context_id, sf_topology_t *app_topol
static herr_t init_subfiling_context(subfiling_context_t *sf_context, const char *base_filename, static herr_t init_subfiling_context(subfiling_context_t *sf_context, const char *base_filename,
uint64_t file_id, H5FD_subfiling_params_t *subfiling_config, uint64_t file_id, H5FD_subfiling_params_t *subfiling_config,
sf_topology_t *app_topology, MPI_Comm file_comm); sf_topology_t *app_topology, MPI_Comm file_comm);
static herr_t open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags); static herr_t init_open_file_map(void);
static herr_t record_fid_to_subfile(uint64_t file_id, int64_t subfile_context_id, int *next_index); static herr_t record_fid_map_entry(uint64_t file_id, int64_t subfile_context_id, int *next_index);
static void clear_fid_map_entry(uint64_t file_id, int64_t sf_context_id); static herr_t clear_fid_map_entry(uint64_t file_id, int64_t sf_context_id);
static herr_t ioc_open_files(int64_t file_context_id, int file_acc_flags); static herr_t ioc_open_files(int64_t file_context_id, int file_acc_flags);
static herr_t create_config_file(subfiling_context_t *sf_context, const char *base_filename, static herr_t create_config_file(subfiling_context_t *sf_context, const char *base_filename,
const char *config_dir, const char *subfile_dir, bool truncate_if_exists); const char *config_dir, const char *subfile_dir, bool truncate_if_exists);
@ -92,7 +92,7 @@ H5_new_subfiling_object_id(sf_obj_type_t obj_type)
int64_t index_val = 0; int64_t index_val = 0;
if (obj_type == SF_CONTEXT) { if (obj_type == SF_CONTEXT) {
index_val = (int64_t)sf_context_cache_num_entries; index_val = (int64_t)sf_context_cache_next_index;
} }
else if (obj_type == SF_TOPOLOGY) { else if (obj_type == SF_TOPOLOGY) {
index_val = (int64_t)sf_topology_cache_num_entries; index_val = (int64_t)sf_topology_cache_num_entries;
@ -156,8 +156,8 @@ H5_get_subfiling_object(int64_t object_id)
if (NULL == (sf_context_cache = calloc(DEFAULT_CONTEXT_CACHE_SIZE, sizeof(*sf_context_cache)))) if (NULL == (sf_context_cache = calloc(DEFAULT_CONTEXT_CACHE_SIZE, sizeof(*sf_context_cache))))
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL,
"couldn't allocate space for subfiling context cache"); "couldn't allocate space for subfiling context cache");
sf_context_cache_size = DEFAULT_CONTEXT_CACHE_SIZE; sf_context_cache_size = DEFAULT_CONTEXT_CACHE_SIZE;
sf_context_cache_num_entries = 0; sf_context_cache_next_index = 0;
} }
/* Make more space in context cache if needed */ /* Make more space in context cache if needed */
@ -166,7 +166,7 @@ H5_get_subfiling_object(int64_t object_id)
size_t new_size; size_t new_size;
void *tmp_realloc; void *tmp_realloc;
old_num_entries = sf_context_cache_num_entries; old_num_entries = sf_context_cache_size;
new_size = (sf_context_cache_size * 3) / 2; new_size = (sf_context_cache_size * 3) / 2;
@ -188,23 +188,27 @@ H5_get_subfiling_object(int64_t object_id)
assert((size_t)obj_index < sf_context_cache_size); assert((size_t)obj_index < sf_context_cache_size);
} }
/* ret_value = sf_context_cache[obj_index];
* Since this cache currently just keeps all entries until if (!ret_value) {
* application exit, context entry indices should just be size_t next_idx;
* consecutive
*/
assert((size_t)obj_index <= sf_context_cache_num_entries);
if ((size_t)obj_index < sf_context_cache_num_entries)
ret_value = sf_context_cache[obj_index];
else {
assert(!sf_context_cache[sf_context_cache_num_entries]);
/* Allocate a new subfiling context object */ /* Allocate a new subfiling context object */
if (NULL == (ret_value = calloc(1, sizeof(subfiling_context_t)))) if (NULL == (sf_context_cache[obj_index] = calloc(1, sizeof(subfiling_context_t))))
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL,
"couldn't allocate subfiling context object"); "couldn't allocate subfiling context object");
sf_context_cache[sf_context_cache_num_entries++] = ret_value; ret_value = sf_context_cache[obj_index];
/* Set index for next available cache entry. If all available
* slots are filled, the index will be set to sf_context_cache_size
* and cause a reallocation of the cache the next time a new
* cache entry is created.
*/
next_idx = (size_t)obj_index + 1;
while (next_idx < sf_context_cache_size && sf_context_cache[next_idx])
next_idx++;
sf_context_cache_next_index = next_idx;
} }
} }
else if (obj_type == SF_TOPOLOGY) { else if (obj_type == SF_TOPOLOGY) {
@ -310,13 +314,21 @@ H5_free_subfiling_object(int64_t object_id)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
"couldn't get subfiling context for subfiling object ID"); "couldn't get subfiling context for subfiling object ID");
if (H5_free_subfiling_object_int(sf_context) < 0) if (sf_context->file_ref == 0 || --sf_context->file_ref == 0) {
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling context object"); if (H5_free_subfiling_object_int(sf_context) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL,
"couldn't free subfiling context object");
assert(sf_context_cache_num_entries > 0); for (size_t idx = 0; idx < sf_context_cache_size; idx++) {
assert(sf_context == sf_context_cache[sf_context_cache_num_entries - 1]); if (sf_context != sf_context_cache[idx])
sf_context_cache[sf_context_cache_num_entries - 1] = NULL; continue;
sf_context_cache_num_entries--;
if (idx < sf_context_cache_next_index)
sf_context_cache_next_index = idx;
sf_context_cache[idx] = NULL;
}
}
} }
else if (obj_type == SF_TOPOLOGY) { else if (obj_type == SF_TOPOLOGY) {
sf_topology_t *sf_topology; sf_topology_t *sf_topology;
@ -358,6 +370,8 @@ H5_free_subfiling_object_int(subfiling_context_t *sf_context)
sf_context->sf_context_id = -1; sf_context->sf_context_id = -1;
sf_context->h5_file_id = UINT64_MAX; sf_context->h5_file_id = UINT64_MAX;
sf_context->threads_inited = false;
sf_context->file_ref = 0;
sf_context->sf_num_fids = 0; sf_context->sf_num_fids = 0;
sf_context->sf_num_subfiles = -1; sf_context->sf_num_subfiles = -1;
sf_context->sf_write_count = 0; sf_context->sf_write_count = 0;
@ -611,23 +625,18 @@ done:
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* Function: H5_open_subfiles * Function: H5_open_subfiles
* *
* Purpose: Wrapper for the internal 'open__subfiles' function * Purpose: Initializes a subfiling context object for a file with the
* Similar to the other public wrapper functions, we * given filename and file ID (inode) and opens the associated
* discover (via the sf_context) the number of io concentrators * subfiles. As part of this process, information about the
* and pass that to the internal function so that vector * application topology will be gathered and stored in the
* storage arrays can be stack based rather than explicitly * context object for future use. This includes identifying
* allocated and freed. * which MPI ranks will act as I/O concentrators and "own"
* one or more of the opened subfiles. The process of
* initializing the subfiling context object also involves
* creating MPI communicators that facilitate messaging
* between HDF5 clients and the I/O concentrators.
* *
* The Internal function is responsible for sending all IOC * Return: Non-negative on success/Negative on failure
* instances, the (sub)file open requests.
*
* Prior to calling the internal open function, we initialize
* a new subfiling context that contains topology info and
* new MPI communicators that facilitate messaging between
* HDF5 clients and the IOCs.
*
* Return: Success (0) or Failure (non-zero)
* Errors: If MPI operations fail for some reason.
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -635,8 +644,9 @@ herr_t
H5_open_subfiles(const char *base_filename, uint64_t file_id, H5FD_subfiling_params_t *subfiling_config, H5_open_subfiles(const char *base_filename, uint64_t file_id, H5FD_subfiling_params_t *subfiling_config,
int file_acc_flags, MPI_Comm file_comm, int64_t *context_id_out) int file_acc_flags, MPI_Comm file_comm, int64_t *context_id_out)
{ {
subfiling_context_t *sf_context = NULL; subfiling_context_t *sf_context = NULL;
int64_t context_id = -1; int64_t context_id = -1;
bool recorded_fid = false;
int mpi_code; int mpi_code;
herr_t ret_value = SUCCEED; herr_t ret_value = SUCCEED;
@ -649,21 +659,44 @@ H5_open_subfiles(const char *base_filename, uint64_t file_id, H5FD_subfiling_par
if (!context_id_out) if (!context_id_out)
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "invalid subfiling context ID pointer"); H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "invalid subfiling context ID pointer");
/* Initialize new subfiling context ID based on configuration information */ /* Make sure open file mapping is initialized in case this
if (init_subfiling(base_filename, file_id, subfiling_config, file_acc_flags, file_comm, &context_id) < 0) * is the first file open call with the VFD
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize subfiling context");
/* Retrieve the subfiling object for the newly-created context ID */
if (NULL == (sf_context = H5_get_subfiling_object(context_id)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't get subfiling object from context ID");
/*
* If we're actually using the IOCs, we will
* start the service threads on the identified
* ranks as part of the subfile opening.
*/ */
if (open_subfile_with_context(sf_context, file_acc_flags) < 0) if (init_open_file_map() < 0)
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "couldn't open subfiling subfiles"); H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize open file mapping");
/* Check if this file is already open */
if (H5_subfile_fid_to_context(file_id, &context_id) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
"couldn't retrieve context ID from open file mapping");
if (context_id >= 0) {
/* Retrieve the subfiling object for the cached context ID */
if (NULL == (sf_context = H5_get_subfiling_object(context_id)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
"couldn't get subfiling object from context ID");
}
else {
/* Initialize new subfiling context ID based on configuration information */
if (init_subfiling(base_filename, file_id, subfiling_config, file_acc_flags, file_comm, &context_id) <
0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize subfiling context");
/* Retrieve the subfiling object for the newly-created context ID */
if (NULL == (sf_context = H5_get_subfiling_object(context_id)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
"couldn't get subfiling object from context ID");
/*
* If this rank is an I/O concentrator, actually open
* the subfiles belonging to this IOC rank and start
* the I/O service threads
*/
if (sf_context->topology->rank_is_ioc) {
if (ioc_open_files(sf_context->sf_context_id, file_acc_flags) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, "IOC couldn't open subfile");
}
}
#ifdef H5_SUBFILING_DEBUG #ifdef H5_SUBFILING_DEBUG
{ {
@ -689,6 +722,16 @@ H5_open_subfiles(const char *base_filename, uint64_t file_id, H5FD_subfiling_par
} }
#endif #endif
/*
* Save the HDF5 file ID (e.g., inode) to subfile context mapping.
* There shouldn't be any issue, but check the status and
* return if there was a problem.
*/
if (record_fid_map_entry(sf_context->h5_file_id, sf_context->sf_context_id, NULL) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL,
"couldn't record HDF5 file ID to subfile context mapping");
recorded_fid = true;
*context_id_out = context_id; *context_id_out = context_id;
done: done:
@ -715,7 +758,9 @@ done:
} }
if (ret_value < 0) { if (ret_value < 0) {
clear_fid_map_entry(file_id, context_id); if (recorded_fid && clear_fid_map_entry(file_id, context_id) < 0)
H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL,
"unable to clear entry from file ID to context mapping");
if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0)
H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object"); H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object");
@ -1802,6 +1847,8 @@ init_subfiling_context(subfiling_context_t *sf_context, const char *base_filenam
assert(MPI_COMM_NULL != file_comm); assert(MPI_COMM_NULL != file_comm);
sf_context->h5_file_id = file_id; sf_context->h5_file_id = file_id;
sf_context->threads_inited = false;
sf_context->file_ref = 0;
sf_context->sf_fids = NULL; sf_context->sf_fids = NULL;
sf_context->sf_num_fids = 0; sf_context->sf_num_fids = 0;
sf_context->sf_num_subfiles = subfiling_config->stripe_count; sf_context->sf_num_subfiles = subfiling_config->stripe_count;
@ -1938,71 +1985,38 @@ done:
} }
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* Function: open_subfile_with_context * Function: init_open_file_map
* *
* Purpose: While we cannot know a priori, whether an HDF client will * Purpose: Allocates and initializes an array that keeps a mapping
* need to access data across the entirety of a file, e.g. * between a file's inode value (__ino_t st_ino) and the ID
* an individual MPI rank may read or write only small * of the context object associated with it.
* segments of the entire file space; this function sends
* a file OPEN_OP to every IO concentrator.
* *
* Prior to opening any subfiles, the H5FDopen will have * Return: Non-negative on success/Negative on failure
* created an HDF5 file with the user specified naming.
* A path prefix will be selected and is available as
* an input argument.
*
* The opened HDF5 file handle will contain device and
* inode values, these being constant for all processes
* opening the shared file. The inode value is utilized
* as a key value and is associated with the sf_context
* which we receive as one of the input arguments.
*
* IO Concentrator threads will be initialized on MPI ranks
* which have been identified via application topology
* discovery. The number and mapping of IOC to MPI_rank
* is part of the sf_context->topology structure.
*
* Return: Success (0) or Failure (non-zero)
* Errors: If MPI operations fail for some reason.
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
static herr_t static herr_t
open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags) init_open_file_map(void)
{ {
herr_t ret_value = SUCCEED; herr_t ret_value = SUCCEED;
assert(sf_context); if (!sf_open_file_map) {
assert(sf_context->h5_file_id != UINT64_MAX); if (NULL == (sf_open_file_map = malloc((size_t)DEFAULT_FILE_MAP_ENTRIES * sizeof(*sf_open_file_map))))
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't allocate open file mapping");
/* sf_file_map_size = DEFAULT_FILE_MAP_ENTRIES;
* Save the HDF5 file ID (e.g., inode) to subfile context mapping. for (int i = 0; i < sf_file_map_size; i++) {
* There shouldn't be any issue, but check the status and sf_open_file_map[i].file_id = UINT64_MAX;
* return if there was a problem. sf_open_file_map[i].sf_context_id = -1;
*/ }
if (record_fid_to_subfile(sf_context->h5_file_id, sf_context->sf_context_id, NULL) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL,
"couldn't record HDF5 file ID to subfile context mapping");
/*
* If this rank is an I/O concentrator, actually open
* the subfiles belonging to this IOC rank
*/
if (sf_context->topology->rank_is_ioc) {
if (ioc_open_files(sf_context->sf_context_id, file_acc_flags) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, "IOC couldn't open subfile");
} }
done: done:
if (ret_value < 0) {
clear_fid_map_entry(sf_context->h5_file_id, sf_context->sf_context_id);
}
H5_SUBFILING_FUNC_LEAVE; H5_SUBFILING_FUNC_LEAVE;
} }
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
* Function: record_fid_to_subfile * Function: record_fid_map_entry
* *
* Purpose: Every opened HDF5 file will have (if utilizing subfiling) * Purpose: Every opened HDF5 file will have (if utilizing subfiling)
* a subfiling context associated with it. It is important that * a subfiling context associated with it. It is important that
@ -2020,37 +2034,35 @@ done:
* This function simply records the filesystem handle to * This function simply records the filesystem handle to
* subfiling context mapping. * subfiling context mapping.
* *
* Return: SUCCEED or FAIL. * Return: Non-negative on success/Negative on failure
* Errors: FAILs ONLY if storage for the mapping entry cannot
* be allocated.
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
static herr_t static herr_t
record_fid_to_subfile(uint64_t file_id, int64_t subfile_context_id, int *next_index) record_fid_map_entry(uint64_t file_id, int64_t subfile_context_id, int *next_index)
{ {
int index; subfiling_context_t *sf_context = NULL;
herr_t ret_value = SUCCEED; int index;
herr_t ret_value = SUCCEED;
if (!sf_open_file_map) {
if (NULL == (sf_open_file_map = malloc((size_t)DEFAULT_FILE_MAP_ENTRIES * sizeof(*sf_open_file_map))))
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't allocate open file mapping");
sf_file_map_size = DEFAULT_FILE_MAP_ENTRIES;
for (int i = 0; i < sf_file_map_size; i++) {
sf_open_file_map[i].file_id = UINT64_MAX;
sf_open_file_map[i].sf_context_id = -1;
}
}
for (index = 0; index < sf_file_map_size; index++) { for (index = 0; index < sf_file_map_size; index++) {
if (sf_open_file_map[index].file_id == file_id) if (sf_open_file_map[index].file_id == file_id) {
/* Increment file ref. count for this context */
if (NULL == (sf_context = H5_get_subfiling_object(sf_open_file_map[index].sf_context_id)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't get subfiling context");
sf_context->file_ref++;
goto done; goto done;
}
if (sf_open_file_map[index].file_id == UINT64_MAX) { if (sf_open_file_map[index].file_id == UINT64_MAX) {
sf_open_file_map[index].file_id = file_id; sf_open_file_map[index].file_id = file_id;
sf_open_file_map[index].sf_context_id = subfile_context_id; sf_open_file_map[index].sf_context_id = subfile_context_id;
/* First open of this file - set file ref. count to 1 for this context */
if (NULL == (sf_context = H5_get_subfiling_object(subfile_context_id)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't get subfiling context");
sf_context->file_ref = 1;
if (next_index) { if (next_index) {
*next_index = index; *next_index = index;
} }
@ -2078,8 +2090,13 @@ record_fid_to_subfile(uint64_t file_id, int64_t subfile_context_id, int *next_in
*next_index = index; *next_index = index;
} }
sf_open_file_map[index].file_id = file_id; sf_open_file_map[index].file_id = file_id;
sf_open_file_map[index++].sf_context_id = subfile_context_id; sf_open_file_map[index].sf_context_id = subfile_context_id;
/* First open of this file - set file ref. count to 1 for this context */
if (NULL == (sf_context = H5_get_subfiling_object(subfile_context_id)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't get subfiling context");
sf_context->file_ref = 1;
} }
done: done:
@ -2092,24 +2109,40 @@ done:
* Purpose: Remove the map entry associated with the file->inode. * Purpose: Remove the map entry associated with the file->inode.
* This is done at file close. * This is done at file close.
* *
* Return: None * Return: Non-negative on success/Negative on failure
* Errors: Cannot fail.
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
static void static herr_t
clear_fid_map_entry(uint64_t file_id, int64_t sf_context_id) clear_fid_map_entry(uint64_t file_id, int64_t sf_context_id)
{ {
if (sf_open_file_map) { herr_t ret_value = SUCCEED;
for (int i = 0; i < sf_file_map_size; i++) {
if ((sf_open_file_map[i].file_id == file_id) && if (!sf_open_file_map)
(sf_open_file_map[i].sf_context_id == sf_context_id)) { H5_SUBFILING_GOTO_DONE(SUCCEED);
sf_open_file_map[i].file_id = UINT64_MAX;
sf_open_file_map[i].sf_context_id = -1; for (int i = 0; i < sf_file_map_size; i++) {
return; subfiling_context_t *sf_context = NULL;
}
if ((sf_open_file_map[i].file_id != file_id) || (sf_open_file_map[i].sf_context_id != sf_context_id))
continue;
/* Only clear map entry if this is the last file
* holding a reference to the context
*/
if (NULL == (sf_context = H5_get_subfiling_object(sf_context_id)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't get subfiling context object");
if (sf_context->file_ref == 0 || sf_context->file_ref == 1) {
sf_open_file_map[i].file_id = UINT64_MAX;
sf_open_file_map[i].sf_context_id = -1;
} }
break;
} }
done:
return ret_value;
} /* end clear_fid_map_entry() */ } /* end clear_fid_map_entry() */
/*------------------------------------------------------------------------- /*-------------------------------------------------------------------------
@ -3013,27 +3046,29 @@ done:
* Function: H5_subfile_fid_to_context * Function: H5_subfile_fid_to_context
* *
* Purpose: This is a basic lookup function which returns the subfiling * Purpose: This is a basic lookup function which returns the subfiling
* context id associated with the specified file ID. * context ID associated with the specified file ID. If no
* such context ID exists, `context_id_out` will be set to a
* negative value.
* *
* Return: Non-negative subfiling context ID if the context exists * Return: Non-negative on success/Negative on failure
* Negative on failure or if the subfiling context doesn't
* exist
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
int64_t herr_t
H5_subfile_fid_to_context(uint64_t file_id) H5_subfile_fid_to_context(uint64_t file_id, int64_t *context_id_out)
{ {
int64_t ret_value = -1; herr_t ret_value = SUCCEED;
if (!sf_open_file_map) assert(context_id_out);
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, -1, "open file map is NULL");
for (int i = 0; i < sf_file_map_size; i++) { *context_id_out = -1;
if (sf_open_file_map[i].file_id == file_id) {
return sf_open_file_map[i].sf_context_id; if (init_open_file_map() < 0)
} H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize open file mapping");
}
for (int i = 0; i < sf_file_map_size; i++)
if (sf_open_file_map[i].file_id == file_id)
*context_id_out = sf_open_file_map[i].sf_context_id;
done: done:
H5_SUBFILING_FUNC_LEAVE; H5_SUBFILING_FUNC_LEAVE;
@ -3095,15 +3130,17 @@ H5_subfiling_terminate(void)
/* Clean up subfiling context and topology caches */ /* Clean up subfiling context and topology caches */
if (sf_context_cache) { if (sf_context_cache) {
for (size_t i = 0; i < sf_context_cache_num_entries; i++) { for (size_t i = 0; i < sf_context_cache_size; i++) {
if (H5_free_subfiling_object_int(sf_context_cache[i]) < 0) if (sf_context_cache[i]) {
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, if (H5_free_subfiling_object_int(sf_context_cache[i]) < 0)
"couldn't free subfiling context object"); H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL,
sf_context_cache[i] = NULL; "couldn't free subfiling context object");
sf_context_cache[i] = NULL;
}
} }
sf_context_cache_size = 0; sf_context_cache_size = 0;
sf_context_cache_num_entries = 0; sf_context_cache_next_index = 0;
free(sf_context_cache); free(sf_context_cache);
sf_context_cache = NULL; sf_context_cache = NULL;

View File

@ -208,29 +208,31 @@ typedef struct topology {
} sf_topology_t; } sf_topology_t;
typedef struct { typedef struct {
int64_t sf_context_id; /* Generated context ID which embeds the cache index */ int64_t sf_context_id; /* Generated context ID which embeds the cache index */
uint64_t h5_file_id; /* GUID (basically the inode value) */ uint64_t h5_file_id; /* GUID (basically the inode value) */
int *sf_fids; /* Array of file IDs for subfiles this rank owns */ bool threads_inited; /* Whether the IOC threads for this context were started */
int sf_num_fids; /* Number of subfiles this rank owns */ int file_ref; /* Reference count held by files using this context */
int sf_num_subfiles; /* Total number of subfiles for logical HDF5 file */ int *sf_fids; /* Array of file IDs for subfiles this rank owns */
size_t sf_write_count; /* Statistics: write_count */ int sf_num_fids; /* Number of subfiles this rank owns */
size_t sf_read_count; /* Statistics: read_count */ int sf_num_subfiles; /* Total number of subfiles for logical HDF5 file */
haddr_t sf_eof; /* File eof */ size_t sf_write_count; /* Statistics: write_count */
int64_t sf_stripe_size; /* Stripe-depth */ size_t sf_read_count; /* Statistics: read_count */
int64_t sf_blocksize_per_stripe; /* Stripe-depth X n_IOCs */ haddr_t sf_eof; /* File eof */
int64_t sf_base_addr; /* For an IOC, our base address */ int64_t sf_stripe_size; /* Stripe-depth */
MPI_Comm sf_msg_comm; /* MPI comm used to send RPC msg */ int64_t sf_blocksize_per_stripe; /* Stripe-depth X n_IOCs */
MPI_Comm sf_data_comm; /* MPI comm used to move data */ int64_t sf_base_addr; /* For an IOC, our base address */
MPI_Comm sf_eof_comm; /* MPI comm used to communicate EOF */ MPI_Comm sf_msg_comm; /* MPI comm used to send RPC msg */
MPI_Comm sf_node_comm; /* MPI comm used for intra-node comms */ MPI_Comm sf_data_comm; /* MPI comm used to move data */
MPI_Comm sf_group_comm; /* Not used: for IOC collectives */ MPI_Comm sf_eof_comm; /* MPI comm used to communicate EOF */
int sf_group_size; /* IOC count (in sf_group_comm) */ MPI_Comm sf_node_comm; /* MPI comm used for intra-node comms */
int sf_group_rank; /* IOC rank (in sf_group_comm) */ MPI_Comm sf_group_comm; /* Not used: for IOC collectives */
char *subfile_prefix; /* If subfiles are node-local */ int sf_group_size; /* IOC count (in sf_group_comm) */
char *config_file_prefix; /* Prefix added to config file name */ int sf_group_rank; /* IOC rank (in sf_group_comm) */
char *h5_filename; /* The user supplied file name */ char *subfile_prefix; /* If subfiles are node-local */
void *ioc_data; /* Private data for underlying IOC */ char *config_file_prefix; /* Prefix added to config file name */
sf_topology_t *topology; /* Pointer to our topology */ char *h5_filename; /* The user supplied file name */
void *ioc_data; /* Private data for underlying IOC */
sf_topology_t *topology; /* Pointer to our topology */
#ifdef H5_SUBFILING_DEBUG #ifdef H5_SUBFILING_DEBUG
char sf_logfile_name[PATH_MAX]; char sf_logfile_name[PATH_MAX];
@ -274,12 +276,12 @@ H5_DLL herr_t H5_get_subfiling_config_from_file(FILE *config_file, int64_t *str
int64_t *num_subfiles); int64_t *num_subfiles);
H5_DLL herr_t H5_resolve_pathname(const char *filepath, MPI_Comm comm, char **resolved_filepath); H5_DLL herr_t H5_resolve_pathname(const char *filepath, MPI_Comm comm, char **resolved_filepath);
H5_DLL herr_t H5_subfiling_set_config_prop(H5P_genplist_t *plist_ptr, H5_DLL herr_t H5_subfiling_set_config_prop(H5P_genplist_t *plist_ptr,
const H5FD_subfiling_params_t *vfd_config); const H5FD_subfiling_params_t *vfd_config);
H5_DLL herr_t H5_subfiling_get_config_prop(H5P_genplist_t *plist_ptr, H5FD_subfiling_params_t *vfd_config); H5_DLL herr_t H5_subfiling_get_config_prop(H5P_genplist_t *plist_ptr, H5FD_subfiling_params_t *vfd_config);
H5_DLL herr_t H5_subfiling_set_file_id_prop(H5P_genplist_t *plist_ptr, uint64_t file_id); H5_DLL herr_t H5_subfiling_set_file_id_prop(H5P_genplist_t *plist_ptr, uint64_t file_id);
H5_DLL herr_t H5_subfiling_get_file_id_prop(H5P_genplist_t *plist_ptr, uint64_t *file_id); H5_DLL herr_t H5_subfiling_get_file_id_prop(H5P_genplist_t *plist_ptr, uint64_t *file_id);
H5_DLL int64_t H5_subfile_fid_to_context(uint64_t file_id); H5_DLL herr_t H5_subfile_fid_to_context(uint64_t file_id, int64_t *context_id_out);
H5_DLL herr_t H5_subfiling_validate_config(const H5FD_subfiling_params_t *subf_config); H5_DLL herr_t H5_subfiling_validate_config(const H5FD_subfiling_params_t *subf_config);

View File

@ -29,9 +29,9 @@
static MPI_Comm comm = MPI_COMM_WORLD; static MPI_Comm comm = MPI_COMM_WORLD;
static MPI_Info info = MPI_INFO_NULL; static MPI_Info info = MPI_INFO_NULL;
bool pass = true; /* set to false on error */ static bool pass = true; /* set to false on error */
bool disp_failure_mssgs = true; /* global force display of failure messages */ static bool disp_failure_mssgs = true; /* global force display of failure messages */
const char *failure_mssg = NULL; static const char *failure_mssg = NULL;
const char *FILENAMES[] = {"mpio_vfd_test_file_0", /*0*/ const char *FILENAMES[] = {"mpio_vfd_test_file_0", /*0*/
"mpio_vfd_test_file_1", /*1*/ "mpio_vfd_test_file_1", /*1*/