mirror of
https://github.com/HDFGroup/hdf5.git
synced 2024-11-21 01:04:10 +08:00
Fix issue with Subfiling VFD and multiple opens of same file (#4194)
* Fix issue with Subfiling VFD and multiple opens of same file * Update H5_subfile_fid_to_context to return error value instead of ID * Add helper routine to initialize open file mapping
This commit is contained in:
parent
23d3d63323
commit
c4cc33abe1
@ -693,6 +693,16 @@ Bug Fixes since HDF5-1.14.0 release
|
||||
|
||||
Library
|
||||
-------
|
||||
- Fixed an issue with the Subfiling VFD and multiple opens of a
|
||||
file
|
||||
|
||||
An issue with the way the Subfiling VFD handles multiple opens
|
||||
of the same file caused the file structures for the extra opens
|
||||
to occasionally get mapped to an incorrect subfiling context
|
||||
object. The VFD now correctly maps the file structures for
|
||||
additional opens of an already open file to the same context
|
||||
object.
|
||||
|
||||
- Fixed a bug that causes the library to incorrectly identify
|
||||
the endian-ness of 16-bit and smaller C floating-point datatypes
|
||||
|
||||
|
@ -843,12 +843,17 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open subfiles for file '%s'",
|
||||
name);
|
||||
|
||||
/* Initialize I/O concentrator threads if this MPI rank is an I/O concentrator */
|
||||
/*
|
||||
* Initialize I/O concentrator threads if this MPI rank is an I/O
|
||||
* concentrator and the threads haven't already been initialized by
|
||||
* a different open of this file
|
||||
*/
|
||||
sf_context = H5_get_subfiling_object(file_ptr->context_id);
|
||||
if (sf_context && sf_context->topology->rank_is_ioc) {
|
||||
if (sf_context && sf_context->topology->rank_is_ioc && !sf_context->threads_inited) {
|
||||
if (initialize_ioc_threads(sf_context) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL,
|
||||
"unable to initialize I/O concentrator threads");
|
||||
sf_context->threads_inited = true;
|
||||
}
|
||||
|
||||
ret_value = (H5FD_t *)file_ptr;
|
||||
@ -917,14 +922,22 @@ H5FD__ioc_close_int(H5FD_ioc_t *file_ptr)
|
||||
if (MPI_SUCCESS != (mpi_code = MPI_Barrier(file_ptr->comm)))
|
||||
H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
|
||||
|
||||
if (sf_context && sf_context->topology->rank_is_ioc) {
|
||||
if (finalize_ioc_threads(sf_context) < 0)
|
||||
/* Note that closing of subfiles is collective */
|
||||
H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL, "unable to finalize IOC threads");
|
||||
/* Only finalize IOC threads and close subfiles if this is
|
||||
* the last file holding a reference to the context
|
||||
*/
|
||||
if (sf_context && sf_context->file_ref == 1) {
|
||||
if (sf_context->topology->rank_is_ioc && sf_context->threads_inited) {
|
||||
if (finalize_ioc_threads(sf_context) < 0)
|
||||
/* Note that closing of subfiles is collective */
|
||||
H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL,
|
||||
"unable to finalize IOC threads");
|
||||
}
|
||||
|
||||
if (H5_close_subfiles(file_ptr->context_id, file_ptr->comm) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL,
|
||||
"unable to close subfiling file(s)");
|
||||
}
|
||||
|
||||
if (H5_close_subfiles(file_ptr->context_id, file_ptr->comm) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL, "unable to close subfiling file(s)");
|
||||
file_ptr->context_id = -1;
|
||||
}
|
||||
|
||||
|
@ -1244,7 +1244,9 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma
|
||||
|
||||
if (driver->value == H5_VFD_IOC) {
|
||||
/* Get a copy of the context ID for later use */
|
||||
file_ptr->context_id = H5_subfile_fid_to_context(file_ptr->file_id);
|
||||
if (H5_subfile_fid_to_context(file_ptr->file_id, &file_ptr->context_id) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, NULL,
|
||||
"unable to retrieve subfiling context ID for this file");
|
||||
file_ptr->fa.require_ioc = true;
|
||||
}
|
||||
else if (driver->value == H5_VFD_SEC2) {
|
||||
|
@ -38,7 +38,7 @@ static sf_topology_t **sf_topology_cache = NULL;
|
||||
|
||||
static size_t sf_context_cache_size = 0;
|
||||
static size_t sf_topology_cache_size = 0;
|
||||
static size_t sf_context_cache_num_entries = 0;
|
||||
static size_t sf_context_cache_next_index = 0;
|
||||
static size_t sf_topology_cache_num_entries = 0;
|
||||
|
||||
static file_map_to_context_t *sf_open_file_map = NULL;
|
||||
@ -67,9 +67,9 @@ static herr_t identify_ioc_ranks(int64_t sf_context_id, sf_topology_t *app_topol
|
||||
static herr_t init_subfiling_context(subfiling_context_t *sf_context, const char *base_filename,
|
||||
uint64_t file_id, H5FD_subfiling_params_t *subfiling_config,
|
||||
sf_topology_t *app_topology, MPI_Comm file_comm);
|
||||
static herr_t open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags);
|
||||
static herr_t record_fid_to_subfile(uint64_t file_id, int64_t subfile_context_id, int *next_index);
|
||||
static void clear_fid_map_entry(uint64_t file_id, int64_t sf_context_id);
|
||||
static herr_t init_open_file_map(void);
|
||||
static herr_t record_fid_map_entry(uint64_t file_id, int64_t subfile_context_id, int *next_index);
|
||||
static herr_t clear_fid_map_entry(uint64_t file_id, int64_t sf_context_id);
|
||||
static herr_t ioc_open_files(int64_t file_context_id, int file_acc_flags);
|
||||
static herr_t create_config_file(subfiling_context_t *sf_context, const char *base_filename,
|
||||
const char *config_dir, const char *subfile_dir, bool truncate_if_exists);
|
||||
@ -92,7 +92,7 @@ H5_new_subfiling_object_id(sf_obj_type_t obj_type)
|
||||
int64_t index_val = 0;
|
||||
|
||||
if (obj_type == SF_CONTEXT) {
|
||||
index_val = (int64_t)sf_context_cache_num_entries;
|
||||
index_val = (int64_t)sf_context_cache_next_index;
|
||||
}
|
||||
else if (obj_type == SF_TOPOLOGY) {
|
||||
index_val = (int64_t)sf_topology_cache_num_entries;
|
||||
@ -156,8 +156,8 @@ H5_get_subfiling_object(int64_t object_id)
|
||||
if (NULL == (sf_context_cache = calloc(DEFAULT_CONTEXT_CACHE_SIZE, sizeof(*sf_context_cache))))
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL,
|
||||
"couldn't allocate space for subfiling context cache");
|
||||
sf_context_cache_size = DEFAULT_CONTEXT_CACHE_SIZE;
|
||||
sf_context_cache_num_entries = 0;
|
||||
sf_context_cache_size = DEFAULT_CONTEXT_CACHE_SIZE;
|
||||
sf_context_cache_next_index = 0;
|
||||
}
|
||||
|
||||
/* Make more space in context cache if needed */
|
||||
@ -166,7 +166,7 @@ H5_get_subfiling_object(int64_t object_id)
|
||||
size_t new_size;
|
||||
void *tmp_realloc;
|
||||
|
||||
old_num_entries = sf_context_cache_num_entries;
|
||||
old_num_entries = sf_context_cache_size;
|
||||
|
||||
new_size = (sf_context_cache_size * 3) / 2;
|
||||
|
||||
@ -188,23 +188,27 @@ H5_get_subfiling_object(int64_t object_id)
|
||||
assert((size_t)obj_index < sf_context_cache_size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Since this cache currently just keeps all entries until
|
||||
* application exit, context entry indices should just be
|
||||
* consecutive
|
||||
*/
|
||||
assert((size_t)obj_index <= sf_context_cache_num_entries);
|
||||
if ((size_t)obj_index < sf_context_cache_num_entries)
|
||||
ret_value = sf_context_cache[obj_index];
|
||||
else {
|
||||
assert(!sf_context_cache[sf_context_cache_num_entries]);
|
||||
ret_value = sf_context_cache[obj_index];
|
||||
if (!ret_value) {
|
||||
size_t next_idx;
|
||||
|
||||
/* Allocate a new subfiling context object */
|
||||
if (NULL == (ret_value = calloc(1, sizeof(subfiling_context_t))))
|
||||
if (NULL == (sf_context_cache[obj_index] = calloc(1, sizeof(subfiling_context_t))))
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL,
|
||||
"couldn't allocate subfiling context object");
|
||||
|
||||
sf_context_cache[sf_context_cache_num_entries++] = ret_value;
|
||||
ret_value = sf_context_cache[obj_index];
|
||||
|
||||
/* Set index for next available cache entry. If all available
|
||||
* slots are filled, the index will be set to sf_context_cache_size
|
||||
* and cause a reallocation of the cache the next time a new
|
||||
* cache entry is created.
|
||||
*/
|
||||
next_idx = (size_t)obj_index + 1;
|
||||
while (next_idx < sf_context_cache_size && sf_context_cache[next_idx])
|
||||
next_idx++;
|
||||
|
||||
sf_context_cache_next_index = next_idx;
|
||||
}
|
||||
}
|
||||
else if (obj_type == SF_TOPOLOGY) {
|
||||
@ -310,13 +314,21 @@ H5_free_subfiling_object(int64_t object_id)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
|
||||
"couldn't get subfiling context for subfiling object ID");
|
||||
|
||||
if (H5_free_subfiling_object_int(sf_context) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling context object");
|
||||
if (sf_context->file_ref == 0 || --sf_context->file_ref == 0) {
|
||||
if (H5_free_subfiling_object_int(sf_context) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL,
|
||||
"couldn't free subfiling context object");
|
||||
|
||||
assert(sf_context_cache_num_entries > 0);
|
||||
assert(sf_context == sf_context_cache[sf_context_cache_num_entries - 1]);
|
||||
sf_context_cache[sf_context_cache_num_entries - 1] = NULL;
|
||||
sf_context_cache_num_entries--;
|
||||
for (size_t idx = 0; idx < sf_context_cache_size; idx++) {
|
||||
if (sf_context != sf_context_cache[idx])
|
||||
continue;
|
||||
|
||||
if (idx < sf_context_cache_next_index)
|
||||
sf_context_cache_next_index = idx;
|
||||
|
||||
sf_context_cache[idx] = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (obj_type == SF_TOPOLOGY) {
|
||||
sf_topology_t *sf_topology;
|
||||
@ -358,6 +370,8 @@ H5_free_subfiling_object_int(subfiling_context_t *sf_context)
|
||||
|
||||
sf_context->sf_context_id = -1;
|
||||
sf_context->h5_file_id = UINT64_MAX;
|
||||
sf_context->threads_inited = false;
|
||||
sf_context->file_ref = 0;
|
||||
sf_context->sf_num_fids = 0;
|
||||
sf_context->sf_num_subfiles = -1;
|
||||
sf_context->sf_write_count = 0;
|
||||
@ -611,23 +625,18 @@ done:
|
||||
/*-------------------------------------------------------------------------
|
||||
* Function: H5_open_subfiles
|
||||
*
|
||||
* Purpose: Wrapper for the internal 'open__subfiles' function
|
||||
* Similar to the other public wrapper functions, we
|
||||
* discover (via the sf_context) the number of io concentrators
|
||||
* and pass that to the internal function so that vector
|
||||
* storage arrays can be stack based rather than explicitly
|
||||
* allocated and freed.
|
||||
* Purpose: Initializes a subfiling context object for a file with the
|
||||
* given filename and file ID (inode) and opens the associated
|
||||
* subfiles. As part of this process, information about the
|
||||
* application topology will be gathered and stored in the
|
||||
* context object for future use. This includes identifying
|
||||
* which MPI ranks will act as I/O concentrators and "own"
|
||||
* one or more of the opened subfiles. The process of
|
||||
* initializing the subfiling context object also involves
|
||||
* creating MPI communicators that facilitate messaging
|
||||
* between HDF5 clients and the I/O concentrators.
|
||||
*
|
||||
* The Internal function is responsible for sending all IOC
|
||||
* instances, the (sub)file open requests.
|
||||
*
|
||||
* Prior to calling the internal open function, we initialize
|
||||
* a new subfiling context that contains topology info and
|
||||
* new MPI communicators that facilitate messaging between
|
||||
* HDF5 clients and the IOCs.
|
||||
*
|
||||
* Return: Success (0) or Failure (non-zero)
|
||||
* Errors: If MPI operations fail for some reason.
|
||||
* Return: Non-negative on success/Negative on failure
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -635,8 +644,9 @@ herr_t
|
||||
H5_open_subfiles(const char *base_filename, uint64_t file_id, H5FD_subfiling_params_t *subfiling_config,
|
||||
int file_acc_flags, MPI_Comm file_comm, int64_t *context_id_out)
|
||||
{
|
||||
subfiling_context_t *sf_context = NULL;
|
||||
int64_t context_id = -1;
|
||||
subfiling_context_t *sf_context = NULL;
|
||||
int64_t context_id = -1;
|
||||
bool recorded_fid = false;
|
||||
int mpi_code;
|
||||
herr_t ret_value = SUCCEED;
|
||||
|
||||
@ -649,21 +659,44 @@ H5_open_subfiles(const char *base_filename, uint64_t file_id, H5FD_subfiling_par
|
||||
if (!context_id_out)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "invalid subfiling context ID pointer");
|
||||
|
||||
/* Initialize new subfiling context ID based on configuration information */
|
||||
if (init_subfiling(base_filename, file_id, subfiling_config, file_acc_flags, file_comm, &context_id) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize subfiling context");
|
||||
|
||||
/* Retrieve the subfiling object for the newly-created context ID */
|
||||
if (NULL == (sf_context = H5_get_subfiling_object(context_id)))
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't get subfiling object from context ID");
|
||||
|
||||
/*
|
||||
* If we're actually using the IOCs, we will
|
||||
* start the service threads on the identified
|
||||
* ranks as part of the subfile opening.
|
||||
/* Make sure open file mapping is initialized in case this
|
||||
* is the first file open call with the VFD
|
||||
*/
|
||||
if (open_subfile_with_context(sf_context, file_acc_flags) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "couldn't open subfiling subfiles");
|
||||
if (init_open_file_map() < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize open file mapping");
|
||||
|
||||
/* Check if this file is already open */
|
||||
if (H5_subfile_fid_to_context(file_id, &context_id) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
|
||||
"couldn't retrieve context ID from open file mapping");
|
||||
|
||||
if (context_id >= 0) {
|
||||
/* Retrieve the subfiling object for the cached context ID */
|
||||
if (NULL == (sf_context = H5_get_subfiling_object(context_id)))
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
|
||||
"couldn't get subfiling object from context ID");
|
||||
}
|
||||
else {
|
||||
/* Initialize new subfiling context ID based on configuration information */
|
||||
if (init_subfiling(base_filename, file_id, subfiling_config, file_acc_flags, file_comm, &context_id) <
|
||||
0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize subfiling context");
|
||||
|
||||
/* Retrieve the subfiling object for the newly-created context ID */
|
||||
if (NULL == (sf_context = H5_get_subfiling_object(context_id)))
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL,
|
||||
"couldn't get subfiling object from context ID");
|
||||
|
||||
/*
|
||||
* If this rank is an I/O concentrator, actually open
|
||||
* the subfiles belonging to this IOC rank and start
|
||||
* the I/O service threads
|
||||
*/
|
||||
if (sf_context->topology->rank_is_ioc) {
|
||||
if (ioc_open_files(sf_context->sf_context_id, file_acc_flags) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, "IOC couldn't open subfile");
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef H5_SUBFILING_DEBUG
|
||||
{
|
||||
@ -689,6 +722,16 @@ H5_open_subfiles(const char *base_filename, uint64_t file_id, H5FD_subfiling_par
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Save the HDF5 file ID (e.g., inode) to subfile context mapping.
|
||||
* There shouldn't be any issue, but check the status and
|
||||
* return if there was a problem.
|
||||
*/
|
||||
if (record_fid_map_entry(sf_context->h5_file_id, sf_context->sf_context_id, NULL) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL,
|
||||
"couldn't record HDF5 file ID to subfile context mapping");
|
||||
recorded_fid = true;
|
||||
|
||||
*context_id_out = context_id;
|
||||
|
||||
done:
|
||||
@ -715,7 +758,9 @@ done:
|
||||
}
|
||||
|
||||
if (ret_value < 0) {
|
||||
clear_fid_map_entry(file_id, context_id);
|
||||
if (recorded_fid && clear_fid_map_entry(file_id, context_id) < 0)
|
||||
H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL,
|
||||
"unable to clear entry from file ID to context mapping");
|
||||
|
||||
if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0)
|
||||
H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object");
|
||||
@ -1802,6 +1847,8 @@ init_subfiling_context(subfiling_context_t *sf_context, const char *base_filenam
|
||||
assert(MPI_COMM_NULL != file_comm);
|
||||
|
||||
sf_context->h5_file_id = file_id;
|
||||
sf_context->threads_inited = false;
|
||||
sf_context->file_ref = 0;
|
||||
sf_context->sf_fids = NULL;
|
||||
sf_context->sf_num_fids = 0;
|
||||
sf_context->sf_num_subfiles = subfiling_config->stripe_count;
|
||||
@ -1938,71 +1985,38 @@ done:
|
||||
}
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Function: open_subfile_with_context
|
||||
* Function: init_open_file_map
|
||||
*
|
||||
* Purpose: While we cannot know a priori, whether an HDF client will
|
||||
* need to access data across the entirety of a file, e.g.
|
||||
* an individual MPI rank may read or write only small
|
||||
* segments of the entire file space; this function sends
|
||||
* a file OPEN_OP to every IO concentrator.
|
||||
* Purpose: Allocates and initializes an array that keeps a mapping
|
||||
* between a file's inode value (__ino_t st_ino) and the ID
|
||||
* of the context object associated with it.
|
||||
*
|
||||
* Prior to opening any subfiles, the H5FDopen will have
|
||||
* created an HDF5 file with the user specified naming.
|
||||
* A path prefix will be selected and is available as
|
||||
* an input argument.
|
||||
*
|
||||
* The opened HDF5 file handle will contain device and
|
||||
* inode values, these being constant for all processes
|
||||
* opening the shared file. The inode value is utilized
|
||||
* as a key value and is associated with the sf_context
|
||||
* which we receive as one of the input arguments.
|
||||
*
|
||||
* IO Concentrator threads will be initialized on MPI ranks
|
||||
* which have been identified via application toplogy
|
||||
* discovery. The number and mapping of IOC to MPI_rank
|
||||
* is part of the sf_context->topology structure.
|
||||
*
|
||||
* Return: Success (0) or Failure (non-zero)
|
||||
* Errors: If MPI operations fail for some reason.
|
||||
* Return: Non-negative on success/Negative on failure
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
static herr_t
|
||||
open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags)
|
||||
init_open_file_map(void)
|
||||
{
|
||||
herr_t ret_value = SUCCEED;
|
||||
|
||||
assert(sf_context);
|
||||
assert(sf_context->h5_file_id != UINT64_MAX);
|
||||
if (!sf_open_file_map) {
|
||||
if (NULL == (sf_open_file_map = malloc((size_t)DEFAULT_FILE_MAP_ENTRIES * sizeof(*sf_open_file_map))))
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't allocate open file mapping");
|
||||
|
||||
/*
|
||||
* Save the HDF5 file ID (e.g., inode) to subfile context mapping.
|
||||
* There shouldn't be any issue, but check the status and
|
||||
* return if there was a problem.
|
||||
*/
|
||||
if (record_fid_to_subfile(sf_context->h5_file_id, sf_context->sf_context_id, NULL) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL,
|
||||
"couldn't record HDF5 file ID to subfile context mapping");
|
||||
|
||||
/*
|
||||
* If this rank is an I/O concentrator, actually open
|
||||
* the subfiles belonging to this IOC rank
|
||||
*/
|
||||
if (sf_context->topology->rank_is_ioc) {
|
||||
if (ioc_open_files(sf_context->sf_context_id, file_acc_flags) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, "IOC couldn't open subfile");
|
||||
sf_file_map_size = DEFAULT_FILE_MAP_ENTRIES;
|
||||
for (int i = 0; i < sf_file_map_size; i++) {
|
||||
sf_open_file_map[i].file_id = UINT64_MAX;
|
||||
sf_open_file_map[i].sf_context_id = -1;
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
if (ret_value < 0) {
|
||||
clear_fid_map_entry(sf_context->h5_file_id, sf_context->sf_context_id);
|
||||
}
|
||||
|
||||
H5_SUBFILING_FUNC_LEAVE;
|
||||
}
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Function: record_fid_to_subfile
|
||||
* Function: record_fid_map_entry
|
||||
*
|
||||
* Purpose: Every opened HDF5 file will have (if utilizing subfiling)
|
||||
* a subfiling context associated with it. It is important that
|
||||
@ -2020,37 +2034,35 @@ done:
|
||||
* This function simply records the filesystem handle to
|
||||
* subfiling context mapping.
|
||||
*
|
||||
* Return: SUCCEED or FAIL.
|
||||
* Errors: FAILs ONLY if storage for the mapping entry cannot
|
||||
* be allocated.
|
||||
* Return: Non-negative on success/Negative on failure
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
static herr_t
|
||||
record_fid_to_subfile(uint64_t file_id, int64_t subfile_context_id, int *next_index)
|
||||
record_fid_map_entry(uint64_t file_id, int64_t subfile_context_id, int *next_index)
|
||||
{
|
||||
int index;
|
||||
herr_t ret_value = SUCCEED;
|
||||
|
||||
if (!sf_open_file_map) {
|
||||
if (NULL == (sf_open_file_map = malloc((size_t)DEFAULT_FILE_MAP_ENTRIES * sizeof(*sf_open_file_map))))
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't allocate open file mapping");
|
||||
|
||||
sf_file_map_size = DEFAULT_FILE_MAP_ENTRIES;
|
||||
for (int i = 0; i < sf_file_map_size; i++) {
|
||||
sf_open_file_map[i].file_id = UINT64_MAX;
|
||||
sf_open_file_map[i].sf_context_id = -1;
|
||||
}
|
||||
}
|
||||
subfiling_context_t *sf_context = NULL;
|
||||
int index;
|
||||
herr_t ret_value = SUCCEED;
|
||||
|
||||
for (index = 0; index < sf_file_map_size; index++) {
|
||||
if (sf_open_file_map[index].file_id == file_id)
|
||||
if (sf_open_file_map[index].file_id == file_id) {
|
||||
/* Increment file ref. count for this context */
|
||||
if (NULL == (sf_context = H5_get_subfiling_object(sf_open_file_map[index].sf_context_id)))
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't get subfiling context");
|
||||
sf_context->file_ref++;
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (sf_open_file_map[index].file_id == UINT64_MAX) {
|
||||
sf_open_file_map[index].file_id = file_id;
|
||||
sf_open_file_map[index].sf_context_id = subfile_context_id;
|
||||
|
||||
/* First open of this file - set file ref. count to 1 for this context */
|
||||
if (NULL == (sf_context = H5_get_subfiling_object(subfile_context_id)))
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't get subfiling context");
|
||||
sf_context->file_ref = 1;
|
||||
|
||||
if (next_index) {
|
||||
*next_index = index;
|
||||
}
|
||||
@ -2078,8 +2090,13 @@ record_fid_to_subfile(uint64_t file_id, int64_t subfile_context_id, int *next_in
|
||||
*next_index = index;
|
||||
}
|
||||
|
||||
sf_open_file_map[index].file_id = file_id;
|
||||
sf_open_file_map[index++].sf_context_id = subfile_context_id;
|
||||
sf_open_file_map[index].file_id = file_id;
|
||||
sf_open_file_map[index].sf_context_id = subfile_context_id;
|
||||
|
||||
/* First open of this file - set file ref. count to 1 for this context */
|
||||
if (NULL == (sf_context = H5_get_subfiling_object(subfile_context_id)))
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't get subfiling context");
|
||||
sf_context->file_ref = 1;
|
||||
}
|
||||
|
||||
done:
|
||||
@ -2092,24 +2109,40 @@ done:
|
||||
* Purpose: Remove the map entry associated with the file->inode.
|
||||
* This is done at file close.
|
||||
*
|
||||
* Return: None
|
||||
* Errors: Cannot fail.
|
||||
* Return: Non-negative on success/Negative on failure
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
static void
|
||||
static herr_t
|
||||
clear_fid_map_entry(uint64_t file_id, int64_t sf_context_id)
|
||||
{
|
||||
if (sf_open_file_map) {
|
||||
for (int i = 0; i < sf_file_map_size; i++) {
|
||||
if ((sf_open_file_map[i].file_id == file_id) &&
|
||||
(sf_open_file_map[i].sf_context_id == sf_context_id)) {
|
||||
sf_open_file_map[i].file_id = UINT64_MAX;
|
||||
sf_open_file_map[i].sf_context_id = -1;
|
||||
return;
|
||||
}
|
||||
herr_t ret_value = SUCCEED;
|
||||
|
||||
if (!sf_open_file_map)
|
||||
H5_SUBFILING_GOTO_DONE(SUCCEED);
|
||||
|
||||
for (int i = 0; i < sf_file_map_size; i++) {
|
||||
subfiling_context_t *sf_context = NULL;
|
||||
|
||||
if ((sf_open_file_map[i].file_id != file_id) || (sf_open_file_map[i].sf_context_id != sf_context_id))
|
||||
continue;
|
||||
|
||||
/* Only clear map entry if this is the last file
|
||||
* holding a reference to the context
|
||||
*/
|
||||
if (NULL == (sf_context = H5_get_subfiling_object(sf_context_id)))
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't get subfiling context object");
|
||||
|
||||
if (sf_context->file_ref == 0 || sf_context->file_ref == 1) {
|
||||
sf_open_file_map[i].file_id = UINT64_MAX;
|
||||
sf_open_file_map[i].sf_context_id = -1;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
done:
|
||||
return ret_value;
|
||||
} /* end clear_fid_map_entry() */
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
@ -3013,27 +3046,29 @@ done:
|
||||
* Function: H5_subfile_fid_to_context
|
||||
*
|
||||
* Purpose: This is a basic lookup function which returns the subfiling
|
||||
* context id associated with the specified file ID.
|
||||
* context ID associated with the specified file ID. If no
|
||||
* such context ID exists, `context_id_out` will be set to a
|
||||
* negative value.
|
||||
*
|
||||
* Return: Non-negative subfiling context ID if the context exists
|
||||
* Negative on failure or if the subfiling context doesn't
|
||||
* exist
|
||||
* Return: Non-negative on success/Negative on failure
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
int64_t
|
||||
H5_subfile_fid_to_context(uint64_t file_id)
|
||||
herr_t
|
||||
H5_subfile_fid_to_context(uint64_t file_id, int64_t *context_id_out)
|
||||
{
|
||||
int64_t ret_value = -1;
|
||||
herr_t ret_value = SUCCEED;
|
||||
|
||||
if (!sf_open_file_map)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, -1, "open file map is NULL");
|
||||
assert(context_id_out);
|
||||
|
||||
for (int i = 0; i < sf_file_map_size; i++) {
|
||||
if (sf_open_file_map[i].file_id == file_id) {
|
||||
return sf_open_file_map[i].sf_context_id;
|
||||
}
|
||||
}
|
||||
*context_id_out = -1;
|
||||
|
||||
if (init_open_file_map() < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize open file mapping");
|
||||
|
||||
for (int i = 0; i < sf_file_map_size; i++)
|
||||
if (sf_open_file_map[i].file_id == file_id)
|
||||
*context_id_out = sf_open_file_map[i].sf_context_id;
|
||||
|
||||
done:
|
||||
H5_SUBFILING_FUNC_LEAVE;
|
||||
@ -3095,15 +3130,17 @@ H5_subfiling_terminate(void)
|
||||
|
||||
/* Clean up subfiling context and topology caches */
|
||||
if (sf_context_cache) {
|
||||
for (size_t i = 0; i < sf_context_cache_num_entries; i++) {
|
||||
if (H5_free_subfiling_object_int(sf_context_cache[i]) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL,
|
||||
"couldn't free subfiling context object");
|
||||
sf_context_cache[i] = NULL;
|
||||
for (size_t i = 0; i < sf_context_cache_size; i++) {
|
||||
if (sf_context_cache[i]) {
|
||||
if (H5_free_subfiling_object_int(sf_context_cache[i]) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL,
|
||||
"couldn't free subfiling context object");
|
||||
sf_context_cache[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
sf_context_cache_size = 0;
|
||||
sf_context_cache_num_entries = 0;
|
||||
sf_context_cache_size = 0;
|
||||
sf_context_cache_next_index = 0;
|
||||
|
||||
free(sf_context_cache);
|
||||
sf_context_cache = NULL;
|
||||
|
@ -208,29 +208,31 @@ typedef struct topology {
|
||||
} sf_topology_t;
|
||||
|
||||
typedef struct {
|
||||
int64_t sf_context_id; /* Generated context ID which embeds the cache index */
|
||||
uint64_t h5_file_id; /* GUID (basically the inode value) */
|
||||
int *sf_fids; /* Array of file IDs for subfiles this rank owns */
|
||||
int sf_num_fids; /* Number of subfiles this rank owns */
|
||||
int sf_num_subfiles; /* Total number of subfiles for logical HDF5 file */
|
||||
size_t sf_write_count; /* Statistics: write_count */
|
||||
size_t sf_read_count; /* Statistics: read_count */
|
||||
haddr_t sf_eof; /* File eof */
|
||||
int64_t sf_stripe_size; /* Stripe-depth */
|
||||
int64_t sf_blocksize_per_stripe; /* Stripe-depth X n_IOCs */
|
||||
int64_t sf_base_addr; /* For an IOC, our base address */
|
||||
MPI_Comm sf_msg_comm; /* MPI comm used to send RPC msg */
|
||||
MPI_Comm sf_data_comm; /* MPI comm used to move data */
|
||||
MPI_Comm sf_eof_comm; /* MPI comm used to communicate EOF */
|
||||
MPI_Comm sf_node_comm; /* MPI comm used for intra-node comms */
|
||||
MPI_Comm sf_group_comm; /* Not used: for IOC collectives */
|
||||
int sf_group_size; /* IOC count (in sf_group_comm) */
|
||||
int sf_group_rank; /* IOC rank (in sf_group_comm) */
|
||||
char *subfile_prefix; /* If subfiles are node-local */
|
||||
char *config_file_prefix; /* Prefix added to config file name */
|
||||
char *h5_filename; /* The user supplied file name */
|
||||
void *ioc_data; /* Private data for underlying IOC */
|
||||
sf_topology_t *topology; /* Pointer to our topology */
|
||||
int64_t sf_context_id; /* Generated context ID which embeds the cache index */
|
||||
uint64_t h5_file_id; /* GUID (basically the inode value) */
|
||||
bool threads_inited; /* Whether the IOC threads for this context were started */
|
||||
int file_ref; /* Reference count held by files using this context */
|
||||
int *sf_fids; /* Array of file IDs for subfiles this rank owns */
|
||||
int sf_num_fids; /* Number of subfiles this rank owns */
|
||||
int sf_num_subfiles; /* Total number of subfiles for logical HDF5 file */
|
||||
size_t sf_write_count; /* Statistics: write_count */
|
||||
size_t sf_read_count; /* Statistics: read_count */
|
||||
haddr_t sf_eof; /* File eof */
|
||||
int64_t sf_stripe_size; /* Stripe-depth */
|
||||
int64_t sf_blocksize_per_stripe; /* Stripe-depth X n_IOCs */
|
||||
int64_t sf_base_addr; /* For an IOC, our base address */
|
||||
MPI_Comm sf_msg_comm; /* MPI comm used to send RPC msg */
|
||||
MPI_Comm sf_data_comm; /* MPI comm used to move data */
|
||||
MPI_Comm sf_eof_comm; /* MPI comm used to communicate EOF */
|
||||
MPI_Comm sf_node_comm; /* MPI comm used for intra-node comms */
|
||||
MPI_Comm sf_group_comm; /* Not used: for IOC collectives */
|
||||
int sf_group_size; /* IOC count (in sf_group_comm) */
|
||||
int sf_group_rank; /* IOC rank (in sf_group_comm) */
|
||||
char *subfile_prefix; /* If subfiles are node-local */
|
||||
char *config_file_prefix; /* Prefix added to config file name */
|
||||
char *h5_filename; /* The user supplied file name */
|
||||
void *ioc_data; /* Private data for underlying IOC */
|
||||
sf_topology_t *topology; /* Pointer to our topology */
|
||||
|
||||
#ifdef H5_SUBFILING_DEBUG
|
||||
char sf_logfile_name[PATH_MAX];
|
||||
@ -274,12 +276,12 @@ H5_DLL herr_t H5_get_subfiling_config_from_file(FILE *config_file, int64_t *str
|
||||
int64_t *num_subfiles);
|
||||
H5_DLL herr_t H5_resolve_pathname(const char *filepath, MPI_Comm comm, char **resolved_filepath);
|
||||
|
||||
H5_DLL herr_t H5_subfiling_set_config_prop(H5P_genplist_t *plist_ptr,
|
||||
const H5FD_subfiling_params_t *vfd_config);
|
||||
H5_DLL herr_t H5_subfiling_get_config_prop(H5P_genplist_t *plist_ptr, H5FD_subfiling_params_t *vfd_config);
|
||||
H5_DLL herr_t H5_subfiling_set_file_id_prop(H5P_genplist_t *plist_ptr, uint64_t file_id);
|
||||
H5_DLL herr_t H5_subfiling_get_file_id_prop(H5P_genplist_t *plist_ptr, uint64_t *file_id);
|
||||
H5_DLL int64_t H5_subfile_fid_to_context(uint64_t file_id);
|
||||
H5_DLL herr_t H5_subfiling_set_config_prop(H5P_genplist_t *plist_ptr,
|
||||
const H5FD_subfiling_params_t *vfd_config);
|
||||
H5_DLL herr_t H5_subfiling_get_config_prop(H5P_genplist_t *plist_ptr, H5FD_subfiling_params_t *vfd_config);
|
||||
H5_DLL herr_t H5_subfiling_set_file_id_prop(H5P_genplist_t *plist_ptr, uint64_t file_id);
|
||||
H5_DLL herr_t H5_subfiling_get_file_id_prop(H5P_genplist_t *plist_ptr, uint64_t *file_id);
|
||||
H5_DLL herr_t H5_subfile_fid_to_context(uint64_t file_id, int64_t *context_id_out);
|
||||
|
||||
H5_DLL herr_t H5_subfiling_validate_config(const H5FD_subfiling_params_t *subf_config);
|
||||
|
||||
|
@ -29,9 +29,9 @@
|
||||
static MPI_Comm comm = MPI_COMM_WORLD;
|
||||
static MPI_Info info = MPI_INFO_NULL;
|
||||
|
||||
bool pass = true; /* set to false on error */
|
||||
bool disp_failure_mssgs = true; /* global force display of failure messages */
|
||||
const char *failure_mssg = NULL;
|
||||
static bool pass = true; /* set to false on error */
|
||||
static bool disp_failure_mssgs = true; /* global force display of failure messages */
|
||||
static const char *failure_mssg = NULL;
|
||||
|
||||
const char *FILENAMES[] = {"mpio_vfd_test_file_0", /*0*/
|
||||
"mpio_vfd_test_file_1", /*1*/
|
||||
|
Loading…
Reference in New Issue
Block a user