From c4cc33abe14d51d4e5b3f75bba24e9523d681d19 Mon Sep 17 00:00:00 2001 From: jhendersonHDF Date: Thu, 21 Mar 2024 19:39:13 -0500 Subject: [PATCH] Fix issue with Subfiling VFD and multiple opens of same file (#4194) * Fix issue with Subfiling VFD and multiple opens of same file * Update H5_subfile_fid_to_context to return error value instead of ID * Add helper routine to initialize open file mapping --- release_docs/RELEASE.txt | 10 + src/H5FDsubfiling/H5FDioc.c | 29 +- src/H5FDsubfiling/H5FDsubfiling.c | 4 +- src/H5FDsubfiling/H5subfiling_common.c | 355 ++++++++++++++----------- src/H5FDsubfiling/H5subfiling_common.h | 60 +++-- testpar/t_vfd.c | 6 +- 6 files changed, 264 insertions(+), 200 deletions(-) diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index 4d525ba263..fbd6b78e54 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -693,6 +693,16 @@ Bug Fixes since HDF5-1.14.0 release Library ------- + - Fixed an issue with the Subfiling VFD and multiple opens of a + file + + An issue with the way the Subfiling VFD handles multiple opens + of the same file caused the file structures for the extra opens + to occasionally get mapped to an incorrect subfiling context + object. The VFD now correctly maps the file structures for + additional opens of an already open file to the same context + object. + - Fixed a bug that causes the library to incorrectly identify the endian-ness of 16-bit and smaller C floating-point datatypes diff --git a/src/H5FDsubfiling/H5FDioc.c b/src/H5FDsubfiling/H5FDioc.c index f465d6f5a3..f43f638ff0 100644 --- a/src/H5FDsubfiling/H5FDioc.c +++ b/src/H5FDsubfiling/H5FDioc.c @@ -843,12 +843,17 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr) H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open subfiles for file '%s'", name); - /* Initialize I/O concentrator threads if this MPI rank is an I/O concentrator */ + /* + * Initialize I/O concentrator threads if this MPI rank is an I/O + * concentrator and the threads haven't already been initialized by + * a different open of this file + */ sf_context = H5_get_subfiling_object(file_ptr->context_id); - if (sf_context && sf_context->topology->rank_is_ioc) { + if (sf_context && sf_context->topology->rank_is_ioc && !sf_context->threads_inited) { if (initialize_ioc_threads(sf_context) < 0) H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTINIT, NULL, "unable to initialize I/O concentrator threads"); + sf_context->threads_inited = true; } ret_value = (H5FD_t *)file_ptr; @@ -917,14 +922,22 @@ H5FD__ioc_close_int(H5FD_ioc_t *file_ptr) if (MPI_SUCCESS != (mpi_code = MPI_Barrier(file_ptr->comm))) H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code); - if (sf_context && sf_context->topology->rank_is_ioc) { - if (finalize_ioc_threads(sf_context) < 0) - /* Note that closing of subfiles is collective */ - H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL, "unable to finalize IOC threads"); + /* Only finalize IOC threads and close subfiles if this is + * the last file holding a reference to the context + */ + if (sf_context && sf_context->file_ref == 1) { + if (sf_context->topology->rank_is_ioc && sf_context->threads_inited) { + if (finalize_ioc_threads(sf_context) < 0) + /* Note that closing of subfiles is collective */ + H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL, + "unable to finalize IOC threads"); + } + + if (H5_close_subfiles(file_ptr->context_id, file_ptr->comm) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL, + "unable to close subfiling file(s)"); } - if (H5_close_subfiles(file_ptr->context_id, file_ptr->comm) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTCLOSEFILE, FAIL, "unable to close subfiling file(s)"); file_ptr->context_id = -1; } diff --git a/src/H5FDsubfiling/H5FDsubfiling.c b/src/H5FDsubfiling/H5FDsubfiling.c index 9594f676a8..9b37463fec 100644 --- a/src/H5FDsubfiling/H5FDsubfiling.c +++ b/src/H5FDsubfiling/H5FDsubfiling.c @@ -1244,7 +1244,9 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma if (driver->value == H5_VFD_IOC) { /* Get a copy of the context ID for later use */ - file_ptr->context_id = H5_subfile_fid_to_context(file_ptr->file_id); + if (H5_subfile_fid_to_context(file_ptr->file_id, &file_ptr->context_id) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, NULL, + "unable to retrieve subfiling context ID for this file"); file_ptr->fa.require_ioc = true; } else if (driver->value == H5_VFD_SEC2) { diff --git a/src/H5FDsubfiling/H5subfiling_common.c b/src/H5FDsubfiling/H5subfiling_common.c index aecc86894b..0493603f82 100644 --- a/src/H5FDsubfiling/H5subfiling_common.c +++ b/src/H5FDsubfiling/H5subfiling_common.c @@ -38,7 +38,7 @@ static sf_topology_t **sf_topology_cache = NULL; static size_t sf_context_cache_size = 0; static size_t sf_topology_cache_size = 0; -static size_t sf_context_cache_num_entries = 0; +static size_t sf_context_cache_next_index = 0; static size_t sf_topology_cache_num_entries = 0; static file_map_to_context_t *sf_open_file_map = NULL; @@ -67,9 +67,9 @@ static herr_t identify_ioc_ranks(int64_t sf_context_id, sf_topology_t *app_topol static herr_t init_subfiling_context(subfiling_context_t *sf_context, const char *base_filename, uint64_t file_id, H5FD_subfiling_params_t *subfiling_config, sf_topology_t *app_topology, MPI_Comm file_comm); -static herr_t open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags); -static herr_t record_fid_to_subfile(uint64_t file_id, int64_t subfile_context_id, int *next_index); -static void clear_fid_map_entry(uint64_t file_id, int64_t sf_context_id); +static herr_t init_open_file_map(void); +static herr_t record_fid_map_entry(uint64_t file_id, int64_t subfile_context_id, int *next_index); +static herr_t clear_fid_map_entry(uint64_t file_id, int64_t sf_context_id); static herr_t ioc_open_files(int64_t file_context_id, int file_acc_flags); static herr_t create_config_file(subfiling_context_t *sf_context, const char *base_filename, const char *config_dir, const char *subfile_dir, bool truncate_if_exists); @@ -92,7 +92,7 @@ H5_new_subfiling_object_id(sf_obj_type_t obj_type) int64_t index_val = 0; if (obj_type == SF_CONTEXT) { - index_val = (int64_t)sf_context_cache_num_entries; + index_val = (int64_t)sf_context_cache_next_index; } else if (obj_type == SF_TOPOLOGY) { index_val = (int64_t)sf_topology_cache_num_entries; @@ -156,8 +156,8 @@ H5_get_subfiling_object(int64_t object_id) if (NULL == (sf_context_cache = calloc(DEFAULT_CONTEXT_CACHE_SIZE, sizeof(*sf_context_cache)))) H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "couldn't allocate space for subfiling context cache"); - sf_context_cache_size = DEFAULT_CONTEXT_CACHE_SIZE; - sf_context_cache_num_entries = 0; + sf_context_cache_size = DEFAULT_CONTEXT_CACHE_SIZE; + sf_context_cache_next_index = 0; } /* Make more space in context cache if needed */ @@ -166,7 +166,7 @@ H5_get_subfiling_object(int64_t object_id) size_t new_size; void *tmp_realloc; - old_num_entries = sf_context_cache_num_entries; + old_num_entries = sf_context_cache_size; new_size = (sf_context_cache_size * 3) / 2; @@ -188,23 +188,27 @@ H5_get_subfiling_object(int64_t object_id) assert((size_t)obj_index < sf_context_cache_size); } - /* - * Since this cache currently just keeps all entries until - * application exit, context entry indices should just be - * consecutive - */ - assert((size_t)obj_index <= sf_context_cache_num_entries); - if ((size_t)obj_index < sf_context_cache_num_entries) - ret_value = sf_context_cache[obj_index]; - else { - assert(!sf_context_cache[sf_context_cache_num_entries]); + ret_value = sf_context_cache[obj_index]; + if (!ret_value) { + size_t next_idx; /* Allocate a new subfiling context object */ - if (NULL == (ret_value = calloc(1, sizeof(subfiling_context_t)))) + if (NULL == (sf_context_cache[obj_index] = calloc(1, sizeof(subfiling_context_t)))) H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, NULL, "couldn't allocate subfiling context object"); - sf_context_cache[sf_context_cache_num_entries++] = ret_value; + ret_value = sf_context_cache[obj_index]; + + /* Set index for next available cache entry. If all available + * slots are filled, the index will be set to sf_context_cache_size + * and cause a reallocation of the cache the next time a new + * cache entry is created. + */ + next_idx = (size_t)obj_index + 1; + while (next_idx < sf_context_cache_size && sf_context_cache[next_idx]) + next_idx++; + + sf_context_cache_next_index = next_idx; } } else if (obj_type == SF_TOPOLOGY) { @@ -310,13 +314,21 @@ H5_free_subfiling_object(int64_t object_id) H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't get subfiling context for subfiling object ID"); - if (H5_free_subfiling_object_int(sf_context) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling context object"); + if (sf_context->file_ref == 0 || --sf_context->file_ref == 0) { + if (H5_free_subfiling_object_int(sf_context) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, + "couldn't free subfiling context object"); - assert(sf_context_cache_num_entries > 0); - assert(sf_context == sf_context_cache[sf_context_cache_num_entries - 1]); - sf_context_cache[sf_context_cache_num_entries - 1] = NULL; - sf_context_cache_num_entries--; + for (size_t idx = 0; idx < sf_context_cache_size; idx++) { + if (sf_context != sf_context_cache[idx]) + continue; + + if (idx < sf_context_cache_next_index) + sf_context_cache_next_index = idx; + + sf_context_cache[idx] = NULL; + } + } } else if (obj_type == SF_TOPOLOGY) { sf_topology_t *sf_topology; @@ -358,6 +370,8 @@ H5_free_subfiling_object_int(subfiling_context_t *sf_context) sf_context->sf_context_id = -1; sf_context->h5_file_id = UINT64_MAX; + sf_context->threads_inited = false; + sf_context->file_ref = 0; sf_context->sf_num_fids = 0; sf_context->sf_num_subfiles = -1; sf_context->sf_write_count = 0; @@ -611,23 +625,18 @@ done: /*------------------------------------------------------------------------- * Function: H5_open_subfiles * - * Purpose: Wrapper for the internal 'open__subfiles' function - * Similar to the other public wrapper functions, we - * discover (via the sf_context) the number of io concentrators - * and pass that to the internal function so that vector - * storage arrays can be stack based rather than explicitly - * allocated and freed. + * Purpose: Initializes a subfiling context object for a file with the + * given filename and file ID (inode) and opens the associated + * subfiles. As part of this process, information about the + * application topology will be gathered and stored in the + * context object for future use. This includes identifying + * which MPI ranks will act as I/O concentrators and "own" + * one or more of the opened subfiles. The process of + * initializing the subfiling context object also involves + * creating MPI communicators that facilitate messaging + * between HDF5 clients and the I/O concentrators. * - * The Internal function is responsible for sending all IOC - * instances, the (sub)file open requests. - * - * Prior to calling the internal open function, we initialize - * a new subfiling context that contains topology info and - * new MPI communicators that facilitate messaging between - * HDF5 clients and the IOCs. - * - * Return: Success (0) or Failure (non-zero) - * Errors: If MPI operations fail for some reason. + * Return: Non-negative on success/Negative on failure * *------------------------------------------------------------------------- */ @@ -635,8 +644,9 @@ herr_t H5_open_subfiles(const char *base_filename, uint64_t file_id, H5FD_subfiling_params_t *subfiling_config, int file_acc_flags, MPI_Comm file_comm, int64_t *context_id_out) { - subfiling_context_t *sf_context = NULL; - int64_t context_id = -1; + subfiling_context_t *sf_context = NULL; + int64_t context_id = -1; + bool recorded_fid = false; int mpi_code; herr_t ret_value = SUCCEED; @@ -649,21 +659,44 @@ H5_open_subfiles(const char *base_filename, uint64_t file_id, H5FD_subfiling_par if (!context_id_out) H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "invalid subfiling context ID pointer"); - /* Initialize new subfiling context ID based on configuration information */ - if (init_subfiling(base_filename, file_id, subfiling_config, file_acc_flags, file_comm, &context_id) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize subfiling context"); - - /* Retrieve the subfiling object for the newly-created context ID */ - if (NULL == (sf_context = H5_get_subfiling_object(context_id))) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't get subfiling object from context ID"); - - /* - * If we're actually using the IOCs, we will - * start the service threads on the identified - * ranks as part of the subfile opening. + /* Make sure open file mapping is initialized in case this + * is the first file open call with the VFD */ - if (open_subfile_with_context(sf_context, file_acc_flags) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "couldn't open subfiling subfiles"); + if (init_open_file_map() < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize open file mapping"); + + /* Check if this file is already open */ + if (H5_subfile_fid_to_context(file_id, &context_id) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, + "couldn't retrieve context ID from open file mapping"); + + if (context_id >= 0) { + /* Retrieve the subfiling object for the cached context ID */ + if (NULL == (sf_context = H5_get_subfiling_object(context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, + "couldn't get subfiling object from context ID"); + } + else { + /* Initialize new subfiling context ID based on configuration information */ + if (init_subfiling(base_filename, file_id, subfiling_config, file_acc_flags, file_comm, &context_id) < + 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize subfiling context"); + + /* Retrieve the subfiling object for the newly-created context ID */ + if (NULL == (sf_context = H5_get_subfiling_object(context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, + "couldn't get subfiling object from context ID"); + + /* + * If this rank is an I/O concentrator, actually open + * the subfiles belonging to this IOC rank and start + * the I/O service threads + */ + if (sf_context->topology->rank_is_ioc) { + if (ioc_open_files(sf_context->sf_context_id, file_acc_flags) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, "IOC couldn't open subfile"); + } + } #ifdef H5_SUBFILING_DEBUG { @@ -689,6 +722,16 @@ H5_open_subfiles(const char *base_filename, uint64_t file_id, H5FD_subfiling_par } #endif + /* + * Save the HDF5 file ID (e.g., inode) to subfile context mapping. + * There shouldn't be any issue, but check the status and + * return if there was a problem. + */ + if (record_fid_map_entry(sf_context->h5_file_id, sf_context->sf_context_id, NULL) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, + "couldn't record HDF5 file ID to subfile context mapping"); + recorded_fid = true; + *context_id_out = context_id; done: @@ -715,7 +758,9 @@ done: } if (ret_value < 0) { - clear_fid_map_entry(file_id, context_id); + if (recorded_fid && clear_fid_map_entry(file_id, context_id) < 0) + H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, + "unable to clear entry from file ID to context mapping"); if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "couldn't free subfiling object"); @@ -1802,6 +1847,8 @@ init_subfiling_context(subfiling_context_t *sf_context, const char *base_filenam assert(MPI_COMM_NULL != file_comm); sf_context->h5_file_id = file_id; + sf_context->threads_inited = false; + sf_context->file_ref = 0; sf_context->sf_fids = NULL; sf_context->sf_num_fids = 0; sf_context->sf_num_subfiles = subfiling_config->stripe_count; @@ -1938,71 +1985,38 @@ done: } /*------------------------------------------------------------------------- - * Function: open_subfile_with_context + * Function: init_open_file_map * - * Purpose: While we cannot know a priori, whether an HDF client will - * need to access data across the entirety of a file, e.g. - * an individual MPI rank may read or write only small - * segments of the entire file space; this function sends - * a file OPEN_OP to every IO concentrator. + * Purpose: Allocates and initializes an array that keeps a mapping + * between a file's inode value (__ino_t st_ino) and the ID + * of the context object associated with it. * - * Prior to opening any subfiles, the H5FDopen will have - * created an HDF5 file with the user specified naming. - * A path prefix will be selected and is available as - * an input argument. - * - * The opened HDF5 file handle will contain device and - * inode values, these being constant for all processes - * opening the shared file. The inode value is utilized - * as a key value and is associated with the sf_context - * which we receive as one of the input arguments. - * - * IO Concentrator threads will be initialized on MPI ranks - * which have been identified via application toplogy - * discovery. The number and mapping of IOC to MPI_rank - * is part of the sf_context->topology structure. - * - * Return: Success (0) or Failure (non-zero) - * Errors: If MPI operations fail for some reason. + * Return: Non-negative on success/Negative on failure * *------------------------------------------------------------------------- */ static herr_t -open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags) +init_open_file_map(void) { herr_t ret_value = SUCCEED; - assert(sf_context); - assert(sf_context->h5_file_id != UINT64_MAX); + if (!sf_open_file_map) { + if (NULL == (sf_open_file_map = malloc((size_t)DEFAULT_FILE_MAP_ENTRIES * sizeof(*sf_open_file_map)))) + H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't allocate open file mapping"); - /* - * Save the HDF5 file ID (e.g., inode) to subfile context mapping. - * There shouldn't be any issue, but check the status and - * return if there was a problem. - */ - if (record_fid_to_subfile(sf_context->h5_file_id, sf_context->sf_context_id, NULL) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, - "couldn't record HDF5 file ID to subfile context mapping"); - - /* - * If this rank is an I/O concentrator, actually open - * the subfiles belonging to this IOC rank - */ - if (sf_context->topology->rank_is_ioc) { - if (ioc_open_files(sf_context->sf_context_id, file_acc_flags) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTOPENFILE, FAIL, "IOC couldn't open subfile"); + sf_file_map_size = DEFAULT_FILE_MAP_ENTRIES; + for (int i = 0; i < sf_file_map_size; i++) { + sf_open_file_map[i].file_id = UINT64_MAX; + sf_open_file_map[i].sf_context_id = -1; + } } done: - if (ret_value < 0) { - clear_fid_map_entry(sf_context->h5_file_id, sf_context->sf_context_id); - } - H5_SUBFILING_FUNC_LEAVE; } /*------------------------------------------------------------------------- - * Function: record_fid_to_subfile + * Function: record_fid_map_entry * * Purpose: Every opened HDF5 file will have (if utilizing subfiling) * a subfiling context associated with it. It is important that @@ -2020,37 +2034,35 @@ done: * This function simply records the filesystem handle to * subfiling context mapping. * - * Return: SUCCEED or FAIL. - * Errors: FAILs ONLY if storage for the mapping entry cannot - * be allocated. + * Return: Non-negative on success/Negative on failure * *------------------------------------------------------------------------- */ static herr_t -record_fid_to_subfile(uint64_t file_id, int64_t subfile_context_id, int *next_index) +record_fid_map_entry(uint64_t file_id, int64_t subfile_context_id, int *next_index) { - int index; - herr_t ret_value = SUCCEED; - - if (!sf_open_file_map) { - if (NULL == (sf_open_file_map = malloc((size_t)DEFAULT_FILE_MAP_ENTRIES * sizeof(*sf_open_file_map)))) - H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "couldn't allocate open file mapping"); - - sf_file_map_size = DEFAULT_FILE_MAP_ENTRIES; - for (int i = 0; i < sf_file_map_size; i++) { - sf_open_file_map[i].file_id = UINT64_MAX; - sf_open_file_map[i].sf_context_id = -1; - } - } + subfiling_context_t *sf_context = NULL; + int index; + herr_t ret_value = SUCCEED; for (index = 0; index < sf_file_map_size; index++) { - if (sf_open_file_map[index].file_id == file_id) + if (sf_open_file_map[index].file_id == file_id) { + /* Increment file ref. count for this context */ + if (NULL == (sf_context = H5_get_subfiling_object(sf_open_file_map[index].sf_context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't get subfiling context"); + sf_context->file_ref++; goto done; + } if (sf_open_file_map[index].file_id == UINT64_MAX) { sf_open_file_map[index].file_id = file_id; sf_open_file_map[index].sf_context_id = subfile_context_id; + /* First open of this file - set file ref. count to 1 for this context */ + if (NULL == (sf_context = H5_get_subfiling_object(subfile_context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't get subfiling context"); + sf_context->file_ref = 1; + if (next_index) { *next_index = index; } @@ -2078,8 +2090,13 @@ record_fid_to_subfile(uint64_t file_id, int64_t subfile_context_id, int *next_in *next_index = index; } - sf_open_file_map[index].file_id = file_id; - sf_open_file_map[index++].sf_context_id = subfile_context_id; + sf_open_file_map[index].file_id = file_id; + sf_open_file_map[index].sf_context_id = subfile_context_id; + + /* First open of this file - set file ref. count to 1 for this context */ + if (NULL == (sf_context = H5_get_subfiling_object(subfile_context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't get subfiling context"); + sf_context->file_ref = 1; } done: @@ -2092,24 +2109,40 @@ done: * Purpose: Remove the map entry associated with the file->inode. * This is done at file close. * - * Return: None - * Errors: Cannot fail. + * Return: Non-negative on success/Negative on failure * *------------------------------------------------------------------------- */ -static void +static herr_t clear_fid_map_entry(uint64_t file_id, int64_t sf_context_id) { - if (sf_open_file_map) { - for (int i = 0; i < sf_file_map_size; i++) { - if ((sf_open_file_map[i].file_id == file_id) && - (sf_open_file_map[i].sf_context_id == sf_context_id)) { - sf_open_file_map[i].file_id = UINT64_MAX; - sf_open_file_map[i].sf_context_id = -1; - return; - } + herr_t ret_value = SUCCEED; + + if (!sf_open_file_map) + H5_SUBFILING_GOTO_DONE(SUCCEED); + + for (int i = 0; i < sf_file_map_size; i++) { + subfiling_context_t *sf_context = NULL; + + if ((sf_open_file_map[i].file_id != file_id) || (sf_open_file_map[i].sf_context_id != sf_context_id)) + continue; + + /* Only clear map entry if this is the last file + * holding a reference to the context + */ + if (NULL == (sf_context = H5_get_subfiling_object(sf_context_id))) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "couldn't get subfiling context object"); + + if (sf_context->file_ref == 0 || sf_context->file_ref == 1) { + sf_open_file_map[i].file_id = UINT64_MAX; + sf_open_file_map[i].sf_context_id = -1; } + + break; } + +done: + return ret_value; } /* end clear_fid_map_entry() */ /*------------------------------------------------------------------------- @@ -3013,27 +3046,29 @@ done: * Function: H5_subfile_fid_to_context * * Purpose: This is a basic lookup function which returns the subfiling - * context id associated with the specified file ID. + * context ID associated with the specified file ID. If no + * such context ID exists, `context_id_out` will be set to a + * negative value. * - * Return: Non-negative subfiling context ID if the context exists - * Negative on failure or if the subfiling context doesn't - * exist + * Return: Non-negative on success/Negative on failure * *------------------------------------------------------------------------- */ -int64_t -H5_subfile_fid_to_context(uint64_t file_id) +herr_t +H5_subfile_fid_to_context(uint64_t file_id, int64_t *context_id_out) { - int64_t ret_value = -1; + herr_t ret_value = SUCCEED; - if (!sf_open_file_map) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, -1, "open file map is NULL"); + assert(context_id_out); - for (int i = 0; i < sf_file_map_size; i++) { - if (sf_open_file_map[i].file_id == file_id) { - return sf_open_file_map[i].sf_context_id; - } - } + *context_id_out = -1; + + if (init_open_file_map() < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTINIT, FAIL, "couldn't initialize open file mapping"); + + for (int i = 0; i < sf_file_map_size; i++) + if (sf_open_file_map[i].file_id == file_id) + *context_id_out = sf_open_file_map[i].sf_context_id; done: H5_SUBFILING_FUNC_LEAVE; @@ -3095,15 +3130,17 @@ H5_subfiling_terminate(void) /* Clean up subfiling context and topology caches */ if (sf_context_cache) { - for (size_t i = 0; i < sf_context_cache_num_entries; i++) { - if (H5_free_subfiling_object_int(sf_context_cache[i]) < 0) - H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, - "couldn't free subfiling context object"); - sf_context_cache[i] = NULL; + for (size_t i = 0; i < sf_context_cache_size; i++) { + if (sf_context_cache[i]) { + if (H5_free_subfiling_object_int(sf_context_cache[i]) < 0) + H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, + "couldn't free subfiling context object"); + sf_context_cache[i] = NULL; + } } - sf_context_cache_size = 0; - sf_context_cache_num_entries = 0; + sf_context_cache_size = 0; + sf_context_cache_next_index = 0; free(sf_context_cache); sf_context_cache = NULL; diff --git a/src/H5FDsubfiling/H5subfiling_common.h b/src/H5FDsubfiling/H5subfiling_common.h index 6b5cfa45c2..2c614cc680 100644 --- a/src/H5FDsubfiling/H5subfiling_common.h +++ b/src/H5FDsubfiling/H5subfiling_common.h @@ -208,29 +208,31 @@ typedef struct topology { } sf_topology_t; typedef struct { - int64_t sf_context_id; /* Generated context ID which embeds the cache index */ - uint64_t h5_file_id; /* GUID (basically the inode value) */ - int *sf_fids; /* Array of file IDs for subfiles this rank owns */ - int sf_num_fids; /* Number of subfiles this rank owns */ - int sf_num_subfiles; /* Total number of subfiles for logical HDF5 file */ - size_t sf_write_count; /* Statistics: write_count */ - size_t sf_read_count; /* Statistics: read_count */ - haddr_t sf_eof; /* File eof */ - int64_t sf_stripe_size; /* Stripe-depth */ - int64_t sf_blocksize_per_stripe; /* Stripe-depth X n_IOCs */ - int64_t sf_base_addr; /* For an IOC, our base address */ - MPI_Comm sf_msg_comm; /* MPI comm used to send RPC msg */ - MPI_Comm sf_data_comm; /* MPI comm used to move data */ - MPI_Comm sf_eof_comm; /* MPI comm used to communicate EOF */ - MPI_Comm sf_node_comm; /* MPI comm used for intra-node comms */ - MPI_Comm sf_group_comm; /* Not used: for IOC collectives */ - int sf_group_size; /* IOC count (in sf_group_comm) */ - int sf_group_rank; /* IOC rank (in sf_group_comm) */ - char *subfile_prefix; /* If subfiles are node-local */ - char *config_file_prefix; /* Prefix added to config file name */ - char *h5_filename; /* The user supplied file name */ - void *ioc_data; /* Private data for underlying IOC */ - sf_topology_t *topology; /* Pointer to our topology */ + int64_t sf_context_id; /* Generated context ID which embeds the cache index */ + uint64_t h5_file_id; /* GUID (basically the inode value) */ + bool threads_inited; /* Whether the IOC threads for this context were started */ + int file_ref; /* Reference count held by files using this context */ + int *sf_fids; /* Array of file IDs for subfiles this rank owns */ + int sf_num_fids; /* Number of subfiles this rank owns */ + int sf_num_subfiles; /* Total number of subfiles for logical HDF5 file */ + size_t sf_write_count; /* Statistics: write_count */ + size_t sf_read_count; /* Statistics: read_count */ + haddr_t sf_eof; /* File eof */ + int64_t sf_stripe_size; /* Stripe-depth */ + int64_t sf_blocksize_per_stripe; /* Stripe-depth X n_IOCs */ + int64_t sf_base_addr; /* For an IOC, our base address */ + MPI_Comm sf_msg_comm; /* MPI comm used to send RPC msg */ + MPI_Comm sf_data_comm; /* MPI comm used to move data */ + MPI_Comm sf_eof_comm; /* MPI comm used to communicate EOF */ + MPI_Comm sf_node_comm; /* MPI comm used for intra-node comms */ + MPI_Comm sf_group_comm; /* Not used: for IOC collectives */ + int sf_group_size; /* IOC count (in sf_group_comm) */ + int sf_group_rank; /* IOC rank (in sf_group_comm) */ + char *subfile_prefix; /* If subfiles are node-local */ + char *config_file_prefix; /* Prefix added to config file name */ + char *h5_filename; /* The user supplied file name */ + void *ioc_data; /* Private data for underlying IOC */ + sf_topology_t *topology; /* Pointer to our topology */ #ifdef H5_SUBFILING_DEBUG char sf_logfile_name[PATH_MAX]; @@ -274,12 +276,12 @@ H5_DLL herr_t H5_get_subfiling_config_from_file(FILE *config_file, int64_t *str int64_t *num_subfiles); H5_DLL herr_t H5_resolve_pathname(const char *filepath, MPI_Comm comm, char **resolved_filepath); -H5_DLL herr_t H5_subfiling_set_config_prop(H5P_genplist_t *plist_ptr, - const H5FD_subfiling_params_t *vfd_config); -H5_DLL herr_t H5_subfiling_get_config_prop(H5P_genplist_t *plist_ptr, H5FD_subfiling_params_t *vfd_config); -H5_DLL herr_t H5_subfiling_set_file_id_prop(H5P_genplist_t *plist_ptr, uint64_t file_id); -H5_DLL herr_t H5_subfiling_get_file_id_prop(H5P_genplist_t *plist_ptr, uint64_t *file_id); -H5_DLL int64_t H5_subfile_fid_to_context(uint64_t file_id); +H5_DLL herr_t H5_subfiling_set_config_prop(H5P_genplist_t *plist_ptr, + const H5FD_subfiling_params_t *vfd_config); +H5_DLL herr_t H5_subfiling_get_config_prop(H5P_genplist_t *plist_ptr, H5FD_subfiling_params_t *vfd_config); +H5_DLL herr_t H5_subfiling_set_file_id_prop(H5P_genplist_t *plist_ptr, uint64_t file_id); +H5_DLL herr_t H5_subfiling_get_file_id_prop(H5P_genplist_t *plist_ptr, uint64_t *file_id); +H5_DLL herr_t H5_subfile_fid_to_context(uint64_t file_id, int64_t *context_id_out); H5_DLL herr_t H5_subfiling_validate_config(const H5FD_subfiling_params_t *subf_config); diff --git a/testpar/t_vfd.c b/testpar/t_vfd.c index 9faf1dbe7e..93fa4992b5 100644 --- a/testpar/t_vfd.c +++ b/testpar/t_vfd.c @@ -29,9 +29,9 @@ static MPI_Comm comm = MPI_COMM_WORLD; static MPI_Info info = MPI_INFO_NULL; -bool pass = true; /* set to false on error */ -bool disp_failure_mssgs = true; /* global force display of failure messages */ -const char *failure_mssg = NULL; +static bool pass = true; /* set to false on error */ +static bool disp_failure_mssgs = true; /* global force display of failure messages */ +static const char *failure_mssg = NULL; const char *FILENAMES[] = {"mpio_vfd_test_file_0", /*0*/ "mpio_vfd_test_file_1", /*1*/