Subfiling updates for release (#1963)

* Remove generated file h5fuse.sh

* Link pthreads library when Subfiling VFD is built

* Switch to MPI I/O driver for Subfiling HDF5 stub file

* Rough first implementation for Subfiling file deletion

* Subfiling VFD - get file dirname for file deletion

* Subfiling VFD - set lock callback to NULL for now to avoid performance
issues

* Committing clang-format changes

* Minor tidying up of Subfiling testing

* Fixups for Subfiling VFD support in tools

* Tidy up Subfiling public interface and add Doxygen

* Respect Subfiling configuration settings from application

* Add release note for Subfiling VFD

* Committing clang-format changes

* Committing clang-format changes

* Shorten some Subfiling environment variable names

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
This commit is contained in:
jhendersonHDF 2022-08-04 12:56:48 -05:00 committed by GitHub
parent a71534fcc2
commit bf07e0f2c9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 1012 additions and 582 deletions

View File

@ -88,7 +88,7 @@ $Source = "";
"H5_index_t" => "Ii",
"H5I_iterate_func_t" => "II",
"H5_iter_order_t" => "Io",
"ioc_selection_t" => "IO",
"H5FD_subfiling_ioc_select_t" => "IO",
"H5I_future_realize_func_t" => "IR",
"int" => "Is",
"int32_t" => "Is",

View File

@ -28,7 +28,7 @@ if (DOXYGEN_FOUND)
set (DOXYGEN_SEARCHENGINE_URL)
set (DOXYGEN_STRIP_FROM_PATH ${HDF5_SOURCE_DIR})
set (DOXYGEN_STRIP_FROM_INC_PATH ${HDF5_SOURCE_DIR})
set (DOXYGEN_PREDEFINED "H5_HAVE_DIRECT H5_HAVE_LIBHDFS H5_HAVE_MAP_API H5_HAVE_PARALLEL H5_HAVE_ROS3_VFD")
set (DOXYGEN_PREDEFINED "H5_HAVE_DIRECT H5_HAVE_LIBHDFS H5_HAVE_MAP_API H5_HAVE_PARALLEL H5_HAVE_ROS3_VFD H5_HAVE_SUBFILING_VFD H5_HAVE_IOC_VFD")
# This configure and individual custom targets work together
# Replace variables inside @@ with the current values

View File

@ -860,6 +860,7 @@ FILE_PATTERNS = H5*public.h \
H5FDdirect.h \
H5FDfamily.h \
H5FDhdfs.h \
H5FDioc.h \
H5FDlog.h \
H5FDmirror.h \
H5FDmpi.h \
@ -869,6 +870,7 @@ FILE_PATTERNS = H5*public.h \
H5FDsec2.h \
H5FDsplitter.h \
H5FDstdio.h \
H5FDsubfiling.h \
H5FDwindows.h \
H5VLconnector.h \
H5VLconnector_passthru.h \

View File

@ -85,7 +85,28 @@ New Features
Library:
--------
-
- Subfiling VFD
The HDF5 Subfiling VFD is a new MPI-based file driver that allows an
HDF5 application to distribute an HDF5 file across a collection of
"sub-files" in equal-sized data segment "stripes". I/O to the logical
HDF5 file is then directed to the appropriate "sub-file" according to
the Subfiling configuration and a system of I/O concentrators, which
are MPI ranks operating worker threads.
By allowing a configurable stripe size, number of I/O concentrators and
method for selecting MPI ranks as I/O concentrators, the Subfiling VFD
aims to enable an HDF5 application to find a middle ground between the
single shared file and file-per-process approaches to parallel file I/O
for the particular machine the application is running on. In general, the
goal is to avoid some of the complexity of the file-per-process approach
while also minimizing the locking issues of the single shared file approach
on a parallel file system.
The Subfiling VFD can be used by calling H5Pset_fapl_subfiling() on a
File Access Property List and using that FAPL for file operations.
(JTH - 2022/07/22)
Parallel Library:

View File

@ -16,6 +16,8 @@
* another underlying VFD. Maintains two files simultaneously.
*/
#include <libgen.h>
/* This source code file is part of the H5FD driver module */
#include "H5FDdrvr_module.h"
@ -25,7 +27,7 @@
#include "H5FDprivate.h" /* File drivers */
#include "H5FDioc.h" /* IOC file driver */
#include "H5FDioc_priv.h" /* IOC file driver */
#include "H5FDsec2.h" /* Sec2 VFD */
#include "H5FDmpio.h" /* MPI I/O VFD */
#include "H5FLprivate.h" /* Free Lists */
#include "H5Fprivate.h" /* File access */
#include "H5Iprivate.h" /* IDs */
@ -53,7 +55,7 @@ typedef struct H5FD_ioc_t {
int mpi_rank;
int mpi_size;
H5FD_t *ioc_file; /* native HDF5 file pointer (sec2) */
H5FD_t *ioc_file; /* native HDF5 file pointer */
int64_t context_id; /* The value used to lookup a subfiling context for the file */
@ -68,7 +70,6 @@ typedef struct H5FD_ioc_t {
* Windows code further below.
*/
dev_t device; /* file device number */
ino_t inode; /* file i-node number */
#else
/* Files in windows are uniquely identified by the volume serial
* number and the file index (both low and high parts).
@ -161,7 +162,7 @@ static herr_t H5FD__ioc_ctl(H5FD_t *file, uint64_t op_code, uint64_t flags,
const void *input, void **result);
*/
static herr_t H5FD__ioc_get_default_config(H5FD_ioc_config_t *config_out);
static herr_t H5FD__ioc_get_default_config(hid_t fapl_id, H5FD_ioc_config_t *config_out);
static herr_t H5FD__ioc_validate_config(const H5FD_ioc_config_t *fa);
static int H5FD__copy_plist(hid_t fapl_id, hid_t *id_out_ptr);
@ -358,13 +359,13 @@ H5Pset_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *vfd_config)
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list");
if (vfd_config == NULL) {
if (NULL == (ioc_conf = HDcalloc(1, sizeof(*ioc_conf))))
if (NULL == (ioc_conf = H5FL_CALLOC(H5FD_ioc_config_t)))
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
"can't allocate IOC VFD configuration");
ioc_conf->ioc_fapl_id = H5I_INVALID_HID;
ioc_conf->under_fapl_id = H5I_INVALID_HID;
/* Get IOC VFD defaults */
if (H5FD__ioc_get_default_config(ioc_conf) < 0)
if (H5FD__ioc_get_default_config(fapl_id, ioc_conf) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't get default IOC VFD configuration");
vfd_config = ioc_conf;
@ -377,9 +378,9 @@ H5Pset_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *vfd_config)
done:
if (ioc_conf) {
if (ioc_conf->ioc_fapl_id >= 0 && H5I_dec_ref(ioc_conf->ioc_fapl_id) < 0)
H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTDEC, FAIL, "can't close IOC FAPL");
HDfree(ioc_conf);
if (ioc_conf->under_fapl_id >= 0 && H5I_dec_ref(ioc_conf->under_fapl_id) < 0)
H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTDEC, FAIL, "can't close IOC under FAPL");
H5FL_FREE(H5FD_ioc_config_t, ioc_conf);
}
H5_SUBFILING_FUNC_LEAVE;
@ -423,7 +424,7 @@ H5Pget_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_out)
}
if (use_default_config) {
if (H5FD__ioc_get_default_config(config_out) < 0)
if (H5FD__ioc_get_default_config(fapl_id, config_out) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get default IOC VFD configuration");
}
else {
@ -431,8 +432,8 @@ H5Pget_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_out)
HDmemcpy(config_out, config_ptr, sizeof(H5FD_ioc_config_t));
/* Copy the driver info value */
if (H5FD__copy_plist(config_ptr->ioc_fapl_id, &(config_out->ioc_fapl_id)) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "can't copy IOC FAPL");
if (H5FD__copy_plist(config_ptr->under_fapl_id, &(config_out->under_fapl_id)) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, FAIL, "can't copy IOC under FAPL");
}
done:
@ -451,35 +452,53 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
H5FD__ioc_get_default_config(H5FD_ioc_config_t *config_out)
H5FD__ioc_get_default_config(hid_t fapl_id, H5FD_ioc_config_t *config_out)
{
herr_t ret_value = SUCCEED;
MPI_Comm comm = MPI_COMM_NULL;
MPI_Info info = MPI_INFO_NULL;
herr_t ret_value = SUCCEED;
HDassert(config_out);
HDmemset(config_out, 0, sizeof(*config_out));
config_out->magic = H5FD_IOC_FAPL_MAGIC;
config_out->version = H5FD_CURR_IOC_FAPL_VERSION;
config_out->ioc_fapl_id = H5I_INVALID_HID;
config_out->stripe_count = 0;
config_out->stripe_depth = H5FD_DEFAULT_STRIPE_DEPTH;
config_out->ioc_selection = SELECT_IOC_ONE_PER_NODE;
config_out->version = H5FD_IOC_CURR_FAPL_VERSION;
config_out->under_fapl_id = H5I_INVALID_HID;
/*
* Use default subfiling configuration. Do NOT call
* H5Pget_fapl_subfiling here as that can cause issues
*/
config_out->subf_config.ioc_selection = SELECT_IOC_ONE_PER_NODE;
config_out->subf_config.stripe_size = H5FD_SUBFILING_DEFAULT_STRIPE_SIZE;
config_out->subf_config.stripe_count = 0;
/* Create a default FAPL and choose an appropriate underlying driver */
if ((config_out->ioc_fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0)
if ((config_out->under_fapl_id = H5Pcreate(H5P_FILE_ACCESS)) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTCREATE, FAIL, "can't create default FAPL");
/* Currently, only sec2 vfd supported */
if (H5Pset_fapl_sec2(config_out->ioc_fapl_id) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set Sec2 VFD on IOC FAPL");
/* Check if any MPI parameters were set on the FAPL */
if (H5Pget_mpi_params(fapl_id, &comm, &info) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI Comm/Info");
if (comm == MPI_COMM_NULL)
comm = MPI_COMM_WORLD;
/* Hardwire MPI I/O VFD for now */
if (H5Pset_fapl_mpio(config_out->under_fapl_id, comm, info) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set MPI I/O VFD on IOC under FAPL");
/* Specific to this I/O Concentrator */
config_out->thread_pool_count = H5FD_IOC_THREAD_POOL_SIZE;
config_out->thread_pool_count = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE;
done:
if (H5_mpi_comm_free(&comm) < 0)
H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTFREE, FAIL, "can't free MPI Communicator");
if (H5_mpi_info_free(&info) < 0)
H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTFREE, FAIL, "can't free MPI Info object");
if (ret_value < 0) {
if (config_out->ioc_fapl_id >= 0 && H5Pclose(config_out->ioc_fapl_id) < 0)
if (config_out->under_fapl_id >= 0 && H5Pclose(config_out->under_fapl_id) < 0)
H5_SUBFILING_DONE_ERROR(H5E_PLIST, H5E_CANTCLOSEOBJ, FAIL, "can't close FAPL");
}
@ -510,7 +529,7 @@ H5FD__ioc_validate_config(const H5FD_ioc_config_t *fa)
HDassert(fa != NULL);
if (fa->version != H5FD_CURR_IOC_FAPL_VERSION)
if (fa->version != H5FD_IOC_CURR_FAPL_VERSION)
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Unknown H5FD_ioc_config_t version");
if (fa->magic != H5FD_IOC_FAPL_MAGIC)
@ -696,8 +715,8 @@ H5FD__ioc_fapl_copy(const void *_old_fa)
HDmemcpy(new_fa_ptr, old_fa_ptr, sizeof(H5FD_ioc_config_t));
/* Copy the FAPL */
if (H5FD__copy_plist(old_fa_ptr->ioc_fapl_id, &(new_fa_ptr->ioc_fapl_id)) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "can't copy the IOC FAPL");
if (H5FD__copy_plist(old_fa_ptr->under_fapl_id, &(new_fa_ptr->under_fapl_id)) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "can't copy the IOC under FAPL");
ret_value = (void *)new_fa_ptr;
@ -728,8 +747,8 @@ H5FD__ioc_fapl_free(void *_fapl)
/* Check arguments */
HDassert(fapl);
if (fapl->ioc_fapl_id >= 0 && H5I_dec_ref(fapl->ioc_fapl_id) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTDEC, FAIL, "can't close FAPL ID");
if (fapl->under_fapl_id >= 0 && H5I_dec_ref(fapl->under_fapl_id) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTDEC, FAIL, "can't close IOC under FAPL ID");
/* Free the property list */
fapl = H5FL_FREE(H5FD_ioc_config_t, fapl);
@ -774,10 +793,10 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
if (NULL == (file_ptr = (H5FD_ioc_t *)H5FL_CALLOC(H5FD_ioc_t)))
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTALLOC, NULL, "unable to allocate file struct");
file_ptr->comm = MPI_COMM_NULL;
file_ptr->info = MPI_INFO_NULL;
file_ptr->context_id = -1;
file_ptr->fa.ioc_fapl_id = H5I_INVALID_HID;
file_ptr->comm = MPI_COMM_NULL;
file_ptr->info = MPI_INFO_NULL;
file_ptr->context_id = -1;
file_ptr->fa.under_fapl_id = H5I_INVALID_HID;
/* Get the driver-specific file access properties */
if (NULL == (plist_ptr = (H5P_genplist_t *)H5I_object(fapl_id)))
@ -814,7 +833,7 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
config_ptr = H5P_peek_driver_info(plist_ptr);
if (!config_ptr || (H5P_FILE_ACCESS_DEFAULT == fapl_id)) {
if (H5FD__ioc_get_default_config(&default_config) < 0)
if (H5FD__ioc_get_default_config(fapl_id, &default_config) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, NULL, "can't get default IOC VFD configuration");
config_ptr = &default_config;
}
@ -848,11 +867,11 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
}
/* Copy the ioc FAPL. */
if (H5FD__copy_plist(config_ptr->ioc_fapl_id, &(file_ptr->fa.ioc_fapl_id)) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "can't copy FAPL");
if (H5FD__copy_plist(config_ptr->under_fapl_id, &(file_ptr->fa.under_fapl_id)) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL, "can't copy IOC under FAPL");
/* Check the underlying driver (sec2/mpio/etc.) */
if (NULL == (plist_ptr = (H5P_genplist_t *)H5I_object(config_ptr->ioc_fapl_id)))
if (NULL == (plist_ptr = (H5P_genplist_t *)H5I_object(config_ptr->under_fapl_id)))
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADTYPE, NULL, "not a file access property list");
if (H5P_peek(plist_ptr, H5F_ACS_FILE_DRV_NAME, &driver_prop) < 0)
@ -861,13 +880,13 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_BADVALUE, NULL,
"invalid driver ID in file access property list");
if (driver->value != H5_VFD_SEC2) {
if (driver->value != H5_VFD_MPIO) {
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL,
"unable to open file '%s' - only Sec2 VFD is currently supported", name);
"unable to open file '%s' - only MPI I/O VFD is currently supported", name);
}
else {
subfiling_context_t *sf_context = NULL;
uint64_t inode_id = UINT64_MAX;
subfiling_context_t *sf_context = NULL;
void *file_handle = NULL;
int ioc_flags;
int l_error = 0;
int g_error = 0;
@ -881,34 +900,12 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
if (H5F_ACC_EXCL & flags)
ioc_flags |= O_EXCL;
file_ptr->ioc_file = H5FD_open(file_ptr->file_path, flags, config_ptr->ioc_fapl_id, HADDR_UNDEF);
file_ptr->ioc_file = H5FD_open(file_ptr->file_path, flags, config_ptr->under_fapl_id, HADDR_UNDEF);
if (file_ptr->ioc_file) {
h5_stat_t sb;
void *file_handle = NULL;
if (file_ptr->mpi_rank == 0) {
if (H5FDget_vfd_handle(file_ptr->ioc_file, config_ptr->ioc_fapl_id, &file_handle) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, NULL, "can't get file handle");
if (HDfstat(*(int *)file_handle, &sb) < 0)
H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_BADFILE, NULL, "unable to fstat file");
HDcompile_assert(sizeof(uint64_t) >= sizeof(ino_t));
file_ptr->inode = sb.st_ino;
inode_id = (uint64_t)sb.st_ino;
}
if (MPI_SUCCESS != (mpi_code = MPI_Bcast(&inode_id, 1, MPI_UINT64_T, 0, file_ptr->comm)))
H5_SUBFILING_MPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code);
if (file_ptr->mpi_rank != 0)
file_ptr->inode = (ino_t)inode_id;
if (H5FDget_vfd_handle(file_ptr->ioc_file, config_ptr->under_fapl_id, &file_handle) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, NULL, "can't get file handle");
}
else {
/* The two-step file opening approach may be
* the root cause for the sec2 open to return a NULL.
* It is prudent then, to collectively fail (early) in this case.
*/
l_error = 1;
}
@ -925,7 +922,7 @@ H5FD__ioc_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t maxaddr)
* context ID will be returned, which is used for
* further interactions with this file's subfiles.
*/
if (H5_open_subfiles(file_ptr->file_path, inode_id, file_ptr->fa.ioc_selection, ioc_flags,
if (H5_open_subfiles(file_ptr->file_path, file_handle, &file_ptr->fa.subf_config, ioc_flags,
file_ptr->comm, &file_ptr->context_id) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open subfiles for file '%s'",
name);
@ -991,9 +988,9 @@ H5FD__ioc_close_int(H5FD_ioc_t *file_ptr)
}
#endif
if (file_ptr->fa.ioc_fapl_id >= 0 && H5I_dec_ref(file_ptr->fa.ioc_fapl_id) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_ARGS, FAIL, "can't close FAPL");
file_ptr->fa.ioc_fapl_id = H5I_INVALID_HID;
if (file_ptr->fa.under_fapl_id >= 0 && H5I_dec_ref(file_ptr->fa.under_fapl_id) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_ARGS, FAIL, "can't close IOC under FAPL");
file_ptr->fa.under_fapl_id = H5I_INVALID_HID;
/* Close underlying file */
if (file_ptr->ioc_file) {
@ -1331,7 +1328,7 @@ H5FD__ioc_get_handle(H5FD_t *_file, hid_t H5_ATTR_UNUSED fapl, void **file_handl
HDassert(file->ioc_file);
HDassert(file_handle);
if (H5FD_get_vfd_handle(file->ioc_file, file->fa.ioc_fapl_id, file_handle) < 0)
if (H5FD_get_vfd_handle(file->ioc_file, file->fa.under_fapl_id, file_handle) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, FAIL, "unable to get handle of R/W file");
done:
@ -1597,12 +1594,138 @@ done:
static herr_t
H5FD__ioc_del(const char *name, hid_t fapl)
{
herr_t ret_value = SUCCEED;
H5P_genplist_t *plist;
h5_stat_t st;
MPI_Comm comm = MPI_COMM_NULL;
MPI_Info info = MPI_INFO_NULL;
FILE *config_file = NULL;
char *name_copy = NULL;
char *name_copy2 = NULL;
char *tmp_filename = NULL;
char *base_filename = NULL;
char *file_dirname = NULL;
int mpi_rank = INT_MAX;
int mpi_code;
herr_t ret_value = SUCCEED;
(void)name;
(void)fapl;
/* TODO: Eventually this routine should share common code
* with H5_subfiling_common's routines so it doesn't get
* out of sync
*/
/* TODO: implement later */
if (NULL == (plist = H5P_object_verify(fapl, H5P_FILE_ACCESS)))
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list");
HDassert(H5FD_IOC == H5P_peek_driver(plist));
if (H5FD_mpi_self_initialized) {
comm = MPI_COMM_WORLD;
}
else {
/* Get the MPI communicator and info from the fapl */
if (H5P_get(plist, H5F_ACS_MPI_PARAMS_INFO_NAME, &info) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI info object");
if (H5P_get(plist, H5F_ACS_MPI_PARAMS_COMM_NAME, &comm) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get MPI communicator");
}
/* Get the MPI rank of this process */
if (MPI_SUCCESS != (mpi_code = MPI_Comm_rank(comm, &mpi_rank)))
H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Comm_rank failed", mpi_code);
if (mpi_rank == 0) {
int n_io_concentrators = 0;
int num_digits = 0;
if (HDstat(name, &st) < 0)
H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_SYSERRSTR, FAIL, "HDstat failed");
if (NULL == (name_copy = HDstrdup(name)))
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't copy filename");
if (NULL == (name_copy2 = HDstrdup(name)))
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL, "can't copy filename");
base_filename = basename(name_copy);
file_dirname = dirname(name_copy2);
/* Try to open the subfiling configuration file and get the number of IOCs */
if (NULL == (tmp_filename = HDmalloc(PATH_MAX)))
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTALLOC, FAIL,
"can't allocate config file name buffer");
/* TODO: No support for subfile directory prefix currently */
HDsnprintf(tmp_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, file_dirname,
base_filename, (uint64_t)st.st_ino);
if (NULL == (config_file = HDfopen(tmp_filename, "r"))) {
if (ENOENT == errno) {
#ifdef H5FD_IOC_DEBUG
HDprintf("** WARNING: couldn't delete Subfiling configuration file '%s'\n", tmp_filename);
#endif
H5_SUBFILING_GOTO_DONE(SUCCEED);
}
else
H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL,
"can't open subfiling config file");
}
if (H5_get_num_iocs_from_config_file(config_file, &n_io_concentrators) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_READERROR, FAIL, "can't read subfiling config file");
/* Delete the Subfiling configuration file */
if (EOF == HDfclose(config_file)) {
config_file = NULL;
H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL,
"can't close subfiling config file");
}
config_file = NULL;
if (HDremove(tmp_filename) < 0)
H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL,
"can't delete subfiling config file");
/* Try to delete each of the subfiles */
num_digits = (int)(HDlog10(n_io_concentrators) + 1);
for (int i = 0; i < n_io_concentrators; i++) {
/* TODO: No support for subfile directory prefix currently */
HDsnprintf(tmp_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_FILENAME_TEMPLATE, file_dirname,
base_filename, (uint64_t)st.st_ino, num_digits, i + 1, n_io_concentrators);
if (HDremove(tmp_filename) < 0) {
#ifdef H5FD_IOC_DEBUG
HDprintf("** WARNING: couldn't delete subfile '%s'\n", tmp_filename);
#endif
if (ENOENT != errno)
H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTDELETEFILE, FAIL, "can't delete subfile");
}
}
/* Delete the HDF5 stub file */
if (HDremove(name) < 0)
H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTDELETEFILE, FAIL, "can't delete HDF5 file");
}
done:
if (config_file)
if (EOF == HDfclose(config_file))
H5_SUBFILING_DONE_ERROR(H5E_FILE, H5E_CANTCLOSEFILE, FAIL, "can't close subfiling config file");
/* Set up a barrier (don't want processes to run ahead of the delete) */
if (MPI_SUCCESS != (mpi_code = MPI_Barrier(comm)))
H5_SUBFILING_MPI_DONE_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
/* Free duplicated MPI Communicator and Info objects */
if (H5_mpi_comm_free(&comm) < 0)
H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI communicator");
if (H5_mpi_info_free(&info) < 0)
H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI info object");
HDfree(tmp_filename);
HDfree(name_copy);
HDfree(name_copy2);
H5_SUBFILING_FUNC_LEAVE;
}

View File

@ -11,7 +11,7 @@
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* Purpose: The public header file for the "io concentrator" driver.
* Purpose: The public header file for the "I/O concentrator" driver.
* This provides a similar functionality to that of the subfiling driver
* but introduces the necessary file access functionality via a multi-
* threading MPI service
@ -20,72 +20,191 @@
#ifndef H5FDioc_H
#define H5FDioc_H
#include "H5FDsubfiling.h"
#ifdef H5_HAVE_IOC_VFD
/**
* \def H5FD_IOC
* Macro that returns the identifier for the #H5FD_IOC driver. \hid_t{file driver}
*/
#define H5FD_IOC (H5FDperform_init(H5FD_ioc_init))
#else
#define H5FD_IOC (H5I_INVALID_HID)
#endif
/**
* \def H5FD_IOC_NAME
* The canonical name for the #H5FD_IOC driver
*/
#define H5FD_IOC_NAME "ioc"
#ifdef H5_HAVE_IOC_VFD
#ifndef H5FD_IOC_FAPL_MAGIC
#define H5FD_CURR_IOC_FAPL_VERSION 1
#define H5FD_IOC_FAPL_MAGIC 0xFED21331
/**
* \def H5FD_IOC_CURR_FAPL_VERSION
* The version number of the H5FD_ioc_config_t configuration
* structure for the #H5FD_IOC driver
*/
#define H5FD_IOC_CURR_FAPL_VERSION 1
/**
* \def H5FD_IOC_FAPL_MAGIC
* Unique number used to distinguish the #H5FD_IOC driver from other HDF5 file drivers
*/
#define H5FD_IOC_FAPL_MAGIC 0xFED21331
#endif
#define H5FD_IOC_THREAD_POOL_SIZE 4
/**
* \def H5FD_IOC_DEFAULT_THREAD_POOL_SIZE
* The default number of I/O concentrator worker threads
*/
#define H5FD_IOC_DEFAULT_THREAD_POOL_SIZE 4
/*
* Environment variables interpreted by the IOC VFD
*/
#define H5_IOC_THREAD_POOL_COUNT "H5_IOC_THREAD_POOL_COUNT"
/*
* Define the various constants to allow different allocations
* of subfile ranks. The choices are self explanatory, starting
* with the default of one IO Concentrator (IOC) per node and
* lastly, defining a fixed number.
*/
typedef enum {
SELECT_IOC_ONE_PER_NODE = 0, /* Default */
SELECT_IOC_EVERY_NTH_RANK, /* Starting at rank 0, select-next += N */
SELECT_IOC_WITH_CONFIG, /* NOT IMPLEMENTED: Read-from-file */
SELECT_IOC_TOTAL, /* Starting at rank 0, mpi_size / total */
ioc_selection_options /* (Uses same selection as every Nth rank) */
} ioc_selection_t;
/*
* In addition to the common configuration fields, we can have
* VFD specific fields. Here's one for the IO Concentrator VFD.
/**
* \def H5FD_IOC_THREAD_POOL_SIZE
* Macro for name of the environment variable that controls/overrides
* the number of I/O concentrator worker threads
*
* thread_pool_count (int32_t)
* Indicate the number of helper threads that we want for
* creating a thread pool
*
* ----------------------------------------------------------------------------
* The value set for this environment variable is interpreted as an
* int value and must be > 0.
*/
#define H5FD_IOC_THREAD_POOL_SIZE "H5FD_IOC_THREAD_POOL_SIZE"
//! <!-- [H5FD_ioc_config_t_snip] -->
/**
* \struct H5FD_ioc_config_t
* \brief Configuration structure for H5Pset_fapl_ioc() / H5Pget_fapl_ioc()
*
* \details H5FD_ioc_config_t is a public structure that is used to pass
* configuration data to the #H5FD_IOC driver via a File Access
* Property List. A pointer to an instance of this structure is
* a parameter to H5Pset_fapl_ioc() and H5Pget_fapl_ioc().
*
* The #H5FD_IOC driver shares much of its configuration with the
* #H5FD_SUBFILING driver and so its configuration structure
* contains an instance of a H5FD_subfiling_shared_config_t
* configuration structure.
*
* \var uint32_t H5FD_ioc_config_t::magic
* A somewhat unique number which distinguishes the #H5FD_IOC driver
* from other drivers. Used in combination with a version number, it
* can help to validate a user-generated File Access Property List.
* This field should be set to #H5FD_IOC_FAPL_MAGIC.
*
* \var uint32_t H5FD_ioc_config_t::version
* Version number of the H5FD_ioc_config_t structure. Any instance passed
* to H5Pset_fapl_ioc() / H5Pget_fapl_ioc() must have a recognized version
* number or an error will be raised. Currently, this field should be set
* to #H5FD_IOC_CURR_FAPL_VERSION.
*
* \var hid_t H5FD_ioc_config_t::under_fapl_id
* The File Access Property List which is setup with the file driver
* to use for I/O to the HDF5 stub file. The stub file looks like a
* typical HDF5 file, but currently only contains the superblock metadata
* for compatibility with legacy HDF5 applications. The default driver used
* is currently the #H5FD_MPIO driver.
*
* \var int32_t H5FD_ioc_config_t::thread_pool_count
* The number of I/O concentrator worker threads to use.
*
* This value can also be set or adjusted with the #H5FD_IOC_THREAD_POOL_SIZE
* environment variable.
*
* \var H5FD_subfiling_shared_config_t H5FD_ioc_config_t::subf_config
* Subfiling configuration data for the parent #H5FD_SUBFILING driver. This
* includes the sub-file stripe size, number of I/O concentrators, IOC
* selection method, etc.
*
*/
typedef struct H5FD_ioc_config_t {
uint32_t magic; /* set to H5FD_IOC_FAPL_MAGIC */
uint32_t version; /* set to H5FD_CURR_IOC_FAPL_VERSION */
int32_t stripe_count; /* How many io concentrators */
int64_t stripe_depth; /* Max # of bytes in contiguous IO to an IOC */
ioc_selection_t ioc_selection; /* Method to select IO Concentrators */
hid_t ioc_fapl_id; /* The hid_t value of the stacked VFD */
int32_t thread_pool_count;
uint32_t magic; /* Must be set to H5FD_IOC_FAPL_MAGIC */
uint32_t version; /* Must be set to H5FD_IOC_CURR_FAPL_VERSION */
hid_t under_fapl_id; /* FAPL setup with the VFD to use for I/O to the HDF5 stub file */
int32_t thread_pool_count; /* Number of I/O concentrator worker threads to use */
H5FD_subfiling_shared_config_t subf_config; /* Subfiling driver configuration */
} H5FD_ioc_config_t;
//! <!-- [H5FD_ioc_config_t_snip] -->
#ifdef __cplusplus
extern "C" {
#endif
H5_DLL hid_t H5FD_ioc_init(void);
H5_DLL herr_t H5Pset_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_ptr);
H5_DLL herr_t H5Pget_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_ptr);
H5_DLL void begin_thread_exclusive(void);
H5_DLL void end_thread_exclusive(void);
/**
* \brief Internal routine to initialize #H5FD_IOC driver. Not meant to be
* called directly by an HDF5 application
*/
H5_DLL hid_t H5FD_ioc_init(void);
/**
* \ingroup FAPL
*
* \brief Modifies the specified File Access Property List to use the #H5FD_IOC driver
*
* \fapl_id
* \param[in] vfd_config Pointer to #H5FD_IOC driver configuration structure. May be NULL.
* \returns \herr_t
*
* \details H5Pset_fapl_ioc() modifies the File Access Property List to use the
* #H5FD_IOC driver.
*
* The #H5FD_IOC driver is a reference implementation of an "I/O concentrator"
* file driver that works in conjunction with the #H5FD_SUBFILING driver and
* provides the I/O backend for servicing I/O requests to sub-files.
*
* Typically, an HDF5 application won't need to call this routine directly.
* The #H5FD_IOC driver is usually set up as a side effect of an HDF5 application
* using the #H5FD_SUBFILING driver, but this routine is provided in case the
* application wishes to manually configure the #H5FD_IOC driver.
*
* \note The \p vfd_config parameter may be NULL. In this case, the driver will
* be setup with default settings. Note that in this case, it is assumed
* the parent #H5FD_SUBFILING driver was also setup with default settings.
* If the two drivers differ in configuration settings, application behavior
* may not be as expected.
*
* \since 1.13.2
*
*/
H5_DLL herr_t H5Pset_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *vfd_config);
/**
* \ingroup FAPL
*
* \brief Queries a File Access Property List for #H5FD_IOC file driver properties
*
* \fapl_id
* \param[out] config_out Pointer to H5FD_ioc_config_t structure through which the
* #H5FD_IOC file driver properties will be returned.
*
* \returns \herr_t
*
* \details H5Pget_fapl_ioc() queries the specified File Access Property List for
* #H5FD_IOC driver properties as set by H5Pset_fapl_ioc(). If the #H5FD_IOC
* driver has not been set on the File Access Property List, a default
* configuration is returned. An HDF5 application may use this functionality
* to manually configure the #H5FD_IOC driver by calling H5Pget_fapl_ioc()
* on a newly-created File Access Property List, adjusting the default
* values and then calling H5Pset_fapl_ioc() with the configured
* H5FD_ioc_config_t structure.
*
* \since 1.13.2
*
*/
H5_DLL herr_t H5Pget_fapl_ioc(hid_t fapl_id, H5FD_ioc_config_t *config_out);
/**
* \brief Internal routine for managing exclusive access to critical sections
* by the #H5FD_IOC driver's worker threads. Not meant to be called
* directly by an HDF5 application
*/
H5_DLL void H5FD_ioc_begin_thread_exclusive(void);
/**
* \brief Internal routine for managing exclusive access to critical sections
* by the #H5FD_IOC driver's worker threads. Not meant to be called
* directly by an HDF5 application
*/
H5_DLL void H5FD_ioc_end_thread_exclusive(void);
#ifdef __cplusplus
}

View File

@ -14,10 +14,6 @@
#include "H5FDsubfiling.h"
#ifndef HG_TEST_NUM_THREADS_DEFAULT
#define HG_TEST_NUM_THREADS_DEFAULT 4
#endif
#define MIN_READ_RETRIES 10
/*
@ -118,7 +114,7 @@ initialize_ioc_threads(void *_sf_context)
{
subfiling_context_t *sf_context = _sf_context;
ioc_data_t *ioc_data = NULL;
unsigned thread_pool_count = HG_TEST_NUM_THREADS_DEFAULT;
unsigned thread_pool_count = H5FD_IOC_DEFAULT_THREAD_POOL_SIZE;
char *env_value;
int ret_value = 0;
#ifdef H5FD_IOC_COLLECT_STATS
@ -174,7 +170,7 @@ initialize_ioc_threads(void *_sf_context)
H5_SUBFILING_GOTO_ERROR(H5E_RESOURCE, H5E_CANTINIT, (-1), "can't initialize IOC thread queue mutex");
/* Allow experimentation with the number of helper threads */
if ((env_value = HDgetenv(H5_IOC_THREAD_POOL_COUNT)) != NULL) {
if ((env_value = HDgetenv(H5FD_IOC_THREAD_POOL_SIZE)) != NULL) {
int value_check = HDatoi(env_value);
if (value_check > 0) {
thread_pool_count = (unsigned int)value_check;
@ -589,7 +585,7 @@ handle_work_request(void *arg)
}
/*-------------------------------------------------------------------------
* Function: begin_thread_exclusive
* Function: H5FD_ioc_begin_thread_exclusive
*
* Purpose: Mutex lock to restrict access to code or variables.
*
@ -603,13 +599,13 @@ handle_work_request(void *arg)
*-------------------------------------------------------------------------
*/
void
begin_thread_exclusive(void)
H5FD_ioc_begin_thread_exclusive(void)
{
hg_thread_mutex_lock(&ioc_thread_mutex);
}
/*-------------------------------------------------------------------------
* Function: end_thread_exclusive
* Function: H5FD_ioc_end_thread_exclusive
*
* Purpose: Mutex unlock. Should only be called by the current holder
* of the locked mutex.
@ -624,7 +620,7 @@ begin_thread_exclusive(void)
*-------------------------------------------------------------------------
*/
void
end_thread_exclusive(void)
H5FD_ioc_end_thread_exclusive(void)
{
hg_thread_mutex_unlock(&ioc_thread_mutex);
}
@ -840,13 +836,13 @@ ioc_file_queue_write_indep(sf_work_request_t *msg, int subfile_rank, int source,
sf_queue_delay_time += t_queue_delay;
#endif
begin_thread_exclusive();
H5FD_ioc_begin_thread_exclusive();
/* Adjust EOF if necessary */
if (sf_eof > sf_context->sf_eof)
sf_context->sf_eof = sf_eof;
end_thread_exclusive();
H5FD_ioc_end_thread_exclusive();
done:
if (send_nack) {

View File

@ -209,11 +209,13 @@ static herr_t H5FD__subfiling_read_vector(H5FD_t *file, hid_t dxpl_id, uint32_t
static herr_t H5FD__subfiling_write_vector(H5FD_t *file, hid_t dxpl_id, uint32_t count, H5FD_mem_t types[],
haddr_t addrs[], size_t sizes[], const void *bufs[] /* in */);
static herr_t H5FD__subfiling_truncate(H5FD_t *_file, hid_t dxpl_id, hbool_t closing);
#if 0
static herr_t H5FD__subfiling_lock(H5FD_t *_file, hbool_t rw);
static herr_t H5FD__subfiling_unlock(H5FD_t *_file);
static herr_t H5FD__subfiling_del(const char *name, hid_t fapl);
static herr_t H5FD__subfiling_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void *input,
void **output);
#endif
static herr_t H5FD__subfiling_del(const char *name, hid_t fapl);
static herr_t H5FD__subfiling_ctl(H5FD_t *_file, uint64_t op_code, uint64_t flags, const void *input,
void **output);
static herr_t H5FD__subfiling_get_default_config(hid_t fapl_id, H5FD_subfiling_config_t *config_out);
static herr_t H5FD__subfiling_validate_config(const H5FD_subfiling_config_t *fa);
@ -246,46 +248,46 @@ static herr_t iovec_fill_uniform(subfiling_context_t *sf_context, int64_t iovec_
void H5FD__subfiling_mpi_finalize(void);
static const H5FD_class_t H5FD_subfiling_g = {
H5FD_CLASS_VERSION, /* VFD interface version */
H5_VFD_SUBFILING, /* value */
H5FD_SUBFILING_NAME, /* name */
MAXADDR, /* maxaddr */
H5F_CLOSE_WEAK, /* fc_degree */
H5FD__subfiling_term, /* terminate */
NULL, /* sb_size */
NULL, /* sb_encode */
NULL, /* sb_decode */
sizeof(H5FD_subfiling_config_t), /* fapl_size */
H5FD__subfiling_fapl_get, /* fapl_get */
H5FD__subfiling_fapl_copy, /* fapl_copy */
H5FD__subfiling_fapl_free, /* fapl_free */
0, /* dxpl_size */
NULL, /* dxpl_copy */
NULL, /* dxpl_free */
H5FD__subfiling_open, /* open */
H5FD__subfiling_close, /* close */
H5FD__subfiling_cmp, /* cmp */
H5FD__subfiling_query, /* query */
NULL, /* get_type_map */
NULL, /* alloc */
NULL, /* free */
H5FD__subfiling_get_eoa, /* get_eoa */
H5FD__subfiling_set_eoa, /* set_eoa */
H5FD__subfiling_get_eof, /* get_eof */
H5FD__subfiling_get_handle, /* get_handle */
H5FD__subfiling_read, /* read */
H5FD__subfiling_write, /* write */
H5FD__subfiling_read_vector, /* read_vector */
H5FD__subfiling_write_vector, /* write_vector */
NULL, /* read_selection */
NULL, /* write_selection */
NULL, /* flush */
H5FD__subfiling_truncate, /* truncate */
H5FD__subfiling_lock, /* lock */
H5FD__subfiling_unlock, /* unlock */
H5FD__subfiling_del, /* del */
H5FD__subfiling_ctl, /* ctl */
H5FD_FLMAP_DICHOTOMY /* fl_map */
H5FD_CLASS_VERSION, /* VFD interface version */
H5_VFD_SUBFILING, /* value */
H5FD_SUBFILING_NAME, /* name */
MAXADDR, /* maxaddr */
H5F_CLOSE_WEAK, /* fc_degree */
H5FD__subfiling_term, /* terminate */
NULL, /* sb_size */
NULL, /* sb_encode */
NULL, /* sb_decode */
sizeof(H5FD_subfiling_config_t), /* fapl_size */
H5FD__subfiling_fapl_get, /* fapl_get */
H5FD__subfiling_fapl_copy, /* fapl_copy */
H5FD__subfiling_fapl_free, /* fapl_free */
0, /* dxpl_size */
NULL, /* dxpl_copy */
NULL, /* dxpl_free */
H5FD__subfiling_open, /* open */
H5FD__subfiling_close, /* close */
H5FD__subfiling_cmp, /* cmp */
H5FD__subfiling_query, /* query */
NULL, /* get_type_map */
NULL, /* alloc */
NULL, /* free */
H5FD__subfiling_get_eoa, /* get_eoa */
H5FD__subfiling_set_eoa, /* set_eoa */
H5FD__subfiling_get_eof, /* get_eof */
H5FD__subfiling_get_handle, /* get_handle */
H5FD__subfiling_read, /* read */
H5FD__subfiling_write, /* write */
H5FD__subfiling_read_vector, /* read_vector */
H5FD__subfiling_write_vector, /* write_vector */
NULL, /* read_selection */
NULL, /* write_selection */
NULL, /* flush */
H5FD__subfiling_truncate, /* truncate */
NULL /* H5FD__subfiling_lock */, /* lock */
NULL /* H5FD__subfiling_unlock */, /* unlock */
H5FD__subfiling_del, /* del */
H5FD__subfiling_ctl, /* ctl */
H5FD_FLMAP_DICHOTOMY /* fl_map */
};
/* Declare a free list to manage the H5FD_subfiling_t struct */
@ -457,7 +459,7 @@ done:
*-------------------------------------------------------------------------
*/
herr_t
H5Pset_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *vfd_config)
H5Pset_fapl_subfiling(hid_t fapl_id, const H5FD_subfiling_config_t *vfd_config)
{
H5FD_subfiling_config_t *subfiling_conf = NULL;
H5P_genplist_t *plist = NULL;
@ -485,7 +487,7 @@ H5Pset_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *vfd_config)
if (H5FD__subfiling_validate_config(vfd_config) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "invalid subfiling VFD configuration");
ret_value = H5P_set_driver(plist, H5FD_SUBFILING, (void *)vfd_config, NULL);
ret_value = H5P_set_driver(plist, H5FD_SUBFILING, vfd_config, NULL);
done:
if (subfiling_conf) {
@ -564,13 +566,14 @@ H5FD__subfiling_get_default_config(hid_t fapl_id, H5FD_subfiling_config_t *confi
HDmemset(config_out, 0, sizeof(*config_out));
config_out->magic = H5FD_SUBFILING_FAPL_MAGIC;
config_out->version = H5FD_CURR_SUBFILING_FAPL_VERSION;
config_out->ioc_fapl_id = H5I_INVALID_HID;
config_out->stripe_count = 0;
config_out->stripe_depth = H5FD_DEFAULT_STRIPE_DEPTH;
config_out->ioc_selection = SELECT_IOC_ONE_PER_NODE;
config_out->require_ioc = TRUE;
config_out->magic = H5FD_SUBFILING_FAPL_MAGIC;
config_out->version = H5FD_SUBFILING_CURR_FAPL_VERSION;
config_out->ioc_fapl_id = H5I_INVALID_HID;
config_out->require_ioc = TRUE;
config_out->shared_cfg.ioc_selection = SELECT_IOC_ONE_PER_NODE;
config_out->shared_cfg.stripe_size = H5FD_SUBFILING_DEFAULT_STRIPE_SIZE;
config_out->shared_cfg.stripe_count = 0;
if ((h5_require_ioc = HDgetenv("H5_REQUIRE_IOC")) != NULL) {
int value_check = HDatoi(h5_require_ioc);
@ -644,7 +647,7 @@ H5FD__subfiling_validate_config(const H5FD_subfiling_config_t *fa)
HDassert(fa != NULL);
if (fa->version != H5FD_CURR_SUBFILING_FAPL_VERSION)
if (fa->version != H5FD_SUBFILING_CURR_FAPL_VERSION)
H5_SUBFILING_GOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "Unknown H5FD_subfiling_config_t version");
if (fa->magic != H5FD_SUBFILING_FAPL_MAGIC)
@ -842,9 +845,9 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma
H5FD_class_t *driver = NULL; /* VFD for file */
H5P_genplist_t *plist_ptr = NULL;
H5FD_driver_prop_t driver_prop; /* Property for driver ID & info */
hbool_t bcasted_inode = FALSE;
hbool_t bcasted_eof = FALSE;
int64_t sf_eof = -1;
hbool_t bcasted_eof = FALSE;
int64_t sf_eof = -1;
void *file_handle = NULL;
int mpi_code; /* MPI return code */
H5FD_t *ret_value = NULL;
@ -958,34 +961,16 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma
H5E_FILE, H5E_CANTOPENFILE, NULL,
"unable to open file '%s' - only IOC and Sec2 VFDs are currently supported for subfiles", name);
if (H5FDget_vfd_handle(file_ptr->sf_file, file_ptr->fa.ioc_fapl_id, &file_handle) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get file handle");
if (driver->value == H5_VFD_IOC) {
h5_stat_t sb;
uint64_t fid;
void *file_handle = NULL;
if (file_ptr->mpi_rank == 0) {
if (H5FDget_vfd_handle(file_ptr->sf_file, file_ptr->fa.ioc_fapl_id, &file_handle) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTGET, NULL, "can't get file handle");
if (HDfstat(*(int *)file_handle, &sb) < 0)
H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_BADFILE, NULL, "unable to fstat file");
HDcompile_assert(sizeof(uint64_t) >= sizeof(ino_t));
fid = (uint64_t)sb.st_ino;
}
if (MPI_SUCCESS != (mpi_code = MPI_Bcast(&fid, 1, MPI_UINT64_T, 0, file_ptr->comm)))
H5_SUBFILING_MPI_GOTO_ERROR(NULL, "MPI_Bcast failed", mpi_code);
bcasted_inode = TRUE;
/* Get a copy of the context ID for later use */
file_ptr->context_id = H5_subfile_fid_to_context(fid);
file_ptr->context_id = H5_subfile_fhandle_to_context(file_handle);
file_ptr->fa.require_ioc = true;
}
else if (driver->value == H5_VFD_SEC2) {
uint64_t inode_id = (uint64_t)-1;
int ioc_flags;
int ioc_flags;
/* Translate the HDF5 file open flags into standard POSIX open flags */
ioc_flags = (H5F_ACC_RDWR & flags) ? O_RDWR : O_RDONLY;
@ -996,44 +981,12 @@ H5FD__subfiling_open(const char *name, unsigned flags, hid_t fapl_id, haddr_t ma
if (H5F_ACC_EXCL & flags)
ioc_flags |= O_EXCL;
/* Let MPI rank 0 to the file stat operation and broadcast a result */
if (file_ptr->mpi_rank == 0) {
if (file_ptr->sf_file) {
h5_stat_t sb;
void *file_handle = NULL;
if (H5FDget_vfd_handle(file_ptr->sf_file, file_ptr->fa.ioc_fapl_id, &file_handle) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTGET, NULL, "can't get file handle");
/* We create a new file descriptor for our file structure.
* Basically, we want these separate so that sec2 can
* deal with the opened file for additional operations
* (especially close) without interfering with subfiling.
*/
file_ptr->fd = HDdup(*(int *)file_handle);
if (HDfstat(*(int *)file_handle, &sb) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_BADFILE, NULL, "unable to fstat file");
inode_id = sb.st_ino;
}
}
if (MPI_SUCCESS == MPI_Bcast(&inode_id, 1, MPI_UNSIGNED_LONG_LONG, 0, file_ptr->comm)) {
file_ptr->inode = inode_id;
}
bcasted_inode = TRUE;
/* All ranks can now detect an error and fail. */
if (inode_id == (uint64_t)-1)
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open file = %s\n", name);
/*
* Open the subfiles for this HDF5 file. A subfiling
* context ID will be returned, which is used for
* further interactions with this file's subfiles.
*/
if (H5_open_subfiles(file_ptr->file_path, inode_id, file_ptr->fa.ioc_selection, ioc_flags,
if (H5_open_subfiles(file_ptr->file_path, file_handle, &file_ptr->fa.shared_cfg, ioc_flags,
file_ptr->comm, &file_ptr->context_id) < 0)
H5_SUBFILING_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, NULL, "unable to open subfiling files = %s\n",
name);
@ -1062,13 +1015,6 @@ done:
if (file_ptr) {
/* Participate in possible MPI collectives on failure */
if (file_ptr->comm != MPI_COMM_NULL) {
if (!bcasted_inode) {
uint64_t tmp_inode = UINT64_MAX;
if (MPI_SUCCESS !=
(mpi_code = MPI_Bcast(&tmp_inode, 1, MPI_UNSIGNED_LONG_LONG, 0, file_ptr->comm)))
H5_SUBFILING_MPI_DONE_ERROR(NULL, "MPI_Bcast failed", mpi_code);
}
if (!bcasted_eof) {
sf_eof = -1;
@ -2367,6 +2313,7 @@ done:
*
*-------------------------------------------------------------------------
*/
#if 0
static herr_t
H5FD__subfiling_lock(H5FD_t *_file, hbool_t rw)
{
@ -2415,6 +2362,7 @@ H5FD__subfiling_unlock(H5FD_t *_file)
done:
H5_SUBFILING_FUNC_LEAVE_API;
} /* end H5FD__subfiling_unlock() */
#endif
static herr_t
H5FD__subfiling_del(const char *name, hid_t fapl)

View File

@ -14,120 +14,240 @@
#ifndef H5FDsubfiling_H
#define H5FDsubfiling_H
#include "H5FDioc.h"
#ifdef H5_HAVE_SUBFILING_VFD
/**
* \def H5FD_SUBFILING
* Macro that returns the identifier for the #H5FD_SUBFILING driver. \hid_t{file driver}
*/
#define H5FD_SUBFILING (H5FDperform_init(H5FD_subfiling_init))
#else
#define H5FD_SUBFILING (H5I_INVALID_HID)
#endif
/**
* \def H5FD_SUBFILING_NAME
* The canonical name for the #H5FD_SUBFILING driver
*/
#define H5FD_SUBFILING_NAME "subfiling"
#ifdef H5_HAVE_SUBFILING_VFD
#ifndef H5FD_SUBFILING_FAPL_MAGIC
#define H5FD_CURR_SUBFILING_FAPL_VERSION 1
#define H5FD_SUBFILING_FAPL_MAGIC 0xFED01331
/**
* \def H5FD_SUBFILING_CURR_FAPL_VERSION
* The version number of the H5FD_subfiling_config_t configuration
* structure for the #H5FD_SUBFILING driver
*/
#define H5FD_SUBFILING_CURR_FAPL_VERSION 1
/**
* \def H5FD_SUBFILING_FAPL_MAGIC
* Unique number used to distinguish the #H5FD_SUBFILING driver from other HDF5 file drivers
*/
#define H5FD_SUBFILING_FAPL_MAGIC 0xFED01331
#endif
/****************************************************************************
*
* Structure: H5FD_subfiling_config_t
*
* Purpose:
*
* H5FD_subfiling_config_t is a public structure that is used to pass
* subfiling configuration data to the appropriate subfiling VFD via
* the FAPL. A pointer to an instance of this structure is a parameter
* to H5Pset_fapl_subfiling() and H5Pget_fapl_subfiling().
*
* `magic` (uint32_t)
*
* Magic is a somewhat unique number which identifies this VFD from
* other VFDs. Used in combination with a version number, we can
* validate a user generated file access property list (fapl).
* This field should be set to H5FD_SUBFILING_FAPL_MAGIC.
*
* `version` (uint32_t)
*
* Version number of the H5FD_subfiling_config_t structure. Any instance
* passed to the above calls must have a recognized version number, or
* an error will be flagged.
*
* This field should be set to H5FD_CURR_SUBFILING_FAPL_VERSION.
*
*** IO Concentrator Info ***
*** These fields will be replicated in the stacked IOC VFD which
*** provides the extended support for aggregating reads and writes
*** and allows global file access to node-local storage containers.
*
* `stripe_count` (int32_t)
*
* The integer value which identifies the total number of
* subfiles that have been algorithmically been selected to
* to contain the segments of raw data which make up an HDF5
* file. This value is used to implement the RAID-0 functionality
* when reading or writing datasets.
*
* `stripe_depth` (int64_t)
*
* The stripe depth defines a limit on the maximum number of contiguous
* bytes that can be read or written in a single operation on any
* selected subfile. Larger IO operations can exceed this limit
* by utilizing MPI derived types to construct an IO request which
* gathers additional data segments from memory for the IO request.
*
* `ioc_selection` (enum io_selection datatype)
*
* The io_selection_t defines a specific algorithm by which IO
* concentrators (IOCs) and sub-files are identified. The available
* algorithms are: SELECT_IOC_ONE_PER_NODE, SELECT_IOC_EVERY_NTH_RANK,
* SELECT_IOC_WITH_CONFIG, and SELECT_IOC_TOTAL.
*
*** STACKING and other VFD support
*** i.e. FAPL caching
***
*
* `ioc_fapl_id` (hid_t)
*
* A valid file access property list (fapl) is cached on each
* process and thus enables selection of an alternative provider
* for subsequent file operations.
* By default, Sub-filing employs an additional support VFD that
* provides file IO proxy capabilities to all MPI ranks in a
* distributed parallel application. This IO indirection
* thus allows application access all sub-files even while
* these may actually be node-local and thus not directly
* accessible to remote ranks.
*
****************************************************************************/
/**
* \def H5FD_SUBFILING_DEFAULT_STRIPE_SIZE
* The default stripe size (in bytes) for data stripes in sub-files
*/
#define H5FD_SUBFILING_DEFAULT_STRIPE_SIZE (32 * 1024 * 1024)
/**
* \def H5FD_SUBFILING_FILENAME_TEMPLATE
* The basic template for a sub-file filename
*/
#define H5FD_SUBFILING_FILENAME_TEMPLATE ".subfile_%" PRIu64 "_%0*d_of_%d"
/**
* \def H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE
* The basic template for a #H5FD_SUBFILING driver configuration filename
*/
#define H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE ".subfile_%" PRIu64 ".config"
/*
* In addition to the common configuration fields, we can have
* VFD specific fields. Here's one for the subfiling VFD.
*
* `require_ioc` (hbool_t)
*
* Require_IOC is a boolean flag with a default value of TRUE.
* This flag indicates that the stacked H5FDioc VFD should be
* employed for sub-filing operations. The default flag can be
* overridden with an environment variable: H5_REQUIRE_IOC=0
*
* Environment variables interpreted by the HDF5 Subfiling feature
*/
/**
* \def H5FD_SUBFILING_STRIPE_SIZE
* Macro for name of the environment variable that specifies the size
* (in bytes) for data stripes in sub-files
*
* The value set for this environment variable is interpreted as a
* long long value and must be > 0.
*/
#define H5FD_SUBFILING_STRIPE_SIZE "H5FD_SUBFILING_STRIPE_SIZE"
/**
* \def H5FD_SUBFILING_IOC_PER_NODE
* Macro for name of the environment variable that specifies the number
* of MPI ranks per node to use as I/O concentrators
*
* The value set for this environment variable is interpreted as a
* long value and must be > 0.
*/
#define H5FD_SUBFILING_IOC_PER_NODE "H5FD_SUBFILING_IOC_PER_NODE"
/**
* \def H5FD_SUBFILING_IOC_SELECTION_CRITERIA
* Macro for name of the environment variable that provides information
* for selection MPI ranks as I/O concentrators
*
* The value set for this environment variable is interpreted differently,
* depending on the IOC selection method chosen.
*
* For #SELECT_IOC_ONE_PER_NODE, this value is ignored.
*
* For #SELECT_IOC_EVERY_NTH_RANK, this value is interpreted as a
* long value and must be > 0. The value will correspond to the
* `N` value when selecting every `N`-th MPI rank as an I/O
* concentrator.
*
* For #SELECT_IOC_WITH_CONFIG, this value is ignored as that particular
* IOC selection method is not currently supported.
*
* For #SELECT_IOC_TOTAL, this value is interpreted as a long value
* and must be > 0. The value will correspond to the total number
* of I/O concentrators to be used.
*/
#define H5FD_SUBFILING_IOC_SELECTION_CRITERIA "H5FD_SUBFILING_IOC_SELECTION_CRITERIA"
/**
* \def H5FD_SUBFILING_SUBFILE_PREFIX
* Macro for name of the environment variable that specifies a prefix
* to apply to the filenames generated for sub-files
*
* The value set for this environment variable is interpreted as a
* pathname.
*/
#define H5FD_SUBFILING_SUBFILE_PREFIX "H5FD_SUBFILING_SUBFILE_PREFIX"
/**
* \enum H5FD_subfiling_ioc_select_t
* This enum defines the various constants to allow different
* allocations of MPI ranks as I/O concentrators.
*
* \var SELECT_IOC_ONE_PER_NODE
* Default selection method. One MPI rank per node is used as an
* I/O concentrator. If this selection method is used, the number
* of I/O concentrators per node can be adjusted with the
* #H5FD_SUBFILING_IOC_PER_NODE environment variable.
*
* \var SELECT_IOC_EVERY_NTH_RANK
* Starting with MPI rank 0, a stride of 'N' is applied to the MPI
* rank values to determine the next I/O concentrator. The
* #H5FD_SUBFILING_IOC_SELECTION_CRITERIA environment variable must
* be set to the value desired for 'N'.
*
* \var SELECT_IOC_WITH_CONFIG
* Currently unsupported. Use a configuration file to determine
* the mapping from MPI ranks to I/O concentrators. The
* #H5FD_SUBFILING_IOC_SELECTION_CRITERIA environment variable must
* be set to the path to the configuration file.
*
* \var SELECT_IOC_TOTAL
* Specifies that a total of 'N' I/O concentrators should be used.
* Starting with MPI rank 0, a stride of 'MPI comm size' / 'N' is
* applied to the MPI rank values to determine the next I/O
* concentrator. The #H5FD_SUBFILING_IOC_SELECTION_CRITERIA
* environment variable must be set to the value desired for 'N'.
*
* \var ioc_selection_options
* Unused. Sentinel value
*/
typedef enum {
SELECT_IOC_ONE_PER_NODE = 0, /* Default */
SELECT_IOC_EVERY_NTH_RANK, /* Starting at rank 0, select-next += N */
SELECT_IOC_WITH_CONFIG, /* NOT IMPLEMENTED: Read-from-file */
SELECT_IOC_TOTAL, /* Starting at rank 0, mpi_size / total */
ioc_selection_options /* Sentinel value */
} H5FD_subfiling_ioc_select_t;
/**
* \struct H5FD_subfiling_shared_config_t
* \brief Subfiling configuration structure that is shared between the #H5FD_SUBFILING
* and #H5FD_IOC drivers
*
* \var H5FD_subfiling_ioc_select_t H5FD_subfiling_shared_config_t::ioc_selection
* The method to use for selecting MPI ranks to be I/O concentrators. The
* current default is to select one MPI rank per node to be an I/O concentrator.
* Refer to #H5FD_subfiling_ioc_select_t for a description of the algorithms
* available for use.
*
* \var int64_t H5FD_subfiling_shared_config_t::stripe_size
* The stripe size defines the size (in bytes) of the data stripes in the
* sub-files for the logical HDF5 file. Data is striped across the sub-files
* in a round-robin wrap-around fashion in segments equal to the stripe size.
*
* For example, in an HDF5 file consisting of four sub-files with a 1MiB stripe
* size, the first and fifth 1MiB of data would reside in the first sub-file,
* the second and sixth 1MiB of data would reside in the second sub-file and so
* on.
*
* This value can also be set or adjusted with the #H5FD_SUBFILING_STRIPE_SIZE
* environment variable.
*
* \var int32_t H5FD_subfiling_shared_config_t::stripe_count
* The number of I/O concentrators (and, currently, the number of sub-files)
* to use for the logical HDF5 file. This value is used in conjunction with
* the IOC selection method to determine which MPI ranks will be assigned as
* I/O concentrators.
*
* Alternatively, the mapping between MPI ranks and I/O concentrators can be
* set or adjusted with a combination of the #ioc_selection field and the
* #H5FD_SUBFILING_IOC_PER_NODE and #H5FD_SUBFILING_IOC_SELECTION_CRITERIA
* environment variables.
*/
typedef struct H5FD_subfiling_shared_config_t {
H5FD_subfiling_ioc_select_t ioc_selection; /* Method to select I/O concentrators */
int64_t stripe_size; /* Size (in bytes) of data stripes in sub-files */
int32_t stripe_count; /* Number of I/O concentrators to use */
} H5FD_subfiling_shared_config_t;
//! <!-- [H5FD_subfiling_config_t_snip] -->
/**
* Configuration structure for H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling()
* \struct H5FD_subfiling_config_t
* \brief Configuration structure for H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling()
*
* \details H5FD_subfiling_config_t is a public structure that is used to pass
* subfiling configuration data to the #H5FD_SUBFILING driver via
* a File Access Property List. A pointer to an instance of this structure
* is a parameter to H5Pset_fapl_subfiling() and H5Pget_fapl_subfiling().
*
* \var uint32_t H5FD_subfiling_config_t::magic
* A somewhat unique number which distinguishes the #H5FD_SUBFILING driver
* from other drivers. Used in combination with a version number, it can
* help to validate a user-generated File Access Property List. This field
* should be set to #H5FD_SUBFILING_FAPL_MAGIC.
*
* \var uint32_t H5FD_subfiling_config_t::version
* Version number of the H5FD_subfiling_config_t structure. Any instance
* passed to H5Pset_fapl_subfiling() / H5Pget_fapl_subfiling() must have
* a recognized version number or an error will be raised. Currently, this
* field should be set to #H5FD_SUBFILING_CURR_FAPL_VERSION.
*
* \var hid_t H5FD_subfiling_config_t::ioc_fapl_id
* The File Access Property List which is setup with the file driver that
* the #H5FD_SUBFILING driver will use for servicing I/O requests to the
* sub-files. Currently, the File Access Property List must be setup with
* the #H5FD_IOC driver by calling H5Pset_fapl_ioc(), but future development
* may allow other file drivers to be used.
*
* \var hbool_t H5FD_subfiling_config_t::require_ioc
* A boolean flag which indicates whether the #H5FD_SUBFILING driver should
* use the #H5FD_IOC driver for its I/O operations. This field should currently
* always be set to TRUE.
*
* \var H5FD_subfiling_shared_config_t H5FD_subfiling_config_t::shared_cfg
* A structure which contains the subfiling parameters that are shared between
* the #H5FD_SUBFILING and #H5FD_IOC drivers. This includes the sub-file stripe
* size, number of I/O concentrators, IOC selection method, etc.
*
*/
typedef struct H5FD_subfiling_config_t {
uint32_t magic; /* set to H5FD_SUBFILING_FAPL_MAGIC */
uint32_t version; /* set to H5FD_CURR_SUBFILING_FAPL_VERSION */
int32_t stripe_count; /* How many io concentrators */
int64_t stripe_depth; /* Max # of bytes in contiguous IO to an IOC */
ioc_selection_t ioc_selection; /* Method to select IO Concentrators */
hid_t ioc_fapl_id; /* The hid_t value of the stacked VFD */
hbool_t require_ioc;
uint32_t magic; /* Must be set to H5FD_SUBFILING_FAPL_MAGIC */
uint32_t version; /* Must be set to H5FD_SUBFILING_CURR_FAPL_VERSION */
hid_t ioc_fapl_id; /* The FAPL setup with the stacked VFD to use for I/O concentrators */
hbool_t require_ioc; /* Whether to use the IOC VFD (currently must always be TRUE) */
H5FD_subfiling_shared_config_t
shared_cfg; /* Subfiling/IOC parameters (stripe size, stripe count, etc.) */
} H5FD_subfiling_config_t;
//! <!-- [H5FD_subfiling_config_t_snip] -->
@ -135,41 +255,79 @@ typedef struct H5FD_subfiling_config_t {
extern "C" {
#endif
/**
* \brief Internal routine to initialize #H5FD_SUBFILING driver. Not meant to be
* called directly by an HDF5 application
*/
H5_DLL hid_t H5FD_subfiling_init(void);
/**
* \ingroup FAPL
*
* \brief Modifies the file access property list to use the #H5FD_SUBFILING driver
* \brief Modifies the specified File Access Property List to use the #H5FD_SUBFILING driver
*
* \fapl_id
* \param[in] vfd_config #H5FD_SUBFILING driver specific properties. If NULL, then
* the IO concentrator VFD will be used.
* \param[in] vfd_config Pointer to #H5FD_SUBFILING driver configuration structure. May be NULL.
* \returns \herr_t
*
* \details H5Pset_fapl_core() modifies the file access property list to use the
* \details H5Pset_fapl_subfiling() modifies the File Access Property List to use the
* #H5FD_SUBFILING driver.
*
* \todo Expand details!
* The #H5FD_SUBFILING driver is an MPI-based file driver that allows an
* HDF5 application to distribute a logical HDF5 file across a collection
* of "sub-files" in equal-sized data segment "stripes". I/O to the logical
* HDF5 file is then directed to the appropriate "sub-file" according to the
* #H5FD_SUBFILING configuration and a system of I/O concentrators, which
* are MPI ranks operating worker threads.
*
* \since 1.14.0
* By allowing a configurable stripe size, number of I/O concentrators and
* method for selecting MPI ranks as I/O concentrators, the #H5FD_SUBFILING
* driver aims to enable an HDF5 application to find a middle ground between
* the single shared file and file-per-process approaches to parallel file I/O
* for the particular machine the application is running on. In general, the
* goal is to avoid some of the complexity of the file-per-process approach
* while also minimizing the locking issues of the single shared file approach
* on a parallel file system.
*
* \note Since the #H5FD_SUBFILING driver is an MPI-based file driver, the HDF5
* application should ensure that H5Pset_mpi_params() is called before this
* routine so that the appropriate MPI communicator and info objects will be
* setup for use by the #H5FD_SUBFILING and #H5FD_IOC drivers.
*
* \note The current architecture of the #H5FD_SUBFILING driver requires that the
* HDF5 application must have been initialized with MPI_Init_thread() using
* a value of MPI_THREAD_MULTIPLE for the thread support level.
*
* \note The \p vfd_config parameter may be NULL. In this case, the reference
* implementation I/O concentrator VFD will be used with the default settings
* of one I/O concentrator per node and a stripe size of 32MiB. Refer to the
* H5FD_subfiling_config_t documentation for information about configuration
* for the #H5FD_SUBFILING driver.
*
* \since 1.13.2
*
*/
H5_DLL herr_t H5Pset_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *vfd_config);
H5_DLL herr_t H5Pset_fapl_subfiling(hid_t fapl_id, const H5FD_subfiling_config_t *vfd_config);
/**
* \ingroup FAPL
*
* \brief Queries subfiling file driver properties
* \brief Queries a File Access Property List for #H5FD_SUBFILING file driver properties
*
* \fapl_id
* \param[out] config_out The subfiling fapl data.
* \param[out] config_out Pointer to H5FD_subfiling_config_t structure through which the
* #H5FD_SUBFILING file driver properties will be returned.
*
* \returns \herr_t
*
* \details H5Pget_fapl_subfiling() queries the #H5FD_SUBFILING driver properties as set
* by H5Pset_fapl_subfiling(). If the #H5FD_SUBFILING driver has not been set on
* the File Access Property List, a default configuration is returned.
* \details H5Pget_fapl_subfiling() queries the specified File Access Property List for
* #H5FD_SUBFILING driver properties as set by H5Pset_fapl_subfiling(). If the
* #H5FD_SUBFILING driver has not been set on the File Access Property List, a
* default configuration is returned. An HDF5 application may use this
* functionality to manually configure the #H5FD_SUBFILING driver by calling
* H5Pget_fapl_subfiling() on a newly-created File Access Property List, adjusting
* the default values and then calling H5Pset_fapl_subfiling() with the configured
* H5FD_subfiling_config_t structure.
*
* \since 1.14.0
* \since 1.13.2
*
*/
H5_DLL herr_t H5Pget_fapl_subfiling(hid_t fapl_id, H5FD_subfiling_config_t *config_out);

View File

@ -19,9 +19,9 @@
#include "H5subfiling_common.h"
#include "H5subfiling_err.h"
typedef struct { /* Format of a context map entry */
uint64_t h5_file_id; /* key value (linear search of the cache) */
int64_t sf_context_id; /* The return value if matching h5_file_id */
typedef struct { /* Format of a context map entry */
void *file_handle; /* key value (linear search of the cache) */
int64_t sf_context_id; /* The return value if matching file_handle */
} file_map_to_context_t;
typedef struct stat_record {
@ -83,13 +83,15 @@ static int sf_open_file_count = 0;
static herr_t H5_free_subfiling_object_int(subfiling_context_t *sf_context);
static herr_t H5_free_subfiling_topology(sf_topology_t *topology);
static herr_t init_subfiling(ioc_selection_t ioc_selection_type, MPI_Comm comm, int64_t *context_id_out);
static herr_t init_app_topology(ioc_selection_t ioc_selection_type, MPI_Comm comm,
static herr_t init_subfiling(H5FD_subfiling_shared_config_t *subfiling_config, MPI_Comm comm,
int64_t *context_id_out);
static herr_t init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm,
sf_topology_t **app_topology_out);
static herr_t init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topology,
MPI_Comm file_comm);
static herr_t init_subfiling_context(subfiling_context_t *sf_context,
H5FD_subfiling_shared_config_t *subfiling_config,
sf_topology_t *app_topology, MPI_Comm file_comm);
static herr_t open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags);
static herr_t record_fid_to_subfile(uint64_t h5_file_id, int64_t subfile_context_id, int *next_index);
static herr_t record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_index);
static herr_t ioc_open_file(sf_work_request_t *msg, int file_acc_flags);
static herr_t generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char *filename_out,
size_t filename_out_len, char **filename_basename_out,
@ -102,10 +104,10 @@ static herr_t open_config_file(subfiling_context_t *sf_context, const char *base
static void initialize_statistics(void);
static int numDigits(int n);
static int get_next_fid_map_index(void);
static void clear_fid_map_entry(uint64_t sf_fid, int64_t sf_context_id);
static void clear_fid_map_entry(void *file_handle, int64_t sf_context_id);
static int compare_hostid(const void *h1, const void *h2);
static herr_t get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type,
char **ioc_sel_info_str);
static herr_t get_ioc_selection_criteria_from_env(H5FD_subfiling_ioc_select_t *ioc_selection_type,
char **ioc_sel_info_str);
static int count_nodes(sf_topology_t *info, MPI_Comm comm);
static herr_t gather_topology_info(sf_topology_t *info, MPI_Comm comm);
static int identify_ioc_ranks(sf_topology_t *info, int node_count, int iocs_per_node);
@ -192,7 +194,7 @@ get_next_fid_map_index(void)
HDassert(sf_open_file_map || (sf_file_map_size == 0));
for (int i = 0; i < sf_file_map_size; i++) {
if (sf_open_file_map[i].h5_file_id == UINT64_MAX) {
if (sf_open_file_map[i].file_handle == NULL) {
index = i;
break;
}
@ -222,14 +224,13 @@ get_next_fid_map_index(void)
*-------------------------------------------------------------------------
*/
static void
clear_fid_map_entry(uint64_t sf_fid, int64_t sf_context_id)
clear_fid_map_entry(void *file_handle, int64_t sf_context_id)
{
if (sf_open_file_map) {
int i;
for (i = 0; i < sf_file_map_size; i++) {
if ((sf_open_file_map[i].h5_file_id == sf_fid) &&
for (int i = 0; i < sf_file_map_size; i++) {
if ((sf_open_file_map[i].file_handle == file_handle) &&
(sf_open_file_map[i].sf_context_id == sf_context_id)) {
sf_open_file_map[i].h5_file_id = UINT64_MAX;
sf_open_file_map[i].file_handle = NULL;
sf_open_file_map[i].sf_context_id = -1;
return;
}
@ -287,8 +288,9 @@ compare_hostid(const void *h1, const void *h2)
Purpose: Return a character string which represents either the
default selection method: SELECT_IOC_ONE_PER_NODE; or
if the user has selected a method via the environment
variable (H5_IOC_SELECTION_CRITERIA), we return that
along with any optional qualifier with for that method.
variable (H5FD_SUBFILING_IOC_SELECTION_CRITERIA), we
return that along with any optional qualifier with for
that method.
Errors: None.
@ -296,10 +298,10 @@ compare_hostid(const void *h1, const void *h2)
-------------------------------------------------------------------------
*/
static herr_t
get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type, char **ioc_sel_info_str)
get_ioc_selection_criteria_from_env(H5FD_subfiling_ioc_select_t *ioc_selection_type, char **ioc_sel_info_str)
{
char *opt_value = NULL;
char *env_value = HDgetenv(H5_IOC_SELECTION_CRITERIA);
char *env_value = HDgetenv(H5FD_SUBFILING_IOC_SELECTION_CRITERIA);
HDassert(ioc_selection_type);
HDassert(ioc_sel_info_str);
@ -323,7 +325,8 @@ get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type, char **
if (errno == ERANGE) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't parse value from " H5_IOC_SELECTION_CRITERIA " environment variable\n",
HDprintf("%s: couldn't parse value from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA
" environment variable\n",
__func__);
#endif
@ -332,7 +335,7 @@ get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type, char **
if ((check_value < 0) || (check_value >= ioc_selection_options)) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: invalid IOC selection type value %ld from " H5_IOC_SELECTION_CRITERIA
HDprintf("%s: invalid IOC selection type value %ld from " H5FD_SUBFILING_IOC_SELECTION_CRITERIA
" environment variable\n",
__func__, check_value);
#endif
@ -340,7 +343,7 @@ get_ioc_selection_criteria_from_env(ioc_selection_t *ioc_selection_type, char **
return FAIL;
}
*ioc_selection_type = (ioc_selection_t)check_value;
*ioc_selection_type = (H5FD_subfiling_ioc_select_t)check_value;
*ioc_sel_info_str = opt_value;
}
@ -784,6 +787,7 @@ H5_free_subfiling_object_int(subfiling_context_t *sf_context)
sf_context->sf_context_id = -1;
sf_context->h5_file_id = UINT64_MAX;
sf_context->h5_file_handle = NULL;
sf_context->sf_fid = -1;
sf_context->sf_write_count = 0;
sf_context->sf_read_count = 0;
@ -912,8 +916,9 @@ H5_free_subfiling_topology(sf_topology_t *topology)
*/
/* TODO: revise description */
herr_t
H5_open_subfiles(const char *base_filename, uint64_t h5_file_id, ioc_selection_t ioc_selection_type,
int file_acc_flags, MPI_Comm file_comm, int64_t *context_id_out)
H5_open_subfiles(const char *base_filename, void *file_handle,
H5FD_subfiling_shared_config_t *subfiling_config, int file_acc_flags, MPI_Comm file_comm,
int64_t *context_id_out)
{
subfiling_context_t *sf_context = NULL;
int64_t context_id = -1;
@ -931,6 +936,15 @@ H5_open_subfiles(const char *base_filename, uint64_t h5_file_id, ioc_selection_t
goto done;
}
if (!subfiling_config) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: invalid subfiling configuration pointer\n", __func__);
#endif
ret_value = FAIL;
goto done;
}
if (!context_id_out) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: context_id_out is NULL\n", __func__);
@ -953,7 +967,7 @@ H5_open_subfiles(const char *base_filename, uint64_t h5_file_id, ioc_selection_t
#endif
/* Initialize new subfiling context ID based on configuration information */
if (init_subfiling(ioc_selection_type, file_comm, &context_id) < 0) {
if (init_subfiling(subfiling_config, file_comm, &context_id) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't initialize subfiling context\n", __func__);
#endif
@ -973,7 +987,7 @@ H5_open_subfiles(const char *base_filename, uint64_t h5_file_id, ioc_selection_t
}
/* Save some basic things in the new subfiling context */
sf_context->h5_file_id = h5_file_id;
sf_context->h5_file_handle = file_handle;
if (NULL == (sf_context->h5_filename = HDstrdup(base_filename))) {
#ifdef H5_SUBFILING_DEBUG
@ -1058,7 +1072,7 @@ done:
}
if (ret_value < 0) {
clear_fid_map_entry(h5_file_id, context_id);
clear_fid_map_entry(file_handle, context_id);
if (context_id >= 0 && H5_free_subfiling_object(context_id) < 0) {
#ifdef H5_SUBFILING_DEBUG
@ -1092,7 +1106,7 @@ done:
-------------------------------------------------------------------------
*/
static herr_t
init_subfiling(ioc_selection_t ioc_selection_type, MPI_Comm comm, int64_t *context_id_out)
init_subfiling(H5FD_subfiling_shared_config_t *subfiling_config, MPI_Comm comm, int64_t *context_id_out)
{
subfiling_context_t *new_context = NULL;
sf_topology_t *app_topology = NULL;
@ -1129,7 +1143,7 @@ init_subfiling(ioc_selection_t ioc_selection_type, MPI_Comm comm, int64_t *conte
* Setup the application topology information, including the computed
* number and distribution map of the set of I/O concentrators
*/
if (init_app_topology(ioc_selection_type, comm, &app_topology) < 0) {
if (init_app_topology(subfiling_config->ioc_selection, comm, &app_topology) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't initialize application topology\n", __func__);
#endif
@ -1140,7 +1154,7 @@ init_subfiling(ioc_selection_t ioc_selection_type, MPI_Comm comm, int64_t *conte
new_context->sf_context_id = context_id;
if (init_subfiling_context(new_context, app_topology, comm) < 0) {
if (init_subfiling_context(new_context, subfiling_config, app_topology, comm) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't initialize subfiling topology object\n", __func__);
#endif
@ -1207,7 +1221,8 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
init_app_topology(ioc_selection_t ioc_selection_type, MPI_Comm comm, sf_topology_t **app_topology_out)
init_app_topology(H5FD_subfiling_ioc_select_t ioc_selection_type, MPI_Comm comm,
sf_topology_t **app_topology_out)
{
sf_topology_t *app_topology = NULL;
app_layout_t *app_layout = sf_app_layout;
@ -1392,11 +1407,11 @@ init_app_topology(ioc_selection_t ioc_selection_type, MPI_Comm comm, sf_topology
/* Check for an IOC-per-node value set in the environment */
/* TODO: should this env. var. be interpreted for other selection types? */
if ((env_value = HDgetenv(H5_IOC_COUNT_PER_NODE))) {
if ((env_value = HDgetenv(H5FD_SUBFILING_IOC_PER_NODE))) {
errno = 0;
ioc_select_val = HDstrtol(env_value, NULL, 0);
if ((ERANGE == errno)) {
HDprintf("invalid value '%s' for " H5_IOC_COUNT_PER_NODE "\n", env_value);
HDprintf("invalid value '%s' for " H5FD_SUBFILING_IOC_PER_NODE "\n", env_value);
ioc_select_val = 1;
}
@ -1509,7 +1524,8 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topology, MPI_Comm file_comm)
init_subfiling_context(subfiling_context_t *sf_context, H5FD_subfiling_shared_config_t *subfiling_config,
sf_topology_t *app_topology, MPI_Comm file_comm)
{
char *env_value = NULL;
int comm_rank;
@ -1518,6 +1534,7 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol
HDassert(sf_context);
HDassert(sf_context->topology == NULL);
HDassert(subfiling_config);
HDassert(app_topology);
HDassert(app_topology->n_io_concentrators > 0);
HDassert(MPI_COMM_NULL != file_comm);
@ -1529,10 +1546,11 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol
sf_context->sf_barrier_comm = MPI_COMM_NULL;
sf_context->sf_group_comm = MPI_COMM_NULL;
sf_context->sf_intercomm = MPI_COMM_NULL;
sf_context->sf_stripe_size = H5FD_DEFAULT_STRIPE_DEPTH;
sf_context->sf_stripe_size = H5FD_SUBFILING_DEFAULT_STRIPE_SIZE;
sf_context->sf_write_count = 0;
sf_context->sf_read_count = 0;
sf_context->sf_eof = HADDR_UNDEF;
sf_context->h5_file_handle = NULL;
sf_context->sf_fid = -1;
sf_context->sf_group_size = 1;
sf_context->sf_group_rank = 0;
@ -1545,8 +1563,14 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol
sf_context->sf_logfile = NULL;
#endif
/* Check for an IOC stripe size setting in the environment */
if ((env_value = HDgetenv(H5_IOC_STRIPE_SIZE))) {
/*
* Set IOC stripe size from subfiling configuration, then check
* for a setting from the environment
*/
if (subfiling_config->stripe_size > 0)
sf_context->sf_stripe_size = subfiling_config->stripe_size;
if ((env_value = HDgetenv(H5FD_SUBFILING_STRIPE_SIZE))) {
long long stripe_size = -1;
errno = 0;
@ -1554,7 +1578,7 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol
stripe_size = HDstrtoll(env_value, NULL, 0);
if (ERANGE == errno) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: invalid stripe size setting '%s' for " H5_IOC_STRIPE_SIZE "\n", __func__,
HDprintf("%s: invalid stripe size setting '%s' for " H5FD_SUBFILING_STRIPE_SIZE "\n", __func__,
env_value);
#endif
@ -1574,7 +1598,7 @@ init_subfiling_context(subfiling_context_t *sf_context, sf_topology_t *app_topol
sf_context->sf_blocksize_per_stripe = sf_context->sf_stripe_size * app_topology->n_io_concentrators;
/* Check for a subfile name prefix setting in the environment */
if ((env_value = HDgetenv(H5_IOC_SUBFILE_PREFIX))) {
if ((env_value = HDgetenv(H5FD_SUBFILING_SUBFILE_PREFIX))) {
if (NULL == (sf_context->subfile_prefix = HDstrdup(env_value))) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't copy subfile prefix value\n", __func__);
@ -1767,7 +1791,7 @@ open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags)
* There shouldn't be any issue, but check the status and
* return if there was a problem.
*/
if (record_fid_to_subfile(sf_context->h5_file_id, sf_context->sf_context_id, NULL) < 0) {
if (record_fid_to_subfile(sf_context->h5_file_handle, sf_context->sf_context_id, NULL) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't record HDF5 file ID to subfile context mapping\n", __func__);
#endif
@ -1792,6 +1816,21 @@ open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags)
0,
0,
0};
h5_stat_t st;
/* Retrieve Inode value for HDF5 stub file */
if (HDstat(sf_context->h5_filename, &st) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("[%s %d]: couldn't stat file %s\n", __func__,
sf_context->topology->app_layout->world_rank, sf_context->h5_filename);
#endif
ret_value = FAIL;
goto done;
}
HDcompile_assert(sizeof(uint64_t) >= sizeof(ino_t));
sf_context->h5_file_id = (uint64_t)st.st_ino;
if (ioc_open_file(&msg, file_acc_flags) < 0) {
#ifdef H5_SUBFILING_DEBUG
@ -1806,7 +1845,7 @@ open_subfile_with_context(subfiling_context_t *sf_context, int file_acc_flags)
done:
if (ret_value < 0) {
clear_fid_map_entry(sf_context->h5_file_id, sf_context->sf_context_id);
clear_fid_map_entry(sf_context->h5_file_handle, sf_context->sf_context_id);
}
return ret_value;
@ -1843,7 +1882,7 @@ done:
*-------------------------------------------------------------------------
*/
static herr_t
record_fid_to_subfile(uint64_t h5_file_id, int64_t subfile_context_id, int *next_index)
record_fid_to_subfile(void *file_handle, int64_t subfile_context_id, int *next_index)
{
int index;
herr_t ret_value = SUCCEED;
@ -1861,17 +1900,17 @@ record_fid_to_subfile(uint64_t h5_file_id, int64_t subfile_context_id, int *next
sf_file_map_size = DEFAULT_FILE_MAP_ENTRIES;
for (int i = 0; i < sf_file_map_size; i++) {
sf_open_file_map[i].h5_file_id = UINT64_MAX;
sf_open_file_map[i].file_handle = NULL;
sf_open_file_map[i].sf_context_id = -1;
}
}
for (index = 0; index < sf_file_map_size; index++) {
if (sf_open_file_map[index].h5_file_id == h5_file_id)
if (sf_open_file_map[index].file_handle == file_handle)
goto done;
if (sf_open_file_map[index].h5_file_id == UINT64_MAX) {
sf_open_file_map[index].h5_file_id = h5_file_id;
if (sf_open_file_map[index].file_handle == NULL) {
sf_open_file_map[index].file_handle = file_handle;
sf_open_file_map[index].sf_context_id = subfile_context_id;
if (next_index) {
@ -1899,14 +1938,14 @@ record_fid_to_subfile(uint64_t h5_file_id, int64_t subfile_context_id, int *next
sf_file_map_size *= 2;
for (int i = index; i < sf_file_map_size; i++) {
sf_open_file_map[i].h5_file_id = UINT64_MAX;
sf_open_file_map[i].file_handle = NULL;
}
if (next_index) {
*next_index = index;
}
sf_open_file_map[index].h5_file_id = h5_file_id;
sf_open_file_map[index].file_handle = file_handle;
sf_open_file_map[index++].sf_context_id = subfile_context_id;
}
@ -1957,13 +1996,12 @@ ioc_open_file(sf_work_request_t *msg, int file_acc_flags)
{
subfiling_context_t *sf_context = NULL;
int64_t file_context_id;
hbool_t mutex_locked = FALSE;
mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
char *filepath = NULL;
char *subfile_dir = NULL;
char *base = NULL;
int fd = -1;
herr_t ret_value = SUCCEED;
mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
char *filepath = NULL;
char *subfile_dir = NULL;
char *base = NULL;
int fd = -1;
herr_t ret_value = SUCCEED;
HDassert(msg);
@ -2011,9 +2049,6 @@ ioc_open_file(sf_work_request_t *msg, int file_acc_flags)
goto done;
}
begin_thread_exclusive();
mutex_locked = TRUE;
/* Attempt to create/open the subfile for this IOC rank */
if ((fd = HDopen(filepath, file_acc_flags, mode)) < 0)
H5_SUBFILING_SYS_GOTO_ERROR(H5E_FILE, H5E_CANTOPENFILE, FAIL, "failed to open subfile");
@ -2033,11 +2068,6 @@ ioc_open_file(sf_work_request_t *msg, int file_acc_flags)
}
done:
if (mutex_locked) {
end_thread_exclusive();
mutex_locked = FALSE;
}
if (ret_value < 0) {
if (sf_context) {
HDfree(sf_context->sf_filename);
@ -2074,7 +2104,6 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char
size_t filename_out_len, char **filename_basename_out, char **subfile_dir_out)
{
FILE *config_file = NULL;
char *config_buf = NULL;
char *subfile_dir = NULL;
char *prefix = NULL;
char *base = NULL;
@ -2181,83 +2210,14 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char
* in order to generate the correct subfile names.
*/
if (config_file) {
char *ioc_substr = NULL;
long config_file_len = 0;
if (HDfseek(config_file, 0, SEEK_END) < 0) {
if (H5_get_num_iocs_from_config_file(config_file, &n_io_concentrators) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't seek to end of subfiling configuration file; errno = %d\n", __func__,
errno);
HDprintf("%s: couldn't read from subfiling configuration file\n", __func__);
#endif
ret_value = FAIL;
goto done;
}
if ((config_file_len = HDftell(config_file)) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't get size of subfiling configuration file; errno = %d\n", __func__, errno);
#endif
ret_value = FAIL;
goto done;
}
if (HDfseek(config_file, 0, SEEK_SET) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't seek to end of subfiling configuration file; errno = %d\n", __func__,
errno);
#endif
ret_value = FAIL;
goto done;
}
if (NULL == (config_buf = HDmalloc((size_t)config_file_len + 1))) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't allocate space for reading subfiling configuration file\n", __func__);
#endif
ret_value = FAIL;
goto done;
}
if (HDfread(config_buf, (size_t)config_file_len, 1, config_file) != 1) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't read from subfiling configuration file; errno = %d\n", __func__, errno);
#endif
ret_value = FAIL;
goto done;
}
config_buf[config_file_len] = '\0';
if (NULL == (ioc_substr = HDstrstr(config_buf, "aggregator_count"))) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: malformed subfiling configuration file - no aggregator_count entry\n", __func__);
#endif
ret_value = FAIL;
goto done;
}
if (EOF == HDsscanf(ioc_substr, "aggregator_count=%d", &n_io_concentrators)) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't get number of I/O concentrators from subfiling configuration file\n",
__func__);
#endif
ret_value = FAIL;
goto done;
}
if (n_io_concentrators <= 0) {
HDprintf("%s: invalid number of I/O concentrators (%d) read from subfiling configuration file\n",
__func__, n_io_concentrators);
ret_value = FAIL;
goto done;
}
}
/*
@ -2272,7 +2232,7 @@ generate_subfile_name(subfiling_context_t *sf_context, int file_acc_flags, char
* ABC.h5.subfile_<file-number>.config
*/
num_digits = numDigits(n_io_concentrators);
HDsnprintf(filename_out, filename_out_len, "%s/%s" SF_FILENAME_TEMPLATE, subfile_dir, base,
HDsnprintf(filename_out, filename_out_len, "%s/%s" H5FD_SUBFILING_FILENAME_TEMPLATE, subfile_dir, base,
sf_context->h5_file_id, num_digits, sf_context->topology->subfile_rank + 1,
n_io_concentrators);
@ -2296,7 +2256,6 @@ done:
}
}
HDfree(config_buf);
HDfree(prefix);
return ret_value;
@ -2361,8 +2320,8 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c
goto done;
}
HDsnprintf(config_filename, PATH_MAX, "%s/%s" SF_CONFIG_FILENAME_TEMPLATE, subfile_dir, base_filename,
sf_context->h5_file_id);
HDsnprintf(config_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, subfile_dir,
base_filename, sf_context->h5_file_id);
/* Determine whether a subfiling configuration file exists */
errno = 0;
@ -2455,7 +2414,7 @@ create_config_file(subfiling_context_t *sf_context, const char *base_filename, c
/* Write out each subfile name to the configuration file */
num_digits = numDigits(n_io_concentrators);
for (int k = 0; k < n_io_concentrators; k++) {
HDsnprintf(line_buf, PATH_MAX, "%s" SF_FILENAME_TEMPLATE "\n", base_filename,
HDsnprintf(line_buf, PATH_MAX, "%s" H5FD_SUBFILING_FILENAME_TEMPLATE "\n", base_filename,
sf_context->h5_file_id, num_digits, k + 1, n_io_concentrators);
if (HDfwrite(line_buf, HDstrlen(line_buf), 1, config_file) != 1) {
@ -2546,8 +2505,8 @@ open_config_file(subfiling_context_t *sf_context, const char *base_filename, con
goto done;
}
HDsnprintf(config_filename, PATH_MAX, "%s/%s" SF_CONFIG_FILENAME_TEMPLATE, subfile_dir, base_filename,
sf_context->h5_file_id);
HDsnprintf(config_filename, PATH_MAX, "%s/%s" H5FD_SUBFILING_CONFIG_FILENAME_TEMPLATE, subfile_dir,
base_filename, sf_context->h5_file_id);
/* Determine whether a subfiling configuration file exists */
errno = 0;
@ -2594,6 +2553,110 @@ done:
return ret_value;
}
/*-------------------------------------------------------------------------
* Function: H5_get_num_iocs_from_config_file
*
* Purpose: Reads a Subfiling configuration file to get the number of
* I/O concentrators used for the logical HDF5 file.
*
* Return: Non-negative on success/Negative on failure
*
*-------------------------------------------------------------------------
*/
herr_t
H5_get_num_iocs_from_config_file(FILE *config_file, int *n_io_concentrators)
{
char *config_buf = NULL;
char *ioc_substr = NULL;
long config_file_len = 0;
int read_n_io_concs = 0;
herr_t ret_value = SUCCEED;
HDassert(config_file);
HDassert(n_io_concentrators);
if (HDfseek(config_file, 0, SEEK_END) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't seek to end of subfiling configuration file; errno = %d\n", __func__, errno);
#endif
ret_value = FAIL;
goto done;
}
if ((config_file_len = HDftell(config_file)) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't get size of subfiling configuration file; errno = %d\n", __func__, errno);
#endif
ret_value = FAIL;
goto done;
}
if (HDfseek(config_file, 0, SEEK_SET) < 0) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't seek to beginning of subfiling configuration file; errno = %d\n", __func__,
errno);
#endif
ret_value = FAIL;
goto done;
}
if (NULL == (config_buf = HDmalloc((size_t)config_file_len + 1))) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't allocate space for reading subfiling configuration file\n", __func__);
#endif
ret_value = FAIL;
goto done;
}
if (HDfread(config_buf, (size_t)config_file_len, 1, config_file) != 1) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't read from subfiling configuration file; errno = %d\n", __func__, errno);
#endif
ret_value = FAIL;
goto done;
}
config_buf[config_file_len] = '\0';
if (NULL == (ioc_substr = HDstrstr(config_buf, "aggregator_count"))) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: malformed subfiling configuration file - no aggregator_count entry\n", __func__);
#endif
ret_value = FAIL;
goto done;
}
if (EOF == HDsscanf(ioc_substr, "aggregator_count=%d", &read_n_io_concs)) {
#ifdef H5_SUBFILING_DEBUG
HDprintf("%s: couldn't get number of I/O concentrators from subfiling configuration file\n",
__func__);
#endif
ret_value = FAIL;
goto done;
}
if (read_n_io_concs <= 0) {
HDprintf("%s: invalid number of I/O concentrators (%d) read from subfiling configuration file\n",
__func__, read_n_io_concs);
ret_value = FAIL;
goto done;
}
*n_io_concentrators = read_n_io_concs;
done:
HDfree(config_buf);
H5_SUBFILING_FUNC_LEAVE;
}
/*-------------------------------------------------------------------------
* Function: H5_close_subfiles
*
@ -2713,9 +2776,9 @@ H5_close_subfiles(int64_t subfiling_context_id)
}
#endif
/* The map from FID to subfiling context can now be cleared */
if (sf_context->h5_file_id != UINT64_MAX) {
clear_fid_map_entry(sf_context->h5_file_id, sf_context->sf_context_id);
/* The map from file handle to subfiling context can now be cleared */
if (sf_context->h5_file_handle != NULL) {
clear_fid_map_entry(sf_context->h5_file_handle, sf_context->sf_context_id);
}
if (sf_context->topology->rank_is_ioc) {
@ -2822,10 +2885,10 @@ done:
}
/*-------------------------------------------------------------------------
* Function: H5_subfile_fid_to_context
* Function: H5_subfile_fhandle_to_context
*
* Purpose: This is a basic lookup function which returns the subfiling
* context id associated with the specified file->inode.
* context id associated with the specified file handle.
*
* Return: Non-negative subfiling context ID if the context exists
* Negative on failure or if the subfiling context doesn't
@ -2839,7 +2902,7 @@ done:
*-------------------------------------------------------------------------
*/
int64_t
H5_subfile_fid_to_context(uint64_t sf_fid)
H5_subfile_fhandle_to_context(void *file_handle)
{
if (!sf_open_file_map) {
#ifdef H5_SUBFILING_DEBUG
@ -2850,13 +2913,13 @@ H5_subfile_fid_to_context(uint64_t sf_fid)
}
for (int i = 0; i < sf_file_map_size; i++) {
if (sf_open_file_map[i].h5_file_id == sf_fid) {
if (sf_open_file_map[i].file_handle == file_handle) {
return sf_open_file_map[i].sf_context_id;
}
}
return -1;
} /* end H5_subfile_fid_to_context() */
} /* end H5_subfile_fhandle_to_context() */
#ifdef H5_SUBFILING_DEBUG
void
@ -2873,7 +2936,7 @@ H5_subfiling_log(int64_t sf_context_id, const char *fmt, ...)
goto done;
}
begin_thread_exclusive();
H5FD_ioc_begin_thread_exclusive();
if (sf_context->sf_logfile) {
HDvfprintf(sf_context->sf_logfile, fmt, log_args);
@ -2886,7 +2949,7 @@ H5_subfiling_log(int64_t sf_context_id, const char *fmt, ...)
HDfflush(stdout);
}
end_thread_exclusive();
H5FD_ioc_end_thread_exclusive();
done:
va_end(log_args);

View File

@ -22,38 +22,13 @@
#include "H5private.h"
#include "H5Iprivate.h"
/* TODO: needed for ioc_selection_t, which also needs to be public */
#include "H5FDioc.h"
#include "H5FDsubfiling.h"
/*
* Some definitions for debugging the Subfiling feature
*/
/* #define H5_SUBFILING_DEBUG */
/*
* The following is our basic template for a subfile filename.
* Note that eventually we shouldn't use 0_of_N since we
* intend to use the user defined HDF5 filename for a
* zeroth subfile as well as for all metadata.
*/
#define SF_FILENAME_TEMPLATE ".subfile_%" PRIu64 "_%0*d_of_%d"
/*
* The following is our basic template for a subfiling
* configuration filename.
*/
#define SF_CONFIG_FILENAME_TEMPLATE ".subfile_%" PRIu64 ".config"
/*
* Environment variables interpreted by the HDF5 subfiling feature
*/
#define H5_IOC_SELECTION_CRITERIA "H5_IOC_SELECTION_CRITERIA"
#define H5_IOC_COUNT_PER_NODE "H5_IOC_COUNT_PER_NODE"
#define H5_IOC_STRIPE_SIZE "H5_IOC_STRIPE_SIZE"
#define H5_IOC_SUBFILE_PREFIX "H5_IOC_SUBFILE_PREFIX"
#define H5FD_DEFAULT_STRIPE_DEPTH (32 * 1024 * 1024)
/*
* MPI Tags are 32 bits, we treat them as unsigned
* to allow the use of the available bits for RPC
@ -166,18 +141,19 @@ typedef struct app_layout_t {
/* This typedef defines things related to IOC selections */
typedef struct topology {
app_layout_t *app_layout; /* Pointer to our layout struct */
bool rank_is_ioc; /* Indicates that we host an IOC */
int subfile_rank; /* Valid only if rank_is_ioc */
int n_io_concentrators; /* Number of IO concentrators */
int *io_concentrators; /* Vector of ranks which are IOCs */
int *subfile_fd; /* file descriptor (if IOC) */
ioc_selection_t selection_type; /* Cache our IOC selection criteria */
app_layout_t *app_layout; /* Pointer to our layout struct */
bool rank_is_ioc; /* Indicates that we host an IOC */
int subfile_rank; /* Valid only if rank_is_ioc */
int n_io_concentrators; /* Number of IO concentrators */
int *io_concentrators; /* Vector of ranks which are IOCs */
int *subfile_fd; /* file descriptor (if IOC) */
H5FD_subfiling_ioc_select_t selection_type; /* Cache our IOC selection criteria */
} sf_topology_t;
typedef struct {
int64_t sf_context_id; /* Generated context ID which embeds the cache index */
uint64_t h5_file_id; /* GUID (basically the inode value) */
void *h5_file_handle; /* Low-level handle for the HDF5 stub file */
int sf_fid; /* value returned by open(file,..) */
size_t sf_write_count; /* Statistics: write_count */
size_t sf_read_count; /* Statistics: read_count */
@ -236,15 +212,16 @@ extern app_layout_t *sf_app_layout;
extern "C" {
#endif
H5_DLL herr_t H5_open_subfiles(const char *base_filename, uint64_t h5_file_id,
ioc_selection_t ioc_selection_type, int file_acc_flags, MPI_Comm file_comm,
int64_t *context_id_out);
H5_DLL herr_t H5_open_subfiles(const char *base_filename, void *h5_file_handle,
H5FD_subfiling_shared_config_t *subfiling_config, int file_acc_flags,
MPI_Comm file_comm, int64_t *context_id_out);
H5_DLL herr_t H5_close_subfiles(int64_t subfiling_context_id);
H5_DLL int64_t H5_new_subfiling_object_id(sf_obj_type_t obj_type, int64_t index_val);
H5_DLL void *H5_get_subfiling_object(int64_t object_id);
H5_DLL int64_t H5_subfile_fid_to_context(uint64_t h5_fid);
H5_DLL int64_t H5_subfile_fhandle_to_context(void *file_handle);
H5_DLL herr_t H5_free_subfiling_object(int64_t object_id);
H5_DLL herr_t H5_get_num_iocs_from_config_file(FILE *config_file, int *n_io_concentrators);
H5_DLL void H5_subfiling_log(int64_t sf_context_id, const char *fmt, ...);

View File

@ -1097,6 +1097,11 @@ H5_trace_args(H5RS_str_t *rs, const char *type, va_list ap)
case H5_VFD_ROS3:
H5RS_acat(rs, "H5_VFD_ROS3");
break;
#endif
#ifdef H5_HAVE_SUBFILING_VFD
case H5_VFD_SUBFILING:
H5RS_acat(rs, "H5_VFD_SUBFILING");
break;
#endif
case H5_VFD_ONION:
H5RS_acat(rs, "H5_VFD_ONION");

View File

@ -107,6 +107,8 @@ create_subfiling_ioc_fapl(void)
TEST_ERROR;
}
if (H5Pclose(subfiling_conf->ioc_fapl_id) < 0)
TEST_ERROR;
subfiling_conf->ioc_fapl_id = ioc_fapl;
if (H5Pset_fapl_subfiling(ret_value, subfiling_conf) < 0)
@ -140,19 +142,16 @@ error:
static void
test_create_and_close(void)
{
H5FD_subfiling_config_t subfiling_config;
const char *test_filenames[2];
hid_t file_id = H5I_INVALID_HID;
hid_t fapl_id = H5I_INVALID_HID;
const char *test_filenames[2];
hid_t file_id = H5I_INVALID_HID;
hid_t fapl_id = H5I_INVALID_HID;
if (MAINPROCESS)
TESTING("File creation and immediate close");
HDprintf("File creation and immediate close\n");
fapl_id = create_subfiling_ioc_fapl();
VRFY((fapl_id >= 0), "FAPL creation succeeded");
VRFY((H5Pget_fapl_subfiling(fapl_id, &subfiling_config) >= 0), "H5Pget_fapl_subfiling succeeded");
file_id = H5Fcreate("basic_create.h5", H5F_ACC_TRUNC, H5P_DEFAULT, fapl_id);
VRFY((file_id >= 0), "H5Fcreate succeeded");
@ -162,9 +161,6 @@ test_create_and_close(void)
test_filenames[1] = NULL;
h5_clean_files(test_filenames, fapl_id);
if (H5P_DEFAULT != subfiling_config.ioc_fapl_id)
VRFY((H5Pclose(subfiling_config.ioc_fapl_id) >= 0), "FAPL close succeeded");
return;
}
@ -191,13 +187,13 @@ main(int argc, char **argv)
MPI_Comm_size(comm, &mpi_size);
MPI_Comm_rank(comm, &mpi_rank);
H5open();
if (H5dont_atexit() < 0) {
if (MAINPROCESS)
HDprintf("Failed to turn off atexit processing. Continue.\n");
}
H5open();
/* Enable selection I/O using internal temporary workaround */
H5_use_selection_io_g = TRUE;

View File

@ -329,22 +329,26 @@ setup_vfd_test_file(int file_name_id, char *file_name, int mpi_size, H5FD_mpio_x
#ifdef H5_HAVE_SUBFILING_VFD
else if (HDstrcmp(vfd_name, H5FD_SUBFILING_NAME) == 0) {
hid_t ioc_fapl;
H5FD_ioc_config_t ioc_config = {/* magic = */ H5FD_IOC_FAPL_MAGIC,
/* version = */ H5FD_CURR_IOC_FAPL_VERSION,
/* stripe_count = */ 0, /* will over write */
/* stripe_depth = */ (INTS_PER_RANK / 2),
/* ioc_selection = */ SELECT_IOC_ONE_PER_NODE,
/* ioc_fapl_id = */ H5P_DEFAULT, /* will over write? */
/* thread_pool_count = */ H5FD_IOC_THREAD_POOL_SIZE};
H5FD_subfiling_config_t subfiling_conf = {
/* magic = */ H5FD_IOC_FAPL_MAGIC,
/* version = */ H5FD_CURR_IOC_FAPL_VERSION,
/* stripe_count = */ 0, /* will over write */
/* stripe_depth = */ (INTS_PER_RANK / 2),
H5FD_subfiling_shared_config_t shared_conf = {
/* ioc_selection = */ SELECT_IOC_ONE_PER_NODE,
/* stripe_size = */ (INTS_PER_RANK / 2),
/* stripe_count = */ 0, /* will over write */
};
H5FD_subfiling_config_t subfiling_conf = {
/* magic = */ H5FD_SUBFILING_FAPL_MAGIC,
/* version = */ H5FD_SUBFILING_CURR_FAPL_VERSION,
/* ioc_fapl_id = */ H5P_DEFAULT, /* will over write? */
/* require_ioc = */ TRUE};
/* require_ioc = */ TRUE,
/* shared_cfg = */ shared_conf,
};
H5FD_ioc_config_t ioc_config = {
/* magic = */ H5FD_IOC_FAPL_MAGIC,
/* version = */ H5FD_IOC_CURR_FAPL_VERSION,
/* under_fapl_id = */ H5P_DEFAULT,
/* thread_pool_count = */ H5FD_IOC_DEFAULT_THREAD_POOL_SIZE,
/* subf_config = */ shared_conf,
};
hid_t ioc_fapl = H5I_INVALID_HID;
if ((pass) && ((ioc_fapl = H5Pcreate(H5P_FILE_ACCESS)) < 0)) {

View File

@ -81,11 +81,20 @@ const char *volnames[] = {
*
*/
const char *drivernames[] = {
[SEC2_VFD_IDX] = "sec2", [DIRECT_VFD_IDX] = "direct", [LOG_VFD_IDX] = "log",
[WINDOWS_VFD_IDX] = "windows", [STDIO_VFD_IDX] = "stdio", [CORE_VFD_IDX] = "core",
[FAMILY_VFD_IDX] = "family", [SPLIT_VFD_IDX] = "split", [MULTI_VFD_IDX] = "multi",
[MPIO_VFD_IDX] = "mpio", [ROS3_VFD_IDX] = "ros3", [HDFS_VFD_IDX] = "hdfs",
[ONION_VFD_IDX] = "onion",
[SEC2_VFD_IDX] = "sec2",
[DIRECT_VFD_IDX] = "direct",
[LOG_VFD_IDX] = "log",
[WINDOWS_VFD_IDX] = "windows",
[STDIO_VFD_IDX] = "stdio",
[CORE_VFD_IDX] = "core",
[FAMILY_VFD_IDX] = "family",
[SPLIT_VFD_IDX] = "split",
[MULTI_VFD_IDX] = "multi",
[MPIO_VFD_IDX] = "mpio",
[ROS3_VFD_IDX] = "ros3",
[HDFS_VFD_IDX] = "hdfs",
[SUBFILING_VFD_IDX] = H5FD_SUBFILING_NAME,
[ONION_VFD_IDX] = "onion",
};
#define NUM_VOLS (sizeof(volnames) / sizeof(volnames[0]))
@ -572,6 +581,14 @@ h5tools_set_fapl_vfd(hid_t fapl_id, h5tools_vfd_info_t *vfd_info)
H5TOOLS_GOTO_ERROR(FAIL, "H5Pset_fapl_hdfs() failed");
#else
H5TOOLS_GOTO_ERROR(FAIL, "The HDFS VFD is not enabled");
#endif
}
else if (!HDstrcmp(vfd_info->u.name, drivernames[SUBFILING_VFD_IDX])) {
#ifdef H5_HAVE_SUBFILING_VFD
if (H5Pset_fapl_subfiling(fapl_id, (const H5FD_subfiling_config_t *)vfd_info->info) < 0)
H5TOOLS_GOTO_ERROR(FAIL, "H5Pset_fapl_subfiling() failed");
#else
H5TOOLS_GOTO_ERROR(FAIL, "The Subfiling VFD is not enabled");
#endif
}
else if (!HDstrcmp(vfd_info->u.name, drivernames[ONION_VFD_IDX])) {

View File

@ -578,15 +578,15 @@ typedef struct h5tools_vfd_info_t {
} u;
} h5tools_vfd_info_t;
/* This enum should match the entries in the above 'volnames'
* since they are indices into the 'volnames' array. */
/* This enum should match the entries in the 'volnames'
* array since they are indices into that array. */
typedef enum {
NATIVE_VOL_IDX = 0,
PASS_THROUGH_VOL_IDX,
} vol_idx;
/* This enum should match the entries in the above 'drivernames'
* since they are indices into the 'drivernames' array. */
/* This enum should match the entries in the 'drivernames'
* array since they are indices into that array. */
typedef enum {
SEC2_VFD_IDX = 0,
DIRECT_VFD_IDX,
@ -600,6 +600,7 @@ typedef enum {
MPIO_VFD_IDX,
ROS3_VFD_IDX,
HDFS_VFD_IDX,
SUBFILING_VFD_IDX,
ONION_VFD_IDX,
} driver_idx;