mirror of
https://github.com/HDFGroup/hdf5.git
synced 2025-03-31 17:10:47 +08:00
Subfiling VFD - check if MPI is finalized during VFD termination (#2683)
This commit is contained in:
parent
14a19b8c90
commit
b5ecb0af6d
@ -887,16 +887,20 @@ done:
|
||||
static herr_t
|
||||
H5FD__ioc_close_int(H5FD_ioc_t *file_ptr)
|
||||
{
|
||||
int mpi_finalized;
|
||||
int mpi_code;
|
||||
herr_t ret_value = SUCCEED;
|
||||
|
||||
HDassert(file_ptr);
|
||||
|
||||
if (MPI_SUCCESS != (mpi_code = MPI_Finalized(&mpi_finalized)))
|
||||
H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Finalized failed", mpi_code);
|
||||
|
||||
if (file_ptr->context_id >= 0) {
|
||||
subfiling_context_t *sf_context = H5_get_subfiling_object(file_ptr->context_id);
|
||||
int mpi_code;
|
||||
|
||||
/* Don't allow IOC threads to be finalized until everyone gets here */
|
||||
if (file_ptr->mpi_size > 1)
|
||||
if (!mpi_finalized && (file_ptr->mpi_size > 1))
|
||||
if (MPI_SUCCESS != (mpi_code = MPI_Barrier(file_ptr->comm)))
|
||||
H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
|
||||
|
||||
@ -911,10 +915,12 @@ H5FD__ioc_close_int(H5FD_ioc_t *file_ptr)
|
||||
file_ptr->context_id = -1;
|
||||
}
|
||||
|
||||
if (H5_mpi_comm_free(&file_ptr->comm) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI Communicator");
|
||||
if (H5_mpi_info_free(&file_ptr->info) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI Info object");
|
||||
if (!mpi_finalized) {
|
||||
if (H5_mpi_comm_free(&file_ptr->comm) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI Communicator");
|
||||
if (H5_mpi_info_free(&file_ptr->info) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI Info object");
|
||||
}
|
||||
|
||||
done:
|
||||
HDfree(file_ptr->file_path);
|
||||
|
@ -374,12 +374,29 @@ H5FD__subfiling_term(void)
|
||||
herr_t ret_value = SUCCEED;
|
||||
|
||||
if (H5FD_SUBFILING_g >= 0) {
|
||||
int mpi_finalized;
|
||||
int mpi_code;
|
||||
|
||||
/*
|
||||
* Determine whether MPI has already been finalized.
|
||||
* This can happen if an HDF5 ID is left unclosed and HDF5
|
||||
* shuts down after MPI_Finalize() is called in an application.
|
||||
*/
|
||||
if (MPI_SUCCESS != (mpi_code = MPI_Finalized(&mpi_finalized)))
|
||||
H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Finalized failed", mpi_code);
|
||||
|
||||
/* Free RPC message MPI Datatype */
|
||||
if (H5_subfiling_rpc_msg_type != MPI_DATATYPE_NULL)
|
||||
if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&H5_subfiling_rpc_msg_type)))
|
||||
H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code);
|
||||
if (H5_subfiling_rpc_msg_type != MPI_DATATYPE_NULL) {
|
||||
if (!mpi_finalized) {
|
||||
if (MPI_SUCCESS != (mpi_code = MPI_Type_free(&H5_subfiling_rpc_msg_type)))
|
||||
H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Type_free failed", mpi_code);
|
||||
}
|
||||
#ifdef H5FD_SUBFILING_DEBUG
|
||||
else
|
||||
HDprintf("** WARNING **: HDF5 is terminating the Subfiling VFD after MPI_Finalize() was "
|
||||
"called - an HDF5 ID was probably left unclosed\n");
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Clean up resources */
|
||||
if (H5_subfiling_terminate() < 0)
|
||||
@ -1297,10 +1314,15 @@ done:
|
||||
static herr_t
|
||||
H5FD__subfiling_close_int(H5FD_subfiling_t *file_ptr)
|
||||
{
|
||||
int mpi_finalized;
|
||||
int mpi_code;
|
||||
herr_t ret_value = SUCCEED;
|
||||
|
||||
HDassert(file_ptr);
|
||||
|
||||
if (MPI_SUCCESS != (mpi_code = MPI_Finalized(&mpi_finalized)))
|
||||
H5_SUBFILING_MPI_GOTO_ERROR(FAIL, "MPI_Finalized failed", mpi_code);
|
||||
|
||||
if (file_ptr->sf_file && H5FD_close(file_ptr->sf_file) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_IO, H5E_CANTCLOSEFILE, FAIL, "unable to close subfile");
|
||||
if (file_ptr->stub_file && H5FD_close(file_ptr->stub_file) < 0)
|
||||
@ -1311,13 +1333,15 @@ H5FD__subfiling_close_int(H5FD_subfiling_t *file_ptr)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_ARGS, FAIL, "can't close IOC FAPL");
|
||||
file_ptr->fa.ioc_fapl_id = H5I_INVALID_HID;
|
||||
|
||||
if (H5_mpi_comm_free(&file_ptr->comm) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI Communicator");
|
||||
if (H5_mpi_info_free(&file_ptr->info) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI Info object");
|
||||
if (!mpi_finalized) {
|
||||
if (H5_mpi_comm_free(&file_ptr->comm) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI Communicator");
|
||||
if (H5_mpi_info_free(&file_ptr->info) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "unable to free MPI Info object");
|
||||
|
||||
if (H5_mpi_comm_free(&file_ptr->ext_comm) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "can't free MPI communicator");
|
||||
if (H5_mpi_comm_free(&file_ptr->ext_comm) < 0)
|
||||
H5_SUBFILING_GOTO_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "can't free MPI communicator");
|
||||
}
|
||||
|
||||
file_ptr->fail_to_encode = FALSE;
|
||||
|
||||
|
@ -338,8 +338,18 @@ done:
|
||||
static herr_t
|
||||
H5_free_subfiling_object_int(subfiling_context_t *sf_context)
|
||||
{
|
||||
int mpi_finalized;
|
||||
int mpi_code;
|
||||
herr_t ret_value = SUCCEED;
|
||||
|
||||
HDassert(sf_context);
|
||||
|
||||
if (MPI_SUCCESS != (mpi_code = MPI_Finalized(&mpi_finalized))) {
|
||||
/* MPI state is unknown: treat MPI as finalized so the frees guarded by !mpi_finalized are skipped, and clean up what we can */
|
||||
H5_SUBFILING_MPI_DONE_ERROR(FAIL, "MPI_Finalized failed", mpi_code);
|
||||
mpi_finalized = 1;
|
||||
}
|
||||
|
||||
sf_context->sf_context_id = -1;
|
||||
sf_context->h5_file_id = UINT64_MAX;
|
||||
sf_context->sf_num_fids = 0;
|
||||
@ -352,28 +362,38 @@ H5_free_subfiling_object_int(subfiling_context_t *sf_context)
|
||||
sf_context->sf_base_addr = -1;
|
||||
|
||||
if (sf_context->sf_msg_comm != MPI_COMM_NULL) {
|
||||
if (H5_mpi_comm_free(&sf_context->sf_msg_comm) < 0)
|
||||
return FAIL;
|
||||
if (!mpi_finalized) {
|
||||
if (H5_mpi_comm_free(&sf_context->sf_msg_comm) < 0)
|
||||
return FAIL;
|
||||
}
|
||||
sf_context->sf_msg_comm = MPI_COMM_NULL;
|
||||
}
|
||||
if (sf_context->sf_data_comm != MPI_COMM_NULL) {
|
||||
if (H5_mpi_comm_free(&sf_context->sf_data_comm) < 0)
|
||||
return FAIL;
|
||||
if (!mpi_finalized) {
|
||||
if (H5_mpi_comm_free(&sf_context->sf_data_comm) < 0)
|
||||
return FAIL;
|
||||
}
|
||||
sf_context->sf_data_comm = MPI_COMM_NULL;
|
||||
}
|
||||
if (sf_context->sf_eof_comm != MPI_COMM_NULL) {
|
||||
if (H5_mpi_comm_free(&sf_context->sf_eof_comm) < 0)
|
||||
return FAIL;
|
||||
if (!mpi_finalized) {
|
||||
if (H5_mpi_comm_free(&sf_context->sf_eof_comm) < 0)
|
||||
return FAIL;
|
||||
}
|
||||
sf_context->sf_eof_comm = MPI_COMM_NULL;
|
||||
}
|
||||
if (sf_context->sf_node_comm != MPI_COMM_NULL) {
|
||||
if (H5_mpi_comm_free(&sf_context->sf_node_comm) < 0)
|
||||
return FAIL;
|
||||
if (!mpi_finalized) {
|
||||
if (H5_mpi_comm_free(&sf_context->sf_node_comm) < 0)
|
||||
return FAIL;
|
||||
}
|
||||
sf_context->sf_node_comm = MPI_COMM_NULL;
|
||||
}
|
||||
if (sf_context->sf_group_comm != MPI_COMM_NULL) {
|
||||
if (H5_mpi_comm_free(&sf_context->sf_group_comm) < 0)
|
||||
return FAIL;
|
||||
if (!mpi_finalized) {
|
||||
if (H5_mpi_comm_free(&sf_context->sf_group_comm) < 0)
|
||||
return FAIL;
|
||||
}
|
||||
sf_context->sf_group_comm = MPI_COMM_NULL;
|
||||
}
|
||||
|
||||
@ -402,16 +422,24 @@ H5_free_subfiling_object_int(subfiling_context_t *sf_context)
|
||||
|
||||
HDfree(sf_context);
|
||||
|
||||
return SUCCEED;
|
||||
H5_SUBFILING_FUNC_LEAVE;
|
||||
}
|
||||
|
||||
static herr_t
|
||||
H5_free_subfiling_topology(sf_topology_t *topology)
|
||||
{
|
||||
int mpi_finalized;
|
||||
int mpi_code;
|
||||
herr_t ret_value = SUCCEED;
|
||||
|
||||
HDassert(topology);
|
||||
|
||||
if (MPI_SUCCESS != (mpi_code = MPI_Finalized(&mpi_finalized))) {
|
||||
/* MPI state is unknown: treat MPI as finalized so the free guarded by !mpi_finalized is skipped, and clean up what we can */
|
||||
H5_SUBFILING_MPI_DONE_ERROR(FAIL, "MPI_Finalized failed", mpi_code);
|
||||
mpi_finalized = 1;
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
{
|
||||
hbool_t topology_cached = FALSE;
|
||||
@ -442,8 +470,9 @@ H5_free_subfiling_topology(sf_topology_t *topology)
|
||||
HDfree(topology->io_concentrators);
|
||||
topology->io_concentrators = NULL;
|
||||
|
||||
if (H5_mpi_comm_free(&topology->app_comm) < 0)
|
||||
H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "can't free MPI communicator");
|
||||
if (!mpi_finalized)
|
||||
if (H5_mpi_comm_free(&topology->app_comm) < 0)
|
||||
H5_SUBFILING_DONE_ERROR(H5E_VFL, H5E_CANTFREE, FAIL, "can't free MPI communicator");
|
||||
|
||||
HDfree(topology);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user