[svn-r11241] Purpose:

Code cleanup

Description:
    Fix logic error in previous checkin and also finish refactoring I/O
initialization, including simplifying all the collective & parallel cases
into a more unified mechanism.

Platforms tested:
    FreeBSD 4.11 (sleipnir) w/ & w/o parallel
    Linux 2.4 (mir)
This commit is contained in:
Quincey Koziol 2005-08-13 13:09:57 -05:00
parent 2efc3027ec
commit 45f1eba213
3 changed files with 175 additions and 359 deletions

View File

@ -102,8 +102,6 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts,
static herr_t H5D_ioinfo_make_ind(H5D_io_info_t *io_info);
static herr_t H5D_ioinfo_make_coll(H5D_io_info_t *io_info);
static herr_t H5D_ioinfo_term(H5D_io_info_t *io_info);
static htri_t H5D_get_collective_io_consensus(const H5F_t *file,
const htri_t local_opinion, const unsigned flags);
static herr_t H5D_mpio_get_min_chunk(const H5D_io_info_t *io_info,
const fm_map *fm, int *min_chunkf);
#endif /* H5_HAVE_PARALLEL */
@ -111,7 +109,7 @@ static herr_t H5D_mpio_get_min_chunk(const H5D_io_info_t *io_info,
/* I/O info operations */
static herr_t H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache,
hid_t dxpl_id, const H5S_t *mem_space, const H5S_t *file_space,
H5T_path_t *tpath, unsigned flags, H5D_io_info_t *io_info);
H5T_path_t *tpath, H5D_io_info_t *io_info);
/* Chunk operations */
static herr_t H5D_create_chunk_map(const H5D_t *dataset, const H5T_t *mem_type,
@ -644,19 +642,11 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
H5T_path_t *tpath = NULL; /*type conversion info */
const H5T_t *mem_type = NULL; /* Memory datatype */
H5D_io_info_t io_info; /* Dataset I/O info */
#ifdef H5_HAVE_PARALLEL
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
int prop_value,new_value;
htri_t check_prop;
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
#endif /*H5_HAVE_PARALLEL*/
hbool_t io_info_init = FALSE; /* Whether the I/O info has been initialized */
H5D_dxpl_cache_t _dxpl_cache; /* Data transfer property cache buffer */
H5D_dxpl_cache_t *dxpl_cache=&_dxpl_cache; /* Data transfer property cache */
unsigned sconv_flags=0; /* Flags for the space conversion */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI_NOINIT(H5D_read)
/* check args */
@ -682,11 +672,6 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
/* Collective access is not permissible without a MPI based VFD */
if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPI(dataset->ent.file))
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPI-based drivers only")
/* Set the "parallel I/O possible" flag, for H5S_find(), if we are doing collective I/O */
/* (Don't set the parallel I/O possible flag for the MPI-posix driver, since it doesn't do real collective I/O) */
if (H5S_mpi_opt_types_g && dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPIPOSIX(dataset->ent.file))
sconv_flags |= H5S_CONV_PAR_IO_POSSIBLE;
#endif /*H5_HAVE_PARALLEL*/
/* Make certain that the number of elements in each selection is the same */
@ -744,51 +729,10 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
if (NULL==(tpath=H5T_path_find(dataset->shared->type, mem_type, NULL, NULL, dxpl_id)))
HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to convert between src and dest data types")
/* Set the storage flags for the space conversion check */
switch(dataset->shared->layout.type) {
case H5D_COMPACT:
sconv_flags |= H5S_CONV_STORAGE_COMPACT;
break;
case H5D_CONTIGUOUS:
sconv_flags |= H5S_CONV_STORAGE_CONTIGUOUS;
break;
case H5D_CHUNKED:
sconv_flags |= H5S_CONV_STORAGE_CHUNKED;
break;
default:
assert(0 && "Unhandled layout type!");
} /* end switch */
/* Set up I/O operation */
if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,sconv_flags,&io_info)<0)
if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,&io_info)<0)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation")
#ifdef H5_HAVE_PARALLEL
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
/**** Test for collective chunk IO
notice the following code should be removed after
a more general collective chunk IO algorithm is applied.
*/
if(dataset->shared->layout.type == H5D_CHUNKED) { /*only check for chunking storage */
check_prop = H5Pexist(dxpl_id,H5D_XFER_COLL_CHUNK_NAME);
if(check_prop < 0)
HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to check property list");
if(check_prop > 0) {
if(H5Pget(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&prop_value)<0)
HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to get property value");
if(!io_info.use_par_opt_io) {
new_value = 0;
if(H5Pset(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&new_value)<0)
HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to set property value");
}
}
}
/* end Test for collective chunk IO */
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
#endif /*H5_HAVE_PARALLEL*/
io_info_init = TRUE;
/* Determine correct I/O routine to invoke */
if(dataset->shared->layout.type!=H5D_CHUNKED) {
@ -805,8 +749,9 @@ H5D_read(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
done:
#ifdef H5_HAVE_PARALLEL
/* Shut down io_info struct */
if (H5D_ioinfo_term(&io_info) < 0)
HDONE_ERROR(H5E_DATASET, H5E_CANTCLOSEOBJ, FAIL, "can't shut down io_info")
if (io_info_init)
if(H5D_ioinfo_term(&io_info) < 0)
HDONE_ERROR(H5E_DATASET, H5E_CANTCLOSEOBJ, FAIL, "can't shut down io_info")
#endif /*H5_HAVE_PARALLEL*/
FUNC_LEAVE_NOAPI(ret_value)
@ -868,15 +813,9 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
H5T_path_t *tpath = NULL; /*type conversion info */
const H5T_t *mem_type = NULL; /* Memory datatype */
H5D_io_info_t io_info; /* Dataset I/O info */
#ifdef H5_HAVE_PARALLEL
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
int prop_value,new_value;
htri_t check_prop;
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
#endif /*H5_HAVE_PARALLEL*/
hbool_t io_info_init = FALSE; /* Whether the I/O info has been initialized */
H5D_dxpl_cache_t _dxpl_cache; /* Data transfer property cache buffer */
H5D_dxpl_cache_t *dxpl_cache=&_dxpl_cache; /* Data transfer property cache */
unsigned sconv_flags=0; /* Flags for the space conversion */
herr_t ret_value = SUCCEED; /* Return value */
FUNC_ENTER_NOAPI_NOINIT(H5D_write)
@ -897,22 +836,6 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
dataset->shared->checked_filters = TRUE;
}
/* If MPI based VFD is used, no VL datatype support yet. */
/* This is because they use the global heap in the file and we don't */
/* support parallel access of that yet */
if (IS_H5FD_MPI(dataset->ent.file) && H5T_detect_class(mem_type, H5T_VLEN)>0)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing VL datatypes yet")
/* If MPI based VFD is used, no VL datatype support yet. */
/* This is because they use the global heap in the file and we don't */
/* support parallel access of that yet */
/* We should really use H5T_detect_class() here, but it will be difficult
* to detect the type of the reference if it is nested... -QAK
*/
if (IS_H5FD_MPI(dataset->ent.file) &&
H5T_get_class(mem_type, TRUE)==H5T_REFERENCE &&
H5T_get_ref_type(mem_type)==H5R_DATASET_REGION)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing region reference datatypes yet")
/* Check if we are allowed to write to this file */
if (0==(H5F_get_intent(dataset->ent.file) & H5F_ACC_RDWR))
HGOTO_ERROR(H5E_DATASET, H5E_WRITEERROR, FAIL, "no write intent on file")
@ -921,6 +844,32 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
if (H5D_get_dxpl_cache(dxpl_id,&dxpl_cache)<0)
HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "can't fill dxpl cache")
/* Various MPI based checks */
if (IS_H5FD_MPI(dataset->ent.file)) {
/* If MPI based VFD is used, no VL datatype support yet. */
/* This is because they use the global heap in the file and we don't */
/* support parallel access of that yet */
if(H5T_detect_class(mem_type, H5T_VLEN)>0)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing VL datatypes yet")
/* If MPI based VFD is used, no VL datatype support yet. */
/* This is because they use the global heap in the file and we don't */
/* support parallel access of that yet */
/* We should really use H5T_detect_class() here, but it will be difficult
* to detect the type of the reference if it is nested... -QAK
*/
if (H5T_get_class(mem_type, TRUE)==H5T_REFERENCE &&
H5T_get_ref_type(mem_type)==H5R_DATASET_REGION)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "Parallel IO does not support writing region reference datatypes yet")
} /* end if */
#ifdef H5_HAVE_PARALLEL
else {
/* Collective access is not permissible without a MPI based VFD */
if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPI-based driver only")
} /* end else */
#endif /*H5_HAVE_PARALLEL*/
if (!file_space)
file_space = dataset->shared->space;
if (!mem_space)
@ -929,17 +878,6 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
HGOTO_ERROR (H5E_ARGS, H5E_BADVALUE, FAIL, "src dataspace has invalid selection")
H5_ASSIGN_OVERFLOW(nelmts,snelmts,hssize_t,hsize_t);
#ifdef H5_HAVE_PARALLEL
/* Collective access is not permissible without a MPI based VFD */
if (dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPI(dataset->ent.file))
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "collective access for MPI-based driver only")
/* Set the "parallel I/O possible" flag, for H5S_find(), if we are doing collective I/O */
/* (Don't set the parallel I/O possible flag for the MPI-posix driver, since it doesn't do real collective I/O) */
if (H5S_mpi_opt_types_g && dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPIPOSIX(dataset->ent.file))
sconv_flags |= H5S_CONV_PAR_IO_POSSIBLE;
#endif /*H5_HAVE_PARALLEL*/
/* Make certain that the number of elements in each selection is the same */
if (nelmts!=(hsize_t)H5S_GET_SELECT_NPOINTS(file_space))
HGOTO_ERROR (H5E_ARGS, H5E_BADVALUE, FAIL, "src and dest data spaces have different sizes")
@ -986,51 +924,10 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
if (NULL==(tpath=H5T_path_find(mem_type, dataset->shared->type, NULL, NULL, dxpl_id)))
HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to convert between src and dest data types")
/* Set the storage flags for the space conversion check */
switch(dataset->shared->layout.type) {
case H5D_COMPACT:
sconv_flags |= H5S_CONV_STORAGE_COMPACT;
break;
case H5D_CONTIGUOUS:
sconv_flags |= H5S_CONV_STORAGE_CONTIGUOUS;
break;
case H5D_CHUNKED:
sconv_flags |= H5S_CONV_STORAGE_CHUNKED;
break;
default:
assert(0 && "Unhandled layout type!");
} /* end switch */
/* Set up I/O operation */
if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,sconv_flags,&io_info)<0)
if(H5D_ioinfo_init(dataset,dxpl_cache,dxpl_id,mem_space,file_space,tpath,&io_info)<0)
HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation")
#ifdef H5_HAVE_PARALLEL
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
/**** Test for collective chunk IO
notice the following code should be removed after
a more general collective chunk IO algorithm is applied.
*/
if(dataset->shared->layout.type == H5D_CHUNKED) { /*only check for chunking storage */
check_prop = H5Pexist(dxpl_id,H5D_XFER_COLL_CHUNK_NAME);
if(check_prop < 0)
HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to check property list");
if(check_prop > 0) {
if(H5Pget(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&prop_value)<0)
HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to get property value");
if(!io_info.use_par_opt_io) {
new_value = 0;
if(H5Pset(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&new_value)<0)
HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to set property value");
}
}
}
/* end Test for collective chunk IO */
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
#endif /*H5_HAVE_PARALLEL*/
io_info_init = TRUE;
/* Determine correct I/O routine to invoke */
if(dataset->shared->layout.type!=H5D_CHUNKED) {
@ -1062,8 +959,9 @@ H5D_write(H5D_t *dataset, hid_t mem_type_id, const H5S_t *mem_space,
done:
#ifdef H5_HAVE_PARALLEL
/* Shut down io_info struct */
if (H5D_ioinfo_term(&io_info) < 0)
HDONE_ERROR(H5E_DATASET, H5E_CANTCLOSEOBJ, FAIL, "can't shut down io_info")
if (io_info_init)
if(H5D_ioinfo_term(&io_info) < 0)
HDONE_ERROR(H5E_DATASET, H5E_CANTCLOSEOBJ, FAIL, "can't shut down io_info")
#endif /*H5_HAVE_PARALLEL*/
FUNC_LEAVE_NOAPI(ret_value)
@ -1711,32 +1609,25 @@ H5D_chunk_read(H5D_io_info_t *io_info, hsize_t nelmts,
int mpi_code; /* MPI error code */
int all_regular = 0; /* If this chunk's selections are regular on all processes */
/* Determine if this process has regular selections */
if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE &&
H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE)
is_regular = 1;
else
is_regular = 0;
if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
/* Determine if all processes have regular selections */
if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&is_regular, &all_regular, 1, MPI_INT, MPI_LAND, io_info->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
if(!is_regular) {
/* For regular selection,
if MPI_COMPLEX_DERIVED_DATATYPE is not defined,
unless spaces for all processors are regular, independent read operation should be performed.*/
if(!all_regular) {
/* Switch to independent I/O (temporarily) */
make_ind = TRUE;
make_coll = TRUE;
}
else {
/* For regular selection,
if MPI_COMPLEX_DERIVED_DATATYPE is not defined,
unless spaces for all processors are regular, independent read operation should be performed.*/
if(!all_regular) {
/* Switch to independent I/O (temporarily) */
make_ind = TRUE;
make_coll = TRUE;
} /* end if */
} /* end else */
} /* end if */
} /* end else */
#endif /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
} /* end if */
@ -2104,32 +1995,25 @@ H5D_chunk_write(H5D_io_info_t *io_info, hsize_t nelmts,
int mpi_code; /* MPI error code */
int all_regular = 0; /* If this chunk's selections are regular on all processes */
/* Determine if this process has regular selections */
if(H5S_SELECT_IS_REGULAR(chunk_info->fspace) == TRUE &&
H5S_SELECT_IS_REGULAR(chunk_info->mspace) == TRUE)
is_regular = 1;
is_regular = 1;
else
is_regular = 0;
if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&all_regular,&is_regular,1,MPI_INT,MPI_MIN,0,comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
if (MPI_SUCCESS != (mpi_code= MPI_Bcast(&all_regular,1,MPI_INT,0,comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
/* Determine if all processes have regular selections */
if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&is_regular, &all_regular, 1, MPI_INT, MPI_LAND, io_info->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
if(!is_regular) {
/* For regular selection,
if MPI_COMPLEX_DERIVED_DATATYPE is not defined,
unless spaces for all processors are regular, independent read operation should be performed.*/
if(!all_regular) {
/* Switch to independent I/O (temporarily) */
make_ind = TRUE;
make_coll = TRUE;
}
else {
/* For regular selection,
if MPI_COMPLEX_DERIVED_DATATYPE is not defined,
unless spaces for all processors are regular, independent read operation should be performed.*/
if(!all_regular) {
/* Switch to independent I/O (temporarily) */
make_ind = TRUE;
make_coll = TRUE;
} /* end if */
} /* end else */
} /* end if */
} /* end else */
#endif /* H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS */
} /* end if */
@ -3201,103 +3085,6 @@ done:
FUNC_LEAVE_NOAPI(ret_value)
} /* end H5D_chunk_mem_cb() */
/*-------------------------------------------------------------------------
 * Function:	H5D_get_collective_io_consensus
 *
 * Purpose:	Compare notes with all other processes involved in this I/O
 *		and see if all are go for collective I/O.
 *
 *		If all are, return TRUE.
 *
 *		If any process can't manage collective I/O, then collective
 *		I/O is impossible, and we return FALSE.
 *
 *		If the flags indicate that collective I/O is impossible,
 *		skip the interprocess communication and just return FALSE.
 *
 *		If any error is detected, return FAIL.
 *
 * Return:	Success:	TRUE or FALSE
 *
 *		Failure:	FAIL
 *
 * Programmer:	JRM -- 8/30/04
 *
 *-------------------------------------------------------------------------
 */
#ifdef H5_HAVE_PARALLEL
static htri_t
H5D_get_collective_io_consensus(const H5F_t *file,
                                const htri_t local_opinion,
                                const unsigned flags)
{
    htri_t   ret_value = FAIL;   /* Will update if successful */
    MPI_Comm comm;               /* MPI communicator for the file */
    int      int_local_opinion;  /* Local opinion, as an int for MPI */
    int      consensus;          /* Consensus opinion of all processes */
    int      mpi_result;         /* MPI error code */

    FUNC_ENTER_NOAPI_NOINIT(H5D_get_collective_io_consensus);

    HDassert ( ( local_opinion == TRUE ) || ( local_opinion == FALSE ) );

    /* Don't do the interprocess communication unless the Parallel I/O
     * conversion flag is set -- there may not be other processes to
     * talk to.
     */
    /* (Fixed: the original condition read "flags & flags&H5S_CONV_PAR_IO_POSSIBLE";
     *  the duplicated "flags &" was redundant.) */
    if ( ! ( flags & H5S_CONV_PAR_IO_POSSIBLE ) ) {
        HGOTO_DONE(FALSE);
    }

    /* Retrieve the MPI communicator associated with the file */
    comm = H5F_mpi_get_comm(file);

    if ( comm == MPI_COMM_NULL )
        HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, \
                    "can't retrieve MPI communicator")

    /* Convert the local htri_t opinion into an int for the MPI reduction */
    if ( local_opinion == TRUE ) {
        int_local_opinion = 1;
    } else {
        int_local_opinion = 0;
    }

    /* All processes must agree (logical AND) for collective I/O to proceed */
    mpi_result = MPI_Allreduce((void *)(&int_local_opinion),
                               (void *)(&consensus),
                               1,
                               MPI_INT,
                               MPI_LAND,
                               comm);

    if ( mpi_result != MPI_SUCCESS )
        HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_result)

    if ( consensus ) {
        ret_value = TRUE;
    } else {
        ret_value = FALSE;
    }

done:
    FUNC_LEAVE_NOAPI(ret_value);
} /* H5D_get_collective_io_consensus() */
#endif /* H5_HAVE_PARALLEL */
/*-------------------------------------------------------------------------
* Function: H5D_ioinfo_init
@ -3328,16 +3115,9 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
#ifndef H5_HAVE_PARALLEL
UNUSED
#endif /* H5_HAVE_PARALLEL */
*tpath, unsigned
#ifndef H5_HAVE_PARALLEL
UNUSED
#endif /* H5_HAVE_PARALLEL */
flags,
*tpath,
H5D_io_info_t *io_info)
{
#ifdef H5_HAVE_PARALLEL
htri_t opt; /* Flag whether a selection is optimizable */
#endif /* H5_HAVE_PARALLEL */
herr_t ret_value = SUCCEED; /* Return value */
#if defined H5_HAVE_PARALLEL || defined H5S_DEBUG
@ -3364,7 +3144,12 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
io_info->ops=dset->shared->io_ops;
#ifdef H5_HAVE_PARALLEL
/* Start in the "not modified" xfer_mode state */
io_info->xfer_mode_changed = FALSE;
if(IS_H5FD_MPI(dset->ent.file)) {
htri_t opt; /* Flag whether a selection is optimizable */
/* Get MPI communicator */
if((io_info->comm = H5F_mpi_get_comm(dset->ent.file)) == MPI_COMM_NULL)
HGOTO_ERROR(H5E_DATASPACE, H5E_CANTGET, FAIL, "can't retrieve MPI communicator")
@ -3372,72 +3157,69 @@ H5D_ioinfo_init(H5D_t *dset, const H5D_dxpl_cache_t *dxpl_cache, hid_t dxpl_id,
/*
* Check if we can set direct MPI-IO read/write functions
*/
opt=H5D_mpio_opt_possible(dset,mem_space,file_space,flags);
opt=H5D_mpio_opt_possible(io_info, mem_space, file_space, tpath);
if(opt==FAIL)
HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "invalid check for direct IO dataspace ");
opt = H5D_get_collective_io_consensus(dset->ent.file, opt, flags);
if ( opt == FAIL )
HGOTO_ERROR(H5E_DATASPACE, H5E_BADRANGE, FAIL, "check for collective I/O consensus failed.");
/* Check if we can use the optimized parallel I/O routines */
if(opt==TRUE) {
/* Set the pointers to the MPI-specific routines */
io_info->ops.read = H5D_mpio_select_read;
io_info->ops.write = H5D_mpio_select_write;
/* Indicate that the I/O will use collective */
io_info->use_par_opt_io=TRUE;
} /* end if */
else {
/* Set the pointers to the non-MPI-specific routines */
io_info->ops.read = H5D_select_read;
io_info->ops.write = H5D_select_write;
/* Indicate that the I/O will _NOT_ be parallel, use independent IO */
io_info->use_par_opt_io=FALSE;
} /* end else */
/* Start in the "not modified" state */
io_info->xfer_mode_changed=FALSE;
/* If we can't or won't be doing collective I/O, but the user
* asked for collective I/O, change the request to use independent I/O
*/
if(!(io_info->use_par_opt_io && H5T_path_noop(tpath) &&
H5Z_xform_noop(dxpl_cache->data_xform_prop))
&& io_info->dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE) {
H5P_genplist_t *dx_plist; /* Data transer property list */
/* Get the dataset transfer property list */
if (NULL == (dx_plist = H5I_object(dxpl_id)))
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset creation property list")
/* Change the xfer_mode to independent for handling the I/O */
io_info->dxpl_cache->xfer_mode = H5FD_MPIO_INDEPENDENT;
if(H5P_set (dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &io_info->dxpl_cache->xfer_mode) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode")
/* Set the pointers to the non-MPI-specific routines */
io_info->ops.read = H5D_select_read;
io_info->ops.write = H5D_select_write;
/* Indicate that the transfer mode should be restored before returning
* to user.
/* If we won't be doing collective I/O, but the user asked for
* collective I/O, change the request to use independent I/O, but
* mark it so that we remember to revert the change.
*/
io_info->xfer_mode_changed=TRUE;
} /* end if */
if(io_info->dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE) {
H5P_genplist_t *dx_plist; /* Data transer property list */
/* Get the dataset transfer property list */
if (NULL == (dx_plist = H5I_object(dxpl_id)))
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset creation property list")
/* Change the xfer_mode to independent for handling the I/O */
io_info->dxpl_cache->xfer_mode = H5FD_MPIO_INDEPENDENT;
if(H5P_set (dx_plist, H5D_XFER_IO_XFER_MODE_NAME, &io_info->dxpl_cache->xfer_mode) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't set transfer mode")
/* Indicate that the transfer mode should be restored before returning
* to user.
*/
io_info->xfer_mode_changed = TRUE;
} /* end if */
#ifdef H5_HAVE_INSTRUMENTED_LIBRARY
/**** Test for collective chunk IO
notice the following code should be removed after
a more general collective chunk IO algorithm is applied.
(This property is only reset for independent I/O)
*/
if(dset->shared->layout.type == H5D_CHUNKED) { /*only check for chunking storage */
htri_t check_prop;
check_prop = H5Pexist(dxpl_id,H5D_XFER_COLL_CHUNK_NAME);
if(check_prop < 0)
HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to check property list");
if(check_prop > 0) {
int prop_value = 0;
if(H5Pset(dxpl_id,H5D_XFER_COLL_CHUNK_NAME,&prop_value)<0)
HGOTO_ERROR(H5E_PLIST, H5E_UNSUPPORTED, FAIL, "unable to set property value");
} /* end if */
} /* end if */
#endif /* H5_HAVE_INSTRUMENTED_LIBRARY */
} /* end else */
} /* end if */
else {
/* Indicate that the I/O will _NOT_ be parallel */
io_info->use_par_opt_io=FALSE;
/* Set the pointers to the non-MPI-specific routines */
io_info->ops.read = H5D_select_read;
io_info->ops.write = H5D_select_write;
/* Set the "not modified" state */
io_info->xfer_mode_changed=FALSE;
} /* end else */
#else /* H5_HAVE_PARALLEL */
io_info->ops.read = H5D_select_read;
@ -3570,13 +3352,14 @@ done:
static herr_t
H5D_ioinfo_term(H5D_io_info_t *io_info)
{
H5P_genplist_t *dx_plist; /* Data transer property list */
herr_t ret_value = SUCCEED; /*return value */
FUNC_ENTER_NOAPI_NOINIT(H5D_ioinfo_term)
/* Check if we need to revert the change to the xfer mode */
if (io_info->use_par_opt_io && io_info->xfer_mode_changed) {
if (io_info->xfer_mode_changed) {
H5P_genplist_t *dx_plist; /* Data transer property list */
/* Get the dataset transfer property list */
if (NULL == (dx_plist = H5I_object(io_info->dxpl_id)))
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a dataset transfer property list")
@ -3619,13 +3402,9 @@ H5D_mpio_get_min_chunk(const H5D_io_info_t *io_info,
/* Get the number of chunks to perform I/O on */
num_chunkf = H5SL_count(fm->fsel);
/* Determine the minimum and maximum # of chunks for all processes */
if (MPI_SUCCESS != (mpi_code= MPI_Reduce(&num_chunkf,min_chunkf,1,MPI_INT,MPI_MIN,0,io_info->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Reduce failed", mpi_code)
/* Broadcast the flag indicating the number of chunks are the same */
if (MPI_SUCCESS != (mpi_code= MPI_Bcast(min_chunkf,1,MPI_INT,0,io_info->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
/* Determine the minimum # of chunks for all processes */
if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&num_chunkf, min_chunkf, 1, MPI_INT, MPI_MIN, io_info->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
done:
FUNC_LEAVE_NOAPI(ret_value);

View File

@ -24,7 +24,6 @@
#define H5D_PACKAGE /*suppress error about including H5Dpkg */
#include "H5private.h" /* Generic Functions */
#include "H5Dpkg.h" /* Datasets */
#include "H5Eprivate.h" /* Error handling */
@ -61,35 +60,51 @@ H5D_mpio_spaces_xfer(H5D_io_info_t *io_info, size_t elmt_size,
*-------------------------------------------------------------------------
*/
htri_t
H5D_mpio_opt_possible( const H5D_t *dset, const H5S_t *mem_space, const H5S_t *file_space, const unsigned flags)
H5D_mpio_opt_possible( const H5D_io_info_t *io_info,
const H5S_t *mem_space, const H5S_t *file_space, const H5T_path_t *tpath)
{
int local_opinion = TRUE; /* This process's idea of whether to perform collective I/O or not */
int consensus; /* Consensus opinion of all processes */
int mpi_code; /* MPI error code */
htri_t ret_value=TRUE;
FUNC_ENTER_NOAPI(H5D_mpio_opt_possible, FAIL);
/* Check args */
assert(dset);
assert(io_info);
assert(mem_space);
assert(file_space);
/* Parallel I/O conversion flag must be set, if it is not collective IO, go to false. */
if(!(flags&H5S_CONV_PAR_IO_POSSIBLE))
/* For independent I/O, get out quickly and don't try to form consensus */
if (io_info->dxpl_cache->xfer_mode==H5FD_MPIO_INDEPENDENT)
HGOTO_DONE(FALSE);
/* Optimized MPI types flag must be set and it is must be collective IO */
/* (Don't allow parallel I/O for the MPI-posix driver, since it doesn't do real collective I/O) */
if (!(H5S_mpi_opt_types_g && io_info->dxpl_cache->xfer_mode==H5FD_MPIO_COLLECTIVE && !IS_H5FD_MPIPOSIX(io_info->dset->ent.file))) {
local_opinion = FALSE;
goto broadcast;
} /* end if */
/* Check whether these are both simple or scalar dataspaces */
if (!((H5S_SIMPLE==H5S_GET_EXTENT_TYPE(mem_space) || H5S_SCALAR==H5S_GET_EXTENT_TYPE(mem_space))
&& (H5S_SIMPLE==H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR==H5S_GET_EXTENT_TYPE(file_space))))
HGOTO_DONE(FALSE);
&& (H5S_SIMPLE==H5S_GET_EXTENT_TYPE(file_space) || H5S_SCALAR==H5S_GET_EXTENT_TYPE(file_space)))) {
local_opinion = FALSE;
goto broadcast;
} /* end if */
/* Can't currently handle point selections */
if (H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(mem_space) || H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(file_space))
HGOTO_DONE(FALSE);
if (H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(mem_space) || H5S_SEL_POINTS==H5S_GET_SELECT_TYPE(file_space)) {
local_opinion = FALSE;
goto broadcast;
} /* end if */
/* Dataset storage must be contiguous or chunked */
if ((flags&H5S_CONV_STORAGE_MASK)!=H5S_CONV_STORAGE_CONTIGUOUS &&
(flags&H5S_CONV_STORAGE_MASK)!=H5S_CONV_STORAGE_CHUNKED)
HGOTO_DONE(FALSE);
if (!(io_info->dset->shared->layout.type == H5D_CONTIGUOUS ||
io_info->dset->shared->layout.type == H5D_CHUNKED)) {
local_opinion = FALSE;
goto broadcast;
} /* end if */
/*The handling of memory space is different for chunking
and contiguous storage,
@ -102,18 +117,41 @@ H5D_mpio_opt_possible( const H5D_t *dset, const H5S_t *mem_space, const H5S_t *f
support complicated MPI derived data type, we will
set use_par_opt_io = FALSE.
*/
if(dset->shared->layout.type == H5D_CONTIGUOUS) {
#ifndef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
if((H5S_SELECT_IS_REGULAR(file_space) != TRUE) ||
(H5S_SELECT_IS_REGULAR(mem_space) != TRUE))
HGOTO_DONE(FALSE);
if(io_info->dset->shared->layout.type == H5D_CONTIGUOUS)
if((H5S_SELECT_IS_REGULAR(file_space) != TRUE) ||
(H5S_SELECT_IS_REGULAR(mem_space) != TRUE)) {
local_opinion = FALSE;
goto broadcast;
} /* end if */
#endif
}
if(dset->shared->layout.type == H5D_CHUNKED)
if(dset->shared->dcpl_cache.pline.nused>0)
HGOTO_DONE(FALSE); /* Perform the independent write operation */
/* Don't allow collective operations if filters need to be applied */
if(io_info->dset->shared->layout.type == H5D_CHUNKED)
if(io_info->dset->shared->dcpl_cache.pline.nused>0) {
local_opinion = FALSE;
goto broadcast;
} /* end if */
/* Don't allow collective operations if datatype conversions need to happen */
if(!H5T_path_noop(tpath)) {
local_opinion = FALSE;
goto broadcast;
} /* end if */
/* Don't allow collective operations if data transform operations should occur */
if(!H5Z_xform_noop(io_info->dxpl_cache->data_xform_prop)) {
local_opinion = FALSE;
goto broadcast;
} /* end if */
broadcast:
/* Form consensus opinion among all processes about whether to perform
* collective I/O */
if (MPI_SUCCESS != (mpi_code = MPI_Allreduce(&local_opinion, &consensus, 1, MPI_INT, MPI_LAND, io_info->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Allreduce failed", mpi_code)
ret_value = consensus > 0 ? TRUE : FALSE;
done:
FUNC_LEAVE_NOAPI(ret_value);

View File

@ -105,7 +105,6 @@ typedef struct H5D_io_info_t {
hid_t dxpl_id; /* Original DXPL ID */
#ifdef H5_HAVE_PARALLEL
MPI_Comm comm; /* MPI communicator for file */
hbool_t use_par_opt_io; /* Whether the 'optimized' I/O routines with be parallel */
hbool_t xfer_mode_changed; /* Whether the transfer mode was changed */
#endif /* H5_HAVE_PARALLEL */
const H5D_storage_t *store; /* Dataset storage info */
@ -324,8 +323,8 @@ H5_DLL herr_t H5D_mpio_spaces_span_write(H5D_io_info_t *io_info,
/* MPI-IO function to check if a direct I/O transfer is possible between
* memory and the file */
H5_DLL htri_t H5D_mpio_opt_possible(const H5D_t *dset, const H5S_t *mem_space,
const H5S_t *file_space, const unsigned flags);
H5_DLL htri_t H5D_mpio_opt_possible(const H5D_io_info_t *io_info, const H5S_t *mem_space,
const H5S_t *file_space, const H5T_path_t *tpath);
#endif /* H5_HAVE_PARALLEL */
/* Testing functions */