mirror of https://github.com/HDFGroup/hdf5.git
[svn-r2990] Purpose:
    Bug fix (feature, kind of)
Description:
    The library used to hang if the processes of a collective dataset
    read/write request did not end up making the same number of eventual
    MPIO requests.  Part of the reason is that H5FD_read/H5FD_write
    immediately returns success if it sees that the request size is 0.
    This caused problems since the processes that still had I/O to do
    would hang waiting for the process(es) that returned early.
Solution:
    H5FD.c: disable the early-return code in parallel mode, so a process
    goes on even with "nothing" to transfer.
    H5D.c: the optimized MPIO xfer routines can handle collective calls
    correctly when the conditions are right (e.g., no conversion).  When
    a COLLECTIVE request cannot be handled correctly without the risk of
    hanging, it is changed to INDEPENDENT mode for the eventual MPIO
    calls.
Platforms tested:
    IRIX64 parallel (-64, n32), IRIX64 -64 sequential, Linux sequential.
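The deadlock described above is a consequence of MPI collective semantics: every process in the communicator must enter a collective I/O call, even a process with nothing to transfer. The standalone sketch below is an illustration, not HDF5 source or part of this commit; the file name, data layout, and which ranks have data are made up. It shows the participation rule the fix enforces: a rank with zero elements still makes the collective call with count==0 rather than skipping it.

    /* Minimal illustration (assumed example, not HDF5 code) of why a
     * rank with nothing to write must still enter a collective call.
     * Build/run (hypothetically): mpicc demo.c && mpirun -np 4 ./a.out */
    #include <mpi.h>

    int main(int argc, char **argv)
    {
        int        rank, count, buf;
        MPI_File   fh;
        MPI_Status status;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_File_open(MPI_COMM_WORLD, "demo.dat",
                      MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

        buf   = rank;
        count = (rank % 2 == 0) ? 1 : 0;  /* pretend odd ranks have no data */

        /* Skipping this call on odd ranks -- the analogue of the old early
         * return in H5FD_read/H5FD_write -- would leave the even ranks
         * waiting forever inside the collective.  Calling it with
         * count==0 lets every process proceed. */
        MPI_File_write_at_all(fh, (MPI_Offset)(rank * (int)sizeof(int)),
                              &buf, count, MPI_INT, &status);

        MPI_File_close(&fh);
        MPI_Finalize();
        return 0;
    }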
This commit is contained in:
parent b9c8954ad9
commit 91da899a1f
170 src/H5D.c
@@ -1534,25 +1534,30 @@ H5D_close(H5D_t *dataset)
  *             Thursday, December 4, 1997
  *
  * Modifications:
  *     Robb Matzke, 1998-06-09
  *     The data space is no longer cached in the dataset struct.
  *
  *     Robb Matzke, 1998-08-11
  *     Added timing calls around all the data space I/O functions.
  *
  *     rky, 1998-09-18
  *     Added must_convert to do non-optimized read when necessary.
  *
  *     Quincey Koziol, 1999-07-02
  *     Changed xfer_parms parameter to xfer plist parameter, so it
  *     could be passed to H5T_convert.
  *
+ *     Albert Cheng, 2000-11-21
+ *     Added the code that when it detects it is not safe to process a
+ *     COLLECTIVE read request without hanging, it changes it to
+ *     INDEPENDENT calls.
+ *
  *-------------------------------------------------------------------------
  */
 herr_t
 H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
          const H5S_t *file_space, hid_t dxpl_id, void *buf/*out*/)
 {
     const H5D_xfer_t   *xfer_parms = NULL;
     hssize_t           nelmts;                 /*number of elements   */
     size_t             smine_start;            /*strip mine start loc */
     size_t             n, smine_nelmts;        /*elements per strip   */
@@ -1574,6 +1579,12 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
     H5T_bkg_t          need_bkg;               /*type of background buf*/
     H5S_t              *free_this_space=NULL;  /*data space to free   */
     hbool_t            must_convert;           /*have to xfer the slow way*/
+#ifdef H5_HAVE_PARALLEL
+    H5FD_mpio_dxpl_t   *dx = NULL;
+    H5FD_mpio_xfer_t   xfer_mode;              /*xfer_mode for this request */
+    hbool_t            xfer_mode_changed=0;    /*xfer_mode needs restore */
+    hbool_t            doing_mpio=0;           /*This is an MPIO access */
+#endif
 #ifdef H5S_DEBUG
     H5_timer_t         timer;
 #endif
@@ -1609,17 +1620,20 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
     nelmts = H5S_get_select_npoints(mem_space);

 #ifdef H5_HAVE_PARALLEL
-    {
-        /* Collective access is not permissible without the MPIO driver */
-        H5FD_mpio_dxpl_t *dx;
-        if (H5FD_MPIO==xfer_parms->driver_id &&
-            (dx=xfer_parms->driver_info) &&
-            H5FD_MPIO_COLLECTIVE==dx->xfer_mode) {
-            if (!(IS_H5FD_MPIO(dataset->ent.file)))
-                HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL,
-                             "collective access for MPIO driver only");
-        }
-    }
+    /* Collect Parallel I/O information for possible later use */
+    if (H5FD_MPIO==xfer_parms->driver_id){
+        doing_mpio++;
+        if (dx=xfer_parms->driver_info){
+            xfer_mode = dx->xfer_mode;
+        }else
+            HGOTO_ERROR (H5E_DATASET, H5E_CANTINIT, FAIL,
+                         "unable to retrieve data xfer info");
+    }
+    /* Collective access is not permissible without the MPIO driver */
+    if (doing_mpio && xfer_mode==H5FD_MPIO_COLLECTIVE &&
+        !(IS_H5FD_MPIO(dataset->ent.file)))
+        HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL,
+                     "collective access for MPIO driver only");
 #endif

     /*
@@ -1705,6 +1719,30 @@ H5D_read(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
         H5E_clear ();
     }

+#ifdef H5_HAVE_PARALLEL
+    /* The following may not handle a collective call correctly
+     * since it does not ensure all processes can handle the read
+     * request according to the MPI collective specification.
+     * Do the collective request via independent mode.
+     */
+    if (doing_mpio && xfer_mode==H5FD_MPIO_COLLECTIVE){
+        /* Kludge: change the xfer_mode to independent, handle the request,
+         * then restore xfer_mode before return.
+         * Better way is to get a temporary data_xfer property with
+         * INDEPENDENT xfer_mode and pass it downwards.
+         */
+        dx->xfer_mode = H5FD_MPIO_INDEPENDENT;
+        xfer_mode_changed++;    /* restore it before return */
+#ifdef H5D_DEBUG
+        if (H5DEBUG(D)) {
+            fprintf(H5DEBUG(D),
+                "H5D: Cannot handle this COLLECTIVE read request. Do it via INDEPENDENT calls\n"
+                "dx->xfer_mode was %d, changed to %d\n",
+                xfer_mode, dx->xfer_mode);
+        }
+#endif
+    }
+#endif
     /*
      * This is the general case.  Figure out the strip mine size.
      */
@@ -1884,6 +1922,19 @@ printf("%s: check 2.0, src_type_size=%d, dst_type_size=%d, target_size=%d, min_e
     ret_value = SUCCEED;

 done:
+#ifdef H5_HAVE_PARALLEL
+    /* restore xfer_mode due to the kludge */
+    if (doing_mpio && xfer_mode_changed){
+
+#ifdef H5D_DEBUG
+        if (H5DEBUG(D)) {
+            fprintf (H5DEBUG(D), "H5D: dx->xfer_mode was %d, restored to %d\n",
+                dx->xfer_mode, xfer_mode);
+        }
+#endif
+        dx->xfer_mode = xfer_mode;
+    }
+#endif
     /* Release selection iterators */
     H5S_sel_iter_release(file_space,&file_iter);
     H5S_sel_iter_release(mem_space,&mem_iter);
@@ -1918,17 +1969,22 @@ printf("%s: check 2.0, src_type_size=%d, dst_type_size=%d, target_size=%d, min_e
  *     rky 980918
  *     Added must_convert to do non-optimized read when necessary.
  *
- *     Quincey Koziol, 2 July 1999
- *     Changed xfer_parms parameter to xfer plist parameter, so it could be passed
- *     to H5T_convert
+ *     Quincey Koziol, 2 July 1999
+ *     Changed xfer_parms parameter to xfer plist parameter, so it could
+ *     be passed to H5T_convert
+ *
+ *     Albert Cheng, 2000-11-21
+ *     Added the code that when it detects it is not safe to process a
+ *     COLLECTIVE write request without hanging, it changes it to
+ *     INDEPENDENT calls.
  *
  *-------------------------------------------------------------------------
  */
 herr_t
 H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
           const H5S_t *file_space, hid_t dxpl_id, const void *buf)
 {
     const H5D_xfer_t   *xfer_parms = NULL;
     hssize_t           nelmts;                 /*total number of elmts */
     size_t             smine_start;            /*strip mine start loc */
     size_t             n, smine_nelmts;        /*elements per strip   */
@@ -1950,6 +2006,12 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
     H5T_bkg_t          need_bkg;               /*type of background buf*/
     H5S_t              *free_this_space=NULL;  /*data space to free   */
     hbool_t            must_convert;           /*have to xfer the slow way*/
+#ifdef H5_HAVE_PARALLEL
+    H5FD_mpio_dxpl_t   *dx = NULL;
+    H5FD_mpio_xfer_t   xfer_mode;              /*xfer_mode for this request */
+    hbool_t            xfer_mode_changed=0;    /*xfer_mode needs restore */
+    hbool_t            doing_mpio=0;           /*This is an MPIO access */
+#endif
 #ifdef H5S_DEBUG
     H5_timer_t         timer;
 #endif
@@ -2015,17 +2077,20 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
     nelmts = H5S_get_select_npoints(mem_space);

 #ifdef H5_HAVE_PARALLEL
-    {
-        /* Collective access is not permissible without the MPIO driver */
-        H5FD_mpio_dxpl_t *dx;
-        if (H5FD_MPIO==xfer_parms->driver_id &&
-            (dx=xfer_parms->driver_info) &&
-            H5FD_MPIO_COLLECTIVE==dx->xfer_mode) {
-            if (!(IS_H5FD_MPIO(dataset->ent.file)))
-                HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL,
-                             "collective access for MPIO driver only");
-        }
-    }
+    /* Collect Parallel I/O information for possible later use */
+    if (H5FD_MPIO==xfer_parms->driver_id){
+        doing_mpio++;
+        if (dx=xfer_parms->driver_info){
+            xfer_mode = dx->xfer_mode;
+        }else
+            HGOTO_ERROR (H5E_DATASET, H5E_CANTINIT, FAIL,
+                         "unable to retrieve data xfer info");
+    }
+    /* Collective access is not permissible without the MPIO driver */
+    if (doing_mpio && xfer_mode==H5FD_MPIO_COLLECTIVE &&
+        !(IS_H5FD_MPIO(dataset->ent.file)))
+        HGOTO_ERROR (H5E_DATASET, H5E_UNSUPPORTED, FAIL,
+                     "collective access for MPIO driver only");
 #endif

     /*
@@ -2116,6 +2181,30 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
         H5E_clear ();
     }

+#ifdef H5_HAVE_PARALLEL
+    /* The following may not handle a collective call correctly
+     * since it does not ensure all processes can handle the write
+     * request according to the MPI collective specification.
+     * Do the collective request via independent mode.
+     */
+    if (doing_mpio && xfer_mode==H5FD_MPIO_COLLECTIVE){
+        /* Kludge: change the xfer_mode to independent, handle the request,
+         * then restore xfer_mode before return.
+         * Better way is to get a temporary data_xfer property with
+         * INDEPENDENT xfer_mode and pass it downwards.
+         */
+        dx->xfer_mode = H5FD_MPIO_INDEPENDENT;
+        xfer_mode_changed++;    /* restore it before return */
+#ifdef H5D_DEBUG
+        if (H5DEBUG(D)) {
+            fprintf(H5DEBUG(D),
+                "H5D: Cannot handle this COLLECTIVE write request. Do it via INDEPENDENT calls\n"
+                "dx->xfer_mode was %d, changed to %d\n",
+                xfer_mode, dx->xfer_mode);
+        }
+#endif
+    }
+#endif
     /*
      * This is the general case.  Figure out the strip mine size.
      */
@@ -2305,6 +2394,19 @@ H5D_write(H5D_t *dataset, const H5T_t *mem_type, const H5S_t *mem_space,
     ret_value = SUCCEED;

 done:
+#ifdef H5_HAVE_PARALLEL
+    /* restore xfer_mode due to the kludge */
+    if (doing_mpio && xfer_mode_changed){
+
+#ifdef H5D_DEBUG
+        if (H5DEBUG(D)) {
+            fprintf (H5DEBUG(D), "H5D: dx->xfer_mode was %d, restored to %d\n",
+                dx->xfer_mode, xfer_mode);
+        }
+#endif
+        dx->xfer_mode = xfer_mode;
+    }
+#endif
     /* Release selection iterators */
     H5S_sel_iter_release(file_space,&file_iter);
     H5S_sel_iter_release(mem_space,&mem_iter);
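The kludge comment in the hunks above points at a cleaner design: rather than mutating the caller's driver info in place and undoing it in the done: block, hand a private copy with INDEPENDENT mode down the call chain. The sketch below is hypothetical, not HDF5 code; the types are local stand-ins modeled loosely on the H5FD_mpio_dxpl_t / H5FD_mpio_xfer_t shapes visible in the diff.

    /* Hypothetical stand-ins so the sketch is self-contained. */
    typedef enum { XFER_INDEPENDENT, XFER_COLLECTIVE } xfer_mode_t;
    typedef struct { xfer_mode_t xfer_mode; } mpio_dxpl_t;

    /* Build a temporary copy of the xfer info demoted to independent
     * mode.  The caller passes the copy downwards and never touches
     * the original, so no restore step is needed on any exit path. */
    static mpio_dxpl_t
    demote_to_independent(const mpio_dxpl_t *dx)
    {
        mpio_dxpl_t tmp = *dx;              /* leave caller's plist alone */
        tmp.xfer_mode = XFER_INDEPENDENT;
        return tmp;
    }

The in-place approach taken by the commit couples every exit path to the restore code in done:; a copy removes that coupling entirely, at the cost of plumbing the temporary through the lower layers.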
16 src/H5FD.c
@@ -1852,6 +1852,10 @@ H5FDread(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, hsize_t siz
  *             Wednesday, August 4, 1999
  *
  * Modifications:
+ *     Albert Cheng, 2000-11-21
+ *     Disable the code that does early return when size==0 for
+ *     Parallel mode since a collective call would require the process
+ *     to continue on with "nothing" to transfer.
  *
  *-------------------------------------------------------------------------
  */
@@ -1865,8 +1869,12 @@ H5FD_read(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, hsize_t si
           (H5P_DATA_XFER==H5P_get_class(dxpl_id) || H5I_object(dxpl_id)));
     assert(buf);

+#ifndef H5_HAVE_PARALLEL
+    /* Do not return early for Parallel mode since the I/O could be a */
+    /* collective transfer. */
     /* The no-op case */
     if (0==size) HRETURN(SUCCEED);
+#endif

     /* Check if this information is in the metadata accumulator */
     if((file->feature_flags&H5FD_FEAT_ACCUMULATE_METADATA) &&
@@ -2001,6 +2009,10 @@ H5FDwrite(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, hsize_t si
  *             Wednesday, August 4, 1999
  *
  * Modifications:
+ *     Albert Cheng, 2000-11-21
+ *     Disable the code that does early return when size==0 for
+ *     Parallel mode since a collective call would require the process
+ *     to continue on with "nothing" to transfer.
  *
  *-------------------------------------------------------------------------
  */
@@ -2017,8 +2029,12 @@ H5FD_write(H5FD_t *file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr, hsize_t s
           (H5P_DATA_XFER==H5P_get_class(dxpl_id) && H5I_object(dxpl_id)));
     assert(buf);

+#ifndef H5_HAVE_PARALLEL
+    /* Do not return early for Parallel mode since the I/O could be a */
+    /* collective transfer. */
     /* The no-op case */
     if (0==size) HRETURN(SUCCEED);
+#endif

     /* Check for accumulating metadata */
     if((file->feature_flags&H5FD_FEAT_ACCUMULATE_METADATA) && type!=H5FD_MEM_DRAW) {
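For clarity, here is what the guarded region in H5FD_read/H5FD_write reduces to in each build configuration. The scaffolding is a self-contained stand-in (herr_t, SUCCEED, and HRETURN are redefined locally so the sketch compiles on its own); it is not the real H5FD.c.

    #include <stdio.h>

    /* Stand-ins for the HDF5 pieces, so the sketch is self-contained. */
    typedef int herr_t;
    #define SUCCEED 0
    #define HRETURN(v) return (v)

    /* Toggle to see the two variants the #ifndef produces. */
    /* #define H5_HAVE_PARALLEL */

    static herr_t
    fd_read_stub(size_t size)
    {
    #ifndef H5_HAVE_PARALLEL
        /* Serial build: a zero-size request short-circuits here,
         * keeping the old fast path. */
        if (0 == size)
            HRETURN(SUCCEED);
    #endif
        /* Parallel build: even size==0 reaches the driver layer, so a
         * process with nothing to transfer still enters a collective
         * MPI-IO call alongside its peers. */
        printf("dispatching request of %zu bytes to the driver\n", size);
        return SUCCEED;
    }

    int main(void) { return fd_read_stub(0); }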