mirror of
https://github.com/HDFGroup/hdf5.git
synced 2024-11-27 02:10:55 +08:00
[svn-r5690] Purpose:
Bug Fix. Description: When parallel I/O is used, the MPI-I/O VFL driver uses a "lazy" model for calling MPI_File_set_view() in order to reduce the number of calls to this function. However, this is unsafe: if a collective I/O that uses MPI derived types (and thus uses MPI_File_set_view()) is immediately followed by an independent I/O, the code will attempt to call MPI_File_set_view() in order to switch back to the default view of the file. MPI_File_set_view() is a collective call, however, so calling it from an independent I/O causes the application to hang. Solution: Removed the "lazy" MPI_File_set_view() code; instead, the file view is set when it is needed (with MPI derived types) and immediately set back to the default view before leaving the I/O routine. Platforms tested: IRIX64 6.5 (modi4) w/parallel. Also tested with the latest development and release code for the SAF library, which now works correctly with this change. (Although the release branch of the SAF library seems to have a bug, this 1.4.4 release candidate code gets as far as the version the SAF library is released on top of (1.4.2-patch1, I believe).)
This commit is contained in:
parent
4e184b81a2
commit
ace1f474b7
@ -35,6 +35,9 @@ Bug Fixes since HDF5-1.4.0
|
|||||||
|
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
* Fixed bug in parallel I/O routines where a collective I/O which used
|
||||||
|
MPI derived types, followed by an independent I/O would cause the library
|
||||||
|
to hang. QAK 2002/06/24
|
||||||
* Fixed bug in chunking routines where they were using internal allocation
|
* Fixed bug in chunking routines where they were using internal allocation
|
||||||
free routines, instead of malloc/free, preventing user filters from
|
free routines, instead of malloc/free, preventing user filters from
|
||||||
working correctly. Chunks are now allocated/freed with malloc/free and
|
working correctly. Chunks are now allocated/freed with malloc/free and
|
||||||
|
@ -56,7 +56,6 @@ typedef struct H5FD_mpio_t {
|
|||||||
haddr_t eof; /*end-of-file marker */
|
haddr_t eof; /*end-of-file marker */
|
||||||
haddr_t eoa; /*end-of-address marker */
|
haddr_t eoa; /*end-of-address marker */
|
||||||
haddr_t last_eoa; /* Last known end-of-address marker */
|
haddr_t last_eoa; /* Last known end-of-address marker */
|
||||||
unsigned old_use_view; /*remember value of use_view */
|
|
||||||
} H5FD_mpio_t;
|
} H5FD_mpio_t;
|
||||||
|
|
||||||
/* Prototypes */
|
/* Prototypes */
|
||||||
@ -1178,6 +1177,11 @@ H5FD_mpio_get_eof(H5FD_t *_file)
|
|||||||
* the address of the dataset in MPI_File_set_view() calls, as
|
* the address of the dataset in MPI_File_set_view() calls, as
|
||||||
* necessary.
|
* necessary.
|
||||||
*
|
*
|
||||||
|
* Quincey Koziol - 2002/06/24
|
||||||
|
* Removed "lazy" MPI_File_set_view() calls, since they would fail
|
||||||
|
* if the first I/O was a collective I/O using MPI derived types
|
||||||
|
* and the next I/O was an independent I/O.
|
||||||
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
static herr_t
|
static herr_t
|
||||||
@ -1191,7 +1195,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
|
|||||||
MPI_Status mpi_stat;
|
MPI_Status mpi_stat;
|
||||||
MPI_Datatype buf_type, file_type;
|
MPI_Datatype buf_type, file_type;
|
||||||
int size_i, bytes_read, n;
|
int size_i, bytes_read, n;
|
||||||
unsigned use_view_this_time=0, used_view_last_time;
|
unsigned use_view_this_time=0;
|
||||||
H5P_genplist_t *plist; /* Property list pointer */
|
H5P_genplist_t *plist; /* Property list pointer */
|
||||||
herr_t ret_value=SUCCEED;
|
herr_t ret_value=SUCCEED;
|
||||||
|
|
||||||
@ -1256,7 +1260,8 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
|
|||||||
*/
|
*/
|
||||||
mpi_disp=mpi_off;
|
mpi_disp=mpi_off;
|
||||||
mpi_off=0;
|
mpi_off=0;
|
||||||
} else {
|
} /* end if */
|
||||||
|
else {
|
||||||
/*
|
/*
|
||||||
* Prepare for a simple xfer of a contiguous block of bytes. The
|
* Prepare for a simple xfer of a contiguous block of bytes. The
|
||||||
* btype, ftype, and disp fields are not used.
|
* btype, ftype, and disp fields are not used.
|
||||||
@ -1264,23 +1269,17 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
|
|||||||
buf_type = MPI_BYTE;
|
buf_type = MPI_BYTE;
|
||||||
file_type = MPI_BYTE;
|
file_type = MPI_BYTE;
|
||||||
mpi_disp = 0; /* mpi_off is already set */
|
mpi_disp = 0; /* mpi_off is already set */
|
||||||
}
|
} /* end else */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Don't bother to reset the view if we're not using the types this time,
|
* Set the file view when we are using MPI derived types
|
||||||
* and we didn't use them last time either.
|
|
||||||
*/
|
*/
|
||||||
used_view_last_time = file->old_use_view;
|
if (use_view_this_time) {
|
||||||
if (used_view_last_time || /* change to new ftype or MPI_BYTE */
|
|
||||||
use_view_this_time) { /* almost certainly a different ftype */
|
|
||||||
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
||||||
if (MPI_SUCCESS != MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info))
|
if (MPI_SUCCESS != MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info))
|
||||||
HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_File_set_view failed");
|
HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_File_set_view failed");
|
||||||
}
|
} /* end if */
|
||||||
|
|
||||||
/* Keep the 'use view' flag around for the next I/O */
|
|
||||||
file->old_use_view = use_view_this_time;
|
|
||||||
|
|
||||||
/* Read the data. */
|
/* Read the data. */
|
||||||
assert(H5FD_MPIO_INDEPENDENT==dx->xfer_mode || H5FD_MPIO_COLLECTIVE==dx->xfer_mode);
|
assert(H5FD_MPIO_INDEPENDENT==dx->xfer_mode || H5FD_MPIO_COLLECTIVE==dx->xfer_mode);
|
||||||
if (H5FD_MPIO_INDEPENDENT==dx->xfer_mode) {
|
if (H5FD_MPIO_INDEPENDENT==dx->xfer_mode) {
|
||||||
@ -1341,6 +1340,15 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
|
|||||||
if (bytes_read<0 || bytes_read>size_i)
|
if (bytes_read<0 || bytes_read>size_i)
|
||||||
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed");
|
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reset the file view when we used MPI derived types
|
||||||
|
*/
|
||||||
|
if (use_view_this_time) {
|
||||||
|
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
||||||
|
if (MPI_SUCCESS != MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info))
|
||||||
|
HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_File_set_view failed");
|
||||||
|
} /* end if */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This gives us zeroes beyond end of physical MPI file. What about
|
* This gives us zeroes beyond end of physical MPI file. What about
|
||||||
* reading past logical end of HDF5 file???
|
* reading past logical end of HDF5 file???
|
||||||
@ -1471,6 +1479,11 @@ done:
|
|||||||
* the address of the dataset in MPI_File_set_view() calls, as
|
* the address of the dataset in MPI_File_set_view() calls, as
|
||||||
* necessary.
|
* necessary.
|
||||||
*
|
*
|
||||||
|
* Quincey Koziol - 2002/06/24
|
||||||
|
* Removed "lazy" MPI_File_set_view() calls, since they would fail
|
||||||
|
* if the first I/O was a collective I/O using MPI derived types
|
||||||
|
* and the next I/O was an independent I/O.
|
||||||
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
static herr_t
|
static herr_t
|
||||||
@ -1484,7 +1497,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
|
|||||||
MPI_Status mpi_stat;
|
MPI_Status mpi_stat;
|
||||||
MPI_Datatype buf_type, file_type;
|
MPI_Datatype buf_type, file_type;
|
||||||
int size_i, bytes_written;
|
int size_i, bytes_written;
|
||||||
unsigned use_view_this_time=0, used_view_last_time;
|
unsigned use_view_this_time=0;
|
||||||
H5P_genplist_t *plist; /* Property list pointer */
|
H5P_genplist_t *plist; /* Property list pointer */
|
||||||
herr_t ret_value=SUCCEED;
|
herr_t ret_value=SUCCEED;
|
||||||
|
|
||||||
@ -1549,7 +1562,8 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
|
|||||||
*/
|
*/
|
||||||
mpi_disp=mpi_off;
|
mpi_disp=mpi_off;
|
||||||
mpi_off=0;
|
mpi_off=0;
|
||||||
} else {
|
} /* end if */
|
||||||
|
else {
|
||||||
/*
|
/*
|
||||||
* Prepare for a simple xfer of a contiguous block of bytes.
|
* Prepare for a simple xfer of a contiguous block of bytes.
|
||||||
* The btype, ftype, and disp fields are not used.
|
* The btype, ftype, and disp fields are not used.
|
||||||
@ -1557,27 +1571,17 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
|
|||||||
buf_type = MPI_BYTE;
|
buf_type = MPI_BYTE;
|
||||||
file_type = MPI_BYTE;
|
file_type = MPI_BYTE;
|
||||||
mpi_disp = 0; /* mpi_off is already set */
|
mpi_disp = 0; /* mpi_off is already set */
|
||||||
}
|
} /* end else */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Don't bother to reset the view if we're not using the types this time,
|
* Set the file view when we are using MPI derived types
|
||||||
* and we didn't use them last time either.
|
|
||||||
*/
|
*/
|
||||||
used_view_last_time = file->old_use_view;
|
if (use_view_this_time) {
|
||||||
if (used_view_last_time || /* change to new ftype or MPI_BYTE */
|
|
||||||
use_view_this_time) { /* almost certainly a different ftype */
|
|
||||||
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
||||||
if (MPI_SUCCESS != MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info))
|
if (MPI_SUCCESS != MPI_File_set_view(file->f, mpi_disp, MPI_BYTE, file_type, (char*)"native", file->info))
|
||||||
HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_File_set_view failed");
|
HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_File_set_view failed");
|
||||||
}
|
} /* end if */
|
||||||
|
|
||||||
/*
|
|
||||||
* We always set the use_view flag to 0 because the default is not to
|
|
||||||
* use types next time, unless someone explicitly requests it by setting
|
|
||||||
* this flag to !=0.
|
|
||||||
*/
|
|
||||||
file->old_use_view = use_view_this_time;
|
|
||||||
|
|
||||||
/* Only p<round> will do the actual write if all procs in comm write same data */
|
/* Only p<round> will do the actual write if all procs in comm write same data */
|
||||||
if ((type!=H5FD_MEM_DRAW) && H5_mpi_1_metawrite_g) {
|
if ((type!=H5FD_MEM_DRAW) && H5_mpi_1_metawrite_g) {
|
||||||
if (file->mpi_rank != file->mpi_round) {
|
if (file->mpi_rank != file->mpi_round) {
|
||||||
@ -1654,6 +1658,15 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
|
|||||||
if (bytes_written<0 || bytes_written>size_i)
|
if (bytes_written<0 || bytes_written>size_i)
|
||||||
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file write failed");
|
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file write failed");
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reset the file view when we used MPI derived types
|
||||||
|
*/
|
||||||
|
if (use_view_this_time) {
|
||||||
|
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
||||||
|
if (MPI_SUCCESS != MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info))
|
||||||
|
HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_File_set_view failed");
|
||||||
|
} /* end if */
|
||||||
|
|
||||||
/* Forget the EOF value (see H5FD_mpio_get_eof()) --rpm 1999-08-06 */
|
/* Forget the EOF value (see H5FD_mpio_get_eof()) --rpm 1999-08-06 */
|
||||||
file->eof = HADDR_UNDEF;
|
file->eof = HADDR_UNDEF;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user