mirror of
https://github.com/HDFGroup/hdf5.git
synced 2025-03-13 16:47:58 +08:00
[svn-r7978] Purpose:
Bug fix. Description: The MPIO driver did not detect partial reads or writes. It assumed that if a read completed, all requested bytes had been read. Therefore, if a dataset had not been completely written yet, the unread part would not be filled with the default fill value. Solution: Added an MPI_Get_elements call to find exactly how many bytes were read (or written). In the read case, if fewer bytes than requested are read, the rest of the buffer is padded with zeros. In the write case, it is an error if not all bytes can be written in the first attempt. (It is not always possible to repeat a write for MPIO, since the file view may have changed, a collective call would hang, etc., so we make just one attempt.) Platforms tested: eirene (pp). Should work on other platforms, since these changes have been tested in v1.6. Misc. update:
This commit is contained in:
parent
4b0b1191da
commit
41e958a30f
188
src/H5FDmpio.c
188
src/H5FDmpio.c
@ -18,13 +18,6 @@
|
|||||||
*
|
*
|
||||||
* Purpose: This is the MPI-2 I/O driver.
|
* Purpose: This is the MPI-2 I/O driver.
|
||||||
*
|
*
|
||||||
* Limitations:
|
|
||||||
* H5FD_mpio_read
|
|
||||||
* One implementation of MPI/MPI-IO causes MPI_Get_count
|
|
||||||
* to return (incorrectly) a negative count. I (who?) added code
|
|
||||||
* to detect this, and a kludge to pretend that the number of
|
|
||||||
* bytes read is always equal to the number requested. This
|
|
||||||
* kluge is activated by #ifdef MPI_KLUGE0202.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* Pablo information */
|
/* Pablo information */
|
||||||
@ -1454,6 +1447,10 @@ done:
|
|||||||
* if the first I/O was a collective I/O using MPI derived types
|
* if the first I/O was a collective I/O using MPI derived types
|
||||||
* and the next I/O was an independent I/O.
|
* and the next I/O was an independent I/O.
|
||||||
*
|
*
|
||||||
|
* Quincey Koziol - 2003/10/22-31
|
||||||
|
* Restructured code massively, straightening out logic and finally
|
||||||
|
* getting the bytes_read stuff working.
|
||||||
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
static herr_t
|
static herr_t
|
||||||
@ -1465,7 +1462,11 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
|
|||||||
MPI_Status mpi_stat;
|
MPI_Status mpi_stat;
|
||||||
int mpi_code; /* mpi return code */
|
int mpi_code; /* mpi return code */
|
||||||
MPI_Datatype buf_type=MPI_BYTE; /* MPI description of the selection in memory */
|
MPI_Datatype buf_type=MPI_BYTE; /* MPI description of the selection in memory */
|
||||||
int size_i, bytes_read, n;
|
int size_i; /* Integer copy of 'size' to read */
|
||||||
|
int bytes_read; /* Number of bytes read in */
|
||||||
|
int n;
|
||||||
|
int type_size; /* MPI datatype used for I/O's size */
|
||||||
|
int io_size; /* Actual number of bytes requested */
|
||||||
unsigned use_view_this_time=0;
|
unsigned use_view_this_time=0;
|
||||||
herr_t ret_value=SUCCEED;
|
herr_t ret_value=SUCCEED;
|
||||||
|
|
||||||
@ -1548,81 +1549,41 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
|
|||||||
#endif
|
#endif
|
||||||
if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat )))
|
if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at_all(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat )))
|
||||||
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code)
|
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at_all failed", mpi_code)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reset the file view when we used MPI derived types
|
||||||
|
*/
|
||||||
|
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
||||||
|
if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info)))
|
||||||
|
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
|
||||||
} else {
|
} else {
|
||||||
if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
|
if (MPI_SUCCESS!= (mpi_code=MPI_File_read_at(file->f, mpi_off, buf, size_i, buf_type, &mpi_stat)))
|
||||||
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
|
HMPI_GOTO_ERROR(FAIL, "MPI_File_read_at failed", mpi_code)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* KLUDGE, Robb Matzke, 2000-12-29
|
/* How many bytes were actually read? */
|
||||||
* The LAM implementation of MPI_Get_count() says
|
/* [This works because the "basic elements" we use for all our MPI derived
|
||||||
* MPI_Get_count: invalid argument (rank 0, MPI_COMM_WORLD)
|
* types are MPI_BYTE - QAK]
|
||||||
* So I'm commenting this out until it can be investigated. The
|
|
||||||
* returned `bytes_written' isn't used anyway because of Kim's
|
|
||||||
* kludge to avoid bytes_written<0. Likewise in H5FD_mpio_write(). */
|
|
||||||
|
|
||||||
#ifdef H5_HAVE_MPI_GET_COUNT /* Bill and Albert's kludge*/
|
|
||||||
/* Yet Another KLUDGE, Albert Cheng & Bill Wendling, 2001-05-11.
|
|
||||||
* Many systems don't support MPI_Get_count so we need to do a
|
|
||||||
* configure thingy to fix this. */
|
|
||||||
|
|
||||||
/* Calling MPI_Get_count with "MPI_BYTE" is only valid when we actually
|
|
||||||
* had the 'buf_type' set to MPI_BYTE -QAK
|
|
||||||
*/
|
*/
|
||||||
if(use_view_this_time) {
|
if (MPI_SUCCESS != (mpi_code=MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_read)))
|
||||||
/* Figure out the mapping from the MPI 'buf_type' to bytes, someday...
|
HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
|
||||||
* If this gets fixed (and MPI_Get_count() is reliable), the
|
|
||||||
* kludge below where the 'bytes_read' value from MPI_Get_count() is
|
|
||||||
* overwritten with the 'size_i' parameter can be removed. -QAK
|
|
||||||
*/
|
|
||||||
} /* end if */
|
|
||||||
else {
|
|
||||||
/* How many bytes were actually read? */
|
|
||||||
if (MPI_SUCCESS != (mpi_code=MPI_Get_count(&mpi_stat, MPI_BYTE, &bytes_read)))
|
|
||||||
HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mpi_code)
|
|
||||||
} /* end else */
|
|
||||||
#ifdef H5FDmpio_DEBUG
|
|
||||||
if (H5FD_mpio_Debug[(int)'c'])
|
|
||||||
fprintf(stdout,
|
|
||||||
"In H5FD_mpio_read after Get_count size_i=%d bytes_read=%d\n",
|
|
||||||
size_i, bytes_read );
|
|
||||||
#endif
|
|
||||||
#endif /* H5_HAVE_MPI_GET_COUNT */
|
|
||||||
|
|
||||||
/*
|
/* Get the type's size */
|
||||||
* KLUGE rky 1998-02-02
|
if (MPI_SUCCESS != (mpi_code=MPI_Type_size(buf_type,&type_size)))
|
||||||
* MPI_Get_count incorrectly returns negative count; fake a complete
|
HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code)
|
||||||
* read.
|
|
||||||
*/
|
/* Compute the actual number of bytes requested */
|
||||||
bytes_read = size_i;
|
io_size=type_size*size_i;
|
||||||
|
|
||||||
/* Check for read failure */
|
/* Check for read failure */
|
||||||
if (bytes_read<0 || bytes_read>size_i)
|
if (bytes_read<0 || bytes_read>io_size)
|
||||||
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed")
|
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file read failed")
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Reset the file view when we used MPI derived types
|
* This gives us zeroes beyond end of physical MPI file.
|
||||||
*/
|
*/
|
||||||
if (use_view_this_time) {
|
if ((n=(io_size-bytes_read)) > 0)
|
||||||
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
HDmemset((char*)buf+bytes_read, 0, (size_t)n);
|
||||||
if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info)))
|
|
||||||
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
|
|
||||||
} /* end if */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This gives us zeroes beyond end of physical MPI file. What about
|
|
||||||
* reading past logical end of HDF5 file???
|
|
||||||
*/
|
|
||||||
if ((n=(size_i-bytes_read)) > 0) {
|
|
||||||
if (use_view_this_time) {
|
|
||||||
/*
|
|
||||||
* INCOMPLETE rky 1998-09-18
|
|
||||||
* Haven't implemented reading zeros beyond EOF. What to do???
|
|
||||||
*/
|
|
||||||
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "eof file read failed")
|
|
||||||
} else {
|
|
||||||
memset((char*)buf+bytes_read, 0, (size_t)n);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
done:
|
done:
|
||||||
#ifdef H5FDmpio_DEBUG
|
#ifdef H5FDmpio_DEBUG
|
||||||
@ -1749,6 +1710,10 @@ done:
|
|||||||
* that all the processes must sync up before (one of them)
|
* that all the processes must sync up before (one of them)
|
||||||
* writing metadata.
|
* writing metadata.
|
||||||
*
|
*
|
||||||
|
* Quincey Koziol - 2003/10/22-31
|
||||||
|
* Restructured code massively, straightening out logic and finally
|
||||||
|
* getting the bytes_written stuff working.
|
||||||
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
static herr_t
|
static herr_t
|
||||||
@ -1761,6 +1726,8 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
|
|||||||
MPI_Datatype buf_type=MPI_BYTE; /* MPI description of the selection in memory */
|
MPI_Datatype buf_type=MPI_BYTE; /* MPI description of the selection in memory */
|
||||||
int mpi_code; /* MPI return code */
|
int mpi_code; /* MPI return code */
|
||||||
int size_i, bytes_written;
|
int size_i, bytes_written;
|
||||||
|
int type_size; /* MPI datatype used for I/O's size */
|
||||||
|
int io_size; /* Actual number of bytes requested */
|
||||||
unsigned use_view_this_time=0;
|
unsigned use_view_this_time=0;
|
||||||
H5P_genplist_t *plist; /* Property list pointer */
|
H5P_genplist_t *plist; /* Property list pointer */
|
||||||
herr_t ret_value=SUCCEED;
|
herr_t ret_value=SUCCEED;
|
||||||
@ -1897,79 +1864,46 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
|
|||||||
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
||||||
if (MPI_SUCCESS != (mpi_code=MPI_File_write_at_all(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat)))
|
if (MPI_SUCCESS != (mpi_code=MPI_File_write_at_all(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat)))
|
||||||
HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code)
|
HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at_all failed", mpi_code)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Reset the file view when we used MPI derived types
|
||||||
|
*/
|
||||||
|
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
||||||
|
if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info)))
|
||||||
|
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
|
||||||
} else {
|
} else {
|
||||||
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
||||||
if (MPI_SUCCESS != (mpi_code=MPI_File_write_at(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat)))
|
if (MPI_SUCCESS != (mpi_code=MPI_File_write_at(file->f, mpi_off, (void*)buf, size_i, buf_type, &mpi_stat)))
|
||||||
HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code)
|
HMPI_GOTO_ERROR(FAIL, "MPI_File_write_at failed", mpi_code)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* KLUDGE, Robb Matzke, 2000-12-29
|
/* How many bytes were actually written? */
|
||||||
* The LAM implementation of MPI_Get_count() says
|
/* [This works because the "basic elements" we use for all our MPI derived
|
||||||
* MPI_Get_count: invalid argument (rank 0, MPI_COMM_WORLD)
|
* types are MPI_BYTE - QAK]
|
||||||
* So I'm commenting this out until it can be investigated. The
|
|
||||||
* returned `bytes_written' isn't used anyway because of Kim's
|
|
||||||
* kludge to avoid bytes_written<0. Likewise in H5FD_mpio_read(). */
|
|
||||||
|
|
||||||
#ifdef H5_HAVE_MPI_GET_COUNT /* Bill and Albert's kludge*/
|
|
||||||
/* Yet Another KLUDGE, Albert Cheng & Bill Wendling, 2001-05-11.
|
|
||||||
* Many systems don't support MPI_Get_count so we need to do a
|
|
||||||
* configure thingy to fix this. */
|
|
||||||
|
|
||||||
/* Calling MPI_Get_count with "MPI_BYTE" is only valid when we actually
|
|
||||||
* had the 'buf_type' set to MPI_BYTE -QAK
|
|
||||||
*/
|
*/
|
||||||
if(use_view_this_time) {
|
if (MPI_SUCCESS != (mpi_code=MPI_Get_elements(&mpi_stat, MPI_BYTE, &bytes_written)))
|
||||||
/* Figure out the mapping from the MPI 'buf_type' to bytes, someday...
|
HMPI_GOTO_ERROR(FAIL, "MPI_Get_elements failed", mpi_code)
|
||||||
* If this gets fixed (and MPI_Get_count() is reliable), the
|
|
||||||
* kludge below where the 'bytes_written' value from MPI_Get_count() is
|
|
||||||
* overwritten with the 'size_i' parameter can be removed. -QAK
|
|
||||||
*/
|
|
||||||
} /* end if */
|
|
||||||
else {
|
|
||||||
/* How many bytes were actually written? */
|
|
||||||
if (MPI_SUCCESS!= (mpi_code=MPI_Get_count(&mpi_stat, MPI_BYTE, &bytes_written)))
|
|
||||||
HMPI_GOTO_ERROR(FAIL, "MPI_Get_count failed", mpi_code)
|
|
||||||
} /* end else */
|
|
||||||
#ifdef H5FDmpio_DEBUG
|
|
||||||
if (H5FD_mpio_Debug[(int)'c'])
|
|
||||||
fprintf(stdout,
|
|
||||||
"In H5FD_mpio_write after Get_count size_i=%d bytes_written=%d\n",
|
|
||||||
size_i, bytes_written );
|
|
||||||
#endif
|
|
||||||
#endif /* H5_HAVE_MPI_GET_COUNT */
|
|
||||||
|
|
||||||
/*
|
/* Get the type's size */
|
||||||
* KLUGE rky, 1998-02-02
|
if (MPI_SUCCESS != (mpi_code=MPI_Type_size(buf_type,&type_size)))
|
||||||
* MPI_Get_count incorrectly returns negative count; fake a complete
|
HMPI_GOTO_ERROR(FAIL, "MPI_Type_size failed", mpi_code)
|
||||||
* write.
|
|
||||||
*/
|
/* Compute the actual number of bytes requested */
|
||||||
bytes_written = size_i;
|
io_size=type_size*size_i;
|
||||||
|
|
||||||
/* Check for write failure */
|
/* Check for write failure */
|
||||||
if (bytes_written<0 || bytes_written>size_i)
|
if (bytes_written != io_size)
|
||||||
HGOTO_ERROR(H5E_IO, H5E_READERROR, FAIL, "file write failed")
|
HGOTO_ERROR(H5E_IO, H5E_WRITEERROR, FAIL, "file write failed")
|
||||||
|
|
||||||
/*
|
|
||||||
* Reset the file view when we used MPI derived types
|
|
||||||
*/
|
|
||||||
if (use_view_this_time) {
|
|
||||||
/*OKAY: CAST DISCARDS CONST QUALIFIER*/
|
|
||||||
if (MPI_SUCCESS != (mpi_code=MPI_File_set_view(file->f, 0, MPI_BYTE, MPI_BYTE, (char*)"native", file->info)))
|
|
||||||
HMPI_GOTO_ERROR(FAIL, "MPI_File_set_view failed", mpi_code)
|
|
||||||
} /* end if */
|
|
||||||
|
|
||||||
/* Forget the EOF value (see H5FD_mpio_get_eof()) --rpm 1999-08-06 */
|
/* Forget the EOF value (see H5FD_mpio_get_eof()) --rpm 1999-08-06 */
|
||||||
file->eof = HADDR_UNDEF;
|
file->eof = HADDR_UNDEF;
|
||||||
|
|
||||||
done:
|
done:
|
||||||
#ifdef OLD_METADATA_WRITE
|
#ifdef OLD_METADATA_WRITE
|
||||||
/* Guard against getting into metadate broadcast in failure cases */
|
/* if only p<round> writes, need to broadcast the ret_value to other processes */
|
||||||
if(ret_value!=FAIL) {
|
if ((type!=H5FD_MEM_DRAW) && H5_mpi_1_metawrite_g) {
|
||||||
/* if only p<round> writes, need to broadcast the ret_value to other processes */
|
if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm)))
|
||||||
if ((type!=H5FD_MEM_DRAW) && H5_mpi_1_metawrite_g) {
|
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
|
||||||
if (MPI_SUCCESS != (mpi_code=MPI_Bcast(&ret_value, sizeof(ret_value), MPI_BYTE, H5_PAR_META_WRITE, file->comm)))
|
|
||||||
HMPI_GOTO_ERROR(FAIL, "MPI_Bcast failed", mpi_code)
|
|
||||||
} /* end if */
|
|
||||||
} /* end if */
|
} /* end if */
|
||||||
#endif /* OLD_METADATA_WRITE */
|
#endif /* OLD_METADATA_WRITE */
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user