[svn-r12044] Purpose:
Add new tests

Description:
Collective IO doesn't work on some platforms/MPI-IO packages when more than one process has no contribution to the IO.

Solution:
1. Add a collective IO test to verify the correctness of the library when more than one process has no contribution to the IO.
2. Add a similar MPI-IO-level test in t_mpi to help us maintain support on more platforms.

Platforms tested: heping, mir, copper

Misc. update:
parent 3ba9425ab2
commit 0f0a5eb918
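For illustration only (not part of the commit), the MPI-IO pattern exercised by the new tests boils down to the following standalone sketch: every rank enters MPI_File_write_at_all collectively, but all ranks except 0 and 1 pass a zero count and therefore contribute nothing to the IO. The file name and buffer size here are arbitrary.

/* Minimal sketch of a "special" collective write: more than one rank writes 0 bytes. */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_File   fh;
    MPI_Status status;
    char       data[16];
    int        rank, count, i;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    for (i = 0; i < 16; i++)
        data[i] = (char)(rank + i);

    /* Only the first two ranks contribute data; every other rank writes 0 bytes. */
    count = (rank == 0 || rank == 1) ? 16 : 0;

    MPI_File_open(MPI_COMM_WORLD, "special_coll.dat",
                  MPI_MODE_CREATE | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);

    /* Collective call: ranks with count == 0 must still participate. */
    MPI_File_write_at_all(fh, (MPI_Offset)(rank * 16), data, count,
                          MPI_BYTE, &status);

    MPI_File_close(&fh);
    MPI_Finalize();
    return 0;
}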
@@ -470,7 +470,7 @@ ccslab_set(int mpi_rank,
       block[1]  = 1;
       stride[0] = 1;
       stride[1] = 1;
-      count[0]  = ((mpi_rank == (mpi_size-1))?0:SPACE_DIM1/mpi_size);
+      count[0]  = ((mpi_rank >= MAX(1,(mpi_size-2)))?0:SPACE_DIM1/mpi_size);
       count[1]  = SPACE_DIM2;
       start[0]  = mpi_rank*count[0];
       start[1]  = 0;
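Illustration only (not part of the diff): with the new formula, the two highest ranks get an empty selection, so more than one process contributes nothing to the collective IO. The snippet below just evaluates count[0] for an assumed mpi_size of 4 and an assumed SPACE_DIM1 of 24; the real values come from the test headers.

/* Worked example of the new count[0] rule for 4 ranks. */
#include <stdio.h>

#define MAX(a,b)   (((a) > (b)) ? (a) : (b))
#define SPACE_DIM1 24          /* assumed value, for the arithmetic only */

int main(void)
{
    int mpi_size = 4;          /* assumed process count */
    int mpi_rank;

    for (mpi_rank = 0; mpi_rank < mpi_size; mpi_rank++) {
        int count0 = ((mpi_rank >= MAX(1,(mpi_size-2)))?0:SPACE_DIM1/mpi_size);
        printf("rank %d: count[0] = %d\n", mpi_rank, count0);
    }
    /* Prints: rank 0: 6, rank 1: 6, rank 2: 0, rank 3: 0 */
    return 0;
}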
testpar/t_mpi.c (260 changed lines)
@@ -829,6 +829,177 @@ static int test_mpio_derived_dtype(char *filename) {
 #endif
     return retcode;
 }
+/*
+
+   Function: test_mpio_special_collective
+
+   Test whether collective IO still works when more than one process has
+   no contribution to the IO. To properly test this case, at least FOUR
+   processes are needed.
+
+   1. Details of the test:
+      1) Create one derived datatype with MPI_Type_hindexed.
+
+      2) Choose at least two processes that contribute nothing to the IO by
+         setting their buffer count inside MPI_File_write_at_all to 0.
+      3) Choose at least two processes that have real contributions to the IO.
+      4) Do collective IO.
+
+   2. This test will fail with MPI-IO packages that do not support this,
+      for example mpich 1.2.6.
+
+      If this bug has been fixed in a previously non-working package, this test
+      will issue a printf message telling the developer to change the
+      configure-specific file of HDF5 so that we can change our configuration
+      setting to support special collective IO.
+
+      If it turns out that a previously working MPI-IO package no longer works,
+      this test will also issue a message reporting the failure so that we can
+      turn off the support for special collective IO.
+*/
+
+static int test_mpio_special_collective(char *filename) {
+
+    char         hostname[128];
+    int          mpi_size, mpi_rank;
+    MPI_File     fh;
+    MPI_Datatype etype, buftype, filetype;
+    char         mpi_err_str[MPI_MAX_ERROR_STRING];
+    int          mpi_err_strlen;
+    int          mpi_err;
+    char         writedata[2*DIMSIZE];
+    char        *buf;
+    char         expect_val;
+    int          i, irank;
+    int          count, bufcount;
+    int          blocklens[2];
+    MPI_Aint     offsets[2];
+    int          nerrors = 0;      /* number of errors */
+    MPI_Offset   mpi_off;
+    MPI_Status   mpi_stat;
+    int          retcode;
+
+    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
+    retcode = 0;
+
+    /* create MPI data type */
+    etype = MPI_BYTE;
+    if(mpi_rank == 0 || mpi_rank == 1) {
+        count    = DIMSIZE;
+        bufcount = 1;
+    }
+    else {
+        count    = 0;
+        bufcount = 0;
+    }
+
+    blocklens[0] = count;
+    offsets[0]   = mpi_rank*count;
+    blocklens[1] = count;
+    offsets[1]   = (mpi_size+mpi_rank)*count;
+
+    if(count != 0) {
+        if((mpi_err = MPI_Type_hindexed(2,blocklens,offsets,etype,&filetype))
+                != MPI_SUCCESS){
+            MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+            printf("MPI_Type_hindexed failed (%s)\n", mpi_err_str);
+            return 1;
+        }
+
+        if((mpi_err = MPI_Type_commit(&filetype)) != MPI_SUCCESS){
+            MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+            printf("MPI_Type_commit failed (%s)\n", mpi_err_str);
+            return 1;
+        }
+
+
+        if((mpi_err = MPI_Type_hindexed(2,blocklens,offsets,etype,&buftype))
+                != MPI_SUCCESS){
+            MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+            printf("MPI_Type_hindexed failed (%s)\n", mpi_err_str);
+            return 1;
+        }
+
+        if((mpi_err = MPI_Type_commit(&buftype)) != MPI_SUCCESS){
+            MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+            printf("MPI_Type_commit failed (%s)\n", mpi_err_str);
+            return 1;
+        }
+    }
+    else {
+
+        filetype = MPI_BYTE;
+        buftype  = MPI_BYTE;
+    }
+
+    /* Open a file */
+    if ((mpi_err = MPI_File_open(MPI_COMM_WORLD, filename,
+                                 MPI_MODE_RDWR | MPI_MODE_CREATE,
+                                 MPI_INFO_NULL, &fh))
+            != MPI_SUCCESS){
+        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+        printf("MPI_File_open failed (%s)\n", mpi_err_str);
+        return 1;
+    }
+
+    /* each process writes some data */
+    for (i=0; i < 2*DIMSIZE; i++)
+        writedata[i] = mpi_rank*DIMSIZE + i;
+
+
+    mpi_off = 0;
+    if((mpi_err = MPI_File_set_view(fh, mpi_off, MPI_BYTE, filetype, "native", MPI_INFO_NULL))
+            != MPI_SUCCESS) {
+        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+        printf("MPI_File_set_view failed (%s)\n", mpi_err_str);
+        return 1;
+    }
+
+    buf = writedata;
+    if ((mpi_err = MPI_File_write_at_all(fh, mpi_off, buf, bufcount, buftype,
+                                         &mpi_stat))
+            != MPI_SUCCESS){
+        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+        printf("MPI_File_write_at offset(%ld), bytes (%d), failed (%s)\n",
+               (long) mpi_off, bufcount, mpi_err_str);
+        return 1;
+    }
+
+    if ((mpi_err = MPI_File_close(&fh))
+            != MPI_SUCCESS){
+        MPI_Error_string(mpi_err, mpi_err_str, &mpi_err_strlen);
+        printf("MPI_File_close failed.\n");
+        return 1;
+    }
+
+    mpi_err = MPI_Barrier(MPI_COMM_WORLD);
+#ifdef H5_MPI_SPECIAL_COLLECTIVE_IO_WORKS
+    if(retcode != 0) {
+        if(mpi_rank == 0) {
+            printf("special collective IO is NOT working on this platform\n");
+            printf("Go back to hdf5/config and find the corresponding\n");
+            printf("configure-specific file (for example, powerpc-ibm-aix5.x) and add\n");
+            printf("hdf5_mpi_special_collective_io_works=${hdf5_mpi_special_collective_io_works='no'}\n");
+            printf("at the end of the file.\n");
+            printf("Please report to hdfhelp@ncsa.uiuc.edu about this problem.\n");
+        }
+        retcode = 1;
+    }
+#else
+    if(retcode == 0) {
+        if(mpi_rank == 0) {
+            printf("This is NOT an error. What it really says is that\n");
+            printf("special collective IO is WORKING on this platform.\n");
+            printf("Go back to hdf5/config and find the corresponding\n");
+            printf("configure-specific file (for example, powerpc-ibm-aix5.x) and delete the line\n");
+            printf("hdf5_mpi_special_collective_io_works=${hdf5_mpi_special_collective_io_works='no'}\n");
+            printf("at the end of the file.\n");
+            printf("Please report to hdfhelp@ncsa.uiuc.edu about this problem.\n");
+        }
+        retcode = 1;
+    }
+#endif
+    return retcode;
+}
+
 /*
  * parse the command line options
  */
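For reference, here is a standalone sketch (not part of the commit) of the two-block MPI_Type_hindexed layout that test_mpio_special_collective builds: ranks 0 and 1 each describe two DIMSIZE-byte blocks in the file, while every other rank describes nothing. DIMSIZE is assumed to be 32 here purely for the printout, and MPI_Type_create_hindexed is used as the standard spelling of the MPI_Type_hindexed call in the test.

/* Prints each rank's contribution under the test's hindexed file layout. */
#include <mpi.h>
#include <stdio.h>

#define DIMSIZE 32   /* assumed value, for illustration only */

int main(int argc, char **argv)
{
    int          mpi_rank, mpi_size, count, type_size;
    int          blocklens[2];
    MPI_Aint     offsets[2];
    MPI_Datatype filetype;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);

    /* Same rule as the test: only ranks 0 and 1 contribute. */
    count = (mpi_rank == 0 || mpi_rank == 1) ? DIMSIZE : 0;

    blocklens[0] = count;
    offsets[0]   = (MPI_Aint)mpi_rank * count;
    blocklens[1] = count;
    offsets[1]   = (MPI_Aint)(mpi_size + mpi_rank) * count;

    if (count != 0) {
        MPI_Type_create_hindexed(2, blocklens, offsets, MPI_BYTE, &filetype);
        MPI_Type_commit(&filetype);
        MPI_Type_size(filetype, &type_size);
        printf("rank %d: writes %d bytes, blocks start at file offsets %ld and %ld\n",
               mpi_rank, type_size, (long)offsets[0], (long)offsets[1]);
        MPI_Type_free(&filetype);
    }
    else {
        printf("rank %d: contributes nothing to the collective write\n", mpi_rank);
    }

    MPI_Finalize();
    return 0;
}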
@@ -947,31 +1118,6 @@ main(int argc, char **argv)
     /* set alarm. */
     ALARM_ON;
 
-    /*=======================================
-     * MPIO complicated derived datatype test
-     *=======================================*/
-    /* test_mpio_derived_dtype often hangs when it fails.
-     * Do not run it if it is known NOT to work unless asked to
-     * run explicitly by high verbose mode.
-     */
-#ifdef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
-    MPI_BANNER("MPIO complicated derived datatype test...");
-    ret_code = test_mpio_derived_dtype(filenames[0]);
-#else
-    if (VERBOSE_HI){
-        MPI_BANNER("MPIO complicated derived datatype test...");
-        ret_code = test_mpio_derived_dtype(filenames[0]);
-    }else{
-        MPI_BANNER("MPIO complicated derived datatype test SKIPPED.");
-        ret_code = 0;    /* fake ret_code */
-    }
-#endif
-    ret_code = errors_sum(ret_code);
-    if (mpi_rank==0 && ret_code > 0){
-        printf("***FAILED with %d total errors\n", ret_code);
-        nerrors += ret_code;
-    }
-
 
     /*=======================================
      * MPIO 1 write Many read test
@@ -1028,6 +1174,70 @@ main(int argc, char **argv)
         nerrors += ret_code;
     }
 
+    /*=======================================
+     * MPIO complicated derived datatype test
+     *=======================================*/
+    /* test_mpio_derived_dtype often hangs when it fails.
+     * Do not run it if it is known NOT to work unless asked to
+     * run explicitly by high verbose mode.
+     */
+#ifdef H5_MPI_COMPLEX_DERIVED_DATATYPE_WORKS
+    MPI_BANNER("MPIO complicated derived datatype test...");
+    ret_code = test_mpio_derived_dtype(filenames[0]);
+#else
+    if (VERBOSE_HI){
+        MPI_BANNER("MPIO complicated derived datatype test...");
+        ret_code = test_mpio_derived_dtype(filenames[0]);
+    }else{
+        MPI_BANNER("MPIO complicated derived datatype test SKIPPED.");
+        ret_code = 0;    /* fake ret_code */
+    }
+#endif
+    ret_code = errors_sum(ret_code);
+    if (mpi_rank==0 && ret_code > 0){
+        printf("***FAILED with %d total errors\n", ret_code);
+        nerrors += ret_code;
+    }
+
+    /*=======================================
+     * MPIO special collective IO test
+     *=======================================*/
+    /* test_mpio_special_collective often hangs when it fails.
+     * Do not run it if it is known NOT to work unless asked to
+     * run explicitly by high verbose mode.
+     */
+    if(mpi_size != 4){
+        MPI_BANNER("MPIO special collective io test SKIPPED.");
+        if(mpi_rank == 0){
+            printf("Use FOUR processes to run this test\n");
+            printf("If you still see the <test SKIPPED>, use the <-vh> option to verify the test\n");
+        }
+        ret_code = 0;
+        goto sc_finish;
+    }
+
+#ifdef H5_MPI_SPECIAL_COLLECTIVE_IO_WORKS
+    MPI_BANNER("MPIO special collective io test...");
+    ret_code = test_mpio_special_collective(filenames[0]);
+
+#else
+    if (VERBOSE_HI){
+        MPI_BANNER("MPIO special collective io test...");
+        ret_code = test_mpio_special_collective(filenames[0]);
+    }else{
+        MPI_BANNER("MPIO special collective io test SKIPPED.");
+        ret_code = 0;    /* fake ret_code */
+    }
+#endif
+
+sc_finish:
+    ret_code = errors_sum(ret_code);
+    if (mpi_rank==0 && ret_code > 0){
+        printf("***FAILED with %d total errors\n", ret_code);
+        nerrors += ret_code;
+    }
+
+
 finish:
     /* make sure all processes are finished before final report, cleanup
      * and exit.