hdf5/testpar/t_mpi.c

454 lines
13 KiB
C
Raw Normal View History

/*
* MPIO independent overlapping writes.
*
* First n-1 processes open 1 file.
* Each of the n-1 processes writes chunks of data to the file in round-robin
* fashion, interleaved but not overlapping. Chunk sizes increase as the
* test proceeds, both to exercise different write sizes and to reduce the
* number of writes.
*
* Last process (n-1) just waits.
* First n-1 processes finish writing and close the file.
* Last process opens the same file and verifies the data.
*/
#include "testphdf5.h"
/* FILENAME and filenames must have the same number of names */
/* Base names of the test files; the trailing NULL entry is excluded when
 * parse_options() counts the names. */
const char *FILENAME[2]={
"MPItest",
NULL};
/* Full test file names, filled in by parse_options() through h5_fixname(). */
char filenames[2][200];
/* Error counter: incremented by parse_options() on bad options or a failed
 * h5_fixname(), and reported by main() at the end of the run. */
int nerrors = 0;
/* Non-zero turns on extra progress output; set by the -v option. */
int verbose = 0;
/* Created in main() with H5Pset_fapl_mpio() and later passed to h5_cleanup(). */
hid_t fapl; /* file access property list */
/*
 * Number of bytes written and then verified by test_mpio_overlap_writes
 * (1 MB).  The expansion is parenthesized so the macro behaves as a single
 * operand next to operators such as '/' or '%'.
 */
#define MPIO_TEST_WRITE_SIZE (1024*1024) /* 1 MB */
/*
 * MPIO independent interleaved writes test.
 *
 * Ranks 0..n-2 are split into one communicator, open `filename` together
 * and write MPIO_TEST_WRITE_SIZE bytes in round-robin chunks.  The chunk
 * size starts at 1 byte and doubles every round, capped at the size of the
 * local buffer.  Byte k of the file is set to (k & 0x7f).
 * The last rank waits on a barrier, then opens the file by itself and
 * compares every byte against that pattern.
 *
 * filename: path of the file to write and verify.
 *
 * Data mismatches are only printed (at most MAX_ERR_REPORT per buffer
 * unless verbose is set); this function does not add them to nerrors.
 *
 * NOTE(review): mpi_size/mpi_rank keep these exact names because
 * MAINPROCESS and VRFY come from testphdf5.h and presumably refer to
 * them -- confirm before renaming.
 */
void
test_mpio_overlap_writes(char *filename)
{
    int mpi_size, mpi_rank;
    MPI_Comm comm;
    MPI_Info info = MPI_INFO_NULL;
    int color, mrc;
    MPI_File fh;
    int i;
    int vrfyerrs;
    char buf[4093];             /* use some prime number for size */
    int bufsize = sizeof(buf);
    int stride;
    MPI_Offset mpi_off;
    MPI_Status mpi_stat;

    if (verbose) {
        printf("MPIO independent overlapping writes test on file %s\n",
               filename);
    }

    /* set up MPI parameters */
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

    /* Need at least 2 processes: one writer and one verifier. */
    if (mpi_size < 2) {
        if (MAINPROCESS) {
            printf("Need at least 2 processes to run MPIO test.\n");
        }
        /* Printed by every process that reaches this point. */
        printf(" -SKIP- \n");
        return;
    }

    /* splits processes 0 to n-2 into one comm. and the last one into another */
    color = ((mpi_rank < (mpi_size - 1)) ? 0 : 1);
    mrc = MPI_Comm_split(MPI_COMM_WORLD, color, mpi_rank, &comm);
    VRFY((mrc==MPI_SUCCESS), "Comm_split succeeded");

    if (color == 0) {
        /* First n-1 processes (color==0) open a file and write it */
        mrc = MPI_File_open(comm, filename, MPI_MODE_CREATE|MPI_MODE_RDWR,
                            info, &fh);
        VRFY((mrc==MPI_SUCCESS), "");

        stride = 1;
        mpi_off = mpi_rank * stride;
        while (mpi_off < MPIO_TEST_WRITE_SIZE) {
            /* make sure the write does not exceed the TEST_WRITE_SIZE */
            if (mpi_off + stride > MPIO_TEST_WRITE_SIZE) {
                stride = (int)(MPIO_TEST_WRITE_SIZE - mpi_off);
            }

            /* set data to some trivial pattern for easy verification */
            for (i = 0; i < stride; i++) {
                buf[i] = (char)((mpi_off + i) & 0x7f);
            }
            mrc = MPI_File_write_at(fh, mpi_off, buf, stride, MPI_BYTE,
                                    &mpi_stat);
            VRFY((mrc==MPI_SUCCESS), "");

            /* move the offset to the end of this round, i.e. just past the
             * chunk written by the last writer (rank n-2) */
            mpi_off += (mpi_size - 1 - mpi_rank) * stride;

            /* Increase chunk size without exceeding buffer size. */
            /* Then move the starting offset for next write. */
            stride *= 2;
            if (stride > bufsize) {
                stride = bufsize;
            }
            mpi_off += mpi_rank * stride;
        }

        /* close file and free the communicator */
        mrc = MPI_File_close(&fh);
        VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE");
        mrc = MPI_Comm_free(&comm);
        VRFY((mrc==MPI_SUCCESS), "MPI_Comm_free");

        /* sync with the other waiting processes */
        mrc = MPI_Barrier(MPI_COMM_WORLD);
        VRFY((mrc==MPI_SUCCESS), "Sync after writes");
    } else {
        /* last process waits till writes are done,
         * then opens file to verify data.
         */
        mrc = MPI_Barrier(MPI_COMM_WORLD);
        VRFY((mrc==MPI_SUCCESS), "Sync after writes");

        mrc = MPI_File_open(comm, filename, MPI_MODE_RDONLY,
                            info, &fh);
        VRFY((mrc==MPI_SUCCESS), "");

        stride = bufsize;
        for (mpi_off = 0; mpi_off < MPIO_TEST_WRITE_SIZE; mpi_off += bufsize) {
            /* make sure it does not read beyond end of data */
            if (mpi_off + stride > MPIO_TEST_WRITE_SIZE) {
                stride = (int)(MPIO_TEST_WRITE_SIZE - mpi_off);
            }
            mrc = MPI_File_read_at(fh, mpi_off, buf, stride, MPI_BYTE,
                                   &mpi_stat);
            VRFY((mrc==MPI_SUCCESS), "");

            vrfyerrs = 0;
            for (i = 0; i < stride; i++) {
                char expected;

                expected = (char)((mpi_off + i) & 0x7f);
                /* MPI_Offset has no printf conversion of its own, so the
                 * offset is cast to long long and printed with %lld. */
                if ((buf[i] != expected) &&
                    (vrfyerrs++ < MAX_ERR_REPORT || verbose)) {
                    printf("proc %d: found data error at [%lld], expect %d, got %d\n",
                           mpi_rank, (long long)(mpi_off + i), expected, buf[i]);
                }
            }
            if (vrfyerrs > MAX_ERR_REPORT && !verbose) {
                printf("proc %d: [more errors ...]\n", mpi_rank);
            }
        }

        /* close file and free the communicator */
        mrc = MPI_File_close(&fh);
        VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE");
        mrc = MPI_Comm_free(&comm);
        VRFY((mrc==MPI_SUCCESS), "MPI_Comm_free");
    }

    /*
     * one more sync to ensure all processes have done reading
     * before ending this test.
     */
    mrc = MPI_Barrier(MPI_COMM_WORLD);
    VRFY((mrc==MPI_SUCCESS), "Sync before leaving test");
}
/* Size constants used by the offset and GB-file tests. */
#define MB              (1 << 20)       /* 2**20 == 1024*1024 == 1048576 */
#define GB              (1 << 30)       /* 2**30 == 1024**3 == 1073741824 */
#define TWO_GB_LESS1    0x7fffffff      /* 2**31 - 1 == 2147483647 */
#define FOUR_GB_LESS1   4294967295L     /* 2**32 - 1 */
/*
 * Check that MPI_Offset values at and beyond 2**31 and 2**32 are computed
 * correctly, both by direct assignment and by stepping across each
 * boundary one byte at a time.
 *
 * Every check goes through INFO, so a failure is reported as information
 * only and does not abort the remaining tests.
 *
 * NOTE(review): mpi_size/mpi_rank keep these exact names because INFO
 * comes from testphdf5.h and presumably refers to them -- confirm.
 */
void
test_mpio_offset(void)
{
    int mpi_size, mpi_rank;
    MPI_Offset mpi_off;
    MPI_Offset before;          /* offset value prior to the current step */
    int steps_left;

    /* set up MPI parameters */
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

    if (verbose) {
        printf("MPIO OFFSET test\n");
    }

    /* ---- 2 GB: direct assignment ---- */
    mpi_off = (MPI_Offset)MB * 2048;
    INFO((mpi_off > 0), "2GB OFFSET assignment no overflow");
    INFO((mpi_off - 1 == TWO_GB_LESS1), "2GB OFFSET assignment succeed");

    /* ---- 2 GB: step from just below the boundary to above it ---- */
    mpi_off = TWO_GB_LESS1;
    steps_left = 3;
    while (steps_left-- > 0) {
        before = mpi_off;
        mpi_off += 1;
        /* the new value must still be positive ... */
        INFO((mpi_off > 0), "2GB OFFSET increment no overflow");
        /* ... and exactly one more than the previous value */
        INFO((mpi_off - 1 == before), "2GB OFFSET increment succeed");
    }

    /* ---- 4 GB: direct assignment ---- */
    mpi_off = (MPI_Offset)MB * 4096;
    INFO((mpi_off > 0), "4GB OFFSET assignment no overflow");
    INFO((mpi_off - 1 == FOUR_GB_LESS1), "4GB OFFSET assignment succeed");

    /* ---- 4 GB: step from just below the boundary to above it ---- */
    mpi_off = FOUR_GB_LESS1;
    steps_left = 3;
    while (steps_left-- > 0) {
        before = mpi_off;
        mpi_off += 1;
        /* the new value must still be positive ... */
        INFO((mpi_off > 0), "4GB OFFSET increment no overflow");
        /* ... and exactly one more than the previous value */
        INFO((mpi_off - 1 == before), "4GB OFFSET increment succeed");
    }
}
/*
* Test if MPIO can write file from under 2GB to over 2GB and then
* from under 4GB to over 4GB.
* Each process writes 1MB in round robin fashion.
* Then reads the file back in by reverse order, that is process 0
* reads the data of process n-1 and vice versa.
*/
void
test_mpio_gb_file(char *filename)
{
int mpi_size, mpi_rank;
MPI_Info info = MPI_INFO_NULL;
int mrc;
MPI_File fh;
int i, j, n;
int vrfyerrs;
int writerrs; /* write errors */
int ntimes; /* how many times */
char *buf;
char expected;
MPI_Offset mpi_off;
MPI_Status mpi_stat;
/* set up MPI parameters */
MPI_Comm_size(MPI_COMM_WORLD,&mpi_size);
MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank);
if (verbose)
printf("MPIO GB file test %s\n", filename);
buf = malloc(MB);
VRFY((buf!=NULL), "malloc succeed");
/* open a new file. Remove it first in case it exists. */
if (MAINPROCESS)
remove(filename);
MPI_Barrier(MPI_COMM_WORLD); /* prevent racing condition */
mrc = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_CREATE|MPI_MODE_RDWR,
info, &fh);
VRFY((mrc==MPI_SUCCESS), "");
printf("MPIO GB file write test %s\n", filename);
/* instead of writing every bytes of the file, we will just write
* some data around the 2 and 4 GB boundaries. That should cover
* potential integer overflow and filesystem size limits.
*/
writerrs = 0;
for (n=2; n <= 4; n+=2){
ntimes = GB/MB*n/mpi_size + 1;
for (i=ntimes-2; i <= ntimes; i++){
mpi_off = (i*mpi_size + mpi_rank)*(MPI_Offset)MB;
if (verbose)
printf("proc %d: write to mpi_off=%016llx, %lld\n",
mpi_rank, mpi_off, mpi_off);
/* set data to some trivial pattern for easy verification */
for (j=0; j<MB; j++)
*(buf+j) = i*mpi_size + mpi_rank;
if (verbose)
printf("proc %d: writing %d bytes at offset %lld\n",
mpi_rank, MB, mpi_off);
mrc = MPI_File_write_at(fh, mpi_off, buf, MB, MPI_BYTE, &mpi_stat);
INFO((mrc==MPI_SUCCESS), "");
if (mrc!=MPI_SUCCESS)
writerrs++;
}
}
/* close file and free the communicator */
mrc = MPI_File_close(&fh);
VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE");
mrc = MPI_Barrier(MPI_COMM_WORLD);
VRFY((mrc==MPI_SUCCESS), "Sync after writes");
/* open it again to verify the data written */
/* but only if there was no write errors */
printf("MPIO GB file read test %s\n", filename);
if (writerrs){
printf("proc %d: Skip read test due to previous write errors\n",
mpi_rank);
return;
}
mrc = MPI_File_open(MPI_COMM_WORLD, filename, MPI_MODE_RDONLY, info, &fh);
VRFY((mrc==MPI_SUCCESS), "");
/* Only read back parts of the file that have been written. */
for (n=2; n <= 4; n+=2){
ntimes = GB/MB*n/mpi_size + 1;
for (i=ntimes-2; i <= ntimes; i++){
mpi_off = (i*mpi_size + (mpi_size - mpi_rank - 1))*(MPI_Offset)MB;
if (verbose)
printf("proc %d: read from mpi_off=%016llx, %lld\n",
mpi_rank, mpi_off, mpi_off);
mrc = MPI_File_read_at(fh, mpi_off, buf, MB, MPI_BYTE, &mpi_stat);
INFO((mrc==MPI_SUCCESS), "");
expected = i*mpi_size + (mpi_size - mpi_rank - 1);
vrfyerrs=0;
for (j=0; j<MB; j++){
if ((*(buf+j) != expected) &&
(vrfyerrs++ < MAX_ERR_REPORT || verbose))
printf("proc %d: found data error at [%ld+%d], expect %d, got %d\n",
mpi_rank, mpi_off, j, expected, *(buf+j));
}
if (vrfyerrs > MAX_ERR_REPORT && !verbose)
printf("proc %d: [more errors ...]\n", mpi_rank);
}
}
/* close file and free the communicator */
mrc = MPI_File_close(&fh);
VRFY((mrc==MPI_SUCCESS), "MPI_FILE_CLOSE");
/*
* one more sync to ensure all processes have done reading
* before ending this test.
*/
mrc = MPI_Barrier(MPI_COMM_WORLD);
VRFY((mrc==MPI_SUCCESS), "Sync before leaving test");
}
/*
 * Parse the command line options and compose the test file names.
 *
 * Recognized options:
 *   -v           turn verbose output on
 *   -f <prefix>  use <prefix> as the file name prefix; the next argument
 *                must exist and must not start with '-'
 *   -h           ask for the help message
 * Parsing stops at the first argument that does not start with '-'.
 *
 * Returns 0 on success.  Returns 1 when the caller should print the usage
 * message: for -h (nerrors is left unchanged), and for a malformed or
 * unknown option or a failed h5_fixname() (nerrors is incremented).
 */
int
parse_options(int argc, char **argv)
{
    while (--argc) {
        if (**(++argv) != '-') {
            break;
        } else {
            switch (*(*argv+1)) {
            case 'v':
                verbose = 1;
                break;
            case 'f':
                /* -f needs a following argument that is not an option */
                if (--argc < 1) {
                    nerrors++;
                    return(1);
                }
                if (**(++argv) == '-') {
                    nerrors++;
                    return(1);
                }
                paraprefix = *argv;
                break;
            case 'h':
                /* print help message -- not counted as an error */
                return(1);
            default:
                nerrors++;
                return(1);
            }
        }
    } /*while*/

    /* compose the test filenames */
    {
        int i, n;
        hid_t plist;

        plist = H5Pcreate(H5P_FILE_ACCESS);
        H5Pset_fapl_mpio(plist, MPI_COMM_WORLD, MPI_INFO_NULL);

        n = sizeof(FILENAME)/sizeof(FILENAME[0]) - 1;   /* exclude the NULL */
        for (i = 0; i < n; i++) {
            if (h5_fixname(FILENAME[i], plist, filenames[i], sizeof(filenames[i]))
                    == NULL) {
                printf("h5_fixname failed\n");
                nerrors++;
                /* release the property list on the failure path too */
                H5Pclose(plist);
                return(1);
            }
        }
        H5Pclose(plist);

        printf("Test filenames are:\n");
        for (i = 0; i < n; i++) {
            printf(" %s\n", filenames[i]);
        }
    }

    return(0);
}
/*
 * Print the command-line synopsis and the meaning of each option to stdout.
 */
void
usage(void)
{
    /* One entry per output line, emitted in order. */
    static const char *const help_text[] = {
        "Usage: t_mpi [-v] [-f <prefix>]\n",
        "\t-v\t\tverbose on\n",
        "\t-f <prefix>\tfilename prefix\n",
        "\n"
    };
    const int nlines = (int)(sizeof(help_text) / sizeof(help_text[0]));
    int line;

    for (line = 0; line < nlines; line++) {
        fputs(help_text[line], stdout);
    }
}
/*
 * Test driver.
 *
 * Initializes MPI, creates the global MPI-IO file access property list,
 * parses the command line and then runs the offset, GB-file and
 * interleaved-write tests in that order.  Process 0 prints the final
 * summary based on its own nerrors count.
 *
 * Always returns 0: this test is informational only.
 *
 * NOTE(review): mpi_size/mpi_rank keep these exact names because
 * MAINPROCESS and MPI_BANNER come from testphdf5.h and presumably refer to
 * them -- confirm before renaming.
 */
int
main(int argc, char **argv)
{
    int mpi_size, mpi_rank;     /* mpi variables */

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);

    if (MAINPROCESS) {
        printf("===================================\n");
        printf("MPI functionality tests\n");
        printf("===================================\n");
    }

    fapl = H5Pcreate(H5P_FILE_ACCESS);
    H5Pset_fapl_mpio(fapl, MPI_COMM_WORLD, MPI_INFO_NULL);

    if (parse_options(argc, argv) != 0) {
        if (MAINPROCESS) {
            usage();
        }
        goto finish;
    }

    MPI_BANNER("MPIO OFFSET overflow test...");
    test_mpio_offset();

    MPI_BANNER("MPIO GB size file test...");
    test_mpio_gb_file(filenames[0]);

    MPI_BANNER("MPIO independent overlapping writes...");
    test_mpio_overlap_writes(filenames[0]);

finish:
    if (MAINPROCESS) {          /* only process 0 reports */
        printf("===================================\n");
        if (nerrors) {
            printf("***MPI tests detected %d errors***\n", nerrors);
        } else {
            printf("MPI tests finished with no errors\n");
        }
        printf("===================================\n");
    }

    /* fapl was set up with the MPI-IO driver, so clean up while MPI is
     * still initialized; h5_cleanup may end up making MPI calls, which
     * are not allowed once MPI_Finalize has returned. */
    h5_cleanup(FILENAME, fapl);

    MPI_Finalize();

    /* always return 0 as this test is informational only. */
    return(0);
}