[svn-r22763] Purpose:

HDFFV-8143 Provide a routine(s) for telling the user why the library broke collective data access

Description:
    Fixed the daily test failure (on ember) caused by the previous commit, r22735.
    Also changed the H5Pget_mpio_no_collective_cause() parameter type from
    H5D_mpio_no_collective_cause_t to uint32_t, because the routine now returns
    a combined bitmap value that may not be one of the enum-defined values.

Tested: 
    jam (linux32-LE), koala-pp (linux64-LE), ember, h5committest
This commit is contained in:
Jonathan Kim 2012-09-14 15:24:02 -05:00
parent 602c716f76
commit 0a5b0304ad
3 changed files with 29 additions and 40 deletions

View File

@ -103,7 +103,7 @@
#define H5D_MPIO_ACTUAL_IO_MODE_SIZE sizeof(H5D_mpio_actual_io_mode_t)
#define H5D_MPIO_ACTUAL_IO_MODE_DEF H5D_MPIO_NO_COLLECTIVE
/* Definitions for cause of broken collective io property */
#define H5D_MPIO_NO_COLLECTIVE_CAUSE_SIZE sizeof(H5D_mpio_no_collective_cause_t)
#define H5D_MPIO_NO_COLLECTIVE_CAUSE_SIZE sizeof(uint32_t)
#define H5D_MPIO_NO_COLLECTIVE_CAUSE_DEF H5D_MPIO_COLLECTIVE
/* Definitions for memory MPI type property */
#define H5FD_MPI_XFER_MEM_MPI_TYPE_SIZE sizeof(MPI_Datatype)
@ -292,11 +292,11 @@ H5P__dxfr_reg_prop(H5P_genclass_t *pclass)
HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class")
/* Register the local cause of broken collective I/O */
if(H5P_register_real(pclass, H5D_MPIO_LOCAL_NO_COLLECTIVE_CAUSE_NAME, H5D_MPIO_NO_COLLECTIVE_CAUSE_SIZE, &def_mpio_actual_io_mode, NULL, NULL, NULL, NULL, NULL, NULL, NULL) < 0)
if(H5P_register_real(pclass, H5D_MPIO_LOCAL_NO_COLLECTIVE_CAUSE_NAME, H5D_MPIO_NO_COLLECTIVE_CAUSE_SIZE, &def_mpio_no_collective_cause, NULL, NULL, NULL, NULL, NULL, NULL, NULL) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class")
/* Register the global cause of broken collective I/O */
if(H5P_register_real(pclass, H5D_MPIO_GLOBAL_NO_COLLECTIVE_CAUSE_NAME, H5D_MPIO_NO_COLLECTIVE_CAUSE_SIZE, &def_mpio_actual_io_mode, NULL, NULL, NULL, NULL, NULL, NULL, NULL) < 0)
if(H5P_register_real(pclass, H5D_MPIO_GLOBAL_NO_COLLECTIVE_CAUSE_NAME, H5D_MPIO_NO_COLLECTIVE_CAUSE_SIZE, &def_mpio_no_collective_cause, NULL, NULL, NULL, NULL, NULL, NULL, NULL) < 0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTINSERT, FAIL, "can't insert property into class")
/* Register the MPI memory type property */
@ -1385,7 +1385,7 @@ done:
*-------------------------------------------------------------------------
*/
herr_t
H5Pget_mpio_no_collective_cause(hid_t plist_id, H5D_mpio_no_collective_cause_t *local_no_collective_cause, H5D_mpio_no_collective_cause_t *global_no_collective_cause)
H5Pget_mpio_no_collective_cause(hid_t plist_id, uint32_t *local_no_collective_cause, uint32_t *global_no_collective_cause)
{
H5P_genplist_t *plist;
herr_t ret_value = SUCCEED; /* return value */

View File

@ -412,7 +412,7 @@ H5_DLL herr_t H5Pget_type_conv_cb(hid_t dxpl_id, H5T_conv_except_func_t *op, voi
#ifdef H5_HAVE_PARALLEL
H5_DLL herr_t H5Pget_mpio_actual_chunk_opt_mode(hid_t plist_id, H5D_mpio_actual_chunk_opt_mode_t *actual_chunk_opt_mode);
H5_DLL herr_t H5Pget_mpio_actual_io_mode(hid_t plist_id, H5D_mpio_actual_io_mode_t *actual_io_mode);
H5_DLL herr_t H5Pget_mpio_no_collective_cause(hid_t plist_id, H5D_mpio_no_collective_cause_t *local_no_collective_cause, H5D_mpio_no_collective_cause_t *global_no_collective_cause);
H5_DLL herr_t H5Pget_mpio_no_collective_cause(hid_t plist_id, uint32_t *local_no_collective_cause, uint32_t *global_no_collective_cause);
#endif /* H5_HAVE_PARALLEL */
/* Link creation property list (LCPL) routines */

View File

@ -3107,12 +3107,12 @@ actual_io_mode_tests(void) {
static void
test_no_collective_cause_mode(int selection_mode)
{
int no_collective_cause_local_write = 0;
int no_collective_cause_local_read = 0;
int no_collective_cause_local_expected = 0;
int no_collective_cause_global_write = 0;
int no_collective_cause_global_read = 0;
int no_collective_cause_global_expected = 0;
uint32_t no_collective_cause_local_write = 0;
uint32_t no_collective_cause_local_read = 0;
uint32_t no_collective_cause_local_expected = 0;
uint32_t no_collective_cause_global_write = 0;
uint32_t no_collective_cause_global_read = 0;
uint32_t no_collective_cause_global_expected = 0;
hsize_t coord[NELM][RANK];
const char * filename;
@ -3145,6 +3145,7 @@ test_no_collective_cause_mode(int selection_mode)
#endif
/* set to global value as default */
int l_facc_type = facc_type;
char message[256];
/* Set up MPI parameters */
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
@ -3401,17 +3402,12 @@ test_no_collective_cause_mode(int selection_mode)
"reading and writing are the same for global cause of Broken Collective I/O");
/* Test values */
if(no_collective_cause_local_expected != (unsigned) -1 && no_collective_cause_global_expected != (unsigned) -1) {
char message[100];
sprintf(message, "Local cause of Broken Collective I/O has the correct value for %s.\n",test_name);
VRFY((no_collective_cause_local_write == no_collective_cause_local_expected), message);
sprintf(message, "Global cause of Broken Collective I/O has the correct value for %s.\n",test_name);
VRFY((no_collective_cause_global_write == no_collective_cause_global_expected), message);
} else {
HDfprintf(stderr, "%s %d -> (%d,%d)\n", test_name, mpi_rank,
test_no_collective_cause_mode, no_collective_cause_local_write);
}
memset (message, 0, sizeof (message));
sprintf(message, "Local cause of Broken Collective I/O has the correct value for %s.\n",test_name);
VRFY((no_collective_cause_local_write == no_collective_cause_local_expected), message);
memset (message, 0, sizeof (message));
sprintf(message, "Global cause of Broken Collective I/O has the correct value for %s.\n",test_name);
VRFY((no_collective_cause_global_write == no_collective_cause_global_expected), message);
/* Release some resources */
if (sid)
@ -3465,10 +3461,10 @@ test_no_collective_cause_mode(int selection_mode)
static void
test_no_collective_cause_mode_filter(int selection_mode)
{
int no_collective_cause_local_read = 0;
int no_collective_cause_local_expected = 0;
int no_collective_cause_global_read = 0;
int no_collective_cause_global_expected = 0;
uint32_t no_collective_cause_local_read = 0;
uint32_t no_collective_cause_local_expected = 0;
uint32_t no_collective_cause_global_read = 0;
uint32_t no_collective_cause_global_expected = 0;
const char * filename;
const char * test_name;
@ -3497,6 +3493,7 @@ test_no_collective_cause_mode_filter(int selection_mode)
#ifdef H5_HAVE_FILTER_FLETCHER32
H5Z_filter_t filter_info;
#endif
char message[256];
/* Set up MPI parameters */
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
@ -3654,16 +3651,12 @@ test_no_collective_cause_mode_filter(int selection_mode)
VRFY((ret >= 0), "retriving no collective cause succeeded" );
/* Test values */
if(no_collective_cause_local_expected != (unsigned) -1 && no_collective_cause_global_expected != (unsigned) -1) {
char message[100];
sprintf(message, "Local cause of Broken Collective I/O has the correct value for %s.\n",test_name);
VRFY((no_collective_cause_local_read == no_collective_cause_local_expected), message);
sprintf(message, "Global cause of Broken Collective I/O has the correct value for %s.\n",test_name);
VRFY((no_collective_cause_global_read == no_collective_cause_global_expected), message);
} else {
HDfprintf(stderr, "%s %d -> (%d,%d)\n", test_name, mpi_rank,
test_no_collective_cause_mode_filter, no_collective_cause_local_read);
}
memset (message, 0, sizeof (message));
sprintf(message, "Local cause of Broken Collective I/O has the correct value for %s.\n",test_name);
VRFY((no_collective_cause_local_read == (uint32_t)no_collective_cause_local_expected), message);
memset (message, 0, sizeof (message));
sprintf(message, "Global cause of Broken Collective I/O has the correct value for %s.\n",test_name);
VRFY((no_collective_cause_global_read == (uint32_t)no_collective_cause_global_expected), message);
/* Release some resources */
if (sid)
@ -3701,8 +3694,6 @@ no_collective_cause_tests(void)
MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
MPI_Comm_size(MPI_COMM_WORLD, &mpi_rank);
/* skipped these not to disrupt other tests while finding a fix on ember */
#ifdef TODO_FIX_EMBER
/*
* Test individual cause
*/
@ -3730,8 +3721,6 @@ no_collective_cause_tests(void)
test_no_collective_cause_mode (TEST_DATATYPE_CONVERSION | TEST_DATA_TRANSFORMS);
test_no_collective_cause_mode (TEST_DATATYPE_CONVERSION | TEST_DATA_TRANSFORMS | TEST_POINT_SELECTIONS);
#endif /* TODO_FIX_EMBER */
return;
}