[svn-r5814] Purpose:

Bug Fix

Description:
    It was possible to create corrupted metadata information (either in memory
    or in the file or both) with a parallel I/O program because of the way
    metadata writes were being handled for writes out of the metadata cache.

Solution:
    Added a dataset transfer property called "block before metadata write"
    which is used by the MPI-I/O and MPI-posix drivers to sync up all the
    processes before attempting a metadata write.  This property is currently
    only for metadata writes from the metadata cache.

Platforms tested:
    IRIX64 6.5 (modi4) w/parallel
This commit is contained in:
Quincey Koziol 2002-07-19 14:27:09 -05:00
parent 814ea8b962
commit 99eee6dff9
12 changed files with 310 additions and 112 deletions

View File

@ -35,21 +35,23 @@ Bug Fixes since HDF5-1.4.0
Library
-------
* Fixed metadata corruption problem which could occur when many objects
are created in a file during parallel I/O. QAK - 2002/07/19
* Fixed VL memory leak when data is overwritten. The heap objects holding
old data are freed. If the fill value writting time is set to
H5D_FILL_TIME_NEVER, the library prohibits user to create VL type dataset.
The library free all the heap objects storing VL type if there is nested
VL type(a VL type contains another VL type). SLU 2002/07/10
VL type(a VL type contains another VL type). SLU - 2002/07/10
* Fixed bug in parallel I/O routines where a collective I/O which used
MPI derived types, followed by an independent I/O would cause the library
to hang. QAK 2002/06/24
to hang. QAK - 2002/06/24
* Fixed bug in chunking routines where they were using internal allocation
free routines, instead of malloc/free, preventing user filters from
working correctly. Chunks are now allocated/freed with malloc/free and
so should the chunks in user filters. QAK 2002/06/18
so should the chunks in user filters. QAK - 2002/06/18
* Fixed bug where regular hyperslab selection could get incorrectly
transferred when the number of elements in a row did not fit evenly
into the buffer provided. QAK 2002/06/12
into the buffer provided. QAK - 2002/06/12
* Fixed bug (#499) which allowed an "empty" compound or enumerated datatype
(one with no members) to be used to create a dataset or committed to a
file. QAK - 2002/06/11

View File

@ -201,6 +201,7 @@ H5_term_library(void)
pending += DOWN(TN);
pending += DOWN(T);
pending += DOWN(A);
pending += DOWN(AC);
pending += DOWN(P);
pending += DOWN(I);
} while (pending && ntries++ < 100);

View File

@ -31,7 +31,9 @@
#include "H5Eprivate.h"
#include "H5Fpkg.h"
#include "H5FLprivate.h" /*Free Lists */
#include "H5Iprivate.h" /* IDs */
#include "H5MMprivate.h"
#include "H5Pprivate.h" /* Property lists */
/*
* Sorting the cache by address before flushing is sometimes faster
@ -43,8 +45,16 @@
* Private file-scope variables.
*/
#define PABLO_MASK H5AC_mask
#define INTERFACE_INIT NULL
/* Interface initialization */
static int interface_initialize_g = 0;
#define INTERFACE_INIT H5AC_init_interface
static herr_t H5AC_init_interface(void);
#ifdef H5_HAVE_PARALLEL
/* Dataset transfer property list for flush calls */
static hid_t H5AC_dxpl_id=(-1);
#endif /* H5_HAVE_PARALLEL */
#ifdef H5AC_SORT_BY_ADDR
static H5AC_t *current_cache_g = NULL; /*for sorting */
@ -64,6 +74,107 @@ H5FL_ARR_DEFINE_STATIC(H5AC_info_ptr_t,-1);
H5FL_ARR_DEFINE_STATIC(H5AC_prot_t,-1);
#endif /* H5AC_DEBUG */
/*-------------------------------------------------------------------------
* Function: H5AC_init_interface
*
* Purpose: Initialize interface-specific information
*
* Return: Non-negative on success/Negative on failure
*
* Programmer: Quincey Koziol
* Thursday, July 18, 2002
*
* Modifications:
*
*-------------------------------------------------------------------------
*/
static herr_t
H5AC_init_interface(void)
{
#ifdef H5_HAVE_PARALLEL
H5P_genclass_t *xfer_pclass; /* Dataset transfer property list class object */
H5P_genplist_t *xfer_plist; /* Dataset transfer property list object */
unsigned block_before_meta_write=1; /* Custom value for "block before meta write" property */
#endif /* H5_HAVE_PARALLEL */
FUNC_ENTER_NOINIT(H5AC_init_interface);
#ifdef H5_HAVE_PARALLEL
/* Sanity check */
assert(H5P_CLS_DATASET_XFER_g!=(-1));
/* Get the dataset transfer property list class object */
if (H5I_GENPROP_CLS != H5I_get_type(H5P_CLS_DATASET_XFER_g) || NULL == (xfer_pclass = H5I_object(H5P_CLS_DATASET_XFER_g)))
HRETURN_ERROR(H5E_CACHE, H5E_BADATOM, FAIL, "can't get property list class");
/* Create a new dataset transfer property list */
if ((H5AC_dxpl_id=H5P_create_id(xfer_pclass)) < 0)
HRETURN_ERROR(H5E_CACHE, H5E_CANTCREATE, FAIL, "unable to register property list");
/* Get the property list object */
if (NULL == (xfer_plist = H5I_object(H5AC_dxpl_id)))
HRETURN_ERROR(H5E_CACHE, H5E_BADATOM, FAIL, "can't get new property list object");
/* Insert 'block before metadata write' property */
if(H5P_insert(xfer_plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,H5AC_BLOCK_BEFORE_META_WRITE_SIZE,&block_before_meta_write,NULL,NULL,NULL,NULL,NULL)<0)
HRETURN_ERROR(H5E_PLIST, H5E_CANTSET, FAIL, "can't insert metadata cache dxpl property");
#endif /* H5_HAVE_PARALLEL */
FUNC_LEAVE(SUCCEED);
} /* end H5AC_init_interface() */
/*-------------------------------------------------------------------------
* Function: H5AC_term_interface
*
* Purpose: Terminate this interface.
*
* Return: Success: Positive if anything was done that might
* affect other interfaces; zero otherwise.
*
* Failure: Negative.
*
* Programmer: Quincey Koziol
* Thursday, July 18, 2002
*
* Modifications:
*
*-------------------------------------------------------------------------
*/
int
H5AC_term_interface(void)
{
int n=0;
FUNC_ENTER_NOINIT(H5AC_term_interface);
if (interface_initialize_g) {
#ifdef H5_HAVE_PARALLEL
if(H5AC_dxpl_id>0) {
/* Indicate more work to do */
n = 1; /* H5I */
/* Close H5AC dxpl */
if (H5Pclose(H5AC_dxpl_id) < 0)
H5E_clear(); /*ignore the error*/
else {
/* Reset static ID */
H5AC_dxpl_id=(-1);
/* Reset interface initialization flag */
interface_initialize_g = 0;
} /* end else */
} /* end if */
else
#endif /* H5_HAVE_PARALLEL */
/* Reset interface initialization flag */
interface_initialize_g = 0;
} /* end if */
FUNC_LEAVE(n);
} /* end H5AC_term_interface() */
/*-------------------------------------------------------------------------
* Function: H5AC_create
@ -273,7 +384,7 @@ H5AC_find_f(H5F_t *f, const H5AC_class_t *type, haddr_t addr,
* Load a new thing. If it can't be loaded, then return an error
* without preempting anything.
*/
if (NULL == (thing = (type->load)(f, addr, udata1, udata2))) {
if (NULL == (thing = (type->load)(f, H5P_DATASET_XFER_DEFAULT, addr, udata1, udata2))) {
HRETURN_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL, "unable to load object");
}
/*
@ -283,13 +394,13 @@ H5AC_find_f(H5F_t *f, const H5AC_class_t *type, haddr_t addr,
H5AC_subid_t type_id=(*info)->type->id; /* Remember this for later */
flush = (*info)->type->flush;
status = (flush)(f, TRUE, (*info)->addr, (*info));
status = (flush)(f, H5AC_dxpl_id, TRUE, (*info)->addr, (*info));
if (status < 0) {
/*
* The old thing could not be removed from the stack.
* Release the new thing and fail.
*/
if ((type->flush)(f, TRUE, addr, thing) < 0) {
if ((type->flush)(f, H5AC_dxpl_id, TRUE, addr, thing) < 0) {
HRETURN_ERROR(H5E_CACHE, H5E_CANTFLUSH, NULL,
"unable to flush just-loaded object");
}
@ -413,6 +524,7 @@ H5AC_flush(H5F_t *f, const H5AC_class_t *type, haddr_t addr, hbool_t destroy)
cache = f->shared->cache;
if (!H5F_addr_defined(addr)) {
unsigned first_flush=1; /* Indicate if this is the first flush */
#ifdef H5AC_SORT_BY_ADDR
/*
@ -457,7 +569,14 @@ H5AC_flush(H5F_t *f, const H5AC_class_t *type, haddr_t addr, hbool_t destroy)
H5AC_subid_t type_id=(*info)->type->id; /* Remember this for later */
flush = (*info)->type->flush;
status = (flush)(f, destroy, (*info)->addr, (*info));
/* Only block for all the processes on the first piece of metadata */
if(first_flush) {
status = (flush)(f, H5AC_dxpl_id, destroy, (*info)->addr, (*info));
first_flush=0;
} /* end if */
else
status = (flush)(f, H5P_DATASET_XFER_DEFAULT, destroy, (*info)->addr, (*info));
if (status < 0) {
#ifdef H5AC_SORT_BY_ADDR
map = H5FL_ARR_FREE(int,map);
@ -493,7 +612,7 @@ H5AC_flush(H5F_t *f, const H5AC_class_t *type, haddr_t addr, hbool_t destroy)
* Flush just this entry.
*/
flush = cache->slot[i]->type->flush;
status = (flush)(f, destroy, cache->slot[i]->addr,
status = (flush)(f, H5AC_dxpl_id, destroy, cache->slot[i]->addr,
cache->slot[i]);
if (status < 0) {
HRETURN_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL,
@ -567,7 +686,7 @@ H5AC_set(H5F_t *f, const H5AC_class_t *type, haddr_t addr, void *thing)
H5AC_subid_t type_id=(*info)->type->id; /* Remember this for later */
flush = (*info)->type->flush;
status = (flush)(f, TRUE, (*info)->addr, (*info));
status = (flush)(f, H5AC_dxpl_id, TRUE, (*info)->addr, (*info));
if (status < 0) {
HRETURN_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL,
"unable to flush object");
@ -656,7 +775,7 @@ H5AC_rename(H5F_t *f, const H5AC_class_t *type, haddr_t old_addr,
H5AC_subid_t type_id=cache->slot[new_idx]->type->id; /* Remember this for later */
flush = cache->slot[new_idx]->type->flush;
status = (flush)(f, TRUE, cache->slot[new_idx]->addr,
status = (flush)(f, H5AC_dxpl_id, TRUE, cache->slot[new_idx]->addr,
cache->slot[new_idx]);
if (status < 0) {
HRETURN_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL,
@ -774,7 +893,7 @@ H5AC_protect(H5F_t *f, const H5AC_class_t *type, haddr_t addr,
* without preempting anything.
*/
cache->diagnostics[type->id].nmisses++;
if (NULL == (thing = (type->load)(f, addr, udata1, udata2))) {
if (NULL == (thing = (type->load)(f, H5P_DATASET_XFER_DEFAULT, addr, udata1, udata2))) {
HRETURN_ERROR(H5E_CACHE, H5E_CANTLOAD, NULL,
"unable to load object");
}
@ -860,7 +979,7 @@ H5AC_unprotect(H5F_t *f, const H5AC_class_t *type, haddr_t addr, void *thing)
assert(H5F_addr_ne((*info)->addr, addr));
flush = (*info)->type->flush;
status = (flush)(f, TRUE, (*info)->addr, (*info));
status = (flush)(f, H5AC_dxpl_id, TRUE, (*info)->addr, (*info));
if (status < 0) {
HRETURN_ERROR(H5E_CACHE, H5E_CANTFLUSH, FAIL,
"unable to flush object");

View File

@ -58,8 +58,8 @@ typedef enum H5AC_subid_t {
H5AC_NTYPES = 5 /*THIS MUST BE LAST! */
} H5AC_subid_t;
typedef void *(*H5AC_load_func_t)(H5F_t*, haddr_t addr, const void *udata1, void *udata2);
typedef herr_t (*H5AC_flush_func_t)(H5F_t*, hbool_t dest, haddr_t addr, void *thing);
typedef void *(*H5AC_load_func_t)(H5F_t*, hid_t dxpl_id, haddr_t addr, const void *udata1, void *udata2);
typedef herr_t (*H5AC_flush_func_t)(H5F_t*, hid_t dxpl_id, hbool_t dest, haddr_t addr, void *thing);
typedef struct H5AC_class_t {
H5AC_subid_t id;
@ -107,6 +107,13 @@ typedef struct H5AC_t {
} diagnostics[H5AC_NTYPES]; /*diagnostics for each type of object*/
} H5AC_t;
#ifdef H5_HAVE_PARALLEL
/* Definitions for "block before metadata write" property */
#define H5AC_BLOCK_BEFORE_META_WRITE_NAME "H5AC_block_before_meta_write"
#define H5AC_BLOCK_BEFORE_META_WRITE_SIZE sizeof(unsigned)
#define H5AC_BLOCK_BEFORE_META_WRITE_DEF 0
#endif /* H5_HAVE_PARALLEL */
/*
* Library prototypes.
*/

View File

@ -118,8 +118,8 @@ static H5B_ins_t H5B_insert_helper(H5F_t *f, haddr_t addr,
static herr_t H5B_insert_child(H5F_t *f, const H5B_class_t *type,
H5B_t *bt, int idx, haddr_t child,
H5B_ins_t anchor, void *md_key);
static herr_t H5B_flush(H5F_t *f, hbool_t destroy, haddr_t addr, H5B_t *b);
static H5B_t *H5B_load(H5F_t *f, haddr_t addr, const void *_type, void *udata);
static herr_t H5B_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5B_t *b);
static H5B_t *H5B_load(H5F_t *f, hid_t dxpl_id, haddr_t addr, const void *_type, void *udata);
static herr_t H5B_decode_key(H5F_t *f, H5B_t *bt, int idx);
static herr_t H5B_decode_keys(H5F_t *f, H5B_t *bt, int idx);
static size_t H5B_nodesize(H5F_t *f, const H5B_class_t *type,
@ -137,8 +137,8 @@ static herr_t H5B_assert(H5F_t *f, haddr_t addr, const H5B_class_t *type,
/* H5B inherits cache-like properties from H5AC */
static const H5AC_class_t H5AC_BT[1] = {{
H5AC_BT_ID,
(void *(*)(H5F_t*, haddr_t, const void*, void*))H5B_load,
(herr_t (*)(H5F_t*, hbool_t, haddr_t, void*))H5B_flush,
(H5AC_load_func_t)H5B_load,
(H5AC_flush_func_t)H5B_flush,
}};
/* Interface initialization? */
@ -343,10 +343,14 @@ H5B_Kvalue(H5F_t *f, const H5B_class_t *type)
* Modifications:
* Robb Matzke, 1999-07-28
* The ADDR argument is passed by value.
*
* Quincey Koziol, 2002-7-180
* Added dxpl parameter to allow more control over I/O from metadata
* cache.
*-------------------------------------------------------------------------
*/
static H5B_t *
H5B_load(H5F_t *f, haddr_t addr, const void *_type, void *udata)
H5B_load(H5F_t *f, hid_t dxpl_id, haddr_t addr, const void *_type, void *udata)
{
const H5B_class_t *type = (const H5B_class_t *) _type;
size_t total_nkey_size;
@ -380,7 +384,7 @@ H5B_load(H5F_t *f, haddr_t addr, const void *_type, void *udata)
HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, NULL,
"memory allocation failed");
}
if (H5F_block_read(f, H5FD_MEM_BTREE, addr, size, H5P_DATASET_XFER_DEFAULT, bt->page)<0) {
if (H5F_block_read(f, H5FD_MEM_BTREE, addr, size, dxpl_id, bt->page)<0) {
HGOTO_ERROR(H5E_BTREE, H5E_READERROR, NULL,
"can't read B-tree node");
}
@ -456,10 +460,14 @@ H5B_load(H5F_t *f, haddr_t addr, const void *_type, void *udata)
*
* Robb Matzke, 1999-07-28
* The ADDR argument is passed by value.
*
* Quincey Koziol, 2002-7-180
* Added dxpl parameter to allow more control over I/O from metadata
* cache.
*-------------------------------------------------------------------------
*/
static herr_t
H5B_flush(H5F_t *f, hbool_t destroy, haddr_t addr, H5B_t *bt)
H5B_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5B_t *bt)
{
int i;
size_t size = 0;
@ -525,7 +533,7 @@ H5B_flush(H5F_t *f, hbool_t destroy, haddr_t addr, H5B_t *bt)
* bother writing data for the child entries that don't exist or
* for the final unchanged children.
*/
if (H5F_block_write(f, H5FD_MEM_BTREE, addr, size, H5P_DATASET_XFER_DEFAULT, bt->page)<0) {
if (H5F_block_write(f, H5FD_MEM_BTREE, addr, size, dxpl_id, bt->page)<0) {
HRETURN_ERROR(H5E_BTREE, H5E_CANTFLUSH, FAIL,
"unable to save B-tree node to disk");
}

View File

@ -8,11 +8,6 @@
* Purpose: This is the MPI-2 I/O driver.
*
* Limitations:
* H5FD_mpio_read & H5FD_mpio_write
* Eventually these should choose collective or independent i/o
* based on a parameter that is passed down to it from H5Dwrite,
* rather than the access_parms (which are fixed at the open).
*
* H5FD_mpio_read
* One implementation of MPI/MPI-IO causes MPI_Get_count
* to return (incorrectly) a negative count. I (who?) added code
@ -21,6 +16,7 @@
* kluge is activated by #ifdef MPI_KLUGE0202.
*/
#include "H5private.h" /*library functions */
#include "H5ACprivate.h" /* Metadata cache */
#include "H5Eprivate.h" /*error handling */
#include "H5Fprivate.h" /*files */
#include "H5FDprivate.h" /*file driver */
@ -71,9 +67,9 @@ static herr_t H5FD_mpio_query(const H5FD_t *_f1, unsigned long *flags);
static haddr_t H5FD_mpio_get_eoa(H5FD_t *_file);
static herr_t H5FD_mpio_set_eoa(H5FD_t *_file, haddr_t addr);
static haddr_t H5FD_mpio_get_eof(H5FD_t *_file);
static herr_t H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id, haddr_t addr,
static herr_t H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
size_t size, void *buf);
static herr_t H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t fapl_id, haddr_t addr,
static herr_t H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
size_t size, const void *buf);
static herr_t H5FD_mpio_flush(H5FD_t *_file, unsigned closing);
@ -1210,6 +1206,7 @@ H5FD_mpio_read(H5FD_t *_file, H5FD_mem_t UNUSED type, hid_t dxpl_id, haddr_t add
/* Make certain we have the correct type of property list */
assert(H5I_GENPROP_LST==H5I_get_type(dxpl_id));
assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER));
assert(buf);
/* Portably initialize MPI status variable */
HDmemset(&mpi_stat,0,sizeof(MPI_Status));
@ -1484,6 +1481,12 @@ done:
* if the first I/O was a collective I/O using MPI derived types
* and the next I/O was an independent I/O.
*
* Quincey Koziol - 2002/07/18
* Added "block_before_meta_write" dataset transfer flag, which
* is set during writes from a metadata cache flush and indicates
* that all the processes must sync up before (one of them)
* writing metadata.
*
*-------------------------------------------------------------------------
*/
static herr_t
@ -1496,8 +1499,10 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
MPI_Offset mpi_off, mpi_disp;
MPI_Status mpi_stat;
MPI_Datatype buf_type, file_type;
int mpi_code; /* MPI return code */
int size_i, bytes_written;
unsigned use_view_this_time=0;
unsigned block_before_meta_write=0; /* Whether to block before a metadata write */
H5P_genplist_t *plist; /* Property list pointer */
herr_t ret_value=SUCCEED;
@ -1512,6 +1517,7 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
/* Make certain we have the correct type of property list */
assert(H5I_GENPROP_LST==H5I_get_type(dxpl_id));
assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER));
assert(buf);
/* Portably initialize MPI status variable */
HDmemset(&mpi_stat,0,sizeof(MPI_Status));
@ -1582,19 +1588,36 @@ H5FD_mpio_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
HGOTO_ERROR(H5E_INTERNAL, H5E_MPI, FAIL, "MPI_File_set_view failed");
} /* end if */
/* Only p<round> will do the actual write if all procs in comm write same data */
if ((type!=H5FD_MEM_DRAW) && H5_mpi_1_metawrite_g) {
if (file->mpi_rank != file->mpi_round) {
/* Metadata specific actions */
if(type!=H5FD_MEM_DRAW) {
/* Check if we need to syncronize all processes before attempting metadata write
* (Prevents race condition where the process writing the metadata goes ahead
* and writes the metadata to the file before all the processes have
* read the data, "transmitting" data from the "future" to the reading
* process. -QAK )
*/
if(H5P_exist_plist(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME)>0)
if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property");
if(block_before_meta_write)
if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
/* Only p<round> will do the actual write if all procs in comm write same metadata */
if (H5_mpi_1_metawrite_g) {
if (file->mpi_rank != file->mpi_round) {
#ifdef H5FDmpio_DEBUG
if (H5FD_mpio_Debug[(int)'w']) {
fprintf(stdout,
" proc %d: in H5FD_mpio_write (write omitted)\n",
file->mpi_rank );
}
if (H5FD_mpio_Debug[(int)'w']) {
fprintf(stdout,
" proc %d: in H5FD_mpio_write (write omitted)\n",
file->mpi_rank );
}
#endif
HGOTO_DONE(SUCCEED) /* skip the actual write */
HGOTO_DONE(SUCCEED) /* skip the actual write */
}
}
}
} /* end if */
/* Write the data. */
assert(H5FD_MPIO_INDEPENDENT==dx->xfer_mode || H5FD_MPIO_COLLECTIVE==dx->xfer_mode);
@ -1681,18 +1704,6 @@ done:
/* Round-robin rotate to the next process */
file->mpi_round = (++file->mpi_round)%file->mpi_size;
#ifdef QAK
{
int max,min;
MPI_Allreduce(&file->mpi_round, &max, 1, MPI_INT, MPI_MAX, file->comm);
MPI_Allreduce(&file->mpi_round, &min, 1, MPI_INT, MPI_MIN, file->comm);
if(max!=file->mpi_round)
printf("%s: rank=%d, round=%d, max=%d\n",FUNC,file->mpi_rank,file->mpi_round,max);
if(min!=file->mpi_round)
printf("%s: rank=%d, round=%d, min=%d\n",FUNC,file->mpi_rank,file->mpi_round,min);
}
#endif /* QAK */
} /* end if */
} /* end if */

View File

@ -32,6 +32,7 @@
*
*/
#include "H5private.h" /*library functions */
#include "H5ACprivate.h" /* Metadata cache */
#include "H5Eprivate.h" /*error handling */
#include "H5Fprivate.h" /*files */
#include "H5FDprivate.h" /*file driver */
@ -965,21 +966,31 @@ done:
*
* Modifications:
*
* Quincey Koziol - 2002/07/18
* Added "block_before_meta_write" dataset transfer flag, which
* is set during writes from a metadata cache flush and indicates
* that all the processes must sync up before (one of them)
* writing metadata.
*
*-------------------------------------------------------------------------
*/
static herr_t
H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t UNUSED dxpl_id, haddr_t addr,
H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t dxpl_id, haddr_t addr,
size_t size, const void *buf)
{
H5FD_mpiposix_t *file = (H5FD_mpiposix_t*)_file;
int mpi_code; /* MPI return code */
ssize_t nbytes; /* Number of bytes written each I/O call */
H5P_genplist_t *plist; /* Property list pointer */
unsigned block_before_meta_write=0; /* Whether to block before a metadata write */
herr_t ret_value=SUCCEED; /* Return value */
FUNC_ENTER_NOAPI(H5FD_mpiposix_write, FAIL);
assert(file);
assert(H5FD_MPIPOSIX==file->pub.driver_id);
assert(H5I_GENPROP_LST==H5I_get_type(dxpl_id));
assert(TRUE==H5P_isa_class(dxpl_id,H5P_DATASET_XFER));
assert(buf);
/* Check for overflow conditions */
@ -990,17 +1001,36 @@ H5FD_mpiposix_write(H5FD_t *_file, H5FD_mem_t type, hid_t UNUSED dxpl_id, haddr_
if (addr+size>file->eoa)
HGOTO_ERROR(H5E_ARGS, H5E_OVERFLOW, FAIL, "addr overflow");
/* Only p<round> will do the actual write if all procs in comm write same data */
if ((type!=H5FD_MEM_DRAW) && H5_mpiposix_1_metawrite_g) {
if (file->mpi_rank != file->mpi_round)
HGOTO_DONE(SUCCEED) /* skip the actual write */
/* Obtain the data transfer properties */
if(NULL == (plist = H5I_object(dxpl_id)))
HGOTO_ERROR(H5E_ARGS, H5E_BADTYPE, FAIL, "not a file access property list");
/* Metadata specific actions */
if(type!=H5FD_MEM_DRAW) {
/* Check if we need to syncronize all processes before attempting metadata write
* (Prevents race condition where the process writing the metadata goes ahead
* and writes the metadata to the file before all the processes have
* read the data, "transmitting" data from the "future" to the reading
* process. -QAK )
*/
if(H5P_exist_plist(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME)>0)
if(H5P_get(plist,H5AC_BLOCK_BEFORE_META_WRITE_NAME,&block_before_meta_write)<0)
HGOTO_ERROR(H5E_PLIST, H5E_CANTGET, FAIL, "can't get H5AC property");
if(block_before_meta_write)
if (MPI_SUCCESS!= (mpi_code=MPI_Barrier(file->comm)))
HMPI_GOTO_ERROR(FAIL, "MPI_Barrier failed", mpi_code);
/* Only p<round> will do the actual write if all procs in comm write same metadata */
if (H5_mpiposix_1_metawrite_g)
if (file->mpi_rank != file->mpi_round)
HGOTO_DONE(SUCCEED) /* skip the actual write */
} /* end if */
/* Seek to the correct location */
if ((addr!=file->pos || OP_WRITE!=file->op) &&
file_seek(file->fd, (file_offset_t)addr, SEEK_SET)<0)
HGOTO_ERROR(H5E_IO, H5E_SEEKERROR, FAIL,
"unable to seek to proper position");
HGOTO_ERROR(H5E_IO, H5E_SEEKERROR, FAIL, "unable to seek to proper position");
/*
* Write the data, being careful of interrupted system calls and partial
@ -1039,18 +1069,6 @@ done:
/* Round-robin rotate to the next process */
file->mpi_round = (++file->mpi_round)%file->mpi_size;
#ifdef QAK
{
int max,min;
MPI_Allreduce(&file->mpi_round, &max, 1, MPI_INT, MPI_MAX, file->comm);
MPI_Allreduce(&file->mpi_round, &min, 1, MPI_INT, MPI_MIN, file->comm);
if(max!=file->mpi_round)
printf("%s: rank=%d, round=%d, max=%d\n",FUNC,file->mpi_rank,file->mpi_round,max);
if(min!=file->mpi_round)
printf("%s: rank=%d, round=%d, min=%d\n",FUNC,file->mpi_rank,file->mpi_round,min);
}
#endif /* QAK */
} /* end if */
} /* end else */

View File

@ -46,9 +46,9 @@ static size_t H5G_node_size(H5F_t *f);
static herr_t H5G_node_create(H5F_t *f, H5B_ins_t op, void *_lt_key,
void *_udata, void *_rt_key,
haddr_t *addr_p/*out*/);
static herr_t H5G_node_flush(H5F_t *f, hbool_t destroy, haddr_t addr,
static herr_t H5G_node_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr,
H5G_node_t *sym);
static H5G_node_t *H5G_node_load(H5F_t *f, haddr_t addr, const void *_udata1,
static H5G_node_t *H5G_node_load(H5F_t *f, hid_t dxpl_id, haddr_t addr, const void *_udata1,
void *_udata2);
static int H5G_node_cmp2(H5F_t *f, void *_lt_key, void *_udata,
void *_rt_key);
@ -70,7 +70,7 @@ static size_t H5G_node_sizeof_rkey(H5F_t *f, const void *_udata);
const H5AC_class_t H5AC_SNODE[1] = {{
H5AC_SNODE_ID,
(H5AC_load_func_t)H5G_node_load,
(herr_t (*)(H5F_t*, hbool_t, haddr_t, void*))H5G_node_flush,
(H5AC_flush_func_t)H5G_node_flush,
}};
/* H5G inherits B-tree like properties from H5B */
@ -315,10 +315,14 @@ H5G_node_create(H5F_t *f, H5B_ins_t UNUSED op, void *_lt_key,
*
* Robb Matzke, 1999-07-28
* The ADDR argument is passed by value.
*
* Quincey Koziol, 2002-7-180
* Added dxpl parameter to allow more control over I/O from metadata
* cache.
*-------------------------------------------------------------------------
*/
static herr_t
H5G_node_flush(H5F_t *f, hbool_t destroy, haddr_t addr, H5G_node_t *sym)
H5G_node_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5G_node_t *sym)
{
uint8_t *buf = NULL, *p = NULL;
size_t size;
@ -376,7 +380,7 @@ H5G_node_flush(H5F_t *f, hbool_t destroy, haddr_t addr, H5G_node_t *sym)
H5G_ent_encode_vec(f, &p, sym->entry, sym->nsyms);
HDmemset(p, 0, size - (p - buf));
status = H5F_block_write(f, H5FD_MEM_BTREE, addr, size, H5P_DATASET_XFER_DEFAULT, buf);
status = H5F_block_write(f, H5FD_MEM_BTREE, addr, size, dxpl_id, buf);
if (status < 0)
HRETURN_ERROR(H5E_SYM, H5E_WRITEERROR, FAIL,
"unable to write symbol table node to the file");
@ -415,10 +419,14 @@ H5G_node_flush(H5F_t *f, hbool_t destroy, haddr_t addr, H5G_node_t *sym)
* Modifications:
* Robb Matzke, 1999-07-28
* The ADDR argument is passed by value.
*
* Quincey Koziol, 2002-7-180
* Added dxpl parameter to allow more control over I/O from metadata
* cache.
*-------------------------------------------------------------------------
*/
static H5G_node_t *
H5G_node_load(H5F_t *f, haddr_t addr, const void * UNUSED _udata1,
H5G_node_load(H5F_t *f, hid_t dxpl_id, haddr_t addr, const void * UNUSED _udata1,
void * UNUSED _udata2)
{
H5G_node_t *sym = NULL;
@ -450,9 +458,9 @@ H5G_node_load(H5F_t *f, haddr_t addr, const void * UNUSED _udata1,
HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, NULL,
"memory allocation failed");
}
if (H5F_block_read(f, H5FD_MEM_BTREE, addr, size, H5P_DATASET_XFER_DEFAULT, buf) < 0) {
if (H5F_block_read(f, H5FD_MEM_BTREE, addr, size, dxpl_id, buf) < 0) {
HGOTO_ERROR(H5E_SYM, H5E_READERROR, NULL,
"unabel to read symbol table node");
"unable to read symbol table node");
}
/* magic */
if (HDmemcmp(p, H5G_NODE_MAGIC, H5G_NODE_SIZEOF_MAGIC)) {

View File

@ -56,9 +56,9 @@ struct H5HG_heap_t {
};
/* PRIVATE PROTOTYPES */
static H5HG_heap_t *H5HG_load(H5F_t *f, haddr_t addr, const void *udata1,
static H5HG_heap_t *H5HG_load(H5F_t *f, hid_t dxpl_id, haddr_t addr, const void *udata1,
void *udata2);
static herr_t H5HG_flush(H5F_t *f, hbool_t dest, haddr_t addr,
static herr_t H5HG_flush(H5F_t *f, hid_t dxpl_id, hbool_t dest, haddr_t addr,
H5HG_heap_t *heap);
/*
@ -66,8 +66,8 @@ static herr_t H5HG_flush(H5F_t *f, hbool_t dest, haddr_t addr,
*/
static const H5AC_class_t H5AC_GHEAP[1] = {{
H5AC_GHEAP_ID,
(void *(*)(H5F_t*, haddr_t, const void*, void*))H5HG_load,
(herr_t (*)(H5F_t*, hbool_t, haddr_t, void*))H5HG_flush,
(H5AC_load_func_t)H5HG_load,
(H5AC_flush_func_t)H5HG_flush,
}};
/* Interface initialization */
@ -221,10 +221,14 @@ H5HG_create (H5F_t *f, size_t size)
* Modifications:
* Robb Matzke, 1999-07-28
* The ADDR argument is passed by value.
*
* Quincey Koziol, 2002-7-180
* Added dxpl parameter to allow more control over I/O from metadata
* cache.
*-------------------------------------------------------------------------
*/
static H5HG_heap_t *
H5HG_load (H5F_t *f, haddr_t addr, const void * UNUSED udata1,
H5HG_load (H5F_t *f, hid_t dxpl_id, haddr_t addr, const void * UNUSED udata1,
void * UNUSED udata2)
{
H5HG_heap_t *heap = NULL;
@ -251,7 +255,7 @@ H5HG_load (H5F_t *f, haddr_t addr, const void * UNUSED udata1,
HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, NULL,
"memory allocation failed");
}
if (H5F_block_read(f, H5FD_MEM_GHEAP, addr, H5HG_MINSIZE, H5P_DATASET_XFER_DEFAULT,
if (H5F_block_read(f, H5FD_MEM_GHEAP, addr, H5HG_MINSIZE, dxpl_id,
heap->chunk)<0) {
HGOTO_ERROR (H5E_HEAP, H5E_READERROR, NULL,
"unable to read global heap collection");
@ -288,7 +292,7 @@ H5HG_load (H5F_t *f, haddr_t addr, const void * UNUSED udata1,
"memory allocation failed");
}
if (H5F_block_read (f, H5FD_MEM_GHEAP, next_addr, (heap->size-H5HG_MINSIZE),
H5P_DATASET_XFER_DEFAULT, heap->chunk+H5HG_MINSIZE)<0) {
dxpl_id, heap->chunk+H5HG_MINSIZE)<0) {
HGOTO_ERROR (H5E_HEAP, H5E_READERROR, NULL,
"unable to read global heap collection");
}
@ -397,10 +401,14 @@ H5HG_load (H5F_t *f, haddr_t addr, const void * UNUSED udata1,
* Modifications:
* Robb Matzke, 1999-07-28
* The ADDR argument is passed by value.
*
* Quincey Koziol, 2002-7-180
* Added dxpl parameter to allow more control over I/O from metadata
* cache.
*-------------------------------------------------------------------------
*/
static herr_t
H5HG_flush (H5F_t *f, hbool_t destroy, haddr_t addr, H5HG_heap_t *heap)
H5HG_flush (H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5HG_heap_t *heap)
{
int i;
@ -414,7 +422,7 @@ H5HG_flush (H5F_t *f, hbool_t destroy, haddr_t addr, H5HG_heap_t *heap)
if (heap->dirty) {
if (H5F_block_write (f, H5FD_MEM_GHEAP, addr, heap->size,
H5P_DATASET_XFER_DEFAULT, heap->chunk)<0) {
dxpl_id, heap->chunk)<0) {
HRETURN_ERROR (H5E_HEAP, H5E_WRITEERROR, FAIL,
"unable to write global heap collection to file");
}

View File

@ -52,17 +52,17 @@ typedef struct H5HL_t {
} H5HL_t;
/* PRIVATE PROTOTYPES */
static H5HL_t *H5HL_load(H5F_t *f, haddr_t addr, const void *udata1,
static H5HL_t *H5HL_load(H5F_t *f, hid_t dxpl_id, haddr_t addr, const void *udata1,
void *udata2);
static herr_t H5HL_flush(H5F_t *f, hbool_t dest, haddr_t addr, H5HL_t *heap);
static herr_t H5HL_flush(H5F_t *f, hid_t dxpl_id, hbool_t dest, haddr_t addr, H5HL_t *heap);
/*
* H5HL inherits cache-like properties from H5AC
*/
static const H5AC_class_t H5AC_LHEAP[1] = {{
H5AC_LHEAP_ID,
(void *(*)(H5F_t*, haddr_t, const void*, void*))H5HL_load,
(herr_t (*)(H5F_t*, hbool_t, haddr_t, void*))H5HL_flush,
(H5AC_load_func_t)H5HL_load,
(H5AC_flush_func_t)H5HL_flush,
}};
/* Interface initialization */
@ -195,10 +195,14 @@ H5HL_create(H5F_t *f, size_t size_hint, haddr_t *addr_p/*out*/)
* Modifications:
* Robb Matzke, 1999-07-28
* The ADDR argument is passed by value.
*
* Quincey Koziol, 2002-7-180
* Added dxpl parameter to allow more control over I/O from metadata
* cache.
*-------------------------------------------------------------------------
*/
static H5HL_t *
H5HL_load(H5F_t *f, haddr_t addr, const void * UNUSED udata1,
H5HL_load(H5F_t *f, hid_t dxpl_id, haddr_t addr, const void * UNUSED udata1,
void * UNUSED udata2)
{
uint8_t hdr[52];
@ -217,7 +221,7 @@ H5HL_load(H5F_t *f, haddr_t addr, const void * UNUSED udata1,
assert(!udata1);
assert(!udata2);
if (H5F_block_read(f, H5FD_MEM_LHEAP, addr, H5HL_SIZEOF_HDR(f), H5P_DATASET_XFER_DEFAULT,
if (H5F_block_read(f, H5FD_MEM_LHEAP, addr, H5HL_SIZEOF_HDR(f), dxpl_id,
hdr) < 0) {
HRETURN_ERROR(H5E_HEAP, H5E_READERROR, NULL,
"unable to read heap header");
@ -258,7 +262,7 @@ H5HL_load(H5F_t *f, haddr_t addr, const void * UNUSED udata1,
}
if (heap->disk_alloc &&
H5F_block_read(f, H5FD_MEM_LHEAP, heap->addr, heap->disk_alloc,
H5P_DATASET_XFER_DEFAULT, heap->chunk + H5HL_SIZEOF_HDR(f)) < 0) {
dxpl_id, heap->chunk + H5HL_SIZEOF_HDR(f)) < 0) {
HGOTO_ERROR(H5E_HEAP, H5E_CANTLOAD, NULL,
"unable to read heap data");
}
@ -323,10 +327,14 @@ H5HL_load(H5F_t *f, haddr_t addr, const void * UNUSED udata1,
*
* Robb Matzke, 1999-07-28
* The ADDR argument is passed by value.
*
* Quincey Koziol, 2002-7-180
* Added dxpl parameter to allow more control over I/O from metadata
* cache.
*-------------------------------------------------------------------------
*/
static herr_t
H5HL_flush(H5F_t *f, hbool_t destroy, haddr_t addr, H5HL_t *heap)
H5HL_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5HL_t *heap)
{
uint8_t *p = heap->chunk;
H5HL_free_t *fl = heap->freelist;
@ -394,19 +402,18 @@ H5HL_flush(H5F_t *f, hbool_t destroy, haddr_t addr, H5HL_t *heap)
/* The header and data are contiguous */
if (H5F_block_write(f, H5FD_MEM_LHEAP, addr,
(H5HL_SIZEOF_HDR(f)+heap->disk_alloc),
H5P_DATASET_XFER_DEFAULT, heap->chunk) < 0) {
dxpl_id, heap->chunk) < 0) {
HRETURN_ERROR(H5E_HEAP, H5E_WRITEERROR, FAIL,
"unable to write heap header and data to file");
}
} else {
if (H5F_block_write(f, H5FD_MEM_LHEAP, addr, H5HL_SIZEOF_HDR(f),
H5P_DATASET_XFER_DEFAULT, heap->chunk)<0) {
dxpl_id, heap->chunk)<0) {
HRETURN_ERROR(H5E_HEAP, H5E_WRITEERROR, FAIL,
"unable to write heap header to file");
}
if (H5F_block_write(f, H5FD_MEM_LHEAP, heap->addr, heap->disk_alloc,
H5P_DATASET_XFER_DEFAULT,
heap->chunk + H5HL_SIZEOF_HDR(f)) < 0) {
dxpl_id, heap->chunk + H5HL_SIZEOF_HDR(f)) < 0) {
HRETURN_ERROR(H5E_HEAP, H5E_WRITEERROR, FAIL,
"unable to write heap data to file");
}

View File

@ -30,8 +30,8 @@
#define PABLO_MASK H5O_mask
/* PRIVATE PROTOTYPES */
static herr_t H5O_flush(H5F_t *f, hbool_t destroy, haddr_t addr, H5O_t *oh);
static H5O_t *H5O_load(H5F_t *f, haddr_t addr, const void *_udata1,
static herr_t H5O_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5O_t *oh);
static H5O_t *H5O_load(H5F_t *f, hid_t dxpl_id, haddr_t addr, const void *_udata1,
void *_udata2);
static unsigned H5O_find_in_ohdr(H5F_t *f, haddr_t addr,
const H5O_class_t **type_p, int sequence);
@ -44,8 +44,8 @@ static herr_t H5O_touch_oh(H5F_t *f, H5O_t *oh, hbool_t force);
/* H5O inherits cache-like properties from H5AC */
static const H5AC_class_t H5AC_OHDR[1] = {{
H5AC_OHDR_ID,
(void *(*)(H5F_t *, haddr_t, const void *, void *)) H5O_load,
(herr_t (*)(H5F_t *, hbool_t, haddr_t, void *)) H5O_flush,
(H5AC_load_func_t)H5O_load,
(H5AC_flush_func_t)H5O_flush,
}};
/* Interface initialization */
@ -339,10 +339,14 @@ H5O_close(H5G_entry_t *obj_ent)
*
* Robb Matzke, 1999-07-28
* The ADDR argument is passed by value.
*
* Quincey Koziol, 2002-7-180
* Added dxpl parameter to allow more control over I/O from metadata
* cache.
*-------------------------------------------------------------------------
*/
static H5O_t *
H5O_load(H5F_t *f, haddr_t addr, const void * UNUSED _udata1,
H5O_load(H5F_t *f, hid_t dxpl_id, haddr_t addr, const void * UNUSED _udata1,
void * UNUSED _udata2)
{
H5O_t *oh = NULL;
@ -376,7 +380,7 @@ H5O_load(H5F_t *f, haddr_t addr, const void * UNUSED _udata1,
/* read fixed-lenth part of object header */
hdr_size = H5O_SIZEOF_HDR(f);
assert(hdr_size<=sizeof(buf));
if (H5F_block_read(f, H5FD_MEM_OHDR, addr, hdr_size, H5P_DATASET_XFER_DEFAULT, buf) < 0) {
if (H5F_block_read(f, H5FD_MEM_OHDR, addr, hdr_size, dxpl_id, buf) < 0) {
HGOTO_ERROR(H5E_OHDR, H5E_READERROR, NULL,
"unable to read object header");
}
@ -434,7 +438,7 @@ H5O_load(H5F_t *f, haddr_t addr, const void * UNUSED _udata1,
HGOTO_ERROR (H5E_RESOURCE, H5E_NOSPACE, NULL,
"memory allocation failed");
}
if (H5F_block_read(f, H5FD_MEM_OHDR, chunk_addr, chunk_size, H5P_DATASET_XFER_DEFAULT,
if (H5F_block_read(f, H5FD_MEM_OHDR, chunk_addr, chunk_size, dxpl_id,
oh->chunk[chunkno].image) < 0) {
HGOTO_ERROR(H5E_OHDR, H5E_READERROR, NULL,
"unable to read object header data");
@ -537,10 +541,14 @@ done:
*
* Robb Matzke, 1999-07-28
* The ADDR argument is passed by value.
*
* Quincey Koziol, 2002-7-180
* Added dxpl parameter to allow more control over I/O from metadata
* cache.
*-------------------------------------------------------------------------
*/
static herr_t
H5O_flush(H5F_t *f, hbool_t destroy, haddr_t addr, H5O_t *oh)
H5O_flush(H5F_t *f, hid_t dxpl_id, hbool_t destroy, haddr_t addr, H5O_t *oh)
{
uint8_t buf[16], *p;
int id;
@ -586,7 +594,7 @@ H5O_flush(H5F_t *f, hbool_t destroy, haddr_t addr, H5O_t *oh)
} /* end if */
else {
if (H5F_block_write(f, H5FD_MEM_OHDR, addr, H5O_SIZEOF_HDR(f),
H5P_DATASET_XFER_DEFAULT, buf) < 0) {
dxpl_id, buf) < 0) {
HRETURN_ERROR(H5E_OHDR, H5E_WRITEERROR, FAIL,
"unable to write object header hdr to disk");
}
@ -669,7 +677,7 @@ H5O_flush(H5F_t *f, hbool_t destroy, haddr_t addr, H5O_t *oh)
/* Write the combined prefix/chunk out */
if (H5F_block_write(f, H5FD_MEM_OHDR, addr,
(H5O_SIZEOF_HDR(f)+oh->chunk[u].size),
H5P_DATASET_XFER_DEFAULT, p) < 0) {
dxpl_id, p) < 0) {
HRETURN_ERROR(H5E_OHDR, H5E_WRITEERROR, FAIL,
"unable to write object header data to disk");
} /* end if */
@ -680,7 +688,7 @@ H5O_flush(H5F_t *f, hbool_t destroy, haddr_t addr, H5O_t *oh)
else {
if (H5F_block_write(f, H5FD_MEM_OHDR, oh->chunk[u].addr,
(oh->chunk[u].size),
H5P_DATASET_XFER_DEFAULT, oh->chunk[u].image) < 0) {
dxpl_id, oh->chunk[u].image) < 0) {
HRETURN_ERROR(H5E_OHDR, H5E_WRITEERROR, FAIL,
"unable to write object header data to disk");
} /* end if */

View File

@ -1181,6 +1181,7 @@ __DLL__ void H5_term_library(void);
/* Functions to terminate interfaces */
__DLL__ int H5A_term_interface(void);
__DLL__ int H5AC_term_interface(void);
__DLL__ int H5D_term_interface(void);
__DLL__ int H5F_term_interface(void);
__DLL__ int H5G_term_interface(void);