mirror of https://github.com/HDFGroup/hdf5.git (synced 2025-01-18 15:15:56 +08:00)

Update parallel compression feature to support multi-dataset I/O (#3591)

parent 7631015ea4
commit bfbfaf72e1

@@ -61,8 +61,8 @@ H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE);
H5Dwrite(..., dxpl_id, ...);
```

The following are two simple examples of using the parallel compression
feature:
The following are two simple examples of using the parallel
compression feature:

[ph5_filtered_writes.c](https://github.com/HDFGroup/hdf5/blob/develop/examples/ph5_filtered_writes.c)

@@ -76,9 +76,30 @@ Remember that the feature requires these writes to use collective
I/O, so the MPI ranks which have nothing to contribute must still
participate in the collective write call.
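
A minimal sketch of this participation pattern (not taken from the patch itself): it assumes
the application has already opened `dset_id`, created `mem_space`/`file_space`, filled
`data_buf`, and that `have_data`, `start`, and `count` describe this rank's possibly empty
selection.

```c
hid_t dxpl_id = H5Pcreate(H5P_DATASET_XFER);
H5Pset_dxpl_mpio(dxpl_id, H5FD_MPIO_COLLECTIVE);

if (have_data) {
    /* Select the portion of the dataset this rank will write */
    H5Sselect_hyperslab(file_space, H5S_SELECT_SET, start, NULL, count, NULL);
}
else {
    /* Nothing to contribute: select zero elements, but still participate */
    H5Sselect_none(file_space);
    H5Sselect_none(mem_space);
}

/* Every rank makes the collective call, even with an empty selection */
H5Dwrite(dset_id, H5T_NATIVE_INT, mem_space, file_space, dxpl_id, data_buf);

H5Pclose(dxpl_id);
```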

## Multi-dataset I/O support

The parallel compression feature is supported when using the
multi-dataset I/O API routines ([H5Dwrite_multi](https://hdfgroup.github.io/hdf5/group___h5_d.html#gaf6213bf3a876c1741810037ff2bb85d8)/[H5Dread_multi](https://hdfgroup.github.io/hdf5/group___h5_d.html#ga8eb1c838aff79a17de385d0707709915)), but the
following should be kept in mind:

- Parallel writes to filtered datasets **must** still be collective,
  even when using the multi-dataset I/O API routines

- When the multi-dataset I/O API routines are passed a mixture of
  filtered and unfiltered datasets, the library currently has to
  perform I/O on them separately in two phases. Since there is
  some slight complexity involved in this, it may be best (depending
  on the number of datasets, number of selected chunks, number of
  filtered vs. unfiltered datasets, etc.) to make two individual
  multi-dataset I/O calls, one for the filtered datasets and one
  for the unfiltered datasets. When performing writes to the datasets,
  this would also allow independent write access to the unfiltered
  datasets if desired, while still performing collective writes to
  the filtered datasets, as sketched below.
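
A minimal sketch of that two-call approach, assuming the application has already partitioned
its datasets into hypothetical `filt_*` and `unf_*` arrays with counts `n_filtered` and
`n_unfiltered`:

```c
/* One collective call for the filtered datasets... */
hid_t coll_dxpl = H5Pcreate(H5P_DATASET_XFER);
H5Pset_dxpl_mpio(coll_dxpl, H5FD_MPIO_COLLECTIVE);
H5Dwrite_multi(n_filtered, filt_dset_ids, filt_mem_type_ids,
               filt_mem_space_ids, filt_file_space_ids, coll_dxpl, filt_bufs);

/* ...and one call for the unfiltered datasets, which may be independent if desired */
hid_t indep_dxpl = H5Pcreate(H5P_DATASET_XFER);
H5Pset_dxpl_mpio(indep_dxpl, H5FD_MPIO_INDEPENDENT);
H5Dwrite_multi(n_unfiltered, unf_dset_ids, unf_mem_type_ids,
               unf_mem_space_ids, unf_file_space_ids, indep_dxpl, unf_bufs);

H5Pclose(coll_dxpl);
H5Pclose(indep_dxpl);
```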

## Incremental file space allocation support

HDF5's [file space allocation time](https://portal.hdfgroup.org/display/HDF5/H5P_SET_ALLOC_TIME)
HDF5's [file space allocation time](https://hdfgroup.github.io/hdf5/group___d_c_p_l.html#ga85faefca58387bba409b65c470d7d851)
is a dataset creation property that can have significant effects
on application performance, especially if the application uses
parallel HDF5. In a serial HDF5 application, the default file space

@@ -97,7 +118,7 @@ While this strategy has worked in the past, it has some noticeable
drawbacks. For one, the larger the chunked dataset being created,
the more noticeable overhead there will be during dataset creation
as all of the data chunks are being allocated in the HDF5 file.
Further, these data chunks will, by default, be [filled](https://portal.hdfgroup.org/display/HDF5/H5P_SET_FILL_VALUE)
Further, these data chunks will, by default, be [filled](https://hdfgroup.github.io/hdf5/group___d_c_p_l.html#ga4335bb45b35386daa837b4ff1b9cd4a4)
with HDF5's default fill data value, leading to extraordinary
dataset creation overhead and resulting in pre-filling large
portions of a dataset that the application might have been planning

@@ -105,7 +126,7 @@ to overwrite anyway. Even worse, there will be more initial overhead
from compressing that fill data before writing it out, only to have
it read back in, unfiltered and modified the first time a chunk is
written to. In the past, it was typically suggested that parallel
HDF5 applications should use [H5Pset_fill_time](https://portal.hdfgroup.org/display/HDF5/H5P_SET_FILL_TIME)
HDF5 applications should use [H5Pset_fill_time](https://hdfgroup.github.io/hdf5/group___d_c_p_l.html#ga6bd822266b31f86551a9a1d79601b6a2)
with a value of `H5D_FILL_TIME_NEVER` in order to disable writing of
the fill value to dataset chunks, but this isn't ideal if the
application actually wishes to make use of fill values.
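
A minimal sketch of the dataset creation property settings discussed in this section,
assuming a 2-D chunked, deflate-compressed dataset where `chunk_dims` and `fill_val`
come from the application:

```c
hid_t dcpl_id = H5Pcreate(H5P_DATASET_CREATE);
H5Pset_chunk(dcpl_id, 2, chunk_dims);
H5Pset_deflate(dcpl_id, 6);

/* Allocate file space for chunks incrementally, as they are first written to,
 * instead of allocating (and filling) every chunk at dataset creation time */
H5Pset_alloc_time(dcpl_id, H5D_ALLOC_TIME_INCR);

/* Optionally keep a fill value, written only when a chunk is actually allocated */
H5Pset_fill_value(dcpl_id, H5T_NATIVE_INT, &fill_val);
/* Or, as the older workaround mentioned above, skip fill value writes entirely:
 * H5Pset_fill_time(dcpl_id, H5D_FILL_TIME_NEVER); */
```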

@@ -199,14 +220,14 @@ chunks to end up at addresses in the file that do not align
well with the underlying file system, possibly leading to
poor performance. As an example, Lustre performance is generally
good when writes are aligned with the chosen stripe size.
The HDF5 application can use [H5Pset_alignment](https://portal.hdfgroup.org/display/HDF5/H5P_SET_ALIGNMENT)
The HDF5 application can use [H5Pset_alignment](https://hdfgroup.github.io/hdf5/group___f_a_p_l.html#gab99d5af749aeb3896fd9e3ceb273677a)
to have a bit more control over where objects in the HDF5
file end up. However, do note that setting the alignment
of objects generally wastes space in the file and has the
potential to dramatically increase its resulting size, so
caution should be used when choosing the alignment parameters.
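
A minimal sketch of setting the two alignment parameters described just below; the 1 MiB
threshold and 16 MiB alignment are illustrative values only (for example, matching a chosen
Lustre stripe size):

```c
hid_t fapl_id = H5Pcreate(H5P_FILE_ACCESS);
H5Pset_fapl_mpio(fapl_id, MPI_COMM_WORLD, MPI_INFO_NULL);

/* Align every object of 1 MiB or larger on a 16 MiB boundary */
H5Pset_alignment(fapl_id, 1048576, 16777216);
```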

[H5Pset_alignment](https://portal.hdfgroup.org/display/HDF5/H5P_SET_ALIGNMENT)
[H5Pset_alignment](https://hdfgroup.github.io/hdf5/group___f_a_p_l.html#gab99d5af749aeb3896fd9e3ceb273677a)
has two parameters that control the alignment of objects in
the HDF5 file, the "threshold" value and the alignment
value. The threshold value specifies that any object greater

@@ -243,19 +264,19 @@ in a file, this can create significant amounts of free space
in the file over its lifetime and eventually cause performance
issues.

An HDF5 application can use [H5Pset_file_space_strategy](http://portal.hdfgroup.org/display/HDF5/H5P_SET_FILE_SPACE_STRATEGY)
An HDF5 application can use [H5Pset_file_space_strategy](https://hdfgroup.github.io/hdf5/group___f_c_p_l.html#ga167ff65f392ca3b7f1933b1cee1b9f70)
with a value of `H5F_FSPACE_STRATEGY_PAGE` to enable the paged
aggregation feature, which can accumulate metadata and raw
data for dataset data chunks into well-aligned, configurably
sized "pages" for better performance. However, note that using
the paged aggregation feature will cause any setting from
[H5Pset_alignment](https://portal.hdfgroup.org/display/HDF5/H5P_SET_ALIGNMENT)
[H5Pset_alignment](https://hdfgroup.github.io/hdf5/group___f_a_p_l.html#gab99d5af749aeb3896fd9e3ceb273677a)
to be ignored. While an application should be able to get
comparable performance effects by [setting the size of these pages](http://portal.hdfgroup.org/display/HDF5/H5P_SET_FILE_SPACE_PAGE_SIZE) to be equal to the value that
would have been set for [H5Pset_alignment](https://portal.hdfgroup.org/display/HDF5/H5P_SET_ALIGNMENT),
comparable performance effects by [setting the size of these pages](https://hdfgroup.github.io/hdf5/group___f_c_p_l.html#gad012d7f3c2f1e1999eb1770aae3a4963) to be equal to the value that
would have been set for [H5Pset_alignment](https://hdfgroup.github.io/hdf5/group___f_a_p_l.html#gab99d5af749aeb3896fd9e3ceb273677a),
this may not necessarily be the case and should be studied.
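
A minimal sketch of enabling paged aggregation with a page size equal to the value that would
otherwise have been used for the alignment (16 MiB here, purely illustrative):

```c
hid_t fcpl_id = H5Pcreate(H5P_FILE_CREATE);

/* Enable paged aggregation; the 'persist' parameter is discussed just below */
H5Pset_file_space_strategy(fcpl_id, H5F_FSPACE_STRATEGY_PAGE, false, (hsize_t)1);

/* Use the would-be alignment value as the file space page size */
H5Pset_file_space_page_size(fcpl_id, 16777216);
```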

Note that [H5Pset_file_space_strategy](http://portal.hdfgroup.org/display/HDF5/H5P_SET_FILE_SPACE_STRATEGY)
Note that [H5Pset_file_space_strategy](https://hdfgroup.github.io/hdf5/group___f_c_p_l.html#ga167ff65f392ca3b7f1933b1cee1b9f70)
has a `persist` parameter. This determines whether or not the
file free space manager should include extra metadata in the
HDF5 file about free space sections in the file. If this

@@ -279,12 +300,12 @@ hid_t file_id = H5Fcreate("file.h5", H5F_ACC_TRUNC, fcpl_id, fapl_id);

While the parallel compression feature requires that the HDF5
application set and maintain collective I/O at the application
interface level (via [H5Pset_dxpl_mpio](https://portal.hdfgroup.org/display/HDF5/H5P_SET_DXPL_MPIO)),
interface level (via [H5Pset_dxpl_mpio](https://hdfgroup.github.io/hdf5/group___d_x_p_l.html#ga001a22b64f60b815abf5de8b4776f09e)),
it does not require that the actual MPI I/O that occurs at
the lowest layers of HDF5 be collective; independent I/O may
perform better depending on the application I/O patterns and
parallel file system performance, among other factors. The
application may use [H5Pset_dxpl_mpio_collective_opt](https://portal.hdfgroup.org/display/HDF5/H5P_SET_DXPL_MPIO_COLLECTIVE_OPT)
application may use [H5Pset_dxpl_mpio_collective_opt](https://hdfgroup.github.io/hdf5/group___d_x_p_l.html#gacb30d14d1791ec7ff9ee73aa148a51a3)
to control this setting and see which I/O method provides the
best performance.

@@ -297,7 +318,7 @@ H5Dwrite(..., dxpl_id, ...);

### Runtime HDF5 Library version

An HDF5 application can use the [H5Pset_libver_bounds](http://portal.hdfgroup.org/display/HDF5/H5P_SET_LIBVER_BOUNDS)
An HDF5 application can use the [H5Pset_libver_bounds](https://hdfgroup.github.io/hdf5/group___f_a_p_l.html#gacbe1724e7f70cd17ed687417a1d2a910)
routine to set the upper and lower bounds on library versions
to use when creating HDF5 objects. For parallel compression
specifically, setting the library version to the latest available

@@ -258,7 +258,16 @@ New Features

    Parallel Library:
    -----------------
    -
    - Added optimized support for the parallel compression feature when
      using the multi-dataset I/O API routines collectively

      Previously, calling H5Dwrite_multi/H5Dread_multi collectively in parallel
      with a list containing one or more filtered datasets would cause HDF5 to
      break out of the optimized multi-dataset I/O mode and instead perform I/O
      by looping over each dataset in the I/O request. The library has now been
      updated to perform I/O in a more optimized manner in this case by first
      performing I/O on all the filtered datasets at once and then performing
      I/O on all the unfiltered datasets at once.


    Fortran Library:

@@ -1114,6 +1114,31 @@ H5D__chunk_io_init(H5D_io_info_t *io_info, H5D_dset_io_info_t *dinfo)
        }
    }

#ifdef H5_HAVE_PARALLEL
    /*
     * If collective metadata reads are enabled, ensure all ranks
     * have the dataset's chunk index open (if it was created) to
     * prevent possible metadata inconsistency issues or unintentional
     * independent metadata reads later on.
     */
    if (H5F_SHARED_HAS_FEATURE(io_info->f_sh, H5FD_FEAT_HAS_MPI) &&
        H5F_shared_get_coll_metadata_reads(io_info->f_sh) &&
        H5D__chunk_is_space_alloc(&dataset->shared->layout.storage)) {
        H5D_chunk_ud_t udata;
        hsize_t scaled[H5O_LAYOUT_NDIMS] = {0};

        /*
         * TODO: Until the dataset chunk index callback structure has
         * callbacks for checking if an index is opened and also for
         * directly opening the index, the following fake chunk lookup
         * serves the purpose of forcing a chunk index open operation
         * on all ranks
         */
        if (H5D__chunk_lookup(dataset, scaled, &udata) < 0)
            HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to collectively open dataset chunk index");
    }
#endif

done:
    if (file_space_normalized == true)
        if (H5S_hyper_denormalize_offset(dinfo->file_space, old_offset) < 0)

@@ -1556,6 +1581,9 @@ H5D__create_piece_map_single(H5D_dset_io_info_t *di, H5D_io_info_t *io_info)
    piece_info->in_place_tconv = false;
    piece_info->buf_off = 0;

    /* Check if chunk is in a dataset with filters applied */
    piece_info->filtered_dset = di->dset->shared->dcpl_cache.pline.nused > 0;

    /* make connection to related dset info from this piece_info */
    piece_info->dset_info = di;

@@ -1591,6 +1619,7 @@ H5D__create_piece_file_map_all(H5D_dset_io_info_t *di, H5D_io_info_t *io_info)
    hsize_t curr_partial_clip[H5S_MAX_RANK]; /* Current partial dimension sizes to clip against */
    hsize_t partial_dim_size[H5S_MAX_RANK];  /* Size of a partial dimension */
    bool is_partial_dim[H5S_MAX_RANK];       /* Whether a dimension is currently a partial chunk */
    bool filtered_dataset;                   /* Whether the dataset in question has filters applied */
    unsigned num_partial_dims;               /* Current number of partial dimensions */
    unsigned u;                              /* Local index variable */
    herr_t ret_value = SUCCEED;              /* Return value */

@@ -1640,6 +1669,9 @@ H5D__create_piece_file_map_all(H5D_dset_io_info_t *di, H5D_io_info_t *io_info)
    /* Set the index of this chunk */
    chunk_index = 0;

    /* Check whether dataset has filters applied */
    filtered_dataset = di->dset->shared->dcpl_cache.pline.nused > 0;

    /* Create "temporary" chunk for selection operations (copy file space) */
    if (NULL == (tmp_fchunk = H5S_create_simple(fm->f_ndims, fm->chunk_dim, NULL)))
        HGOTO_ERROR(H5E_DATASET, H5E_CANTCREATE, FAIL, "unable to create dataspace for chunk");

@@ -1686,6 +1718,8 @@ H5D__create_piece_file_map_all(H5D_dset_io_info_t *di, H5D_io_info_t *io_info)
        new_piece_info->in_place_tconv = false;
        new_piece_info->buf_off = 0;

        new_piece_info->filtered_dset = filtered_dataset;

        /* Insert the new chunk into the skip list */
        if (H5SL_insert(fm->dset_sel_pieces, new_piece_info, &new_piece_info->index) < 0) {
            H5D__free_piece_info(new_piece_info, NULL, NULL);

@@ -1798,6 +1832,7 @@ H5D__create_piece_file_map_hyper(H5D_dset_io_info_t *dinfo, H5D_io_info_t *io_in
    hsize_t chunk_index;                /* Index of chunk */
    hsize_t start_scaled[H5S_MAX_RANK]; /* Starting scaled coordinates of selection */
    hsize_t scaled[H5S_MAX_RANK];       /* Scaled coordinates for this chunk */
    bool filtered_dataset;              /* Whether the dataset in question has filters applied */
    int curr_dim;                       /* Current dimension to increment */
    unsigned u;                         /* Local index variable */
    herr_t ret_value = SUCCEED;         /* Return value */

@@ -1831,6 +1866,9 @@ H5D__create_piece_file_map_hyper(H5D_dset_io_info_t *dinfo, H5D_io_info_t *io_in
    /* Calculate the index of this chunk */
    chunk_index = H5VM_array_offset_pre(fm->f_ndims, dinfo->layout->u.chunk.down_chunks, scaled);

    /* Check whether dataset has filters applied */
    filtered_dataset = dinfo->dset->shared->dcpl_cache.pline.nused > 0;

    /* Iterate through each chunk in the dataset */
    while (sel_points) {
        /* Check for intersection of current chunk and file selection */

@@ -1885,6 +1923,8 @@ H5D__create_piece_file_map_hyper(H5D_dset_io_info_t *dinfo, H5D_io_info_t *io_in
            new_piece_info->in_place_tconv = false;
            new_piece_info->buf_off = 0;

            new_piece_info->filtered_dset = filtered_dataset;

            /* Add piece to global piece_count */
            io_info->piece_count++;

@@ -2257,6 +2297,8 @@ H5D__piece_file_cb(void H5_ATTR_UNUSED *elem, const H5T_t H5_ATTR_UNUSED *type,
        piece_info->in_place_tconv = false;
        piece_info->buf_off = 0;

        piece_info->filtered_dset = dinfo->dset->shared->dcpl_cache.pline.nused > 0;

        /* Make connection to related dset info from this piece_info */
        piece_info->dset_info = dinfo;

@@ -2417,6 +2459,9 @@ H5D__chunk_mdio_init(H5D_io_info_t *io_info, H5D_dset_io_info_t *dinfo)

        /* Add to sel_pieces and update pieces_added */
        io_info->sel_pieces[io_info->pieces_added++] = piece_info;

        if (piece_info->filtered_dset)
            io_info->filtered_pieces_added++;
    }

    /* Advance to next skip list node */

@@ -2728,6 +2773,9 @@ H5D__chunk_read(H5D_io_info_t *io_info, H5D_dset_io_info_t *dset_info)
                if (io_info->sel_pieces)
                    io_info->sel_pieces[io_info->pieces_added] = chunk_info;
                io_info->pieces_added++;

                if (io_info->sel_pieces && chunk_info->filtered_dset)
                    io_info->filtered_pieces_added++;
            }
        } /* end if */
        else if (!skip_missing_chunks) {

@@ -3142,6 +3190,9 @@ H5D__chunk_write(H5D_io_info_t *io_info, H5D_dset_io_info_t *dset_info)
                if (io_info->sel_pieces)
                    io_info->sel_pieces[io_info->pieces_added] = chunk_info;
                io_info->pieces_added++;

                if (io_info->sel_pieces && chunk_info->filtered_dset)
                    io_info->filtered_pieces_added++;
            }
        } /* end else */

@@ -644,6 +644,8 @@ H5D__contig_io_init(H5D_io_info_t *io_info, H5D_dset_io_info_t *dinfo)
    new_piece_info->in_place_tconv = false;
    new_piece_info->buf_off = 0;

    new_piece_info->filtered_dset = dinfo->dset->shared->dcpl_cache.pline.nused > 0;

    /* Calculate type conversion buffer size and check for in-place conversion if necessary. Currently
     * only implemented for selection I/O. */
    if (io_info->use_select_io != H5D_SELECTION_IO_MODE_OFF &&

src/H5Dio.c (67 changed lines)

@@ -107,6 +107,17 @@ H5D__read(size_t count, H5D_dset_io_info_t *dset_info)

    FUNC_ENTER_NOAPI(FAIL)

#ifdef H5_HAVE_PARALLEL
    /* Reset the actual io mode properties to the default values in case
     * the DXPL (if it's non-default) was previously used in a collective
     * I/O operation.
     */
    if (!H5CX_is_def_dxpl()) {
        H5CX_set_mpio_actual_chunk_opt(H5D_MPIO_NO_CHUNK_OPTIMIZATION);
        H5CX_set_mpio_actual_io_mode(H5D_MPIO_NO_COLLECTIVE);
    } /* end if */
#endif

    /* Init io_info */
    if (H5D__ioinfo_init(count, H5D_IO_OP_READ, dset_info, &io_info) < 0)
        HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize I/O info");

@@ -222,6 +233,14 @@ H5D__read(size_t count, H5D_dset_io_info_t *dset_info)
            dset_info[i].buf.vp = (void *)(((uint8_t *)dset_info[i].buf.vp) + buf_adj);
        } /* end if */

        /* Set up I/O operation */
        if (H5D__dset_ioinfo_init(dset_info[i].dset, &(dset_info[i]), &(store[i])) < 0)
            HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to set up I/O operation");

        /* Check if any filters are applied to the dataset */
        if (dset_info[i].dset->shared->dcpl_cache.pline.nused > 0)
            io_info.filtered_count++;

        /* If space hasn't been allocated and not using external storage,
         * return fill value to buffer if fill time is upon allocation, or
         * do nothing if fill time is never. If the dataset is compact and

@@ -259,10 +278,6 @@ H5D__read(size_t count, H5D_dset_io_info_t *dset_info)
            io_skipped = io_skipped + 1;
        } /* end if */
        else {
            /* Set up I/O operation */
            if (H5D__dset_ioinfo_init(dset_info[i].dset, &(dset_info[i]), &(store[i])) < 0)
                HGOTO_ERROR(H5E_DATASET, H5E_UNSUPPORTED, FAIL, "unable to set up I/O operation");

            /* Sanity check that space is allocated, if there are elements */
            if (dset_info[i].nelmts > 0)
                assert(

@@ -273,22 +288,23 @@ H5D__read(size_t count, H5D_dset_io_info_t *dset_info)
                    dset_info[i].dset->shared->dcpl_cache.efl.nused > 0 ||
                    dset_info[i].dset->shared->layout.type == H5D_COMPACT);

            dset_info[i].skip_io = false;
        }

        /* Call storage method's I/O initialization routine */
        if (dset_info[i].layout_ops.io_init &&
            (dset_info[i].layout_ops.io_init)(&io_info, &(dset_info[i])) < 0)
            HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize I/O info");
        dset_info[i].skip_io = false;
        io_op_init++;

        /* Reset metadata tagging */
        H5AC_tag(prev_tag, NULL);
        }
    } /* end of for loop */

    assert(io_op_init + io_skipped == count);
    assert(io_op_init == count);

    /* If no datasets have I/O, we're done */
    if (io_op_init == 0)
    if (io_skipped == count)
        HGOTO_DONE(SUCCEED);

    /* Perform second phase of type info initialization */

@@ -323,7 +339,11 @@ H5D__read(size_t count, H5D_dset_io_info_t *dset_info)
    }

    /* MDIO-specific second phase initialization */
    for (i = 0; i < count; i++)
    for (i = 0; i < count; i++) {
        /* Check for skipped I/O */
        if (dset_info[i].skip_io)
            continue;

        if (dset_info[i].layout_ops.mdio_init) {
            haddr_t prev_tag = HADDR_UNDEF;

@@ -337,6 +357,7 @@ H5D__read(size_t count, H5D_dset_io_info_t *dset_info)
            /* Reset metadata tagging */
            H5AC_tag(prev_tag, NULL);
        }
    }

    /* Invoke correct "high level" I/O routine */
    if ((*io_info.md_io_ops.multi_read_md)(&io_info) < 0)

@@ -430,7 +451,7 @@ H5D__read(size_t count, H5D_dset_io_info_t *dset_info)
done:
    /* Shut down the I/O op information */
    for (i = 0; i < io_op_init; i++)
        if (!dset_info[i].skip_io && dset_info[i].layout_ops.io_term &&
        if (dset_info[i].layout_ops.io_term &&
            (*dset_info[i].layout_ops.io_term)(&io_info, &(dset_info[i])) < 0)
            HDONE_ERROR(H5E_DATASET, H5E_CANTCLOSEOBJ, FAIL, "unable to shut down I/O op info");

@@ -512,6 +533,17 @@ H5D__write(size_t count, H5D_dset_io_info_t *dset_info)

    FUNC_ENTER_NOAPI(FAIL)

#ifdef H5_HAVE_PARALLEL
    /* Reset the actual io mode properties to the default values in case
     * the DXPL (if it's non-default) was previously used in a collective
     * I/O operation.
     */
    if (!H5CX_is_def_dxpl()) {
        H5CX_set_mpio_actual_chunk_opt(H5D_MPIO_NO_CHUNK_OPTIMIZATION);
        H5CX_set_mpio_actual_io_mode(H5D_MPIO_NO_COLLECTIVE);
    } /* end if */
#endif

    /* Init io_info */
    if (H5D__ioinfo_init(count, H5D_IO_OP_WRITE, dset_info, &io_info) < 0)
        HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "can't initialize I/O info");

@@ -586,7 +618,7 @@ H5D__write(size_t count, H5D_dset_io_info_t *dset_info)
        if (NULL == dset_info[i].buf.cvp) {
            /* Check for any elements selected (which is invalid) */
            if (dset_info[i].nelmts > 0)
                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "no output buffer");
                HGOTO_ERROR(H5E_ARGS, H5E_BADVALUE, FAIL, "no input buffer");

            /* If the buffer is nil, and 0 element is selected, make a fake buffer.
             * This is for some MPI package like ChaMPIon on NCSA's tungsten which

@@ -655,6 +687,10 @@ H5D__write(size_t count, H5D_dset_io_info_t *dset_info)
        if (H5D__dset_ioinfo_init(dset_info[i].dset, &(dset_info[i]), &(store[i])) < 0)
            HGOTO_ERROR(H5E_DATASET, H5E_CANTINIT, FAIL, "unable to set up I/O operation");

        /* Check if any filters are applied to the dataset */
        if (dset_info[i].dset->shared->dcpl_cache.pline.nused > 0)
            io_info.filtered_count++;

        /* Allocate dataspace and initialize it if it hasn't been. */
        should_alloc_space = dset_info[i].dset->shared->dcpl_cache.efl.nused == 0 &&
                             !(*dset_info[i].dset->shared->layout.ops->is_space_alloc)(

@@ -1225,15 +1261,6 @@ H5D__ioinfo_adjust(H5D_io_info_t *io_info)
    dset0 = io_info->dsets_info[0].dset;
    assert(dset0->oloc.file);

    /* Reset the actual io mode properties to the default values in case
     * the DXPL (if it's non-default) was previously used in a collective
     * I/O operation.
     */
    if (!H5CX_is_def_dxpl()) {
        H5CX_set_mpio_actual_chunk_opt(H5D_MPIO_NO_CHUNK_OPTIMIZATION);
        H5CX_set_mpio_actual_io_mode(H5D_MPIO_NO_COLLECTIVE);
    } /* end if */

    /* Make any parallel I/O adjustments */
    if (io_info->using_mpi_vfd) {
        H5FD_mpio_xfer_t xfer_mode; /* Parallel transfer for this request */

src/H5Dmpio.c (1604 changed lines)
File diff suppressed because it is too large

@@ -254,6 +254,7 @@ typedef struct H5D_piece_info_t {
    unsigned mspace_shared; /* Indicate that the memory space for a chunk is shared and shouldn't be freed */
    bool in_place_tconv;    /* Whether to perform type conversion in-place */
    size_t buf_off;         /* Buffer offset for in-place type conversion */
    bool filtered_dset;     /* Whether the dataset this chunk is in has filters applied */
    struct H5D_dset_io_info_t *dset_info; /* Pointer to dset_info */
} H5D_piece_info_t;

@@ -293,9 +294,11 @@ typedef struct H5D_io_info_t {
    H5D_md_io_ops_t md_io_ops; /* Multi dataset I/O operation function pointers */
    H5D_io_op_type_t op_type;
    size_t count;                 /* Number of datasets in I/O request */
    size_t filtered_count;        /* Number of datasets with filters applied in I/O request */
    H5D_dset_io_info_t *dsets_info; /* dsets info where I/O is done to/from */
    size_t piece_count;           /* Number of pieces in I/O request */
    size_t pieces_added;          /* Number of pieces added so far to arrays */
    size_t filtered_pieces_added; /* Number of filtered pieces in I/O request */
    H5D_piece_info_t **sel_pieces; /* Array of info struct for all pieces in I/O */
    H5S_t **mem_spaces;            /* Array of chunk memory spaces */
    H5S_t **file_spaces;           /* Array of chunk file spaces */

@@ -3025,9 +3025,9 @@ H5FD__mpio_read_selection(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED d
         * s_bufs[] to find the smallest value, and choose that for
         * mpi_bufs_base.
         */
        j = 0; /* guess at the index of the smallest value of s_bufs[] */

        if (s_bufs[j + 1].vp != NULL) {
        j = 0; /* guess at the index of the smallest value of s_bufs[] */
        if ((count > 1) && (s_bufs[1].vp != NULL)) {
            for (i = 1; i < count; i++)
                if (s_bufs[i].vp < s_bufs[j].vp)
                    j = i;

@@ -3375,9 +3375,9 @@ H5FD__mpio_write_selection(H5FD_t *_file, H5FD_mem_t type, hid_t H5_ATTR_UNUSED
         * s_bufs[] to find the smallest value, and choose that for
         * mpi_bufs_base.
         */
        j = 0; /* guess at the index of the smallest value of s_bufs[] */

        if (s_bufs[j + 1].cvp != NULL) {
        j = 0; /* guess at the index of the smallest value of s_bufs[] */
        if ((count > 1) && (s_bufs[1].cvp != NULL)) {
            for (i = 1; i < count; i++)
                if (s_bufs[i].cvp < s_bufs[j].cvp)
                    j = i;

src/H5Fmpi.c (29 changed lines)

@@ -407,16 +407,39 @@ done:
 */
bool
H5F_get_coll_metadata_reads(const H5F_t *file)
{
    FUNC_ENTER_NOAPI_NOERR

    assert(file && file->shared);

    FUNC_LEAVE_NOAPI(H5F_shared_get_coll_metadata_reads(file->shared));
} /* end H5F_get_coll_metadata_reads() */

/*-------------------------------------------------------------------------
 * Function: H5F_shared_get_coll_metadata_reads
 *
 * Purpose:  Determines whether collective metadata reads should be
 *           performed. This routine is meant to be the single source of
 *           truth for the collective metadata reads status, as it
 *           coordinates between the file-global flag and the flag set
 *           for the current operation in the current API context.
 *
 * Return:   true/false (can't fail)
 *
 *-------------------------------------------------------------------------
 */
bool
H5F_shared_get_coll_metadata_reads(const H5F_shared_t *f_sh)
{
    H5P_coll_md_read_flag_t file_flag = H5P_USER_FALSE;
    bool ret_value = false;

    FUNC_ENTER_NOAPI_NOERR

    assert(file && file->shared);
    assert(f_sh);

    /* Retrieve the file-global flag */
    file_flag = H5F_COLL_MD_READ(file);
    file_flag = H5F_SHARED_COLL_MD_READ(f_sh);

    /* If file flag is set to H5P_FORCE_FALSE, exit early
     * with false, since collective metadata reads have

@@ -442,7 +465,7 @@ H5F_get_coll_metadata_reads(const H5F_t *file)
    }

    FUNC_LEAVE_NOAPI(ret_value)
} /* end H5F_get_coll_metadata_reads() */
} /* end H5F_shared_get_coll_metadata_reads() */

/*-------------------------------------------------------------------------
 * Function: H5F_set_coll_metadata_reads

@@ -86,6 +86,7 @@ typedef struct H5F_t H5F_t;
#define H5F_IS_TMP_ADDR(F, ADDR) (H5_addr_le((F)->shared->fs.tmp_addr, (ADDR)))
#ifdef H5_HAVE_PARALLEL
#define H5F_COLL_MD_READ(F) ((F)->shared->coll_md_read)
#define H5F_SHARED_COLL_MD_READ(F_SH) ((F_SH)->coll_md_read)
#endif /* H5_HAVE_PARALLEL */
#define H5F_USE_MDC_LOGGING(F) ((F)->shared->use_mdc_logging)
#define H5F_START_MDC_LOG_ON_ACCESS(F) ((F)->shared->start_mdc_log_on_access)

@@ -149,6 +150,7 @@ typedef struct H5F_t H5F_t;
#define H5F_IS_TMP_ADDR(F, ADDR) (H5F_is_tmp_addr((F), (ADDR)))
#ifdef H5_HAVE_PARALLEL
#define H5F_COLL_MD_READ(F) (H5F_coll_md_read(F))
#define H5F_SHARED_COLL_MD_READ(F_SH) (H5F_shared_coll_md_read(F))
#endif /* H5_HAVE_PARALLEL */
#define H5F_USE_MDC_LOGGING(F) (H5F_use_mdc_logging(F))
#define H5F_START_MDC_LOG_ON_ACCESS(F) (H5F_start_mdc_log_on_access(F))

@@ -556,6 +558,7 @@ H5_DLL hsize_t H5F_get_alignment(const H5F_t *f);
H5_DLL hsize_t H5F_get_threshold(const H5F_t *f);
#ifdef H5_HAVE_PARALLEL
H5_DLL H5P_coll_md_read_flag_t H5F_coll_md_read(const H5F_t *f);
H5_DLL H5P_coll_md_read_flag_t H5F_shared_coll_md_read(const H5F_shared_t *f_sh);
#endif /* H5_HAVE_PARALLEL */
H5_DLL bool H5F_use_mdc_logging(const H5F_t *f);
H5_DLL bool H5F_start_mdc_log_on_access(const H5F_t *f);

@@ -642,6 +645,7 @@ H5_DLL int H5F_mpi_get_size(const H5F_t *f);
H5_DLL herr_t H5F_mpi_retrieve_comm(hid_t loc_id, hid_t acspl_id, MPI_Comm *mpi_comm);
H5_DLL herr_t H5F_mpi_get_file_block_type(bool commit, MPI_Datatype *new_type, bool *new_type_derived);
H5_DLL bool H5F_get_coll_metadata_reads(const H5F_t *f);
H5_DLL bool H5F_shared_get_coll_metadata_reads(const H5F_shared_t *f_sh);
H5_DLL void H5F_set_coll_metadata_reads(H5F_t *f, H5P_coll_md_read_flag_t *file_flag, bool *context_flag);
H5_DLL herr_t H5F_shared_get_mpi_file_sync_required(const H5F_shared_t *f_sh, bool *flag);
#endif /* H5_HAVE_PARALLEL */

@@ -1054,11 +1054,31 @@ H5F_coll_md_read(const H5F_t *f)
    /* Use FUNC_ENTER_NOAPI_NOINIT_NOERR here to avoid performance issues */
    FUNC_ENTER_NOAPI_NOINIT_NOERR

    assert(f);
    assert(f && f->shared);

    FUNC_LEAVE_NOAPI(f->shared->coll_md_read)
} /* end H5F_coll_md_read() */

/*-------------------------------------------------------------------------
 * Function: H5F_shared_coll_md_read
 *
 * Purpose:  Retrieve the 'collective metadata reads' flag for the file.
 *
 * Return:   Success: Non-negative, the 'collective metadata reads' flag
 *           Failure: (can't happen)
 *-------------------------------------------------------------------------
 */
H5P_coll_md_read_flag_t
H5F_shared_coll_md_read(const H5F_shared_t *f_sh)
{
    /* Use FUNC_ENTER_NOAPI_NOINIT_NOERR here to avoid performance issues */
    FUNC_ENTER_NOAPI_NOINIT_NOERR

    assert(f_sh);

    FUNC_LEAVE_NOAPI(f_sh->coll_md_read)
} /* end H5F_shared_coll_md_read() */

/*-------------------------------------------------------------------------
 * Function: H5F_shared_get_mpi_file_sync_required
 *

@@ -4102,7 +4102,7 @@ H5P_object_verify(hid_t plist_id, hid_t pclass_id)

    /* Compare the property list's class against the other class */
    if (H5P_isa_class(plist_id, pclass_id) != true)
        HGOTO_ERROR(H5E_PLIST, H5E_CANTREGISTER, NULL, "property list is not a member of the class");
        HGOTO_ERROR(H5E_PLIST, H5E_CANTCOMPARE, NULL, "property list is not a member of the class");

    /* Get the plist structure */
    if (NULL == (ret_value = (H5P_genplist_t *)H5I_object(plist_id)))

@@ -97,6 +97,16 @@ H5VL__native_dataset_io_setup(size_t count, void *obj[], hid_t mem_type_id[], hi

    /* Iterate over datasets */
    for (i = 0; i < count; i++) {
        /* Initialize fields not set here to prevent use of uninitialized */
        memset(&dinfo[i].layout_ops, 0, sizeof(dinfo[i].layout_ops));
        memset(&dinfo[i].io_ops, 0, sizeof(dinfo[i].io_ops));
        memset(&dinfo[i].layout_io_info, 0, sizeof(dinfo[i].layout_io_info));
        memset(&dinfo[i].type_info, 0, sizeof(dinfo[i].type_info));
        dinfo[i].store = NULL;
        dinfo[i].layout = NULL;
        dinfo[i].nelmts = 0;
        dinfo[i].skip_io = false;

        /* Set up dset */
        dinfo[i].dset = (H5D_t *)obj[i];
        assert(dinfo[i].dset);

File diff suppressed because it is too large

@@ -58,6 +58,11 @@ size_t cd_nelmts = FILTER_NUM_CDVALUES;
#define DIM0_SCALE_FACTOR 4
#define DIM1_SCALE_FACTOR 2

/* The maximum number of datasets to work on simultaneously
 * when using H5Dwrite_multi/H5Dread_multi
 */
#define MAX_NUM_DSETS_MULTI 5

/* Struct type for the compound datatype filtered dataset tests */
typedef struct {
    short field1;