2
0
mirror of https://github.com/Unidata/netcdf-c.git synced 2025-04-18 18:20:39 +08:00

fixing parallel I/O bug with dim length for unlimited dimensions

This commit is contained in:
Edward Hartnett 2022-04-26 07:54:47 -06:00
parent 155467ba2d
commit 08b6ea65a4
3 changed files with 30 additions and 26 deletions

@ -161,20 +161,24 @@ HDF5_inq_dim(int ncid, int dimid, char *name, size_t *lenp)
{
if (dim->unlimited)
{
*lenp = 0;
#ifndef USE_PARALLEL
/* Shortcut for non-parallel operation: if dim->len is
* non-zero, it already holds the correct size. */
if (dim->len)
*lenp = dim->len;
#endif
/* Since this is an unlimited dimension, go to the file
and see how many records there are. Take the max number
of records from all the vars that share this
dimension. */
*lenp = 0;
if (dim->len == 0) {
if (*lenp == 0)
{
if ((ret = nc4_find_dim_len(dim_grp, dimid, &lenp)))
return ret;
if (h5->no_write == NC_TRUE) {
dim->len = *lenp;
}
}
else {
*lenp = dim->len;
}
}
else

@ -117,6 +117,8 @@ find_var_dim_max_length(NC_GRP_INFO_T *grp, int varid, int dimid,
*maxlen = 0;
LOG((3, "find_var_dim_max_length varid %d dimid %d", varid, dimid));
/* Find this var. */
var = (NC_VAR_INFO_T*)ncindexith(grp->vars,varid);
if (!var) return NC_ENOTVAR;
@ -162,24 +164,20 @@ find_var_dim_max_length(NC_GRP_INFO_T *grp, int varid, int dimid,
*maxlen = *maxlen > h5dimlen[d] ? *maxlen : h5dimlen[d];
#ifdef USE_PARALLEL
/* If we are doing parallel I/O in collective mode, then
* communicate with all other tasks in the collective and
* find out which has the max value for the dimension
* size. */
/* If we are doing parallel I/O in collective mode (with
* either pnetcdf or HDF5), then communicate with all
* other tasks in the collective and find out which has
* the max value for the dimension size. */
assert(grp->nc4_info);
LOG((3, "before Allreduce *maxlen %ld grp->nc4_info->parallel %d var->parallel_access %d",
*maxlen, grp->nc4_info->parallel, var->parallel_access));
if (grp->nc4_info->parallel && var->parallel_access == NC_COLLECTIVE)
{
assert(grp->nc4_info);
size_t real_maxlen;
/* If parallel is in use, and var is collective,
* reduce to largest value of maxlen, putting result
* into real_maxlen. */
if (grp->nc4_info->parallel && var->parallel_access == NC_COLLECTIVE)
{
if (MPI_Allreduce(maxlen, &real_maxlen, 1, NC_MPI_SIZE_T, MPI_MAX,
grp->nc4_info->comm))
BAIL(NC_EMPI);
*maxlen = real_maxlen;
}
if ((MPI_SUCCESS != MPI_Allreduce(MPI_IN_PLACE, maxlen, 1,
MPI_UNSIGNED_LONG_LONG, MPI_MAX,
grp->nc4_info->comm)))
BAIL(NC_EMPI);
LOG((3, "after Allreduce *maxlen %ld", *maxlen));
}
#endif /* USE_PARALLEL */
}

@ -24,7 +24,7 @@
int main(int argc, char** argv)
{
int err = NC_NOERR, rank, nprocs;
int ncid, cmode, varid, dimid;
int ncid, varid, dimid;
size_t start[1], count[1], nrecs;
MPI_Init(&argc, &argv);
@ -37,6 +37,7 @@ int main(int argc, char** argv)
if (!rank)
printf("*** testing record lenth with multiple processes writing records...");
nc_set_log_level(4);
if (nc_create_par(FILENAME, NC_CLOBBER | NC_NETCDF4, MPI_COMM_WORLD,
MPI_INFO_NULL, &ncid)) ERR;
@ -49,6 +50,8 @@ int main(int argc, char** argv)
count[0] = 1;
if (nc_put_vara_int(ncid, varid, start, count, &rank)) ERR;
if (nc_inq_dimlen(ncid, dimid, &nrecs)) ERR;
if (nc_close(ncid)) ERR;
nc_set_log_level(-1);
if (nrecs != nprocs)
{
@ -58,7 +61,6 @@ int main(int argc, char** argv)
nprocs, nrecs);
ERR;
}
if (nc_close(ncid)) ERR;
if (!rank)
SUMMARIZE_ERR;