From 6d13b7a243e968b4ee365a8ea2b54c5f7640e8f6 Mon Sep 17 00:00:00 2001 From: Neil Fortner Date: Mon, 20 Apr 2009 11:37:47 -0500 Subject: [PATCH] [svn-r16800] Purpose: Fix bug 1516 Description: h5repack previously would not take named datatypes into consideration when copying datasets and attributes. This would cause extra anonymous datatypes in the target file at best, and cause errors halfway through the repacking at worst. h5repack should now always handle named datatypes correctly. Named datatypes are also now converted to the native type when -n is given. Tested: jam, linew, smirom (h5committest) --- MANIFEST | 1 + release_docs/RELEASE.txt | 2 + tools/h5repack/h5repack.sh.in | 9 +- tools/h5repack/h5repack_copy.c | 237 ++++++++++++++++-- tools/h5repack/h5repacktst.c | 159 ++++++++++++ .../testfiles/h5repack_named_dtypes.h5 | Bin 0 -> 4304 bytes 6 files changed, 387 insertions(+), 21 deletions(-) create mode 100644 tools/h5repack/testfiles/h5repack_named_dtypes.h5 diff --git a/MANIFEST b/MANIFEST index f4758086b9..2c56bc48ff 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1520,6 +1520,7 @@ ./tools/h5repack/testfiles/h5repack_ext.bin ./tools/h5repack/testfiles/h5repack_ext.h5 ./tools/h5repack/testfiles/ublock.bin +./tools/h5repack/testfiles/h5repack_named_dtypes.h5 # jam utility and tests diff --git a/release_docs/RELEASE.txt b/release_docs/RELEASE.txt index b702907c6e..33fed05569 100644 --- a/release_docs/RELEASE.txt +++ b/release_docs/RELEASE.txt @@ -293,6 +293,8 @@ Bug Fixes since HDF5-1.8.0 release Tools ----- + - Fixed many problems that could occur when using h5repack with named + datatypes. (NAF - 2009/4/20 - 1516/1466) - h5dump, h5diff, h5repack were not reading (by hyperslabs) datasets that have a datatype datum size greater than H5TOOLS_BUFSIZE, a constant defined as 1024Kb, such as array types with large diff --git a/tools/h5repack/h5repack.sh.in b/tools/h5repack/h5repack.sh.in index 4a48c8a08e..a09e8b9cdd 100755 --- a/tools/h5repack/h5repack.sh.in +++ b/tools/h5repack/h5repack.sh.in @@ -53,6 +53,7 @@ FILE12=h5repack_nbit.h5 FILE13=h5repack_soffset.h5 FILE14=h5repack_layouto.h5 # A file with an older version of the layout message # (copy of test/tlayouto.h5) +FILE15=h5repack_named_dtypes.h5 nerrors=0 @@ -455,7 +456,10 @@ TOOLTEST $FILE4 -l dset_chunk:CONTI TOOLTEST $FILE4 -l dset_chunk:CHUNK=18x13 # Native option -TOOLTEST $FILE1 -n +# Do not use FILE1, as the named dtype will be converted to native, and h5diff will +# report a difference. +TOOLTEST $FILE0 -n +TOOLTEST $FILE2 -n # latest file format with long switches. use FILE4=h5repack_layout.h5 (no filters) @@ -507,6 +511,9 @@ TOOLTEST $FILE14 # test for datum size > H5TOOLS_MALLOCSIZE TOOLTEST $FILE1 -f GZIP=1 +# Check repacking file with committed datatypes in odd configurations +TOOLTEST $FILE15 + if test $nerrors -eq 0 ; then echo "All $H5REPACK tests passed." fi diff --git a/tools/h5repack/h5repack_copy.c b/tools/h5repack/h5repack_copy.c index 8ab5a35013..73f57e303c 100644 --- a/tools/h5repack/h5repack_copy.c +++ b/tools/h5repack/h5repack_copy.c @@ -21,8 +21,21 @@ #include "h5tools.h" #include "h5tools_utils.h" -extern char *progname; +/*------------------------------------------------------------------------- +* typedefs +*------------------------------------------------------------------------- +*/ +typedef struct named_dt_t { + haddr_t addr_in; /* Address of the named dtype in the in file */ + hid_t id_out; /* Open identifier for the dtype in the out file */ + struct named_dt_t *next; /* Next dtype */ +} named_dt_t; +/*------------------------------------------------------------------------- +* globals +*------------------------------------------------------------------------- +*/ +extern char *progname; /*------------------------------------------------------------------------- * macros @@ -36,7 +49,11 @@ extern char *progname; */ static void print_dataset_info(hid_t dcpl_id,char *objname,double per, int pr); static int do_copy_objects(hid_t fidin,hid_t fidout,trav_table_t *travt,pack_opt_t *options); -static int copy_attr(hid_t loc_in,hid_t loc_out,pack_opt_t *options); +static int copy_attr(hid_t loc_in, hid_t loc_out, named_dt_t **named_dt_head_p, + trav_table_t *travt, pack_opt_t *options); +static hid_t copy_named_datatype(hid_t type_in, hid_t fidout, named_dt_t **named_dt_head_p, + trav_table_t *travt, pack_opt_t *options); +static int named_datatype_free(named_dt_t **named_dt_head_p, int ignore_err); static int copy_user_block(const char *infile, const char *outfile, hsize_t size); #if defined (H5REPACK_DEBUG_USER_BLOCK) static void print_user_block(const char *filename, hid_t fid); @@ -506,6 +523,7 @@ int do_copy_objects(hid_t fidin, hid_t f_space_id=-1; /* file space ID */ hid_t ftype_id=-1; /* file type ID */ hid_t wtype_id=-1; /* read/write type ID */ + named_dt_t *named_dt_head=NULL; /* Pointer to the stack of named datatypes copied */ size_t msize; /* size of type */ hsize_t nelmts; /* number of elements in dataset */ int rank; /* rank of dataset */ @@ -522,6 +540,7 @@ int do_copy_objects(hid_t fidin, unsigned i; unsigned u; int is_ref=0; + htri_t is_named; /*------------------------------------------------------------------------- * copy the suppplied object list @@ -605,7 +624,7 @@ int do_copy_objects(hid_t fidin, * copy attrs *------------------------------------------------------------------------- */ - if(copy_attr(grp_in,grp_out,options) < 0) + if(copy_attr(grp_in, grp_out, &named_dt_head, travt, options) < 0) goto error; @@ -657,6 +676,14 @@ int do_copy_objects(hid_t fidin, goto error; if(H5T_REFERENCE == H5Tget_class(ftype_id)) is_ref = 1; + + /* Check if the datatype is committed */ + if((is_named = H5Tcommitted(ftype_id)) < 0) + goto error; + if(is_named) + if((wtype_id = copy_named_datatype(ftype_id, fidout, &named_dt_head, travt, options)) < 0) + goto error; + if(H5Tclose(ftype_id) < 0) goto error; if(H5Dclose(dset_in) < 0) @@ -672,7 +699,8 @@ int do_copy_objects(hid_t fidin, if ( options->op_tbl->nelems || options->all_filter == 1 || options->all_layout == 1 || - is_ref) + is_ref || + is_named) { int j; @@ -698,10 +726,13 @@ int do_copy_objects(hid_t fidin, nelmts *= dims[j]; } - if(options->use_native == 1) - wtype_id = h5tools_get_native_type(ftype_id); - else - wtype_id = H5Tcopy(ftype_id); + /* wtype_id will have already been set if using a named dtype */ + if(!is_named) { + if(options->use_native == 1) + wtype_id = h5tools_get_native_type(ftype_id); + else + wtype_id = H5Tcopy(ftype_id); + } /* end if */ if((msize = H5Tget_size(wtype_id)) == 0) goto error; @@ -933,7 +964,7 @@ int do_copy_objects(hid_t fidin, * copy attrs *------------------------------------------------------------------------- */ - if (copy_attr(dset_in,dset_out,options) < 0) + if (copy_attr(dset_in, dset_out, &named_dt_head, travt, options) < 0) goto error; /*close */ @@ -1004,7 +1035,7 @@ int do_copy_objects(hid_t fidin, goto error; if((dset_out = H5Dopen2(fidout, travt->objs[i].name, H5P_DEFAULT)) < 0) goto error; - if(copy_attr(dset_in, dset_out, options) < 0) + if(copy_attr(dset_in, dset_out, &named_dt_head, travt, options) < 0) goto error; if(H5Dclose(dset_in) < 0) goto error; @@ -1033,17 +1064,21 @@ int do_copy_objects(hid_t fidin, if((type_in = H5Topen2(fidin, travt->objs[i].name, H5P_DEFAULT)) < 0) goto error; - if((type_out = H5Tcopy(type_in)) < 0) + /* Copy the datatype anonymously */ + if((type_out = copy_named_datatype(type_in, fidout, &named_dt_head, + travt, options)) < 0) goto error; - if((H5Tcommit2(fidout, travt->objs[i].name, type_out, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) + /* Link in to group structure */ + if(H5Lcreate_hard(type_out, ".", fidout, travt->objs[i].name, + H5P_DEFAULT, H5P_DEFAULT) < 0) goto error; /*------------------------------------------------------------------------- * copy attrs *------------------------------------------------------------------------- */ - if(copy_attr(type_in, type_out, options) < 0) + if(copy_attr(type_in, type_out, &named_dt_head, travt, options) < 0) goto error; if(H5Tclose(type_in) < 0) @@ -1094,6 +1129,9 @@ int do_copy_objects(hid_t fidin, } /* i */ + /* Finalize (link) the stack of named datatypes (if any) */ + named_datatype_free(&named_dt_head, 0); + return 0; error: @@ -1110,6 +1148,7 @@ error: H5Tclose(wtype_id); H5Tclose(type_in); H5Tclose(type_out); + named_datatype_free(&named_dt_head, 1); } H5E_END_TRY; /* free */ if (buf!=NULL) @@ -1139,6 +1178,8 @@ error: int copy_attr(hid_t loc_in, hid_t loc_out, + named_dt_t **named_dt_head_p, + trav_table_t *travt, pack_opt_t *options ) { @@ -1151,6 +1192,7 @@ int copy_attr(hid_t loc_in, void *buf=NULL; /* data buffer */ hsize_t nelmts; /* number of elements in dataset */ int rank; /* rank of dataset */ + htri_t is_named; /* Whether the datatype is named */ hsize_t dims[H5S_MAX_RANK];/* dimensions of dataset */ char name[255]; H5O_info_t oinfo; /* object info */ @@ -1182,6 +1224,27 @@ int copy_attr(hid_t loc_in, if ((ftype_id = H5Aget_type( attr_id )) < 0 ) goto error; + /* Check if the datatype is committed */ + if((is_named = H5Tcommitted(ftype_id)) < 0) + goto error; + if(is_named) { + hid_t fidout; + + /* Create out file id */ + if((fidout = H5Iget_file_id(loc_out)) < 0) + goto error; + + /* Copy named dt */ + if((wtype_id = copy_named_datatype(ftype_id, fidout, named_dt_head_p, + travt, options)) < 0) { + H5Fclose(fidout); + goto error; + } /* end if */ + + if(H5Fclose(fidout) < 0) + goto error; + } /* end if */ + /* get the dataspace handle */ if ((space_id = H5Aget_space( attr_id )) < 0 ) goto error; @@ -1194,10 +1257,13 @@ int copy_attr(hid_t loc_in, for (j=0; juse_native==1) - wtype_id = h5tools_get_native_type(ftype_id); - else - wtype_id = H5Tcopy(ftype_id); + /* wtype_id will have already been set if using a named dtype */ + if(!is_named) { + if (options->use_native==1) + wtype_id = h5tools_get_native_type(ftype_id); + else + wtype_id = H5Tcopy(ftype_id); + } /* end if */ if ((msize=H5Tget_size(wtype_id))==0) goto error; @@ -1234,7 +1300,7 @@ int copy_attr(hid_t loc_in, *------------------------------------------------------------------------- */ - if((attr_out = H5Acreate2(loc_out, name, ftype_id, space_id, H5P_DEFAULT, H5P_DEFAULT)) < 0) + if((attr_out = H5Acreate2(loc_out, name, wtype_id, space_id, H5P_DEFAULT, H5P_DEFAULT)) < 0) goto error; if(H5Awrite(attr_out, wtype_id, buf) < 0) goto error; @@ -1282,8 +1348,6 @@ error: } - - /*------------------------------------------------------------------------- * Function: print_dataset_info * @@ -1388,6 +1452,139 @@ static void print_dataset_info(hid_t dcpl_id, } } + +/*------------------------------------------------------------------------- +* Function: copy_named_datatype +* +* Purpose: Copies the specified datatype anonymously, and returns an open +* id for that datatype in the output file. The first time this +* is called it scans every named datatype in travt into a +* private stack, afterwards it simply scans that stack. The id +* returned must be closed after it is no longer needed. +* named_datatype_free must be called before the program exits +* to free the stack. +* +* Programmer: Neil Fortner +* +* Date: April 14, 2009 +* +*------------------------------------------------------------------------- +*/ +static hid_t +copy_named_datatype(hid_t type_in, hid_t fidout, named_dt_t **named_dt_head_p, trav_table_t *travt, pack_opt_t *options) +{ + named_dt_t *dt = *named_dt_head_p; /* Stack pointer */ + named_dt_t *dt_ret = NULL; /* Datatype to return */ + H5O_info_t oinfo; /* Object info of input dtype */ + hid_t ret_value = -1; /* The identifier of the named dtype in the out file */ + + if(H5Oget_info(type_in, &oinfo) < 0) + goto error; + + if(*named_dt_head_p) { + /* Stack already exists, search for the datatype */ + while(dt && dt->addr_in != oinfo.addr) + dt = dt->next; + + dt_ret = dt; + } else { + /* Create the stack */ + size_t i; + + for(i=0; inobjs; i++) + if(travt->objs[i].type == H5TRAV_TYPE_NAMED_DATATYPE) { + /* Push onto the stack */ + if(NULL == (dt = (named_dt_t *) HDmalloc(sizeof(named_dt_t)))) + goto error; + dt->next = *named_dt_head_p; + *named_dt_head_p = dt; + + /* Update the address and id */ + dt->addr_in = travt->objs[i].objno; + dt->id_out = -1; + + /* Check if this type is the one requested */ + if(oinfo.addr == dt->addr_in) { + HDassert(!dt_ret); + dt_ret = dt; + } /* end if */ + } /* end if */ + } /* end else */ + + /* Handle the case that the requested datatype was not found. This is + * possible if the datatype was committed anonymously in the input file. */ + if(!dt_ret) { + /* Push the new datatype onto the stack */ + if(NULL == (dt_ret = (named_dt_t *) HDmalloc(sizeof(named_dt_t)))) + goto error; + dt_ret->next = *named_dt_head_p; + *named_dt_head_p = dt_ret; + + /* Update the address and id */ + dt_ret->addr_in = oinfo.addr; + dt_ret->id_out = -1; + } /* end if */ + + /* If the requested datatype does not yet exist in the output file, copy it + * anonymously */ + if(dt_ret->id_out < 0) { + if (options->use_native==1) + dt_ret->id_out = h5tools_get_native_type(type_in); + else + dt_ret->id_out = H5Tcopy(type_in); + if(dt_ret->id_out < 0) + goto error; + if(H5Tcommit_anon(fidout, dt_ret->id_out, H5P_DEFAULT, H5P_DEFAULT) < 0) + goto error; + } /* end if */ + + /* Set return value */ + ret_value = dt_ret->id_out; + + /* Increment the ref count on id_out, because the calling function will try + * to close it */ + if(H5Iinc_ref(ret_value) < 0) + goto error; + + return(ret_value); + +error: + return(-1); +} /* end copy_named_datatype */ + + +/*------------------------------------------------------------------------- +* Function: named_datatype_free +* +* Purpose: Frees the stack of named datatypes. +* +* Programmer: Neil Fortner +* +* Date: April 14, 2009 +* +*------------------------------------------------------------------------- +*/ +static int +named_datatype_free(named_dt_t **named_dt_head_p, int ignore_err) +{ + named_dt_t *dt = *named_dt_head_p; + + while(dt) { + /* Pop the datatype off the stack and free it */ + if(H5Tclose(dt->id_out) < 0 && !ignore_err) + goto error; + dt = dt->next; + HDfree(*named_dt_head_p); + *named_dt_head_p = dt; + } /* end while */ + + return 0; + +error: + return -1; +} /* end named_datatype_free */ + + /*------------------------------------------------------------------------- * Function: copy_user_block * diff --git a/tools/h5repack/h5repacktst.c b/tools/h5repack/h5repacktst.c index 9a753ef6e5..af4d67105a 100644 --- a/tools/h5repack/h5repacktst.c +++ b/tools/h5repack/h5repacktst.c @@ -72,6 +72,9 @@ /* File w/userblock */ #define FNAME16 "h5repack_ub.h5" #define FNAME16OUT "h5repack_ub_out.h5" +/* Named datatypes */ +#define FNAME17 "h5repack_named_dtypes.h5" +#define FNAME17OUT "h5repack_named_dtypes_out.h5" #define FNAME_UB "ublock.bin" @@ -129,6 +132,7 @@ static int make_external(hid_t loc_id); static int make_userblock(void); static int verify_userblock( const char* filename); static int make_userblock_file(void); +static int make_named_dtype(hid_t loc_id); /*------------------------------------------------------------------------- @@ -1487,6 +1491,26 @@ int main (void) #endif + /*------------------------------------------------------------------------- + * test file with userblock + *------------------------------------------------------------------------- + */ + TESTING(" file with committed datatypes"); + + if(h5repack_init(&pack_options, 0) < 0) + GOERROR; + + if(h5repack(FNAME17, FNAME17OUT, &pack_options) < 0) + GOERROR; + if(h5diff(FNAME17, FNAME17OUT, NULL, NULL, &diff_options) > 0) + GOERROR; + if(h5repack_verify(FNAME17OUT, &pack_options) <= 0) + GOERROR; + if(h5repack_end(&pack_options) < 0) + GOERROR; + + + PASSED(); @@ -1706,6 +1730,17 @@ int make_testfiles(void) if(make_userblock_file() < 0) goto out; + /*------------------------------------------------------------------------- + * create a file with named datatypes + *------------------------------------------------------------------------- + */ + if((fid = H5Fcreate(FNAME17,H5F_ACC_TRUNC,H5P_DEFAULT,H5P_DEFAULT)) < 0) + return -1; + if (make_named_dtype(fid) < 0) + goto out; + if(H5Fclose(fid) < 0) + return -1; + return 0; out: @@ -5374,4 +5409,128 @@ out: } +/*------------------------------------------------------------------------- +* Function: make_named_dtype +* +* Purpose: create a file with named datatypes in various configurations +* +*------------------------------------------------------------------------- +*/ +static +int make_named_dtype(hid_t loc_id) +{ + hsize_t dims[1] ={3}; + hid_t did=-1; + hid_t aid=-1; + hid_t sid=-1; + hid_t tid=-1; + hid_t gid=-1; + + if ((sid = H5Screate_simple(1, dims, NULL)) < 0) + goto out; + + /* Create a dataset with an anonymous committed datatype as the first thing + * h5repack sees */ + if((tid = H5Tcopy(H5T_STD_I16LE)) < 0) + goto out; + if(H5Tcommit_anon(loc_id, tid, H5P_DEFAULT, H5P_DEFAULT) < 0) + goto out; + if ((did = H5Dcreate2(loc_id, "A", tid, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) + goto out; + if(H5Tclose(tid) < 0) + goto out; + + /* Create an attribute on that dataset that uses a committed datatype in + * a remote group */ + if((gid = H5Gcreate2(loc_id, "M", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) + goto out; + if(H5Gclose(gid) < 0) + goto out; + if((gid = H5Gcreate2(loc_id, "M/M", H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) + goto out; + if(H5Gclose(gid) < 0) + goto out; + if((tid = H5Tcopy(H5T_STD_I16BE)) < 0) + goto out; + if(H5Tcommit2(loc_id, "/M/M/A", tid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT) < 0) + goto out; + if((aid = H5Acreate2(did, "A", tid, sid, H5P_DEFAULT, H5P_DEFAULT)) < 0) + goto out; + if(H5Aclose(aid) < 0) + goto out; + if(H5Tclose(tid) < 0) + goto out; + if(H5Dclose(did) < 0) + goto out; + + /* Create a dataset in the remote group that uses a committed datatype in + * the root group */ + if((tid = H5Tcopy(H5T_STD_I32LE)) < 0) + goto out; + if(H5Tcommit2(loc_id, "N", tid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT) < 0) + goto out; + if((did = H5Dcreate2(loc_id, "M/M/B", tid, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) + goto out; + if(H5Tclose(tid) < 0) + goto out; + + /* Create an attribute on the remote dataset that uses an anonymous + * committed datatype */ + if((tid = H5Tcopy(H5T_STD_I32BE)) < 0) + goto out; + if(H5Tcommit_anon(loc_id, tid, H5P_DEFAULT, H5P_DEFAULT) < 0) + goto out; + if((aid = H5Acreate2(did, "A", tid, sid, H5P_DEFAULT, H5P_DEFAULT)) < 0) + goto out; + if(H5Aclose(aid) < 0) + goto out; + + /* Create another attribute that uses the same anonymous datatype */ + if((aid = H5Acreate2(did, "B", tid, sid, H5P_DEFAULT, H5P_DEFAULT)) < 0) + goto out; + if(H5Aclose(aid) < 0) + goto out; + if(H5Tclose(tid) < 0) + goto out; + if(H5Dclose(did) < 0) + goto out; + + /* Create a dataset in the root group that uses the committed datatype in + * the root group */ + if((tid = H5Topen2(loc_id, "N", H5P_DEFAULT)) < 0) + goto out; + if((did = H5Dcreate2(loc_id, "O", tid, sid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) + goto out; + if(H5Dclose(did) < 0) + goto out; + + /* Create 2 attributes on the committed datatype that use that datatype */ + if((aid = H5Acreate2(tid, "A", tid, sid, H5P_DEFAULT, H5P_DEFAULT)) < 0) + goto out; + if(H5Aclose(aid) < 0) + goto out; + if((aid = H5Acreate2(tid, "B", tid, sid, H5P_DEFAULT, H5P_DEFAULT)) < 0) + goto out; + if(H5Aclose(aid) < 0) + goto out; + if(H5Tclose(tid) < 0) + goto out; + + /* Close */ + if (H5Sclose(sid) < 0) + goto out; + + return 0; + +out: + H5E_BEGIN_TRY + { + H5Tclose(tid); + H5Aclose(aid); + H5Sclose(sid); + H5Dclose(did); + H5Gclose(gid); + } H5E_END_TRY; + return -1; +} /* end make_named_dtype() */ diff --git a/tools/h5repack/testfiles/h5repack_named_dtypes.h5 b/tools/h5repack/testfiles/h5repack_named_dtypes.h5 new file mode 100644 index 0000000000000000000000000000000000000000..108bb9f393d3d829b9e89863dabd78d47984a6bd GIT binary patch literal 4304 zcmeHK!A=4(5S95YS)UuLot=K%w8>edm8Od&Vpt6Gum@^p#W&ZnuTqftPev+-me@4LfUwH>+@E@@)2Ga!< zKfV%yiqyShDt*VsUc6LXZeztc4{*IC_m