mirror of
https://github.com/HDFGroup/hdf5.git
synced 2025-01-12 15:04:59 +08:00
11dfa25910
* Updated source file copyright headers to remove "Copyright by the Board of Trustees of the University of Illinois", which is kept in the top-level COPYING file.
864 lines
29 KiB
C
864 lines
29 KiB
C
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
|
* Copyright by The HDF Group. *
|
|
* All rights reserved. *
|
|
* *
|
|
* This file is part of HDF5. The full HDF5 copyright notice, including *
|
|
* terms governing use, modification, and redistribution, is contained in *
|
|
* the COPYING file, which can be found at the root of the source code *
|
|
* distribution tree, or in https://www.hdfgroup.org/licenses. *
|
|
* If you do not have access to either file, you may request a copy from *
|
|
* help@hdfgroup.org. *
|
|
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
|
|
|
/* Unicode test */
|
|
#include "testhdf5.h"
|
|
|
|
/* Sizing of the generated test string.  write_char() emits at most four
 * bytes per code point, hence the factor of 4; the +1 is for the NUL. */
#define NUM_CHARS         16
#define MAX_STRING_LENGTH ((NUM_CHARS * 4) + 1)    /* Max length in bytes */
#define MAX_PATH_LENGTH   (MAX_STRING_LENGTH + 20) /* Max length in bytes */

/* Generated code points are always strictly below this value */
#define MAX_CODE_POINT 0x200000

/* Name of the HDF5 file created by the test */
#define FILENAME "unicode.h5"

/* A buffer to hold two copies of the UTF-8 string */
#define LONG_BUF_SIZE (2 * MAX_STRING_LENGTH + 4)

/* Names of the datasets created in the file */
#define DSET1_NAME    "fl_string_dataset"
#define DSET3_NAME    "dataset3"
#define DSET4_NAME    "dataset4"
#define VL_DSET1_NAME "vl_dset_1"

/* Names of the groups created in the file */
#define GROUP1_NAME "group1"
#define GROUP2_NAME "group2"
#define GROUP3_NAME "group3"
#define GROUP4_NAME "group4"

/* Rank of every dataspace used by these tests */
#define RANK 1

/* Field values written by the compound datatype test */
#define COMP_INT_VAL    7
#define COMP_FLOAT_VAL  (-42.0F)
#define COMP_DOUBLE_VAL 42.0
|
|
|
|
/* Test function prototypes */
|
|
void test_fl_string(hid_t fid, const char *string);
|
|
void test_strpad(hid_t fid, const char *string);
|
|
void test_vl_string(hid_t fid, const char *string);
|
|
void test_objnames(hid_t fid, const char *string);
|
|
void test_attrname(hid_t fid, const char *string);
|
|
void test_compound(hid_t fid, const char *string);
|
|
void test_enum(hid_t fid, const char *string);
|
|
void test_opaque(hid_t fid, const char *string);
|
|
|
|
/* Utility function prototypes */
|
|
static hid_t mkstr(size_t len, H5T_str_t strpad);
|
|
unsigned int write_char(unsigned int c, char *test_string, unsigned int cur_pos);
|
|
void dump_string(const char *string);
|
|
|
|
/*
|
|
* test_fl_string
|
|
* Tests that UTF-8 can be used for fixed-length string data.
|
|
* Writes the string to a dataset and reads it back again.
|
|
*/
|
|
void
|
|
test_fl_string(hid_t fid, const char *string)
|
|
{
|
|
hid_t dtype_id, space_id, dset_id;
|
|
hsize_t dims = 1;
|
|
char read_buf[MAX_STRING_LENGTH];
|
|
H5T_cset_t cset;
|
|
herr_t ret;
|
|
|
|
/* Create the datatype, ensure that the character set behaves
|
|
* correctly (it should default to ASCII and can be set to UTF8)
|
|
*/
|
|
dtype_id = H5Tcopy(H5T_C_S1);
|
|
CHECK(dtype_id, FAIL, "H5Tcopy");
|
|
ret = H5Tset_size(dtype_id, (size_t)MAX_STRING_LENGTH);
|
|
CHECK(ret, FAIL, "H5Tset_size");
|
|
cset = H5Tget_cset(dtype_id);
|
|
VERIFY(cset, H5T_CSET_ASCII, "H5Tget_cset");
|
|
ret = H5Tset_cset(dtype_id, H5T_CSET_UTF8);
|
|
CHECK(ret, FAIL, "H5Tset_cset");
|
|
cset = H5Tget_cset(dtype_id);
|
|
VERIFY(cset, H5T_CSET_UTF8, "H5Tget_cset");
|
|
|
|
/* Create dataspace for a dataset */
|
|
space_id = H5Screate_simple(RANK, &dims, NULL);
|
|
CHECK(space_id, FAIL, "H5Screate_simple");
|
|
|
|
/* Create a dataset */
|
|
dset_id = H5Dcreate2(fid, DSET1_NAME, dtype_id, space_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(dset_id, FAIL, "H5Dcreate2");
|
|
|
|
/* Write UTF-8 string to dataset */
|
|
ret = H5Dwrite(dset_id, dtype_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, string);
|
|
CHECK(ret, FAIL, "H5Dwrite");
|
|
|
|
/* Read string back and make sure it is unchanged */
|
|
ret = H5Dread(dset_id, dtype_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, read_buf);
|
|
CHECK(ret, FAIL, "H5Dread");
|
|
|
|
VERIFY(HDstrcmp(string, read_buf), 0, "strcmp");
|
|
|
|
/* Close all */
|
|
ret = H5Dclose(dset_id);
|
|
CHECK(ret, FAIL, "H5Dclose");
|
|
|
|
ret = H5Tclose(dtype_id);
|
|
CHECK(ret, FAIL, "H5Tclose");
|
|
ret = H5Sclose(space_id);
|
|
CHECK(ret, FAIL, "H5Sclose");
|
|
}
|
|
|
|
/*
|
|
* test_strpad
|
|
* Tests string padding for a UTF-8 string.
|
|
* Converts strings to shorter and then longer strings.
|
|
* Borrows heavily from dtypes.c, but is more complicated because
|
|
* the string is randomly generated.
|
|
*/
|
|
void
|
|
test_strpad(hid_t H5_ATTR_UNUSED fid, const char *string)
|
|
{
|
|
/* buf is used to hold the data that H5Tconvert operates on. */
|
|
char buf[LONG_BUF_SIZE];
|
|
|
|
/* cmpbuf holds the output that H5Tconvert should produce,
|
|
* to compare against the actual output. */
|
|
char cmpbuf[LONG_BUF_SIZE];
|
|
|
|
/* new_string is a slightly modified version of the UTF-8
|
|
* string to make the tests run more smoothly. */
|
|
char new_string[MAX_STRING_LENGTH + 2];
|
|
|
|
size_t length; /* Length of new_string in bytes */
|
|
size_t small_len; /* Size of the small datatype */
|
|
size_t big_len; /* Size of the larger datatype */
|
|
hid_t src_type, dst_type;
|
|
herr_t ret;
|
|
|
|
/* The following tests are simpler if the UTF-8 string contains
|
|
* the right number of bytes (even or odd, depending on the test).
|
|
* We create a 'new_string' whose length is convenient by prepending
|
|
* an 'x' to 'string' when necessary. */
|
|
length = HDstrlen(string);
|
|
if (length % 2 != 1) {
|
|
HDstrcpy(new_string, "x");
|
|
HDstrcat(new_string, string);
|
|
length++;
|
|
}
|
|
else {
|
|
HDstrcpy(new_string, string);
|
|
}
|
|
|
|
/* Convert a null-terminated string to a shorter and longer null
|
|
* terminated string. */
|
|
|
|
/* Create a src_type that holds the UTF-8 string and its final NULL */
|
|
big_len = length + 1; /* +1 byte for final NULL */
|
|
HDassert((2 * big_len) <= sizeof(cmpbuf));
|
|
src_type = mkstr(big_len, H5T_STR_NULLTERM);
|
|
CHECK(src_type, FAIL, "mkstr");
|
|
/* Create a dst_type that holds half of the UTF-8 string and a final
|
|
* NULL */
|
|
small_len = (length + 1) / 2;
|
|
dst_type = mkstr(small_len, H5T_STR_NULLTERM);
|
|
CHECK(dst_type, FAIL, "mkstr");
|
|
|
|
/* Fill the buffer with two copies of the UTF-8 string, each with a
|
|
* terminating NULL. It will look like "abcdefg\0abcdefg\0". */
|
|
HDstrncpy(buf, new_string, big_len);
|
|
HDstrncpy(&buf[big_len], new_string, big_len);
|
|
|
|
ret = H5Tconvert(src_type, dst_type, (size_t)2, buf, NULL, H5P_DEFAULT);
|
|
CHECK(ret, FAIL, "H5Tconvert");
|
|
|
|
/* After conversion, the buffer should look like
|
|
* "abc\0abc\0abcdefg\0". Note that this is just what the bytes look
|
|
* like; UTF-8 characters may well have been truncated.
|
|
* To check that the conversion worked properly, we'll build this
|
|
* string manually. */
|
|
HDstrncpy(cmpbuf, new_string, small_len - 1);
|
|
cmpbuf[small_len - 1] = '\0';
|
|
HDstrncpy(&cmpbuf[small_len], new_string, small_len - 1);
|
|
cmpbuf[2 * small_len - 1] = '\0';
|
|
HDstrcpy(&cmpbuf[2 * small_len], new_string);
|
|
|
|
VERIFY(HDmemcmp(buf, cmpbuf, 2 * big_len), 0, "HDmemcmp");
|
|
|
|
/* Now convert from smaller datatype to bigger datatype. This should
|
|
* leave our buffer looking like: "abc\0\0\0\0\0abc\0\0\0\0\0" */
|
|
ret = H5Tconvert(dst_type, src_type, (size_t)2, buf, NULL, H5P_DEFAULT);
|
|
CHECK(ret, FAIL, "H5Tconvert");
|
|
|
|
/* First fill the buffer with NULLs */
|
|
HDmemset(cmpbuf, '\0', (size_t)LONG_BUF_SIZE);
|
|
/* Copy in the characters */
|
|
HDstrncpy(cmpbuf, new_string, small_len - 1);
|
|
HDstrncpy(&cmpbuf[big_len], new_string, small_len - 1);
|
|
|
|
VERIFY(HDmemcmp(buf, cmpbuf, 2 * big_len), 0, "HDmemcmp");
|
|
|
|
ret = H5Tclose(src_type);
|
|
CHECK(ret, FAIL, "H5Tclose");
|
|
ret = H5Tclose(dst_type);
|
|
CHECK(ret, FAIL, "H5Tclose");
|
|
|
|
/* Now test null padding. Null-padded strings do *not* need
|
|
* terminating NULLs, so the sizes of the datatypes are slightly
|
|
* different and we want a string with an even number of characters. */
|
|
length = HDstrlen(string);
|
|
if (length % 2 != 0) {
|
|
HDstrcpy(new_string, "x");
|
|
HDstrcat(new_string, string);
|
|
length++;
|
|
}
|
|
else {
|
|
HDstrcpy(new_string, string);
|
|
}
|
|
|
|
/* Create a src_type that holds the UTF-8 string */
|
|
big_len = length;
|
|
HDassert((2 * big_len) <= sizeof(cmpbuf));
|
|
src_type = mkstr(big_len, H5T_STR_NULLPAD);
|
|
CHECK(src_type, FAIL, "mkstr");
|
|
/* Create a dst_type that holds half of the UTF-8 string */
|
|
small_len = length / 2;
|
|
dst_type = mkstr(small_len, H5T_STR_NULLPAD);
|
|
CHECK(dst_type, FAIL, "mkstr");
|
|
|
|
/* Fill the buffer with two copies of the UTF-8 string.
|
|
* It will look like "abcdefghabcdefgh". */
|
|
HDstrncpy(buf, new_string, big_len);
|
|
HDstrncpy(&buf[big_len], new_string, big_len);
|
|
|
|
ret = H5Tconvert(src_type, dst_type, (size_t)2, buf, NULL, H5P_DEFAULT);
|
|
CHECK(ret, FAIL, "H5Tconvert");
|
|
|
|
/* After conversion, the buffer should look like
|
|
* "abcdabcdabcdefgh". Note that this is just what the bytes look
|
|
* like; UTF-8 characters may well have been truncated.
|
|
* To check that the conversion worked properly, we'll build this
|
|
* string manually. */
|
|
HDstrncpy(cmpbuf, new_string, small_len);
|
|
HDstrncpy(&cmpbuf[small_len], new_string, small_len);
|
|
HDstrncpy(&cmpbuf[2 * small_len], new_string, big_len);
|
|
|
|
VERIFY(HDmemcmp(buf, cmpbuf, 2 * big_len), 0, "HDmemcmp");
|
|
|
|
/* Now convert from smaller datatype to bigger datatype. This should
|
|
* leave our buffer looking like: "abcd\0\0\0\0abcd\0\0\0\0" */
|
|
ret = H5Tconvert(dst_type, src_type, (size_t)2, buf, NULL, H5P_DEFAULT);
|
|
CHECK(ret, FAIL, "H5Tconvert");
|
|
|
|
/* First fill the buffer with NULLs */
|
|
HDmemset(cmpbuf, '\0', (size_t)LONG_BUF_SIZE);
|
|
/* Copy in the characters */
|
|
HDstrncpy(cmpbuf, new_string, small_len);
|
|
HDstrncpy(&cmpbuf[big_len], new_string, small_len);
|
|
|
|
VERIFY(HDmemcmp(buf, cmpbuf, 2 * big_len), 0, "HDmemcmp");
|
|
|
|
ret = H5Tclose(src_type);
|
|
CHECK(ret, FAIL, "H5Tclose");
|
|
ret = H5Tclose(dst_type);
|
|
CHECK(ret, FAIL, "H5Tclose");
|
|
|
|
/* Test space padding. This is very similar to null-padding; we can
|
|
use the same values of length, small_len, and big_len. */
|
|
|
|
src_type = mkstr(big_len, H5T_STR_SPACEPAD);
|
|
CHECK(src_type, FAIL, "mkstr");
|
|
dst_type = mkstr(small_len, H5T_STR_SPACEPAD);
|
|
CHECK(src_type, FAIL, "mkstr");
|
|
|
|
/* Fill the buffer with two copies of the UTF-8 string.
|
|
* It will look like "abcdefghabcdefgh". */
|
|
HDstrcpy(buf, new_string);
|
|
HDstrcpy(&buf[big_len], new_string);
|
|
|
|
ret = H5Tconvert(src_type, dst_type, (size_t)2, buf, NULL, H5P_DEFAULT);
|
|
CHECK(ret, FAIL, "H5Tconvert");
|
|
|
|
/* After conversion, the buffer should look like
|
|
* "abcdabcdabcdefgh". Note that this is just what the bytes look
|
|
* like; UTF-8 characters may have been truncated.
|
|
* To check that the conversion worked properly, we'll build this
|
|
* string manually. */
|
|
HDstrncpy(cmpbuf, new_string, small_len);
|
|
HDstrncpy(&cmpbuf[small_len], new_string, small_len);
|
|
HDstrncpy(&cmpbuf[2 * small_len], new_string, big_len);
|
|
|
|
VERIFY(HDmemcmp(buf, cmpbuf, 2 * big_len), 0, "HDmemcmp");
|
|
|
|
/* Now convert from smaller datatype to bigger datatype. This should
|
|
* leave our buffer looking like: "abcd abcd " */
|
|
ret = H5Tconvert(dst_type, src_type, (size_t)2, buf, NULL, H5P_DEFAULT);
|
|
CHECK(ret, FAIL, "H5Tconvert");
|
|
|
|
/* First fill the buffer with spaces */
|
|
HDmemset(cmpbuf, ' ', (size_t)LONG_BUF_SIZE);
|
|
/* Copy in the characters */
|
|
HDstrncpy(cmpbuf, new_string, small_len);
|
|
HDstrncpy(&cmpbuf[big_len], new_string, small_len);
|
|
|
|
VERIFY(HDmemcmp(buf, cmpbuf, 2 * big_len), 0, "HDmemcmp");
|
|
|
|
ret = H5Tclose(src_type);
|
|
CHECK(ret, FAIL, "H5Tclose");
|
|
ret = H5Tclose(dst_type);
|
|
CHECK(ret, FAIL, "H5Tclose");
|
|
}
|
|
|
|
/*
|
|
* test_vl_string
|
|
* Tests variable-length string datatype with UTF-8 strings.
|
|
*/
|
|
void
|
|
test_vl_string(hid_t fid, const char *string)
|
|
{
|
|
hid_t type_id, space_id, dset_id;
|
|
hsize_t dims = 1;
|
|
hsize_t size; /* Number of bytes used */
|
|
char *read_buf[1];
|
|
herr_t ret;
|
|
|
|
/* Create dataspace for datasets */
|
|
space_id = H5Screate_simple(RANK, &dims, NULL);
|
|
CHECK(space_id, FAIL, "H5Screate_simple");
|
|
|
|
/* Create a datatype to refer to */
|
|
type_id = H5Tcopy(H5T_C_S1);
|
|
CHECK(type_id, FAIL, "H5Tcopy");
|
|
ret = H5Tset_size(type_id, H5T_VARIABLE);
|
|
CHECK(ret, FAIL, "H5Tset_size");
|
|
|
|
/* Create a dataset */
|
|
dset_id = H5Dcreate2(fid, VL_DSET1_NAME, type_id, space_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(dset_id, FAIL, "H5Dcreate2");
|
|
|
|
/* Write dataset to disk */
|
|
ret = H5Dwrite(dset_id, type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, &string);
|
|
CHECK(ret, FAIL, "H5Dwrite");
|
|
|
|
/* Make certain the correct amount of memory will be used */
|
|
ret = H5Dvlen_get_buf_size(dset_id, type_id, space_id, &size);
|
|
CHECK(ret, FAIL, "H5Dvlen_get_buf_size");
|
|
VERIFY(size, (hsize_t)HDstrlen(string) + 1, "H5Dvlen_get_buf_size");
|
|
|
|
/* Read dataset from disk */
|
|
ret = H5Dread(dset_id, type_id, H5S_ALL, H5S_ALL, H5P_DEFAULT, read_buf);
|
|
CHECK(ret, FAIL, "H5Dread");
|
|
|
|
/* Compare data read in */
|
|
VERIFY(HDstrcmp(string, read_buf[0]), 0, "strcmp");
|
|
|
|
/* Reclaim the read VL data */
|
|
ret = H5Treclaim(type_id, space_id, H5P_DEFAULT, read_buf);
|
|
CHECK(ret, FAIL, "H5Treclaim");
|
|
|
|
/* Close all */
|
|
ret = H5Dclose(dset_id);
|
|
CHECK(ret, FAIL, "H5Dclose");
|
|
ret = H5Tclose(type_id);
|
|
CHECK(ret, FAIL, "H5Tclose");
|
|
ret = H5Sclose(space_id);
|
|
CHECK(ret, FAIL, "H5Sclose");
|
|
}
|
|
|
|
/*
|
|
* test_objnames
|
|
* Tests that UTF-8 can be used for object names in the file.
|
|
* Tests groups, datasets, named datatypes, and soft links.
|
|
* Note that this test doesn't actually mark the names as being
|
|
* in UTF-8. At the time this test was written, that feature
|
|
* didn't exist in HDF5, and when the character encoding property
|
|
* was added to links it didn't change how they were stored in the file,
|
|
* -JML 2/2/2006
|
|
*/
|
|
void
|
|
test_objnames(hid_t fid, const char *string)
|
|
{
|
|
hid_t grp_id, grp1_id, grp2_id, grp3_id;
|
|
hid_t type_id, dset_id, space_id;
|
|
char read_buf[MAX_STRING_LENGTH];
|
|
char path_buf[MAX_PATH_LENGTH];
|
|
hsize_t dims = 1;
|
|
hobj_ref_t obj_ref;
|
|
ssize_t size;
|
|
herr_t ret;
|
|
|
|
/* Create a group with a UTF-8 name */
|
|
grp_id = H5Gcreate2(fid, string, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(grp_id, FAIL, "H5Gcreate2");
|
|
|
|
/* Set a comment on the group to test that we can access the group
|
|
* Also test that UTF-8 comments can be read.
|
|
*/
|
|
ret = H5Oset_comment_by_name(fid, string, string, H5P_DEFAULT);
|
|
CHECK(ret, FAIL, "H5Oset_comment_by_name");
|
|
size = H5Oget_comment_by_name(fid, string, read_buf, (size_t)MAX_STRING_LENGTH, H5P_DEFAULT);
|
|
CHECK(size, FAIL, "H5Oget_comment_by_name");
|
|
|
|
ret = H5Gclose(grp_id);
|
|
CHECK(ret, FAIL, "H5Gclose");
|
|
|
|
VERIFY(HDstrcmp(string, read_buf), 0, "strcmp");
|
|
|
|
/* Create a new dataset with a UTF-8 name */
|
|
grp1_id = H5Gcreate2(fid, GROUP1_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(grp1_id, FAIL, "H5Gcreate2");
|
|
|
|
space_id = H5Screate_simple(RANK, &dims, NULL);
|
|
CHECK(space_id, FAIL, "H5Screate_simple");
|
|
dset_id = H5Dcreate2(grp1_id, string, H5T_NATIVE_INT, space_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(dset_id, FAIL, "H5Dcreate2");
|
|
|
|
/* Make sure that dataset can be opened again */
|
|
ret = H5Dclose(dset_id);
|
|
CHECK(ret, FAIL, "H5Dclose");
|
|
ret = H5Sclose(space_id);
|
|
CHECK(ret, FAIL, "H5Sclose");
|
|
|
|
dset_id = H5Dopen2(grp1_id, string, H5P_DEFAULT);
|
|
CHECK(ret, FAIL, "H5Dopen2");
|
|
ret = H5Dclose(dset_id);
|
|
CHECK(ret, FAIL, "H5Dclose");
|
|
ret = H5Gclose(grp1_id);
|
|
CHECK(ret, FAIL, "H5Gclose");
|
|
|
|
/* Do the same for a named datatype */
|
|
grp2_id = H5Gcreate2(fid, GROUP2_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(grp2_id, FAIL, "H5Gcreate2");
|
|
|
|
type_id = H5Tcreate(H5T_OPAQUE, (size_t)1);
|
|
CHECK(type_id, FAIL, "H5Tcreate");
|
|
ret = H5Tcommit2(grp2_id, string, type_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(type_id, FAIL, "H5Tcommit2");
|
|
ret = H5Tclose(type_id);
|
|
CHECK(type_id, FAIL, "H5Tclose");
|
|
|
|
type_id = H5Topen2(grp2_id, string, H5P_DEFAULT);
|
|
CHECK(type_id, FAIL, "H5Topen2");
|
|
ret = H5Tclose(type_id);
|
|
CHECK(type_id, FAIL, "H5Tclose");
|
|
|
|
/* Don't close the group -- use it to test that object references
|
|
* can refer to objects named in UTF-8 */
|
|
|
|
space_id = H5Screate_simple(RANK, &dims, NULL);
|
|
CHECK(space_id, FAIL, "H5Screate_simple");
|
|
dset_id =
|
|
H5Dcreate2(grp2_id, DSET3_NAME, H5T_STD_REF_OBJ, space_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(ret, FAIL, "H5Dcreate2");
|
|
|
|
/* Create reference to named datatype */
|
|
ret = H5Rcreate(&obj_ref, grp2_id, string, H5R_OBJECT, (hid_t)-1);
|
|
CHECK(ret, FAIL, "H5Rcreate");
|
|
/* Write selection and read it back*/
|
|
ret = H5Dwrite(dset_id, H5T_STD_REF_OBJ, H5S_ALL, H5S_ALL, H5P_DEFAULT, &obj_ref);
|
|
CHECK(ret, FAIL, "H5Dwrite");
|
|
ret = H5Dread(dset_id, H5T_STD_REF_OBJ, H5S_ALL, H5S_ALL, H5P_DEFAULT, &obj_ref);
|
|
CHECK(ret, FAIL, "H5Dread");
|
|
|
|
/* Ensure that we can open named datatype using object reference */
|
|
type_id = H5Rdereference2(dset_id, H5P_DEFAULT, H5R_OBJECT, &obj_ref);
|
|
CHECK(type_id, FAIL, "H5Rdereference2");
|
|
ret = H5Tcommitted(type_id);
|
|
VERIFY(ret, 1, "H5Tcommitted");
|
|
|
|
ret = H5Tclose(type_id);
|
|
CHECK(type_id, FAIL, "H5Tclose");
|
|
ret = H5Dclose(dset_id);
|
|
CHECK(ret, FAIL, "H5Dclose");
|
|
ret = H5Sclose(space_id);
|
|
CHECK(ret, FAIL, "H5Sclose");
|
|
|
|
ret = H5Gclose(grp2_id);
|
|
CHECK(ret, FAIL, "H5Gclose");
|
|
|
|
/* Create "group3". Build a hard link from group3 to group2, which has
|
|
* a datatype with the UTF-8 name. Create a soft link in group3
|
|
* pointing through the hard link to the datatype. Give the soft
|
|
* link a name in UTF-8. Ensure that the soft link works. */
|
|
|
|
grp3_id = H5Gcreate2(fid, GROUP3_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(grp3_id, FAIL, "H5Gcreate2");
|
|
|
|
ret = H5Lcreate_hard(fid, GROUP2_NAME, grp3_id, GROUP2_NAME, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(ret, FAIL, "H5Lcreate_hard");
|
|
HDstrcpy(path_buf, GROUP2_NAME);
|
|
HDstrcat(path_buf, "/");
|
|
HDstrcat(path_buf, string);
|
|
ret = H5Lcreate_hard(grp3_id, path_buf, H5L_SAME_LOC, string, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(ret, FAIL, "H5Lcreate_hard");
|
|
|
|
/* Open named datatype using soft link */
|
|
type_id = H5Topen2(grp3_id, string, H5P_DEFAULT);
|
|
CHECK(type_id, FAIL, "H5Topen2");
|
|
|
|
ret = H5Tclose(type_id);
|
|
CHECK(type_id, FAIL, "H5Tclose");
|
|
ret = H5Gclose(grp3_id);
|
|
CHECK(ret, FAIL, "H5Gclose");
|
|
}
|
|
|
|
/*
|
|
* test_attrname
|
|
* Test that attributes can deal with UTF-8 strings
|
|
*/
|
|
void
|
|
test_attrname(hid_t fid, const char *string)
|
|
{
|
|
hid_t group_id, attr_id;
|
|
hid_t dtype_id, space_id;
|
|
hsize_t dims = 1;
|
|
char read_buf[MAX_STRING_LENGTH];
|
|
ssize_t size;
|
|
herr_t ret;
|
|
|
|
/* Create a new group and give it an attribute whose
|
|
* name and value are UTF-8 strings.
|
|
*/
|
|
group_id = H5Gcreate2(fid, GROUP4_NAME, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(group_id, FAIL, "H5Gcreate2");
|
|
|
|
space_id = H5Screate_simple(RANK, &dims, NULL);
|
|
CHECK(space_id, FAIL, "H5Screate_simple");
|
|
dtype_id = H5Tcopy(H5T_C_S1);
|
|
CHECK(dtype_id, FAIL, "H5Tcopy");
|
|
ret = H5Tset_size(dtype_id, (size_t)MAX_STRING_LENGTH);
|
|
CHECK(ret, FAIL, "H5Tset_size");
|
|
|
|
/* Create the attribute and check that its name is correct */
|
|
attr_id = H5Acreate2(group_id, string, dtype_id, space_id, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(attr_id, FAIL, "H5Acreate2");
|
|
size = H5Aget_name(attr_id, (size_t)MAX_STRING_LENGTH, read_buf);
|
|
CHECK(size, FAIL, "H5Aget_name");
|
|
ret = HDstrcmp(read_buf, string);
|
|
VERIFY(ret, 0, "strcmp");
|
|
read_buf[0] = '\0';
|
|
|
|
/* Try writing and reading from the attribute */
|
|
ret = H5Awrite(attr_id, dtype_id, string);
|
|
CHECK(ret, FAIL, "H5Awrite");
|
|
ret = H5Aread(attr_id, dtype_id, read_buf);
|
|
CHECK(ret, FAIL, "H5Aread");
|
|
ret = HDstrcmp(read_buf, string);
|
|
VERIFY(ret, 0, "strcmp");
|
|
|
|
/* Clean up */
|
|
ret = H5Aclose(attr_id);
|
|
CHECK(ret, FAIL, "H5Aclose");
|
|
ret = H5Tclose(dtype_id);
|
|
CHECK(ret, FAIL, "H5Tclose");
|
|
ret = H5Sclose(space_id);
|
|
CHECK(ret, FAIL, "H5Sclose");
|
|
ret = H5Gclose(group_id);
|
|
CHECK(ret, FAIL, "H5Gclose");
|
|
}
|
|
|
|
/*
|
|
* test_compound
|
|
* Test that compound datatypes can have UTF-8 field names.
|
|
*/
|
|
void
|
|
test_compound(hid_t fid, const char *string)
|
|
{
|
|
/* Define two compound structures, s1_t and s2_t.
|
|
* s2_t is a subset of s1_t, with two out of three
|
|
* fields.
|
|
* This is stolen from the h5_compound example.
|
|
*/
|
|
typedef struct s1_t {
|
|
int a;
|
|
double c;
|
|
float b;
|
|
} s1_t;
|
|
typedef struct s2_t {
|
|
double c;
|
|
int a;
|
|
} s2_t;
|
|
/* Actual variable declarations */
|
|
s1_t s1;
|
|
s2_t s2;
|
|
hid_t s1_tid, s2_tid;
|
|
hid_t space_id, dset_id;
|
|
hsize_t dim = 1;
|
|
char *readbuf;
|
|
herr_t ret;
|
|
|
|
/* Initialize compound data */
|
|
HDmemset(&s1, 0, sizeof(s1_t)); /* To make purify happy */
|
|
s1.a = COMP_INT_VAL;
|
|
s1.c = COMP_DOUBLE_VAL;
|
|
s1.b = COMP_FLOAT_VAL;
|
|
|
|
/* Create compound datatypes using UTF-8 field name */
|
|
s1_tid = H5Tcreate(H5T_COMPOUND, sizeof(s1_t));
|
|
CHECK(s1_tid, FAIL, "H5Tcreate");
|
|
ret = H5Tinsert(s1_tid, string, HOFFSET(s1_t, a), H5T_NATIVE_INT);
|
|
CHECK(ret, FAIL, "H5Tinsert");
|
|
|
|
/* Check that the field name was stored correctly */
|
|
readbuf = H5Tget_member_name(s1_tid, 0);
|
|
ret = HDstrcmp(readbuf, string);
|
|
VERIFY(ret, 0, "strcmp");
|
|
H5free_memory(readbuf);
|
|
|
|
/* Add the other fields to the datatype */
|
|
ret = H5Tinsert(s1_tid, "c_name", HOFFSET(s1_t, c), H5T_NATIVE_DOUBLE);
|
|
CHECK(ret, FAIL, "H5Tinsert");
|
|
ret = H5Tinsert(s1_tid, "b_name", HOFFSET(s1_t, b), H5T_NATIVE_FLOAT);
|
|
CHECK(ret, FAIL, "H5Tinsert");
|
|
|
|
/* Create second datatype, with only two fields. */
|
|
s2_tid = H5Tcreate(H5T_COMPOUND, sizeof(s2_t));
|
|
CHECK(s2_tid, FAIL, "H5Tcreate");
|
|
ret = H5Tinsert(s2_tid, "c_name", HOFFSET(s2_t, c), H5T_NATIVE_DOUBLE);
|
|
CHECK(ret, FAIL, "H5Tinsert");
|
|
ret = H5Tinsert(s2_tid, string, HOFFSET(s2_t, a), H5T_NATIVE_INT);
|
|
CHECK(ret, FAIL, "H5Tinsert");
|
|
|
|
/* Create the dataspace and dataset. */
|
|
space_id = H5Screate_simple(1, &dim, NULL);
|
|
CHECK(space_id, FAIL, "H5Screate_simple");
|
|
dset_id = H5Dcreate2(fid, DSET4_NAME, s1_tid, space_id, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(dset_id, FAIL, "H5Dcreate2");
|
|
|
|
/* Write data to the dataset. */
|
|
ret = H5Dwrite(dset_id, s1_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &s1);
|
|
CHECK(ret, FAIL, "H5Dwrite");
|
|
|
|
/* Ensure that data can be read back by field name into s2 struct */
|
|
ret = H5Dread(dset_id, s2_tid, H5S_ALL, H5S_ALL, H5P_DEFAULT, &s2);
|
|
CHECK(ret, FAIL, "H5Dread");
|
|
|
|
VERIFY(s2.a, COMP_INT_VAL, "H5Dread");
|
|
VERIFY(s2.c, COMP_DOUBLE_VAL, "H5Dread");
|
|
|
|
/* Clean up */
|
|
ret = H5Tclose(s1_tid);
|
|
CHECK(ret, FAIL, "H5Tclose");
|
|
ret = H5Tclose(s2_tid);
|
|
CHECK(ret, FAIL, "H5Tclose");
|
|
ret = H5Sclose(space_id);
|
|
CHECK(ret, FAIL, "H5Sclose");
|
|
ret = H5Dclose(dset_id);
|
|
CHECK(ret, FAIL, "H5Dclose");
|
|
}
|
|
|
|
/*
|
|
* test_enum
|
|
* Test that enumerated datatypes can have UTF-8 member names.
|
|
*/
|
|
void
|
|
test_enum(hid_t H5_ATTR_UNUSED fid, const char *string)
|
|
{
|
|
/* Define an enumerated type */
|
|
typedef enum { E1_RED, E1_GREEN, E1_BLUE, E1_WHITE } c_e1;
|
|
/* Variable declarations */
|
|
c_e1 val;
|
|
herr_t ret;
|
|
hid_t type_id;
|
|
char readbuf[MAX_STRING_LENGTH];
|
|
|
|
/* Create an enumerated datatype in HDF5 with a UTF-8 member name*/
|
|
type_id = H5Tcreate(H5T_ENUM, sizeof(c_e1));
|
|
CHECK(type_id, FAIL, "H5Tcreate");
|
|
val = E1_RED;
|
|
ret = H5Tenum_insert(type_id, "RED", &val);
|
|
CHECK(ret, FAIL, "H5Tenum_insert");
|
|
val = E1_GREEN;
|
|
ret = H5Tenum_insert(type_id, "GREEN", &val);
|
|
CHECK(ret, FAIL, "H5Tenum_insert");
|
|
val = E1_BLUE;
|
|
ret = H5Tenum_insert(type_id, "BLUE", &val);
|
|
CHECK(ret, FAIL, "H5Tenum_insert");
|
|
val = E1_WHITE;
|
|
ret = H5Tenum_insert(type_id, string, &val);
|
|
CHECK(ret, FAIL, "H5Tenum_insert");
|
|
|
|
/* Ensure that UTF-8 member name gives the right value and vice versa. */
|
|
ret = H5Tenum_valueof(type_id, string, &val);
|
|
CHECK(ret, FAIL, "H5Tenum_valueof");
|
|
VERIFY(val, E1_WHITE, "H5Tenum_valueof");
|
|
ret = H5Tenum_nameof(type_id, &val, readbuf, (size_t)MAX_STRING_LENGTH);
|
|
CHECK(ret, FAIL, "H5Tenum_nameof");
|
|
ret = HDstrcmp(readbuf, string);
|
|
VERIFY(ret, 0, "strcmp");
|
|
|
|
/* Close the datatype */
|
|
ret = H5Tclose(type_id);
|
|
CHECK(ret, FAIL, "H5Tclose");
|
|
}
|
|
|
|
/*
|
|
* test_opaque
|
|
* Test comments on opaque datatypes
|
|
*/
|
|
void
|
|
test_opaque(hid_t H5_ATTR_UNUSED fid, const char *string)
|
|
{
|
|
hid_t type_id;
|
|
char *read_buf;
|
|
herr_t ret;
|
|
|
|
/* Create an opaque type and give it a UTF-8 tag */
|
|
type_id = H5Tcreate(H5T_OPAQUE, (size_t)4);
|
|
CHECK(type_id, FAIL, "H5Tcreate");
|
|
ret = H5Tset_tag(type_id, string);
|
|
CHECK(ret, FAIL, "H5Tset_tag");
|
|
|
|
/* Read the tag back. */
|
|
read_buf = H5Tget_tag(type_id);
|
|
ret = HDstrcmp(read_buf, string);
|
|
VERIFY(ret, 0, "H5Tget_tag");
|
|
H5free_memory(read_buf);
|
|
|
|
ret = H5Tclose(type_id);
|
|
CHECK(ret, FAIL, "H5Tclose");
|
|
}
|
|
|
|
/*********************/
|
|
/* Utility functions */
|
|
/*********************/
|
|
|
|
/* mkstr
|
|
* Borrwed from dtypes.c.
|
|
* Creates a new string data type. Used in string padding tests */
|
|
static hid_t
|
|
mkstr(size_t len, H5T_str_t strpad)
|
|
{
|
|
hid_t t;
|
|
if ((t = H5Tcopy(H5T_C_S1)) < 0)
|
|
return -1;
|
|
if (H5Tset_size(t, len) < 0)
|
|
return -1;
|
|
if (H5Tset_strpad(t, strpad) < 0)
|
|
return -1;
|
|
return t;
|
|
}
|
|
|
|
/* write_char
 * Encode the code point c as UTF-8 and store it in test_string starting
 * at index cur_pos.  One to four bytes are written depending on the
 * magnitude of c; a value of 0x200000 or more writes nothing.
 * No terminating NUL is written and no bounds checking is done.
 * Return the index just past the last byte written (cur_pos itself when
 * nothing was written).
 */
unsigned int
write_char(unsigned int c, char *test_string, unsigned int cur_pos)
{
    char        *out;    /* Where the first encoded byte goes */
    unsigned int nbytes; /* Length of the encoding */

    /* Too large for a four-byte sequence: leave the string untouched */
    if (c >= 0x200000)
        return cur_pos;

    out = &test_string[cur_pos];

    if (c < 0x80) {
        /* 0xxxxxxx */
        out[0] = (char)c;
        nbytes = 1;
    }
    else if (c < 0x800) {
        /* 110xxxxx 10xxxxxx */
        out[0] = (char)(0xC0 | (c >> 6));
        out[1] = (char)(0x80 | (c & 0x3F));
        nbytes = 2;
    }
    else if (c < 0x10000) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        out[0] = (char)(0xE0 | (c >> 12));
        out[1] = (char)(0x80 | ((c >> 6) & 0x3F));
        out[2] = (char)(0x80 | (c & 0x3F));
        nbytes = 3;
    }
    else {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        out[0] = (char)(0xF0 | (c >> 18));
        out[1] = (char)(0x80 | ((c >> 12) & 0x3F));
        out[2] = (char)(0x80 | ((c >> 6) & 0x3F));
        out[3] = (char)(0x80 | (c & 0x3F));
        nbytes = 4;
    }

    return cur_pos + nbytes;
}
|
|
|
|
/* dump_string
|
|
* Print a string both as text (which will look like garbage) and as hex.
|
|
* The text display is not guaranteed to be accurate--certain characters
|
|
* could confuse printf (e.g., '\n'). */
|
|
void
|
|
dump_string(const char *string)
|
|
{
|
|
size_t length;
|
|
size_t x;
|
|
|
|
HDprintf("The string was:\n %s", string);
|
|
HDprintf("Or in hex:\n");
|
|
|
|
length = HDstrlen(string);
|
|
|
|
for (x = 0; x < length; x++)
|
|
HDprintf("%x ", string[x] & (0x000000FF));
|
|
|
|
HDprintf("\n");
|
|
}
|
|
|
|
/* Main test.
|
|
* Create a string of random Unicode characters, then run each test with
|
|
* that string.
|
|
*/
|
|
void
|
|
test_unicode(void)
|
|
{
|
|
char test_string[MAX_STRING_LENGTH];
|
|
unsigned int cur_pos = 0; /* Current position in test_string */
|
|
unsigned int unicode_point; /* Unicode code point for a single character */
|
|
hid_t fid; /* ID of file */
|
|
int x; /* Temporary variable */
|
|
herr_t ret; /* Generic return value */
|
|
|
|
/* Output message about test being performed */
|
|
MESSAGE(5, ("Testing UTF-8 Encoding\n"));
|
|
|
|
/* Create a random string with length NUM_CHARS */
|
|
HDsrandom((unsigned)HDtime(NULL));
|
|
|
|
HDmemset(test_string, 0, sizeof(test_string));
|
|
for (x = 0; x < NUM_CHARS; x++) {
|
|
/* We need to avoid unprintable characters (codes 0-31) and the
|
|
* . and / characters, since they aren't allowed in path names.
|
|
*/
|
|
unicode_point = (unsigned)(HDrandom() % (MAX_CODE_POINT - 32)) + 32;
|
|
if (unicode_point != 46 && unicode_point != 47)
|
|
cur_pos = write_char(unicode_point, test_string, cur_pos);
|
|
}
|
|
|
|
/* Avoid unlikely case of the null string */
|
|
if (cur_pos == 0) {
|
|
test_string[cur_pos] = 'Q';
|
|
cur_pos++;
|
|
}
|
|
test_string[cur_pos] = '\0';
|
|
|
|
/* Create file */
|
|
fid = H5Fcreate(FILENAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT);
|
|
CHECK(fid, FAIL, "H5Fcreate");
|
|
|
|
test_fl_string(fid, test_string);
|
|
test_strpad(fid, "abcdefgh");
|
|
test_strpad(fid, test_string);
|
|
test_vl_string(fid, test_string);
|
|
test_objnames(fid, test_string);
|
|
test_attrname(fid, test_string);
|
|
test_compound(fid, test_string);
|
|
test_enum(fid, test_string);
|
|
test_opaque(fid, test_string);
|
|
|
|
/* Close file */
|
|
ret = H5Fclose(fid);
|
|
CHECK(ret, FAIL, "H5Fclose");
|
|
|
|
/* This function could be useful in debugging if certain strings
|
|
* create errors.
|
|
*/
|
|
#ifdef DEBUG
|
|
dump_string(test_string);
|
|
#endif /* DEBUG */
|
|
}
|
|
|
|
/* cleanup_unicode(void)
|
|
* Delete the file this test created.
|
|
*/
|
|
void
|
|
cleanup_unicode(void)
|
|
{
|
|
HDremove(FILENAME);
|
|
}
|