mirror of
https://github.com/HDFGroup/hdf5.git
synced 2024-11-27 02:10:55 +08:00
[svn-r14039]
New feature: implementation of h5import conversion of an ASCII plain-text file containing text data. The string type H5T_C_S1 is used to define the data (the datum is defined here as one line of text in the text file). The size is set to variable length (H5T_VARIABLE). The space used is a 1D array with as many elements as there are lines in the ASCII file (a line is defined by the inclusion of an end-of-line character, ASCII number 10). A first traversal of the input text file must be made to determine the number of lines in the file and thus the dimensionality of the dataset. New test: added a STR test to the test script, added text input files, and added teststr.h5 for h5dump to compare. Tested: Windows, Linux, Solaris.
This commit is contained in:
parent
e2477c8d0b
commit
6c95c46fcb
@ -254,8 +254,25 @@ gtoken(char *s)
|
||||
return (token);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Function: processDataFile
|
||||
*
|
||||
* Purpose: allocate memory and read data file
|
||||
*
|
||||
* Return: 0, success, -1, error
|
||||
*
|
||||
* Programmer: pkmat
|
||||
*
|
||||
* Modifications: pvn
|
||||
* 7/23/2007. Added support for STR type
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
static int
|
||||
processDataFile(char *infile, struct Input *in, FILE **strm)
|
||||
processDataFile(char *infile, struct Input *in, FILE **strm, hid_t file_id)
|
||||
{
|
||||
const char *err1 = "Unable to open the input file %s for reading.\n";
|
||||
const char *err2 = "Error in allocating integer data storage.\n";
|
||||
@ -265,6 +282,7 @@ processDataFile(char *infile, struct Input *in, FILE **strm)
|
||||
const char *err6 = "Error in allocating unsigned integer data storage.\n";
|
||||
const char *err7 = "Error in reading unsigned integer data.\n";
|
||||
const char *err10 = "Unrecognized input class type.\n";
|
||||
const char *err11 = "Error in reading string data.\n";
|
||||
|
||||
if ((*strm = fopen(infile, "r")) == NULL)
|
||||
{
|
||||
@ -307,6 +325,15 @@ processDataFile(char *infile, struct Input *in, FILE **strm)
|
||||
break;
|
||||
|
||||
case 5: /* STR */
|
||||
|
||||
if (processStrData(strm, in, file_id) == -1)
|
||||
{
|
||||
(void) fprintf(stderr, err11, infile);
|
||||
return(-1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
break;
|
||||
|
||||
case 6: /* TEXTUIN */
|
||||
@ -755,6 +782,162 @@ readFloatData(FILE **strm, struct Input *in)
|
||||
return(0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* Function: processStrData
|
||||
*
|
||||
* Purpose: read an ASCII file with string data and generate an HDF5 dataset
|
||||
* with a variable length type
|
||||
*
|
||||
* Return: 0, ok, -1 no
|
||||
*
|
||||
* Programmer: Pedro Vicente, pvn@hdfgroup.org
|
||||
*
|
||||
* Date: July, 26, 2007
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
static int
|
||||
processStrData(FILE **strm, struct Input *in, hid_t file_id)
|
||||
{
|
||||
hid_t group_id, dset_id, space_id, mspace_id, type_id, handle;
|
||||
hsize_t dims[1];
|
||||
char str[1024];
|
||||
char c;
|
||||
int i = 0, j, nlines = 0, line;
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* get number of lines in the input file
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
while ( !feof( *strm ) )
|
||||
{
|
||||
c = fgetc( *strm );
|
||||
|
||||
if ( c == 10 ) /* eol */
|
||||
{
|
||||
nlines++;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
if ( !nlines )
|
||||
return 0;
|
||||
|
||||
/* number of records */
|
||||
dims[0] = nlines;
|
||||
|
||||
/* rewind */
|
||||
fseek(*strm,0L,0);
|
||||
|
||||
/*-------------------------------------------------------------------------
|
||||
* read file again and generate an HDF5 dataset
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
if (( type_id = H5Tcopy(H5T_C_S1)) < 0 )
|
||||
goto out;
|
||||
|
||||
if ( H5Tset_size (type_id,H5T_VARIABLE) < 0 )
|
||||
goto out;
|
||||
|
||||
/* disable error reporting */
|
||||
H5E_BEGIN_TRY
|
||||
{
|
||||
|
||||
/* create parent groups */
|
||||
if (in->path.count > 1)
|
||||
{
|
||||
j = 0;
|
||||
handle = file_id;
|
||||
while (j<in->path.count-1)
|
||||
{
|
||||
if ((group_id = H5Gopen(handle, in->path.group[j])) < 0)
|
||||
{
|
||||
group_id = H5Gcreate(handle, in->path.group[j++], 0);
|
||||
for (; j<in->path.count-1; j++)
|
||||
group_id = H5Gcreate(group_id, in->path.group[j], 0);
|
||||
handle = group_id;
|
||||
break;
|
||||
}
|
||||
handle = group_id;
|
||||
j++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
handle = file_id;
|
||||
j=0;
|
||||
}
|
||||
|
||||
/*enable error reporting */
|
||||
} H5E_END_TRY;
|
||||
|
||||
if (( space_id = H5Screate_simple(1,dims,NULL)) < 0 )
|
||||
goto out;
|
||||
|
||||
if (( mspace_id = H5Screate(H5S_SCALAR)) < 0 )
|
||||
goto out;
|
||||
|
||||
if (( dset_id = H5Dcreate(handle, in->path.group[j], type_id, space_id, H5P_DEFAULT)) < 0)
|
||||
goto out;
|
||||
|
||||
line = 0;
|
||||
|
||||
while ( !feof( *strm ) )
|
||||
{
|
||||
c = fgetc( *strm );
|
||||
|
||||
str[ i ] = c;
|
||||
|
||||
i++;
|
||||
|
||||
if ( c == 10 ) /* eol */
|
||||
{
|
||||
char *str2 = str;
|
||||
hid_t fspace_id;
|
||||
hsize_t start[1];
|
||||
hsize_t count[1] = { 1 };
|
||||
|
||||
str[ i-1 ] = '\0'; /* terminate string */
|
||||
|
||||
if (( fspace_id = H5Dget_space (dset_id)) < 0 )
|
||||
goto out;
|
||||
|
||||
start[0] = line ++ ;
|
||||
|
||||
if ( H5Sselect_hyperslab(fspace_id,H5S_SELECT_SET,start,NULL,count,NULL) < 0 )
|
||||
goto out;
|
||||
|
||||
if ( H5Dwrite(dset_id,type_id,mspace_id,fspace_id,H5P_DEFAULT, &str2 ) < 0 )
|
||||
goto out;
|
||||
|
||||
if ( H5Sclose(fspace_id) < 0 )
|
||||
goto out;
|
||||
|
||||
i = 0;
|
||||
str[ 0 ] = '\0';
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* close */
|
||||
H5Dclose(dset_id);
|
||||
H5Sclose(space_id);
|
||||
H5Sclose(mspace_id);
|
||||
H5Tclose(type_id);
|
||||
|
||||
return(0);
|
||||
|
||||
out:
|
||||
|
||||
return (-1);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
allocateIntegerStorage(struct Input *in)
|
||||
{
|
||||
@ -1258,6 +1441,10 @@ validateConfigurationParameters(struct Input * in)
|
||||
const char *err6 = "No support for reading 64-bit integer (INPUT-CLASS: IN, TEXTIN, UIN, TEXTUIN files\n";
|
||||
#endif
|
||||
|
||||
/* for class STR other parameters are ignored */
|
||||
if (in->inputClass == 5) /* STR */
|
||||
return (0);
|
||||
|
||||
if (
|
||||
(in->configOptionVector[DIM] != 1) ||
|
||||
(in->configOptionVector[RANK] != 1))
|
||||
@ -2242,12 +2429,15 @@ process(struct Options *opt)
|
||||
}
|
||||
}
|
||||
|
||||
if (processDataFile(opt->infiles[k].datafile, in, &strm) == -1)
|
||||
if (processDataFile(opt->infiles[k].datafile, in, &strm, file_id ) == -1)
|
||||
{
|
||||
(void) fprintf(stderr, err3, opt->infiles[k].datafile);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (in->inputClass != 5) /* STR */
|
||||
{
|
||||
|
||||
for (j=0; j<in->rank;j++)
|
||||
numOfElements *= in->sizeOfDimension[j];
|
||||
|
||||
@ -2355,6 +2545,10 @@ process(struct Options *opt)
|
||||
H5Pclose(proplist);
|
||||
H5Sclose(dataspace);
|
||||
}
|
||||
|
||||
} /* STR */
|
||||
|
||||
|
||||
H5Fclose(file_id);
|
||||
return (0);
|
||||
}
|
||||
|
@ -214,7 +214,7 @@ static int CompressionTypeStrToInt(char *temp);
|
||||
static int getCompressionParameter(struct Input *in, FILE** strm);
|
||||
static int getExternalFilename(struct Input *in, FILE** strm);
|
||||
static int getMaximumDimensionSizes(struct Input *in, FILE **strm);
|
||||
static int processDataFile(char *infile, struct Input *in, FILE **strm);
|
||||
static int processDataFile(char *infile, struct Input *in, FILE **strm, hid_t file_id);
|
||||
static int readIntegerData(FILE **strm, struct Input *in);
|
||||
static int readFloatData(FILE **strm, struct Input *in);
|
||||
static int allocateIntegerStorage(struct Input *in);
|
||||
@ -224,6 +224,7 @@ hid_t createInputDataType(struct Input *in);
|
||||
static int readUIntegerData(FILE **strm, struct Input *in);
|
||||
static int allocateUIntegerStorage(struct Input *in);
|
||||
static int validateConfigurationParameters(struct Input * in);
|
||||
static int processStrData(FILE **strm, struct Input *in, hid_t file_id);
|
||||
|
||||
#endif /* H5IMPORT_H__ */
|
||||
|
||||
|
@ -98,6 +98,9 @@ TOOLTEST buin16 -c $srcdir/testfiles/conbuin16 -o test12.h5
|
||||
TESTING "BINARY UI32 - rank 3 - Output LE + CHUNKED "
|
||||
TOOLTEST buin32 -c $srcdir/testfiles/conbuin32 -o test13.h5
|
||||
|
||||
TESTING "STR"
|
||||
TOOLTEST $srcdir/testfiles/txtstr -c $srcdir/testfiles/textstr -o teststr.h5
|
||||
|
||||
rm -f tx* b* *.dat
|
||||
rm -f test*.h5
|
||||
rm -rf tmp_testfiles
|
||||
|
BIN
tools/h5import/testfiles/teststr.h5
Normal file
BIN
tools/h5import/testfiles/teststr.h5
Normal file
Binary file not shown.
6
tools/h5import/testfiles/textstr
Normal file
6
tools/h5import/testfiles/textstr
Normal file
@ -0,0 +1,6 @@
|
||||
PATH /mytext/data
|
||||
INPUT-CLASS STR
|
||||
|
||||
|
||||
|
||||
|
2
tools/h5import/testfiles/txtstr
Normal file
2
tools/h5import/testfiles/txtstr
Normal file
@ -0,0 +1,2 @@
|
||||
hello world
|
||||
hello world again
|
Loading…
Reference in New Issue
Block a user