[svn-r14039]

New feature: implementation of h5import conversion of an ASCII plain-text file containing text data

The string type H5T_C_S1 is used to define the data (a datum is defined here as one line of text in the text file). The size is set to variable length (H5T_VARIABLE).

The dataspace used is a 1D array with as many elements as there are lines in the ASCII file (a line is terminated by an end-of-line character, ASCII number 10). A first traversal of the input text file must be made to determine the number of lines in the file and thus the size of the dataset's single dimension.

Added a new test to the test script.
Added the text input files and teststr.h5 for h5dump to compare against.

Tested: windows, linux, solaris
This commit is contained in:
Pedro Vicente Nunes 2007-08-07 11:19:11 -05:00
parent e2477c8d0b
commit 6c95c46fcb
6 changed files with 209 additions and 3 deletions

View File

@ -254,8 +254,25 @@ gtoken(char *s)
return (token); return (token);
} }
/*-------------------------------------------------------------------------
* Function: processDataFile
*
* Purpose: allocate memory and read data file
*
* Return: 0, success, -1, error
*
* Programmer: pkmat
*
* Modifications: pvn
* 7/23/2007. Added support for STR type
*
*-------------------------------------------------------------------------
*/
static int static int
processDataFile(char *infile, struct Input *in, FILE **strm) processDataFile(char *infile, struct Input *in, FILE **strm, hid_t file_id)
{ {
const char *err1 = "Unable to open the input file %s for reading.\n"; const char *err1 = "Unable to open the input file %s for reading.\n";
const char *err2 = "Error in allocating integer data storage.\n"; const char *err2 = "Error in allocating integer data storage.\n";
@ -265,6 +282,7 @@ processDataFile(char *infile, struct Input *in, FILE **strm)
const char *err6 = "Error in allocating unsigned integer data storage.\n"; const char *err6 = "Error in allocating unsigned integer data storage.\n";
const char *err7 = "Error in reading unsigned integer data.\n"; const char *err7 = "Error in reading unsigned integer data.\n";
const char *err10 = "Unrecognized input class type.\n"; const char *err10 = "Unrecognized input class type.\n";
const char *err11 = "Error in reading string data.\n";
if ((*strm = fopen(infile, "r")) == NULL) if ((*strm = fopen(infile, "r")) == NULL)
{ {
@ -307,6 +325,15 @@ processDataFile(char *infile, struct Input *in, FILE **strm)
break; break;
case 5: /* STR */ case 5: /* STR */
if (processStrData(strm, in, file_id) == -1)
{
(void) fprintf(stderr, err11, infile);
return(-1);
}
break; break;
case 6: /* TEXTUIN */ case 6: /* TEXTUIN */
@ -755,6 +782,162 @@ readFloatData(FILE **strm, struct Input *in)
return(0); return(0);
} }
/*-------------------------------------------------------------------------
 * Function: processStrData
 *
 * Purpose: read an ASCII file with string data and generate an HDF5 dataset
 *  with a variable length string type (H5T_C_S1 sized H5T_VARIABLE).
 *
 *  The input stream is traversed twice: the first pass counts the
 *  end-of-line characters to size the 1D dataset, the second pass writes
 *  one dataset element per line. Only text terminated by an end-of-line
 *  character is written; any characters after the last end-of-line are
 *  not stored. If the stream holds no end-of-line at all, nothing is
 *  created and 0 is returned.
 *
 *  The dataset is named by the last component of in->path; the preceding
 *  components are opened as groups, or created when they cannot be opened.
 *
 * Parameters:
 *  strm    - address of the already opened input stream
 *  in      - import description; only in->path is read here
 *  file_id - identifier of the HDF5 file that receives the dataset
 *
 * Return: 0, ok, -1 no (HDF5 failure, read error, or a line that does not
 *  fit in the 1024 byte line buffer)
 *
 * Programmer: Pedro Vicente, pvn@hdfgroup.org
 *
 * Date: July, 26, 2007
 *
 *-------------------------------------------------------------------------
 */
static int
processStrData(FILE **strm, struct Input *in, hid_t file_id)
{
    hid_t   group_id = -1, dset_id = -1, space_id = -1;
    hid_t   mspace_id = -1, fspace_id = -1, type_id = -1;
    hid_t   handle = file_id;
    hsize_t dims[1];
    hsize_t start[1];
    hsize_t count[1] = { 1 };
    char    str[1024];
    char   *str2 = str;     /* H5Dwrite expects an array of char* for VL strings */
    size_t  i = 0;
    int     c;              /* int, so that EOF is distinguishable from data */
    int     j = 0, nlines = 0, line = 0;
    int     group_failed = 0;
    int     ret = -1;

    /*-------------------------------------------------------------------------
     * get number of lines in the input file
     *-------------------------------------------------------------------------
     */
    while ((c = fgetc(*strm)) != EOF)
    {
        if (c == '\n') /* eol */
            nlines++;
    }

    if (ferror(*strm))
        return (-1);

    if (nlines == 0)
        return (0);

    /* number of records */
    dims[0] = (hsize_t) nlines;

    /* rewind */
    if (fseek(*strm, 0L, SEEK_SET) != 0)
        goto out;

    /*-------------------------------------------------------------------------
     * read file again and generate an HDF5 dataset
     *-------------------------------------------------------------------------
     */
    if ((type_id = H5Tcopy(H5T_C_S1)) < 0)
        goto out;

    if (H5Tset_size(type_id, H5T_VARIABLE) < 0)
        goto out;

    /* open or create the parent groups; error reporting is disabled because
     * a failing H5Gopen only means that the group has to be created.
     * No goto inside the TRY block: leaving it early would skip the
     * restoration of the error handler. */
    H5E_BEGIN_TRY
    {
        for (j = 0; j < in->path.count - 1; j++)
        {
            hid_t next_id = H5Gopen(handle, in->path.group[j]);

            if (next_id < 0)
                next_id = H5Gcreate(handle, in->path.group[j], 0);

            if (next_id < 0)
            {
                group_failed = 1;
                break;
            }

            /* the previous level is no longer needed once its child is open */
            if (group_id >= 0)
                H5Gclose(group_id);

            group_id = next_id;
            handle = group_id;
        }
        /* enable error reporting */
    } H5E_END_TRY;

    if (group_failed)
        goto out;

    /* the loop leaves j at the last path component (0 when there are no
     * parent groups), which names the dataset */
    if (j < 0)
        j = 0;

    if ((space_id = H5Screate_simple(1, dims, NULL)) < 0)
        goto out;

    if ((mspace_id = H5Screate(H5S_SCALAR)) < 0)
        goto out;

    if ((dset_id = H5Dcreate(handle, in->path.group[j], type_id, space_id, H5P_DEFAULT)) < 0)
        goto out;

    /* one file dataspace is reused for all lines; only the selection moves */
    if ((fspace_id = H5Dget_space(dset_id)) < 0)
        goto out;

    while ((c = fgetc(*strm)) != EOF)
    {
        if (c != '\n')
        {
            /* keep one byte for the terminator; refuse lines that do not fit
             * instead of writing past the end of the buffer */
            if (i >= sizeof(str) - 1)
                goto out;

            str[i++] = (char) c;
            continue;
        }

        /* eol: terminate the string and write it as element number 'line' */
        str[i] = '\0';

        start[0] = (hsize_t) line++;

        if (H5Sselect_hyperslab(fspace_id, H5S_SELECT_SET, start, NULL, count, NULL) < 0)
            goto out;

        if (H5Dwrite(dset_id, type_id, mspace_id, fspace_id, H5P_DEFAULT, &str2) < 0)
            goto out;

        i = 0;
    }

    if (ferror(*strm))
        goto out;

    ret = 0;

out:
    /* close whatever was opened, on success and on failure alike */
    if (fspace_id >= 0)
        H5Sclose(fspace_id);
    if (dset_id >= 0 && H5Dclose(dset_id) < 0)
        ret = -1;
    if (mspace_id >= 0)
        H5Sclose(mspace_id);
    if (space_id >= 0)
        H5Sclose(space_id);
    if (group_id >= 0)
        H5Gclose(group_id);
    if (type_id >= 0)
        H5Tclose(type_id);

    return (ret);
}
static int static int
allocateIntegerStorage(struct Input *in) allocateIntegerStorage(struct Input *in)
{ {
@ -1258,6 +1441,10 @@ validateConfigurationParameters(struct Input * in)
const char *err6 = "No support for reading 64-bit integer (INPUT-CLASS: IN, TEXTIN, UIN, TEXTUIN files\n"; const char *err6 = "No support for reading 64-bit integer (INPUT-CLASS: IN, TEXTIN, UIN, TEXTUIN files\n";
#endif #endif
/* for class STR other parameters are ignored */
if (in->inputClass == 5) /* STR */
return (0);
if ( if (
(in->configOptionVector[DIM] != 1) || (in->configOptionVector[DIM] != 1) ||
(in->configOptionVector[RANK] != 1)) (in->configOptionVector[RANK] != 1))
@ -2242,12 +2429,15 @@ process(struct Options *opt)
} }
} }
if (processDataFile(opt->infiles[k].datafile, in, &strm) == -1) if (processDataFile(opt->infiles[k].datafile, in, &strm, file_id ) == -1)
{ {
(void) fprintf(stderr, err3, opt->infiles[k].datafile); (void) fprintf(stderr, err3, opt->infiles[k].datafile);
return (-1); return (-1);
} }
if (in->inputClass != 5) /* STR */
{
for (j=0; j<in->rank;j++) for (j=0; j<in->rank;j++)
numOfElements *= in->sizeOfDimension[j]; numOfElements *= in->sizeOfDimension[j];
@ -2355,6 +2545,10 @@ process(struct Options *opt)
H5Pclose(proplist); H5Pclose(proplist);
H5Sclose(dataspace); H5Sclose(dataspace);
} }
} /* STR */
H5Fclose(file_id); H5Fclose(file_id);
return (0); return (0);
} }

View File

@ -214,7 +214,7 @@ static int CompressionTypeStrToInt(char *temp);
static int getCompressionParameter(struct Input *in, FILE** strm); static int getCompressionParameter(struct Input *in, FILE** strm);
static int getExternalFilename(struct Input *in, FILE** strm); static int getExternalFilename(struct Input *in, FILE** strm);
static int getMaximumDimensionSizes(struct Input *in, FILE **strm); static int getMaximumDimensionSizes(struct Input *in, FILE **strm);
static int processDataFile(char *infile, struct Input *in, FILE **strm); static int processDataFile(char *infile, struct Input *in, FILE **strm, hid_t file_id);
static int readIntegerData(FILE **strm, struct Input *in); static int readIntegerData(FILE **strm, struct Input *in);
static int readFloatData(FILE **strm, struct Input *in); static int readFloatData(FILE **strm, struct Input *in);
static int allocateIntegerStorage(struct Input *in); static int allocateIntegerStorage(struct Input *in);
@ -224,6 +224,7 @@ hid_t createInputDataType(struct Input *in);
static int readUIntegerData(FILE **strm, struct Input *in); static int readUIntegerData(FILE **strm, struct Input *in);
static int allocateUIntegerStorage(struct Input *in); static int allocateUIntegerStorage(struct Input *in);
static int validateConfigurationParameters(struct Input * in); static int validateConfigurationParameters(struct Input * in);
static int processStrData(FILE **strm, struct Input *in, hid_t file_id);
#endif /* H5IMPORT_H__ */ #endif /* H5IMPORT_H__ */

View File

@ -98,6 +98,9 @@ TOOLTEST buin16 -c $srcdir/testfiles/conbuin16 -o test12.h5
TESTING "BINARY UI32 - rank 3 - Output LE + CHUNKED " TESTING "BINARY UI32 - rank 3 - Output LE + CHUNKED "
TOOLTEST buin32 -c $srcdir/testfiles/conbuin32 -o test13.h5 TOOLTEST buin32 -c $srcdir/testfiles/conbuin32 -o test13.h5
TESTING "STR"
TOOLTEST $srcdir/testfiles/txtstr -c $srcdir/testfiles/textstr -o teststr.h5
rm -f tx* b* *.dat rm -f tx* b* *.dat
rm -f test*.h5 rm -f test*.h5
rm -rf tmp_testfiles rm -rf tmp_testfiles

Binary file not shown.

View File

@ -0,0 +1,6 @@
PATH /mytext/data
INPUT-CLASS STR

View File

@ -0,0 +1,2 @@
hello world
hello world again