hdf5/tools/pdb2hdf.c
Robb Matzke bc520e88b4 [svn-r2262] * 2000-05-18
** src/H5Tconv.c
** src/H5Tpkg.h
** src/H5Tpublic.h
	The H5T_conv_struct_opt() function had a design flaw -- it
	didn't keep information about the stride to use to step
	through the temporary/background-value buffer and thus nested
	invocations would clobber each other's temp buffers.  This was
	fixed by splitting the `stride' argument into `buf_stride' and
	`bkg_stride' arguments for all the conversion functions. THIS
	IS AN API CHANGE, but users will get a compiler warning when
	they pass their conversion function pointer to H5Tregister().

** src/H5T.c
** src/H5Tprivate.h
	Added a bkg_stride argument to the H5T_convert() definition in
	order to fix a bug related to the optimized compound datatype
	conversion function.

** src/H5T.c
** src/H5A.c
** src/H5D.c
** src/H5Ofill.c
** src/H5P.c
	Added bkg_stride=0 argument to the H5T_convert() calls.

** test/dtypes.c
	Added a test for the H5T_conv_struct_opt() bug fixed above.

** src/H5FL.c
	The H5FL_term() function should return non-zero even when it
	couldn't free all the free lists do to their being used by
	some other package.  When that other package terminates it
	will return non-zero, causing H5FL_term() to be called
	again. This fixes some of the `infinite loop closing library'
	messages.

** tools/pdb2hdf
	Uses print_version() instead of doing that itself.

** src/H5Ppublic.h
	Renamed H5Pget_gc_reference() declaration to make it match the
	definition.

** src/H5FDlog.c
	Added API tracing macros.

	Removed `const' qualifier from a `char*' member of a struct
	which was allocated on the heap.

** src/H5TB.c
	Added curly braces to a couple deeply-nested `if' statements
	to make them clearer and to shut up the increadibly stupid and
	just plain incorrect gcc warning about ambiguous `else'.

** test/titerate.c
	Removed incomplete initialization in favor of memset() for one
	auto variable to stop compiler warnings.

** tools/Depencencies
	Regenerated to remove references to h5dumputil.c
2000-05-18 11:40:20 -05:00

504 lines
13 KiB
C
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright © 1999 NCSA
* All rights reserved.
*
* Programmer: Robb Matzke <matzke@llnl.gov>
* Tuesday, October 12, 1999
*
* Purpose: Creates an HDF5 file from a PDB file. The raw data can be
* left in the PDB file, creating an HDF5 file that contains
* meta data that points into the PDB file.
*/
#include <assert.h>
#include <hdf5.h>
#include <pdb.h>
#include <score.h>
#include <stdio.h>
#include <string.h>
/*
* libsilo renames all the PDB functions. However, this source files uses
* their documented names, so we have #define's to translate them to Silo
* terminology.
*/
#ifdef H5_HAVE_LIBSILO
# define PD_open lite_PD_open
# define PD_close lite_PD_close
# define PD_ls lite_PD_ls
# define PD_cd lite_PD_cd
# define PD_inquire_entry lite_PD_inquire_entry
# define PD_read lite_PD_read
# define _PD_fixname _lite_PD_fixname
# define _PD_rl_defstr _lite_PD_rl_defstr
# define SC_free lite_SC_free
#endif
static int verbose_g = 0; /*verbose output? */
static int cached_g = 0; /*use core file driver? */
/*-------------------------------------------------------------------------
* Function: usage
*
* Purpose: Print a usage message.
*
* Return: void
*
* Programmer: Robb Matzke
* Tuesday, October 12, 1999
*
* Modifications:
*
*-------------------------------------------------------------------------
*/
static void
usage(const char *arg0)
{
char *progname;
if ((progname=strrchr(arg0, '/')) && progname[1]) progname++;
else progname = arg0;
fprintf(stderr, "\
usage: %s [OPTIONS] [PDBFILE ...]\n\
OPTIONS\n\
-h, -?, --help Print a usage message and exit\n\
-c, --cached Cache all data in memory before writing the output\n\
-v, --verbose Print the name of each object processed\n\
-V, --version Show the version number of this program\n\
\n\
The options and PDB file names may be interspersed and are processed from\n\
left to right.\n\
\n\
The name of the HDF5 file is generated by taking the basename of the PDB\n\
file and replacing the last extension (or appending if no extension) with\n\
the characters \".h5\". For example, \"/tmp/test/eos.data\" would result\n\
in an HDF5 file called \"eos.h5\" in the current directory.\n",
progname);
}
/*-------------------------------------------------------------------------
* Function: version
*
* Purpose: Print the version number.
*
* Return: void
*
* Programmer: Robb Matzke
* Friday, October 15, 1999
*
* Modifications:
*
*-------------------------------------------------------------------------
*/
static void
version(const char *arg0)
{
const char *progname;
if ((progname=strrchr(arg0, '/')) && progname[1]) progname++;
else progname = arg0;
print_version(progname);
}
/*-------------------------------------------------------------------------
* Function: fix_name
*
* Purpose: Given a PDB file name create the corresponding HDF5 file
* name. This is done by taking the base name of the PDB file
* and replacing (or appending) the last extension with ".h5".
*
* Return: Success: HDF_NAME
*
* Failure: NULL
*
* Programmer: Robb Matzke
* Tuesday, October 12, 1999
*
* Modifications:
*
*-------------------------------------------------------------------------
*/
static char *
fix_name(const char *pdb_name, char *hdf_name, size_t size)
{
char *s;
const char *ext;
if (!pdb_name || !hdf_name) return NULL;
if ((s=strrchr(pdb_name, '/'))) pdb_name = s;
if (NULL==(ext=strrchr(pdb_name, '.'))) ext = pdb_name + strlen(pdb_name);
if ((size_t)((ext-pdb_name)+4) > size) return NULL; /*overflow*/
memcpy(hdf_name, pdb_name, ext-pdb_name);
strcpy(hdf_name+(ext-pdb_name), ".h5");
return hdf_name;
}
/*-------------------------------------------------------------------------
* Function: fix_type
*
* Purpose: Given a PDB datatype return a corresponding hdf5 datatype.
* The hdf5 datatype should be closed when the caller is
* finished using it.
*
* Return: Success: HDF5 datatype
*
* Failure: negative
*
* Programmer: Robb Matzke
* Tuesday, October 12, 1999
*
* Modifications:
*
*-------------------------------------------------------------------------
*/
static hid_t
fix_type(PDBfile *pdb, const char *s)
{
hid_t type = -1;
defstr *d = _lite_PD_lookup_type((char*)s, pdb->chart);
/* PDB checking */
assert(d);
assert(d->size>0);
if (d->onescmp) return -1;
if (!strcmp(s, "char")) {
/*
* Character datatypes. Use whatever sign the native system uses by
* default.
*/
type = H5Tcopy(H5T_NATIVE_CHAR);
} else if (!strcmp(s, "integer")) {
/*
* Integer datatypes. PDB supports various sizes of signed or
* unsigned integers.
*/
type = H5Tcopy(d->unsgned?H5T_NATIVE_UINT:H5T_NATIVE_INT);
H5Tset_size(type, d->size);
H5Tset_precision(type, 8*d->size);
assert(NORMAL_ORDER==d->order_flag || REVERSE_ORDER==d->order_flag);
H5Tset_order(type,
NORMAL_ORDER==d->order_flag?H5T_ORDER_BE:H5T_ORDER_LE);
} else if (!strcmp(s, "float") || !strcmp(s, "double")) {
/*
* Floating-point datatypes
*/
size_t nbits, spos, epos, esize, mpos, msize;
type = H5Tcopy(H5T_NATIVE_FLOAT);
H5Tset_size(type, d->size);
H5Tset_precision(type, 8*d->size);
assert(d->order);
H5Tset_order(type, 1==d->order[0]?H5T_ORDER_BE:H5T_ORDER_LE);
/*
* format[0] = # of bits per number
* format[1] = # of bits in exponent
* format[2] = # of bits in mantissa
* format[3] = start bit of sign
* format[4] = start bit of exponent
* format[5] = start bit of mantissa
* format[6] = high order mantissa bit (CRAY needs this)
* format[7] = bias of exponent
*/
assert(d->format && d->format[0] == 8*d->size);
nbits = d->format[0];
spos = nbits - (d->format[3]+1);
esize = d->format[1];
epos = nbits - (d->format[4]+esize);
msize = d->format[2];
mpos = nbits - (d->format[5]+msize);
H5Tset_fields(type, spos, epos, esize, mpos, msize);
H5Tset_ebias(type, d->format[7]);
}
return type;
}
/*-------------------------------------------------------------------------
* Function: fix_space
*
* Purpose: Convert a PDB dimension list into an HDF5 data space.
*
* Return: Success: HDF5 data space
*
* Failure: negative
*
* Programmer: Robb Matzke
* Tuesday, October 12, 1999
*
* Modifications:
*
*-------------------------------------------------------------------------
*/
static hid_t
fix_space(const dimdes *dim)
{
hsize_t size[H5S_MAX_RANK];
int rank;
for (rank=0; rank<H5S_MAX_RANK && dim; rank++, dim=dim->next) {
size[rank] = dim->number;
}
if (rank>=H5S_MAX_RANK) return -1;
return H5Screate_simple(rank, size, NULL);
}
/*-------------------------------------------------------------------------
* Function: fix_external
*
* Purpose: Sets the external file information for a dataset creation
* property list based on information from PDB.
*
* Return: Success: non-negative
*
* Failure: negative
*
* Programmer: Robb Matzke
* Tuesday, October 12, 1999
*
* Modifications:
*
*-------------------------------------------------------------------------
*/
static int
fix_external(hid_t dcpl, const char *pdb_file_name, long nelmts,
hsize_t elmt_size, symblock *block)
{
int i;
for (i=0; nelmts>0; i++) {
hsize_t nbytes = block[i].number * elmt_size;
H5Pset_external(dcpl, pdb_file_name, block[i].diskaddr, nbytes);
nelmts -= block[i].number;
}
return 0;
}
/*-------------------------------------------------------------------------
* Function: traverse
*
* Purpose: Traverse the current working directory of the PDB file.
*
* Return: Success: 0
*
* Failure: -1
*
* Programmer: Robb Matzke
* Tuesday, October 12, 1999
*
* Modifications:
*
*-------------------------------------------------------------------------
*/
static int
traverse(PDBfile *pdb, const char *pdb_file_name, hid_t hdf)
{
int nitems, i, in_subdir=FALSE;
char **list=NULL;
hid_t group=-1, h_type=-1, h_space=-1, dset=-1, dcpl=-1;
hsize_t elmt_size;
const syment *ep=NULL;
if (NULL==(list=PD_ls(pdb, ".", NULL, &nitems))) {
fprintf(stderr, "cannot obtain PDB directory contents\n");
goto error;
}
for (i=0; i<nitems; i++) {
ep = PD_inquire_entry(pdb, list[i], TRUE, NULL);
if (verbose_g) {
printf("%s %s\n", _PD_fixname(pdb, list[i]), ep->type);
fflush(stdout);
}
if ('/'==list[i][strlen(list[i])-1]) {
/*
* This is a PDB directory. Make a corresponding HDF5 group and
* traverse into that PDB directory and HDF5 group
*/
if ((group=H5Gcreate(hdf, list[i], 0))<0) {
fprintf(stderr, "cannot create HDF group %s\n", list[i]);
goto error;
}
if (!PD_cd(pdb, list[i])) {
fprintf(stderr, "cannot cd into PDB directory %s\n", list[i]);
goto error;
} else {
in_subdir = TRUE;
}
traverse(pdb, pdb_file_name, group);
if (!PD_cd(pdb, "..")) {
fprintf(stderr, "cannot traverse out of PDB %s\n", list[i]);
goto error;
}
H5Gclose(group);
} else {
/* This is some non-directory PDB object */
/* Create an HDF5 datatype from the PDB type */
if ((h_type=fix_type(pdb, ep->type))<0) {
fprintf(stderr, "cannot create datatype for %s (%s)\n",
list[i], ep->type);
continue;
}
elmt_size = H5Tget_size(h_type);
/* Create an HDF5 dataspace from the PDB dimensions */
if ((h_space=fix_space(ep->dimensions))<0) {
fprintf(stderr, "cannot create datatype for %s\n", list[i]);
continue;
}
/* Create pointers to the external PDB data */
dcpl = H5Pcreate(H5P_DATASET_CREATE);
fix_external(dcpl, pdb_file_name, ep->number, elmt_size,
ep->blocks);
/* Create the dataset */
if ((dset=H5Dcreate(hdf, list[i], h_type, h_space, dcpl))<0) {
fprintf(stderr, "cannot create dataset for %s\n", list[i]);
}
H5Pclose(dcpl);
H5Dclose(dset);
H5Sclose(h_space);
H5Tclose(h_type);
}
}
for (i=0; i<nitems; i++) {
SC_free(list[i]);
}
SC_free(list);
return 0;
error:
if (group>=0) H5Gclose(group);
if (in_subdir) PD_cd(pdb, "..");
if (list) {
for (i=0; i<nitems; i++) {
SC_free(list[i]);
}
SC_free(list);
}
return -1;
}
/*-------------------------------------------------------------------------
* Function: main
*
* Purpose: Create an HDF5 file from a PDB file.
*
* Return: Success: 0
*
* Failure: non-zero
*
* Programmer: Robb Matzke
* Tuesday, October 12, 1999
*
* Modifications:
*
*-------------------------------------------------------------------------
*/
int
main(int argc, char *argv[])
{
int argno;
char _hdf_name[512], *hdf_name, *pdb_name, *s;
PDBfile *pdb;
hid_t hdf, fapl;
/* Print a help message if called with no arguments */
if (1==argc) {
usage(argv[0]);
exit(1);
}
/* Process arguments in order; switches interspersed with files */
for (argno=1; argno<argc; argno++) {
if (!strcmp("--help", argv[argno])) {
usage(argv[0]);
exit(1);
} else if (!strcmp("--verbose", argv[argno])) {
verbose_g++;
} else if (!strcmp("--cached", argv[argno])) {
cached_g++;
} else if (!strcmp("--version", argv[argno])) {
version(argv[0]);
} else if ('-'==argv[argno][0] && '-'!=argv[argno][1]) {
for (s=argv[argno]+1; *s; s++) {
switch (*s) {
case '?':
case 'h': /*--help*/
usage(argv[0]);
exit(0);
case 'c': /*--cached*/
cached_g++;
break;
case 'v': /*--verbose*/
verbose_g++;
break;
case 'V': /*--version*/
version(argv[0]);
break;
default:
usage(argv[0]);
exit(1);
}
}
} else if ('-'==argv[argno][0]) {
usage(argv[0]);
exit(1);
} else {
/* This must be a file name. Process it. */
fapl = H5Pcreate(H5P_FILE_ACCESS);
if (cached_g) H5Pset_fapl_core(fapl, 1024*1024, TRUE);
pdb_name = argv[argno];
hdf_name = fix_name(argv[argno], _hdf_name, sizeof _hdf_name);
if (NULL==(pdb=PD_open(pdb_name, "r"))) {
fprintf(stderr, "%s: unable to open PDB file\n", pdb_name);
exit(1);
}
if ((hdf=H5Fcreate(hdf_name, H5F_ACC_TRUNC, H5P_DEFAULT,
fapl))<0) {
fprintf(stderr, "%s: unable to open HDF file\n", hdf_name);
exit(1);
}
H5Pclose(fapl);
/*
* Traverse the PDB file to create the HDF5 file.
*/
traverse(pdb, pdb_name, hdf);
/* Close the files */
if (!PD_close(pdb)) {
fprintf(stderr, "%s: problems closing PDB file\n", pdb_name);
exit(1);
}
if (H5Fclose(hdf)<0) {
fprintf(stderr, "%s: problems closing HDF file\n", hdf_name);
exit(1);
}
}
}
return 0;
}