1998-05-14 01:58:24 +08:00
|
|
|
|
/*
 * Copyright (C) 1998 NCSA
 *                    All rights reserved.
 *
 * Programmer:  Robb Matzke <matzke@llnl.gov>
 *              Wednesday, May 13, 1998
 *
 * Purpose:     Repartitions a file family.  This program can be used to
 *              split a single file into a family of files, join a family of
 *              files into a single file, or copy one family to another while
 *              changing the size of the family members.  It can also be used
 *              to copy a single file to a single file with holes.
 */
|
1998-11-12 00:33:33 +08:00
|
|
|
|
|
|
|
|
|
/* See H5private.h for how to include system headers */
|
1998-11-20 02:52:56 +08:00
|
|
|
|
#include <hdf5.h>
|
1999-11-17 03:08:14 +08:00
|
|
|
|
#ifdef H5_STDC_HEADERS
|
1998-11-12 00:33:33 +08:00
|
|
|
|
# include <ctype.h>
|
|
|
|
|
# include <errno.h>
|
|
|
|
|
# include <fcntl.h>
|
|
|
|
|
# include <stdio.h>
|
|
|
|
|
# include <stdlib.h>
|
|
|
|
|
# include <string.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
1999-11-17 03:08:14 +08:00
|
|
|
|
#ifdef H5_HAVE_UNISTD_H
|
1998-11-12 00:33:33 +08:00
|
|
|
|
# include <sys/types.h>
|
|
|
|
|
# include <unistd.h>
|
1998-11-11 02:18:31 +08:00
|
|
|
|
#endif
|
1998-11-12 00:33:33 +08:00
|
|
|
|
|
1999-11-17 03:08:14 +08:00
|
|
|
|
#ifdef H5_HAVE_SYS_STAT_H
|
1998-11-12 00:33:33 +08:00
|
|
|
|
# include <sys/stat.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
1999-09-29 08:31:07 +08:00
|
|
|
|
/* Boolean constants, defined only when no earlier header supplies them */
#if !defined(FALSE)
#   define FALSE        0
#endif
#if !defined(TRUE)
#   define TRUE         1
#endif

/* Capacity, in bytes, of the buffers that hold a formatted member name */
#define NAMELEN         4096

/*
 * Postfix multiplier: the expansion deliberately begins with `*', so that
 * writing `1 GB' expands to `1 *1024*1024*1024'.
 */
#define GB              *1024*1024*1024

/*
 * Smallest of two (MIN) or three (MIN3) values.  These are plain macros, so
 * an argument may be evaluated more than once.
 */
#if !defined(MIN)
#   define MIN(X,Y)     (((X) < (Y)) ? (X) : (Y))
#endif
#if !defined(MIN3)
#   define MIN3(X,Y,Z)  MIN (MIN (X, Y), Z)
#endif
|
1998-05-14 01:58:24 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------
 * Function:    usage
 *
 * Purpose:     Writes the command-line synopsis and a description of every
 *              switch to the standard error stream and then terminates the
 *              program.
 *
 * Return:      Does not return; the process exits with status 1.
 *
 *-------------------------------------------------------------------------
 */
static void
usage (const char *progname)
{
    /* One line of help per switch or positional argument */
    static const char *const help_lines[] = {
        " -v Produce verbose output\n",
        " -V Print a version number and exit\n",
        " -b N The I/O block size, defaults to 1kB\n",
        " -m N The destination member size or 1GB\n",
        " SRC The name of the source file\n",
        " DST The name of the destination files\n"
    };
    size_t      line;

    fprintf(stderr, "usage: %s [-v] [-V] [-[b|m] N[g|m|k]] SRC DST\n",
            progname);

    for (line=0; line<sizeof(help_lines)/sizeof(help_lines[0]); line++) {
        fputs(help_lines[line], stderr);
    }

    fprintf(stderr, "Sizes may be suffixed with `g' for GB, `m' for MB or "
            "`k' for kB.\n");
    fprintf(stderr, "File family names include an integer printf "
            "format such as `%%d'\n");

    exit (1);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------
 * Function:    get_size
 *
 * Purpose:     Reads a size option of the form `-XNS' where `X' is any
 *              letter, `N' is a positive number, and `S' is an optional
 *              suffix letter in the set [GgMmKk].  The option may also be
 *              split among two arguments as: `-X NS'.  In the joined form
 *              `N' is parsed as decimal; in the split form the base is
 *              detected by strtol() (so a `0x' or `0' prefix is honored).
 *              The input value of ARGNO is the argument number for the
 *              switch in the ARGV vector and ARGC is the number of entries
 *              in that vector.
 *
 * Return:      Success:        The value N multiplied according to the
 *                              suffix S.  The result is always greater
 *                              than zero.  On return ARGNO will be the
 *                              number of the next argument to process.
 *
 *              Failure:        Calls usage() which exits with a non-zero
 *                              status.  Failures include a missing or
 *                              malformed number, a value that is zero or
 *                              negative, an unknown or multi-character
 *                              suffix, and a result that does not fit in
 *                              an off_t.
 *
 *-------------------------------------------------------------------------
 */
static off_t
get_size (const char *progname, int *argno, int argc, char *argv[])
{
    off_t       retval=-1;
    off_t       scale=1;                /*multiplier chosen by the suffix */
    off_t       limit;                  /*largest value an off_t can hold */
    long        value=0;                /*number as parsed by strtol()    */
    const char  *text=NULL;             /*start of the numeric text       */
    char        *suffix=NULL;           /*first character after number    */
    int         base=10;

    /* Locate the number: joined to the switch or in the next argument */
    if (isdigit ((unsigned char)(argv[*argno][2]))) {
        text = argv[*argno]+2;
        base = 10;
        (*argno)++;
    } else if (argv[*argno][2] || *argno+1>=argc) {
        usage (progname);
        return retval;                  /*not reached, usage() exits */
    } else {
        text = argv[*argno+1];
        base = 0;
        *argno += 2;
    }

    /*
     * Parse the number.  A size of zero would make the caller's copy loop
     * spin without making progress and a negative size is meaningless, so
     * both are rejected along with values strtol() could not represent.
     */
    errno = 0;
    value = strtol (text, &suffix, base);
    if (suffix==text || ERANGE==errno || value<=0) usage (progname);
    retval = (off_t)value;
    if ((long)retval!=value) usage (progname);

    /* At most one suffix character is allowed */
    if (suffix[0]) {
        if (suffix[1]) usage (progname);
        switch (suffix[0]) {
        case 'G':
        case 'g':
            scale = (off_t)1024 * 1024 * 1024;
            break;
        case 'M':
        case 'm':
            scale = (off_t)1024 * 1024;
            break;
        case 'K':
        case 'k':
            scale = (off_t)1024;
            break;
        default:
            usage (progname);
        }
    }

    /*
     * Refuse to overflow.  off_t is a signed type with no standard limit
     * macro, so its maximum is built from its width (POSIX fixes the size
     * of a byte at eight bits).
     */
    limit = (off_t)1 << (sizeof(off_t)*8 - 2);
    limit = limit - 1 + limit;
    if (retval > limit/scale) usage (progname);

    return retval * scale;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*-------------------------------------------------------------------------
|
|
|
|
|
* Function: main
|
|
|
|
|
*
|
|
|
|
|
* Purpose: Split an hdf5 file
|
|
|
|
|
*
|
|
|
|
|
* Return: Success:
|
|
|
|
|
*
|
|
|
|
|
* Failure:
|
|
|
|
|
*
|
|
|
|
|
* Programmer: Robb Matzke
|
|
|
|
|
* Wednesday, May 13, 1998
|
|
|
|
|
*
|
|
|
|
|
* Modifications:
|
|
|
|
|
*
|
|
|
|
|
*-------------------------------------------------------------------------
|
|
|
|
|
*/
|
|
|
|
|
int
|
|
|
|
|
main (int argc, char *argv[])
|
|
|
|
|
{
|
|
|
|
|
const char *prog_name; /*program name */
|
|
|
|
|
size_t blk_size=1024; /*size of each I/O block */
|
|
|
|
|
char *buf=NULL; /*I/O block buffer */
|
|
|
|
|
size_t n, i; /*counters */
|
|
|
|
|
ssize_t nio; /*I/O return value */
|
|
|
|
|
int argno=1; /*program argument number */
|
|
|
|
|
int src, dst=-1; /*source & destination files */
|
|
|
|
|
int need_seek=FALSE; /*destination needs to seek? */
|
|
|
|
|
int need_write; /*data needs to be written? */
|
|
|
|
|
struct stat sb; /*temporary file stat buffer */
|
|
|
|
|
int verbose=FALSE; /*display file names? */
|
|
|
|
|
size_t left_overs=0; /*amount of zeros left over */
|
|
|
|
|
|
|
|
|
|
const char *src_gen_name; /*general source name */
|
|
|
|
|
char src_name[NAMELEN]; /*source member name */
|
|
|
|
|
off_t src_offset=0; /*offset in source member */
|
|
|
|
|
int src_is_family; /*is source name a family name? */
|
|
|
|
|
int src_membno=0; /*source member number */
|
|
|
|
|
off_t src_size; /*source logical member size */
|
|
|
|
|
off_t src_act_size; /*source actual member size */
|
|
|
|
|
|
|
|
|
|
const char *dst_gen_name; /*general destination name */
|
|
|
|
|
char dst_name[NAMELEN]; /*destination member name */
|
|
|
|
|
off_t dst_offset=0; /*offset in destination member */
|
|
|
|
|
int dst_is_family; /*is dst name a family name? */
|
|
|
|
|
int dst_membno=0; /*destination member number */
|
|
|
|
|
off_t dst_size=1 GB; /*destination logical memb size */
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Get the program name from argv[0]. Use only the last component.
|
|
|
|
|
*/
|
|
|
|
|
if ((prog_name=strrchr (argv[0], '/'))) prog_name++;
|
|
|
|
|
else prog_name = argv[0];
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Parse switches.
|
|
|
|
|
*/
|
|
|
|
|
while (argno<argc && '-'==argv[argno][0]) {
|
|
|
|
|
if (!strcmp (argv[argno], "-v")) {
|
|
|
|
|
verbose = TRUE;
|
|
|
|
|
argno++;
|
1998-07-21 00:05:43 +08:00
|
|
|
|
} else if (!strcmp(argv[argno], "-V")) {
|
|
|
|
|
printf("This is %s version %u.%u release %u\n",
|
|
|
|
|
prog_name, H5_VERS_MAJOR, H5_VERS_MINOR, H5_VERS_RELEASE);
|
|
|
|
|
exit(0);
|
1998-05-14 01:58:24 +08:00
|
|
|
|
} else if ('b'==argv[argno][1]) {
|
|
|
|
|
blk_size = get_size (prog_name, &argno, argc, argv);
|
|
|
|
|
} else if ('m'==argv[argno][1]) {
|
|
|
|
|
dst_size = get_size (prog_name, &argno, argc, argv);
|
|
|
|
|
} else {
|
|
|
|
|
usage (prog_name);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Get the name for the source file and open the first member. The size
|
|
|
|
|
* of the first member determines the logical size of all the members.
|
|
|
|
|
*/
|
|
|
|
|
if (argno>=argc) usage (prog_name);
|
|
|
|
|
src_gen_name = argv[argno++];
|
|
|
|
|
sprintf (src_name, src_gen_name, src_membno);
|
|
|
|
|
src_is_family = strcmp (src_name, src_gen_name);
|
|
|
|
|
if ((src=open (src_name, O_RDONLY))<0) {
|
|
|
|
|
perror (src_name);
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
if (fstat (src, &sb)<0) {
|
|
|
|
|
perror ("fstat");
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
src_size = src_act_size = sb.st_size;
|
|
|
|
|
if (verbose) fprintf (stderr, "< %s\n", src_name);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Get the name for the destination file and open the first member.
|
|
|
|
|
*/
|
|
|
|
|
if (argno>=argc) usage (prog_name);
|
|
|
|
|
dst_gen_name = argv[argno++];
|
|
|
|
|
sprintf (dst_name, dst_gen_name, dst_membno);
|
|
|
|
|
dst_is_family = strcmp (dst_name, dst_gen_name);
|
|
|
|
|
if ((dst=open (dst_name, O_RDWR|O_CREAT|O_TRUNC, 0666))<0) {
|
|
|
|
|
perror (dst_name);
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
if (verbose) fprintf (stderr, "> %s\n", dst_name);
|
|
|
|
|
|
|
|
|
|
/* No more arguments */
|
|
|
|
|
if (argno<argc) usage (prog_name);
|
|
|
|
|
|
|
|
|
|
/* Now the real work, split the file */
|
|
|
|
|
buf = malloc (blk_size);
|
|
|
|
|
while (src_offset<src_size) {
|
|
|
|
|
|
|
|
|
|
/* Read a block. The amount to read is the minimum of:
|
|
|
|
|
* 1. The I/O block size
|
|
|
|
|
* 2. What's left to write in the destination member
|
|
|
|
|
* 3. Left over zeros or what's left in the source member.
|
|
|
|
|
*/
|
|
|
|
|
n = blk_size;
|
|
|
|
|
if (dst_is_family) n = (size_t)MIN((off_t)n, dst_size-dst_offset);
|
|
|
|
|
if (left_overs) {
|
|
|
|
|
n = MIN (n, left_overs);
|
|
|
|
|
left_overs -= n;
|
|
|
|
|
need_write = FALSE;
|
|
|
|
|
} else if (src_offset<src_act_size) {
|
|
|
|
|
n = (size_t)MIN ((off_t)n, src_act_size-src_offset);
|
|
|
|
|
if ((nio=read (src, buf, n))<0) {
|
|
|
|
|
perror ("read");
|
|
|
|
|
exit (1);
|
|
|
|
|
} else if ((size_t)nio!=n) {
|
|
|
|
|
fprintf (stderr, "%s: short read\n", src_name);
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
for (i=0; i<n; i++) {
|
|
|
|
|
if (buf[i]) break;
|
|
|
|
|
}
|
|
|
|
|
need_write = (i<n);
|
|
|
|
|
} else {
|
|
|
|
|
n = 0;
|
|
|
|
|
left_overs = src_size - src_act_size;
|
|
|
|
|
need_write = FALSE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* If the block contains non-zero data then write it to the
|
|
|
|
|
* destination, otherwise just remember that we'll have to do a seek
|
|
|
|
|
* later in the destination when we finally get non-zero data.
|
|
|
|
|
*/
|
|
|
|
|
if (need_write) {
|
|
|
|
|
if (need_seek && lseek (dst, dst_offset, SEEK_SET)<0) {
|
|
|
|
|
perror ("lseek");
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
if ((nio=write (dst, buf, n))<0) {
|
|
|
|
|
perror ("write");
|
|
|
|
|
exit (1);
|
|
|
|
|
} else if ((size_t)nio!=n) {
|
|
|
|
|
fprintf (stderr, "%s: short write\n", dst_name);
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
need_seek = FALSE;
|
|
|
|
|
} else {
|
|
|
|
|
need_seek = TRUE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Update the source offset and open the next source family member if
|
|
|
|
|
* necessary. The source stream ends at the first member which
|
|
|
|
|
* cannot be opened because it doesn't exist. At the end of the
|
|
|
|
|
* source stream, update the destination offset and break out of the
|
|
|
|
|
* loop. The destination offset must be updated so we can fix
|
|
|
|
|
* trailing holes.
|
|
|
|
|
*/
|
|
|
|
|
src_offset += n;
|
|
|
|
|
if (src_offset==src_act_size) {
|
|
|
|
|
close (src);
|
|
|
|
|
if (!src_is_family) {
|
|
|
|
|
dst_offset += n;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
sprintf (src_name, src_gen_name, ++src_membno);
|
|
|
|
|
if ((src=open (src_name, O_RDONLY))<0 && ENOENT==errno) {
|
|
|
|
|
dst_offset += n;
|
|
|
|
|
break;
|
|
|
|
|
} else if (src<0) {
|
|
|
|
|
perror (src_name);
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
if (fstat (src, &sb)<0) {
|
|
|
|
|
perror ("fstat");
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
src_act_size = sb.st_size;
|
|
|
|
|
if (src_act_size>src_size) {
|
|
|
|
|
fprintf (stderr, "%s: member truncated to %lu bytes\n",
|
|
|
|
|
src_name, (unsigned long)src_size);
|
|
|
|
|
}
|
|
|
|
|
src_offset = 0;
|
|
|
|
|
if (verbose) fprintf (stderr, "< %s\n", src_name);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Update the destination offset, opening a new member if one will be
|
|
|
|
|
* needed. The first member is extended to the logical member size
|
|
|
|
|
* but other members might be smaller if they end with a hole.
|
|
|
|
|
*/
|
|
|
|
|
dst_offset += n;
|
|
|
|
|
if (dst_is_family && dst_offset==dst_size) {
|
|
|
|
|
if (0==dst_membno) {
|
|
|
|
|
if (lseek (dst, dst_size-1, SEEK_SET)<0) {
|
|
|
|
|
perror ("lseek");
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
if (read (dst, buf, 1)<0) {
|
|
|
|
|
perror ("read");
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
if (lseek (dst, dst_size-1, SEEK_SET)<0) {
|
|
|
|
|
perror ("lseek");
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
if (write (dst, buf, 1)<0) {
|
|
|
|
|
perror ("write");
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
close (dst);
|
|
|
|
|
sprintf (dst_name, dst_gen_name, ++dst_membno);
|
|
|
|
|
if ((dst=open (dst_name, O_RDWR|O_CREAT|O_TRUNC, 0666))<0) {
|
|
|
|
|
perror (dst_name);
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
dst_offset = 0;
|
|
|
|
|
need_seek = FALSE;
|
|
|
|
|
if (verbose) fprintf (stderr, "> %s\n", dst_name);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Make sure the last family member is the right size and then close it.
|
|
|
|
|
* The last member can't end with a hole or hdf5 will think that the
|
|
|
|
|
* family has been truncated.
|
|
|
|
|
*/
|
|
|
|
|
if (need_seek) {
|
|
|
|
|
if (lseek (dst, dst_offset-1, SEEK_SET)<0) {
|
|
|
|
|
perror ("lseek");
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
if (read (dst, buf, 1)<0) {
|
|
|
|
|
perror ("read");
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
if (lseek (dst, dst_offset-1, SEEK_SET)<0) {
|
|
|
|
|
perror ("lseek");
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
if (write (dst, buf, 1)<0) {
|
|
|
|
|
perror ("write");
|
|
|
|
|
exit (1);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
close (dst);
|
|
|
|
|
|
|
|
|
|
/* Free resources and return */
|
|
|
|
|
free (buf);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|