hdf5/tools/misc/h5repart.c

518 lines
16 KiB
C
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Copyright by The HDF Group. *
* Copyright by the Board of Trustees of the University of Illinois. *
* All rights reserved. *
* *
* This file is part of HDF5. The full HDF5 copyright notice, including *
* terms governing use, modification, and redistribution, is contained in *
* the files COPYING and Copyright.html. COPYING can be found at the root *
* of the source code distribution tree; Copyright.html can be found at the *
* root level of an installed copy of the electronic HDF5 document set and *
* is linked from the top-level documents page. It can also be found at *
* http://hdfgroup.org/HDF5/doc/Copyright.html. If you do not have *
* access to either file, you may request a copy from help@hdfgroup.org. *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
* Programmer: Robb Matzke <matzke@llnl.gov>
* Wednesday, May 13, 1998
*
* Purpose: Repartitions a file family. This program can be used to
* split a single file into a family of files, join a family of
* files into a single file, or copy one family to another while
* changing the size of the family members. It can also be used
* to copy a single file to a single file with holes.
*/
/* See H5private.h for how to include system headers */
#include "hdf5.h"
#include "H5private.h"
#ifdef H5_STDC_HEADERS
# include <ctype.h>
# include <errno.h>
# include <fcntl.h>
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
#endif
#ifdef H5_HAVE_UNISTD_H
# include <sys/types.h>
# include <unistd.h>
#endif
#ifdef H5_HAVE_SYS_STAT_H
# include <sys/stat.h>
#endif
#ifdef _WIN32
# include <io.h>
# include <fcntl.h>
#endif
/* Boolean constants, supplied only when no earlier header defined them. */
#ifndef FALSE
# define FALSE 0
#endif
#ifndef TRUE
# define TRUE 1
#endif
/* Size in bytes of the buffers that hold an expanded member file name. */
# define NAMELEN 4096
/* Postfix multiplier: writing `1 GB' expands to `1 *1024*1024*1024'. The
 * expansion is not parenthesized and is evaluated in the type of its left
 * operand, so it is only suitable for small literal operands. */
#define GB *1024*1024*1024
/* Smaller of two values. Each argument may be evaluated more than once, so
 * do not pass expressions with side effects. */
#ifndef MIN
# define MIN(X,Y) ((X)<(Y)?(X):(Y))
#endif
/* Smallest of three values, built from MIN (same multiple-evaluation caveat). */
#ifndef MIN3
# define MIN3(X,Y,Z) MIN(MIN(X,Y),Z)
#endif
/* Make these two private properties (defined in H5Fprivate.h) available to
 * h5repart. The first one updates the member file size in the superblock.
 * The second one changes the file driver from family to sec2. */
#define H5F_ACS_FAMILY_NEWSIZE_NAME "family_newsize"
#define H5F_ACS_FAMILY_TO_SEC2_NAME "family_to_sec2"
/*-------------------------------------------------------------------------
 * Function: usage
 *
 * Purpose: Writes the command-line synopsis, followed by a one-line
 *          description of every switch and argument, to stderr and then
 *          terminates the program.
 *
 * Return: Does not return; the process exits with status 1.
 *
 * Programmer: Robb Matzke
 *             Wednesday, May 13, 1998
 *
 *-------------------------------------------------------------------------
 */
static void
usage (const char *progname)
{
    /* Fixed help text printed after the synopsis, one entry per line. */
    static const char *const help_text[] = {
        " -v Produce verbose output\n",
        " -V Print a version number and exit\n",
        " -b N The I/O block size, defaults to 1kB\n",
        " -m N The destination member size or 1GB\n",
        " -family_to_sec2 Change file driver from family to sec2\n",
        " SRC The name of the source file\n",
        " DST The name of the destination files\n",
        "Sizes may be suffixed with `g' for GB, `m' for MB or "
        "`k' for kB.\n"
    };
    const size_t nlines = sizeof help_text / sizeof help_text[0];
    size_t line;

    /* The synopsis is the only line that needs the program name. */
    fprintf(stderr, "usage: %s [-v] [-V] [-[b|m] N[g|m|k]] [-family_to_sec2] SRC DST\n",
            progname);

    for (line = 0; line < nlines; line++) {
        fputs(help_text[line], stderr);
    }

    /* Kept as a format string because it carries an escaped percent sign. */
    fprintf(stderr, "File family names include an integer printf "
            "format such as `%%d'\n");

    exit (1);
}
/*-------------------------------------------------------------------------
 * Function: get_size
 *
 * Purpose: Reads a size option of the form `-XNS' where `X' is any
 *          letter, `N' is a multi-character positive number, and `S' is
 *          an optional suffix letter in the set [GgMmKk]. The option
 *          may also be split among two arguments as: `-X NS'. In the
 *          joined form N is read as decimal; in the split form strtol()
 *          base 0 is used, so octal and hexadecimal prefixes are also
 *          recognized.
 *
 *          The input value of ARGNO is the argument number for the
 *          switch in the ARGV vector and ARGC is the number of entries
 *          in that vector.
 *
 * Return: Success: The value N multiplied according to the suffix S
 *                  (always greater than zero). On return ARGNO will
 *                  be the number of the next argument to process.
 *
 *         Failure: Calls usage() which exits with a non-zero status.
 *                  This happens when the number is missing, is zero
 *                  or negative, is followed by an unknown or
 *                  multi-character suffix, or is too large to be
 *                  represented as an off_t after scaling.
 *
 * Programmer: Robb Matzke
 *             Wednesday, May 13, 1998
 *
 *-------------------------------------------------------------------------
 */
static off_t
get_size (const char *progname, int *argno, int argc, char *argv[])
{
    off_t retval = -1;      /*value returned to the caller */
    off_t scale = 1;        /*multiplier selected by the suffix */
    off_t off_max;          /*largest value an off_t can hold */
    long parsed = -1;       /*number exactly as strtol() read it */
    char *suffix = NULL;    /*first character following the number */

    errno = 0;
    /* <ctype.h> functions require a value representable as unsigned char */
    if (isdigit ((unsigned char)argv[*argno][2])) {
        /* Joined form: `-XNS' */
        parsed = strtol (argv[*argno]+2, &suffix, 10);
        (*argno)++;
    } else if (argv[*argno][2] || *argno+1>=argc) {
        /* Junk glued to the switch, or no argument left to hold the size */
        usage (progname);
    } else {
        /* Split form: `-X NS' */
        parsed = strtol (argv[*argno+1], &suffix, 0);
        if (suffix==argv[*argno+1]) usage (progname);
        *argno += 2;
    }

    /* Sizes must be positive and must have fit in a long */
    if (ERANGE==errno || parsed<=0) usage (progname);

    /* ...and must survive the conversion to off_t unchanged */
    retval = (off_t)parsed;
    if ((long)retval!=parsed) usage (progname);

    /* At most one suffix character may follow the number */
    if (suffix && suffix[0]) {
        if (suffix[1]) usage (progname);
        switch (suffix[0]) {
        case 'G':
        case 'g':
            scale = (off_t)1024 * 1024 * 1024;
            break;
        case 'M':
        case 'm':
            scale = (off_t)1024 * 1024;
            break;
        case 'K':
        case 'k':
            scale = 1024;
            break;
        default:
            usage (progname);
        }
    }

    /*
     * Refuse values whose scaled result would overflow off_t (signed
     * overflow is undefined behavior). The maximum is built without
     * overflowing itself; off_t is a signed type and bytes are assumed to
     * be 8 bits wide, as POSIX requires.
     */
    off_max = (off_t)((((off_t)1 << (sizeof(off_t) * 8 - 2)) - 1) * 2 + 1);
    if (retval > off_max / scale) usage (progname);

    return retval * scale;
}
/*-------------------------------------------------------------------------
 * Function: format_member_name
 *
 * Purpose: Expands the printf-style template GEN_NAME with the member
 *          number MEMBNO into NAME, which must have room for NAMELEN
 *          bytes. A template without a conversion is copied unchanged.
 *
 * Return: Success: void; NAME holds the NUL-terminated result.
 *
 *         Failure: Prints a message and exits with status 1 when the
 *                  expanded name does not fit in NAMELEN bytes.
 *
 *-------------------------------------------------------------------------
 */
static void
format_member_name (char *name, const char *gen_name, int membno)
{
    int len = snprintf (name, NAMELEN, gen_name, membno);

    if (len<0 || len>=NAMELEN) {
        fprintf (stderr, "%s: file name is too long\n", gen_name);
        exit (1);
    }
}

/*-------------------------------------------------------------------------
 * Function: main
 *
 * Purpose: Copies the source file or file family named on the command
 *          line to the destination file or family, block by block.
 *          Blocks that contain only zero bytes are not written; the
 *          destination is seeked past them instead so they become
 *          holes. After the copy the destination is reopened through
 *          the HDF5 library with a private property set, either to
 *          record the new member size or to switch from the family
 *          driver to sec2.
 *
 * Return: Success: 0 (the -V switch exits directly with status 0)
 *
 *         Failure: Does not return; a message is printed and the
 *                  process exits with status 1.
 *
 * Programmer: Robb Matzke
 *             Wednesday, May 13, 1998
 *
 *-------------------------------------------------------------------------
 */
int
main (int argc, char *argv[])
{
    const char *prog_name;          /*program name */
    size_t blk_size=1024;           /*size of each I/O block */
    char *buf=NULL;                 /*I/O block buffer */
    size_t n, i;                    /*counters */
    ssize_t nio;                    /*I/O return value */
    int argno=1;                    /*program argument number */
    int src, dst=-1;                /*source & destination files */
    int need_seek=FALSE;            /*destination needs to seek? */
    int need_write;                 /*data needs to be written? */
    h5_stat_t sb;                   /*temporary file stat buffer */
    int verbose=FALSE;              /*display file names? */
    const char *src_gen_name;       /*general source name */
    char src_name[NAMELEN];         /*source member name */
    int src_is_family;              /*is source name a family name? */
    int src_membno=0;               /*source member number */
    const char *dst_gen_name;       /*general destination name */
    char dst_name[NAMELEN];         /*destination member name */
    int dst_is_family;              /*is dst name a family name? */
    int dst_membno=0;               /*destination member number */
#if defined(_WIN32) && ! defined (__MWERKS__)
    __int64 left_overs=0;           /*amount of zeros left over */
    __int64 src_offset=0;           /*offset in source member */
    __int64 dst_offset=0;           /*offset in destination member */
    __int64 src_size;               /*source logical member size */
    __int64 src_act_size;           /*source actual member size */
    __int64 dst_size=1 GB;          /*destination logical memb size */
#else
    off_t left_overs=0;             /*amount of zeros left over */
    off_t src_offset=0;             /*offset in source member */
    off_t dst_offset=0;             /*offset in destination member */
    off_t src_size;                 /*source logical member size */
    off_t src_act_size;             /*source actual member size */
    off_t dst_size=1 GB;            /*destination logical memb size */
#endif
    hid_t fapl;                     /*file access property list */
    hid_t file;                     /*destination reopened via HDF5 */
    hsize_t hdsize;                 /*destination logical memb size */
    hbool_t family_to_sec2=FALSE;   /*change family to sec2 driver? */

    /*
     * Get the program name from argv[0]. Use only the last component.
     */
    if ((prog_name=strrchr (argv[0], '/'))) prog_name++;
    else prog_name = argv[0];

    /*
     * Parse switches.
     */
    while (argno<argc && '-'==argv[argno][0]) {
        if (!strcmp (argv[argno], "-v")) {
            verbose = TRUE;
            argno++;
        } else if (!strcmp(argv[argno], "-V")) {
            printf("This is %s version %u.%u release %u\n",
                   prog_name, H5_VERS_MAJOR, H5_VERS_MINOR, H5_VERS_RELEASE);
            exit(0);
        } else if (!strcmp (argv[argno], "-family_to_sec2")) {
            family_to_sec2 = TRUE;
            argno++;
        } else if ('b'==argv[argno][1]) {
            blk_size = get_size (prog_name, &argno, argc, argv);
        } else if ('m'==argv[argno][1]) {
            dst_size = get_size (prog_name, &argno, argc, argv);
        } else {
            usage (prog_name);
        }
    }

    /* A zero block size would make the copy loop below spin forever. */
    if (0==blk_size) usage (prog_name);

    /*
     * Get the name for the source file and open the first member. The size
     * of the first member determines the logical size of all the members.
     */
    if (argno>=argc) usage (prog_name);
    src_gen_name = argv[argno++];
    format_member_name (src_name, src_gen_name, src_membno);
    /* The name is a family template iff expanding it changed the text */
    src_is_family = strcmp (src_name, src_gen_name);
    if ((src=HDopen(src_name, O_RDONLY,0))<0) {
        perror (src_name);
        exit (1);
    }
    if (HDfstat(src, &sb)<0) {
        perror ("fstat");
        exit (1);
    }
    src_size = src_act_size = sb.st_size;
    if (verbose) fprintf (stderr, "< %s\n", src_name);

    /*
     * Get the name for the destination file and open the first member.
     */
    if (argno>=argc) usage (prog_name);
    dst_gen_name = argv[argno++];
    format_member_name (dst_name, dst_gen_name, dst_membno);
    dst_is_family = strcmp (dst_name, dst_gen_name);
    /*
     * A destination family needs a positive member size, otherwise the
     * per-member byte budget computed in the copy loop is zero or negative.
     * Check before the destination is created and truncated.
     */
    if (dst_is_family && dst_size<=0) usage (prog_name);
    if ((dst=HDopen (dst_name, O_RDWR|O_CREAT|O_TRUNC, 0666))<0) {
        perror (dst_name);
        exit (1);
    }
    if (verbose) fprintf (stderr, "> %s\n", dst_name);

    /* No more arguments */
    if (argno<argc) usage (prog_name);

    /* Now the real work, split the file */
    if (NULL==(buf=malloc (blk_size))) {
        perror ("malloc");
        exit (1);
    }
    while (src_offset<src_size) {
        /* Read a block. The amount to read is the minimum of:
         * 1. The I/O block size
         * 2. What's left to write in the destination member
         * 3. Left over zeros or what's left in the source member.
         */
        n = blk_size;
        if (dst_is_family) n = (size_t)MIN((off_t)n, dst_size-dst_offset);
        if (left_overs) {
            n = (size_t)MIN (n, left_overs);
            left_overs -= n;
            need_write = FALSE;
        } else if (src_offset<src_act_size) {
            n = (size_t)MIN ((off_t)n, src_act_size-src_offset);
            if ((nio=read (src, buf, n))<0) {
                perror ("read");
                exit (1);
            } else if ((size_t)nio!=n) {
                fprintf (stderr, "%s: short read\n", src_name);
                exit (1);
            }
            /* Only blocks holding at least one non-zero byte are written */
            for (i=0; i<n; i++) {
                if (buf[i]) break;
            }
            need_write = (i<n);
        } else {
            n = 0;
            left_overs = src_size - src_act_size;
            need_write = FALSE;
        }

        /*
         * If the block contains non-zero data then write it to the
         * destination, otherwise just remember that we'll have to do a seek
         * later in the destination when we finally get non-zero data.
         */
        if (need_write) {
            if (need_seek && HDlseek (dst, dst_offset, SEEK_SET)<0) {
                perror ("HDlseek");
                exit (1);
            }
            if ((nio=write (dst, buf, n))<0) {
                perror ("write");
                exit (1);
            } else if ((size_t)nio!=n) {
                fprintf (stderr, "%s: short write\n", dst_name);
                exit (1);
            }
            need_seek = FALSE;
        } else {
            need_seek = TRUE;
        }

        /*
         * Update the source offset and open the next source family member if
         * necessary. The source stream ends at the first member which
         * cannot be opened because it doesn't exist. At the end of the
         * source stream, update the destination offset and break out of the
         * loop. The destination offset must be updated so we can fix
         * trailing holes.
         */
        src_offset += n;
        if (src_offset==src_act_size) {
            close (src);
            if (!src_is_family) {
                dst_offset += n;
                break;
            }
            format_member_name (src_name, src_gen_name, ++src_membno);
            if ((src=HDopen (src_name, O_RDONLY,0))<0 && ENOENT==errno) {
                dst_offset += n;
                break;
            } else if (src<0) {
                perror (src_name);
                exit (1);
            }
            if (HDfstat (src, &sb)<0) {
                perror ("fstat");
                exit (1);
            }
            src_act_size = sb.st_size;
            /* NOTE(review): only a warning is printed here; src_act_size is
             * not clamped to src_size. Confirm whether clamping was meant. */
            if (src_act_size>src_size) {
                fprintf (stderr, "%s: member truncated to %lu bytes\n",
                         src_name, (unsigned long)src_size);
            }
            src_offset = 0;
            if (verbose) fprintf (stderr, "< %s\n", src_name);
        }

        /*
         * Update the destination offset, opening a new member if one will be
         * needed. The first member is extended to the logical member size
         * but other members might be smaller if they end with a hole.
         */
        dst_offset += n;
        if (dst_is_family && dst_offset==dst_size) {
            if (0==dst_membno) {
                if (HDlseek (dst, dst_size-1, SEEK_SET)<0) {
                    perror ("HDlseek");
                    exit (1);
                }
                /*
                 * read() returns 0 and leaves the buffer untouched when the
                 * position is at or past end-of-file. Start from a zero byte
                 * so the member is extended with zero, not stale block data.
                 */
                buf[0] = 0;
                if (read (dst, buf, 1)<0) {
                    perror ("read");
                    exit (1);
                }
                if (HDlseek (dst, dst_size-1, SEEK_SET)<0) {
                    perror ("HDlseek");
                    exit (1);
                }
                if (write (dst, buf, 1)<0) {
                    perror ("write");
                    exit (1);
                }
            }
            close (dst);
            format_member_name (dst_name, dst_gen_name, ++dst_membno);
            if ((dst=HDopen (dst_name, O_RDWR|O_CREAT|O_TRUNC, 0666))<0) {
                perror (dst_name);
                exit (1);
            }
            dst_offset = 0;
            need_seek = FALSE;
            if (verbose) fprintf (stderr, "> %s\n", dst_name);
        }
    }

    /*
     * Make sure the last family member is the right size and then close it.
     * The last member can't end with a hole or hdf5 will think that the
     * family has been truncated.
     */
    if (need_seek) {
        if (HDlseek (dst, dst_offset-1, SEEK_SET)<0) {
            perror ("HDlseek");
            exit (1);
        }
        /* Same zero-byte default as above in case the read hits end-of-file */
        buf[0] = 0;
        if (read (dst, buf, 1)<0) {
            perror ("read");
            exit (1);
        }
        if (HDlseek (dst, dst_offset-1, SEEK_SET)<0) {
            perror ("HDlseek");
            exit (1);
        }
        if (write (dst, buf, 1)<0) {
            perror ("write");
            exit (1);
        }
    }
    close (dst);

    /* Modify family driver information saved in superblock through private property.
     * These private properties are for this tool only. */
    if ((fapl=H5Pcreate(H5P_FILE_ACCESS))<0) {
        perror ("H5Pcreate");
        exit (1);
    }
    if(family_to_sec2) {
        /* The user wants to change file driver from family to sec2. Open the file
         * with sec2 driver. This property signals the library to ignore the family
         * driver information saved in the superblock. */
        if(H5Pset(fapl, H5F_ACS_FAMILY_TO_SEC2_NAME, &family_to_sec2) < 0) {
            perror ("H5Pset");
            exit (1);
        }
    } else {
        /* Modify family size saved in superblock through private property. It signals
         * library to save the new member size(specified in command line) in superblock.
         * This private property is for this tool only. */
        if(H5Pset_fapl_family(fapl, H5F_FAMILY_DEFAULT, H5P_DEFAULT) < 0) {
            perror ("H5Pset_fapl_family");
            exit (1);
        }
        /* Set the property of the new member size as hsize_t */
        hdsize = dst_size;
        if(H5Pset(fapl, H5F_ACS_FAMILY_NEWSIZE_NAME, &hdsize) < 0) {
            perror ("H5Pset");
            exit (1);
        }
    }

    /* If the new file is a family file, try to open file for "read and write" to
     * flush metadata. Flushing metadata will update the superblock to the new
     * member size. If the original file is a family file and the new file is a sec2
     * file, the property FAMILY_TO_SEC2 will signal the library to switch to sec2
     * driver when the new file is opened. If the original file is a sec2 file and the
     * new file can only be a sec2 file, reopen the new file should fail. There's
     * nothing to do in this case. */
    H5E_BEGIN_TRY {
        file=H5Fopen(dst_gen_name, H5F_ACC_RDWR, fapl);
    } H5E_END_TRY;
    if(file>=0) {
        if(H5Fclose(file)<0) {
            perror ("H5Fclose");
            exit (1);
        }
    }
    if(H5Pclose(fapl)<0) {
        perror ("H5Pclose");
        exit (1);
    }

    /* Free resources and return */
    free (buf);
    return 0;
}