Merge pull request #1316 in HDFFV/hdf5 from ~SONGYULU/hdf5_ray:bugfix/HDFFV-10601-issues-with-chunk-cache-hash to develop

* commit 'cd13d24e5140578a880aebe4e2d8b899179d0870':
  HDFFV-10601: I added error checking to the HDF5 functions.
  HDFFV-10601: Adding a performance test to verify the improvement.
  HDFFV-10601: I changed to a better way to calculate the number of chunks in a dataset.
  HDFFV-10601 Issues with chunk cache hash value calculation:
Ray Lu 2018-11-15 09:43:46 -06:00
commit e07d097da1
4 changed files with 479 additions and 14 deletions

src/H5Dchunk.c

@@ -949,7 +949,10 @@ H5D__chunk_init(H5F_t *f, const H5D_t *dset, hid_t dapl_id)
         /* Initial scaled dimension sizes */
         if(dset->shared->layout.u.chunk.dim[u] == 0)
             HGOTO_ERROR(H5E_DATASET, H5E_BADVALUE, FAIL, "chunk size must be > 0, dim = %u ", u)
-        rdcc->scaled_dims[u] = dset->shared->curr_dims[u] / dset->shared->layout.u.chunk.dim[u];
+
+        /* Round up to the next integer # of chunks, to accommodate partial chunks */
+        rdcc->scaled_dims[u] = (dset->shared->curr_dims[u] + dset->shared->layout.u.chunk.dim[u] - 1) /
+            dset->shared->layout.u.chunk.dim[u];
 
         if( !(scaled_power2up = H5VM_power2up(rdcc->scaled_dims[u])) )
             HGOTO_ERROR(H5E_DATASET, H5E_CANTGET, FAIL, "unable to get the next power of 2")
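The replacement expression is the standard ceiling-division idiom: a dimension ending in a partial chunk now contributes that chunk to scaled_dims, where the old truncating division dropped it. A minimal standalone sketch (not part of the patch), using the geometry of the first dataset in the new performance test:

#include <stdio.h>

int main(void)
{
    /* Dataset 1 of the new perf test: 9000 elements along the first
     * dimension, chunked at 2000, i.e. 4 full chunks plus 1 partial chunk */
    unsigned long long curr_dim = 9000, chunk_dim = 2000;

    unsigned long long truncated  = curr_dim / chunk_dim;                    /* old code: 4 */
    unsigned long long rounded_up = (curr_dim + chunk_dim - 1) / chunk_dim;  /* new code: 5 */

    printf("truncated = %llu, rounded up = %llu\n", truncated, rounded_up);
    return 0;
}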
@@ -2799,6 +2802,7 @@ H5D__chunk_hash_val(const H5D_shared_t *shared, const hsize_t *scaled)
     hsize_t val;        /* Intermediate value */
     unsigned ndims = shared->ndims;     /* Rank of dataset */
     unsigned ret = 0;   /* Value to return */
+    unsigned u;         /* Local index variable */
 
     FUNC_ENTER_STATIC_NOERR
@@ -2809,17 +2813,11 @@ H5D__chunk_hash_val(const H5D_shared_t *shared, const hsize_t *scaled)
     /* If the fastest changing dimension doesn't have enough entropy, use
      * other dimensions too
      */
-    if(ndims > 1 && shared->cache.chunk.scaled_dims[ndims - 1] <= shared->cache.chunk.nslots) {
-        unsigned u;     /* Local index variable */
-
-        val = scaled[0];
-        for(u = 1; u < ndims; u++) {
-            val <<= shared->cache.chunk.scaled_encode_bits[u];
-            val ^= scaled[u];
-        } /* end for */
-    } /* end if */
-    else
-        val = scaled[ndims - 1];
+    val = scaled[0];
+    for(u = 1; u < ndims; u++) {
+        val <<= shared->cache.chunk.scaled_encode_bits[u];
+        val ^= scaled[u];
+    } /* end for */
 
     /* Modulo value against the number of array slots */
     ret = (unsigned)(val % shared->cache.chunk.nslots);
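With the old code, whenever the fastest-growing dimension held more chunks than the cache had slots, the hash collapsed to scaled[ndims - 1] alone, so every chunk sharing the same fastest-dimension index collided on one slot. A minimal standalone sketch (not part of the patch) comparing the two hash schemes, using the 3 x 6 chunk grid and 5 cache slots of the test's second dataset; the encode_bits value of 3 is an assumption about how scaled_encode_bits comes out for this geometry (next power of 2 above 6 is 8, i.e. 3 bits):

#include <stdio.h>

int main(void)
{
    unsigned nslots         = 5;        /* RDCC_NSLOTS in the new test */
    unsigned scaled_dims[2] = {3, 6};   /* 300 x 600 dataset, 100 x 100 chunks */
    unsigned encode_bits    = 3;        /* assumed: bits to encode 6 chunks (power2up(6) = 8) */
    unsigned i, j;

    for (i = 0; i < scaled_dims[0]; i++)
        for (j = 0; j < scaled_dims[1]; j++) {
            /* Old hash: 6 chunks along the fastest dimension > 5 slots, so
             * only scaled[ndims - 1] was used; each column of the chunk grid
             * maps to a single slot and its chunks evict one another */
            unsigned old_slot = j % nslots;

            /* New hash: always fold the slower dimensions in */
            unsigned long long val = i;
            unsigned new_slot;
            val <<= encode_bits;
            val ^= j;
            new_slot = (unsigned)(val % nslots);

            printf("chunk (%u,%u): old slot %u, new slot %u\n", i, j, old_slot, new_slot);
        }
    return 0;
}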

tools/test/perform/CMakeLists.txt

@@ -51,6 +51,16 @@ TARGET_C_PROPERTIES (iopipe STATIC)
 target_link_libraries (iopipe PRIVATE ${HDF5_LIB_TARGET} ${HDF5_TOOLS_LIB_TARGET})
 set_target_properties (iopipe PROPERTIES FOLDER perform)
 
+#-- Adding test for chunk_cache
+set (chunk_cache_SOURCES
+    ${HDF5_TOOLS_TEST_PERFORM_SOURCE_DIR}/chunk_cache.c
+)
+add_executable (chunk_cache ${chunk_cache_SOURCES})
+target_include_directories(chunk_cache PRIVATE "${HDF5_SRC_DIR};${HDF5_BINARY_DIR};$<$<BOOL:${HDF5_ENABLE_PARALLEL}>:${MPI_C_INCLUDE_DIRS}>")
+TARGET_C_PROPERTIES (chunk_cache STATIC)
+target_link_libraries (chunk_cache PRIVATE ${HDF5_LIB_TARGET} ${HDF5_TOOLS_LIB_TARGET})
+set_target_properties (chunk_cache PROPERTIES FOLDER perform)
+
 #-- Adding test for overhead
 set (overhead_SOURCES
     ${HDF5_TOOLS_TEST_PERFORM_SOURCE_DIR}/overhead.c

tools/test/perform/Makefile.am

@@ -50,12 +50,12 @@ if BUILD_PARALLEL_CONDITIONAL
    TEST_PROG_PARA=h5perf perf
 endif
 
 # Serial test programs.
-TEST_PROG = iopipe chunk overhead zip_perf perf_meta h5perf_serial $(BUILD_ALL_PROGS)
+TEST_PROG = iopipe chunk chunk_cache overhead zip_perf perf_meta h5perf_serial $(BUILD_ALL_PROGS)
 
 # check_PROGRAMS will be built but not installed. Do not add any executable
 # that is already in bin_PROGRAMS. Otherwise, it will be removed twice by
 # "make clean", and some systems, e.g., AIX, do not like that.
-check_PROGRAMS= iopipe chunk overhead zip_perf perf_meta $(BUILD_ALL_PROGS) perf
+check_PROGRAMS= iopipe chunk chunk_cache overhead zip_perf perf_meta $(BUILD_ALL_PROGS) perf
 
 h5perf_SOURCES=pio_perf.c pio_engine.c
 h5perf_serial_SOURCES=sio_perf.c sio_engine.c

tools/test/perform/chunk_cache.c

@@ -0,0 +1,457 @@
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* Copyright by The HDF Group. *
* Copyright by the Board of Trustees of the University of Illinois. *
* All rights reserved. *
* *
* This file is part of HDF5. The full HDF5 copyright notice, including *
* terms governing use, modification, and redistribution, is contained in *
* the COPYING file, which can be found at the root of the source code *
* distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases. *
* If you do not have access to either file, you may request a copy from *
* help@hdfgroup.org. *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Purpose: check the performance of the chunk cache in these two cases (HDFFV-10601):
 *          1. partial chunks exist along any dimension.
 *          2. the number of slots in the chunk cache is smaller than the number
 *             of chunks in the fastest-growing dimension.
 */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "hdf5.h"
#define FILENAME "chunk_cache_perf.h5"
#define RANK 2
#define DSET1_NAME "partial_chunks"
#define DSET1_DIM1 9 * 1000
#define DSET1_DIM2 9
#define CHUNK1_DIM1 2 * 1000
#define CHUNK1_DIM2 2
#define DSET2_NAME "hash_value"
#define DSET2_DIM1 300
#define DSET2_DIM2 600
#define CHUNK2_DIM1 100
#define CHUNK2_DIM2 100
#define RDCC_NSLOTS 5
#define RDCC_NBYTES 1024 * 1024 * 10
#define RDCC_W0 0.75F
#define FILTER_COUNTER 306
static size_t nbytes_global;
typedef struct test_time_t {
long tv_sec;
long tv_usec;
} test_time_t;
/* Local function prototypes for the dummy filter */
static size_t
counter (unsigned flags, size_t cd_nelmts,
const unsigned *cd_values, size_t nbytes,
size_t *buf_size, void **buf);
/* This message derives from H5Z */
const H5Z_class2_t H5Z_COUNTER[1] = {{
H5Z_CLASS_T_VERS, /* H5Z_class_t version */
FILTER_COUNTER, /* Filter id number */
1, 1, /* Encoding and decoding enabled */
"counter", /* Filter name for debugging */
NULL, /* The "can apply" callback */
NULL, /* The "set local" callback */
counter, /* The actual filter function */
}};
/*-------------------------------------------------------------------------
 * Count the number of bytes but don't do anything else.  Keeps track of how
 * much chunk data is read from the file into memory.
 */
static size_t
counter (unsigned flags, size_t cd_nelmts,
const unsigned *cd_values, size_t nbytes,
size_t *buf_size, void **buf)
{
nbytes_global += nbytes;
return nbytes;
}
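/* Note: the filter pipeline only runs when a chunk actually moves between the
 * file and memory, so after nbytes_global is reset it totals the raw chunk
 * bytes fetched from disk.  With an effective cache each chunk is read once;
 * a larger count means the same chunks were evicted and re-read (thrashing). */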
/*---------------------------------------------------------------------------*/
static int
test_time_get_current(test_time_t *tv)
{
struct timespec tp;
if (!tv)
return -1;
if (clock_gettime(CLOCK_MONOTONIC, &tp))
return -1;
tv->tv_sec = tp.tv_sec;
tv->tv_usec = tp.tv_nsec / 1000;
return 0;
}
/*---------------------------------------------------------------------------*/
static double
test_time_to_double(test_time_t tv)
{
return (double) tv.tv_sec + (double) (tv.tv_usec) * 0.000001;
}
/*---------------------------------------------------------------------------*/
static test_time_t
test_time_add(test_time_t in1, test_time_t in2)
{
test_time_t out;
out.tv_sec = in1.tv_sec + in2.tv_sec;
out.tv_usec = in1.tv_usec + in2.tv_usec;
if(out.tv_usec >= 1000000) {
out.tv_usec -= 1000000;
out.tv_sec += 1;
}
return out;
}
/*---------------------------------------------------------------------------*/
static test_time_t
test_time_subtract(test_time_t in1, test_time_t in2)
{
test_time_t out;
out.tv_sec = in1.tv_sec - in2.tv_sec;
out.tv_usec = in1.tv_usec - in2.tv_usec;
if(out.tv_usec < 0) {
out.tv_usec += 1000000;
out.tv_sec -= 1;
}
return out;
}
/*-------------------------------------------------------------------------
* Function: cleanup
*
* Purpose: Removes test files
*
* Return: void
*
* Programmer: Robb Matzke
* Thursday, June 4, 1998
*
* Modifications:
*
*-------------------------------------------------------------------------
*/
static void
cleanup (void)
{
if (!getenv ("HDF5_NOCLEANUP")) {
remove (FILENAME);
}
}
/*-------------------------------------------------------------------------------
 * Create a chunked dataset with partial chunks along both dimensions:
* dataset dimension: 9000 x 9
* chunk dimension: 2000 x 2
*/
static int create_dset1(hid_t file)
{
    hid_t dataspace = H5I_INVALID_HID, dataset = H5I_INVALID_HID;
    hid_t dcpl = H5I_INVALID_HID;
hsize_t dims[RANK] = {DSET1_DIM1, DSET1_DIM2};
hsize_t chunk_dims[RANK] = {CHUNK1_DIM1, CHUNK1_DIM2};
int data[DSET1_DIM1][DSET1_DIM2]; /* data for writing */
int i, j;
/* Create the data space. */
if((dataspace = H5Screate_simple (RANK, dims, NULL)) < 0)
goto error;
/* Modify dataset creation properties, i.e. enable chunking */
if((dcpl = H5Pcreate (H5P_DATASET_CREATE)) < 0)
goto error;
if(H5Pset_chunk (dcpl, RANK, chunk_dims) < 0)
goto error;
/* Set the dummy filter, which simply counts the number of bytes read into memory */
if(H5Zregister(H5Z_COUNTER) < 0)
goto error;
if(H5Pset_filter(dcpl, FILTER_COUNTER, 0, 0, NULL) < 0)
goto error;
/* Create a new dataset within the file using chunk creation properties. */
if((dataset = H5Dcreate2 (file, DSET1_NAME, H5T_NATIVE_INT, dataspace,
H5P_DEFAULT, dcpl, H5P_DEFAULT)) < 0)
goto error;
for (i = 0; i < DSET1_DIM1; i++)
for (j = 0; j < DSET1_DIM2; j++)
data[i][j] = i+j;
/* Write data to dataset */
if(H5Dwrite (dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL,
H5P_DEFAULT, data) < 0)
goto error;
/* Close resources */
H5Dclose (dataset);
H5Pclose (dcpl);
H5Sclose (dataspace);
return 0;
error:
H5E_BEGIN_TRY {
H5Dclose (dataset);
H5Pclose (dcpl);
H5Sclose (dataspace);
} H5E_END_TRY;
return 1;
}
/*---------------------------------------------------------------------------
* Create a chunked dataset for testing hash values:
* dataset dimensions: 300 x 600
* chunk dimensions: 100 x 100
*/
static int create_dset2(hid_t file)
{
    hid_t dataspace = H5I_INVALID_HID, dataset = H5I_INVALID_HID;
    hid_t dcpl = H5I_INVALID_HID;
hsize_t dims[RANK] = {DSET2_DIM1, DSET2_DIM2};
hsize_t chunk_dims[RANK] = {CHUNK2_DIM1, CHUNK2_DIM2};
int data[DSET2_DIM1][DSET2_DIM2]; /* data for writing */
int i, j;
/* Create the data space. */
if((dataspace = H5Screate_simple (RANK, dims, NULL)) < 0)
goto error;
/* Modify dataset creation properties, i.e. enable chunking */
if((dcpl = H5Pcreate (H5P_DATASET_CREATE)) < 0)
goto error;
if(H5Pset_chunk (dcpl, RANK, chunk_dims) < 0)
goto error;
/* Set the dummy filter, which simply counts the number of bytes read into memory */
if(H5Zregister(H5Z_COUNTER) < 0)
goto error;
if(H5Pset_filter(dcpl, FILTER_COUNTER, 0, 0, NULL) < 0)
goto error;
/* Create a new dataset within the file using chunk creation properties. */
if((dataset = H5Dcreate2 (file, DSET2_NAME, H5T_NATIVE_INT, dataspace,
H5P_DEFAULT, dcpl, H5P_DEFAULT)) < 0)
goto error;
for (i = 0; i < DSET2_DIM1; i++)
for (j = 0; j < DSET2_DIM2; j++)
data[i][j] = i+j;
/* Write data to dataset */
if(H5Dwrite (dataset, H5T_NATIVE_INT, H5S_ALL, H5S_ALL,
H5P_DEFAULT, data) < 0)
goto error;
/* Close resources */
H5Dclose (dataset);
H5Pclose (dcpl);
H5Sclose (dataspace);
return 0;
error:
H5E_BEGIN_TRY {
H5Dclose (dataset);
H5Pclose (dcpl);
H5Sclose (dataspace);
} H5E_END_TRY;
return 1;
}
/*---------------------------------------------------------------------------
* Check the performance of the chunk cache when partial chunks exist
* along the dataset dimensions.
*/
static int check_partial_chunks_perf(hid_t file)
{
    hid_t dataset = H5I_INVALID_HID;
    hid_t filespace = H5I_INVALID_HID;
    hid_t memspace = H5I_INVALID_HID;
    hid_t dapl = H5I_INVALID_HID;
    int rdata[DSET1_DIM2]; /* data for reading */
    int i;
    int row_rank = 1;      /* H5Screate_simple() takes an int rank */
hsize_t row_dim[1] = {DSET1_DIM2};
hsize_t start[RANK] = {0, 0};
hsize_t count[RANK] = {1, DSET1_DIM2};
test_time_t t = {0, 0}, t1 = {0, 0}, t2 = {0, 0};
if((dapl = H5Pcreate(H5P_DATASET_ACCESS)) < 0)
goto error;
if(H5Pset_chunk_cache (dapl, RDCC_NSLOTS, RDCC_NBYTES, RDCC_W0) < 0)
goto error;
    if((dataset = H5Dopen2 (file, DSET1_NAME, dapl)) < 0)
        goto error;
    if((memspace = H5Screate_simple(row_rank, row_dim, NULL)) < 0)
        goto error;
    if((filespace = H5Dget_space(dataset)) < 0)
        goto error;
nbytes_global = 0;
test_time_get_current(&t1);
/* Read the data row by row */
for(i = 0; i < DSET1_DIM1; i++) {
start[0] = i;
if(H5Sselect_hyperslab(filespace, H5S_SELECT_SET,
start, NULL, count, NULL) < 0)
goto error;
if(H5Dread (dataset, H5T_NATIVE_INT, memspace, filespace,
H5P_DEFAULT, rdata) < 0)
goto error;
}
test_time_get_current(&t2);
t = test_time_add(t, test_time_subtract(t2, t1));
printf("1. Partial chunks: total read time is %lf; number of bytes being read from file is %lu\n", test_time_to_double(t), nbytes_global);
H5Dclose (dataset);
H5Sclose (filespace);
H5Sclose (memspace);
H5Pclose (dapl);
return 0;
error:
H5E_BEGIN_TRY {
H5Dclose (dataset);
H5Sclose (filespace);
H5Sclose (memspace);
H5Pclose (dapl);
} H5E_END_TRY;
return 1;
}
/*---------------------------------------------------------------------------
* Check the performance of chunk cache when the number of cache slots
* is smaller than the number of chunks along the fastest-growing
* dimension of the dataset.
*/
static int check_hash_value_perf(hid_t file)
{
    hid_t dataset = H5I_INVALID_HID;
    hid_t filespace = H5I_INVALID_HID;
    hid_t memspace = H5I_INVALID_HID;
    hid_t dapl = H5I_INVALID_HID;
    int rdata[DSET2_DIM1]; /* data for reading */
    int i;
    int column_rank = 1;   /* H5Screate_simple() takes an int rank */
hsize_t column_dim[1] = {DSET2_DIM1};
hsize_t start[RANK] = {0, 0};
hsize_t count[RANK] = {DSET2_DIM1, 1};
test_time_t t = {0, 0}, t1 = {0, 0}, t2 = {0, 0};
if((dapl = H5Pcreate(H5P_DATASET_ACCESS)) < 0)
goto error;
if(H5Pset_chunk_cache (dapl, RDCC_NSLOTS, RDCC_NBYTES, RDCC_W0) < 0)
goto error;
if((dataset = H5Dopen2 (file, DSET2_NAME, dapl)) < 0)
goto error;
if((memspace = H5Screate_simple(column_rank, column_dim, NULL)) < 0)
goto error;
if((filespace = H5Dget_space(dataset)) < 0)
goto error;
nbytes_global = 0;
test_time_get_current(&t1);
/* Read the data column by column */
for(i = 0; i < DSET2_DIM2; i++) {
start[1] = i;
if(H5Sselect_hyperslab(filespace, H5S_SELECT_SET,
start, NULL, count, NULL) < 0)
goto error;
if(H5Dread (dataset, H5T_NATIVE_INT, memspace, filespace,
H5P_DEFAULT, rdata) < 0)
goto error;
}
test_time_get_current(&t2);
t = test_time_add(t, test_time_subtract(t2, t1));
printf("2. Hash value: total read time is %lf; number of bytes being read from file is %lu\n", test_time_to_double(t), nbytes_global);
H5Dclose (dataset);
H5Sclose (filespace);
H5Sclose (memspace);
H5Pclose (dapl);
return 0;
error:
H5E_BEGIN_TRY {
H5Dclose (dataset);
H5Sclose (filespace);
H5Sclose (memspace);
H5Pclose (dapl);
} H5E_END_TRY;
return 1;
}
/*-------------------------------------------------------------------------------------
 * Purpose: check the performance of the chunk cache in these two cases (HDFFV-10601):
 *          1. partial chunks exist along any dimension.
 *          2. the number of slots in the chunk cache is smaller than the number
 *             of chunks in the fastest-growing dimension.
 *-------------------------------------------------------------------------------------*/
int
main (void)
{
hid_t file; /* handles */
int nerrors = 0;
/* Create a new file. If file exists its contents will be overwritten. */
if((file = H5Fcreate (FILENAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0)
goto error;
nerrors += create_dset1(file);
nerrors += create_dset2(file);
if(H5Fclose (file) < 0)
goto error;
/* Re-open the file for testing performance. */
if((file = H5Fopen (FILENAME, H5F_ACC_RDONLY, H5P_DEFAULT)) < 0)
goto error;
nerrors += check_partial_chunks_perf(file);
nerrors += check_hash_value_perf(file);
if(H5Fclose (file) < 0)
goto error;
if (nerrors>0) goto error;
cleanup();
return 0;
error:
fprintf(stderr, "*** ERRORS DETECTED ***\n");
return 1;
}