mirror of
https://github.com/Unidata/netcdf-c.git
synced 2024-11-21 03:13:42 +08:00
Merge pull request #1560 from NOAA-GSD/ejh_cache_docs
increase default cache size for netCDF-4/HDF5 files, also improve cache docs and add benchmarking program
This commit is contained in:
commit
438119dd69
@ -310,10 +310,10 @@ ENDIF()
|
||||
################################
|
||||
|
||||
# HDF5 cache variables.
|
||||
SET(DEFAULT_CHUNK_SIZE 4194304 CACHE STRING "Default Chunk Cache Size.")
|
||||
SET(DEFAULT_CHUNK_SIZE 16777216 CACHE STRING "Default Chunk Cache Size.")
|
||||
SET(DEFAULT_CHUNKS_IN_CACHE 10 CACHE STRING "Default number of chunks in cache.")
|
||||
SET(CHUNK_CACHE_SIZE 4194304 CACHE STRING "Default Chunk Cache Size.")
|
||||
SET(CHUNK_CACHE_NELEMS 1009 CACHE STRING "Default maximum number of elements in cache.")
|
||||
SET(CHUNK_CACHE_SIZE 16777216 CACHE STRING "Default Chunk Cache Size.")
|
||||
SET(CHUNK_CACHE_NELEMS 4133 CACHE STRING "Default maximum number of elements in cache.")
|
||||
SET(CHUNK_CACHE_PREEMPTION 0.75 CACHE STRING "Default file chunk cache preemption policy for HDf5 files(a number between 0 and 1, inclusive.")
|
||||
SET(MAX_DEFAULT_CACHE_SIZE 67108864 CACHE STRING "Default maximum cache size.")
|
||||
SET(NETCDF_LIB_NAME "" CACHE STRING "Default name of the netcdf library.")
|
||||
|
@ -7,6 +7,8 @@ This file contains a high-level description of this package's evolution. Release
|
||||
|
||||
## 4.7.4 - TBD
|
||||
|
||||
* [Enhancement] Increased default size of cache buffer to 16 MB, from 4 MB. Increased number of slots to 4133. See [https://github.com/Unidata/netcdf-c/issues/1541].
|
||||
|
||||
## 4.7.3 - November 20, 2019
|
||||
|
||||
* [Bug Fix] Fixed an issue where installs from tarballs will not properly compile in parallel environments.
|
||||
|
@ -304,7 +304,7 @@ AC_MSG_CHECKING([whether a default file cache size for HDF5 was specified])
|
||||
AC_ARG_WITH([chunk-cache-size],
|
||||
[AS_HELP_STRING([--with-chunk-cache-size=<integer>],
|
||||
[Specify default file cache chunk size for HDF5 files in bytes.])],
|
||||
[CHUNK_CACHE_SIZE=$with_chunk_cache_size], [CHUNK_CACHE_SIZE=4194304])
|
||||
[CHUNK_CACHE_SIZE=$with_chunk_cache_size], [CHUNK_CACHE_SIZE=16777216])
|
||||
AC_MSG_RESULT([$CHUNK_CACHE_SIZE])
|
||||
AC_DEFINE_UNQUOTED([CHUNK_CACHE_SIZE], [$CHUNK_CACHE_SIZE], [default file chunk cache size in bytes.])
|
||||
|
||||
@ -313,7 +313,7 @@ AC_MSG_CHECKING([whether a default file cache maximum number of elements for HDF
|
||||
AC_ARG_WITH([chunk-cache-nelems],
|
||||
[AS_HELP_STRING([--with-chunk-cache-nelems=<integer>],
|
||||
[Specify default maximum number of elements in the file chunk cache chunk for HDF5 files (should be prime number).])],
|
||||
[CHUNK_CACHE_NELEMS=$with_chunk_cache_nelems], [CHUNK_CACHE_NELEMS=1009])
|
||||
[CHUNK_CACHE_NELEMS=$with_chunk_cache_nelems], [CHUNK_CACHE_NELEMS=4133])
|
||||
AC_MSG_RESULT([$CHUNK_CACHE_NELEMS])
|
||||
AC_DEFINE_UNQUOTED([CHUNK_CACHE_NELEMS], [$CHUNK_CACHE_NELEMS], [default file chunk cache nelems.])
|
||||
|
||||
|
@ -803,6 +803,7 @@ INPUT = \
|
||||
@abs_top_srcdir@/libsrc4/nc4dim.c \
|
||||
@abs_top_srcdir@/libsrc4/nc4attr.c \
|
||||
@abs_top_srcdir@/libhdf5/nc4info.c \
|
||||
@abs_top_srcdir@/libhdf5/hdf5cache.c \
|
||||
@abs_top_srcdir@/libsrc4/nc4dispatch.c \
|
||||
@abs_top_srcdir@/examples/C/simple_xy_wr.c \
|
||||
@abs_top_srcdir@/examples/C/simple_xy_rd.c \
|
||||
|
@ -1637,7 +1637,8 @@ and then verify them in HDF5. (And vice versa).
|
||||
|
||||
When data are first read or written to a netCDF-4/HDF5 variable, the
|
||||
HDF5 library opens a cache for that variable. The default size of that
|
||||
cache (settable with the –with-chunk-cache-size at netCDF build time).
|
||||
cache is 16 MB (settable with the --with-chunk-cache-size option at netCDF
|
||||
build time).
|
||||
|
||||
For good performance your chunk cache must be larger than one chunk of
|
||||
your data - preferably large enough to hold multiple chunks
|
||||
|
@ -1129,6 +1129,12 @@ nc_free_string(size_t len, char **data)
|
||||
until the file is closed. Once re-opened, the variable chunk cache
|
||||
returns to its default value.
|
||||
|
||||
Current cache settings for each var may be obtained with
|
||||
nc_get_var_chunk_cache().
|
||||
|
||||
Default values for these settings may be changed for the whole file
|
||||
with nc_set_chunk_cache().
|
||||
|
||||
@param ncid NetCDF or group ID, from a previous call to nc_open(),
|
||||
nc_create(), nc_def_grp(), or associated inquiry functions such as
|
||||
nc_inq_ncid().
|
||||
@ -1190,7 +1196,10 @@ nc_set_var_chunk_cache(int ncid, int varid, size_t size, size_t nelems,
|
||||
}
|
||||
|
||||
/**
|
||||
Get the per-variable chunk cache settings from the HDF5 layer.
|
||||
Get the per-variable chunk cache settings from the HDF5
|
||||
layer. These settings may be changed with nc_set_var_chunk_cache().
|
||||
|
||||
See nc_set_chunk_cache() for a full discussion of these settings.
|
||||
|
||||
@param ncid NetCDF or group ID, from a previous call to nc_open(),
|
||||
nc_create(), nc_def_grp(), or associated inquiry functions such as
|
||||
|
@ -2,9 +2,10 @@
|
||||
* Research. See COPYRIGHT file for copying and redistribution
|
||||
* conditions. */
|
||||
/**
|
||||
* @file @internal The netCDF-4 functions which control HDF5
|
||||
* caching. These caching controls allow the user to change the cache
|
||||
* sizes of HDF5 before opening files.
|
||||
* @file
|
||||
* The netCDF-4 functions which control HDF5 caching. These caching
|
||||
* controls allow the user to change the cache sizes of HDF5 before
|
||||
* opening files.
|
||||
*
|
||||
* @author Ed Hartnett
|
||||
*/
|
||||
@ -19,16 +20,53 @@ extern size_t nc4_chunk_cache_nelems;
|
||||
extern float nc4_chunk_cache_preemption;
|
||||
|
||||
/**
|
||||
* Set chunk cache size. Only affects files opened/created *after* it
|
||||
* is called.
|
||||
* Set chunk cache size. Only affects netCDF-4/HDF5 files
|
||||
* opened/created *after* it is called.
|
||||
*
|
||||
* @param size Size in bytes to set cache.
|
||||
* @param nelems Number of elements to hold in cache.
|
||||
* @param preemption Preemption stragety (between 0 and 1).
|
||||
* The HDF5 chunk cache for each dataset is used by HDF5 when reading
|
||||
* and writing files. The size of the chunk cache can be set with this
|
||||
* function (for all variables in the file) or on a variable basis
|
||||
* with nc_set_var_chunk_cache().
|
||||
*
|
||||
* Increasing the size of the cache only helps if data access patterns
|
||||
* support it. If data is read in one sequential pass through the
|
||||
* file, then the cache will not help much. If data are read from the
|
||||
* same file multiple times, the chunk cache can improve performance.
|
||||
*
|
||||
* The current settings for the file level chunk cache can be obtained
|
||||
* with nc_get_chunk_cache().
|
||||
*
|
||||
* For more information on HDF5 caching, see
|
||||
* https://support.hdfgroup.org/HDF5/doc/RM/RM_H5P.html#Property-SetCache.
|
||||
*
|
||||
* @param size Size in bytes to set cache. The default value is 16 MB;
|
||||
* the default may be changed with configure option
|
||||
* --with-chunk-cache-size.
|
||||
*
|
||||
* @param nelems Number of elements to hold in cache. This is passed
|
||||
* to the nslots parameter of the HDF5 function H5Pset_cache(). This
|
||||
* should be a prime number at least ten times larger than the maximum
|
||||
* number of chunks that are set in the cache. The default value is
|
||||
* 4133; the default may be set with configure option
|
||||
* --with-chunk-cache-nelems.
|
||||
*
|
||||
* @param preemption Preemption strategy, a float between 0 and 1
|
||||
* inclusive, indicating the weighting according to which chunks
|
||||
* which have been fully read or written are penalized when
|
||||
* determining which chunks to flush from cache. A value of 0 means
|
||||
* fully read or written chunks are treated no differently than other
|
||||
* chunks (the preemption is strictly LRU) while a value of 1 means
|
||||
* fully read or written chunks are always preempted before other
|
||||
* chunks. If your application only reads or writes data once, this
|
||||
* can be safely set to 1. Otherwise, this should be set lower
|
||||
* depending on how often you re-read or re-write the same data. The
|
||||
* default value is 0.75; the default may be set with configure option
|
||||
* --with-chunk-cache-preemption.
|
||||
*
|
||||
* @return ::NC_NOERR No error.
|
||||
* @return ::NC_EINVAL Bad preemption.
|
||||
* @author Ed Hartnett
|
||||
* @ingroup datasets
|
||||
*/
|
||||
int
|
||||
nc_set_chunk_cache(size_t size, size_t nelems, float preemption)
|
||||
@ -42,15 +80,19 @@ nc_set_chunk_cache(size_t size, size_t nelems, float preemption)
|
||||
}
|
||||
|
||||
/**
|
||||
* Get chunk cache size. Only affects files opened/created *after* it
|
||||
* is called.
|
||||
* Get current chunk cache settings. These settings may be changed
|
||||
* with nc_set_chunk_cache().
|
||||
*
|
||||
* @param sizep Pointer that gets size in bytes to set cache.
|
||||
* @param nelemsp Pointer that gets number of elements to hold in cache.
|
||||
* @param preemptionp Pointer that gets preemption stragety (between 0 and 1).
|
||||
* @param sizep Pointer that gets size in bytes to set cache. Ignored
|
||||
* if NULL.
|
||||
* @param nelemsp Pointer that gets number of elements to hold in
|
||||
* cache. Ignored if NULL.
|
||||
* @param preemptionp Pointer that gets preemption strategy (between 0
|
||||
* and 1). Ignored if NULL.
|
||||
*
|
||||
* @return ::NC_NOERR No error.
|
||||
* @author Ed Hartnett
|
||||
* @ingroup datasets
|
||||
*/
|
||||
int
|
||||
nc_get_chunk_cache(size_t *sizep, size_t *nelemsp, float *preemptionp)
|
||||
@ -67,8 +109,10 @@ nc_get_chunk_cache(size_t *sizep, size_t *nelemsp, float *preemptionp)
|
||||
}
|
||||
|
||||
/**
|
||||
* @internal Set the chunk cache. Required for fortran to avoid size_t
|
||||
* issues.
|
||||
* @internal Set the chunk cache. This is like nc_set_chunk_cache()
|
||||
* but with integers instead of size_t, and with an integer preemption
|
||||
* (which is the float preemption * 100). This was required for fortran
|
||||
* to avoid size_t issues.
|
||||
*
|
||||
* @param size Cache size.
|
||||
* @param nelems Number of elements.
|
||||
@ -89,8 +133,10 @@ nc_set_chunk_cache_ints(int size, int nelems, int preemption)
|
||||
}
|
||||
|
||||
/**
|
||||
* @internal Get the chunk cache settings. Required for fortran to
|
||||
* avoid size_t issues.
|
||||
* @internal Get the chunk cache settings. This is like
|
||||
* nc_get_chunk_cache() but with integers instead of size_t, and with
|
||||
* an integer preemption (which is the float preemption * 100). This
|
||||
* was required for fortran to avoid size_t issues.
|
||||
*
|
||||
* @param sizep Pointer that gets cache size.
|
||||
* @param nelemsp Pointer that gets number of elements.
|
||||
|
@ -20,7 +20,7 @@ LDADD = ${top_builddir}/liblib/libnetcdf.la
|
||||
check_PROGRAMS = tst_create_files bm_file tst_chunks3 tst_ar4 \
|
||||
tst_ar4_3d tst_ar4_4d bm_many_objs tst_h_many_atts bm_many_atts \
|
||||
tst_files2 tst_files3 tst_mem tst_knmi bm_netcdf4_recs tst_wrf_reads \
|
||||
tst_attsperf bigmeta openbigmeta
|
||||
tst_attsperf bigmeta openbigmeta tst_bm_rando
|
||||
|
||||
bm_file_SOURCES = bm_file.c tst_utils.c
|
||||
bm_netcdf4_recs_SOURCES = bm_netcdf4_recs.c tst_utils.c
|
||||
@ -33,10 +33,11 @@ tst_ar4_SOURCES = tst_ar4.c tst_utils.c
|
||||
tst_h_many_atts_SOURCES = tst_h_many_atts.c tst_utils.c
|
||||
tst_knmi_SOURCES = tst_knmi.c tst_utils.c
|
||||
tst_wrf_reads_SOURCES = tst_wrf_reads.c tst_utils.c
|
||||
tst_bm_rando_SOURCES = tst_bm_rando.c tst_utils.c
|
||||
|
||||
TESTS = tst_ar4_3d tst_create_files tst_files3 tst_mem run_knmi_bm.sh \
|
||||
tst_wrf_reads tst_attsperf perftest.sh run_tst_chunks.sh \
|
||||
run_bm_elena.sh
|
||||
run_bm_elena.sh tst_bm_rando
|
||||
|
||||
run_bm_elena.log: tst_create_files.log
|
||||
|
||||
|
146
nc_perf/tst_bm_rando.c
Normal file
146
nc_perf/tst_bm_rando.c
Normal file
@ -0,0 +1,146 @@
|
||||
/*
|
||||
Copyright 2019, UCAR/Unidata
|
||||
See COPYRIGHT file for copying and redistribution conditions.
|
||||
|
||||
This program benchmarks random access to chunked data.
|
||||
|
||||
Ed Hartnett 11/25/19
|
||||
*/
|
||||
|
||||
#include <nc_tests.h>
|
||||
#include <err_macros.h>
|
||||
#include <time.h>
|
||||
#include <sys/time.h> /* Extra high precision time info. */
|
||||
#include <math.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
/* #define FILE_NAME "gfs.t00z.sfcf024.nc" */
|
||||
#define FILE_NAME "tst_bm_rando.nc"
|
||||
#define MILLION 1000000
|
||||
#define NDIM3 3
|
||||
|
||||
/* These dim lengths taken from the current NOAA GFS surface data. */
|
||||
/* #define DIM1_LEN 3072 */
|
||||
/* #define DIM2_LEN 1536 */
|
||||
#define DIM1_LEN 512
|
||||
#define DIM2_LEN 512
|
||||
|
||||
#define DIM0_NAME "unlimited"
|
||||
#define DIM1_NAME "x"
|
||||
#define DIM2_NAME "y"
|
||||
#define NUM_VAR 10
|
||||
#define NUM_REC 100
|
||||
#define NUM_TRY 10000
|
||||
#define NUM_CACHE_SETTING 3
|
||||
|
||||
/* Prototype from tst_utils.c. */
|
||||
int nc4_timeval_subtract(struct timeval *result, struct timeval *x,
|
||||
struct timeval *y);
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
printf("Benchmarking random access to file.\n");
|
||||
printf("Reading a file randomly...\n");
|
||||
{
|
||||
int ncid;
|
||||
int dimid[NDIM3];
|
||||
int varid[NUM_VAR];
|
||||
size_t chunksize[NDIM3] = {1, 512, 512};
|
||||
char name[NC_MAX_NAME + 1];
|
||||
size_t start[NDIM3] = {0, 0, 0};
|
||||
size_t count[NDIM3] = {1, DIM1_LEN, DIM2_LEN};
|
||||
float *data;
|
||||
float *data_in;
|
||||
size_t size[NUM_CACHE_SETTING] = {4194304, (4194304), (4194304)};
|
||||
/* size_t size[NUM_CACHE_SETTING] = {4194304, (4194304 * 4), (4194304 * 16)}; */
|
||||
size_t nelems[NUM_CACHE_SETTING] = {1009, 4133, 16141};
|
||||
/* size_t nelems[NUM_CACHE_SETTING] = {1009, 1009, 1009}; */
|
||||
float preemption[NUM_CACHE_SETTING] = {0.75f, 0.75f, 0.75f};
|
||||
|
||||
struct timeval start_time, end_time, diff_time;
|
||||
float read_us;
|
||||
|
||||
int v, d, t, c;
|
||||
|
||||
/* Set random number seed. */
|
||||
srand(time(NULL));
|
||||
|
||||
/* Create the file. */
|
||||
if (nc_create(FILE_NAME, NC_CLOBBER|NC_NETCDF4, &ncid)) ERR;
|
||||
if (nc_def_dim(ncid, DIM0_NAME, NC_UNLIMITED, &dimid[0])) ERR;
|
||||
if (nc_def_dim(ncid, DIM1_NAME, DIM1_LEN, &dimid[1])) ERR;
|
||||
if (nc_def_dim(ncid, DIM2_NAME, DIM2_LEN, &dimid[2])) ERR;
|
||||
for (v = 0; v < NUM_VAR; v++)
|
||||
{
|
||||
sprintf(name, "var_%d", v);
|
||||
if (nc_def_var(ncid, name, NC_FLOAT, NDIM3, dimid, &varid[v])) ERR;
|
||||
if (nc_def_var_chunking(ncid, v, NC_CHUNKED, chunksize)) ERR;
|
||||
}
|
||||
if (!(data = malloc(DIM1_LEN * DIM2_LEN * sizeof(float)))) ERR;
|
||||
for (d = 0; d < DIM1_LEN * DIM2_LEN; d++)
|
||||
data[d] = d + 1.0f/(float)(rand()%1000);
|
||||
for (v = 0; v < NUM_VAR; v++)
|
||||
{
|
||||
for (start[0] = 0; start[0] < NUM_REC; start[0]++)
|
||||
{
|
||||
if (nc_put_vara_float(ncid, v, start, count, data)) ERR;
|
||||
}
|
||||
}
|
||||
if (nc_close(ncid)) ERR;
|
||||
free(data);
|
||||
if (!(data_in = malloc(DIM1_LEN * DIM2_LEN * sizeof(float)))) ERR;
|
||||
|
||||
/* nc_set_log_level(3); */
|
||||
printf("size\tnelems\tpreemption\tread time(s)\n");
|
||||
for (c = 0; c < NUM_CACHE_SETTING; c++)
|
||||
{
|
||||
char cmd[NC_MAX_NAME * 2 + 20];
|
||||
char file_2[NC_MAX_NAME + 1];
|
||||
|
||||
if (c)
|
||||
{
|
||||
/* Create a copy of file_out. This will defeat any OS
|
||||
* buffering. */
|
||||
sprintf(file_2, "tst_copy_%d_%s", c, FILE_NAME);
|
||||
sprintf(cmd, "cp %s %s\n", FILE_NAME, file_2);
|
||||
system(cmd);
|
||||
}
|
||||
else
|
||||
strcpy(file_2, FILE_NAME);
|
||||
|
||||
/* Start timer. */
|
||||
if (gettimeofday(&start_time, NULL)) ERR;
|
||||
|
||||
/* Set cache settings. */
|
||||
if (nc_set_chunk_cache(size[c], nelems[c], preemption[c])) ERR;
|
||||
|
||||
/* Open the file. */
|
||||
if (nc_open(file_2, NC_NOWRITE, &ncid)) ERR;
|
||||
|
||||
/* Read a random record of a random var. */
|
||||
for (t = 0; t < NUM_TRY; t++)
|
||||
{
|
||||
int var = rand() % NUM_VAR;
|
||||
start[0] = rand() % NUM_REC;
|
||||
if (nc_get_vara_float(ncid, var, start, count, data_in)) ERR;
|
||||
}
|
||||
|
||||
/* Close the file. */
|
||||
if (nc_close(ncid)) ERR;
|
||||
|
||||
/* Stop timer. */
|
||||
if (gettimeofday(&end_time, NULL)) ERR;
|
||||
if (nc4_timeval_subtract(&diff_time, &end_time, &start_time)) ERR;
|
||||
read_us = (int)diff_time.tv_sec + (float)diff_time.tv_usec / MILLION ;
|
||||
printf("%zu, %ld, %g, %g\n", size[c], nelems[c], preemption[c], read_us);
|
||||
}
|
||||
/* Free data storage. */
|
||||
free(data_in);
|
||||
|
||||
|
||||
}
|
||||
SUMMARIZE_ERR;
|
||||
FINAL_RESULTS;
|
||||
}
|
Loading…
Reference in New Issue
Block a user