2020-11-20 08:01:04 +08:00
|
|
|
/* Copyright 2018, University Corporation for Atmospheric
|
|
|
|
* Research. See COPYRIGHT file for copying and redistribution
|
|
|
|
* conditions. */
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @file @internal The functions which control NCZ
|
|
|
|
* caching. These caching controls allow the user to change the cache
|
|
|
|
* sizes of ZARR before opening files.
|
|
|
|
*
|
|
|
|
* @author Dennis Heimbigner, Ed Hartnett
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "zincludes.h"
|
|
|
|
#include "zcache.h"
|
|
|
|
#include "ncxcache.h"
|
2021-09-03 07:04:26 +08:00
|
|
|
#include "zfilter.h"
|
2020-11-20 08:01:04 +08:00
|
|
|
|
|
|
|
#undef DEBUG
|
|
|
|
|
|
|
|
#undef FLUSH
|
|
|
|
|
|
|
|
#define LEAFLEN 32
|
|
|
|
|
|
|
|
/* Forward */
|
|
|
|
static int get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry);
|
2021-09-03 07:04:26 +08:00
|
|
|
static int put_chunk(NCZChunkCache* cache, NCZCacheEntry*);
|
2020-11-20 08:01:04 +08:00
|
|
|
static int makeroom(NCZChunkCache* cache);
|
2021-09-03 07:04:26 +08:00
|
|
|
static int flushcache(NCZChunkCache* cache);
|
|
|
|
static int constraincache(NCZChunkCache* cache);
|
2020-11-20 08:01:04 +08:00
|
|
|
|
|
|
|
/**************************************************/
|
|
|
|
/* Dispatch table per-var cache functions */
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @internal Set chunk cache size for a variable. This is the internal
|
|
|
|
* function called by nc_set_var_chunk_cache().
|
|
|
|
*
|
|
|
|
* @param ncid File ID.
|
|
|
|
* @param varid Variable ID.
|
|
|
|
* @param size Size in bytes to set cache.
|
|
|
|
* @param nelems # of entries in cache
|
|
|
|
* @param preemption Controls cache swapping.
|
|
|
|
*
|
|
|
|
* @returns ::NC_NOERR No error.
|
|
|
|
* @returns ::NC_EBADID Bad ncid.
|
|
|
|
* @returns ::NC_ENOTVAR Invalid variable ID.
|
|
|
|
* @returns ::NC_ESTRICTNC3 Attempting netcdf-4 operation on strict
|
|
|
|
* nc3 netcdf-4 file.
|
|
|
|
* @returns ::NC_EINVAL Invalid input.
|
|
|
|
* @returns ::NC_EHDFERR HDF5 error.
|
|
|
|
* @author Ed Hartnett
|
|
|
|
*/
|
|
|
|
int
|
|
|
|
NCZ_set_var_chunk_cache(int ncid, int varid, size_t cachesize, size_t nelems, float preemption)
|
|
|
|
{
|
|
|
|
NC_GRP_INFO_T *grp;
|
|
|
|
NC_FILE_INFO_T *h5;
|
|
|
|
NC_VAR_INFO_T *var;
|
|
|
|
NCZ_VAR_INFO_T *zvar;
|
2021-09-03 07:04:26 +08:00
|
|
|
int retval = NC_NOERR;
|
2020-11-20 08:01:04 +08:00
|
|
|
|
|
|
|
/* Check input for validity. */
|
|
|
|
if (preemption < 0 || preemption > 1)
|
2021-09-03 07:04:26 +08:00
|
|
|
{retval = NC_EINVAL; goto done;}
|
2020-11-20 08:01:04 +08:00
|
|
|
|
|
|
|
/* Find info for this file and group, and set pointer to each. */
|
|
|
|
if ((retval = nc4_find_nc_grp_h5(ncid, NULL, &grp, &h5)))
|
2021-09-03 07:04:26 +08:00
|
|
|
goto done;
|
2020-11-20 08:01:04 +08:00
|
|
|
assert(grp && h5);
|
|
|
|
|
|
|
|
/* Find the var. */
|
|
|
|
if (!(var = (NC_VAR_INFO_T *)ncindexith(grp->vars, varid)))
|
2021-09-03 07:04:26 +08:00
|
|
|
{retval = NC_ENOTVAR; goto done;}
|
2020-11-20 08:01:04 +08:00
|
|
|
assert(var && var->hdr.id == varid);
|
|
|
|
|
|
|
|
zvar = (NCZ_VAR_INFO_T*)var->format_var_info;
|
|
|
|
assert(zvar != NULL && zvar->cache != NULL);
|
|
|
|
|
|
|
|
/* Set the values. */
|
2022-01-30 06:27:52 +08:00
|
|
|
var->chunkcache.size = cachesize;
|
|
|
|
var->chunkcache.nelems = nelems;
|
|
|
|
var->chunkcache.preemption = preemption;
|
2020-11-20 08:01:04 +08:00
|
|
|
|
2021-09-03 07:04:26 +08:00
|
|
|
/* Fix up cache */
|
|
|
|
if((retval = NCZ_adjust_var_cache(var))) goto done;
|
|
|
|
done:
|
|
|
|
return retval;
|
2020-11-20 08:01:04 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @internal Adjust the chunk cache of a var for better
|
|
|
|
* performance.
|
|
|
|
*
|
|
|
|
* @note For contiguous and compact storage vars, or when parallel I/O
|
|
|
|
* is in use, this function will do nothing and return ::NC_NOERR;
|
|
|
|
*
|
|
|
|
* @param grp Pointer to group info struct.
|
|
|
|
* @param var Pointer to var info struct.
|
|
|
|
*
|
|
|
|
* @return ::NC_NOERR No error.
|
|
|
|
* @author Ed Hartnett
|
|
|
|
*/
|
|
|
|
int
|
2021-09-03 07:04:26 +08:00
|
|
|
NCZ_adjust_var_cache(NC_VAR_INFO_T *var)
|
2020-11-20 08:01:04 +08:00
|
|
|
{
|
2021-09-03 07:04:26 +08:00
|
|
|
int stat = NC_NOERR;
|
2020-11-20 08:01:04 +08:00
|
|
|
NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info;
|
2022-01-25 06:22:24 +08:00
|
|
|
NCZChunkCache* zcache = NULL;
|
|
|
|
|
|
|
|
zcache = zvar->cache;
|
|
|
|
if(zcache->valid) goto done;
|
2021-09-03 07:04:26 +08:00
|
|
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
fprintf(stderr,"xxx: adjusting cache for: %s\n",var->hdr.name);
|
|
|
|
#endif
|
2020-11-20 08:01:04 +08:00
|
|
|
|
2022-01-25 06:22:24 +08:00
|
|
|
/* completely empty the cache */
|
|
|
|
flushcache(zcache);
|
|
|
|
|
|
|
|
/* Reclaim any existing fill_chunk */
|
|
|
|
if((stat = NCZ_reclaim_fill_chunk(zcache))) goto done;
|
2020-11-20 08:01:04 +08:00
|
|
|
/* Reset the parameters */
|
2022-01-30 06:27:52 +08:00
|
|
|
zvar->cache->maxsize = var->chunkcache.size;
|
|
|
|
zvar->cache->maxentries = var->chunkcache.nelems;
|
2020-11-20 08:01:04 +08:00
|
|
|
#ifdef DEBUG
|
|
|
|
fprintf(stderr,"%s.cache.adjust: size=%ld nelems=%ld\n",
|
2021-09-03 07:04:26 +08:00
|
|
|
var->hdr.name,(unsigned long)zvar->cache->maxsize,(unsigned long)zvar->cache->maxentries);
|
2020-11-20 08:01:04 +08:00
|
|
|
#endif
|
2022-01-25 06:22:24 +08:00
|
|
|
/* One more thing, adjust the chunksize and count*/
|
|
|
|
zcache->chunksize = zvar->chunksize;
|
|
|
|
zcache->chunkcount = 1;
|
|
|
|
if(var->ndims > 0) {
|
|
|
|
int i;
|
|
|
|
for(i=0;i<var->ndims;i++) {
|
|
|
|
zcache->chunkcount *= var->chunksizes[i];
|
|
|
|
}
|
2021-09-03 07:04:26 +08:00
|
|
|
}
|
2022-01-25 06:22:24 +08:00
|
|
|
zcache->valid = 1;
|
|
|
|
done:
|
2021-09-03 07:04:26 +08:00
|
|
|
return stat;
|
2020-11-20 08:01:04 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**************************************************/
|
|
|
|
/**
|
|
|
|
* Create a chunk cache object
|
|
|
|
*
|
|
|
|
* @param var containing var
|
|
|
|
* @param entrysize Size in bytes of an entry
|
|
|
|
* @param cachep return cache pointer
|
|
|
|
*
|
|
|
|
* @return ::NC_NOERR No error.
|
|
|
|
* @return ::NC_EINVAL Bad preemption.
|
|
|
|
* @author Dennis Heimbigner, Ed Hartnett
|
|
|
|
*/
|
|
|
|
int
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
NCZ_create_chunk_cache(NC_VAR_INFO_T* var, size64_t chunksize, char dimsep, NCZChunkCache** cachep)
|
2020-11-20 08:01:04 +08:00
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
|
|
|
NCZChunkCache* cache = NULL;
|
|
|
|
void* fill = NULL;
|
|
|
|
NCZ_VAR_INFO_T* zvar = NULL;
|
|
|
|
|
|
|
|
if(chunksize == 0) return NC_EINVAL;
|
|
|
|
|
|
|
|
zvar = (NCZ_VAR_INFO_T*)var->format_var_info;
|
|
|
|
|
|
|
|
if((cache = calloc(1,sizeof(NCZChunkCache))) == NULL)
|
|
|
|
{stat = NC_ENOMEM; goto done;}
|
|
|
|
cache->var = var;
|
|
|
|
cache->ndims = var->ndims + zvar->scalar;
|
|
|
|
cache->fillchunk = NULL;
|
|
|
|
cache->chunksize = chunksize;
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
cache->dimension_separator = dimsep;
|
2021-09-03 07:04:26 +08:00
|
|
|
zvar->cache = cache;
|
2020-11-20 08:01:04 +08:00
|
|
|
|
2022-01-25 06:22:24 +08:00
|
|
|
cache->chunkcount = 1;
|
|
|
|
if(var->ndims > 0) {
|
|
|
|
int i;
|
|
|
|
for(i=0;i<var->ndims;i++) {
|
|
|
|
cache->chunkcount *= var->chunksizes[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-20 08:01:04 +08:00
|
|
|
#ifdef FLUSH
|
|
|
|
cache->maxentries = 1;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef DEBUG
|
|
|
|
fprintf(stderr,"%s.cache: nelems=%ld size=%ld\n",
|
2021-09-03 07:04:26 +08:00
|
|
|
var->hdr.name,(unsigned long)cache->maxentries,(unsigned long)cache->maxsize);
|
2020-11-20 08:01:04 +08:00
|
|
|
#endif
|
|
|
|
if((stat = ncxcachenew(LEAFLEN,&cache->xcache))) goto done;
|
|
|
|
if((cache->mru = nclistnew()) == NULL)
|
|
|
|
{stat = NC_ENOMEM; goto done;}
|
|
|
|
nclistsetalloc(cache->mru,cache->maxentries);
|
2022-01-25 06:22:24 +08:00
|
|
|
|
2020-11-20 08:01:04 +08:00
|
|
|
if(cachep) {*cachep = cache; cache = NULL;}
|
|
|
|
done:
|
|
|
|
nullfree(fill);
|
|
|
|
NCZ_free_chunk_cache(cache);
|
|
|
|
return THROW(stat);
|
|
|
|
}
|
|
|
|
|
2021-09-03 07:04:26 +08:00
|
|
|
static void
|
2022-08-28 10:21:13 +08:00
|
|
|
free_cache_entry(NCZChunkCache* cache, NCZCacheEntry* entry)
|
2021-09-03 07:04:26 +08:00
|
|
|
{
|
|
|
|
if(entry) {
|
2022-08-28 10:21:13 +08:00
|
|
|
int tid = cache->var->type_info->hdr.id;
|
|
|
|
if(tid == NC_STRING && !entry->isfixedstring) {
|
|
|
|
int ncid = cache->var->container->nc4_info->controller->ext_ncid;
|
|
|
|
nc_reclaim_data(ncid,tid,entry->data,cache->chunkcount);
|
|
|
|
}
|
2021-09-03 07:04:26 +08:00
|
|
|
nullfree(entry->data);
|
|
|
|
nullfree(entry->key.varkey);
|
|
|
|
nullfree(entry->key.chunkkey);
|
|
|
|
nullfree(entry);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-20 08:01:04 +08:00
|
|
|
void
|
|
|
|
NCZ_free_chunk_cache(NCZChunkCache* cache)
|
|
|
|
{
|
|
|
|
if(cache == NULL) return;
|
2021-01-29 11:11:01 +08:00
|
|
|
|
|
|
|
ZTRACE(4,"cache.var=%s",cache->var->hdr.name);
|
|
|
|
|
2020-11-20 08:01:04 +08:00
|
|
|
/* Iterate over the entries */
|
|
|
|
while(nclistlength(cache->mru) > 0) {
|
|
|
|
void* ptr;
|
|
|
|
NCZCacheEntry* entry = nclistremove(cache->mru,0);
|
|
|
|
(void)ncxcacheremove(cache->xcache,entry->hashkey,&ptr);
|
|
|
|
assert(ptr == entry);
|
2022-08-28 10:21:13 +08:00
|
|
|
free_cache_entry(cache,entry);
|
2020-11-20 08:01:04 +08:00
|
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
|
|
fprintf(stderr,"|cache.free|=%ld\n",nclistlength(cache->mru));
|
|
|
|
#endif
|
|
|
|
ncxcachefree(cache->xcache);
|
|
|
|
nclistfree(cache->mru);
|
|
|
|
cache->mru = NULL;
|
2022-01-25 06:22:24 +08:00
|
|
|
(void)NCZ_reclaim_fill_chunk(cache);
|
2020-11-20 08:01:04 +08:00
|
|
|
nullfree(cache);
|
2021-01-29 11:11:01 +08:00
|
|
|
(void)ZUNTRACE(NC_NOERR);
|
2020-11-20 08:01:04 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
size64_t
|
|
|
|
NCZ_cache_entrysize(NCZChunkCache* cache)
|
|
|
|
{
|
|
|
|
assert(cache);
|
|
|
|
return cache->chunksize;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Return number of active entries in cache */
|
|
|
|
size64_t
|
|
|
|
NCZ_cache_size(NCZChunkCache* cache)
|
|
|
|
{
|
|
|
|
assert(cache);
|
|
|
|
return nclistlength(cache->mru);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
NCZ_read_cache_chunk(NCZChunkCache* cache, const size64_t* indices, void** datap)
|
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
|
|
|
int rank = cache->ndims;
|
|
|
|
NCZCacheEntry* entry = NULL;
|
|
|
|
ncexhashkey_t hkey = 0;
|
|
|
|
int created = 0;
|
2021-09-03 07:04:26 +08:00
|
|
|
|
2020-11-20 08:01:04 +08:00
|
|
|
/* the hash key */
|
|
|
|
hkey = ncxcachekey(indices,sizeof(size64_t)*cache->ndims);
|
|
|
|
/* See if already in cache */
|
2020-12-17 11:48:02 +08:00
|
|
|
stat = ncxcachelookup(cache->xcache,hkey,(void**)&entry);
|
|
|
|
switch(stat) {
|
2020-11-20 08:01:04 +08:00
|
|
|
case NC_NOERR:
|
|
|
|
/* Move to front of the lru */
|
2020-12-17 11:48:02 +08:00
|
|
|
(void)ncxcachetouch(cache->xcache,hkey);
|
|
|
|
break;
|
2021-07-18 06:55:30 +08:00
|
|
|
case NC_ENOOBJECT:
|
2020-11-20 08:01:04 +08:00
|
|
|
entry = NULL; /* not found; */
|
|
|
|
break;
|
|
|
|
default: goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
if(entry == NULL) { /*!found*/
|
|
|
|
/* Create a new entry */
|
|
|
|
if((entry = calloc(1,sizeof(NCZCacheEntry)))==NULL)
|
|
|
|
{stat = NC_ENOMEM; goto done;}
|
|
|
|
memcpy(entry->indices,indices,rank*sizeof(size64_t));
|
|
|
|
/* Create the key for this cache */
|
|
|
|
if((stat = NCZ_buildchunkpath(cache,indices,&entry->key))) goto done;
|
|
|
|
entry->hashkey = hkey;
|
2022-01-25 06:22:24 +08:00
|
|
|
assert(entry->data == NULL && entry->size == 0);
|
|
|
|
/* Try to read the object from "disk"; might change size; will create if non-existent */
|
2021-09-03 07:04:26 +08:00
|
|
|
if((stat=get_chunk(cache,entry))) goto done;
|
2022-01-25 06:22:24 +08:00
|
|
|
assert(entry->data != NULL);
|
|
|
|
/* Ensure cache constraints not violated; but do it before entry is added */
|
|
|
|
if((stat=makeroom(cache))) goto done;
|
2020-11-20 08:01:04 +08:00
|
|
|
nclistpush(cache->mru,entry);
|
|
|
|
if((stat = ncxcacheinsert(cache->xcache,entry->hashkey,entry))) goto done;
|
|
|
|
}
|
2021-09-03 07:04:26 +08:00
|
|
|
|
2020-11-20 08:01:04 +08:00
|
|
|
#ifdef DEBUG
|
|
|
|
fprintf(stderr,"|cache.read.lru|=%ld\n",nclistlength(cache->mru));
|
|
|
|
#endif
|
|
|
|
if(datap) *datap = entry->data;
|
|
|
|
entry = NULL;
|
2021-01-29 11:11:01 +08:00
|
|
|
|
2020-11-20 08:01:04 +08:00
|
|
|
done:
|
2021-01-29 11:11:01 +08:00
|
|
|
if(created && stat == NC_NOERR) stat = NC_EEMPTY; /* tell upper layers */
|
2022-08-28 10:21:13 +08:00
|
|
|
if(entry) free_cache_entry(cache,entry);
|
2020-11-20 08:01:04 +08:00
|
|
|
return THROW(stat);
|
|
|
|
}
|
|
|
|
|
2021-09-03 07:04:26 +08:00
|
|
|
#if 0
/* Disabled code, kept for reference only.
   Builds a new cache entry holding a copy of `content` for the chunk at
   `indices`, marks it modified, appends it to the MRU list and then
   enforces the cache limits.
   NOTE(review): the entry is never inserted into cache->xcache here. */
int
NCZ_write_cache_chunk(NCZChunkCache* cache, const size64_t* indices, void* content)
{
    int stat = NC_NOERR;
    int rank = cache->ndims;
    NCZCacheEntry* entry = NULL;
    ncexhashkey_t hkey;

    /* create the hash key */
    hkey = ncxcachekey(indices,sizeof(size64_t)*cache->ndims);

    /* No lookup is performed, so a fresh entry is always built */
    entry = calloc(1,sizeof(NCZCacheEntry));
    if(entry == NULL)
        {stat = NC_ENOMEM; goto done;}
    memcpy(entry->indices,indices,rank*sizeof(size64_t));
    stat = NCZ_buildchunkpath(cache,indices,&entry->key);
    if(stat) goto done;
    entry->hashkey = hkey;

    /* Create the local copy space and fill it from the caller's buffer */
    entry->size = cache->chunksize;
    entry->data = calloc(1,cache->chunksize);
    if(entry->data == NULL)
        {stat = NC_ENOMEM; goto done;}
    memcpy(entry->data,content,cache->chunksize);

    entry->modified = 1;
    nclistpush(cache->mru,entry); /* MRU order */
#ifdef DEBUG
    fprintf(stderr,"|cache.write|=%ld\n",nclistlength(cache->mru));
#endif
    entry = NULL; /* the list holds it now */

    /* Ensure cache constraints not violated */
    stat = makeroom(cache);
    if(stat) goto done;

done:
    if(entry) free_cache_entry(cache,entry);
    return THROW(stat);
}
#endif
|
2020-11-20 08:01:04 +08:00
|
|
|
|
2021-09-03 07:04:26 +08:00
|
|
|
/* Constrain cache, but allow at least one entry */
|
2020-11-20 08:01:04 +08:00
|
|
|
static int
|
|
|
|
makeroom(NCZChunkCache* cache)
|
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
2021-09-03 07:04:26 +08:00
|
|
|
|
|
|
|
/* Sanity check; make sure at least one entry is always allowed */
|
|
|
|
if(nclistlength(cache->mru) == 1)
|
|
|
|
goto done;
|
|
|
|
stat = constraincache(cache);
|
|
|
|
done:
|
|
|
|
return stat;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Completely flush cache */
|
|
|
|
|
|
|
|
static int
|
|
|
|
flushcache(NCZChunkCache* cache)
|
|
|
|
{
|
|
|
|
cache->maxentries = 0;
|
|
|
|
return constraincache(cache);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Remove entries to ensure cache is not
|
|
|
|
violating any of its constraints.
|
|
|
|
On entry, constraints might be violated.
|
2022-01-25 06:22:24 +08:00
|
|
|
Make sure that the entryinuse (NULL => no constraint) is not reclaimed.
|
2021-09-03 07:04:26 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
static int
|
|
|
|
constraincache(NCZChunkCache* cache)
|
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
|
|
|
|
2022-01-25 06:22:24 +08:00
|
|
|
/* If the cache is empty then do nothing */
|
|
|
|
if(cache->used == 0) goto done;
|
|
|
|
|
2020-11-20 08:01:04 +08:00
|
|
|
/* Flush from LRU end if we are at capacity */
|
2021-09-03 07:04:26 +08:00
|
|
|
while(nclistlength(cache->mru) > cache->maxentries || cache->used > cache->maxsize) {
|
2020-11-20 08:01:04 +08:00
|
|
|
int i;
|
|
|
|
void* ptr;
|
|
|
|
NCZCacheEntry* e = ncxcachelast(cache->xcache); /* last entry is the least recently used */
|
|
|
|
if((stat = ncxcacheremove(cache->xcache,e->hashkey,&ptr))) goto done;
|
2022-01-25 06:22:24 +08:00
|
|
|
assert(e == ptr);
|
2020-11-20 08:01:04 +08:00
|
|
|
for(i=0;i<nclistlength(cache->mru);i++) {
|
|
|
|
e = nclistget(cache->mru,i);
|
|
|
|
if(ptr == e) break;
|
|
|
|
}
|
2022-01-25 06:22:24 +08:00
|
|
|
assert(e != NULL);
|
2020-11-20 08:01:04 +08:00
|
|
|
assert(i >= 0 && i < nclistlength(cache->mru));
|
|
|
|
nclistremove(cache->mru,i);
|
2022-01-25 06:22:24 +08:00
|
|
|
assert(cache->used >= e->size);
|
|
|
|
/* Note that |old chunk data| may not be same as |new chunk data| because of filters */
|
|
|
|
cache->used -= e->size; /* old size */
|
2020-11-20 08:01:04 +08:00
|
|
|
if(e->modified) /* flush to file */
|
|
|
|
stat=put_chunk(cache,e);
|
|
|
|
/* reclaim */
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
nullfree(e->data); nullfree(e->key.varkey); nullfree(e->key.chunkkey); nullfree(e);
|
2020-11-20 08:01:04 +08:00
|
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
|
|
fprintf(stderr,"|cache.makeroom|=%ld\n",nclistlength(cache->mru));
|
|
|
|
#endif
|
|
|
|
done:
|
|
|
|
return stat;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
NCZ_flush_chunk_cache(NCZChunkCache* cache)
|
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
|
|
|
size_t i;
|
|
|
|
|
2021-01-29 11:11:01 +08:00
|
|
|
ZTRACE(4,"cache.var=%s |cache|=%d",cache->var->hdr.name,(int)nclistlength(cache->mru));
|
|
|
|
|
2020-11-20 08:01:04 +08:00
|
|
|
if(NCZ_cache_size(cache) == 0) goto done;
|
|
|
|
|
|
|
|
/* Iterate over the entries in hashmap */
|
|
|
|
for(i=0;i<nclistlength(cache->mru);i++) {
|
|
|
|
NCZCacheEntry* entry = nclistget(cache->mru,i);
|
|
|
|
if(entry->modified) {
|
2022-01-25 06:22:24 +08:00
|
|
|
/* Make cache used be consistent across filter application */
|
|
|
|
cache->used -= entry->size;
|
2020-11-20 08:01:04 +08:00
|
|
|
/* Write out this chunk in toto*/
|
|
|
|
if((stat=put_chunk(cache,entry)))
|
|
|
|
goto done;
|
2022-01-25 06:22:24 +08:00
|
|
|
cache->used += entry->size;
|
2020-11-20 08:01:04 +08:00
|
|
|
}
|
|
|
|
entry->modified = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
2021-01-29 11:11:01 +08:00
|
|
|
return ZUNTRACE(stat);
|
2020-11-20 08:01:04 +08:00
|
|
|
}
|
|
|
|
|
2022-01-25 06:22:24 +08:00
|
|
|
/* Ensure existence of some kind of fill chunk */
|
|
|
|
int
|
|
|
|
NCZ_ensure_fill_chunk(NCZChunkCache* cache)
|
|
|
|
{
|
|
|
|
int i, stat = NC_NOERR;
|
|
|
|
NC_VAR_INFO_T* var = cache->var;
|
2022-08-28 10:21:13 +08:00
|
|
|
nc_type typeid = var->type_info->hdr.id;
|
2022-01-25 06:22:24 +08:00
|
|
|
size_t typesize = var->type_info->size;
|
|
|
|
|
|
|
|
if(cache->fillchunk) goto done;
|
|
|
|
|
|
|
|
if((cache->fillchunk = malloc(cache->chunksize))==NULL)
|
|
|
|
{stat = NC_ENOMEM; goto done;}
|
|
|
|
if(var->no_fill) {
|
|
|
|
/* use zeros */
|
|
|
|
memset(cache->fillchunk,0,cache->chunksize);
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
if((stat = NCZ_ensure_fill_value(var))) goto done;
|
2022-08-28 10:21:13 +08:00
|
|
|
if(typeid == NC_STRING) {
|
|
|
|
char* src = *((char**)(var->fill_value));
|
|
|
|
char** dst = (char**)(cache->fillchunk);
|
|
|
|
for(i=0;i<cache->chunkcount;i++) dst[i] = strdup(src);
|
|
|
|
} else
|
2022-01-25 06:22:24 +08:00
|
|
|
switch (typesize) {
|
|
|
|
case 1: {
|
|
|
|
unsigned char c = *((unsigned char*)var->fill_value);
|
|
|
|
memset(cache->fillchunk,c,cache->chunksize);
|
|
|
|
} break;
|
|
|
|
case 2: {
|
|
|
|
unsigned short fv = *((unsigned short*)var->fill_value);
|
|
|
|
unsigned short* p2 = (unsigned short*)cache->fillchunk;
|
|
|
|
for(i=0;i<cache->chunksize;i+=typesize) *p2++ = fv;
|
|
|
|
} break;
|
|
|
|
case 4: {
|
|
|
|
unsigned int fv = *((unsigned int*)var->fill_value);
|
|
|
|
unsigned int* p4 = (unsigned int*)cache->fillchunk;
|
|
|
|
for(i=0;i<cache->chunksize;i+=typesize) *p4++ = fv;
|
|
|
|
} break;
|
|
|
|
case 8: {
|
|
|
|
unsigned long long fv = *((unsigned long long*)var->fill_value);
|
|
|
|
unsigned long long* p8 = (unsigned long long*)cache->fillchunk;
|
|
|
|
for(i=0;i<cache->chunksize;i+=typesize) *p8++ = fv;
|
|
|
|
} break;
|
|
|
|
default: {
|
|
|
|
unsigned char* p;
|
|
|
|
for(p=cache->fillchunk,i=0;i<cache->chunksize;i+=typesize,p+=typesize)
|
|
|
|
memcpy(p,var->fill_value,typesize);
|
|
|
|
} break;
|
|
|
|
}
|
|
|
|
done:
|
|
|
|
return NC_NOERR;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
NCZ_reclaim_fill_chunk(NCZChunkCache* zcache)
|
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
|
|
|
if(zcache && zcache->fillchunk) {
|
|
|
|
NC_VAR_INFO_T* var = zcache->var;
|
|
|
|
int ncid = var->container->nc4_info->controller->ext_ncid;
|
|
|
|
int tid = var->type_info->hdr.id;
|
|
|
|
size_t chunkcount = zcache->chunkcount;
|
|
|
|
stat = nc_reclaim_data_all(ncid,tid,zcache->fillchunk,chunkcount);
|
|
|
|
zcache->fillchunk = NULL;
|
|
|
|
}
|
|
|
|
return stat;
|
|
|
|
}
|
|
|
|
|
2020-11-20 08:01:04 +08:00
|
|
|
#if 0
/* Disabled code, kept for reference only.
   Marks the cached chunk at `indices` as modified if it is in the cache.
   NOTE(review): stale - it calls NCZ_buildchunkkey() without the
   dimension-separator argument and treats cache->mru as a hashmap;
   it would need rework before being re-enabled. */
int
NCZ_chunk_cache_modified(NCZChunkCache* cache, const size64_t* indices)
{
    int stat = NC_NOERR;
    char* key = NULL;
    NCZCacheEntry* entry = NULL;
    int rank = cache->ndims;

    /* Create the key for this cache */
    stat = NCZ_buildchunkkey(rank, indices, &key);
    if(stat) goto done;

    /* See if already in cache */
    if(NC_hashmapget(cache->mru, key, strlen(key), (uintptr_t*)entry)) { /* found */
        entry->modified = 1;
    }

done:
    nullfree(key);
    return THROW(stat);
}
#endif
|
|
|
|
|
|
|
|
/**************************************************/
|
|
|
|
/*
|
|
|
|
From Zarr V2 Specification:
|
|
|
|
"The compressed sequence of bytes for each chunk is stored under
|
|
|
|
a key formed from the index of the chunk within the grid of
|
|
|
|
chunks representing the array. To form a string key for a
|
|
|
|
chunk, the indices are converted to strings and concatenated
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
with the dimension_separator character ('.' or '/') separating
|
|
|
|
each index. For example, given an array with shape (10000,
|
|
|
|
10000) and chunk shape (1000, 1000) there will be 100 chunks
|
|
|
|
laid out in a 10 by 10 grid. The chunk with indices (0, 0)
|
|
|
|
provides data for rows 0-1000 and columns 0-1000 and is stored
|
|
|
|
under the key "0.0"; the chunk with indices (2, 4) provides data
|
|
|
|
for rows 2000-3000 and columns 4000-5000 and is stored under the
|
|
|
|
key "2.4"; etc."
|
2020-11-20 08:01:04 +08:00
|
|
|
*/
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @param R Rank
|
|
|
|
* @param chunkindices The chunk indices
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
* @param dimsep the dimension separator
|
2020-11-20 08:01:04 +08:00
|
|
|
* @param keyp Return the chunk key string
|
|
|
|
*/
|
2020-12-17 11:48:02 +08:00
|
|
|
int
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
NCZ_buildchunkkey(size_t R, const size64_t* chunkindices, char dimsep, char** keyp)
|
2020-11-20 08:01:04 +08:00
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
|
|
|
int r;
|
|
|
|
NCbytes* key = ncbytesnew();
|
|
|
|
|
|
|
|
if(keyp) *keyp = NULL;
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
|
|
|
|
assert(islegaldimsep(dimsep));
|
2020-11-20 08:01:04 +08:00
|
|
|
|
|
|
|
for(r=0;r<R;r++) {
|
|
|
|
char sindex[64];
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
if(r > 0) ncbytesappend(key,dimsep);
|
2020-11-20 08:01:04 +08:00
|
|
|
/* Print as decimal with no leading zeros */
|
|
|
|
snprintf(sindex,sizeof(sindex),"%lu",(unsigned long)chunkindices[r]);
|
|
|
|
ncbytescat(key,sindex);
|
|
|
|
}
|
|
|
|
ncbytesnull(key);
|
|
|
|
if(keyp) *keyp = ncbytesextract(key);
|
|
|
|
|
|
|
|
ncbytesfree(key);
|
|
|
|
return THROW(stat);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @internal Push data to chunk of a file.
|
|
|
|
* If chunk does not exist, create it
|
|
|
|
*
|
|
|
|
* @param file Pointer to file info struct.
|
|
|
|
* @param proj Chunk projection
|
|
|
|
* @param datalen size of data
|
|
|
|
* @param data Buffer containing the chunk data to write
|
|
|
|
*
|
|
|
|
* @return ::NC_NOERR No error.
|
|
|
|
* @author Dennis Heimbigner
|
|
|
|
*/
|
|
|
|
static int
|
2021-09-03 07:04:26 +08:00
|
|
|
put_chunk(NCZChunkCache* cache, NCZCacheEntry* entry)
|
2020-11-20 08:01:04 +08:00
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
2021-09-03 07:04:26 +08:00
|
|
|
NC_FILE_INFO_T* file = NULL;
|
2020-11-20 08:01:04 +08:00
|
|
|
NCZ_FILE_INFO_T* zfile = NULL;
|
|
|
|
NCZMAP* map = NULL;
|
2021-09-03 07:04:26 +08:00
|
|
|
char* path = NULL;
|
2022-08-28 10:21:13 +08:00
|
|
|
nc_type tid = NC_NAT;
|
|
|
|
void* strchunk = NULL;
|
|
|
|
int ncid = 0;
|
2020-11-20 08:01:04 +08:00
|
|
|
|
2021-01-29 11:11:01 +08:00
|
|
|
ZTRACE(5,"cache.var=%s entry.key=%s",cache->var->hdr.name,entry->key);
|
2020-11-20 08:01:04 +08:00
|
|
|
LOG((3, "%s: var: %p", __func__, cache->var));
|
|
|
|
|
2021-09-03 07:04:26 +08:00
|
|
|
file = (cache->var->container)->nc4_info;
|
|
|
|
zfile = file->format_file_info;
|
2020-11-20 08:01:04 +08:00
|
|
|
map = zfile->map;
|
|
|
|
|
2022-08-28 10:21:13 +08:00
|
|
|
/* Collect some info */
|
|
|
|
ncid = file->controller->ext_ncid;
|
|
|
|
tid = cache->var->type_info->hdr.id;
|
|
|
|
|
|
|
|
if(tid == NC_STRING && !entry->isfixedstring) {
|
|
|
|
/* Convert from char* to char[strlen] format */
|
|
|
|
int maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var);
|
|
|
|
assert(maxstrlen > 0);
|
|
|
|
if((strchunk = malloc(cache->chunkcount*maxstrlen))==NULL) {stat = NC_ENOMEM; goto done;}
|
|
|
|
/* copy char* to char[] format */
|
|
|
|
if((stat = NCZ_char2fixed((const char**)entry->data,strchunk,cache->chunkcount,maxstrlen))) goto done;
|
|
|
|
/* Reclaim the old chunk */
|
|
|
|
if((stat = nc_reclaim_data_all(ncid,tid,entry->data,cache->chunkcount))) goto done;
|
|
|
|
entry->data = NULL;
|
|
|
|
entry->data = strchunk; strchunk = NULL;
|
|
|
|
entry->size = cache->chunkcount * maxstrlen;
|
|
|
|
entry->isfixedstring = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2021-12-24 13:18:56 +08:00
|
|
|
#ifdef ENABLE_NCZARR_FILTERS
|
2021-09-03 07:04:26 +08:00
|
|
|
/* Make sure the entry is in filtered state */
|
|
|
|
if(!entry->isfiltered) {
|
|
|
|
NC_VAR_INFO_T* var = cache->var;
|
|
|
|
void* filtered = NULL; /* pointer to the filtered data */
|
|
|
|
size_t flen; /* length of filtered data */
|
|
|
|
/* Get the filter chain to apply */
|
|
|
|
NClist* filterchain = (NClist*)var->filters;
|
|
|
|
if(nclistlength(filterchain) > 0) {
|
2022-01-25 06:22:24 +08:00
|
|
|
/* Apply the filter chain to get the filtered data; will reclaim entry->data */
|
2021-09-03 07:04:26 +08:00
|
|
|
if((stat = NCZ_applyfilterchain(file,var,filterchain,entry->size,entry->data,&flen,&filtered,ENCODING))) goto done;
|
|
|
|
/* Fix up the cache entry */
|
|
|
|
/* Note that if filtered is different from entry->data, then entry->data will have been freed */
|
|
|
|
entry->data = filtered;
|
|
|
|
entry->size = flen;
|
|
|
|
entry->isfiltered = 1;
|
|
|
|
}
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
}
|
2021-12-24 13:18:56 +08:00
|
|
|
#endif
|
2021-09-03 07:04:26 +08:00
|
|
|
|
|
|
|
path = NCZ_chunkpath(entry->key);
|
|
|
|
stat = nczmap_write(map,path,0,entry->size,entry->data);
|
|
|
|
nullfree(path); path = NULL;
|
|
|
|
|
2020-11-20 08:01:04 +08:00
|
|
|
switch(stat) {
|
2021-01-29 11:11:01 +08:00
|
|
|
case NC_NOERR:
|
2020-11-20 08:01:04 +08:00
|
|
|
break;
|
2021-01-29 11:11:01 +08:00
|
|
|
case NC_EEMPTY:
|
2020-11-20 08:01:04 +08:00
|
|
|
default: goto done;
|
|
|
|
}
|
|
|
|
done:
|
2022-08-28 10:21:13 +08:00
|
|
|
nullfree(strchunk);
|
2021-09-03 07:04:26 +08:00
|
|
|
nullfree(path);
|
2021-01-29 11:11:01 +08:00
|
|
|
return ZUNTRACE(stat);
|
2020-11-20 08:01:04 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @internal Push data from memory to file.
|
|
|
|
*
|
|
|
|
* @param cache Pointer to parent cache
|
|
|
|
* @param key chunk key
|
|
|
|
* @param entry cache entry to read into
|
|
|
|
*
|
|
|
|
* @return ::NC_NOERR No error.
|
|
|
|
* @author Dennis Heimbigner
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry)
|
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
|
|
|
NCZMAP* map = NULL;
|
|
|
|
NC_FILE_INFO_T* file = NULL;
|
|
|
|
NCZ_FILE_INFO_T* zfile = NULL;
|
2022-08-28 10:21:13 +08:00
|
|
|
NC_TYPE_INFO_T* xtype = NULL;
|
|
|
|
char** strchunk = NULL;
|
2021-09-03 07:04:26 +08:00
|
|
|
size64_t size;
|
|
|
|
int empty = 0;
|
|
|
|
char* path = NULL;
|
2022-08-28 10:21:13 +08:00
|
|
|
int tid;
|
2020-11-20 08:01:04 +08:00
|
|
|
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
ZTRACE(5,"cache.var=%s entry.key=%s sep=%d",cache->var->hdr.name,entry->key,cache->dimension_separator);
|
2021-01-29 11:11:01 +08:00
|
|
|
|
2020-11-20 08:01:04 +08:00
|
|
|
LOG((3, "%s: file: %p", __func__, file));
|
|
|
|
|
|
|
|
file = (cache->var->container)->nc4_info;
|
|
|
|
zfile = file->format_file_info;
|
|
|
|
map = zfile->map;
|
2021-09-03 07:04:26 +08:00
|
|
|
assert(map);
|
2020-11-20 08:01:04 +08:00
|
|
|
|
2022-08-28 10:21:13 +08:00
|
|
|
/* Collect some info */
|
|
|
|
xtype = cache->var->type_info;
|
|
|
|
tid = xtype->hdr.id;
|
|
|
|
|
2021-09-03 07:04:26 +08:00
|
|
|
/* get size of the "raw" data on "disk" */
|
|
|
|
path = NCZ_chunkpath(entry->key);
|
|
|
|
stat = nczmap_len(map,path,&size);
|
|
|
|
nullfree(path); path = NULL;
|
|
|
|
switch(stat) {
|
2022-08-28 10:21:13 +08:00
|
|
|
case NC_NOERR: entry->size = size; break;
|
2021-09-03 07:04:26 +08:00
|
|
|
case NC_EEMPTY: empty = 1; stat = NC_NOERR; break;
|
|
|
|
default: goto done;
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
}
|
2020-11-20 08:01:04 +08:00
|
|
|
|
2021-09-03 07:04:26 +08:00
|
|
|
if(!empty) {
|
|
|
|
/* Make sure we have a place to read it */
|
2022-08-28 10:21:13 +08:00
|
|
|
if((entry->data = (void*)calloc(1,entry->size)) == NULL)
|
|
|
|
{stat = NC_ENOMEM; goto done;}
|
2021-09-03 07:04:26 +08:00
|
|
|
/* Read the raw data */
|
|
|
|
path = NCZ_chunkpath(entry->key);
|
|
|
|
stat = nczmap_read(map,path,0,entry->size,(char*)entry->data);
|
|
|
|
nullfree(path); path = NULL;
|
|
|
|
switch (stat) {
|
|
|
|
case NC_NOERR: break;
|
|
|
|
case NC_EEMPTY: empty = 1; stat = NC_NOERR;break;
|
|
|
|
default: goto done;
|
|
|
|
}
|
2022-08-28 10:21:13 +08:00
|
|
|
entry->isfiltered = FILTERED(cache); /* Is the data being read filtered? */
|
|
|
|
if(tid == NC_STRING)
|
|
|
|
entry->isfixedstring = 1; /* fill cache is in char[maxstrlen] format */
|
2021-09-03 07:04:26 +08:00
|
|
|
}
|
|
|
|
if(empty) {
|
|
|
|
/* fake the chunk */
|
|
|
|
entry->modified = (file->no_write?0:1);
|
|
|
|
entry->size = cache->chunksize;
|
2022-08-28 10:21:13 +08:00
|
|
|
entry->data = NULL;
|
|
|
|
entry->isfixedstring = 0;
|
|
|
|
entry->isfiltered = 0;
|
2021-09-03 07:04:26 +08:00
|
|
|
/* apply fill value */
|
2022-01-25 06:22:24 +08:00
|
|
|
if(cache->fillchunk == NULL)
|
|
|
|
{if((stat = NCZ_ensure_fill_chunk(cache))) goto done;}
|
2022-08-28 10:21:13 +08:00
|
|
|
if((entry->data = calloc(1,entry->size))==NULL) {stat = NC_ENOMEM; goto done;}
|
|
|
|
if((stat = NCZ_copy_data(file,xtype,cache->fillchunk,cache->chunkcount,!ZCLEAR,entry->data))) goto done;
|
2021-09-03 07:04:26 +08:00
|
|
|
stat = NC_NOERR;
|
|
|
|
}
|
2021-12-24 13:18:56 +08:00
|
|
|
#ifdef ENABLE_NCZARR_FILTERS
|
2021-09-03 07:04:26 +08:00
|
|
|
/* Make sure the entry is in unfiltered state */
|
2022-08-28 10:21:13 +08:00
|
|
|
if(!empty && entry->isfiltered) {
|
2021-09-03 07:04:26 +08:00
|
|
|
NC_VAR_INFO_T* var = cache->var;
|
|
|
|
void* unfiltered = NULL; /* pointer to the unfiltered data */
|
|
|
|
void* filtered = NULL; /* pointer to the filtered data */
|
|
|
|
size_t unflen; /* length of unfiltered data */
|
2022-08-28 10:21:13 +08:00
|
|
|
assert(tid != NC_STRING || entry->isfixedstring);
|
2021-09-03 07:04:26 +08:00
|
|
|
/* Get the filter chain to apply */
|
|
|
|
NClist* filterchain = (NClist*)var->filters;
|
|
|
|
if(nclistlength(filterchain) == 0) {stat = NC_EFILTER; goto done;}
|
|
|
|
/* Apply the filter chain to get the unfiltered data */
|
|
|
|
filtered = entry->data;
|
|
|
|
entry->data = NULL;
|
|
|
|
if((stat = NCZ_applyfilterchain(file,var,filterchain,entry->size,filtered,&unflen,&unfiltered,!ENCODING))) goto done;
|
|
|
|
/* Fix up the cache entry */
|
|
|
|
entry->data = unfiltered;
|
|
|
|
entry->size = unflen;
|
|
|
|
entry->isfiltered = 0;
|
|
|
|
}
|
2021-12-24 13:18:56 +08:00
|
|
|
#endif
|
2021-09-03 07:04:26 +08:00
|
|
|
|
2022-08-28 10:21:13 +08:00
|
|
|
if(tid == NC_STRING && entry->isfixedstring) {
|
|
|
|
/* Convert from char[strlen] to char* format */
|
|
|
|
int maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var);
|
|
|
|
assert(maxstrlen > 0);
|
|
|
|
/* copy char[] to char* format */
|
|
|
|
if((strchunk = (char**)malloc(sizeof(char*)*cache->chunkcount))==NULL)
|
|
|
|
{stat = NC_ENOMEM; goto done;}
|
|
|
|
if((stat = NCZ_fixed2char(entry->data,strchunk,cache->chunkcount,maxstrlen))) goto done;
|
|
|
|
/* Reclaim the old chunk */
|
|
|
|
nullfree(entry->data);
|
|
|
|
entry->data = NULL;
|
|
|
|
entry->data = strchunk; strchunk = NULL;
|
|
|
|
entry->size = cache->chunkcount * sizeof(char*);
|
|
|
|
entry->isfixedstring = 0;
|
|
|
|
}
|
|
|
|
|
2021-09-03 07:04:26 +08:00
|
|
|
done:
|
2022-08-28 10:21:13 +08:00
|
|
|
nullfree(strchunk);
|
2021-09-03 07:04:26 +08:00
|
|
|
nullfree(path);
|
2021-01-29 11:11:01 +08:00
|
|
|
return ZUNTRACE(stat);
|
2020-11-20 08:01:04 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
NCZ_buildchunkpath(NCZChunkCache* cache, const size64_t* chunkindices, struct ChunkKey* key)
|
2020-11-20 08:01:04 +08:00
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
|
|
|
char* chunkname = NULL;
|
|
|
|
char* varkey = NULL;
|
|
|
|
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
assert(key != NULL);
|
2020-11-20 08:01:04 +08:00
|
|
|
/* Get the chunk object name */
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
if((stat = NCZ_buildchunkkey(cache->ndims, chunkindices, cache->dimension_separator, &chunkname))) goto done;
|
2020-11-20 08:01:04 +08:00
|
|
|
/* Get the var object key */
|
|
|
|
if((stat = NCZ_varkey(cache->var,&varkey))) goto done;
|
Upgrade the nczarr code to match Zarr V2
Re: https://github.com/zarr-developers/zarr-python/pull/716
The Zarr version 2 spec has been extended to include the ability
to choose the dimension separator in chunk name keys. The legal
separators has been extended from {'.'} to {'.' '/'}. So now it
is possible to use a key like "0/1/2/0" for chunk names.
This PR implements this for NCZarr. The V2 spec now says that
this separator can be set on a per-variable basis. For now, I
have chosen to allow this be set only globally by adding a key
named "ZARR.DIMENSION_SEPARATOR=<char>" in the
.daprc/.dodsrc/ncrc file. Currently, the only legal separator
characters are '.' (the default) and '/'. On writing, this key
will only be written if its value is different than the default.
This change caused problems because supporting a separator of '/'
is difficult to parse when keys/paths use '/' as the path separator.
A test case was added for this.
Additionally, make nczarr be enabled default by default. This required
some additional changes so that if zip and/or AWS S3 sdk are unavailable,
then they are disabled for NCZarr.
In addition the following unrelated changes were made.
1. Tested that pure-zarr mode could read an nczarr formatted store.
1. The .rc file handling now merges all known .rc files (.ncrc,.daprc, and .dodsrc) in that order and using those in HOME first, then in current directory. For duplicate entries, the later ones override the earlier ones. This change is to remove some of the conflicts inherent in the current .rc file load process. A set of test cases was also added.
1. Re-order tests in configure.ac and CMakeLists.txt so that if libcurl
is not found then the other options that depend upon it properly
are disabled.
1. I decided that xarray support should be enabled by default for pure
zarr. In order to allow disabling, I added a new mode flag "noxarray".
1. Certain test in nczarr_test depend on use of .dodsrc. In order for these
to work when testing in parallel, some inter-test dependencies needed to
be added.
1. Improved authorization testing to use changes in thredds.ucar.edu
2021-04-25 09:48:15 +08:00
|
|
|
key->varkey = varkey; varkey = NULL;
|
|
|
|
key->chunkkey = chunkname; chunkname = NULL;
|
2020-11-20 08:01:04 +08:00
|
|
|
|
|
|
|
done:
|
|
|
|
nullfree(chunkname);
|
|
|
|
nullfree(varkey);
|
|
|
|
return THROW(stat);
|
|
|
|
}
|
2022-01-25 06:22:24 +08:00
|
|
|
|
|
|
|
void
|
|
|
|
NCZ_dumpxcacheentry(NCZChunkCache* cache, NCZCacheEntry* e, NCbytes* buf)
|
|
|
|
{
|
|
|
|
char s[8192];
|
|
|
|
char idx[64];
|
|
|
|
int i;
|
|
|
|
|
|
|
|
ncbytescat(buf,"{");
|
|
|
|
snprintf(s,sizeof(s),"modified=%u isfiltered=%u indices=",
|
|
|
|
(unsigned)e->modified,
|
|
|
|
(unsigned)e->isfiltered
|
|
|
|
);
|
|
|
|
ncbytescat(buf,s);
|
|
|
|
for(i=0;i<cache->ndims;i++) {
|
|
|
|
snprintf(idx,sizeof(idx),"%s%llu",(i==0?"":"."),e->indices[i]);
|
|
|
|
ncbytescat(buf,idx);
|
|
|
|
}
|
|
|
|
snprintf(s,sizeof(s),"size=%llu data=%p",
|
|
|
|
e->size,
|
|
|
|
e->data
|
|
|
|
);
|
|
|
|
ncbytescat(buf,s);
|
|
|
|
ncbytescat(buf,"}");
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
NCZ_printxcache(NCZChunkCache* cache)
|
|
|
|
{
|
|
|
|
static char xs[20000];
|
|
|
|
NCbytes* buf = ncbytesnew();
|
|
|
|
char s[8192];
|
|
|
|
int i;
|
|
|
|
|
|
|
|
ncbytescat(buf,"NCZChunkCache:\n");
|
|
|
|
snprintf(s,sizeof(s),"\tvar=%s\n\tndims=%u\n\tchunksize=%u\n\tchunkcount=%u\n\tfillchunk=%p\n",
|
|
|
|
cache->var->hdr.name,
|
|
|
|
(unsigned)cache->ndims,
|
|
|
|
(unsigned)cache->chunksize,
|
|
|
|
(unsigned)cache->chunkcount,
|
|
|
|
cache->fillchunk
|
|
|
|
);
|
|
|
|
ncbytescat(buf,s);
|
|
|
|
|
|
|
|
snprintf(s,sizeof(s),"\tmaxentries=%u\n\tmaxsize=%u\n\tused=%u\n\tdimsep='%c'\n",
|
|
|
|
(unsigned)cache->maxentries,
|
|
|
|
(unsigned)cache->maxsize,
|
|
|
|
(unsigned)cache->used,
|
|
|
|
cache->dimension_separator
|
|
|
|
);
|
|
|
|
ncbytescat(buf,s);
|
|
|
|
|
|
|
|
snprintf(s,sizeof(s),"\tmru: (%u)\n",(unsigned)nclistlength(cache->mru));
|
|
|
|
ncbytescat(buf,s);
|
|
|
|
if(nclistlength(cache->mru)==0)
|
|
|
|
ncbytescat(buf,"\t\t<empty>\n");
|
|
|
|
for(i=0;i<nclistlength(cache->mru);i++) {
|
|
|
|
NCZCacheEntry* e = (NCZCacheEntry*)nclistget(cache->mru,i);
|
|
|
|
snprintf(s,sizeof(s),"\t\t[%d] ",i);
|
|
|
|
ncbytescat(buf,s);
|
|
|
|
if(e == NULL)
|
|
|
|
ncbytescat(buf,"<null>");
|
|
|
|
else
|
|
|
|
NCZ_dumpxcacheentry(cache, e, buf);
|
|
|
|
ncbytescat(buf,"\n");
|
|
|
|
}
|
|
|
|
|
|
|
|
xs[0] = '\0';
|
|
|
|
strlcat(xs,ncbytescontents(buf),sizeof(xs));
|
|
|
|
ncbytesfree(buf);
|
|
|
|
fprintf(stderr,"%s\n",xs);
|
|
|
|
// return xs;
|
|
|
|
}
|