netcdf-c/libnczarr/zxcache.c
Dennis Heimbigner 231ae96c4b Add support for Zarr string type to NCZarr
* re: https://github.com/Unidata/netcdf-c/pull/2278
* re: https://github.com/Unidata/netcdf-c/issues/2485
* re: https://github.com/Unidata/netcdf-c/issues/2474

This PR subsumes PR https://github.com/Unidata/netcdf-c/pull/2278.
It is actually a bit of an omnibus covering several issues.

## PR https://github.com/Unidata/netcdf-c/pull/2278
Add support for the Zarr string type.
Zarr strings are restricted currently to be of fixed size.
The primary issue to be addressed is to provide a way for the user to
specify the size of the fixed-length strings. This is handled by providing
the following new special attributes:
1. **_nczarr_default_maxstrlen** —
This is an attribute of the root group. It specifies the default
maximum string length for string types. If not specified, then
it has the value of 64 characters.
2. **_nczarr_maxstrlen** —
This is a per-variable attribute. It specifies the maximum
string length for the string type associated with the variable.
If not specified, then it is assigned the value of
**_nczarr_default_maxstrlen**.

This PR also requires some hacking to handle the existing netcdf-c NC_CHAR
type, which does not exist in zarr. The goal was to choose numpy types for
both the netcdf-c NC_STRING type and the netcdf-c NC_CHAR type such that
if a pure zarr implementation read them, it would still work and an
NC_CHAR type would be handled by zarr as a string of length 1.

For writing variables and NCZarr attributes, the type mapping is as follows:
* "|S1" for NC_CHAR.
* ">S1" for NC_STRING && MAXSTRLEN==1
* ">Sn" for NC_STRING && MAXSTRLEN==n

Note that it is a bit of a hack to use endianness, but it should be ok since for
string/char, the endianness has no meaning.

For reading attributes with pure zarr (i.e. with no nczarr
attribute types defined), they will always be interpreted as being of
type NC_CHAR.

## Issue: https://github.com/Unidata/netcdf-c/issues/2474
This PR partly fixes this issue because it provides more
comprehensive support for Zarr attributes that are JSON valued expressions.
This PR still does not address the problem in that issue where the
_ARRAY_DIMENSION attribute is incorrectly set. That can only be
fixed by the creator of the datasets.

## Issue: https://github.com/Unidata/netcdf-c/issues/2485
This PR also fixes the scalar failure shown in this issue.
It generally cleans up scalar handling.
It also adds a note to the documentation describing that
NCZarr supports scalars while Zarr does not and also how
scalar interoperability is achieved.

## Misc. Other Changes
1. Convert the nczarr special attributes and keys to be all lower case. So "_NCZARR_ATTR" is now "_nczarr_attr". Support backward compatibility for the upper case names.
2. Cleanup my too-clever-by-half handling of scalars in libnczarr.
2022-08-27 20:21:13 -06:00

893 lines
25 KiB
C

/* Copyright 2018, University Corporation for Atmospheric
* Research. See COPYRIGHT file for copying and redistribution
* conditions. */
/**
* @file @internal The functions which control NCZ
* caching. These caching controls allow the user to change the cache
* sizes of ZARR before opening files.
*
* @author Dennis Heimbigner, Ed Hartnett
*/
#include "zincludes.h"
#include "zcache.h"
#include "ncxcache.h"
#include "zfilter.h"
#undef DEBUG
#undef FLUSH
#define LEAFLEN 32
/* Forward */
static int get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry);
static int put_chunk(NCZChunkCache* cache, NCZCacheEntry*);
static int makeroom(NCZChunkCache* cache);
static int flushcache(NCZChunkCache* cache);
static int constraincache(NCZChunkCache* cache);
/**************************************************/
/* Dispatch table per-var cache functions */
/**
 * @internal Set the chunk cache parameters for a variable. This is the
 * internal function called by nc_set_var_chunk_cache().
 *
 * @param ncid File ID.
 * @param varid Variable ID.
 * @param cachesize Size in bytes to set cache.
 * @param nelems # of entries in cache
 * @param preemption Controls cache swapping; values below 0 or above 1
 * are rejected.
 *
 * @returns ::NC_NOERR No error.
 * @returns ::NC_ENOTVAR No variable with this id in the group.
 * @returns ::NC_EINVAL preemption is out of range.
 * @returns Otherwise whatever nc4_find_nc_grp_h5() or
 * NCZ_adjust_var_cache() reports.
 * @author Ed Hartnett
 */
int
NCZ_set_var_chunk_cache(int ncid, int varid, size_t cachesize, size_t nelems, float preemption)
{
    NC_GRP_INFO_T *group;
    NC_FILE_INFO_T *file;
    NC_VAR_INFO_T *target;
    NCZ_VAR_INFO_T *ztarget;
    int status = NC_NOERR;

    /* Validate the one argument that can be checked without any lookup. */
    if (preemption < 0 || preemption > 1)
        return NC_EINVAL;

    /* Resolve ncid into its group and file structures. */
    status = nc4_find_nc_grp_h5(ncid, NULL, &group, &file);
    if (status)
        return status;
    assert(group && file);

    /* Locate the variable inside that group. */
    target = (NC_VAR_INFO_T *)ncindexith(group->vars, varid);
    if (!target)
        return NC_ENOTVAR;
    assert(target && target->hdr.id == varid);

    /* The zarr-specific part of the variable must already own a cache. */
    ztarget = (NCZ_VAR_INFO_T *)target->format_var_info;
    assert(ztarget != NULL && ztarget->cache != NULL);

    /* Record the requested parameters on the variable ... */
    target->chunkcache.size = cachesize;
    target->chunkcache.nelems = nelems;
    target->chunkcache.preemption = preemption;

    /* ... and let the cache pick them up. */
    status = NCZ_adjust_var_cache(target);
    return status;
}
/**
 * @internal Bring the chunk cache of a var in line with the var's
 * current chunkcache settings.
 *
 * If the cache is already marked valid this function does nothing.
 * Otherwise it empties the cache, discards any existing fill chunk,
 * copies var->chunkcache.size / var->chunkcache.nelems into the cache
 * limits, recomputes the chunk size and the number of elements per
 * chunk, and finally marks the cache valid.
 *
 * @param var Pointer to var info struct; its format_var_info must
 * carry a non-NULL cache.
 *
 * @return ::NC_NOERR No error.
 * @return Any error from emptying the cache (this includes writing back
 * modified chunks) or from reclaiming the fill chunk. On error the
 * cache is left marked not valid, so a later call goes through the
 * whole sequence again.
 * @author Ed Hartnett
 */
int
NCZ_adjust_var_cache(NC_VAR_INFO_T *var)
{
    int stat = NC_NOERR;
    NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info;
    NCZChunkCache* zcache = zvar->cache;

    if(zcache->valid) goto done;

#ifdef DEBUG
    fprintf(stderr,"xxx: adjusting cache for: %s\n",var->hdr.name);
#endif

    /* Completely empty the cache. Evicting a modified entry writes it
       out, so a failure here must not be dropped on the floor. */
    if((stat = flushcache(zcache))) goto done;

    /* Reclaim any existing fill_chunk */
    if((stat = NCZ_reclaim_fill_chunk(zcache))) goto done;

    /* Reset the parameters (flushcache() zeroed maxentries) */
    zcache->maxsize = var->chunkcache.size;
    zcache->maxentries = var->chunkcache.nelems;
#ifdef DEBUG
    fprintf(stderr,"%s.cache.adjust: size=%ld nelems=%ld\n",
	    var->hdr.name,(unsigned long)zcache->maxsize,(unsigned long)zcache->maxentries);
#endif

    /* One more thing, adjust the chunksize and count */
    zcache->chunksize = zvar->chunksize;
    zcache->chunkcount = 1;
    if(var->ndims > 0) {
	int i;
	for(i=0;i<var->ndims;i++) {
	    zcache->chunkcount *= var->chunksizes[i];
	}
    }
    zcache->valid = 1;

done:
    return stat;
}
/**************************************************/
/**
 * Create a chunk cache object for a variable.
 *
 * On success the new cache is stored in the variable's zarr info
 * (zvar->cache) and, if cachep is not NULL, also in *cachep. On failure
 * nothing is installed: zvar->cache keeps the value it had before the
 * call and the partially built cache is released.
 *
 * @param var containing var
 * @param chunksize Size in bytes of one chunk; must not be 0
 * @param dimsep dimension separator recorded in the cache
 * @param cachep return cache pointer; may be NULL
 *
 * @return ::NC_NOERR No error.
 * @return ::NC_EINVAL chunksize is 0.
 * @return ::NC_ENOMEM Out of memory.
 * @return Any error from ncxcachenew().
 * @author Dennis Heimbigner, Ed Hartnett
 */
int
NCZ_create_chunk_cache(NC_VAR_INFO_T* var, size64_t chunksize, char dimsep, NCZChunkCache** cachep)
{
    int stat = NC_NOERR;
    NCZChunkCache* cache = NULL;
    NCZ_VAR_INFO_T* zvar = NULL;

    if(chunksize == 0) return NC_EINVAL;

    zvar = (NCZ_VAR_INFO_T*)var->format_var_info;

    if((cache = calloc(1,sizeof(NCZChunkCache))) == NULL)
	{stat = NC_ENOMEM; goto done;}
    cache->var = var;
    /* a scalar contributes one extra (pseudo) dimension */
    cache->ndims = var->ndims + zvar->scalar;
    cache->fillchunk = NULL;
    cache->chunksize = chunksize;
    cache->dimension_separator = dimsep;

    /* number of elements in one chunk */
    cache->chunkcount = 1;
    if(var->ndims > 0) {
	int i;
	for(i=0;i<var->ndims;i++) {
	    cache->chunkcount *= var->chunksizes[i];
	}
    }

#ifdef FLUSH
    cache->maxentries = 1;
#endif

#ifdef DEBUG
    fprintf(stderr,"%s.cache: nelems=%ld size=%ld\n",
	    var->hdr.name,(unsigned long)cache->maxentries,(unsigned long)cache->maxsize);
#endif
    if((stat = ncxcachenew(LEAFLEN,&cache->xcache))) goto done;
    if((cache->mru = nclistnew()) == NULL)
	{stat = NC_ENOMEM; goto done;}
    nclistsetalloc(cache->mru,cache->maxentries);

    /* Fully built: hand ownership over. Installing the pointer only
       now guarantees that zvar->cache never refers to a cache that the
       cleanup below is about to free. */
    zvar->cache = cache;
    if(cachep) *cachep = cache;
    cache = NULL;

done:
    /* Non-NULL only on a failure path */
    NCZ_free_chunk_cache(cache);
    return THROW(stat);
}
/* Release a cache entry together with the data buffer and the two key
   strings it owns. For NC_STRING variables whose entry is not in
   fixed-length form, the entry data is first handed to
   nc_reclaim_data() (presumably to release the individual strings the
   buffer points to -- confirm against nc_reclaim_data()).
   A NULL entry is ignored. An entry whose data pointer is still NULL
   (e.g. one abandoned before its chunk was read) skips the string
   reclamation step.
*/
static void
free_cache_entry(NCZChunkCache* cache, NCZCacheEntry* entry)
{
    if(entry == NULL) return;

    if(entry->data != NULL) {
	int tid = cache->var->type_info->hdr.id;
	if(tid == NC_STRING && !entry->isfixedstring) {
	    int ncid = cache->var->container->nc4_info->controller->ext_ncid;
	    /* best effort: there is nobody to report a failure to */
	    (void)nc_reclaim_data(ncid,tid,entry->data,cache->chunkcount);
	}
    }
    nullfree(entry->data);
    nullfree(entry->key.varkey);
    nullfree(entry->key.chunkkey);
    nullfree(entry);
}
/* Destroy a chunk cache: release every entry on the MRU list (without
   writing anything back), then the hash index, the list, the fill
   chunk and finally the cache object itself. A NULL cache is ignored.
*/
void
NCZ_free_chunk_cache(NCZChunkCache* cache)
{
    if(cache == NULL) return;

    ZTRACE(4,"cache.var=%s",cache->var->hdr.name);

    /* Iterate over the entries */
    while(nclistlength(cache->mru) > 0) {
	void* ptr = NULL;
	NCZCacheEntry* entry = nclistremove(cache->mru,0);
	/* The entry is normally present in the index as well; only check
	   the returned pointer when the removal actually succeeded, so
	   that ptr is never inspected uninitialized. */
	if(ncxcacheremove(cache->xcache,entry->hashkey,&ptr) == NC_NOERR) {
	    assert(ptr == entry);
	}
	(void)ptr;
	free_cache_entry(cache,entry);
    }
#ifdef DEBUG
    fprintf(stderr,"|cache.free|=%ld\n",nclistlength(cache->mru));
#endif
    ncxcachefree(cache->xcache);
    nclistfree(cache->mru);
    cache->mru = NULL;
    (void)NCZ_reclaim_fill_chunk(cache);
    nullfree(cache);
    (void)ZUNTRACE(NC_NOERR);
}
/* Return the chunk size recorded in the cache, i.e. the size of one entry */
size64_t
NCZ_cache_entrysize(NCZChunkCache* cache)
{
    size64_t nbytes;

    assert(cache);
    nbytes = cache->chunksize;
    return nbytes;
}
/* Report how many entries are currently held on the cache's MRU list */
size64_t
NCZ_cache_size(NCZChunkCache* cache)
{
    size64_t active;

    assert(cache);
    active = nclistlength(cache->mru);
    return active;
}
/**
 * Return the data of the chunk with the given indices, reading it into
 * the cache first if it is not already there.
 *
 * @param cache the chunk cache
 * @param indices chunk indices; cache->ndims values are used
 * @param datap if not NULL, receives the entry's data pointer; the
 * memory stays owned by the cache
 *
 * @return ::NC_NOERR No error.
 * @return ::NC_ENOMEM Out of memory.
 * @return Any other error from the index lookup/insert, from building
 * the chunk path, from get_chunk() or from makeroom().
 */
int
NCZ_read_cache_chunk(NCZChunkCache* cache, const size64_t* indices, void** datap)
{
    int stat = NC_NOERR;
    int rank = cache->ndims;
    NCZCacheEntry* entry = NULL;
    ncexhashkey_t hkey = 0;

    /* the hash key */
    hkey = ncxcachekey(indices,sizeof(size64_t)*cache->ndims);
    /* See if already in cache */
    stat = ncxcachelookup(cache->xcache,hkey,(void**)&entry);
    switch(stat) {
    case NC_NOERR:
	/* Move to front of the lru */
	(void)ncxcachetouch(cache->xcache,hkey);
	break;
    case NC_ENOOBJECT:
        entry = NULL; /* not found; */
	break;
    default:
	entry = NULL; /* whatever lookup left here is not ours to free */
	goto done;
    }

    if(entry == NULL) { /*!found*/
	/* Create a new entry */
	if((entry = calloc(1,sizeof(NCZCacheEntry)))==NULL)
	    {stat = NC_ENOMEM; goto done;}
	memcpy(entry->indices,indices,rank*sizeof(size64_t));
        /* Create the key for this cache */
        if((stat = NCZ_buildchunkpath(cache,indices,&entry->key))) goto done;
        entry->hashkey = hkey;
	assert(entry->data == NULL && entry->size == 0);
	/* Try to read the object from "disk"; might change size; will create if non-existent */
	if((stat=get_chunk(cache,entry))) goto done;
	assert(entry->data != NULL);
	/* Ensure cache constraints not violated; but do it before entry is added */
	if((stat=makeroom(cache))) goto done;
	/* Register with the index first: if that fails the entry is in
	   neither structure and can simply be freed at done. (Pushing
	   onto the list first would leave a dangling pointer there.) */
	if((stat = ncxcacheinsert(cache->xcache,entry->hashkey,entry))) goto done;
        nclistpush(cache->mru,entry);
	/* NOTE(review): nothing visible in this file adds entry->size to
	   cache->used when an entry is inserted, yet constraincache()
	   returns early while cache->used is 0 -- confirm that the
	   accounting is done elsewhere. */
    }

#ifdef DEBUG
fprintf(stderr,"|cache.read.lru|=%ld\n",nclistlength(cache->mru));
#endif
    if(datap) *datap = entry->data;
    entry = NULL; /* now owned by the cache */

done:
    /* Only a new entry that never made it into the cache ends up here non-NULL */
    if(entry) free_cache_entry(cache,entry);
    return THROW(stat);
}
#if 0
/* Disabled code (excluded from compilation by the enclosing #if 0).
   As written it always builds a new cache entry holding a copy of
   cache->chunksize bytes taken from content, marks the entry modified,
   appends it to the MRU list and then calls makeroom().
   NOTE(review): no cache lookup is performed here, so entry is always
   NULL when the "if(entry == NULL)" test is reached; the entry is also
   not inserted into cache->xcache.
*/
int
NCZ_write_cache_chunk(NCZChunkCache* cache, const size64_t* indices, void* content)
{
int stat = NC_NOERR;
int rank = cache->ndims;
NCZCacheEntry* entry = NULL;
ncexhashkey_t hkey;
/* create the hash key */
hkey = ncxcachekey(indices,sizeof(size64_t)*cache->ndims);
if(entry == NULL) { /*!found*/
/* Create a new entry */
if((entry = calloc(1,sizeof(NCZCacheEntry)))==NULL)
{stat = NC_ENOMEM; goto done;}
memcpy(entry->indices,indices,rank*sizeof(size64_t));
if((stat = NCZ_buildchunkpath(cache,indices,&entry->key))) goto done;
entry->hashkey = hkey;
/* Create the local copy space */
entry->size = cache->chunksize;
if((entry->data = calloc(1,cache->chunksize)) == NULL)
{stat = NC_ENOMEM; goto done;}
memcpy(entry->data,content,cache->chunksize);
}
entry->modified = 1;
nclistpush(cache->mru,entry); /* MRU order */
#ifdef DEBUG
fprintf(stderr,"|cache.write|=%ld\n",nclistlength(cache->mru));
#endif
entry = NULL;
/* Ensure cache constraints not violated */
if((stat=makeroom(cache))) goto done;
done:
/* entry is non-NULL here only if creation failed part way */
if(entry) free_cache_entry(cache,entry);
return THROW(stat);
}
#endif
/* Enforce the cache limits before a new entry is added.
   When the MRU list holds exactly one entry the limits are not
   enforced at all, so that at least one entry is always allowed;
   in every other case the work is delegated to constraincache().
*/
static int
makeroom(NCZChunkCache* cache)
{
    if(nclistlength(cache->mru) == 1)
	return NC_NOERR;
    return constraincache(cache);
}
/* Empty the cache: drop the entry limit to zero and let
   constraincache() evict accordingly. The limit stays at zero
   afterwards; the caller is expected to set it again.
*/
static int
flushcache(NCZChunkCache* cache)
{
    int stat;

    cache->maxentries = 0;
    stat = constraincache(cache);
    return stat;
}
/* Remove entries to ensure cache is not
   violating any of its constraints.
   On entry, constraints might be violated.
   Entries are taken from the least recently used end of the hash
   index; a modified entry is written out with put_chunk() before it is
   released. Nothing is done while cache->used is 0.

   Returns NC_NOERR, an error from ncxcacheremove() (eviction stops at
   once), or the first error reported by put_chunk() (eviction carries
   on, the affected entry is still released, and the error is returned
   at the end instead of being overwritten by a later iteration).
*/
static int
constraincache(NCZChunkCache* cache)
{
    int stat = NC_NOERR;
    int writeerr = NC_NOERR; /* first put_chunk() failure, if any */

    /* If the cache is empty then do nothing */
    if(cache->used == 0) goto done;

    /* Flush from LRU end if we are at capacity */
    while(nclistlength(cache->mru) > cache->maxentries || cache->used > cache->maxsize) {
	size_t i, count;
	void* ptr = NULL;
	NCZCacheEntry* victim = ncxcachelast(cache->xcache); /* last entry is the least recently used */

	/* Nothing left to evict even though a limit is still exceeded:
	   the bookkeeping is out of step, but looping on (or
	   dereferencing NULL) would not help. */
	if(victim == NULL) break;

	if((stat = ncxcacheremove(cache->xcache,victim->hashkey,&ptr))) goto done;
	assert(victim == ptr);

	/* Take the same entry off the MRU list */
	count = nclistlength(cache->mru);
	for(i=0;i<count;i++) {
	    if(nclistget(cache->mru,i) == (void*)victim) break;
	}
	assert(i < count);
	if(i < count) nclistremove(cache->mru,i);

	/* Note that |old chunk data| may not be same as |new chunk data| because of filters */
	assert(cache->used >= victim->size);
	if(cache->used >= victim->size)
	    cache->used -= victim->size; /* old size */
	else
	    cache->used = 0; /* never wrap around */

	if(victim->modified) { /* flush to file */
	    int wstat = put_chunk(cache,victim);
	    if(wstat != NC_NOERR && writeerr == NC_NOERR) writeerr = wstat;
	}
	/* reclaim; same path as every other entry release, so string
	   chunks that are not in fixed-length form are reclaimed too */
	free_cache_entry(cache,victim);
    }
#ifdef DEBUG
fprintf(stderr,"|cache.makeroom|=%ld\n",nclistlength(cache->mru));
#endif
done:
    if(stat == NC_NOERR) stat = writeerr;
    return stat;
}
/* Write every modified entry of the cache out via put_chunk() and
   clear its modified flag. Entries stay in the cache.

   Returns NC_NOERR or the first error from put_chunk(); entries after
   the failing one are not processed and the failing entry keeps its
   modified flag.

   NOTE(review): put_chunk() may leave an entry's data in filtered
   and/or fixed-length-string form while the entry remains cached;
   confirm that later cache hits cope with that.
*/
int
NCZ_flush_chunk_cache(NCZChunkCache* cache)
{
    int stat = NC_NOERR;
    size_t i;

    ZTRACE(4,"cache.var=%s |cache|=%d",cache->var->hdr.name,(int)nclistlength(cache->mru));

    if(NCZ_cache_size(cache) == 0) goto done;

    /* Iterate over the entries in hashmap */
    for(i=0;i<nclistlength(cache->mru);i++) {
        NCZCacheEntry* entry = nclistget(cache->mru,i);
        if(entry->modified) {
	    size64_t oldsize = entry->size;
	    /* Write out this chunk in toto*/
	    stat = put_chunk(cache,entry);
	    /* Make cache used be consistent across filter application:
	       replace the old size by whatever size the entry has now.
	       This is done even when the write failed (the entry may
	       already have been converted), and never wraps below 0. */
	    if(cache->used + entry->size >= oldsize)
		cache->used = (cache->used + entry->size) - oldsize;
	    else
		cache->used = 0;
	    if(stat) goto done;
	}
        entry->modified = 0;
    }

done:
    return ZUNTRACE(stat);
}
/* Ensure existence of some kind of fill chunk.

   If the cache has no fill chunk yet, allocate cache->chunksize bytes
   and populate them:
   - var->no_fill set: all zero bytes;
   - NC_STRING: cache->chunkcount pointers, each a separate copy of the
     variable's fill string (left NULL if the fill string itself is
     NULL);
   - otherwise: the fill value repeated every var->type_info->size
     bytes.
   The chunk is attached to the cache only once completely built, so a
   failure never leaves a half-initialized fill chunk behind.

   Returns NC_NOERR, NC_ENOMEM, NC_EINVAL (type size of 0), or an error
   from NCZ_ensure_fill_value().
*/
int
NCZ_ensure_fill_chunk(NCZChunkCache* cache)
{
    int stat = NC_NOERR;
    NC_VAR_INFO_T* var = cache->var;
    nc_type typeid = var->type_info->hdr.id;
    size_t typesize = var->type_info->size;
    void* chunk = NULL;

    if(cache->fillchunk) goto done;

    if((chunk = malloc(cache->chunksize))==NULL)
	{stat = NC_ENOMEM; goto done;}

    if(var->no_fill) {
	/* use zeros */
	memset(chunk,0,cache->chunksize);
    } else {
	if((stat = NCZ_ensure_fill_value(var))) goto done;
	if(typeid == NC_STRING) {
	    char* src = *((char**)(var->fill_value));
	    char** dst = (char**)chunk;
	    size64_t i;
	    /* start out all NULL so that a partial failure can be unwound */
	    memset(chunk,0,cache->chunksize);
	    for(i=0;src != NULL && i<cache->chunkcount;i++) {
		if((dst[i] = strdup(src)) == NULL) {
		    while(i > 0) free(dst[--i]);
		    stat = NC_ENOMEM;
		    goto done;
		}
	    }
	} else if(typesize == 1) {
	    unsigned char c = *((unsigned char*)var->fill_value);
	    memset(chunk,c,cache->chunksize);
	} else {
	    unsigned char* p = (unsigned char*)chunk;
	    size64_t offset;
	    /* a zero element size could never advance the loop below */
	    if(typesize == 0) {stat = NC_EINVAL; goto done;}
	    /* byte-wise copy: same result as typed stores for sizes
	       2/4/8 and places no alignment demand on fill_value */
	    for(offset=0;offset+typesize<=cache->chunksize;offset+=typesize)
		memcpy(p+offset,var->fill_value,typesize);
	}
    }

    /* success: hand the finished chunk to the cache */
    cache->fillchunk = chunk;
    chunk = NULL;

done:
    nullfree(chunk);
    return stat;
}
/* Release the cache's fill chunk, if there is one, through
   nc_reclaim_data_all() and clear the pointer. A NULL cache or a cache
   without fill chunk is a no-op. Returns the status of
   nc_reclaim_data_all(), or NC_NOERR if nothing had to be done.
*/
int
NCZ_reclaim_fill_chunk(NCZChunkCache* zcache)
{
    NC_VAR_INFO_T* owner;
    int extid;
    int xtype;
    size_t nelems;
    int status;

    if(zcache == NULL || zcache->fillchunk == NULL)
	return NC_NOERR;

    owner = zcache->var;
    extid = owner->container->nc4_info->controller->ext_ncid;
    xtype = owner->type_info->hdr.id;
    nelems = zcache->chunkcount;

    status = nc_reclaim_data_all(extid,xtype,zcache->fillchunk,nelems);
    /* the pointer is cleared whatever the outcome */
    zcache->fillchunk = NULL;
    return status;
}
#if 0
/* Disabled code (excluded from compilation by the enclosing #if 0).
   Intended to mark the cache entry for the given chunk indices as
   modified.
   NOTE(review): appears stale relative to the rest of this file --
   NCZ_buildchunkkey() is called here without the dimension separator
   argument its definition takes, cache->mru is handed to
   NC_hashmapget() although the rest of the file treats it as an
   NClist, and entry is still NULL when it is cast and passed as the
   result pointer.
*/
int
NCZ_chunk_cache_modified(NCZChunkCache* cache, const size64_t* indices)
{
int stat = NC_NOERR;
char* key = NULL;
NCZCacheEntry* entry = NULL;
int rank = cache->ndims;
/* Create the key for this cache */
if((stat=NCZ_buildchunkkey(rank, indices, &key))) goto done;
/* See if already in cache */
if(NC_hashmapget(cache->mru, key, strlen(key), (uintptr_t*)entry)) { /* found */
entry->modified = 1;
}
done:
nullfree(key);
return THROW(stat);
}
#endif
/**************************************************/
/*
From Zarr V2 Specification:
"The compressed sequence of bytes for each chunk is stored under
a key formed from the index of the chunk within the grid of
chunks representing the array. To form a string key for a
chunk, the indices are converted to strings and concatenated
with the dimension_separator character ('.' or '/') separating
each index. For example, given an array with shape (10000,
10000) and chunk shape (1000, 1000) there will be 100 chunks
laid out in a 10 by 10 grid. The chunk with indices (0, 0)
provides data for rows 0-1000 and columns 0-1000 and is stored
under the key "0.0"; the chunk with indices (2, 4) provides data
for rows 2000-3000 and columns 4000-5000 and is stored under the
key "2.4"; etc."
*/
/**
* @param R Rank
* @param chunkindices The chunk indices
* @param dimsep the dimension separator
* @param keyp Return the chunk key string
*/
int
NCZ_buildchunkkey(size_t R, const size64_t* chunkindices, char dimsep, char** keyp)
{
int stat = NC_NOERR;
int r;
NCbytes* key = ncbytesnew();
if(keyp) *keyp = NULL;
assert(islegaldimsep(dimsep));
for(r=0;r<R;r++) {
char sindex[64];
if(r > 0) ncbytesappend(key,dimsep);
/* Print as decimal with no leading zeros */
snprintf(sindex,sizeof(sindex),"%lu",(unsigned long)chunkindices[r]);
ncbytescat(key,sindex);
}
ncbytesnull(key);
if(keyp) *keyp = ncbytesextract(key);
ncbytesfree(key);
return THROW(stat);
}
/**
 * @internal Write the data of one cache entry to its chunk object in
 * the file's map.
 *
 * Before writing, the entry is put into its stored form:
 * - for NC_STRING variables whose entry is not yet in fixed-length
 *   form, the data is converted with NCZ_char2fixed() into
 *   cache->chunkcount slots of maxstrlen bytes each and the previous
 *   data is reclaimed;
 * - with ENABLE_NCZARR_FILTERS, if the entry is not already filtered
 *   and the variable has filters, the filter chain is applied.
 * Both steps replace entry->data / entry->size and update the entry's
 * isfixedstring / isfiltered flags; the entry is left in that form.
 *
 * @param cache Pointer to parent cache
 * @param entry cache entry to write
 *
 * @return ::NC_NOERR No error.
 * @return ::NC_ENOMEM Out of memory.
 * @return Any error from the string conversion, the filter chain or
 * nczmap_write().
 * @author Dennis Heimbigner
 */
static int
put_chunk(NCZChunkCache* cache, NCZCacheEntry* entry)
{
    int stat = NC_NOERR;
    NC_FILE_INFO_T* file = NULL;
    NCZ_FILE_INFO_T* zfile = NULL;
    NCZMAP* map = NULL;
    char* path = NULL;
    nc_type tid = NC_NAT;
    void* strchunk = NULL;
    int ncid = 0;

    /* entry->key is a struct; %s needs one of its strings */
    ZTRACE(5,"cache.var=%s entry.key=%s",cache->var->hdr.name,entry->key.chunkkey);
    LOG((3, "%s: var: %p", __func__, (void*)cache->var));

    file = (cache->var->container)->nc4_info;
    zfile = file->format_file_info;
    map = zfile->map;

    /* Collect some info */
    ncid = file->controller->ext_ncid;
    tid = cache->var->type_info->hdr.id;

    if(tid == NC_STRING && !entry->isfixedstring) {
        /* Convert from char* to char[strlen] format */
        int maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var);
        assert(maxstrlen > 0);
        if((strchunk = malloc(cache->chunkcount*maxstrlen))==NULL) {stat = NC_ENOMEM; goto done;}
        /* copy char* to char[] format */
        if((stat = NCZ_char2fixed((const char**)entry->data,strchunk,cache->chunkcount,maxstrlen))) goto done;
        /* Reclaim the old chunk */
        if((stat = nc_reclaim_data_all(ncid,tid,entry->data,cache->chunkcount))) goto done;
        entry->data = strchunk; strchunk = NULL;
        entry->size = cache->chunkcount * maxstrlen;
        entry->isfixedstring = 1;
    }

#ifdef ENABLE_NCZARR_FILTERS
    /* Make sure the entry is in filtered state */
    if(!entry->isfiltered) {
        NC_VAR_INFO_T* var = cache->var;
        void* filtered = NULL; /* pointer to the filtered data */
	size_t flen; /* length of filtered data */
	/* Get the filter chain to apply */
	NClist* filterchain = (NClist*)var->filters;
	if(nclistlength(filterchain) > 0) {
	    /* Apply the filter chain to get the filtered data; will reclaim entry->data */
	    if((stat = NCZ_applyfilterchain(file,var,filterchain,entry->size,entry->data,&flen,&filtered,ENCODING))) goto done;
	    /* Fix up the cache entry */
	    /* Note that if filtered is different from entry->data, then entry->data will have been freed */
	    entry->data = filtered;
	    entry->size = flen;
            entry->isfiltered = 1;
	}
    }
#endif

    /* Write the (possibly converted) bytes under the chunk's path */
    if((path = NCZ_chunkpath(entry->key)) == NULL)
	{stat = NC_ENOMEM; goto done;}
    stat = nczmap_write(map,path,0,entry->size,entry->data);

done:
    nullfree(strchunk);
    nullfree(path);
    return ZUNTRACE(stat);
}
/**
 * @internal Fill a cache entry with the data of its chunk, read from
 * the file's map.
 *
 * If the chunk object exists, its raw bytes are read, run backwards
 * through the filter chain when the cache is filtered (with
 * ENABLE_NCZARR_FILTERS), and, for NC_STRING variables, converted from
 * fixed-length slots to an array of char* with NCZ_fixed2char().
 * If the map reports ::NC_EEMPTY for the chunk, the entry instead
 * receives a copy of the cache's fill chunk and is marked modified
 * unless the file is not writable.
 * On success entry->data and entry->size describe the in-memory form.
 *
 * @param cache Pointer to parent cache
 * @param entry cache entry to read into; entry->key identifies the chunk
 *
 * @return ::NC_NOERR No error.
 * @return ::NC_ENOMEM Out of memory.
 * @return ::NC_EFILTER Data is marked filtered but the variable has no
 * filter chain.
 * @return Any other error from the map, the fill chunk, the filter
 * chain or the string conversion.
 * @author Dennis Heimbigner
 */
static int
get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry)
{
    int stat = NC_NOERR;
    NCZMAP* map = NULL;
    NC_FILE_INFO_T* file = NULL;
    NCZ_FILE_INFO_T* zfile = NULL;
    NC_TYPE_INFO_T* xtype = NULL;
    char** strchunk = NULL;
    size64_t size = 0;
    int empty = 0;
    char* path = NULL;
    int tid;

    /* entry->key is a struct; %s needs one of its strings */
    ZTRACE(5,"cache.var=%s entry.key=%s sep=%d",cache->var->hdr.name,entry->key.chunkkey,cache->dimension_separator);

    file = (cache->var->container)->nc4_info;
    zfile = file->format_file_info;
    map = zfile->map;
    assert(map);

    /* log only once file actually points somewhere */
    LOG((3, "%s: file: %p", __func__, (void*)file));

    /* Collect some info */
    xtype = cache->var->type_info;
    tid = xtype->hdr.id;

    /* get size of the "raw" data on "disk" */
    if((path = NCZ_chunkpath(entry->key)) == NULL)
	{stat = NC_ENOMEM; goto done;}
    stat = nczmap_len(map,path,&size);
    nullfree(path); path = NULL;
    switch(stat) {
    case NC_NOERR: entry->size = size; break;
    case NC_EEMPTY: empty = 1; stat = NC_NOERR; break;
    default: goto done;
    }

    if(!empty) {
        /* Make sure we have a place to read it */
        if((entry->data = (void*)calloc(1,entry->size)) == NULL)
	    {stat = NC_ENOMEM; goto done;}
	/* Read the raw data */
        if((path = NCZ_chunkpath(entry->key)) == NULL)
	    {stat = NC_ENOMEM; goto done;}
        stat = nczmap_read(map,path,0,entry->size,(char*)entry->data);
        nullfree(path); path = NULL;
        switch (stat) {
        case NC_NOERR: break;
        case NC_EEMPTY: empty = 1; stat = NC_NOERR;break;
        default: goto done;
        }
        entry->isfiltered = FILTERED(cache); /* Is the data being read filtered? */
	if(tid == NC_STRING)
	    entry->isfixedstring = 1; /* fill cache is in char[maxstrlen] format */
    }
    if(empty) {
	/* fake the chunk */
        entry->modified = (file->no_write?0:1);
	entry->size = cache->chunksize;
	/* A read buffer may already exist if only nczmap_read() reported
	   the chunk as empty; release it instead of leaking it. */
	nullfree(entry->data);
	entry->data = NULL;
        entry->isfixedstring = 0;
        entry->isfiltered = 0;
        /* apply fill value */
	if(cache->fillchunk == NULL)
	    {if((stat = NCZ_ensure_fill_chunk(cache))) goto done;}
	if((entry->data = calloc(1,entry->size))==NULL) {stat = NC_ENOMEM; goto done;}
	if((stat = NCZ_copy_data(file,xtype,cache->fillchunk,cache->chunkcount,!ZCLEAR,entry->data))) goto done;
	stat = NC_NOERR;
    }
#ifdef ENABLE_NCZARR_FILTERS
    /* Make sure the entry is in unfiltered state */
    if(!empty && entry->isfiltered) {
        NC_VAR_INFO_T* var = cache->var;
        void* unfiltered = NULL; /* pointer to the unfiltered data */
        void* filtered = NULL; /* pointer to the filtered data */
	size_t unflen; /* length of unfiltered data */
	assert(tid != NC_STRING || entry->isfixedstring);
	/* Get the filter chain to apply */
	NClist* filterchain = (NClist*)var->filters;
	if(nclistlength(filterchain) == 0) {stat = NC_EFILTER; goto done;}
	/* Apply the filter chain to get the unfiltered data */
	filtered = entry->data;
	entry->data = NULL;
	if((stat = NCZ_applyfilterchain(file,var,filterchain,entry->size,filtered,&unflen,&unfiltered,!ENCODING))) goto done;
	/* Fix up the cache entry */
	entry->data = unfiltered;
	entry->size = unflen;
	entry->isfiltered = 0;
    }
#endif

    if(tid == NC_STRING && entry->isfixedstring) {
        /* Convert from char[strlen] to char* format */
	int maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var);
	assert(maxstrlen > 0);
	/* copy char[] to char* format */
	if((strchunk = (char**)malloc(sizeof(char*)*cache->chunkcount))==NULL)
	    {stat = NC_ENOMEM; goto done;}
	if((stat = NCZ_fixed2char(entry->data,strchunk,cache->chunkcount,maxstrlen))) goto done;
	/* Reclaim the old chunk */
	nullfree(entry->data);
	entry->data = strchunk; strchunk = NULL;
	entry->size = cache->chunkcount * sizeof(char*);
	entry->isfixedstring = 0;
    }

done:
    nullfree(strchunk);
    nullfree(path);
    return ZUNTRACE(stat);
}
/* Fill in a ChunkKey for the given chunk indices: key->chunkkey gets
   the chunk's key string and key->varkey the key of the cache's
   variable. Both strings are stored only if both could be built; on
   error key is left untouched and the error is returned.
*/
int
NCZ_buildchunkpath(NCZChunkCache* cache, const size64_t* chunkindices, struct ChunkKey* key)
{
    int stat;
    char* ckey = NULL; /* chunk object name */
    char* vkey = NULL; /* var object key */

    assert(key != NULL);

    stat = NCZ_buildchunkkey(cache->ndims, chunkindices, cache->dimension_separator, &ckey);
    if(!stat)
	stat = NCZ_varkey(cache->var,&vkey);
    if(!stat) {
	/* hand both strings over to the key */
	key->varkey = vkey;
	vkey = NULL;
	key->chunkkey = ckey;
	ckey = NULL;
    }

    /* whatever was not handed over is released here */
    nullfree(ckey);
    nullfree(vkey);
    return THROW(stat);
}
/* Debug aid: append a one-line, brace-enclosed description of a cache
   entry (modified flag, filtered flag, chunk indices joined by '.',
   size and data pointer) to buf.
*/
void
NCZ_dumpxcacheentry(NCZChunkCache* cache, NCZCacheEntry* e, NCbytes* buf)
{
    char s[8192];
    char idx[64];
    int i;

    ncbytescat(buf,"{");
    snprintf(s,sizeof(s),"modified=%u isfiltered=%u indices=",
	(unsigned)e->modified,
	(unsigned)e->isfiltered
	);
    ncbytescat(buf,s);
    for(i=0;i<cache->ndims;i++) {
	/* cast so that the argument really has the type %llu expects */
	snprintf(idx,sizeof(idx),"%s%llu",(i==0?"":"."),(unsigned long long)e->indices[i]);
	ncbytescat(buf,idx);
    }
    /* leading blank keeps the last index apart from the size field */
    snprintf(s,sizeof(s)," size=%llu data=%p",
	(unsigned long long)e->size,
	(void*)e->data
	);
    ncbytescat(buf,s);
    ncbytescat(buf,"}");
}
/* Debug aid: print a description of the cache (its parameters followed
   by one line per entry on the MRU list) to stderr.
   The text is written straight from the work buffer, so it is not
   limited to a fixed size and no static scratch storage is involved.
*/
void
NCZ_printxcache(NCZChunkCache* cache)
{
    NCbytes* buf = ncbytesnew();
    char s[8192];
    const char* text = NULL;
    size_t i;

    if(buf == NULL) return; /* nothing sensible can be printed */

    ncbytescat(buf,"NCZChunkCache:\n");
    /* sizes and counts are widened rather than narrowed to unsigned,
       so large values print in full */
    snprintf(s,sizeof(s),"\tvar=%s\n\tndims=%u\n\tchunksize=%llu\n\tchunkcount=%llu\n\tfillchunk=%p\n",
	cache->var->hdr.name,
	(unsigned)cache->ndims,
	(unsigned long long)cache->chunksize,
	(unsigned long long)cache->chunkcount,
	(void*)cache->fillchunk
	);
    ncbytescat(buf,s);

    snprintf(s,sizeof(s),"\tmaxentries=%llu\n\tmaxsize=%llu\n\tused=%llu\n\tdimsep='%c'\n",
	(unsigned long long)cache->maxentries,
	(unsigned long long)cache->maxsize,
	(unsigned long long)cache->used,
	cache->dimension_separator
	);
    ncbytescat(buf,s);

    snprintf(s,sizeof(s),"\tmru: (%u)\n",(unsigned)nclistlength(cache->mru));
    ncbytescat(buf,s);
    if(nclistlength(cache->mru)==0)
        ncbytescat(buf,"\t\t<empty>\n");
    for(i=0;i<nclistlength(cache->mru);i++) {
	NCZCacheEntry* e = (NCZCacheEntry*)nclistget(cache->mru,i);
	snprintf(s,sizeof(s),"\t\t[%d] ",(int)i);
	ncbytescat(buf,s);
	if(e == NULL)
	    ncbytescat(buf,"<null>");
	else
	    NCZ_dumpxcacheentry(cache, e, buf);
	ncbytescat(buf,"\n");
    }

    text = ncbytescontents(buf);
    fprintf(stderr,"%s\n",(text == NULL ? "" : text));
    ncbytesfree(buf);
}