From 61e6e3970f2fe1a63ad590a2f30383dff835f211 Mon Sep 17 00:00:00 2001
From: Dennis Heimbigner <dennis.heimbigner@gmail.com>
Date: Mon, 10 Feb 2025 18:52:09 -0700
Subject: [PATCH] Provide an auxiliary function that allows users to parse the
 _NCProperties attribute.

re: Discussion https://github.com/Unidata/netcdf-c/discussions/3085

This discussion raised the issue of the best way to distinguish
a netcdf-c created file from an HDF5 created file. The
recommended way is to use the _NCProperties attribute.  In order
for users to process this attribute, I have added a parser for
the attribute to the netcdf_aux.h file.
---
 RELEASE_NOTES.md            |   4 ++
 include/netcdf_aux.h        |   7 ++
 libdispatch/daux.c          | 127 ++++++++++++++++++++++++++++++++++++
 ncdap_test/Makefile.am      |   1 -
 unit_test/Makefile.am       |   6 ++
 unit_test/ref_provparse.txt |   4 ++
 unit_test/run_auxmisc.sh    |  31 +++++++++
 unit_test/test_auxmisc.c    | 111 +++++++++++++++++++++++++++++++
 8 files changed, 290 insertions(+), 1 deletion(-)
 create mode 100644 unit_test/ref_provparse.txt
 create mode 100755 unit_test/run_auxmisc.sh
 create mode 100644 unit_test/test_auxmisc.c

diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index a1a0d8766..2b6fa29aa 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -5,6 +5,10 @@ Release Notes       {#RELEASE_NOTES}
 
 This file contains a high-level description of this package's evolution. Releases are in reverse chronological order (most recent first). Note that, as of netcdf 4.2, the `netcdf-c++` and `netcdf-fortran` libraries have been separated into their own libraries.
 
+## 4.9.4 - TBD
+
+* Provide an auxiliary function that allows users to parse the _NCProperties attribute. See [Github #????](https://github.com/Unidata/netcdf-c/pull/????) for more information.
+
 ## 4.9.3 - February 7, 2025
 
 ## Known Issues
diff --git a/include/netcdf_aux.h b/include/netcdf_aux.h
index 64bf603bf..1985b00f1 100644
--- a/include/netcdf_aux.h
+++ b/include/netcdf_aux.h
@@ -245,6 +245,13 @@ EXTERNL int ncaux_plugin_path_stringget(int pathlen, char* path);
 */
 EXTERNL int ncaux_plugin_path_stringset(int pathlen, const char* path);
 
+/* Parse the contents of an _NCProperties attribute into (key,value) string pairs.
+ * @param ncprop the contents of the _NCProperties attribute.
+ * @param pairsp receives an allocated vector of alternating key and value strings ending in a (NULL,NULL) pair; the caller frees each string and the vector. It is set to NULL when ncprop has zero length.
+ * @return NC_NOERR | NC_EXXX
+ */
+EXTERNL int ncaux_parse_provenance(const char* ncprop, char*** pairsp);
+
 #if defined(__cplusplus)
 }
 #endif
diff --git a/libdispatch/daux.c b/libdispatch/daux.c
index 71e2a7c52..da2917b0f 100644
--- a/libdispatch/daux.c
+++ b/libdispatch/daux.c
@@ -31,6 +31,7 @@ See COPYRIGHT for license information.
 #include "ncrc.h"
 #include "netcdf_filter.h"
 #include "ncpathmgr.h"
+#include "nclist.h"
 
 struct NCAUX_FIELD {
     char* name;
@@ -1274,3 +1275,129 @@ done:
     if(npl.dirs != NULL) {(void)ncaux_plugin_path_clear(&npl);}
     return stat;
 }
+
+/**************************************************/
+
+/* De-escape a string: drop each '\' and keep the char after it; returns a malloc'd copy, or NULL if s is NULL or memory runs out */
+static char*
+deescape(const char* s)
+{
+    char* des = NULL;
+    char* p = NULL;
+    char* q = NULL;
+    if(s == NULL || (des = strdup(s)) == NULL) return NULL; /* never hand NULL to strdup */
+    for(p=des,q=des;*p;) {
+	switch (*p) {
+	case '\\':
+	    p++;
+	    if(*p == '\0') {*q++ = '\\'; break;} /* edge case: keep a lone trailing escape */
+	    /* fall thru: copy the escaped char verbatim, even when it is another '\' */
+	default:
+	    *q++ = *p++;
+	    break;
+	}
+    }
+    *q = '\0';
+    return des;
+}
+
+/**
+ * Parse the contents of an _NCProperties attribute into (key,value) pairs.
+ * Pairs are separated by ',' (or '|' in version one) and key from value by '=';
+ * a backslash escapes the next character. A pair without '=' gets an empty value.
+ * @param ncprop0 the contents of the _NCProperties attribute; it is not modified.
+ * @param pairsp receives a vector of alternating key and value strings ending in a (NULL,NULL) pair;
+ *        the caller frees each string and the vector. Left NULL when ncprop0 has zero length.
+ * @return NC_NOERR | NC_EINVAL if some pair is empty | NC_ENOMEM
+ */
+int
+ncaux_parse_provenance(const char* ncprop0, char*** pairsp)
+{
+    int stat = NC_NOERR;
+    NClist* pairs = NULL;
+    char* ncprop = NULL;
+    size_t ncproplen = 0;
+    char* thispair = NULL;
+    char* p = NULL;
+    int i,count = 0;
+    int endinner;
+
+    if(pairsp == NULL) goto done;
+    *pairsp = NULL;
+    ncproplen = nulllen(ncprop0);
+
+    if(ncproplen == 0) goto done;
+
+    /* Make a modifiable copy; the second nul keeps a trailing escape from stepping past the buffer */
+    if((ncprop = (char*)malloc(ncproplen+1+1)) == NULL) {stat = NC_ENOMEM; goto done;}
+    strcpy(ncprop,ncprop0);
+    ncprop[ncproplen+1] = '\0'; /* double nul term */
+    if((pairs = nclistnew()) == NULL) {stat = NC_ENOMEM; goto done;}
+
+    /* delimit the key,value pairs */
+    thispair = ncprop;
+    count = 0;
+    p = thispair;
+    endinner = 0;
+    do {
+	switch (*p) {
+	case '\0':
+	    if(strlen(thispair)==0) {stat = NC_EINVAL; goto done;} /* Empty pair, so there is no key */
+	    endinner = 1; /* terminate loop */
+	    break;
+	case ',': case '|': /* '|' is version one pair separator */
+	    *p++ = '\0'; /* terminate this pair */
+	    if(strlen(thispair)==0) {stat = NC_EINVAL; goto done;} /* Empty pair, so there is no key */
+	    thispair = p;
+	    count++;
+	    break;
+	case '\\':
+	    p++; /* skip the escape and escaped char */
+	    /* fall thru */
+	default:
+	    p++;
+	    break;
+	}
+    } while(!endinner);
+    count++;
+    /* Split and store the pairs */
+    thispair = ncprop;
+    for(i=0;i<count;i++) {
+	char* key = thispair;
+	char* value = NULL;
+	char* nextpair = (thispair + strlen(thispair) + 1); /* computed before '=' is overwritten */
+	/* Find the '=' separator for each pair */
+	p = thispair;
+	endinner = 0;
+	do {
+	    switch (*p) {
+	    case '\0': /* Key has no value */
+		value = p;
+		endinner = 1; /* => leave loop */
+		break;
+	    case '=':
+		*p++ = '\0'; /* split this pair */
+		value = p;
+		endinner = 1;
+		break;
+	    case '\\':
+		p++; /* skip the escape + escaped char */
+		/* fall thru */
+	    default:
+		p++;
+		break;
+	    }
+	} while(!endinner);
+	/* store de-escaped copies, then advance to the next pair */
+	nclistpush(pairs,deescape(key));
+	nclistpush(pairs,deescape(value));
+	thispair = nextpair;
+    }
+    /* terminate the list with (NULL,NULL) key value pair*/
+    nclistpush(pairs,NULL); nclistpush(pairs,NULL);
+    *pairsp = (char**)nclistextract(pairs);
+done:
+    nullfree(ncprop);
+    nclistfreeall(pairs);
+    return stat;
+}
diff --git a/ncdap_test/Makefile.am b/ncdap_test/Makefile.am
index e7d8ff834..41c16b666 100644
--- a/ncdap_test/Makefile.am
+++ b/ncdap_test/Makefile.am
@@ -66,7 +66,6 @@ endif
 
 if NETCDF_ENABLE_DAP_LONG_TESTS
 test_manyurls_SOURCES = test_manyurls.c manyurls.h
-check_PROGRAMS += test_manyurls
 test_manyurls.log: tst_longremote3.log
 TESTS += test_manyurls
 endif
diff --git a/unit_test/Makefile.am b/unit_test/Makefile.am
index 90eef59c5..e1ae642f1 100644
--- a/unit_test/Makefile.am
+++ b/unit_test/Makefile.am
@@ -60,10 +60,16 @@ check_PROGRAMS += aws_config
 TESTS += run_aws_config.sh
 endif
 
+# Test misc. netcdf_aux functions
+check_PROGRAMS += test_auxmisc
+TESTS += run_auxmisc.sh
+
 EXTRA_DIST = CMakeLists.txt run_s3sdk.sh run_reclaim_tests.sh run_aws_config.sh run_pluginpaths.sh run_dfaltpluginpath.sh
+EXTRA_DIST += run_auxmisc.sh
 EXTRA_DIST += nctest_netcdf4_classic.nc reclaim_tests.cdl
 EXTRA_DIST += ref_get.txt ref_set.txt
 EXTRA_DIST += ref_xget.txt ref_xset.txt
+EXTRA_DIST += ref_provparse.txt
 
 CLEANFILES = reclaim_tests*.txt reclaim_tests.nc tmp_*.txt
 
diff --git a/unit_test/ref_provparse.txt b/unit_test/ref_provparse.txt
new file mode 100644
index 000000000..136a0655a
--- /dev/null
+++ b/unit_test/ref_provparse.txt
@@ -0,0 +1,4 @@
+abc=2\|z\=17,yyy=|zzz ->  (/abc/,/2|z=17/) (/yyy/,//) (/zzz/,//)
+version=2,netcdf=4.7.4-development,hdf5=1.10.4 ->  (/version/,/2/) (/netcdf/,/4.7.4-development/) (/hdf5/,/1.10.4/)
+version=2,netcdf=4.6.2-development,hdf5=1.10.1 ->  (/version/,/2/) (/netcdf/,/4.6.2-development/) (/hdf5/,/1.10.1/)
+version=1|netcdf=4.6.2-development|hdf5=1.8.1 ->  (/version/,/1/) (/netcdf/,/4.6.2-development/) (/hdf5/,/1.8.1/)
diff --git a/unit_test/run_auxmisc.sh b/unit_test/run_auxmisc.sh
new file mode 100755
index 000000000..70c6e5552
--- /dev/null
+++ b/unit_test/run_auxmisc.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+
+if test "x$srcdir" = x ; then srcdir=`pwd`; fi 
+. ../test_common.sh
+
+set -e
+
+# Provenance strings to parse, separated by blanks (so no string may contain whitespace).
+# The first one covers edge cases: escaped separators, an empty value, and a key without '='.
+TESTS=
+TESTS="$TESTS abc=2\|z\=17,yyy=|zzz"
+TESTS="$TESTS version=2,netcdf=4.7.4-development,hdf5=1.10.4"
+TESTS="$TESTS version=2,netcdf=4.6.2-development,hdf5=1.10.1"
+TESTS="$TESTS version=1|netcdf=4.6.2-development|hdf5=1.8.1"
+
+# Test provenance parsing: run every string through test_auxmisc -P and compare the output with the reference
+testprov() {
+rm -f tmp_provparse.txt
+for t in $TESTS ; do
+${execdir}/test_auxmisc -P "${t}" >> tmp_provparse.txt
+done
+echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+cat ${srcdir}/ref_provparse.txt
+echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+cat tmp_provparse.txt
+echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
+# Verify; with set -e a mismatch fails the script. The reference lives in srcdir for out-of-tree builds.
+diff -wBb ${srcdir}/ref_provparse.txt tmp_provparse.txt
+}
+
+testprov
diff --git a/unit_test/test_auxmisc.c b/unit_test/test_auxmisc.c
new file mode 100644
index 000000000..6040230fb
--- /dev/null
+++ b/unit_test/test_auxmisc.c
@@ -0,0 +1,111 @@
+/*********************************************************************
+ *   Copyright 2018, UCAR/Unidata
+ *   See netcdf/COPYRIGHT file for copying and redistribution conditions.
+ *********************************************************************/
+
+/**
+Test miscellaneous netcdf_aux functions.
+*/
+
+#include "config.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include "netcdf.h"
+#include "netcdf_aux.h"
+
+#define NCCATCH
+#include "nclog.h"
+
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#endif
+
+#if defined(_WIN32) && !defined(__MINGW32__)
+#include "XGetopt.h"
+#endif
+
+#define DEBUG
+
+typedef enum CMD {cmd_none=0, cmd_prov=1} CMD; /* cmd_prov is selected by the -P flag */
+
+struct Options {
+    int debug; /* set to 1 by the -d flag */
+    CMD cmd; /* which test main() dispatches to */
+    int argc; /* number of arguments left after option parsing */
+    char** argv; /* the arguments left after option parsing */
+} options;
+
+#define CHECK(code) do {stat = check(code,__func__,__LINE__); if(stat) {goto done;}} while(0)
+/* Report a failing netCDF status with its origin on stderr and exit(1); returns only when code is NC_NOERR. */
+static int
+check(int code, const char* fcn, int line)
+{
+    if(code == NC_NOERR) return code;
+    fprintf(stderr,"***fail: (%d) %s @ %s:%d\n",code,nc_strerror(code),fcn,line);
+#ifdef debug /* NOTE(review): this file defines DEBUG (upper case), so abort() is not compiled in -- confirm intent */
+    abort();
+#endif
+    exit(1);
+}
+
+/* Parse each remaining command-line argument as an _NCProperties string and print its (key,value) pairs. */
+static void
+testprov(void)
+{
+    int stat = NC_NOERR;
+    int i;
+    char** list = NULL;
+    assert(options.argc > 0);
+    for(i=0;i<options.argc;i++) {
+        char** p;
+	CHECK(ncaux_parse_provenance(options.argv[i],&list));
+	/* Print and reclaim; the parser leaves list NULL for an empty string, so guard the walk */
+	printf("%s -> ",options.argv[i]);
+	for(p=list;p != NULL && *p != NULL;p+=2) {
+	    printf(" (/%s/,/%s/)",p[0],p[1]);
+	    free(p[0]); free(p[1]); /* free(NULL) is a no-op */
+	}
+	printf("\n");
+	free(list); list = NULL;
+    }
+done:
+    return;
+}
+
+int
+main(int argc, char** argv)
+{
+    int stat = NC_NOERR;
+    int c;
+    /* Test driver: -d sets the debug flag, -P selects provenance parsing of the remaining arguments */
+    memset((void*)&options,0,sizeof(options));
+
+    while ((c = getopt(argc, argv, "dP")) != -1) { /* POSIX getopt signals the end of options with -1 */
+        switch(c) {
+        case 'd':
+            options.debug = 1;
+            break;
+        case 'P':
+            options.cmd = cmd_prov;
+            break;
+        case '?':
+        default:
+            fprintf(stderr,"unknown option\n");
+            stat = NC_EINVAL;
+            goto done;
+        }
+    }
+    /* Setup args: whatever follows the options is the input for the selected command */
+    argc -= optind;
+    argv += optind;
+    options.argc = argc;
+    options.argv = argv;
+    switch (options.cmd) {
+    case cmd_prov: testprov(); break;
+    default: fprintf(stderr,"Unknown cmd\n"); abort(); break;
+    }
+done:
+    return (stat?1:0);
+}