diff --git a/Doxyfile b/Doxyfile index 1ba6dd2db..0ed8234f7 100644 --- a/Doxyfile +++ b/Doxyfile @@ -582,7 +582,8 @@ libdispatch/nc4.c libdispatch/dcompound.c libdispatch/dv2i.c \ libdispatch/dvlen.c libdispatch/denum.c libdispatch/dopaque.c \ libdispatch/dtype.c libsrc4/nc4file.c \ man4/mainpage.doc man4/tutorial.doc COPYRIGHT man4/install.doc man4/dispatch.doc \ -man4/guide.doc man4/types.doc man4/notes.doc +man4/guide.doc man4/types.doc man4/notes.doc man4/cdl.doc \ +ncdump/ncdump.c # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is diff --git a/man4/cdl.doc b/man4/cdl.doc new file mode 100644 index 000000000..318b07c01 --- /dev/null +++ b/man4/cdl.doc @@ -0,0 +1,271 @@ +/** +\file +Documentation for Common Data Language + +\page CDL + +\section cdl_syntax CDL Syntax + +Below is an example of CDL, describing a netCDF dataset with several +named dimensions (lat, lon, time), variables (z, t, p, rh, lat, lon, +time), variable attributes (units, _FillValue, valid_range), and some +data. + +\code + netcdf foo { // example netCDF specification in CDL + + dimensions: + lat = 10, lon = 5, time = unlimited; + + variables: + int lat(lat), lon(lon), time(time); + float z(time,lat,lon), t(time,lat,lon); + double p(time,lat,lon); + int rh(time,lat,lon); + + lat:units = "degrees_north"; + lon:units = "degrees_east"; + time:units = "seconds"; + z:units = "meters"; + z:valid_range = 0., 5000.; + p:_FillValue = -9999.; + rh:_FillValue = -1; + + data: + lat = 0, 10, 20, 30, 40, 50, 60, 70, 80, 90; + lon = -140, -118, -96, -84, -52; + } +\endcode + +All CDL statements are terminated by a semicolon. Spaces, tabs, and +newlines can be used freely for readability. Comments may follow the +double slash characters '//' on any line. + +A CDL description for a classic model file consists of three optional +parts: dimensions, variables, and data.
The variable part may contain +variable declarations and attribute assignments. For the enhanced +model supported by netCDF-4, a CDL description may also include +groups, subgroups, and user-defined types. + +A dimension is used to define the shape of one or more of the +multidimensional variables described by the CDL description. A +dimension has a name and a length. At most one dimension in a classic +CDL description can have the unlimited length, which means a variable +using this dimension can grow to any length (like a record number in a +file). Any number of dimensions can be declared of unlimited length in +CDL for an enhanced model file. + +A variable represents a multidimensional array of values of the same +type. A variable has a name, a data type, and a shape described by its +list of dimensions. Each variable may also have associated attributes +(see below) as well as data values. The name, data type, and shape of +a variable are specified by its declaration in the variable section of +a CDL description. A variable may have the same name as a dimension; +by convention such a variable contains coordinates of the dimension it +names. + +An attribute contains information about a variable or about the whole +netCDF dataset or containing group. Attributes may be used to specify +such properties as units, special values, maximum and minimum valid +values, and packing parameters. Attribute information is represented +by single values or one-dimensional arrays of values. For example, +“units” might be an attribute represented by a string such as +“celsius”. An attribute has an associated variable, a name, a data +type, a length, and a value. In contrast to variables that are +intended for data, attributes are intended for ancillary data or +metadata (data about data). + +In CDL, an attribute is designated by a variable and attribute name, +separated by a colon (':').
It is possible to assign global attributes +to the netCDF dataset as a whole by omitting the variable name and +beginning the attribute name with a colon (':'). The data type of an +attribute in CDL, if not explicitly specified, is derived from the +type of the value assigned to it. The length of an attribute is the +number of data values or the number of characters in the character +string assigned to it. Multiple values are assigned to non-character +attributes by separating the values with commas (','). All values +assigned to an attribute must be of the same type. In the netCDF-4 +enhanced model, attributes may be declared to be of user-defined type, +like variables. + +In CDL, just as for netCDF, the names of dimensions, variables and +attributes (and, in netCDF-4 files, groups, user-defined types, +compound member names, and enumeration symbols) consist of arbitrary +sequences of alphanumeric characters, underscore '_', period '.', plus +'+', hyphen '-', or at sign '@', but beginning with a letter or +underscore. However, names commencing with underscore are reserved for +system use. Case is significant in netCDF names. A zero-length name is +not allowed. Some widely used conventions restrict names to only +alphanumeric characters or underscores. Names that have trailing space +characters are also not permitted. + +Beginning with versions 3.6.3 and 4.0, names may also include UTF-8 +encoded Unicode characters as well as other special characters, except +for the character '/', which may not appear in a name (because it is +reserved for path names of nested groups). In CDL, most special +characters are escaped with a backslash '\' character, but that +character is not actually part of the netCDF name. The special +characters that do not need to be escaped in CDL names are underscore +'_', period '.', plus '+', hyphen '-', or at sign '@'. For the formal +specification of CDL name syntax, see Format.
Note that by using +special characters in names, you may make your data not compliant with +conventions that have more stringent requirements on valid names for +netCDF components, for example the CF Conventions. + +The names for the primitive data types are reserved words in CDL, so +names of variables, dimensions, and attributes must not be primitive +type names. + +The optional data section of a CDL description is where netCDF +variables may be initialized. The syntax of an initialization is +simple: + +\code + variable = value_1, value_2, ...; +\endcode + +The comma-delimited list of constants may be separated by spaces, +tabs, and newlines. For multidimensional arrays, the last dimension +varies fastest. Thus, row-major order rather than column-major order is used for +matrices. If fewer values are supplied than are needed to fill a +variable, it is extended with the fill value. The types of constants +need not match the type declared for a variable; coercions are done to +convert integers to floating point, for example. All meaningful type +conversions among primitive types are supported. + +A special notation for fill values is supported: the ‘_’ character +designates a fill value for variables. + +\section cdl_data_types CDL Data Types + +The CDL primitive data types for the classic model are: +- char Characters. +- byte Eight-bit integers. +- short 16-bit signed integers. +- int 32-bit signed integers. +- long (Deprecated, synonymous with int) +- float IEEE single-precision floating point (32 bits). +- real (Synonymous with float). +- double IEEE double-precision floating point (64 bits). + +NetCDF-4 supports the additional primitive types: +- ubyte Unsigned eight-bit integers. +- ushort Unsigned 16-bit integers. +- uint Unsigned 32-bit integers. +- int64 64-bit signed integers. +- uint64 Unsigned 64-bit integers. +- string Variable-length string of characters. + +Except for the added data-type byte, CDL supports the same primitive +data types as C.
For backward compatibility, in declarations primitive +type names may be specified in either upper or lower case. + +The byte type differs from the char type in that it is intended for +numeric data, and the zero byte has no special significance, as it may +for character data. The short type holds values between -32768 and +32767. The ushort type holds values between 0 and 65535. The int type +can hold values between -2147483648 and 2147483647. The uint type +holds values between 0 and 4294967295. The int64 type can hold values +between -9223372036854775808 and 9223372036854775807. The uint64 type +can hold values between 0 and 18446744073709551615. + +The float type can hold values between about -3.4e+38 and 3.4e+38, with +external representation as 32-bit IEEE normalized single-precision +floating-point numbers. The double type can hold values between about +-1.7e+308 and 1.7e+308, with external representation as 64-bit IEEE +standard normalized double-precision, floating-point numbers. The +string type holds variable length strings. + +\section cdl_data_constants CDL Notation for Data Constants + +This section describes the CDL notation for constants. + +Attributes are initialized in the variables section of a CDL +description by providing a list of constants that determines the +attribute's length and type (if primitive and not explicitly +declared). CDL defines a syntax for constant values that permits +distinguishing among different netCDF primitive types. The syntax for +CDL constants is similar to C syntax, with type suffixes appended to +bytes, shorts, and floats to distinguish them from ints and doubles. + +A byte constant is represented by a single character or multiple +character escape sequence enclosed in single quotes.
For example: + +\code + 'a' // ASCII a + '\0' // a zero byte + '\n' // ASCII newline character + '\33' // ASCII escape character (33 octal) + '\x2b' // ASCII plus (2b hex) + '\376' // 376 octal = -2 (or 254) decimal +\endcode + +Character constants are enclosed in double quotes. A character array +may be represented as a string enclosed in double quotes. Multiple +strings are concatenated into a single array of characters, permitting +long character arrays to appear on multiple lines. To support multiple +variable-length string values, a conventional delimiter such as ',' +may be used, but interpretation of any such convention for a string +delimiter must be implemented in software above the netCDF library +layer. The usual escape conventions for C strings are honored. For +example: + +\code + "a" // ASCII 'a' + "Two\nlines\n" // a 10-character string with two embedded newlines + "a bell:\007" // a string containing an ASCII bell + "ab","cde" // the same as "abcde" +\endcode + +The form of a short constant is an integer constant with an 's' or 'S' +appended. If a short constant begins with '0', it is interpreted as +octal. When it begins with '0x', it is interpreted as a hexadecimal +constant. For example: + +\code + 2s // a short 2 + 0123s // octal + 0x7ffs // hexadecimal +\endcode + +The form of an int constant is an ordinary integer constant. If an int +constant begins with '0', it is interpreted as octal. When it begins +with '0x', it is interpreted as a hexadecimal constant. Examples of +valid int constants include: + +\code + -2 + 0123 // octal + 0x7ff // hexadecimal + 1234567890L // deprecated, uses old long suffix +\endcode + +The float type is appropriate for representing data with about seven +significant digits of precision. The form of a float constant is the +same as a C floating-point constant with an 'f' or 'F' appended. A +decimal point is required in a CDL float to distinguish it from an +integer.
For example, the following are all acceptable float +constants: + +\code + -2.0f + 3.14159265358979f // will be truncated to less precision + 1.f + .1f +\endcode + +The double type is appropriate for representing floating-point data +with about 16 significant digits of precision. The form of a double +constant is the same as a C floating-point constant. An optional 'd' +or 'D' may be appended. A decimal point is required in a CDL double to +distinguish it from an integer. For example, the following are all +acceptable double constants: + +\code + -2.0 + 3.141592653589793 + 1.0e-20 + 1.d +\endcode + +*/ \ No newline at end of file diff --git a/ncdump/ncdump.c b/ncdump/ncdump.c index 0f200a46b..1a1498cfd 100644 --- a/ncdump/ncdump.c +++ b/ncdump/ncdump.c @@ -1,8 +1,10 @@ -/********************************************************************* - * Copyright 2008, University Corporation for Atmospheric Research - * See netcdf/README file for copying and redistribution conditions. - * $Id: ncdump.c 400 2010-08-27 21:02:52Z russ $ - *********************************************************************/ +/** \file +The ncdump utility + +This file implements ncdump, which writes a netCDF dataset as CDL text. + +Copyright 2008 University Corporation for Atmospheric +Research/Unidata. See \ref copyright file for more info. */ #include #include @@ -2002,6 +2004,186 @@ void adapt_url_for_cache(char **pathp) { return; } +/** +The ncdump tool generates the CDL text representation of a netCDF +dataset on standard output, optionally excluding some or all of the +variable data in the output. The output from ncdump is intended to be +acceptable as input to ncgen. Thus ncdump and ncgen can be used as +inverses to transform data between binary and text +representations. + +As of netCDF version 4.1, ncdump can also access DAP data sources if +DAP support is enabled in the underlying netCDF library. Instead of +specifying a file name as argument to ncdump, the user specifies a URL +to a DAP source.
+ +ncdump may also be used as a simple browser for netCDF datasets, to +display the dimension names and lengths; variable names, types, and +shapes; attribute names and values; and optionally, the values of data +for all variables or selected variables in a netCDF dataset. + +ncdump defines a default format used for each type of netCDF variable +data, but this can be overridden if a C_format attribute is defined +for a netCDF variable. In this case, ncdump will use the C_format +attribute to format values for that variable. For example, if +floating-point data for the netCDF variable Z is known to be accurate +to only three significant digits, it might be appropriate to use this +variable attribute: + +\code + Z:C_format = "%.3g" +\endcode + +ncdump uses '_' to represent data values that are equal to the +_FillValue attribute for a variable, intended to represent data that +has not yet been written. If a variable has no _FillValue attribute, +the default fill value for the variable type is used unless the +variable is of byte type. + +UNIX syntax for invoking ncdump: + +\code + ncdump [ -c | -h] [-v var1,...] [-b lang] [-f lang] + [-l len] [ -p fdig[,ddig]] [ -s ] [ -t ] [ -n name] [input-file] +\endcode + +where: + +-c Show the values of coordinate variables (variables that are also + dimensions) as well as the declarations of all dimensions, variables, + and attribute values. Data values of non-coordinate variables are not + included in the output. This is often the most suitable option to use + for a brief look at the structure and contents of a netCDF dataset. + +-h Show only the header information in the output, that is, output + only the declarations for the netCDF dimensions, variables, and + attributes of the input file, but no data values for any + variables. The output is identical to using the '-c' option except + that the values of coordinate variables are not included. (At most + one of '-c' or '-h' options may be present.) + +-v var1,...
The output will include data values for the specified + variables, in addition to the declarations of all dimensions, + variables, and attributes. One or more variables must be specified by + name in the comma-delimited list following this option. The list must + be a single argument to the command, hence cannot contain blanks or + other white space characters. The named variables must be valid + netCDF variables in the input-file. The default, without this option + and in the absence of the '-c' or '-h' options, is to include data + values for all variables in the output. + +-b lang A brief annotation in the form of a CDL comment (text + beginning with the characters '//') will be included in the data + section of the output for each 'row' of data, to help identify data + values for multidimensional variables. If lang begins with 'C' or + 'c', then C language conventions will be used (zero-based indices, + last dimension varying fastest). If lang begins with 'F' or 'f', then + FORTRAN language conventions will be used (one-based indices, first + dimension varying fastest). In either case, the data will be + presented in the same order; only the annotations will differ. This + option may be useful for browsing through large volumes of + multidimensional data. + +-f lang Full annotations in the form of trailing CDL comments (text + beginning with the characters '//') for every data value (except + individual characters in character arrays) will be included in the + data section. If lang begins with 'C' or 'c', then C language + conventions will be used (zero-based indices, last dimension varying + fastest). If lang begins with 'F' or 'f', then FORTRAN language + conventions will be used (one-based indices, first dimension varying + fastest). In either case, the data will be presented in the same + order; only the annotations will differ. 
This option may be useful + for piping data into other filters, since each data value appears on + a separate line, fully identified. (At most one of '-b' or '-f' + options may be present.) + +-l len Changes the default maximum line length (80) used in formatting +lists of non-character data values. + +-p float_digits[,double_digits] Specifies default precision (number of +significant digits) to use in displaying floating-point or double +precision data values for attributes and variables. If specified, this +value overrides the value of the C_format attribute, if any, for a +variable. Floating-point data will be displayed with float_digits +significant digits. If double_digits is also specified, +double-precision values will be displayed with that many significant +digits. In the absence of any '-p' specifications, floating-point and +double-precision data are displayed with 7 and 15 significant digits +respectively. CDL files can be made smaller if less precision is +required. If both floating-point and double precisions are specified, +the two values must appear separated by a comma (no blanks) as a +single argument to the command. + +-n name CDL requires a name for a netCDF dataset, for use by 'ncgen +-b' in generating a default netCDF dataset name. By default, ncdump +constructs this name from the last component of the file name of the +input netCDF dataset by stripping off any extension it has. Use the +'-n' option to specify a different name. Although the output file name +used by 'ncgen -b' can be specified, it may be wise to have ncdump +change the default name to avoid inadvertently overwriting a valuable +netCDF dataset when using ncdump, editing the resulting CDL file, and +using 'ncgen -b' to generate a new netCDF dataset from the edited CDL +file. 
+ +-s Specifies that special virtual attributes should be output for the +file format variant and for variable properties such as compression, +chunking, and other properties specific to the format implementation +that are primarily related to performance rather than the logical +schema of the data. All the special virtual attributes begin with '_' +followed by an upper-case letter. Currently they include the global +attribute “_Format” and the variable attributes “_Fletcher32”, +“_ChunkSizes”, “_Endianness”, “_DeflateLevel”, “_Shuffle”, “_Storage”, +and “_NoFill”. The ncgen utility recognizes these attributes and +supports them appropriately. + +-t Controls display of time data, if stored in a variable that uses a +udunits compliant time representation such as “days since 1970-01-01” +or “seconds since 2009-03-15 12:01:17”. If this option is specified, +time values are displayed as human-readable date-time strings rather +than numerical values, interpreted in terms of a “calendar” variable +attribute, if specified. Calendar attribute values interpreted with +this option include the CF Conventions values “gregorian” or +“standard”, “proleptic_gregorian”, “noleap” or “365_day”, “all_leap” +or “366_day”, “360_day”, and “julian”. 
+ +\section ncdump_examples Examples + +Look at the structure of the data in the netCDF dataset foo.nc: + +\code +ncdump -c foo.nc +\endcode + +Produce an annotated CDL version of the structure and data in the +netCDF dataset foo.nc, using C-style indexing for the annotations: + +\code +ncdump -b c foo.nc > foo.cdl +\endcode + +Output data for only the variables uwind and vwind from the netCDF +dataset foo.nc, and show the floating-point data with only three +significant digits of precision: + +\code +ncdump -v uwind,vwind -p 3 foo.nc +\endcode + +Produce a fully-annotated (one data value per line) listing of the +data for the variable omega, using FORTRAN conventions for indices, +and changing the netCDF dataset name in the resulting CDL file to +omega: + +\code +ncdump -v omega -f fortran -n omega foo.nc > Z.cdl +\endcode + +Examine the translated DDS for the DAP source from the specified URL: + +\code +ncdump -h http://test.opendap.org:8080/dods/dts/test.01 +\endcode + */ int main(int argc, char *argv[]) {