Introduction to HDF5 
HDF5 Reference Manual 
Other HDF5 documents and links 
And in this document, the HDF5 User's Guide:    
Files   Datasets   Datatypes   Dataspaces   Groups  
References   Attributes   Property Lists   Error Handling  
Filters   Palettes   Caching   Chunking   Mounting Files  
Performance   Debugging   Environment   DDL  
Ragged Arrays  

DDL in BNF for HDF5

1. Introduction

This document contains the data description language (DDL) for an HDF5 file. The description is in Backus-Naur Form.

2. Explanation of Symbols

This section contains a brief explanation of the symbols used in the DDL.
    ::=                      defined as
    <tname>                  a token with the name tname
    <a> | <b>                one of <a> or <b>
    <a>opt                   zero or one occurrence of <a>
    <a>*                     zero or more occurrences of <a>
    <a>+                     one or more occurrences of <a>
    TBD                      To Be Decided

3. The DDL

<file> ::= HDF5 <file_name> { <file_super_block>opt <root_group> }

<file_name> ::= <identifier>

<file_super_block> ::= BOOT_BLOCK { <super_block_content> }

<super_block_content> ::= TBD

<root_group> ::= GROUP "/" { <unnamed_datatype>* <object_id>opt <group_attribute>* <group_member>* }

<unnamed_datatype> ::= DATATYPE <unnamed_type_name> { <compound_type> }

<unnamed_type_name> ::= the assigned name for an unnamed type is in the form
                        #oid1:oid2, where oid1 and oid2 are the object ids of the type

<compound_type> ::= <member_type_def>+ 

<member_type_def> ::= <scalar_type_def> | <array_type_def>

<scalar_type_def> ::= <atomic_type> <field_name> ;

<atomic_type> ::= <integer> | <float> | <time> | <string> | <bitfield> | <opaque> |
                 <reference> | <enum>

<integer> ::=  H5T_STD_I8BE | H5T_STD_I8LE | H5T_STD_I16BE | H5T_STD_I16LE | H5T_STD_I32BE |
              H5T_STD_I32LE | H5T_STD_I64BE | H5T_STD_I64LE |  H5T_STD_U8BE |
              H5T_STD_U8LE | H5T_STD_U16BE | H5T_STD_U16LE | H5T_STD_U32BE |
              H5T_STD_U32LE | H5T_STD_U64BE | H5T_STD_U64LE | H5T_NATIVE_CHAR |
              H5T_NATIVE_UCHAR | H5T_NATIVE_SHORT | H5T_NATIVE_USHORT | 
              H5T_NATIVE_INT | H5T_NATIVE_UINT | H5T_NATIVE_LONG | H5T_NATIVE_ULONG |
              H5T_NATIVE_LLONG | H5T_NATIVE_ULLONG

<float> ::= H5T_IEEE_F32BE | H5T_IEEE_F32LE | H5T_IEEE_F64BE |  H5T_IEEE_F64LE |
            H5T_NATIVE_FLOAT |  H5T_NATIVE_DOUBLE | H5T_NATIVE_LDOUBLE

<time> ::= TBD

<string> ::= { STRSIZE <strsize> ;
               STRPAD <strpad> ;
               CSET <cset> ;
               CTYPE <ctype> ; }  

<strsize> ::= an integer

<strpad> ::= H5T_STR_NULLTERM | H5T_STR_NULLPAD | H5T_STR_SPACEPAD

<cset> ::= H5T_CSET_ASCII

<ctype> ::= H5T_C_S1 | H5T_FORTRAN_S1

<bitfield> ::= TBD

<opaque> ::= TBD

<reference> ::= H5T_REFERENCE

<field_name> ::= <identifier>

<array_type_def> ::= <atomic_type> <field_name> <dim_sizes> ;

<dim_sizes> ::= [dimsize1][dimsize2]..., where dimsize1, dimsize2 are integers

<group_attribute> ::= <attribute> 

<attribute> ::= ATTRIBUTE <attr_name> { <datatype>    
                                        <dataspace>
                                        <object_id>opt
                                        <data>opt  } 
// <datatype> and <dataspace> must appear before <data>.

<attr_name> ::= <identifier>

<datatype> ::= DATATYPE { <atomic_type> }  |         
               DATATYPE { <compound_type> } |
               DATATYPE { <named_type> } 

<enum> ::= H5T_ENUM { <integer>; <enum_def>+  }

<enum_def> ::= <enum_symbol> <enum_val>;

<enum_symbol> ::= <identifier>

<enum_val> ::= an integer

<named_type> ::= <path_name>

<path_name> ::= <identifier>

<dataspace> ::= DATASPACE { SCALAR } |
                DATASPACE { SIMPLE <current_dims> / <max_dims> } |
                DATASPACE { COMPLEX <ds_definition>+ } |
                DATASPACE { <dataspace_name> }

<current_dims> ::= (i1, i2, ... ), where ik is an integer, k = 1,2,...

<max_dims> ::= (i1, i2, ... ), where ik is an integer or H5S_UNLIMITED

<ds_definition> ::= TBD

<dataspace_name> ::= <identifier>

<data> ::= DATA { <scalar_space_data> | <simple_space_data> | <complex_space_data> }
                  
<scalar_space_data> ::= <atomic_scalar_data> | <compound_scalar_data>

<atomic_scalar_data> ::= <integer_data> | <float_data> | <time_data> | <string_data> |
                         <bitfield_data> | <opaque_data> | <enum_data> | <reference_data>

<integer_data> ::= an integer

<float_data> ::= a floating point number

<time_data> ::= TBD

<string_data> ::= a string
// A string is enclosed in double quotes. 
// If a string is displayed on more than one line, the string concatenation operator '//' is used.

<bitfield_data> ::= TBD

<opaque_data> ::= TBD

<enum_data> ::= <enum_symbol>
//maybe will be <enum_symbol> in the future

<reference_data> ::= <object_ref_data> | <data_region_data> | NULL

<object_ref_data> ::= <object_type> <object_num>

<object_type> ::= DATASET | GROUP | DATATYPE

<object_id> ::= OBJECTID { <object_num> }

<object_num> ::= an integer:an integer | an integer

<data_region_data> ::= H5T_STD_REF_DSETREG <object_num> {<data_region_data_info>, 
                     <data_region_data_info>, ...}

<data_region_data_info> ::= <region_info> | <point_info>

<region_info> ::= (<lower_bound>:<upper_bound>, <lower_bound>:<upper_bound>, ...)

<lower_bound> ::= an integer

<upper_bound> ::= an integer

<point_info> ::= (an integer, an integer, ...)

<compound_scalar_data> ::= { [ <member_data> ], [ <member_data> ], ... }

<member_data> ::= <atomic_scalar_data> | <atomic_simple_data> 

<atomic_simple_data> ::= <atomic_element>, <atomic_element>, ...

<atomic_element> ::= <atomic_scalar_data>

<simple_space_data> ::= <atomic_simple_data> | <compound_simple_data>

<compound_simple_data> ::= <compound_element>, <compound_element>, ...

<compound_element> ::= <compound_scalar_data>

<complex_space_data> ::= TBD

<group_member> ::= <named_datatype> | <named_dataspace> | <group> | <dataset> | 
                   <softlink>

<named_datatype> ::= DATATYPE <type_name> { <compound_type> }

<type_name> ::= <identifier>

<named_dataspace> ::= TBD

<group> ::= GROUP <group_name> { <hardlink> } |
            GROUP <object_id>opt <group_name> { <group_attribute>* <group_member>* } 
            
<group_name> ::= <identifier>

<hardlink> ::= HARDLINK <path_name> 

<dataset> ::= DATASET <dataset_name> { <hardlink> } |
              DATASET <dataset_name> { <datatype>  
                                       <dataspace> 
                                       <storagelayout>opt
                                       <compression>opt
                                       <dataset_attribute>*
                                       <object_id>opt
                                       <data>opt  } 
// Tokens within {} can be in any order as long as <data> and <dataset_attribute>
// are after <datatype> and <dataspace>.

<dataset_name> ::= <identifier>

<storagelayout> ::= STORAGELAYOUT <contiguous_layout> |
                    STORAGELAYOUT <chunked_layout> |
                    STORAGELAYOUT <compact_layout> |
                    STORAGELAYOUT <external_layout>

<contiguous_layout> ::= {CONTIGUOUS}    // default

<chunked_layout> ::=  {CHUNKED <dims> }

<dims> ::= (i1, i2, ... ), where ik is an integer, k = 1,2,...

<compact_layout> ::= TBD           

<external_layout> ::= {EXTERNAL <external_file>+ }

<external_file> ::= (<file_name> <offset> <size>) 

<offset> ::= an integer

<size> ::= an integer

<compression> ::= COMPRESSION { TBD }

<dataset_attribute> ::= <attribute> 

<softlink> ::= SOFTLINK <softlink_name> { LINKTARGET <target> }

<softlink_name> ::= <identifier>

<target> ::= <identifier>

<identifier> ::= string   
// character '/' should be used with care. 

4. An Example of an HDF5 File in DDL

HDF5 "example.h5" {
GROUP "/" {
   ATTRIBUTE "attr1" {
      DATATYPE {
         { STRSIZE 17;
           STRPAD H5T_STR_NULLTERM;
           CSET H5T_CSET_ASCII;
           CTYPE H5T_C_S1;
         }
      }
      DATASPACE { SCALAR }
      DATA {
         "string attribute"
      }
   }
   DATASET "dset1" {
      DATATYPE { H5T_STD_I32BE }
      DATASPACE { SIMPLE ( 10, 10 ) / ( 10, 10 ) }
      DATA {
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9
      }
   }
   DATASET "dset2" {
      DATATYPE {
         H5T_STD_I32BE "a";
         H5T_IEEE_F32BE "b";
         H5T_IEEE_F64BE "c";
      }
      DATASPACE { SIMPLE ( 5 ) / ( 5 ) }
      DATA {
         {
            [ 1 ],
            [ 0.1 ],
            [ 0.01 ]
         },
         {
            [ 2 ],
            [ 0.2 ],
            [ 0.02 ]
         },
         {
            [ 3 ],
            [ 0.3 ],
            [ 0.03 ]
         },
         {
            [ 4 ],
            [ 0.4 ],
            [ 0.04 ]
         },
         {
            [ 5 ],
            [ 0.5 ],
            [ 0.05 ]
         }
      }
   }
   GROUP "group1" {
      DATASET "dset3" {
         DATATYPE {
            "/type1"
         }
         DATASPACE { SIMPLE ( 5 ) / ( 5 ) }
         DATA {
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            },
            {
               [ 0, 1, 2, 3 ],
               [ 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
                 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
                 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                 0.4, 0.4, 0.4, 0.4, 0.4, 0.4,
                 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ]
            }
         }
      }
   }
   GROUP "group2" {
      HARDLINK "/group1"
   }
   SOFTLINK "slink1" {
      LINKTARGET "somevalue"
   }
   DATATYPE "type1" {
      H5T_STD_I32BE "a"[4];
      H5T_IEEE_F32BE "b"[5][6];
   }
}
}

Introduction to HDF5 
HDF5 Reference Manual 
Other HDF5 documents and links 
And in this document, the HDF5 User's Guide:    
Files   Datasets   Datatypes   Dataspaces   Groups  
References   Attributes   Property Lists   Error Handling  
Filters   Palettes   Caching   Chunking   Mounting Files  
Performance   Debugging   Environment   DDL  
Ragged Arrays  

HDF Help Desk
Last modified: 14 October 1999