NetCDF Template

Creation of the MARIS NetCDF template based on a “pseudo” Common Data Language (CDL) .toml config file.

NetCDF template generator

Generate a NetCDF4 template from the configurable CDL.toml file, itself generated in /api/configs.ipynb.


source

NCTemplater

 NCTemplater (cdl:Dict, nuclide_vars_fname:str, tpl_fname:str,
              enum_dicts:Dict, verbose=False)

MARIS NetCDF template generator.

Type Default Details
cdl Dict “Pseudo CDL” (.toml)
nuclide_vars_fname str File name and path of MARIS nuclide lookup table containing variable names
tpl_fname str File name and path of NetCDF4 file to be generated
enum_dicts Dict MARIS NetCDF enumeration types
verbose bool False
Exported source
class NCTemplater:
    "MARIS NetCDF template generator."
    def __init__(self, 
                 cdl:Dict, # "Pseudo CDL" (`.toml`)
                 nuclide_vars_fname:str, # File name and path of MARIS nuclide lookup table containing variable names
                 tpl_fname:str, # File name and path of NetCDF4 file to be generated
                 enum_dicts:Dict, # MARIS NetCDF enumeration types
                 verbose:bool=False # Print a message for each enumeration type being created
                ):
        # fastcore helper: keeps the constructor arguments on the instance
        # (the patched methods read `self.cdl`, `self.tpl_fname`, `self.enum_dicts`, ...).
        fc.store_attr()
        self.dim = cdl['dim']  # `dim` section of the CDL (its `name` is used as the dimension name)
        self.enum_types = {}  # Filled by `create_enum_types`: enum name -> created NetCDF enum type

For example, given the configuration file cdl.toml below, the templater gets access, among other things, to its dim definition section:

# Load the test CDL and build the enumeration dicts from the lookup tables.
cdl_test = read_toml('./files/cdl.toml')
lut_src_dir_test = './files/lut'
cdl_enums_test = read_toml('./files/cdl.toml')['enums']
enums = get_enum_dicts(lut_src_dir=lut_src_dir_test, 
                       cdl_enums=cdl_enums_test)


templater = NCTemplater(cdl=cdl_test,
                        nuclide_vars_fname='./files/lut/dbo_nuclide.xlsx', 
                        tpl_fname='./files/nc/test.nc',
                        enum_dicts=enums)

# `templater.dim` is the `dim` section of the CDL, exposed as-is.
expected = {'name': 'sample', 
            'dtype': 'u8', 
            'attrs': {'long_name': 'Sample ID of measurement'}
            }

fc.test_eq(templater.dim, expected)

source

NCTemplater.nuclide_vars

 NCTemplater.nuclide_vars (col_varnames:str='nc_name',
                           col_stdnames:str='nusymbol', dtype:str='f4')

Return the name of the radionuclide variables analysed.

Type Default Details
col_varnames str nc_name Column name in the Excel lookup file containing the NetCDF variable names
col_stdnames str nusymbol Column name in the Excel lookup file containing the NetCDF standard names
dtype str f4 Default data type
Returns list List of nuclide variables (including their names and attributes)
Exported source
@patch
def nuclide_vars(
    self:NCTemplater,
    col_varnames:str='nc_name', # Column name in the Excel lookup file containing the NetCDF variable names
    col_stdnames:str='nusymbol', # Column name in the Excel lookup file containing the NetCDF standard names
    dtype:str='f4', # Default data type
    ) -> list[dict]: # List of nuclide variables (including their names and attributes)
    "Build the NetCDF variable specifications (name, dtype, attrs) of the nuclides listed in the lookup table."
    lut = pd.read_excel(self.nuclide_vars_fname, index_col=0)

    # Placeholder rows do not describe an actual nuclide: leave them out.
    is_placeholder = (lut.nuclide == 'NOT AVAILABLE') | (lut.nuclide == 'NOT APPLICABLE')
    lut = lut[~is_placeholder]

    rows = zip(
        lut[col_varnames],
        lut['nuclide'].str.capitalize(),
        lut['massnb'].astype(int),
        lut[col_stdnames],
    )
    specs = []
    for var_name, nuclide, mass_number, std_name in rows:
        # Key order (`name`, `dtype`, `attrs`) is kept as in every other variable spec.
        specs.append({
            'name': var_name,
            'dtype': dtype,
            'attrs': {
                'long_name': f"{nuclide.capitalize()} {mass_number}",
                'standard_name': std_name,
            },
        })
    return specs

For example, to retrieve the NetCDF nuclide names and associated attributes:

templater = NCTemplater(cdl=cdl_test,
                        nuclide_vars_fname='./files/lut/dbo_nuclide.xlsx', 
                        tpl_fname='./files/nc/test.nc',
                        enum_dicts=enums)
# Variable specifications expected for the first two nuclides of the lookup table.
expected = [
  {'name': 'h3', 'attrs': {'long_name': 'Tritium 3', 'standard_name': '3H'}, 'dtype': 'f4'},
  {'name': 'be7', 'attrs': {'long_name': 'Beryllium 7', 'standard_name': '7Be'}, 'dtype': 'f4'}
  ]

# Only the first two entries are compared.
fc.test_eq(templater.nuclide_vars()[:2], expected)

source

NCTemplater.derive

 NCTemplater.derive (nuclide:dict, suffix:dict)

Derive NetCDF nuclide-dependent variable names & attributes as defined in CDL.

Type Details
nuclide dict Nuclide variable name and associated netcdf attributes
suffix dict Naming rules as described in CDL (e.g _unc)
Returns dict Derived variable name and associated attributes
Exported source
@patch
def derive(
    self:NCTemplater,
    nuclide:dict, # Nuclide variable name and associated netcdf attributes
    suffix:dict,  # Naming rules as described in CDL (e.g `_unc`)
) -> dict: # Derived variable name and associated attributes
    "Combine a nuclide variable with a CDL suffix rule into a nuclide-dependent variable (name & attributes)."
    derived_name = nuclide['name'] + suffix['name']
    derived_dtype = suffix['dtype']  # the data type always comes from the suffix rule

    # Each nuclide attribute is extended with the suffix attribute stored under the same key.
    derived_attrs = {}
    for attr_name, attr_value in nuclide['attrs'].items():
        derived_attrs[attr_name] = attr_value + suffix['attrs'][attr_name]

    return {'name': derived_name, 'dtype': derived_dtype, 'attrs': derived_attrs}

For example, the cdl.toml file defines, among other things, the naming convention for variables derived from nuclides (e.g. h3_unc for the measurement uncertainty on the h3 nuclide variable).

# Templater built from the test CDL, the nuclide lookup table and the enumeration dicts.
templater = NCTemplater(cdl=cdl_test,
                        nuclide_vars_fname='./files/lut/dbo_nuclide.xlsx', 
                        tpl_fname='./files/nc/test.nc',
                        enum_dicts=enums)

Below is the Tritium NetCDF variable as specified in the .cdl file:

templater.nuclide_vars()[0]
{'name': 'h3',
 'dtype': 'f4',
 'attrs': {'long_name': 'Tritium 3', 'standard_name': '3H'}}
# Example of suffix defined in the .cdl file
suffix = {
    'name': '_unc',
    'attrs': {
        'long_name': ' uncertainty',
        'standard_name': '_uncertainty'
        },
    'dtype': 'f4'
    }

# And what we expect
expected = {
    'name': 'h3_unc',
    'attrs': {
        'long_name': 'Tritium 3 uncertainty',
        'standard_name': '3H_uncertainty'
        },
    'dtype': 'f4'
    }

# Name and attributes are the nuclide's values with the suffix's values appended.
fc.test_eq(templater.derive(templater.nuclide_vars()[0], suffix=suffix), expected)

source

NCTemplater.create_enum_types

 NCTemplater.create_enum_types ()

Create enumeration types

Exported source
@patch
def create_enum_types(self:NCTemplater):
    "Register each entry of `enum_dicts` as an enumeration type on the open NetCDF dataset."
    for enum_name, members in self.enum_dicts.items():
        if self.verbose:
            print(f'Creating {enum_name} enumeration type')
        enum_type = self.nc.createEnumType(np.int_, enum_name, members)
        # Kept by name so that `create_variable` can resolve a dtype to its enum type.
        self.enum_types[enum_name] = enum_type

source

NCTemplater.create_groups

 NCTemplater.create_groups ()

Create NetCDF groups

Exported source
@patch
def create_groups(self:NCTemplater):
    "Create one NetCDF group per entry of the CDL `grps` section and fill it with its variables."
    # All names are collected first, before any group is created.
    grp_names = [grp_cfg['name'] for grp_cfg in self.cdl['grps'].values()]
    for grp_name in grp_names:
        self.create_variables(self.nc.createGroup(grp_name))

source

NCTemplater.create_variables

 NCTemplater.create_variables (grp:netCDF4._netCDF4.Group)

Create variables

Type Details
grp Group NetCDF group
Exported source
@patch
def create_variables(self:NCTemplater, 
                     grp:netCDF4.Group, # NetCDF group
                     ):
    "Fill `grp` with the dimension variable, then the default, group-specific and analyte variables."
    self.create_variable(grp, self.dim)  # Dimension variable
    populate_steps = (
        self.create_default_variables,
        self.create_group_specific_variables,
        self.create_analyte_variables,
    )
    for populate in populate_steps:
        populate(grp)

source

NCTemplater.create_default_variables

 NCTemplater.create_default_variables (grp:netCDF4._netCDF4.Group)

Create Default variables

Type Details
grp Group NetCDF group
Exported source
@patch
def create_default_variables(self:NCTemplater, 
                             grp:netCDF4.Group, # NetCDF group
                             ):
    "Create in `grp` every variable declared under `vars.defaults` in the CDL."
    default_specs = self.cdl['vars']['defaults']
    for spec in default_specs.values():
        self.create_variable(grp, spec)

source

NCTemplater.create_group_specific_variables

 NCTemplater.create_group_specific_variables (grp:netCDF4._netCDF4.Group)

Create group specific variables

Type Details
grp Group NetCDF group
Exported source
@patch
def create_group_specific_variables(self:NCTemplater, 
                             grp:netCDF4.Group, # NetCDF group
                             ):
    "Create in `grp` the variables the CDL declares for this group only (nothing if it has no section)."
    all_specs = self.cdl['vars']
    # NOTE(review): `name2grp` presumably maps the NetCDF group name to its key in the CDL - confirm.
    grp_key = name2grp(grp.name, self.cdl)
    grp_specs = all_specs.get(grp_key, {})
    for spec in grp_specs.values():
        self.create_variable(grp, spec)

source

NCTemplater.create_analyte_variables

 NCTemplater.create_analyte_variables (grp:netCDF4._netCDF4.Group)

Create analyte variables and their dependent variables, such as uncertainty, detection limit, …

Type Details
grp Group NetCDF group
Exported source
@patch
def create_analyte_variables(self:NCTemplater, 
                             grp:netCDF4.Group, # NetCDF group
                             ):
    "Create in `grp` each nuclide variable followed by its suffix-derived variables (`vars.suffixes` in the CDL)."
    for analyte in self.nuclide_vars():
        self.create_variable(grp, analyte)
        suffix_rules = self.cdl['vars']['suffixes']
        for rule in suffix_rules.values():
            derived = self.derive(analyte, rule)
            self.create_variable(grp, derived)

source

NCTemplater.create_variable

 NCTemplater.create_variable (grp:netCDF4._netCDF4.Group, var:Dict)

Create NetCDF variable with proper types (standard and enums)

Type Details
grp Group NetCDF group
var Dict Variable specification dict with name, dtype and attrs keys
Exported source
@patch
def create_variable(self:NCTemplater, 
                    grp:netCDF4.Group, # NetCDF group
                    var:Dict, # Variable specification dict with `name`, `dtype` and `attrs` keys
                    ):
    "Create NetCDF variable with proper types (standard and enums)"
    # Read the fields by key, not by position: unpacking `var.values()` silently swaps
    # `dtype` and `attrs` when a spec lists its keys in another order (e.g. name/attrs/dtype)
    # and fails as soon as a spec carries an extra key.
    name, dtype, attrs = var['name'], var['dtype'], var['attrs']
    # A dtype naming a registered enumeration resolves to that enum type;
    # any other dtype is handed to netCDF4 unchanged.
    nc_type = self.enum_types.get(dtype) or dtype
    nc_var = grp.createVariable(name, nc_type, self.dim['name'])
    nc_var.setncatts(attrs)

source

NCTemplater.generate

 NCTemplater.generate ()

Generate the NetCDF template from the CDL

Exported source
@patch
def generate(self:NCTemplater):
    "Write the NetCDF4 template file at `tpl_fname`: global attributes, enum types, dimension and groups."
    with Dataset(self.tpl_fname, 'w', format='NETCDF4') as dataset:
        self.nc = dataset  # the `create_*` methods reach the open file through `self.nc`
        dataset.setncatts(self.cdl['global_attrs'])
        # Enum types and the dimension must exist before the group variables that use them.
        self.create_enum_types()
        dim_name = self.dim['name']
        dataset.createDimension(dim_name, None)
        self.create_groups()

So in summary, to produce a template MARIS NetCDF:

# `verbose=True` prints one line per enumeration type being created.
templater = NCTemplater(cdl=cdl_test,
                        nuclide_vars_fname='./files/lut/dbo_nuclide.xlsx', 
                        tpl_fname='./files/nc/template-test.nc',
                        enum_dicts=enums, 
                        verbose=True)

# Writes the template to `tpl_fname`.
templater.generate()
Creating area_t enumeration type
Creating bio_group_t enumeration type
Creating body_part_t enumeration type
Creating species_t enumeration type
Creating sed_type_t enumeration type
Creating unit_t enumeration type
Creating dl_t enumeration type
Creating filt_t enumeration type
Creating counmet_t enumeration type
Creating sampmet_t enumeration type
Creating prepmet_t enumeration type