|  | Type | Default | Details |
|---|---|---|---|
| dfs | Dict |  | dict of DataFrames to encode, with group name as key {'sediment': df_sed, ...} |
| dest_fname | str |  | Name of output file to produce |
| global_attrs | Dict |  | Global attributes |
| fn_src_fname | Callable | nc_tpl_path | Function returning file name and path to the MARIS CDL template |
| custom_maps | Dict | None | Custom maps to encode |
| verbose | bool | False | Print currently written NetCDF group and variable names |
Exported source
```python
class NetCDFEncoder:
    "MARIS NetCDF encoder."
    def __init__(self, 
                 dfs: Dict[str, pd.DataFrame], # dict of DataFrames to encode with group name as key {'sediment': df_sed, ...}
                 dest_fname: str, # Name of output file to produce
                 global_attrs: Dict[str, str], # Global attributes
                 fn_src_fname: Callable=nc_tpl_path, # Function returning file name and path to the MARIS CDL template
                 custom_maps: Dict[str, Dict[str, int]]=None, # Custom maps to encode
                 verbose: bool=False, # Print currently written NetCDF group and variable names
                 ):
        store_attr()
        self.src_fname = fn_src_fname()
        self.enum_dtypes = {}
        self.nc_to_cols = {v: k for k, v in NC_VARS.items()}
```
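For orientation, a minimal construction sketch is shown below. The dataframe columns and values are hypothetical; in practice the keys of `dfs` must be valid MARIS group names and the columns must map through `NC_VARS` onto variables defined in the CDL template.

```python
import pandas as pd

# Hypothetical input: column names are assumed to be valid NC_VARS keys.
df_sea = pd.DataFrame({
    'LON': [141.0, 142.5],
    'LAT': [37.4, 38.1],
    'TIME': [1622000000, 1622086400],
})

encoder = NetCDFEncoder(
    dfs={'seawater': df_sea},                            # group name -> dataframe
    dest_fname='encoded.nc',                             # output NetCDF file
    global_attrs={'id': '123', 'title': 'Test title'},   # written as global attributes
    verbose=True,
)
```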
Update NetCDF template global attributes as specified by global_attrs argument.
Exported source
```python
@patch
def copy_global_attributes(self:NetCDFEncoder):
    "Update NetCDF template global attributes as specified by `global_attrs` argument."
    self.dest.setncatts(self.src.__dict__)
    for k, v in self.global_attrs.items(): self.dest.setncattr(k, v)
```
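The order matters here: every attribute of the template is copied first, and anything passed via `global_attrs` then overrides or extends it. A standalone sketch of that precedence using plain `netCDF4` calls (file names are hypothetical):

```python
from netCDF4 import Dataset

with Dataset('template.nc') as src, Dataset('out.nc', 'w') as dest:
    dest.setncatts(src.__dict__)                   # 1. copy every template attribute
    for k, v in {'title': 'My survey'}.items():    # 2. user-supplied attributes win on conflict
        dest.setncattr(k, v)
```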
Copy dimensions to root and all groups from template.
Exported source
```python
@patch
def copy_dimensions(self:NetCDFEncoder, grp_dest):
    "Copy dimensions to root and all groups from template."
    src_dim = self.src.groups[grp_dest.name].dimensions
    for name, dim in src_dim.items(): 
        grp_dest.createDimension(name, (len(dim) if not dim.isunlimited() else None))
```
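`netCDF4` interprets a size of `None` as an unlimited dimension, which is how the template's unlimited `sample` dimension is preserved in each destination group. A minimal standalone illustration (hypothetical file name):

```python
from netCDF4 import Dataset

with Dataset('dims.nc', 'w') as nc:
    nc.createDimension('sample', None)              # None -> unlimited dimension
    assert nc.dimensions['sample'].isunlimited()
```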
```python
@patch
def copy_variables(self:NetCDFEncoder, grp_name, df, grp_dest):
    cols = [NC_VARS[col] for col in df.columns if col in NC_VARS]
    for var_name, var_src in self.src.groups[grp_name].variables.items():
        if var_name in cols: 
            self.copy_variable(var_name, var_src, df, grp_dest)
            if self.custom_maps:
                self.copy_custom_map(var_name, grp_dest)
```
```python
@patch
def _create_and_copy_variable(self:NetCDFEncoder, var_name:str, var_src, df, grp_dest, dtype_name:str):
    "Create and populate a NetCDF variable with data from the dataframe"
    variable_type = self._get_variable_type(dtype_name, var_src)
    self._create_netcdf_variable(grp_dest, var_name, variable_type)
    self._populate_variable_data(grp_dest, var_name, variable_type, df)
```
Exported source
```python
@patch
def _get_variable_type(self:NetCDFEncoder, dtype_name:str, var_src):
    "Determine the appropriate variable type for NetCDF creation"
    if var_src.dtype == str: return str
    return self.enum_dtypes.get(dtype_name, var_src.datatype)
```
Exported source
```python
@patch
def _create_netcdf_variable(self:NetCDFEncoder, grp_dest, var_name:str, variable_type):
    "Create a NetCDF variable with appropriate compression settings"
    compression_kwargs = {'compression': None} if variable_type == str else {'compression': 'zlib', 'complevel': 9}
    grp_dest.createVariable(var_name, variable_type, (NC_DIM,), **compression_kwargs)
```
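Variable-length string variables cannot be zlib-compressed by netCDF, which is presumably why compression is disabled for `str` and enabled at maximum level for everything else. A standalone sketch of the two branches (hypothetical file and variable names):

```python
import numpy as np
from netCDF4 import Dataset

with Dataset('vars.nc', 'w') as nc:
    nc.createDimension('sample', None)
    nc.createVariable('label', str, ('sample',), compression=None)                        # strings: no compression
    nc.createVariable('value', np.float64, ('sample',), compression='zlib', complevel=9)  # numeric: zlib, level 9
```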
Exported source
```python
@patch
def _populate_variable_data(self:NetCDFEncoder, grp_dest, var_name:str, variable_type, df):
    "Populate the NetCDF variable with data from the dataframe"
    values = df[self.nc_to_cols[var_name]].values
    is_enum_type = hasattr(variable_type, '__class__') and 'EnumType' in str(type(variable_type))
    if is_enum_type: 
        values = self.sanitize_if_enum_and_nan(values)
    if variable_type == str:
        for i, v in enumerate(values): 
            grp_dest[var_name][i] = v
    else: 
        grp_dest[var_name][:] = values
```
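Writing also differs by type: variable-length string variables are filled one element at a time, while numeric (and enum) variables accept a bulk slice assignment. A standalone sketch with hypothetical data:

```python
import numpy as np
from netCDF4 import Dataset

with Dataset('fill.nc', 'w') as nc:
    nc.createDimension('sample', None)
    labels = nc.createVariable('label', str, ('sample',))
    values = nc.createVariable('value', np.float64, ('sample',))
    for i, v in enumerate(['a', 'b', 'c']):        # strings: element-wise assignment
        labels[i] = v
    values[:] = np.array([1.0, 2.0, 3.0])          # numeric: whole-array slice assignment
```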
```python
'''
#| exports - Not used in this notebook - NM/01-30-2025.

@patch
def copy_enum_type(self:NetCDFEncoder, dtype_name):
    # if enum type not already created
    if dtype_name not in self.enum_types:
        enum_info = self.src.enumtypes[dtype_name]
        # If a subset of an enum is defined in enums_xtra (typically for the lengthy species_t)
        if enum_info.name in self.enums_xtra:
            # add "not applicable"
            enum_info.enum_dict = self.enums_xtra[enum_info.name]
            enum_info.enum_dict['Not applicable'] = -1 # TBD
        self.enum_types[dtype_name] = self.dest.createEnumType(enum_info.dtype, 
                                                               enum_info.name, 
                                                               enum_info.enum_dict)
'''
```
Retrieve all unique columns from the dict of dataframes.
Exported source
```python
@patch
def retrieve_all_cols(self:NetCDFEncoder, dtypes=NC_DTYPES):
    "Retrieve all unique columns from the dict of dataframes."
    return list(set(col for df in self.dfs.values() for col in df.columns if col in dtypes.keys()))
```
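The method is a plain set comprehension over the columns of every dataframe, keeping only those known to the dtype mapping. A toy example with a stand-in for `NC_DTYPES`:

```python
import pandas as pd

dfs = {'seawater': pd.DataFrame(columns=['LON', 'LAT', 'NUCLIDE']),
       'biota':    pd.DataFrame(columns=['LON', 'SPECIES', 'COMMENT'])}
dtypes = {'NUCLIDE': 'nuclide_t', 'SPECIES': 'species_t'}   # hypothetical stand-in for NC_DTYPES

cols = list({col for df in dfs.values() for col in df.columns if col in dtypes})
# e.g. ['NUCLIDE', 'SPECIES'] (set order is not guaranteed)
```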
```python
@patch
def copy_custom_map(self:NetCDFEncoder, var_name, grp_dest):
    """Copy custom maps for variables."""
    custom_maps = self.custom_maps
    # Convert group names using NC_GROUPS
    custom_maps = {NC_GROUPS[key]: value for key, value in custom_maps.items()}
    group_maps = custom_maps.get(grp_dest.name, {})
    # Convert var names using NC_VARS
    group_maps = {NC_VARS[key]: value for key, value in group_maps.items()}
    if var_name in group_maps:
        # Set the map as an attribute of the variable
        grp_dest[var_name].setncatts({f"{var_name}_map": str(group_maps[var_name])})
```
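`custom_maps` is therefore expected as a nested dict keyed first by group and then by column; after renaming through `NC_GROUPS` and `NC_VARS`, the inner mapping is stored as a string attribute named `<variable>_map` on the matching variable. A hypothetical shape (the group and column keys are assumptions and must exist in `NC_GROUPS` and `NC_VARS`):

```python
# Hypothetical custom map attached to the nuclide column of the seawater group.
custom_maps = {
    'SEAWATER': {
        'NUCLIDE': {1: 'h3', 2: 'cs137'},
    }
}
# After encoding, the corresponding NetCDF variable would carry a string
# attribute along the lines of:  <variable>_map = "{1: 'h3', 2: 'cs137'}"
```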
Encode MARIS NetCDF based on template and dataframes.
Exported source
```python
@patch
def encode(self:NetCDFEncoder):
    "Encode MARIS NetCDF based on template and dataframes."
    with Dataset(self.src_fname, format='NETCDF4') as self.src, Dataset(self.dest_fname, 'w', format='NETCDF4') as self.dest:
        self.copy_global_attributes()
        self.create_enums()
        self.process_groups()
```
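Putting it together, and continuing the hypothetical `encoder` from the construction sketch above: call `encode()` and read the file back with `netCDF4` to inspect what was written.

```python
encoder.encode()

from netCDF4 import Dataset
with Dataset('encoded.nc', 'r') as nc:
    print(nc.groups.keys())   # groups written from the dataframes
    print(nc.title)           # global attribute supplied via global_attrs
```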
```python
# Test that global attributes are copied
# with Dataset(dest, 'r', format='NETCDF4') as nc:
#     for k, v in {'id': '123', 'title': 'Test title', 'summary': 'Summary test'}.items():
#         fc.test_eq(getattr(nc, k), v)
```
```python
# Test that dimension is `sample` and unlimited
# with Dataset(dest, 'r', format='NETCDF4') as nc:
#     fc.test_eq('sample' in nc.dimensions, True)
#     fc.test_eq(nc.dimensions['sample'].isunlimited(), True)
```
```python
# Test that groups are created
# with Dataset(dest, 'r', format='NETCDF4') as nc:
#     fc.test_eq(nc.groups.keys(), ['seawater', 'biota'])
```
```python
# Test that correct variables are created in groups
# with Dataset(dest, 'r', format='NETCDF4') as nc:
#     fc.test_eq(nc['biota'].variables.keys(), 
#                ['sample', 'lon', 'lat', 'time', 'species', 'i131', 'i131_dl', 'i131_unit'])
#     fc.test_eq(nc['seawater'].variables.keys(), 
#                ['sample', 'lon', 'lat', 'time', 'i131', 'i131_dl', 'i131_unit'])
```
```python
# Inspect dimensions in root and groups
# with Dataset(dest, 'r', format='NETCDF4') as nc:
#     print(nc.dimensions.items())
#     print(nc['biota'].dimensions.items())
#     print(nc['seawater'].dimensions.items())
```
```python
# Test that custom maps are copied
# with Dataset(dest, 'r', format='NETCDF4') as nc:
#     print(nc['seawater'].variables.items())
#     print(nc['biota'].variables.items())
```