Dict of DataFrames to encode, keyed by group name, e.g. {'sediment': df_sed, ...}
dest_fname
str
Name of output file to produce
global_attrs
Dict
Global attributes
fn_src_fname
Callable
nc_tpl_path
Function returning file name and path to the MARIS CDL template
verbose
bool
False
Print currently written NetCDF group and variable names
Exported source
class NetCDFEncoder:
    "Encode a dict of dataframes as a MARIS NetCDF file built from a template."
    def __init__(self,
                 dfs: Dict[str, pd.DataFrame],  # Dataframes to encode, keyed by group name, e.g. {'sediment': df_sed, ...}
                 dest_fname: str,  # Name of output file to produce
                 global_attrs: Dict[str, str],  # Global attributes
                 fn_src_fname: Callable=nc_tpl_path,  # Function returning file name and path to the MARIS CDL template
                 verbose: bool=False,  # Print currently written NetCDF group and variable names
                 ):
        # Keep every constructor argument as an attribute of the same name.
        store_attr()
        # The template location is resolved once, at construction time.
        self.src_fname = fn_src_fname()
        # Enum types created in the destination file, keyed by enum type name.
        self.enum_dtypes = {}
        # Reverse of `NC_VARS`: maps each value of `NC_VARS` back to its key.
        self.nc_to_cols = {nc_name: col for col, nc_name in NC_VARS.items()}
Update NetCDF template global attributes as specified by global_attrs argument.
Exported source
@patch
def copy_global_attributes(self:NetCDFEncoder):
    "Copy the template's global attributes to the destination, then apply `global_attrs` on top."
    # Start from everything the template declares ...
    template_attrs = self.src.__dict__
    self.dest.setncatts(template_attrs)
    # ... then set the user-supplied values one by one (same keys are overwritten).
    for attr_name, attr_value in self.global_attrs.items():
        self.dest.setncattr(attr_name, attr_value)
Copy dimensions to root and all groups from template.
Exported source
@patch
def copy_dimensions(self:NetCDFEncoder, grp_dest):
    "Recreate in `grp_dest` the dimensions of the template group that has the same name."
    template_dims = self.src.groups[grp_dest.name].dimensions
    for dim_name, template_dim in template_dims.items():
        # A size of `None` is passed for unlimited dimensions; otherwise the template length is reused.
        if template_dim.isunlimited():
            size = None
        else:
            size = len(template_dim)
        grp_dest.createDimension(dim_name, size)
@patch
def copy_variables(self:NetCDFEncoder, grp_name, df, grp_dest):
    "Pass to `copy_variable` every template variable of `grp_name` that matches a column of `df`."
    # Translate dataframe column names through `NC_VARS`; columns without an entry are ignored.
    wanted = [NC_VARS[column] for column in df.columns if column in NC_VARS]
    template_vars = self.src.groups[grp_name].variables
    for var_name, var_src in template_vars.items():
        if var_name not in wanted:
            continue
        self.copy_variable(var_name, var_src, df, grp_dest)
@patch
def copy_enum_type(self:NetCDFEncoder, dtype_name):
    "Create enum type `dtype_name` in the destination file from the template, at most once."
    # `__init__` only creates `enum_dtypes`. An `enum_types` cache attached elsewhere is
    # still honoured when present; otherwise `enum_dtypes` is used so the call no longer
    # fails with AttributeError.
    created = getattr(self, 'enum_types', None)
    if created is None:
        created = self.enum_dtypes
    # Nothing to do if the enum type was already created.
    if dtype_name in created:
        return

    # `enums_xtra` is likewise not set by `__init__`; treat it as empty when absent.
    enums_xtra = getattr(self, 'enums_xtra', {})

    enum_info = self.src.enumtypes[dtype_name]
    enum_dict = enum_info.enum_dict
    # If a subset of an enum is defined in enums_xtra (typically for the lengthy species_t)
    if enum_info.name in enums_xtra:
        # Work on a copy so that neither the template enum nor the caller's dict is modified.
        enum_dict = dict(enums_xtra[enum_info.name])
        # add "not applicable"
        enum_dict['Not applicable'] = -1  # TBD

    created[dtype_name] = self.dest.createEnumType(enum_info.dtype,
                                                   enum_info.name,
                                                   enum_dict)
Retrieve all unique columns from the dict of dataframes.
Exported source
@patch
def retrieve_all_cols(self:NetCDFEncoder,
                      dtypes=NC_DTYPES
                      ):
    "Return, without duplicates, the columns of all dataframes that are keys of `dtypes`."
    unique_cols = set()
    for df in self.dfs.values():
        unique_cols.update(col for col in df.columns if col in dtypes.keys())
    # The result comes from a set, so its order is not meaningful.
    return list(unique_cols)
@patch
def create_enums(self:NetCDFEncoder):
    "Create one int64 enum type in the destination file per column from `retrieve_all_cols` and record it in `enum_dtypes`."
    cols = self.retrieve_all_cols()
    enums = Enums(lut_src_dir=lut_path())

    if self.verbose:
        print(80*'-')
        print('Creating enums for the following columns:')
        print(cols)

    for col in cols:
        # The enum type is named after the `name` entry of the column in `NC_DTYPES`.
        name = NC_DTYPES[col]['name']
        values = enums.types[col]
        if self.verbose:
            print(f'Creating enum for {name} with values {values}.')
        self.enum_dtypes[name] = self.dest.createEnumType(np.int64, name, values)
Encode MARIS NetCDF based on template and dataframes.
Exported source
@patch
def encode(self:NetCDFEncoder):
    "Write the destination NetCDF file from the template and the dataframes."
    # Both datasets are bound to the instance (`self.src`, `self.dest`) so the helper
    # methods can reach them; they are closed when the `with` blocks exit.
    with Dataset(self.src_fname, format='NETCDF4') as self.src:
        with Dataset(self.dest_fname, 'w', format='NETCDF4') as self.dest:
            self.copy_global_attributes()
            self.create_enums()
            self.process_groups()
# Test that global attributes are copied# with Dataset(dest, 'r', format='NETCDF4') as nc:# for k, v in {'id': '123', 'title': 'Test title', 'summary': 'Summary test'}.items():# fc.test_eq(getattr(nc, k), v)
# Test that dimension is `sample` and unlimited# with Dataset(dest, 'r', format='NETCDF4') as nc:# fc.test_eq('sample' in nc.dimensions, True)# fc.test_eq(nc.dimensions['sample'].isunlimited(), True)
# Test that groups are created# with Dataset(dest, 'r', format='NETCDF4') as nc:# fc.test_eq(nc.groups.keys(), ['seawater', 'biota'])
# Test that groups are created# with Dataset(dest, 'r', format='NETCDF4') as nc:# fc.test_eq(nc.groups.keys(), ['seawater', 'biota'])
# Test that correct variables are created in groups# with Dataset(dest, 'r', format='NETCDF4') as nc:# fc.test_eq(nc['biota'].variables.keys(), # ['sample', 'lon', 'lat', 'time', 'species', 'i131', 'i131_dl', 'i131_unit'])# fc.test_eq(nc['seawater'].variables.keys(), # ['sample', 'lon', 'lat', 'time', 'i131', 'i131_dl', 'i131_unit'])
# Inspect the dimensions of the root and of each group
# with Dataset(dest, 'r', format='NETCDF4') as nc:
#     print(nc.dimensions.items())
#     print(nc['biota'].dimensions.items())
#     print(nc['seawater'].dimensions.items())