dict of Dataframes to encode with group name as key {‘sediment’: df_sed, …}
src_fname
str
File name and path to the MARIS CDL template
dest_fname
str
Name of output file to produce
global_attrs
Dict
Global attributes
enums_xtra
Dict
{}
Enumeration types to overwrite
verbose
bool
False
Print currently written NetCDF group and variable names
Exported source
class NetCDFEncoder:
    "MARIS NetCDF encoder."
    def __init__(self,
                 dfs:dict[str, pd.DataFrame], # dict of DataFrames to encode with group name as key {'sediment': df_sed, ...}
                 src_fname:str, # File name and path to the MARIS CDL template
                 dest_fname:str, # Name of output file to produce
                 global_attrs:Dict, # Global attributes
                 enums_xtra:Dict=None, # Enumeration types to overwrite (`None` means no overrides)
                 verbose:bool=False, # Print currently written NetCDF group and variable names
                 ):
        store_attr()
        # Give every instance its own empty dict instead of sharing one mutable `{}` default
        if self.enums_xtra is None: self.enums_xtra = {}
        # Enum types already created in the destination file, keyed by type name
        self.enum_types = {}
Update NetCDF template global attributes as specified by global_attrs argument.
Exported source
@patch
def copy_global_attributes(self:NetCDFEncoder):
    "Copy the template's global attributes to the output, then apply the `global_attrs` overrides."
    # Start from every global attribute carried by the template dataset
    template_attrs = self.src.__dict__
    self.dest.setncatts(template_attrs)
    # Values supplied by the caller are written last, so they win over the template's
    for attr_name, attr_value in self.global_attrs.items():
        self.dest.setncattr(attr_name, attr_value)
@patch
def process_group(self:NetCDFEncoder, group_name, df):
    "Create the `group_name` group in the output, size its dimension from `df` and copy its variables."
    nc_group = self.dest.createGroup(group_name)
    # Each group gets one dimension, named after the group and sized to the dataframe's index length
    n_rows = len(df.index)
    nc_group.createDimension(group_name, n_rows)
    self.copy_variables(group_name, df, nc_group)
@patch
def copy_variable(self:NetCDFEncoder, var_name, var_src, df, group_dest):
    "Create `var_name` in `group_dest` (declaring its enum type first when needed), then copy its attributes."
    type_name = var_src.datatype.name
    template_enums = self.src.enumtypes
    if self.verbose:
        separator = 80*'-'
        print(separator)
        print(f'Group: {group_dest.name}, Variable: {var_name}')
    # A type name listed among the template's enum types means the variable is enum-typed:
    # make sure that enum type exists in the destination before the variable is created
    is_enum = type_name in template_enums
    if is_enum:
        self.copy_enum_type(type_name)
    self._create_and_copy_variable(var_name, var_src, df, group_dest, type_name)
    self.copy_variable_attributes(var_name, var_src, group_dest)
@patch
def copy_enum_type(self:NetCDFEncoder, dtype_name):
    "Create the enum type `dtype_name` in the output file unless it was already created."
    # Nothing to do when this enum type has already been created in the destination
    if dtype_name in self.enum_types: return
    enum_info = self.src.enumtypes[dtype_name]
    # If a subset of an enum is defined in enums_xtra (typically for the lengthy species_t)
    if enum_info.name in self.enums_xtra:
        # Work on a copy: adding the extra member must not modify the dict supplied by the caller
        enum_dict = dict(self.enums_xtra[enum_info.name])
        # add "not applicable"
        enum_dict['Not applicable'] = -1 # TBD
        enum_info.enum_dict = enum_dict
    self.enum_types[dtype_name] = self.dest.createEnumType(enum_info.dtype,
                                                           enum_info.name,
                                                           enum_info.enum_dict)
# DEPRECATED
@patch
def cast_verbose_rf(self:NetCDFEncoder, df, col):
    """
    Cast column `col` of `df` to a numeric type:
        - values that cannot be converted are silently coerced to NaN
        - the number of values lost in the conversion is printed
    Returns the converted column.
    """
    column = df.reset_index()[col]
    n_valid_before = sum(column.notna())
    converted = pd.to_numeric(column, errors='coerce', downcast=None)
    n_valid_after = sum(converted.notna())
    # Any drop in the non-null count means some values could not be converted
    n_failed = n_valid_before - n_valid_after
    if n_failed != 0:
        print(f'Failed to convert type of {col} in {n_failed} occurences')
    return converted
Encode MARIS NetCDF based on template and dataframes.
Exported source
@patch
def encode(self:NetCDFEncoder):
    "Write the output NetCDF file from the template and the dataframes."
    # Both datasets are bound to the instance (`self.src`, `self.dest`) so the helper methods can reach them;
    # they are closed again when the `with` blocks exit
    with Dataset(self.src_fname, format='NETCDF4') as self.src:
        with Dataset(self.dest_fname, 'w', format='NETCDF4') as self.dest:
            self.copy_global_attributes()
            self.copy_dimensions()
            self.process_groups()
# Check that every expected global attribute was written to the output file
expected_attrs = {'id': '123', 'title': 'Test title', 'summary': 'Summary test'}
with Dataset(dest, 'r', format='NETCDF4') as nc:
    for attr_name, attr_value in expected_attrs.items():
        fc.test_eq(getattr(nc, attr_name), attr_value)
# Check that the root `sample` dimension exists and is unlimited
with Dataset(dest, 'r', format='NETCDF4') as nc:
    root_dims = nc.dimensions
    fc.test_eq('sample' in root_dims, True)
    fc.test_eq(root_dims['sample'].isunlimited(), True)
# Test that groups are createdwith Dataset(dest, 'r', format='NETCDF4') as nc: fc.test_eq(nc.groups.keys(), ['seawater', 'biota'])
# Test that groups are createdwith Dataset(dest, 'r', format='NETCDF4') as nc: fc.test_eq(nc.groups.keys(), ['seawater', 'biota'])
# Check that each group holds exactly the expected variables
expected_vars = {
    'biota': ['sample', 'lon', 'lat', 'time', 'species', 'i131', 'i131_dl', 'i131_unit'],
    'seawater': ['sample', 'lon', 'lat', 'time', 'i131', 'i131_dl', 'i131_unit'],
}
with Dataset(dest, 'r', format='NETCDF4') as nc:
    for group_name, var_names in expected_vars.items():
        fc.test_eq(nc[group_name].variables.keys(), var_names)
# Display the dimensions defined at the root of the file and inside each group
with Dataset(dest, 'r', format='NETCDF4') as nc:
    print(nc.dimensions.items())
    for group_name in ('biota', 'seawater'):
        print(nc[group_name].dimensions.items())
dict_items([('sample', <class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'sample', size = 0)])
dict_items([('biota', <class 'netCDF4._netCDF4.Dimension'>: name = 'biota', size = 2)])
dict_items([('seawater', <class 'netCDF4._netCDF4.Dimension'>: name = 'seawater', size = 3)])
dict of Dataframes to encode with group name as key {‘sediment’: df_sed, …}
dest_fname
str
Name of output file to produce
ref_id
int
-1
ref_id to include
verbose
bool
False
Enable verbose printing
Exported source
class OpenRefineCsvEncoder:
    "OpenRefine CSV from NetCDF."
    def __init__(self,
                 dfs:dict[str, pd.DataFrame], # dict of DataFrames to encode with group name as key {'sediment': df_sed, ...}
                 dest_fname:str, # Name of output file to produce
                 ref_id:int=-1, # ref_id to include
                 verbose:bool=False, # Enable verbose printing
                 ):
        # Store every constructor argument as an attribute of the same name
        store_attr()
Encode OpenRefine CSV based on dataframes from NetCDF.
Exported source
@patch
def encode(self:OpenRefineCsvEncoder):
    "Encode OpenRefine CSV based on dataframes from NetCDF."
    # Include ref_id
    # NOTE(review): no ref_id handling is visible in this method; presumably it happens
    # inside `process_groups_to_csv` — confirm
    # Process to csv
    self.process_groups_to_csv()
# Example: encode the dataframes in `dfs` as OpenRefine CSV
dest = '../files/csv/encoding-test.csv'
# `ref_id` and `verbose` are left at their defaults
encoder = OpenRefineCsvEncoder(dfs, dest_fname=dest)
encoder.encode()