Various utilities to decode MARIS datasets from NetCDF.

Convert NetCDF to OpenRefine CSV

MARIS NetCDF files can be converted to OpenRefine CSV files. These CSV files are compatible with OpenRefine, the data cleaning tool used during the MARIS data cleaning process before the data are loaded into the MARIS database.


source

NetCDFDecoder

 NetCDFDecoder (dfs:Dict[str,pandas.core.frame.DataFrame], fname_in:str,
                dest_out:str, output_format:str, remap_vars:Dict[str,str],
                verbose:bool=False)

Decode MARIS NetCDF files to human readable formats.

Type Default Details
dfs Dict
fname_in str Path to NetCDF file
dest_out str
output_format str
remap_vars Dict
verbose bool False
Exported source
class NetCDFDecoder:
    """Decode MARIS NetCDF files to human readable formats."""
    def __init__(self, 
                 dfs: Dict[str, pd.DataFrame],  # DataFrames keyed by group name
                 fname_in: str,  # Path to NetCDF file
                 dest_out: str,  # Base path for output files; when `None`, `save_dataframes` derives it from `fname_in`
                 output_format:str,  # Output format; `save_dataframes` only accepts 'csv'
                 remap_vars: Dict[str, str],  # Mapping from current column names to output column names
                 verbose: bool=False  # Print the path of each file written
                ):
        # `store_attr` (presumably fastcore's -- the import is not visible here)
        # keeps each constructor argument on `self` under the same name; the
        # other methods read them back as `self.dfs`, `self.dest_out`, etc.
        fc.store_attr()

source

NetCDFDecoder.process_groups

 NetCDFDecoder.process_groups ()

Process all groups in the dataset.

Exported source
@patch
def process_groups(self: NetCDFDecoder):
    """Run `process_group` on every (group name, DataFrame) pair in `self.dfs`.

    The instance-level `remap_vars` mapping is handed to each call.
    """
    for name, frame in self.dfs.items():
        self.process_group(name, frame, self.remap_vars)

source

NetCDFDecoder.process_group

 NetCDFDecoder.process_group (group_name:str,
                              df:pandas.core.frame.DataFrame,
                              remap_vars:Dict[str,str])

Process a single group, mapping column names using remap_vars.

Exported source
@patch
def process_group(self: NetCDFDecoder, group_name: str, df: pd.DataFrame, remap_vars: Dict[str, str]):
    """Rename the columns of `df` in place according to `remap_vars`.

    Columns that have no entry in `remap_vars` keep their current name.
    `group_name` is accepted but not used by this method.
    """
    renamed = []
    for original in df.columns:
        # Fall back to the existing name when no mapping is defined.
        renamed.append(remap_vars.get(original, original))
    df.columns = renamed

source

NetCDFDecoder.save_dataframes

 NetCDFDecoder.save_dataframes ()

*Save DataFrames to CSV files.

Each group in the DataFrame dictionary will be saved as a separate CSV file with the naming pattern: {base_path}_{group_name}.csv. If no destination path is provided, the base path is derived from the input file name.

Raises: ValueError: If the output format is not CSV*

Exported source
@patch
def save_dataframes(self: NetCDFDecoder):
    """
    Save each group's DataFrame to its own CSV file.

    Files are named ``{base_path}_{group_name}.csv``, where ``base_path`` is
    ``dest_out`` without its final suffix. When ``dest_out`` is ``None`` it is
    set from ``fname_in`` (also without its final suffix). Missing parent
    directories of the output path are created.

    Raises:
        ValueError: If ``output_format`` is not 'csv'.
    """
    # Reject unsupported formats first so that a failed call leaves
    # `self.dest_out` untouched.
    if self.output_format != 'csv':
        raise ValueError("Only CSV format is supported")

    # No destination given: write next to the input file, minus its extension.
    if self.dest_out is None:
        self.dest_out = str(Path(self.fname_in).with_suffix(''))

    # NOTE: `with_suffix('')` drops whatever follows the last dot of the final
    # path component, so a dotted name such as 'run.v2' becomes 'run'.
    base_path = Path(self.dest_out).with_suffix('')

    # `DataFrame.to_csv` does not create directories, so make sure the target
    # directory exists before writing.
    base_path.parent.mkdir(parents=True, exist_ok=True)

    # One CSV per group; the group name is appended to the base path.
    for group_name, df in self.dfs.items():
        output_path = f"{base_path}_{group_name}.csv"
        df.to_csv(output_path, index=False)

        if self.verbose:
            print(f"Saved {group_name} to {output_path}")

source

NetCDFDecoder.decode

 NetCDFDecoder.decode ()

Decode NetCDF to Human readable files.

Exported source
@patch
def decode(self: NetCDFDecoder):
    """Run the full decoding pipeline and return the processed DataFrames.

    Columns of every group are renamed first, then each group is written to
    disk; the (renamed) `self.dfs` dictionary is returned.
    """
    self.process_groups()    # rename columns of every group
    self.save_dataframes()   # write one file per group
    decoded = self.dfs
    return decoded
# Two small in-memory groups used to demonstrate the decoder.
df_seawater = pd.DataFrame(dict(
    ID=[0, 1, 2],
    LON=[141, 142, 143],
    LAT=[37.3, 38.3, 39.3],
    TIME=[1234, 1235, 1236],
    NUCLIDE=[1, 2, 3],
    VALUE=[0.1, 1.1, 2.1],
    AREA=[2374, 2379, 2401],
))

df_biota = pd.DataFrame(dict(
    ID=[0, 1, 2, 3],
    LON=[141, 142, 143, 144],
    LAT=[37.3, 38.3, 39.3, 40.3],
    TIME=[1234, 1235, 1236, 1237],
    NUCLIDE=[1, 2, 3, 3],
    VALUE=[0.1, 1.1, 2.1, 3.1],
    SPECIES=[1, 2, 3, 3],
))

# Group name -> DataFrame, as expected by `NetCDFDecoder`.
dfs = {'SEAWATER': df_seawater, 'BIOTA': df_biota}
fname = Path('../../_data/output/100-HELCOM-MORS-2024.nc')

# Output files are written next to the input file, using its name without
# the extension as the base path.
decoder = NetCDFDecoder(
    dfs=dfs,
    fname_in=fname,
    dest_out=fname.with_suffix(''),
    output_format='csv',
    remap_vars=CSV_VARS,
    verbose=True,
)
decoder.decode()
Saved SEAWATER to ../../_data/output/100-HELCOM-MORS-2024_SEAWATER.csv
Saved BIOTA to ../../_data/output/100-HELCOM-MORS-2024_BIOTA.csv
{'SEAWATER':    ID  longitude  latitude  begperiod  nuclide_id  activity  area
 0   0        141      37.3       1234           1       0.1  2374
 1   1        142      38.3       1235           2       1.1  2379
 2   2        143      39.3       1236           3       2.1  2401,
 'BIOTA':    ID  longitude  latitude  begperiod  nuclide_id  activity  species_id
 0   0        141      37.3       1234           1       0.1           1
 1   1        142      38.3       1235           2       1.1           2
 2   2        143      39.3       1236           3       2.1           3
 3   3        144      40.3       1237           3       3.1           3}