Exported source
# TBD: move to configs
NA = 'Not available'
We define useful constants used throughout the package.
We also abstract some common operations.
get_unique_across_dfs (dfs:Dict[str,pandas.core.frame.DataFrame], col_name:str='NUCLIDE', as_df:bool=False, include_nchars:bool=False)
Get a list of unique column values across dataframes.
| | Type | Default | Details |
|---|---|---|---|
| dfs | Dict | | Dictionary of dataframes |
| col_name | str | NUCLIDE | Column name to extract unique values from |
| as_df | bool | False | Return a DataFrame of unique values |
| include_nchars | bool | False | Add a column with the number of characters in the value |
| Returns | List | | Returns a list of unique column values across dataframes |
def get_unique_across_dfs(dfs: Dict[str, pd.DataFrame], # Dictionary of dataframes
                          col_name: str='NUCLIDE', # Column name to extract unique values from
                          as_df: bool=False, # Return a DataFrame of unique values
                          include_nchars: bool=False # Add a column with the number of characters in the value
                          ) -> List[str]: # Returns a list of unique column values across dataframes
    "Get a list of unique column values across dataframes."
    unique_values = list(set().union(*(df[col_name].unique() for df in dfs.values() if col_name in df.columns)))
    if not as_df:
        return unique_values
    else:
        df_uniques = pd.DataFrame(unique_values, columns=['value']).reset_index()
        if include_nchars: df_uniques['n_chars'] = df_uniques['value'].str.len()
        return df_uniques
Example of use:
dfs_test = {'SEAWATER': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134_137_tot', 'cs134_137_tot']}),
'BIOTA': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134', 'cs134_137_tot']}),
'SEDIMENT': pd.DataFrame({'NUCLIDE': ['cs134_137_tot', 'cs134_137_tot', 'cs134_137_tot']})}
fc.test_eq(set(get_unique_across_dfs(dfs_test, col_name='NUCLIDE')),
set(['cs134', 'cs137', 'cs134_137_tot']))
What if the column name is missing from one of the dataframes?
dfs_test = {'SEAWATER': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134_137_tot', 'cs134_137_tot']}),
'BIOTA': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134', 'cs134_137_tot']}),
'SEDIMENT': pd.DataFrame({'NONUCLIDE': ['cs134_137_tot', 'cs134_137_tot', 'cs134_137_tot']})}
fc.test_eq(set(get_unique_across_dfs(dfs_test, col_name='NUCLIDE')),
set(['cs134', 'cs137', 'cs134_137_tot']))
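The DataFrame shown below is not preceded by its producing call on this page; presumably it was generated by requesting a DataFrame with character counts, along these lines (using the same `dfs_test` as above):

get_unique_across_dfs(dfs_test, col_name='NUCLIDE', as_df=True, include_nchars=True)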
| | index | value | n_chars |
|---|---|---|---|
| 0 | 0 | cs134 | 5 |
| 1 | 1 | cs134_137_tot | 13 |
| 2 | 2 | cs137 | 5 |
Remapper (provider_lut_df:pandas.core.frame.DataFrame, maris_lut_fn:Union[Callable,pandas.core.frame.DataFrame], maris_col_id:str, maris_col_name:str, provider_col_to_match:str, provider_col_key:str, fname_cache:str)
Remap a data provider lookup table to a MARIS lookup table using fuzzy matching.
| | Type | Details |
|---|---|---|
| provider_lut_df | DataFrame | Data provider lookup table to be remapped |
| maris_lut_fn | Union | MARIS lookup table or function returning the path |
| maris_col_id | str | MARIS lookup table column name for the id |
| maris_col_name | str | MARIS lookup table column name for the name |
| provider_col_to_match | str | Data provider lookup table column name for the name to match |
| provider_col_key | str | Data provider lookup table column name for the key |
| fname_cache | str | Cache file name |
class Remapper():
    "Remap a data provider lookup table to a MARIS lookup table using fuzzy matching."
    def __init__(self,
                 provider_lut_df: pd.DataFrame, # Data provider lookup table to be remapped
                 maris_lut_fn: Union[Callable, pd.DataFrame], # MARIS lookup table or function returning the path
                 maris_col_id: str, # MARIS lookup table column name for the id
                 maris_col_name: str, # MARIS lookup table column name for the name
                 provider_col_to_match: str, # Data provider lookup table column name for the name to match
                 provider_col_key: str, # Data provider lookup table column name for the key
                 fname_cache: str # Cache file name
                 ):
        fc.store_attr()
        self.cache_file = cache_path() / fname_cache

        # Check if maris_lut is a callable function or already a DataFrame
        if callable(maris_lut_fn):
            self.maris_lut = maris_lut_fn()
        else:
            self.maris_lut = maris_lut_fn

        self.lut = {}

    def generate_lookup_table(self,
                              fixes={}, # Lookup table fixes
                              as_df=True, # Whether to return a DataFrame
                              overwrite=True):
        "Generate a lookup table from a data provider lookup table to a MARIS lookup table using fuzzy matching."
        self.fixes = fixes
        self.as_df = as_df
        if overwrite or not self.cache_file.exists():
            self._create_lookup_table()
            fc.save_pickle(self.cache_file, self.lut)
        else:
            self.lut = fc.load_pickle(self.cache_file)

        return self._format_output()

    def _create_lookup_table(self):
        df = self.provider_lut_df
        for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing"):
            self._process_row(row)

    def _process_row(self, row):
        value_to_match = row[self.provider_col_to_match]
        if isinstance(value_to_match, str): # Only process if value is a string
            # If value is in fixes, use the fixed value
            name_to_match = self.fixes.get(value_to_match, value_to_match)
            result = match_maris_lut(self.maris_lut, name_to_match, self.maris_col_id, self.maris_col_name).iloc[0]
            match = Match(result[self.maris_col_id], result[self.maris_col_name],
                          value_to_match, result['score'])
            self.lut[row[self.provider_col_key]] = match
        else:
            # Handle non-string values (e.g., NaN)
            self.lut[row[self.provider_col_key]] = Match(-1, "Unknown", value_to_match, 0)

    def select_match(self, match_score_threshold: int=1, verbose: bool=False):
        if verbose:
            matched_len = len([v for v in self.lut.values() if v.match_score < match_score_threshold])
            print(f"{matched_len} entries matched the criteria, while {len(self.lut) - matched_len} entries had a match score of {match_score_threshold} or higher.")
        self.lut = {k: v for k, v in self.lut.items() if v.match_score >= match_score_threshold}
        return self._format_output()

    def _format_output(self):
        if not self.as_df: return self.lut
        df_lut = pd.DataFrame.from_dict(self.lut, orient='index',
                                        columns=['matched_maris_name', 'source_name', 'match_score'])
        df_lut.index.name = 'source_key'
        return df_lut.sort_values(by='match_score', ascending=False)
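A usage sketch is shown below. The provider file name, its column names and the cache file name are illustrative assumptions, not part of the package:

# Hypothetical provider lookup table and column names - for illustration only
df_provider = pd.read_excel('../files/lut/provider_species.xlsx')      # assumed provider lookup table
remapper = Remapper(provider_lut_df=df_provider,
                    maris_lut_fn='../files/lut/dbo_species_cleaned.xlsx',
                    maris_col_id='species_id',
                    maris_col_name='species',
                    provider_col_to_match='scientific_name',            # assumed provider column
                    provider_col_key='provider_key',                     # assumed provider column
                    fname_cache='species_provider.pkl')                  # assumed cache file name
df_remapped = remapper.generate_lookup_table(fixes={}, as_df=True)       # fuzzy match every provider entry
remapper.select_match(match_score_threshold=1, verbose=True)             # keep entries with score >= 1, i.e. those needing review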
has_valid_varname (var_names:List[str], cdl_path:str, group:Optional[str]=None)
Check that proposed variable names are in MARIS CDL
| | Type | Default | Details |
|---|---|---|---|
| var_names | List | | Variable names |
| cdl_path | str | | Path to MARIS CDL file (point of truth) |
| group | Optional | None | Check if the variable names are contained in the group |
# TBD: Assess if still needed
def has_valid_varname(
    var_names: List[str], # variable names
    cdl_path: str, # Path to MARIS CDL file (point of truth)
    group: Optional[str] = None, # Check if the variable names are contained in the group
):
    "Check that proposed variable names are in MARIS CDL"
    has_valid = True
    with Dataset(cdl_path) as nc:
        cdl_vars = {}
        all_vars = []
        # Get variable names in CDL
        for grp in nc.groups.values():
            # Create a list of vars for each group
            vars = list(grp.variables.keys())
            cdl_vars[grp.name] = vars
            all_vars.extend(vars)

    if group != None:
        allowed_vars = cdl_vars[group]
    else:
        # Get unique variable names across all groups
        allowed_vars = list(set(all_vars))

    for name in var_names:
        if name not in allowed_vars:
            has_valid = False
            if group != None:
                print(f'"{name}" variable name not found in group "{group}" of MARIS CDL')
            else:
                print(f'"{name}" variable name not found in MARIS CDL')

    return has_valid
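A minimal usage sketch; the CDL path and group name below are assumptions for illustration:

# Hypothetical CDL path and group name - for illustration only
valid = has_valid_varname(['lon', 'lat', 'not_a_real_var'],
                          cdl_path='../files/nc/maris-cdl.nc',
                          group='seawater')
print(valid)  # False, and a message is printed for each name not found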
get_bbox (df, coord_cols:Tuple[str,str]=('LON', 'LAT'))
Get the bounding box of a DataFrame.
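The implementation and return type are not reproduced on this page; the call below only illustrates the signature, using made-up coordinates:

df = pd.DataFrame({'LON': [140.6, 141.67], 'LAT': [35.8, 38.63]})
bbox = get_bbox(df)  # defaults to coord_cols=('LON', 'LAT')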
ddmm_to_dd (ddmmmm:float)
| | Type | Details |
|---|---|---|
| ddmmmm | float | Coordinates in degrees/minutes decimal format |
| Returns | float | Coordinates in degrees decimal format |
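The body of ddmm_to_dd is not reproduced on this page. A typical degrees/decimal-minutes (DDMM.mm) to decimal-degrees conversion, which this helper presumably performs, is sketched below; this is an assumption, not the package source:

import math

def ddmm_to_dd_sketch(ddmmmm: float) -> float:
    "Sketch only: convert DDMM.mm (e.g. 3748.0 = 37 degrees 48.0 minutes) to decimal degrees."
    minutes_frac, degrees = math.modf(ddmmmm / 100)   # e.g. 37.48 -> (0.48, 37.0)
    return degrees + (minutes_frac * 100) / 60        # 37 + 48/60 = 37.8

ddmm_to_dd_sketch(3748.0)  # 37.8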
download_file (owner, repo, src_dir, dest_dir, fname)
def download_files_in_folder(
    owner: str, # GitHub owner
    repo: str, # GitHub repository
    src_dir: str, # Source directory
    dest_dir: str # Destination directory
):
    "Make a GET request to the GitHub API to get the contents of the folder."
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{src_dir}"
    response = requests.get(url)

    if response.status_code == 200:
        contents = response.json()
        # Iterate over the files and download them
        for item in contents:
            if item["type"] == "file":
                fname = item["name"]
                download_file(owner, repo, src_dir, dest_dir, fname)
    else:
        print(f"Error: {response.status_code}")

def download_file(owner, repo, src_dir, dest_dir, fname):
    # Make a GET request to the GitHub API to get the raw file contents
    url = f"https://raw.githubusercontent.com/{owner}/{repo}/master/{src_dir}/{fname}"
    response = requests.get(url)

    if response.status_code == 200:
        # Save the file locally
        with open(Path(dest_dir) / fname, "wb") as file:
            file.write(response.content)
        print(f"{fname} downloaded successfully.")
    else:
        print(f"Error: {response.status_code}")
download_files_in_folder (owner:str, repo:str, src_dir:str, dest_dir:str)
Make a GET request to the GitHub API to get the contents of the folder.
| | Type | Details |
|---|---|---|
| owner | str | GitHub owner |
| repo | str | GitHub repository |
| src_dir | str | Source directory |
| dest_dir | str | Destination directory |
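For example, mirroring a folder of lookup tables from a public GitHub repository into a local directory; the owner, repository and paths below are placeholders:

# Placeholder arguments - any public repository folder works the same way
download_files_in_folder(owner='some-org', repo='some-repo',
                         src_dir='files/lut', dest_dir='./lut')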
The World Register of Marine Species (WoRMS) is an authoritative classification and catalogue of marine names. It provides a REST API (among others) that allows "fuzzy" matching of any species name you might encounter in marine data sources against their own database. There are several types of matches, as described here.
match_worms (name:str)
Lookup `name` in WoRMS (fuzzy match).
| | Type | Details |
|---|---|---|
| name | str | Name of species to look up in WoRMS |
def match_worms(
    name: str # Name of species to look up in WoRMS
):
    "Lookup `name` in WoRMS (fuzzy match)."
    url = 'https://www.marinespecies.org/rest/AphiaRecordsByMatchNames'
    params = {
        'scientificnames[]': [name],
        'marine_only': 'true'
    }
    headers = {
        'accept': 'application/json'
    }

    response = requests.get(url, params=params, headers=headers)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        data = response.json()
        return data
    else:
        return -1
For instance:
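The call that produced the record below is not shown on this page; presumably it was something like:

match_worms('Aristeus antennatus')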
[[{'AphiaID': 107083,
'url': 'https://www.marinespecies.org/aphia.php?p=taxdetails&id=107083',
'scientificname': 'Aristeus antennatus',
'authority': '(Risso, 1816)',
'status': 'accepted',
'unacceptreason': None,
'taxonRankID': 220,
'rank': 'Species',
'valid_AphiaID': 107083,
'valid_name': 'Aristeus antennatus',
'valid_authority': '(Risso, 1816)',
'parentNameUsageID': 106807,
'kingdom': 'Animalia',
'phylum': 'Arthropoda',
'class': 'Malacostraca',
'order': 'Decapoda',
'family': 'Aristeidae',
'genus': 'Aristeus',
'citation': 'DecaNet eds. (2024). DecaNet. Aristeus antennatus (Risso, 1816). Accessed through: World Register of Marine Species at: https://www.marinespecies.org/aphia.php?p=taxdetails&id=107083 on 2024-12-17',
'lsid': 'urn:lsid:marinespecies.org:taxname:107083',
'isMarine': 1,
'isBrackish': 0,
'isFreshwater': 0,
'isTerrestrial': 0,
'isExtinct': 0,
'match_type': 'exact',
'modified': '2022-08-24T09:48:14.813Z'}]]
Fuzzy matching uses distance metrics from the jellyfish library (https://jamesturk.github.io/jellyfish).
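For instance, the Levenshtein edit distance (the default `dist_fn` of `match_maris_lut` below) counts the minimum number of single-character edits between two strings:

import jellyfish as jf

jf.levenshtein_distance('cs-137', 'cs137')  # 1: deleting '-' turns 'cs-137' into 'cs137'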
Match (matched_id:int, matched_maris_name:str, source_name:str, match_score:int)
Match between a data provider name and a MARIS lookup table.
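The definition of Match is not reproduced on this page; a container along the lines below (a sketch consistent with the signature above) is all Remapper relies on:

from dataclasses import dataclass

@dataclass
class Match:
    matched_id: int          # id in the MARIS lookup table
    matched_maris_name: str  # matched name in the MARIS lookup table
    source_name: str         # original name from the data provider
    match_score: int         # distance score (0 means an exact match)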
match_maris_lut (lut:Union[str,pandas.core.frame.DataFrame,pathlib.Path], data_provider_name:str, maris_id:str, maris_name:str, dist_fn:Callable=<built-in function levenshtein_distance>, nresults:int=10)
Fuzzy matching data provider and MARIS lookup tables (e.g. biota species, sediments, …).
| | Type | Default | Details |
|---|---|---|---|
| lut | Union | | Either str, Path or DataFrame |
| data_provider_name | str | | Name of data provider nomenclature item to look up |
| maris_id | str | | Id of MARIS lookup table nomenclature item to match |
| maris_name | str | | Name of MARIS lookup table nomenclature item to match |
| dist_fn | Callable | levenshtein_distance | Distance function |
| nresults | int | 10 | Maximum number of results to return |
| Returns | DataFrame | | |
def match_maris_lut(
    lut: Union[str, pd.DataFrame, Path], # Either str, Path or DataFrame
    data_provider_name: str, # Name of data provider nomenclature item to look up
    maris_id: str, # Id of MARIS lookup table nomenclature item to match
    maris_name: str, # Name of MARIS lookup table nomenclature item to match
    dist_fn: Callable = jf.levenshtein_distance, # Distance function
    nresults: int = 10 # Maximum number of results to return
) -> pd.DataFrame:
    "Fuzzy matching data provider and MARIS lookup tables (e.g. biota species, sediments, ...)."
    if isinstance(lut, str) or isinstance(lut, Path):
        df = pd.read_excel(lut) # Load the LUT if a path is provided
    elif isinstance(lut, pd.DataFrame):
        df = lut # Use the DataFrame directly if provided
    else:
        raise ValueError("lut must be either a file path or a DataFrame")

    df = df.dropna(subset=[maris_name])
    df = df.astype({maris_id: 'int'})
    df['score'] = df[maris_name].str.lower().apply(lambda x: dist_fn(data_provider_name.lower(), x))
    df = df.sort_values(by='score', ascending=True)[:nresults]
    return df[[maris_id, maris_name, 'score']]
Below is an example trying to match the name "PLANKTON" with the dbo_species_cleaned.xlsx MARIS biota species lookup table:
lut_fname = '../files/lut/dbo_species_cleaned.xlsx'
match_maris_lut(lut_fname, data_provider_name='PLANKTON',
maris_id='species_id', maris_name='species')
| | species_id | species | score |
|---|---|---|---|
| 281 | 280 | Plankton | 0 |
| 696 | 695 | Zooplankton | 3 |
| 633 | 632 | Palaemon | 4 |
| 697 | 696 | Phytoplankton | 5 |
| 812 | 811 | Chanos | 5 |
| 160 | 159 | Neuston | 5 |
| 234 | 233 | Penaeus | 6 |
| 1458 | 1457 | Lamnidae | 6 |
| 1438 | 1437 | Labrus | 6 |
| 1527 | 1526 | Favites | 6 |
Below, we demonstrate matching the laboratory name "Central Mining Institute, Poland" with the MARIS lab lookup table from dbo_lab.xlsx. This example utilizes the `lab` and `country` columns. Note that in this instance, `df_lut` is passed directly as the `lut` argument.
lut_fname = '../files/lut/dbo_lab.xlsx'
df_lut=pd.read_excel(lut_fname)
df_lut['lab_country'] = df_lut['lab'] + '_' + df_lut['country']
match_maris_lut(lut=df_lut, data_provider_name='Central Mining Institute, Poland',
maris_id='lab_id', maris_name='lab_country')
| | lab_id | lab_country | score |
|---|---|---|---|
| 6 | 5 | Central Mining Institute_Poland | 2 |
| 203 | 202 | Polytechnic Institute_Romania | 18 |
| 282 | 281 | Norwegian Polar Institute_Norway | 21 |
| 113 | 112 | Nuclear Research Institute_Vietnam | 22 |
| 246 | 245 | Paul Scherrer Institute_Switzerland | 22 |
| 136 | 135 | Nuclear Energy Board_Ireland | 23 |
| 471 | 474 | Kobe University_Japan | 23 |
| 429 | 432 | Qatar University_Qatar | 23 |
| 174 | 173 | Interfaculty Reactor Institute_Netherlands | 23 |
| 177 | 176 | RIKILT_Netherlands | 23 |
Below is an example trying to match the name "GLACIAL" with the dbo_sedtype.xlsx MARIS sediment lookup table:
lut_fname = '../files/lut/dbo_sedtype.xlsx'
match_maris_lut(lut_fname, data_provider_name='GLACIAL',
maris_id='sedtype_id', maris_name='sedtype')
| | sedtype_id | sedtype | score |
|---|---|---|---|
| 26 | 25 | Glacial | 0 |
| 3 | 2 | Gravel | 4 |
| 2 | 1 | Clay | 5 |
| 51 | 50 | Glacial clay | 5 |
| 4 | 3 | Marsh | 6 |
| 7 | 6 | Sand | 6 |
| 13 | 12 | Silt | 6 |
| 15 | 14 | Sludge | 6 |
| 27 | 26 | Soft | 7 |
| 52 | 51 | Soft clay | 7 |
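Below is an example trying to match the name "CS-137" with the dbo_nuclide.xlsx MARIS nuclide lookup table: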
lut_fname = '../files/lut/dbo_nuclide.xlsx'
match_maris_lut(lut_fname, data_provider_name='CS-137',
maris_id='nuclide_id', maris_name='nc_name')
| | nuclide_id | nc_name | score |
|---|---|---|---|
| 31 | 33 | cs137 | 1 |
| 30 | 31 | cs134 | 2 |
| 29 | 30 | cs127 | 2 |
| 99 | 102 | cs136 | 2 |
| 109 | 112 | sb127 | 3 |
| 111 | 114 | ce139 | 3 |
| 25 | 24 | sb125 | 4 |
| 36 | 38 | pm147 | 4 |
| 28 | 29 | i131 | 4 |
| 110 | 113 | ba133 | 4 |
test_dfs (dfs1:Dict[str,pandas.core.frame.DataFrame], dfs2:Dict[str,pandas.core.frame.DataFrame])
Compare two dictionaries of DataFrames for equality (also ensuring that columns are in the same order).
| | Type | Details |
|---|---|---|
| dfs1 | Dict | First dictionary of DataFrames to compare |
| dfs2 | Dict | Second dictionary of DataFrames to compare |
| Returns | None | It raises an AssertionError if the DataFrames are not equal |
def test_dfs(
    dfs1: Dict[str, pd.DataFrame], # First dictionary of DataFrames to compare
    dfs2: Dict[str, pd.DataFrame] # Second dictionary of DataFrames to compare
) -> None: # It raises an `AssertionError` if the DataFrames are not equal
    "Compare two dictionaries of DataFrames for equality (also ensuring that columns are in the same order)."
    for grp in dfs1.keys():
        df1, df2 = (df.sort_index() for df in (dfs1[grp], dfs2[grp]))
        fc.test_eq(df1, df2.reindex(columns=df1.columns))
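A minimal sketch of how test_dfs behaves: identical content with a different column order still passes.

dfs_a = {'SEAWATER': pd.DataFrame({'NUCLIDE': ['cs137'], 'VALUE': [1.0]})}
dfs_b = {'SEAWATER': pd.DataFrame({'VALUE': [1.0], 'NUCLIDE': ['cs137']})}  # same content, columns swapped
test_dfs(dfs_a, dfs_b)  # passes silently: columns are aligned before comparison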
Extract NetCDF contents
ExtractNetcdfContents (filename:str, verbose:bool=False)
Initialize and extract data from a NetCDF file.
class ExtractNetcdfContents:
    def __init__(self, filename: str, verbose: bool = False):
        "Initialize and extract data from a NetCDF file."
        self.filename = filename
        self.verbose = verbose
        self.dfs = {}          # DataFrames extracted from the NetCDF file
        self.enum_dicts = {}   # Enum dictionaries extracted from the NetCDF file
        self.global_attrs = {} # Global attributes extracted from the NetCDF file
        self.extract_all()

    def extract_all(self):
        "Extract data, enums, and global attributes from the NetCDF file."
        if not Path(self.filename).exists():
            print(f'File {self.filename} not found.')
            return

        with Dataset(self.filename, 'r') as nc:
            self.global_attrs = self.extract_global_attributes(nc)
            for group_name in nc.groups:
                group = nc.groups[group_name]
                self.dfs[group_name.upper()] = self.extract_data(group)
                self.enum_dicts[group_name.upper()] = self.extract_enums(group, group_name)

        if self.verbose:
            print("Data extraction complete.")

    def extract_data(self, group) -> pd.DataFrame:
        "Extract data from a group and convert to DataFrame."
        data = {var_name: var[:] for var_name, var in group.variables.items() if var_name not in group.dimensions}
        df = pd.DataFrame(data)
        rename_map = {nc_var: col for col, nc_var in NC_VARS.items() if nc_var in df.columns}
        df = df.rename(columns=rename_map)
        return df

    def extract_enums(self, group, group_name: str) -> Dict:
        "Extract enum dictionaries for variables in a group."
        local_enum_dicts = {}
        for var_name, var in group.variables.items():
            if hasattr(var.datatype, 'enum_dict'):
                local_enum_dicts[var_name] = {str(k): str(v) for k, v in var.datatype.enum_dict.items()}
                if self.verbose:
                    print(f"Extracted enum_dict for {var_name} in {group_name}")
        return local_enum_dicts

    def extract_global_attributes(self, nc) -> Dict:
        "Extract global attributes from the NetCDF file."
        globattrs = {attr: getattr(nc, attr) for attr in nc.ncattrs()}
        return globattrs
# fname = Path('../../_data/output/190-geotraces-2021.nc')
fname = Path('../../_data/output/tepco.nc')
contents= ExtractNetcdfContents(fname)
print(contents.dfs)
print(contents.enum_dicts)
print(contents.global_attrs)
{'SEAWATER': sample LON LAT TIME h3 h3_dl mn54 mn54_dl co58 \
0 0 141.029999 37.32 1300749300 NaN NaN NaN NaN 5.7
1 1 141.029999 37.32 1300804080 NaN NaN NaN NaN NaN
2 2 141.029999 37.32 1300888260 NaN NaN NaN NaN NaN
3 3 141.029999 37.32 1300959000 NaN NaN NaN NaN NaN
4 4 141.029999 37.32 1301047200 NaN NaN NaN NaN NaN
... ... ... ... ... .. ... ... ... ...
21472 21472 141.039993 37.48 1657620600 NaN NaN NaN NaN NaN
21473 21473 141.039993 37.48 1657620600 NaN 0.37 NaN NaN NaN
21474 21474 141.039993 37.48 1658224800 NaN NaN NaN NaN NaN
21475 21475 141.039993 37.48 1658224800 NaN 0.38 NaN NaN NaN
21476 21476 141.039993 37.48 1658830200 NaN NaN NaN NaN NaN
co58_dl ... te132 te132_dl i132 i132_dl cs136 cs136_dl tbeta \
0 7.6 ... NaN NaN 160.0 44.0 6.7 4.7 NaN
1 15.0 ... NaN NaN NaN 88.0 NaN 7.8 NaN
2 NaN ... NaN NaN 200.0 58.0 NaN NaN NaN
3 NaN ... NaN NaN 120.0 88.0 68.0 49.0 NaN
4 NaN ... 13.0 7.4 58.0 22.0 4.4 3.2 NaN
... ... ... ... ... ... ... ... ... ...
21472 NaN ... NaN NaN NaN NaN NaN NaN NaN
21473 NaN ... NaN NaN NaN NaN NaN NaN NaN
21474 NaN ... NaN NaN NaN NaN NaN NaN NaN
21475 NaN ... NaN NaN NaN NaN NaN NaN NaN
21476 NaN ... NaN NaN NaN NaN NaN NaN NaN
tbeta_dl talpha talpha_dl
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
21472 NaN NaN NaN
21473 NaN NaN NaN
21474 NaN NaN NaN
21475 13.0 NaN NaN
21476 NaN NaN NaN
[21477 rows x 49 columns]}
{'SEAWATER': {}}
{'id': '', 'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances', 'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (https://www.helcom.fi/wp-content/uploads/2019/08/Guidelines-for-Monitoring-of-Radioactive-Substances.pdf), which specifies reporting format, database structure, data types and obligatory parameters used for reporting data under Recommendation 26/3.\n\nThe database is updated and quality assured annually by HELCOM MORS EG.', 'keywords': 'oceanography, Earth Science > Oceans > Ocean Chemistry> Radionuclides, Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure, Earth Science > Oceans > Water Quality > Ocean Contaminants', 'keywords_vocabulary': 'GCMD Science Keywords', 'keywords_vocabulary_url': 'https://gcmd.earthdata.nasa.gov/static/kms/', 'record': '', 'featureType': '', 'cdm_data_type': '', 'Conventions': 'CF-1.10 ACDD-1.3', 'publisher_name': 'Paul MCGINNITY, Iolanda OSVATH, Florence DESCROIX-COMANDUCCI', 'publisher_email': 'p.mc-ginnity@iaea.org, i.osvath@iaea.org, F.Descroix-Comanducci@iaea.org', 'publisher_url': 'https://maris.iaea.org', 'publisher_institution': 'International Atomic Energy Agency - IAEA', 'creator_name': 'author: HELCOM MORS', 'institution': '', 'metadata_link': '', 'creator_email': '', 'creator_url': '', 'references': '', 'license': 'Without prejudice to the applicable Terms and Conditions (https://nucleus.iaea.org/Pages/Others/Disclaimer.aspx), I hereby agree that any use of the data will contain appropriate acknowledgement of the data source(s) and the IAEA Marine Radioactivity Information System (MARIS).', 'comment': '', 'geospatial_lat_min': '141.67', 'geospatial_lon_min': '140.6', 'geospatial_lat_max': '38.63', 'geospatial_lon_max': '35.8', 'geospatial_vertical_min': '', 'geospatial_vertical_max': '', 'geospatial_bounds': 'POLYGON ((140.6 35.8, 141.67 35.8, 141.67 38.63, 140.6 38.63, 140.6 35.8))', 'geospatial_bounds_crs': 'EPSG:4326', 'time_coverage_start': '2011-03-21T23:15:00', 'time_coverage_end': '2022-07-26T13:45:00', 'local_time_zone': '', 'date_created': '', 'date_modified': '', 'publisher_postprocess_logs': 'Assign `NaN` to values equal to `ND` (not detected) - to be confirmed , Remove 約 (about) char, Replace e.g `4.0E+00<&<8.0E+00` by its mean (here 6), Remap to MARIS radionuclide names, Normalizing, renaming columns, Encode time as `int` representing seconds since xxx, Drop row when both longitude & latitude equal 0'}