Exported source
# TBD: move to configs
NA = 'Not available'
Below we define constants used throughout the package and abstract some common operations.
get_unique_across_dfs (dfs:Dict[str,pandas.core.frame.DataFrame], col_name:str='NUCLIDE', as_df:bool=False, include_nchars:bool=False)
Get a list of unique column values across dataframes.
|  | Type | Default | Details |
|---|---|---|---|
| dfs | Dict |  | Dictionary of dataframes |
| col_name | str | NUCLIDE | Column name to extract unique values from |
| as_df | bool | False | Return a DataFrame of unique values |
| include_nchars | bool | False | Add a column with the number of characters in the value |
| Returns | List |  | Returns a list of unique column values across dataframes |
def get_unique_across_dfs(dfs: Dict[str, pd.DataFrame], # Dictionary of dataframes
                          col_name: str='NUCLIDE', # Column name to extract unique values from
                          as_df: bool=False, # Return a DataFrame of unique values
                          include_nchars: bool=False # Add a column with the number of characters in the value
                          ) -> List[str]: # Returns a list of unique column values across dataframes
    "Get a list of unique column values across dataframes."
    unique_values = list(set().union(*(df[col_name].unique() for df in dfs.values() if col_name in df.columns)))
    if not as_df:
        return unique_values
    else:
        df_uniques = pd.DataFrame(unique_values, columns=['value']).reset_index()
        if include_nchars: df_uniques['n_chars'] = df_uniques['value'].str.len()
        return df_uniques
Example of use:
dfs_test = {'SEAWATER': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134_137_tot', 'cs134_137_tot']}),
            'BIOTA': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134', 'cs134_137_tot']}),
            'SEDIMENT': pd.DataFrame({'NUCLIDE': ['cs134_137_tot', 'cs134_137_tot', 'cs134_137_tot']})}

fc.test_eq(set(get_unique_across_dfs(dfs_test, col_name='NUCLIDE')),
           set(['cs134', 'cs137', 'cs134_137_tot']))
What if the column name is missing from one of the dataframes?
dfs_test = {'SEAWATER': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134_137_tot', 'cs134_137_tot']}),
            'BIOTA': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134', 'cs134_137_tot']}),
            'SEDIMENT': pd.DataFrame({'NONUCLIDE': ['cs134_137_tot', 'cs134_137_tot', 'cs134_137_tot']})}

fc.test_eq(set(get_unique_across_dfs(dfs_test, col_name='NUCLIDE')),
           set(['cs134', 'cs137', 'cs134_137_tot']))
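Setting as_df=True and include_nchars=True returns a DataFrame instead of a list; the table below presumably comes from a call along these lines:

get_unique_across_dfs(dfs_test, col_name='NUCLIDE', as_df=True, include_nchars=True)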
|  | index | value | n_chars |
|---|---|---|---|
| 0 | 0 | cs134_137_tot | 13 |
| 1 | 1 | cs134 | 5 |
| 2 | 2 | cs137 | 5 |
Remapper (provider_lut_df:pandas.core.frame.DataFrame, maris_lut_fn:Union[Callable,pandas.core.frame.DataFrame], maris_col_id:str, maris_col_name:str, provider_col_to_match:str, provider_col_key:str, fname_cache:str)
Remap a data provider lookup table to a MARIS lookup table using fuzzy matching.
|  | Type | Details |
|---|---|---|
| provider_lut_df | DataFrame | Data provider lookup table to be remapped |
| maris_lut_fn | Union | MARIS lookup table or function returning the path |
| maris_col_id | str | MARIS lookup table column name for the id |
| maris_col_name | str | MARIS lookup table column name for the name |
| provider_col_to_match | str | Data provider lookup table column name for the name to match |
| provider_col_key | str | Data provider lookup table column name for the key |
| fname_cache | str | Cache file name |
class Remapper():
    "Remap a data provider lookup table to a MARIS lookup table using fuzzy matching."
    def __init__(self,
                 provider_lut_df: pd.DataFrame, # Data provider lookup table to be remapped
                 maris_lut_fn: Union[Callable, pd.DataFrame], # MARIS lookup table or function returning the path
                 maris_col_id: str, # MARIS lookup table column name for the id
                 maris_col_name: str, # MARIS lookup table column name for the name
                 provider_col_to_match: str, # Data provider lookup table column name for the name to match
                 provider_col_key: str, # Data provider lookup table column name for the key
                 fname_cache: str # Cache file name
                 ):
        fc.store_attr()
        self.cache_file = cache_path() / fname_cache
        # Check if maris_lut is a callable function or already a DataFrame
        if callable(maris_lut_fn):
            self.maris_lut = maris_lut_fn()
        else:
            self.maris_lut = maris_lut_fn
        self.lut = {}

    def generate_lookup_table(self,
                              fixes={}, # Lookup table fixes
                              as_df=True, # Whether to return a DataFrame
                              overwrite=True):
        "Generate a lookup table from a data provider lookup table to a MARIS lookup table using fuzzy matching."
        self.fixes = fixes
        self.as_df = as_df
        if overwrite or not self.cache_file.exists():
            self._create_lookup_table()
            fc.save_pickle(self.cache_file, self.lut)
        else:
            self.lut = fc.load_pickle(self.cache_file)
        return self._format_output()

    def _create_lookup_table(self):
        df = self.provider_lut_df
        for _, row in tqdm(df.iterrows(), total=len(df), desc="Processing"):
            self._process_row(row)

    def _process_row(self, row):
        value_to_match = row[self.provider_col_to_match]
        if isinstance(value_to_match, str):  # Only process if value is a string
            # If value is in fixes, use the fixed value
            name_to_match = self.fixes.get(value_to_match, value_to_match)
            result = match_maris_lut(self.maris_lut, name_to_match, self.maris_col_id, self.maris_col_name).iloc[0]
            match = Match(result[self.maris_col_id], result[self.maris_col_name],
                          value_to_match, result['score'])
            self.lut[row[self.provider_col_key]] = match
        else:
            # Handle non-string values (e.g., NaN)
            self.lut[row[self.provider_col_key]] = Match(-1, "Unknown", value_to_match, 0)

    def select_match(self, match_score_threshold: int=1, verbose: bool=False):
        if verbose:
            matched_len = len([v for v in self.lut.values() if v.match_score < match_score_threshold])
            print(f"{matched_len} entries matched the criteria, while {len(self.lut) - matched_len} entries had a match score of {match_score_threshold} or higher.")
        self.lut = {k: v for k, v in self.lut.items() if v.match_score >= match_score_threshold}
        return self._format_output()

    def _format_output(self):
        if not self.as_df: return self.lut
        df_lut = pd.DataFrame.from_dict(self.lut, orient='index',
                                        columns=['matched_maris_name', 'source_name', 'match_score'])
        df_lut.index.name = 'source_key'
        return df_lut.sort_values(by='match_score', ascending=False)
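Below is a hedged usage sketch. The provider dataframe, its column names and the cache file name are made up for illustration; the MARIS nuclide lookup table path and its nuclide_id / nc_name columns follow the match_maris_lut examples further down this page.

df_provider = pd.DataFrame({'NUCLIDE': ['CS-137', 'CS-134', 'CO-60']})  # hypothetical provider nomenclature

remapper = Remapper(provider_lut_df=df_provider,
                    maris_lut_fn=pd.read_excel('../files/lut/dbo_nuclide.xlsx'),
                    maris_col_id='nuclide_id',
                    maris_col_name='nc_name',
                    provider_col_to_match='NUCLIDE',
                    provider_col_key='NUCLIDE',
                    fname_cache='nuclides_example.pkl')  # hypothetical cache file name

# Build the fuzzy-matched lookup table (returned as a DataFrame sorted by match score) ...
remapper.generate_lookup_table(as_df=True)
# ... then keep only entries whose match score is at or above the threshold, e.g. for manual review.
remapper.select_match(match_score_threshold=1, verbose=True)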
has_valid_varname (var_names:List[str], cdl_path:str, group:Optional[str]=None)
Check that proposed variable names are in MARIS CDL
|  | Type | Default | Details |
|---|---|---|---|
| var_names | List |  | variable names |
| cdl_path | str |  | Path to MARIS CDL file (point of truth) |
| group | Optional | None | Check if the variable names are contained in the group |
# TBD: Assess if still needed
def has_valid_varname(
    var_names: List[str], # variable names
    cdl_path: str, # Path to MARIS CDL file (point of truth)
    group: Optional[str] = None, # Check if the variable names are contained in the group
):
    "Check that proposed variable names are in MARIS CDL"
    has_valid = True
    with Dataset(cdl_path) as nc:
        cdl_vars = {}
        all_vars = []
        # Get variable names in CDL
        for grp in nc.groups.values():
            # Create a list of variables for each group
            vars = list(grp.variables.keys())
            cdl_vars[grp.name] = vars
            all_vars.extend(vars)

    if group != None:
        allowed_vars = cdl_vars[group]
    else:
        # Get unique variable names across all groups
        allowed_vars = list(set(all_vars))

    for name in var_names:
        if name not in allowed_vars:
            has_valid = False
            if group != None:
                print(f'"{name}" variable name not found in group "{group}" of MARIS CDL')
            else:
                print(f'"{name}" variable name not found in MARIS CDL')

    return has_valid
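A hedged usage sketch (the CDL-derived NetCDF path, group name and variable names below are placeholders, not actual package paths):

has_valid_varname(['lon', 'lat'], cdl_path='path/to/maris-template.nc', group='seawater')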
get_bbox (df, coord_cols:Tuple[str,str]=('LON', 'LAT'))
Get the bounding box of a DataFrame.
ddmm_to_dd (ddmmmm:float)
|  | Type | Details |
|---|---|---|
| ddmmmm | float | Coordinates in degrees/minutes decimal format |
| Returns | float | Coordinates in degrees decimal format |
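The exported implementation is not shown here; the following is a minimal sketch of the usual DDMM.MMMM to decimal-degrees conversion (a hypothetical re-implementation for illustration, not the package's code):

import math

def ddmm_to_dd_sketch(ddmmmm: float) -> float:
    "Hypothetical illustration: split degrees from decimal minutes, then convert minutes to degrees."
    minutes_frac, degrees = math.modf(ddmmmm / 100)   # e.g. 4530.0 -> (0.30, 45.0)
    return degrees + (minutes_frac * 100) / 60        # 45 + 30/60 = 45.5

ddmm_to_dd_sketch(4530.0)  # ≈ 45.5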
download_file (owner, repo, src_dir, dest_dir, fname)
def download_files_in_folder(
    owner: str, # GitHub owner
    repo: str, # GitHub repository
    src_dir: str, # Source directory
    dest_dir: str # Destination directory
):
    "Make a GET request to the GitHub API to get the contents of the folder."
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{src_dir}"
    response = requests.get(url)

    if response.status_code == 200:
        contents = response.json()
        # Iterate over the files and download them
        for item in contents:
            if item["type"] == "file":
                fname = item["name"]
                download_file(owner, repo, src_dir, dest_dir, fname)
    else:
        print(f"Error: {response.status_code}")

def download_file(owner, repo, src_dir, dest_dir, fname):
    # Make a GET request to the GitHub API to get the raw file contents
    url = f"https://raw.githubusercontent.com/{owner}/{repo}/master/{src_dir}/{fname}"
    response = requests.get(url)

    if response.status_code == 200:
        # Save the file locally
        with open(Path(dest_dir) / fname, "wb") as file:
            file.write(response.content)
        print(f"{fname} downloaded successfully.")
    else:
        print(f"Error: {response.status_code}")
download_files_in_folder (owner:str, repo:str, src_dir:str, dest_dir:str)
Make a GET request to the GitHub API to get the contents of the folder.
|  | Type | Details |
|---|---|---|
| owner | str | GitHub owner |
| repo | str | GitHub repository |
| src_dir | str | Source directory |
| dest_dir | str | Destination directory |
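A hedged usage sketch (owner, repository and directory names are placeholders):

download_files_in_folder(owner='some-org', repo='some-repo',
                         src_dir='nbs/files/lut', dest_dir='./files/lut')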
The World Register of Marine Species (WoRMS) is an authoritative classification and catalogue of marine names. It provides a REST API (among other interfaces) that allows "fuzzy" matching of any species name you might encounter in marine data sources against its own database. Several types of matches are supported, as described in the WoRMS documentation.
match_worms (name:str)
Lookup `name` in WoRMS (fuzzy match).

|  | Type | Details |
|---|---|---|
| name | str | Name of species to look up in WoRMS |
def match_worms(
    name: str # Name of species to look up in WoRMS
):
    "Lookup `name` in WoRMS (fuzzy match)."
    url = 'https://www.marinespecies.org/rest/AphiaRecordsByMatchNames'
    params = {
        'scientificnames[]': [name],
        'marine_only': 'true'
    }
    headers = {
        'accept': 'application/json'
    }

    response = requests.get(url, params=params, headers=headers)

    # Check if the request was successful (status code 200)
    if response.status_code == 200:
        data = response.json()
        return data
    else:
        return -1
For instance:
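(The call below is an assumption inferred from the returned record.)

match_worms('Aristeus antennatus')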
[[{'AphiaID': 107083,
'url': 'https://www.marinespecies.org/aphia.php?p=taxdetails&id=107083',
'scientificname': 'Aristeus antennatus',
'authority': '(Risso, 1816)',
'status': 'accepted',
'unacceptreason': None,
'taxonRankID': 220,
'rank': 'Species',
'valid_AphiaID': 107083,
'valid_name': 'Aristeus antennatus',
'valid_authority': '(Risso, 1816)',
'parentNameUsageID': 106807,
'kingdom': 'Animalia',
'phylum': 'Arthropoda',
'class': 'Malacostraca',
'order': 'Decapoda',
'family': 'Aristeidae',
'genus': 'Aristeus',
'citation': 'DecaNet eds. (2025). DecaNet. Aristeus antennatus (Risso, 1816). Accessed through: World Register of Marine Species at: https://www.marinespecies.org/aphia.php?p=taxdetails&id=107083 on 2025-02-11',
'lsid': 'urn:lsid:marinespecies.org:taxname:107083',
'isMarine': 1,
'isBrackish': 0,
'isFreshwater': 0,
'isTerrestrial': 0,
'isExtinct': 0,
'match_type': 'exact',
'modified': '2022-08-24T09:48:14.813Z'}]]
We use fuzzy-matching distance metrics from https://jamesturk.github.io/jellyfish.
Match (matched_id:int, matched_maris_name:str, source_name:str, match_score:int)
Match between a data provider name and a MARIS lookup table.
match_maris_lut (lut:Union[str,pandas.core.frame.DataFrame,pathlib.Path], data_provider_name:str, maris_id:str, maris_name:str, dist_fn:Callable=<built-in function levenshtein_distance>, nresults:int=10)
Fuzzy matching data provider and MARIS lookup tables (e.g biota species, sediments, …).
|  | Type | Default | Details |
|---|---|---|---|
| lut | Union |  | Either str, Path or DataFrame |
| data_provider_name | str |  | Name of data provider nomenclature item to look up |
| maris_id | str |  | Id of MARIS lookup table nomenclature item to match |
| maris_name | str |  | Name of MARIS lookup table nomenclature item to match |
| dist_fn | Callable | levenshtein_distance | Distance function |
| nresults | int | 10 | Maximum number of results to return |
| Returns | DataFrame |  |  |
def match_maris_lut(
    lut: Union[str, pd.DataFrame, Path], # Either str, Path or DataFrame
    data_provider_name: str, # Name of data provider nomenclature item to look up
    maris_id: str, # Id of MARIS lookup table nomenclature item to match
    maris_name: str, # Name of MARIS lookup table nomenclature item to match
    dist_fn: Callable = jf.levenshtein_distance, # Distance function
    nresults: int = 10 # Maximum number of results to return
) -> pd.DataFrame:
    "Fuzzy matching data provider and MARIS lookup tables (e.g biota species, sediments, ...)."
    if isinstance(lut, str) or isinstance(lut, Path):
        df = pd.read_excel(lut)  # Load the LUT if a path is provided
    elif isinstance(lut, pd.DataFrame):
        df = lut  # Use the DataFrame directly if provided
    else:
        raise ValueError("lut must be either a file path or a DataFrame")

    df = df.dropna(subset=[maris_name])
    df = df.astype({maris_id: 'int'})
    df['score'] = df[maris_name].str.lower().apply(lambda x: dist_fn(data_provider_name.lower(), x))
    df = df.sort_values(by='score', ascending=True)[:nresults]
    return df[[maris_id, maris_name, 'score']]
Below is an example matching the name "PLANKTON" against the dbo_species_cleaned.xlsx MARIS biota species lookup table:
lut_fname = '../files/lut/dbo_species_cleaned.xlsx'
match_maris_lut(lut_fname, data_provider_name='PLANKTON',
                maris_id='species_id', maris_name='species')
|  | species_id | species | score |
|---|---|---|---|
| 281 | 280 | Plankton | 0 |
| 696 | 695 | Zooplankton | 3 |
| 633 | 632 | Palaemon | 4 |
| 697 | 696 | Phytoplankton | 5 |
| 812 | 811 | Chanos | 5 |
| 160 | 159 | Neuston | 5 |
| 234 | 233 | Penaeus | 6 |
| 1458 | 1457 | Lamnidae | 6 |
| 1438 | 1437 | Labrus | 6 |
| 1527 | 1526 | Favites | 6 |
Below, we demonstrate matching the laboratory name "Central Mining Institute, Poland" against the MARIS lab lookup table from dbo_lab.xlsx. This example combines the lab and country columns. Note that in this instance, df_lut is passed directly as the lut argument.
lut_fname = '../files/lut/dbo_lab.xlsx'
df_lut = pd.read_excel(lut_fname)
df_lut['lab_country'] = df_lut['lab'] + '_' + df_lut['country']

match_maris_lut(lut=df_lut, data_provider_name='Central Mining Institute, Poland',
                maris_id='lab_id', maris_name='lab_country')
|  | lab_id | lab_country | score |
|---|---|---|---|
| 6 | 5 | Central Mining Institute_Poland | 2 |
| 203 | 202 | Polytechnic Institute_Romania | 18 |
| 282 | 281 | Norwegian Polar Institute_Norway | 21 |
| 113 | 112 | Nuclear Research Institute_Vietnam | 22 |
| 246 | 245 | Paul Scherrer Institute_Switzerland | 22 |
| 136 | 135 | Nuclear Energy Board_Ireland | 23 |
| 471 | 474 | Kobe University_Japan | 23 |
| 429 | 432 | Qatar University_Qatar | 23 |
| 174 | 173 | Interfaculty Reactor Institute_Netherlands | 23 |
| 177 | 176 | RIKILT_Netherlands | 23 |
Below is an example matching the name "GLACIAL" against the dbo_sedtype.xlsx MARIS sediment lookup table:
lut_fname = '../files/lut/dbo_sedtype.xlsx'
match_maris_lut(lut_fname, data_provider_name='GLACIAL',
                maris_id='sedtype_id', maris_name='sedtype')
|  | sedtype_id | sedtype | score |
|---|---|---|---|
| 26 | 25 | Glacial | 0 |
| 3 | 2 | Gravel | 4 |
| 2 | 1 | Clay | 5 |
| 51 | 50 | Glacial clay | 5 |
| 4 | 3 | Marsh | 6 |
| 7 | 6 | Sand | 6 |
| 13 | 12 | Silt | 6 |
| 15 | 14 | Sludge | 6 |
| 27 | 26 | Soft | 7 |
| 52 | 51 | Soft clay | 7 |
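Finally, an example matching the nuclide name "CS-137" against the dbo_nuclide.xlsx MARIS nuclide lookup table: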
lut_fname = '../files/lut/dbo_nuclide.xlsx'
match_maris_lut(lut_fname, data_provider_name='CS-137',
                maris_id='nuclide_id', maris_name='nc_name')
|  | nuclide_id | nc_name | score |
|---|---|---|---|
| 31 | 33 | cs137 | 1 |
| 30 | 31 | cs134 | 2 |
| 29 | 30 | cs127 | 2 |
| 99 | 102 | cs136 | 2 |
| 109 | 112 | sb127 | 3 |
| 111 | 114 | ce139 | 3 |
| 25 | 24 | sb125 | 4 |
| 36 | 38 | pm147 | 4 |
| 28 | 29 | i131 | 4 |
| 110 | 113 | ba133 | 4 |
test_dfs (dfs1:Dict[str,pandas.core.frame.DataFrame], dfs2:Dict[str,pandas.core.frame.DataFrame])
Compare two dictionaries of DataFrames for equality (also ensuring that columns are in the same order).
|  | Type | Details |
|---|---|---|
| dfs1 | Dict | First dictionary of DataFrames to compare |
| dfs2 | Dict | Second dictionary of DataFrames to compare |
| Returns | None | It raises an AssertionError if the DataFrames are not equal |
def test_dfs(
    dfs1: Dict[str, pd.DataFrame], # First dictionary of DataFrames to compare
    dfs2: Dict[str, pd.DataFrame] # Second dictionary of DataFrames to compare
) -> None: # It raises an `AssertionError` if the DataFrames are not equal
    "Compare two dictionaries of DataFrames for equality (also ensuring that columns are in the same order)."
    for grp in dfs1.keys():
        df1, df2 = (df.sort_index() for df in (dfs1[grp], dfs2[grp]))
        fc.test_eq(df1, df2.reindex(columns=df1.columns))
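A minimal usage sketch (the dictionaries below are made up for illustration):

dfs_a = {'SEAWATER': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134'], 'VALUE': [1.0, 2.0]})}
dfs_b = {'SEAWATER': pd.DataFrame({'NUCLIDE': ['cs137', 'cs134'], 'VALUE': [1.0, 2.0]})}
test_dfs(dfs_a, dfs_b)  # passes silently; raises an AssertionError on mismatch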
Extract NetCDF contents
ExtractNetcdfContents (filename:str, verbose:bool=False)
Initialize and extract data from a NetCDF file.
class ExtractNetcdfContents:
    def __init__(self, filename: str, verbose: bool = False):
        "Initialize and extract data from a NetCDF file."
        self.filename = filename
        self.verbose = verbose
        self.dfs = {}           # DataFrames extracted from the NetCDF file
        self.enum_dicts = {}    # Enum dictionaries extracted from the NetCDF file
        self.global_attrs = {}  # Global attributes extracted from the NetCDF file
        self.custom_maps = {}   # Custom maps extracted from the NetCDF file
        self.extract_all()

    def extract_all(self):
        "Extract data, enums, and global attributes from the NetCDF file."
        if not Path(self.filename).exists():
            print(f'File {self.filename} not found.')
            return

        with Dataset(self.filename, 'r') as nc:
            self.global_attrs = self.extract_global_attributes(nc)
            for group_name in nc.groups:
                group = nc.groups[group_name]
                self.dfs[group_name.upper()] = self.extract_data(group)
                self.enum_dicts[group_name.upper()] = self.extract_enums(group, group_name)
                self.custom_maps[group_name.upper()] = self.extract_custom_maps(group, group_name)

        if self.verbose:
            print("Data extraction complete.")

    def extract_data(self, group) -> pd.DataFrame:
        "Extract data from a group and convert to DataFrame."
        data = {var_name: var[:] for var_name, var in group.variables.items() if var_name not in group.dimensions}
        df = pd.DataFrame(data)
        rename_map = {nc_var: col for col, nc_var in NC_VARS.items() if nc_var in df.columns}
        df = df.rename(columns=rename_map)
        return df

    def extract_enums(self, group, group_name: str) -> Dict:
        "Extract enum dictionaries for variables in a group."
        local_enum_dicts = {}
        for var_name, var in group.variables.items():
            if hasattr(var.datatype, 'enum_dict'):
                local_enum_dicts[var_name] = {str(k): str(v) for k, v in var.datatype.enum_dict.items()}
                if self.verbose:
                    print(f"Extracted enum_dict for {var_name} in {group_name}")
        return local_enum_dicts

    def extract_global_attributes(self, nc) -> Dict:
        "Extract global attributes from the NetCDF file."
        globattrs = {attr: getattr(nc, attr) for attr in nc.ncattrs()}
        return globattrs

    def extract_custom_maps(self, group, group_name: str) -> Dict:
        "Extract custom maps from the NetCDF file."
        reverse_nc_vars = {v: k for k, v in NC_VARS.items()}
        custom_maps = {}
        for var_name, var in group.variables.items():
            attr = f"{var_name}_map"
            if hasattr(var, attr):
                custom_maps[reverse_nc_vars[var.name]] = literal_eval(getattr(var, attr))
        return custom_maps
# fname = Path('../../_data/output/190-geotraces-2021.nc')
# fname = Path('./files/nc/encoding-test.nc')
fname = Path('../../_data/output/tepco.nc')
contents = ExtractNetcdfContents(fname)
print(contents.dfs)
print(contents.enum_dicts)
print(contents.global_attrs)
print(contents.custom_maps)
{'SEAWATER': LON LAT TIME NUCLIDE VALUE UNIT UNC DL
0 140.603882 36.299721 1318512060 29 NaN 3 NaN 2
1 140.603882 36.299721 1318512060 31 NaN 3 NaN 2
2 140.603882 36.299721 1318512060 33 NaN 3 NaN 2
3 140.603882 36.299721 1318512180 29 NaN 3 NaN 2
4 140.603882 36.299721 1318512180 31 NaN 3 NaN 2
... ... ... ... ... ... ... ... ..
80620 141.666672 38.299999 1722930660 33 0.0016 3 NaN 1
80621 141.666672 38.299999 1725956340 31 NaN 3 NaN 2
80622 141.666672 38.299999 1725956340 33 0.0015 3 NaN 1
80623 141.666672 38.299999 1725957120 31 NaN 3 NaN 2
80624 141.666672 38.299999 1725957120 33 0.0014 3 NaN 1
[80625 rows x 8 columns]}
{'SEAWATER': {'nuclide': {'NOT APPLICABLE': '-1', 'NOT AVAILABLE': '0', 'h3': '1', 'be7': '2', 'c14': '3', 'k40': '4', 'cr51': '5', 'mn54': '6', 'co57': '7', 'co58': '8', 'co60': '9', 'zn65': '10', 'sr89': '11', 'sr90': '12', 'zr95': '13', 'nb95': '14', 'tc99': '15', 'ru103': '16', 'ru106': '17', 'rh106': '18', 'ag106m': '19', 'ag108': '20', 'ag108m': '21', 'ag110m': '22', 'sb124': '23', 'sb125': '24', 'te129m': '25', 'i129': '28', 'i131': '29', 'cs127': '30', 'cs134': '31', 'cs137': '33', 'ba140': '34', 'la140': '35', 'ce141': '36', 'ce144': '37', 'pm147': '38', 'eu154': '39', 'eu155': '40', 'pb210': '41', 'pb212': '42', 'pb214': '43', 'bi207': '44', 'bi211': '45', 'bi214': '46', 'po210': '47', 'rn220': '48', 'rn222': '49', 'ra223': '50', 'ra224': '51', 'ra225': '52', 'ra226': '53', 'ra228': '54', 'ac228': '55', 'th227': '56', 'th228': '57', 'th232': '59', 'th234': '60', 'pa234': '61', 'u234': '62', 'u235': '63', 'u238': '64', 'np237': '65', 'np239': '66', 'pu238': '67', 'pu239': '68', 'pu240': '69', 'pu241': '70', 'am240': '71', 'am241': '72', 'cm242': '73', 'cm243': '74', 'cm244': '75', 'cs134_137_tot': '76', 'pu239_240_tot': '77', 'pu239_240_iii_iv_tot': '78', 'pu239_240_v_vi_tot': '79', 'cm243_244_tot': '80', 'pu238_pu239_240_tot_ratio': '81', 'am241_pu239_240_tot_ratio': '82', 'cs137_134_ratio': '83', 'cd109': '84', 'eu152': '85', 'fe59': '86', 'gd153': '87', 'ir192': '88', 'pu238_240_tot': '89', 'rb86': '90', 'sc46': '91', 'sn113': '92', 'sn117m': '93', 'tl208': '94', 'mo99': '95', 'tc99m': '96', 'ru105': '97', 'te129': '98', 'te132': '99', 'i132': '100', 'i135': '101', 'cs136': '102', 'tbeta': '103', 'talpha': '104', 'i133': '105', 'th230': '106', 'pa231': '107', 'u236': '108', 'ag111': '109', 'in116m': '110', 'te123m': '111', 'sb127': '112', 'ba133': '113', 'ce139': '114', 'tl201': '116', 'hg203': '117', 'na22': '122', 'pa234m': '123', 'am243': '124', 'se75': '126', 'sr85': '127', 'y88': '128', 'ce140': '129', 'bi212': '130', 'u236_238_ratio': '131', 'i125': '132', 'ba137m': '133', 'u232': '134', 'pa233': '135', 'ru106_rh106_tot': '136', 'tu': '137', 'tbeta40k': '138', 'fe55': '139', 'ce144_pr144_tot': '140', 'pu240_pu239_ratio': '141', 'u233': '142', 'pu239_242_tot': '143', 'ac227': '144'}, 'unit': {'Not applicable': '-1', 'NOT AVAILABLE': '0', 'Bq per m3': '1', 'Bq per m2': '2', 'Bq per kg': '3', 'Bq per kgd': '4', 'Bq per kgw': '5', 'kg per kg': '6', 'TU': '7', 'DELTA per mill': '8', 'atom per kg': '9', 'atom per kgd': '10', 'atom per kgw': '11', 'atom per l': '12', 'Bq per kgC': '13'}, 'dl': {'Not applicable': '-1', 'Not available': '0', 'Detected value': '1', 'Detection limit': '2', 'Not detected': '3', 'Derived': '4'}}}
{'id': 'JEV6HP5A', 'title': "Readings of Sea Area Monitoring - Monitoring of sea water - Sea area close to TEPCO's Fukushima Daiichi NPS / Coastal area - Readings of Sea Area Monitoring [TEPCO]", 'summary': '', 'keywords': 'oceanography, Earth Science > Oceans > Ocean Chemistry> Radionuclides, Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure, Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments, Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes, Earth Science > Oceans > Water Quality > Ocean Contaminants, Earth Science > Biological Classification > Animals/Vertebrates > Fish, Earth Science > Biosphere > Ecosystems > Marine Ecosystems, Earth Science > Biological Classification > Animals/Invertebrates > Mollusks, Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans, Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)', 'history': 'TBD', 'keywords_vocabulary': 'GCMD Science Keywords', 'keywords_vocabulary_url': 'https://gcmd.earthdata.nasa.gov/static/kms/', 'record': 'TBD', 'featureType': 'TBD', 'cdm_data_type': 'TBD', 'Conventions': 'CF-1.10 ACDD-1.3', 'publisher_name': 'Paul MCGINNITY, Iolanda OSVATH, Florence DESCROIX-COMANDUCCI', 'publisher_email': 'p.mc-ginnity@iaea.org, i.osvath@iaea.org, F.Descroix-Comanducci@iaea.org', 'publisher_url': 'https://maris.iaea.org', 'publisher_institution': 'International Atomic Energy Agency - IAEA', 'creator_name': '[{"creatorType": "author", "firstName": "", "lastName": "TEPCO - Tokyo Electric Power Company"}]', 'institution': 'TBD', 'metadata_link': 'TBD', 'creator_email': 'TBD', 'creator_url': 'TBD', 'references': 'TBD', 'license': 'Without prejudice to the applicable Terms and Conditions (https://nucleus.iaea.org/Pages/Others/Disclaimer.aspx), I hereby agree that any use of the data will contain appropriate acknowledgement of the data source(s) and the IAEA Marine Radioactivity Information System (MARIS).', 'comment': 'TBD', 'geospatial_lat_min': '141.66666667', 'geospatial_lon_min': '140.60388889', 'geospatial_lat_max': '38.63333333', 'geospatial_lon_max': '35.79611111', 'geospatial_vertical_min': 'TBD', 'geospatial_vertical_max': 'TBD', 'geospatial_bounds': 'POLYGON ((140.60388889 35.79611111, 141.66666667 35.79611111, 141.66666667 38.63333333, 140.60388889 38.63333333, 140.60388889 35.79611111))', 'geospatial_bounds_crs': 'EPSG:4326', 'time_coverage_start': '2011-03-21T14:30:00', 'time_coverage_end': '2024-12-21T08:03:00', 'local_time_zone': 'TBD', 'date_created': 'TBD', 'date_modified': 'TBD', 'publisher_postprocess_logs': "Assign `NaN` to values equal to `ND` (not detected) - to be confirmed , Remove 約 (about) char, Replace range values (e.g '4.0E+00<&<8.0E+00' or '1.0~2.7') by their mean, Select columns of interest., \n Parse TEPCO measurement columns to extract nuclide name, measurement value, \n detection limit and uncertainty\n , \n Remap `UNIT` name to MARIS id.\n , \n Remap `NUCLIDE` name to MARIS id.\n , \n Remap `DL` name to MARIS id.\n , Parse time column from TEPCO., Encode time as seconds since epoch., Drop rows with invalid longitude & latitude values. Convert `,` separator to `.` separator."}
{'SEAWATER': {}}