source
GlobAttrsFeeder
GlobAttrsFeeder (dfs:Dict[str,pandas.core.frame.DataFrame],
cbs:List[marisco.callbacks.Callback]=[],
logs:List[str]=[])
Produce NetCDF global attributes as specified by the callbacks.
dfs
Dict
Dictionary of NetCDF group DataFrames
cbs
List
[]
Callbacks
logs
List
[]
List of preprocessing steps taken
Exported source
class GlobAttrsFeeder:
    "Produce NetCDF global attributes as specified by the callbacks."
    def __init__(self,
                 dfs: Dict[str, pd.DataFrame], # Dictionary of NetCDF group DataFrames
                 cbs: List[Callback] = None, # Callbacks (`None` means no callbacks)
                 logs: List[str] = None # List of preprocessing steps taken (`None` means empty)
                 ):
        # A literal `[]` default is created once and shared by every instance;
        # build a fresh list per instance instead. This is done before
        # `store_attr` so the stored attributes are the normalised values.
        cbs = [] if cbs is None else cbs
        logs = [] if logs is None else logs
        fc.store_attr()
        # Global attributes collected by the callbacks, returned by `__call__`.
        self.attrs = {}

    def callback(self):
        "Run every callback in `self.cbs`, passing this feeder to each of them."
        run_cbs(self.cbs, self)

    def __call__(self):
        "Run the callbacks and return the resulting `attrs` dictionary."
        self.callback()
        return self.attrs
source
BboxCB
BboxCB ()
Compute dataset geographical bounding box
Exported source
class BboxCB(Callback):
    "Compute dataset geographical bounding box"
    def __call__(self, obj):
        "Store the bounding-box limits and its WKT for all rows of `obj.dfs` in `obj.attrs`."
        bbox = get_bbox(pd.concat(obj.dfs))
        # NOTE(review): assumes `get_bbox` returns a shapely geometry, whose
        # `bounds` are ordered (minx, miny, maxx, maxy), i.e.
        # (lon_min, lat_min, lon_max, lat_max). The previous unpacking order
        # (lon_min, lon_max, lat_min, lat_max) swapped lon_max and lat_min.
        lon_min, lat_min, lon_max, lat_max = [str(bound) for bound in bbox.bounds]
        obj.attrs.update({
            'geospatial_lat_min': lat_min,
            'geospatial_lat_max': lat_max,
            'geospatial_lon_min': lon_min,
            'geospatial_lon_max': lon_max,
            'geospatial_bounds': bbox.wkt})
source
DepthRangeCB
DepthRangeCB (depth_col:str='SMP_DEPTH')
Compute depth values range
Exported source
class DepthRangeCB(Callback):
    "Compute depth values range"
    def __init__(self,
                 depth_col: str = 'SMP_DEPTH'): # Name of the column holding depth values
        fc.store_attr()

    def __call__(self, obj):
        "Store the min/max of the depth column in `obj.attrs`; do nothing when no depth value exists."
        # An explicit dtype keeps the empty fallback Series from triggering
        # pandas' "default dtype for empty Series" warning.
        no_depths = pd.Series([], dtype='float64')
        depths = pd.concat(obj.dfs).get(self.depth_col, default=no_depths)
        # Discard missing values so a column holding only NaN is treated like
        # an absent column instead of writing 'nan' as the vertical range.
        depths = depths.dropna()
        if not depths.empty:
            obj.attrs.update({
                'geospatial_vertical_max': str(depths.max()),
                'geospatial_vertical_min': str(depths.min())})
source
TimeRangeCB
TimeRangeCB (time_col:str='TIME', fn_time_unit:Callable=<function
get_time_units>)
Compute time values range
Exported source
class TimeRangeCB(Callback):
    "Compute time values range"
    def __init__(self,
                 time_col: str = 'TIME', # Name of the column holding time values
                 fn_time_unit: Callable = get_time_units): # Callable returning the time unit passed to `num2date`
        fc.store_attr()
        # Resolve the unit once, at construction time.
        self.time_unit = fn_time_unit()

    def __call__(self, obj):
        "Store the ISO-formatted earliest and latest time of `obj.dfs` in `obj.attrs`."
        times = pd.concat(obj.dfs)[self.time_col]
        # Convert both extremes before touching `obj.attrs`, so a failed
        # conversion leaves the attributes unchanged.
        earliest = num2date(times.min(), units=self.time_unit).isoformat()
        latest = num2date(times.max(), units=self.time_unit).isoformat()
        coverage = {}
        coverage['time_coverage_start'] = earliest
        coverage['time_coverage_end'] = latest
        obj.attrs.update(coverage)
source
ZoteroItem
ZoteroItem (item_id:str, cfg:Dict[str,str])
Retrieve Zotero metadata.
Exported source
class ZoteroItem:
    "Retrieve Zotero metadata."
    def __init__(self,
                 item_id: str, # Key of the Zotero item to fetch
                 cfg: Dict[str, str]): # Zotero settings; `lib_id` and `api_key` are read
        fc.store_attr()
        # The fetched item, or `None` when it is not in the library.
        self.item = self.getItem(item_id)

    def exist(self):
        "Return `True` when the item was found in the library."
        return self.item is not None

    def getItem(self, item_id):
        "Fetch `item_id` from the group library; print a message and return `None` if it is not found."
        zot = zotero.Zotero(self.cfg['lib_id'], 'group', self.cfg['api_key'])
        try:
            return zot.item(item_id)
        except zotero_errors.ResourceNotFound:
            print(f'Item {item_id} does not exist in Zotero library')
            return None

    def title(self):
        "Return the item's `title` field."
        return self.item['data']['title']

    def summary(self):
        "Return the item's `abstractNote` field."
        return self.item['data']['abstractNote']

    def creator_name(self):
        "Return the item's `creators` list serialised as a JSON string."
        # Creator entries carry either a single `name` or a
        # `firstName`/`lastName` pair, so the raw list is dumped as JSON
        # rather than formatted from one fixed key.
        return json.dumps(self.item['data']['creators'])

    def __repr__(self):
        "Return the whole item as indented JSON."
        return json.dumps(self.item, indent=4)
source
ZoteroCB
ZoteroCB (itemId, cfg)
Retrieve Zotero metadata.
Exported source
# TBD : put it in callback module
class ZoteroCB(Callback):
    "Retrieve Zotero metadata."
    def __init__(self, itemId, cfg):
        fc.store_attr()

    def __call__(self, obj):
        "Copy the item's key, title, summary and creators into `obj.attrs` when the item exists."
        zot_item = ZoteroItem(self.itemId, self.cfg['zotero'])
        # Unknown item: leave `obj.attrs` untouched.
        if not zot_item.exist():
            return
        obj.attrs['id'] = zot_item.item['key']
        # Each name is both the attribute key and the `ZoteroItem` method to call.
        for name in ('title', 'summary', 'creator_name'):
            accessor = getattr(zot_item, name)
            obj.attrs[name] = accessor()
# Example: fetch the metadata of an existing Zotero item. `dfs` is None
# because ZoteroCB does not read the DataFrames.
GlobAttrsFeeder(None , cbs= [
ZoteroCB('26VMZZ2Q' , cfg= cfg())
])()
{'id': '26VMZZ2Q',
'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances',
'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (https://www.helcom.fi/wp-content/uploads/2019/08/Guidelines-for-Monitoring-of-Radioactive-Substances.pdf), which specifies reporting format, database structure, data types and obligatory parameters used for reporting data under Recommendation 26/3.\n\nThe database is updated and quality assured annually by HELCOM MORS EG.',
'creator_name': '[{"creatorType": "author", "name": "HELCOM MORS"}]'}
# Example: a second existing item, fetched the same way with `dfs` set to None.
GlobAttrsFeeder(None , cbs= [
ZoteroCB('3W354SQG' , cfg= cfg())
])()
{'id': '3W354SQG',
'title': 'Radioactivity Monitoring of the Irish Marine Environment 1991 and 1992',
'summary': '',
'creator_name': '[{"creatorType": "author", "firstName": "A.", "lastName": "McGarry"}, {"creatorType": "author", "firstName": "S.", "lastName": "Lyons"}, {"creatorType": "author", "firstName": "C.", "lastName": "McEnri"}, {"creatorType": "author", "firstName": "T.", "lastName": "Ryan"}, {"creatorType": "author", "firstName": "M.", "lastName": "O\'Colmain"}, {"creatorType": "author", "firstName": "J.D.", "lastName": "Cunningham"}]'}
# Example: an item key that is not in the library. ZoteroItem prints a
# message and ZoteroCB adds no attribute.
GlobAttrsFeeder(None , cbs= [
ZoteroCB('x' , cfg= cfg())
])()
Item x does not exist in Zotero library
source
KeyValuePairCB
KeyValuePairCB (k, v)
Set the global attribute `k` to the value `v`.
Exported source
class KeyValuePairCB(Callback):
    "Set the global attribute `k` to the value `v`."
    def __init__(self, k, v):
        fc.store_attr()

    def __call__(self, obj):
        "Write `self.v` under key `self.k` in `obj.attrs`."
        obj.attrs[self.k] = self.v
How to use
# Load the test DataFrames from a local pickle file.
# NOTE(review): unpickling executes arbitrary code; only load trusted files.
dfs = pd.read_pickle('../files/pkl/dfs_test.pkl' )
# Keywords to be joined into the single 'keywords' global attribute below.
# NOTE(review): 'Ocean Chemistry> Radionuclides' has no space before '>',
# unlike every other entry — confirm whether this is intended.
kw = ['oceanography' , 'Earth Science > Oceans > Ocean Chemistry> Radionuclides' ,
'Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure' ,
'Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments' ,
'Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes' ,
'Earth Science > Oceans > Water Quality > Ocean Contaminants' ,
'Earth Science > Biological Classification > Animals/Vertebrates > Fish' ,
'Earth Science > Biosphere > Ecosystems > Marine Ecosystems' ,
'Earth Science > Biological Classification > Animals/Invertebrates > Mollusks' ,
'Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans' ,
'Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)' ]
# Combine the callbacks: bounding box, depth range, time range, Zotero
# metadata and a fixed key/value pair.
feed = GlobAttrsFeeder(dfs, cbs= [
BboxCB(),
DepthRangeCB(),
TimeRangeCB(),
ZoteroCB('26VMZZ2Q' , cfg()),
KeyValuePairCB('keywords' , ', ' .join(kw))
])
# Calling the feeder runs the callbacks and returns the attributes dict.
attrs = feed(); attrs
{'geospatial_lat_min': '179.9986',
'geospatial_lat_max': '89.9905',
'geospatial_lon_min': '-180.0',
'geospatial_lon_max': '-70.5744',
'geospatial_bounds': 'POLYGON ((-180 -70.5744, 179.9986 -70.5744, 179.9986 89.9905, -180 89.9905, -180 -70.5744))',
'geospatial_vertical_max': '5815.3',
'geospatial_vertical_min': '0.5',
'time_coverage_start': '2007-07-30T10:37:19',
'time_coverage_end': '2018-11-22T07:33:10',
'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances',
'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (https://www.helcom.fi/wp-content/uploads/2019/08/Guidelines-for-Monitoring-of-Radioactive-Substances.pdf), which specifies reporting format, database structure, data types and obligatory parameters used for reporting data under Recommendation 26/3.\n\nThe database is updated and quality assured annually by HELCOM MORS EG.',
'creator_name': '[{"creatorType": "author", "name": "HELCOM MORS"}]',
'keywords': 'oceanography, Earth Science > Oceans > Ocean Chemistry> Radionuclides, Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure, Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments, Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes, Earth Science > Oceans > Water Quality > Ocean Contaminants, Earth Science > Biological Classification > Animals/Vertebrates > Fish, Earth Science > Biosphere > Ecosystems > Marine Ecosystems, Earth Science > Biological Classification > Animals/Invertebrates > Mollusks, Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans, Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)'}