Various utilities to populate NetCDF global attributes as well as ISO 19115 metadata.

source

GlobAttrsFeeder

 GlobAttrsFeeder (dfs:Dict[str,pandas.core.frame.DataFrame],
                  cbs:List[marisco.callbacks.Callback]=[],
                  logs:List[str]=[])

Produce NetCDF global attributes as specified by the callbacks.

Type Default Details
dfs Dict Dictionary of NetCDF group DataFrames
cbs List [] Callbacks
logs List [] List of preprocessing steps taken
Exported source
class GlobAttrsFeeder:
    "Produce NetCDF global attributes as specified by the callbacks."
    def __init__(self, 
                 dfs: Dict[str, pd.DataFrame], # Dictionary of NetCDF group DataFrames
                 cbs: List[Callback]=None, # Callbacks (no callbacks when omitted)
                 logs: List[str]=None # List of preprocessing steps taken (empty when omitted)
                 ): 
        fc.store_attr()
        # A `[]` default is created once at definition time and would be shared
        # by every instance built without these arguments; give each instance
        # its own list instead.
        self.cbs = [] if cbs is None else cbs
        self.logs = [] if logs is None else logs
        # Global attributes filled in by the callbacks.
        self.attrs = {}
        
    def callback(self):
        "Pass the callbacks and this feeder to `run_cbs`."
        run_cbs(self.cbs, self)
        
    def __call__(self):
        "Run the callbacks and return the attributes dictionary."
        self.callback()
        return self.attrs

source

BboxCB

 BboxCB ()

Compute dataset geographical bounding box

Exported source
class BboxCB(Callback):
    "Compute dataset geographical bounding box"
    def __call__(self, obj):
        # All groups are concatenated so the box covers the whole dataset.
        bbox = get_bbox(pd.concat(obj.dfs))
        # `bounds` is ordered (minx, miny, maxx, maxy), i.e. with x as longitude
        # and y as latitude: (lon_min, lat_min, lon_max, lat_max).
        # NOTE(review): assumes `get_bbox` returns a shapely geometry - confirm.
        lon_min, lat_min, lon_max, lat_max = [str(bound) for bound in bbox.bounds]
        obj.attrs.update({
            'geospatial_lat_min': lat_min, 
            'geospatial_lat_max': lat_max,
            'geospatial_lon_min': lon_min,
            'geospatial_lon_max': lon_max,
            'geospatial_bounds': bbox.wkt})

source

DepthRangeCB

 DepthRangeCB (depth_col:str='SMP_DEPTH')

Compute depth values range

Exported source
class DepthRangeCB(Callback):
    "Report the range of the depth column as global attributes."
    def __init__(self, 
                 depth_col: str='SMP_DEPTH' # Name of the column holding depth values
                 ): 
        fc.store_attr()

    def __call__(self, obj):
        "Set `geospatial_vertical_max`/`geospatial_vertical_min` when depth values exist."
        merged = pd.concat(obj.dfs)
        # A missing depth column is handled like an empty one.
        depth_values = merged.get(self.depth_col, default=pd.Series([]))
        if depth_values.empty:
            return
        max_depth = str(depth_values.max())
        min_depth = str(depth_values.min())
        obj.attrs.update({
            'geospatial_vertical_max': max_depth,
            'geospatial_vertical_min': min_depth})

source

TimeRangeCB

 TimeRangeCB (time_col:str='TIME', fn_time_unit:Callable=<function
              get_time_units>)

Compute time values range

Exported source
class TimeRangeCB(Callback):
    "Report the first and last time values as ISO-formatted global attributes."
    def __init__(self, 
                 time_col: str='TIME', # Name of the column holding encoded time values
                 fn_time_unit: Callable=get_time_units # Callable returning the units passed to `num2date`
                 ): 
        fc.store_attr()
        # Resolved once at construction and reused on every call.
        self.time_unit = fn_time_unit()
    
    def __call__(self, obj):
        "Set `time_coverage_start` and `time_coverage_end` on `obj.attrs`."
        times = pd.concat(obj.dfs)[self.time_col]
        first, last = times.min(), times.max()
        coverage_start = num2date(first, units=self.time_unit).isoformat()
        coverage_end = num2date(last, units=self.time_unit).isoformat()
        obj.attrs.update({
            'time_coverage_start': coverage_start,
            'time_coverage_end': coverage_end})

source

ZoteroItem

 ZoteroItem (item_id:str, cfg:Dict[str,str])

Retrieve Zotero metadata.

Exported source
class ZoteroItem:
    "Retrieve Zotero metadata."
    def __init__(self, 
                 item_id: str, # Key of the item to fetch from the Zotero group library
                 cfg: Dict[str, str] # Zotero settings; `lib_id` and `api_key` are read
                 ):
        fc.store_attr()
        # `None` when the item is not found in the library.
        self.item = self.getItem(item_id)
    
    def exist(self):
        "Return `True` if the item was found in the library."
        return self.item is not None
    
    def getItem(self, item_id):
        "Fetch `item_id` from the group library; print a notice and return `None` if it is missing."
        zot = zotero.Zotero(self.cfg['lib_id'], 'group', self.cfg['api_key'])
        try:
            return zot.item(item_id)
        except zotero_errors.ResourceNotFound:
            print(f'Item {item_id} does not exist in Zotero library')
            return None
            
    def title(self):
        "Return the item title."
        return self.item['data']['title']
    
    def summary(self):
        "Return the item abstract (`abstractNote` field)."
        return self.item['data']['abstractNote']
    
    def creator_name(self):
        "Return the item creators serialized as a JSON string."
        # Kept as JSON rather than a flattened string: creator entries may carry
        # either a single `name` or a `firstName`/`lastName` pair.
        return json.dumps(self.item['data']['creators'])
            
    def __repr__(self):
        "Return the raw item pretty-printed as JSON."
        return json.dumps(self.item, indent=4)

source

ZoteroCB

 ZoteroCB (itemId, cfg)

Retrieve Zotero metadata.

Exported source
# TBD: put it in callback module
class ZoteroCB(Callback):
    "Copy the key, title, summary and creators of a Zotero item into the global attributes."
    def __init__(self, itemId, cfg):
        fc.store_attr()

    def __call__(self, obj):
        "Look the item up and, if it exists, fill `obj.attrs` from it."
        zot_item = ZoteroItem(self.itemId, self.cfg['zotero'])
        # Leave the attributes untouched when the item is unknown.
        if not zot_item.exist():
            return
        obj.attrs['id'] = zot_item.item['key']
        # Each attribute is named after the `ZoteroItem` method that provides it.
        for field in ('title', 'summary', 'creator_name'):
            accessor = getattr(zot_item, field)
            obj.attrs[field] = accessor()
# Existing item: ZoteroCB never reads the DataFrames, so `dfs` can be None here.
GlobAttrsFeeder(None, cbs=[
    ZoteroCB('26VMZZ2Q', cfg=cfg())
    ])()
{'id': '26VMZZ2Q',
 'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances',
 'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (https://www.helcom.fi/wp-content/uploads/2019/08/Guidelines-for-Monitoring-of-Radioactive-Substances.pdf), which specifies reporting format, database structure, data types and obligatory parameters used for reporting data under Recommendation 26/3.\n\nThe database is updated and quality assured annually by HELCOM MORS EG.',
 'creator_name': '[{"creatorType": "author", "name": "HELCOM MORS"}]'}
# Another existing item, looked up the same way.
GlobAttrsFeeder(None, cbs=[
    ZoteroCB('3W354SQG', cfg=cfg())
    ])()
{'id': '3W354SQG',
 'title': 'Radioactivity Monitoring of the Irish Marine Environment 1991 and 1992',
 'summary': '',
 'creator_name': '[{"creatorType": "author", "firstName": "A.", "lastName": "McGarry"}, {"creatorType": "author", "firstName": "S.", "lastName": "Lyons"}, {"creatorType": "author", "firstName": "C.", "lastName": "McEnri"}, {"creatorType": "author", "firstName": "T.", "lastName": "Ryan"}, {"creatorType": "author", "firstName": "M.", "lastName": "O\'Colmain"}, {"creatorType": "author", "firstName": "J.D.", "lastName": "Cunningham"}]'}
# Unknown item id: a notice is printed and no attribute is set.
GlobAttrsFeeder(None, cbs=[
    ZoteroCB('x', cfg=cfg())
    ])()
Item x does not exist in Zotero library
{}

source

KeyValuePairCB

 KeyValuePairCB (k, v)

Set a single global attribute: key `k` is assigned value `v`.

Exported source
class KeyValuePairCB(Callback):
    "Set a single global attribute from a key/value pair."
    def __init__(self, k, v):
        fc.store_attr()

    def __call__(self, obj):
        "Assign the stored value to the stored key in `obj.attrs`."
        key, value = self.k, self.v
        obj.attrs[key] = value

How to use

# Load the test data handed to the feeder as `dfs`.
dfs = pd.read_pickle('../files/pkl/dfs_test.pkl')
# Keywords; joined below into a single comma-separated `keywords` attribute.
kw = ['oceanography', 'Earth Science > Oceans > Ocean Chemistry> Radionuclides',
      'Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure',
      'Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments',
      'Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes',
      'Earth Science > Oceans > Water Quality > Ocean Contaminants',
      'Earth Science > Biological Classification > Animals/Vertebrates > Fish',
      'Earth Science > Biosphere > Ecosystems > Marine Ecosystems',
      'Earth Science > Biological Classification > Animals/Invertebrates > Mollusks',
      'Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans',
      'Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)']
# Each callback contributes its own entries to the global attributes.
feed = GlobAttrsFeeder(dfs, cbs=[
    BboxCB(),
    DepthRangeCB(),
    TimeRangeCB(),
    ZoteroCB('26VMZZ2Q', cfg()),
    KeyValuePairCB('keywords', ', '.join(kw))
    ])

# Calling the feeder runs the callbacks and returns the attributes dictionary.
attrs = feed(); attrs
{'geospatial_lat_min': '179.9986',
 'geospatial_lat_max': '89.9905',
 'geospatial_lon_min': '-180.0',
 'geospatial_lon_max': '-70.5744',
 'geospatial_bounds': 'POLYGON ((-180 -70.5744, 179.9986 -70.5744, 179.9986 89.9905, -180 89.9905, -180 -70.5744))',
 'geospatial_vertical_max': '5815.3',
 'geospatial_vertical_min': '0.5',
 'time_coverage_start': '2007-07-30T10:37:19',
 'time_coverage_end': '2018-11-22T07:33:10',
 'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances',
 'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (https://www.helcom.fi/wp-content/uploads/2019/08/Guidelines-for-Monitoring-of-Radioactive-Substances.pdf), which specifies reporting format, database structure, data types and obligatory parameters used for reporting data under Recommendation 26/3.\n\nThe database is updated and quality assured annually by HELCOM MORS EG.',
 'creator_name': '[{"creatorType": "author", "name": "HELCOM MORS"}]',
 'keywords': 'oceanography, Earth Science > Oceans > Ocean Chemistry> Radionuclides, Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure, Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments, Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes, Earth Science > Oceans > Water Quality > Ocean Contaminants, Earth Science > Biological Classification > Animals/Vertebrates > Fish, Earth Science > Biosphere > Ecosystems > Marine Ecosystems, Earth Science > Biological Classification > Animals/Invertebrates > Mollusks, Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans, Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)'}