Various utilities to populate NetCDF global attributes as well as ISO 19115 metadata.

source

GlobAttrsFeeder

 GlobAttrsFeeder (dfs:dict, cbs:list=[], logs:list=[])

Produce NetCDF global attributes as specified by the callbacks.

Type Default Details
dfs dict Dictionary of NetCDF group DataFrames
cbs list [] Callbacks
logs list [] List of preprocessing steps taken
Exported source
class GlobAttrsFeeder:
    "Produce NetCDF global attributes as specified by the callbacks."
    def __init__(self, 
                 dfs:dict, # Dictionary of NetCDF group DataFrames
                 cbs:list=None, # Callbacks (no callbacks when omitted)
                 logs:list=None # List of preprocessing steps taken (empty when omitted)
                 ): 
        # `None` sentinels instead of `[]` defaults: a list default is created once
        # and would be shared by every instance relying on it.
        cbs = [] if cbs is None else cbs
        logs = [] if logs is None else logs
        # Stores `dfs`, `cbs` and `logs` (already normalised above) as attributes.
        fc.store_attr()
        # Global attributes collected by the callbacks.
        self.attrs = {}
        
    def callback(self):
        "Run the callbacks, passing this feeder so they can fill `self.attrs`."
        run_cbs(self.cbs, self)
        
    def __call__(self):
        "Run the callbacks and return the collected attributes dictionary."
        self.callback()
        return self.attrs

source

BboxCB

 BboxCB ()

Compute dataset geographical bounding box

Exported source
class BboxCB(Callback):
    "Compute dataset geographical bounding box"
    def __call__(self, obj):
        # `obj.dfs` groups are concatenated so the box covers every group.
        bbox = get_bbox(pd.concat(obj.dfs)) 
        # `bounds` is ordered (minx, miny, maxx, maxy), i.e. with x=longitude and
        # y=latitude: (lon_min, lat_min, lon_max, lat_max). Unpacking it as
        # (lon_min, lon_max, lat_min, lat_max) swaps `lat_min` and `lon_max`.
        lon_min, lat_min, lon_max, lat_max = [str(bound) for bound in bbox.bounds]
        obj.attrs.update({
            'geospatial_lat_min': lat_min, 
            'geospatial_lat_max': lat_max,
            'geospatial_lon_min': lon_min,
            'geospatial_lon_max': lon_max,
            'geospatial_bounds': bbox.wkt})

source

DepthRangeCB

 DepthRangeCB (depth_col='depth')

Compute depth values range

Exported source
class DepthRangeCB(Callback):
    "Compute depth values range"
    def __init__(self, depth_col='depth'): fc.store_attr()
    def __call__(self, obj):
        # Explicit dtype for the fallback avoids relying on the default dtype of an
        # empty Series; missing depths are dropped so that an all-NaN column is
        # treated like an absent one instead of producing 'nan' attributes.
        depths = pd.concat(obj.dfs).get(self.depth_col, 
                                        default=pd.Series([], dtype=float)).dropna()
        if depths.empty: return
        
        def to_vertical(depth):
            # Depths are stored as negated values; zero is written as '0' so that
            # a float zero is never rendered as '-0.0'.
            return '0' if depth == 0 else str(-depth)
        
        obj.attrs.update({
            # Shallowest depth gives the upper bound, deepest gives the lower bound.
            'geospatial_vertical_max': to_vertical(depths.min()),
            'geospatial_vertical_min': to_vertical(depths.max())})

source

TimeRangeCB

 TimeRangeCB (cfg)

Compute time values range

Exported source
class TimeRangeCB(Callback):
    "Compute time values range"
    def __init__(self, cfg):
        fc.store_attr()

    def __call__(self, obj):
        times = pd.concat(obj.dfs)['time']
        earliest, latest = times.min(), times.max()

        def to_iso(value):
            # Numeric time -> date using the configured time units, as ISO 8601 text.
            return num2date(value, units=self.cfg['units']['time']).isoformat()

        coverage = {
            'time_coverage_start': to_iso(earliest),
            'time_coverage_end': to_iso(latest)}
        obj.attrs.update(coverage)

source

ZoteroItem

 ZoteroItem (item_id, cfg)

Fetch an item from a Zotero group library and expose its title, summary and creators.

Exported source
class ZoteroItem:
    "Item fetched from a Zotero group library, exposing its title, summary and creators."
    def __init__(self, item_id, cfg):
        # `cfg` must provide the 'lib_id' and 'api_key' entries read by `getItem`.
        self.cfg = cfg
        self.item = self.getItem(item_id)
    
    def exist(self):
        "Return `True` when the item was found in the library."
        return self.item is not None
    
    def getItem(self, item_id):
        "Fetch `item_id` from the group library; print a message and return `None` if it is not found."
        zot = zotero.Zotero(self.cfg['lib_id'], 'group', self.cfg['api_key'])
        try:
            return zot.item(item_id)
        except zotero_errors.ResourceNotFound:
            print(f'Item {item_id} does not exist in Zotero library')
            return None
            
    def title(self):
        "Return the item's title."
        return self.item['data']['title']
    
    def summary(self):
        "Return the item's abstract note."
        return self.item['data']['abstractNote']
    
    def creator_name(self):
        "Return the item's creators serialized as a JSON string."
        return json.dumps(self.item['data']['creators'])
            
    def __repr__(self):
        "Return the whole item as indented JSON."
        return json.dumps(self.item, indent=4)

source

ZoteroCB

 ZoteroCB (itemId, cfg)

Retrieve Zotero metadata.

Exported source
# TBD: put it in callback module
class ZoteroCB(Callback):
    "Retrieve Zotero metadata."
    def __init__(self, itemId, cfg):
        fc.store_attr()

    def __call__(self, obj):
        zotero_item = ZoteroItem(self.itemId, self.cfg['zotero'])
        # Unknown item: leave the attributes untouched.
        if not zotero_item.exist(): return
        obj.attrs['title'] = zotero_item.title()
        obj.attrs['summary'] = zotero_item.summary()
        obj.attrs['creator_name'] = zotero_item.creator_name()
from marisco.configs import cfg

# Existing item: `title`, `summary` and `creator_name` are added to the attributes.
# `dfs` can be `None` here because `ZoteroCB` does not read the DataFrames.
GlobAttrsFeeder(None, cbs=[
    ZoteroCB('26VMZZ2Q', cfg=cfg())
    ])()
{'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances',
 'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (https://www.helcom.fi/wp-content/uploads/2019/08/Guidelines-for-Monitoring-of-Radioactive-Substances.pdf), which specifies reporting format, database structure, data types and obligatory parameters used for reporting data under Recommendation 26/3.\n\nThe database is updated and quality assured annually by HELCOM MORS EG.',
 'creator_name': '[{"creatorType": "author", "name": "HELCOM MORS"}]'}
# Item without an abstract: `summary` is stored as an empty string.
GlobAttrsFeeder(None, cbs=[
    ZoteroCB('3W354SQG', cfg=cfg())
    ])()
{'title': 'Radioactivity Monitoring of the Irish Marine Environment 1991 and 1992',
 'summary': '',
 'creator_name': '[{"creatorType": "author", "firstName": "A.", "lastName": "McGarry"}, {"creatorType": "author", "firstName": "S.", "lastName": "Lyons"}, {"creatorType": "author", "firstName": "C.", "lastName": "McEnri"}, {"creatorType": "author", "firstName": "T.", "lastName": "Ryan"}, {"creatorType": "author", "firstName": "M.", "lastName": "O\'Colmain"}, {"creatorType": "author", "firstName": "J.D.", "lastName": "Cunningham"}]'}
# Unknown item id: a message is printed and no attribute is added.
GlobAttrsFeeder(None, cbs=[
    ZoteroCB('x', cfg=cfg())
    ])()
Item x does not exist in Zotero library
{}

source

KeyValuePairCB

 KeyValuePairCB (k, v)

Set a single global attribute from a key/value pair.

Exported source
class KeyValuePairCB(Callback):
    "Set the global attribute `k` to the value `v`."
    def __init__(self, k, v):
        fc.store_attr()

    def __call__(self, obj):
        obj.attrs.update({self.k: self.v})

How to use

# Pickled test input passed to the feeder as `dfs`.
dfs = pd.read_pickle('../files/pkl/dfs_test.pkl')
# Keywords, joined below into a single comma-separated `keywords` attribute.
kw = ['oceanography', 'Earth Science > Oceans > Ocean Chemistry> Radionuclides',
      'Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure',
      'Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments',
      'Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes',
      'Earth Science > Oceans > Water Quality > Ocean Contaminants',
      'Earth Science > Biological Classification > Animals/Vertebrates > Fish',
      'Earth Science > Biosphere > Ecosystems > Marine Ecosystems',
      'Earth Science > Biological Classification > Animals/Invertebrates > Mollusks',
      'Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans',
      'Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)']
# Each callback adds its own entries to the feeder's `attrs` dictionary.
# NOTE(review): `CONFIGS` is not defined in this excerpt; presumably the loaded
# configuration providing the 'units' and 'zotero' entries (confirm).
feed = GlobAttrsFeeder(dfs, cbs=[
    BboxCB(),
    DepthRangeCB(),
    TimeRangeCB(cfg=CONFIGS),
    ZoteroCB('26VMZZ2Q', cfg=CONFIGS),
    KeyValuePairCB('keywords', ', '.join(kw))
    ])

# Calling the feeder runs the callbacks and returns the attributes dictionary.
attrs = feed(); attrs
{'geospatial_lat_min': '29.05',
 'geospatial_lat_max': '65.35',
 'geospatial_lon_min': '9.6333',
 'geospatial_lon_max': '54.0',
 'geospatial_bounds': 'POLYGON ((9.6333 54, 29.05 54, 29.05 65.35, 9.6333 65.35, 9.6333 54))',
 'geospatial_vertical_max': '0',
 'geospatial_vertical_min': '-248.0',
 'time_coverage_start': '1984-01-10T00:00:00',
 'time_coverage_end': '1987-06-28T00:00:00',
 'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances',
 'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (https://www.helcom.fi/wp-content/uploads/2019/08/Guidelines-for-Monitoring-of-Radioactive-Substances.pdf), which specifies reporting format, database structure, data types and obligatory parameters used for reporting data under Recommendation 26/3.\n\nThe database is updated and quality assured annually by HELCOM MORS EG.',
 'creator_name': '[{"creatorType": "author", "name": "HELCOM MORS"}]',
 'keywords': 'oceanography, Earth Science > Oceans > Ocean Chemistry> Radionuclides, Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure, Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments, Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes, Earth Science > Oceans > Water Quality > Ocean Contaminants, Earth Science > Biological Classification > Animals/Vertebrates > Fish, Earth Science > Biosphere > Ecosystems > Marine Ecosystems, Earth Science > Biological Classification > Animals/Invertebrates > Mollusks, Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans, Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)'}