Various utilities to populate NetCDF global attributes as well as ISO 19115 metadata.



 GlobAttrsFeeder (dfs:dict, cbs:list=[], logs:list=[])

Produce NetCDF global attributes as specified by the callbacks.

Type Default Details
dfs dict Dictionary of NetCDF group DataFrames
cbs list [] Callbacks
logs list [] List of preprocessing steps taken
Exported source
class GlobAttrsFeeder:
    "Produce NetCDF global attributes as specified by the callbacks."
    def __init__(self, 
                 dfs:dict, # Dictionary of NetCDF group DataFrames
                 cbs:list=[], # Callbacks
                 logs:list=[] # List of preprocessing steps taken
                 ):
        self.dfs = dfs
        # Copy the lists: the `[]` defaults are shared between calls and must
        # never be handed out where an instance could mutate them.
        self.cbs, self.logs = list(cbs), list(logs)
        # Global attributes collected by the callbacks; returned by `__call__`.
        self.attrs = {}

    def callback(self):
        "Hand the registered callbacks and this feeder to `run_cbs`."
        run_cbs(self.cbs, self)

    def __call__(self):
        "Run the callbacks, then return the collected global attributes."
        self.callback()
        return self.attrs



 BboxCB ()

Compute dataset geographical bounding box

Exported source
class BboxCB(Callback):
    "Compute dataset geographical bounding box"
    def __call__(self, obj):
        # All groups are pooled so the box covers the whole dataset.
        bbox = get_bbox(pd.concat(obj.dfs)) 
        # `bounds` is ordered (minx, miny, maxx, maxy); with x = longitude and
        # y = latitude that is (lon_min, lat_min, lon_max, lat_max).
        # NOTE(review): assumes `get_bbox` returns a shapely-like geometry - confirm.
        lon_min, lat_min, lon_max, lat_max = [str(bound) for bound in bbox.bounds]
        obj.attrs.update({
            'geospatial_lat_min': lat_min, 
            'geospatial_lat_max': lat_max,
            'geospatial_lon_min': lon_min,
            'geospatial_lon_max': lon_max,
            'geospatial_bounds': bbox.wkt})



 DepthRangeCB (depth_col='depth')

Compute depth values range

Exported source
class DepthRangeCB(Callback):
    "Compute depth values range"
    def __init__(self, depth_col='depth'): fc.store_attr()
    def __call__(self, obj):
        # Fall back to an empty Series when no group carries the depth column;
        # the explicit dtype avoids pandas' empty-Series dtype warning.
        depths = pd.concat(obj.dfs).get(self.depth_col, default=pd.Series([], dtype='float64'))
        if not depths.empty:
            max_depth, min_depth = depths.max(), depths.min()
            # Depths are negated when written, so the shallowest value becomes
            # the vertical maximum. A zero depth is written as '0' rather than
            # the '-0.0' that negating a float zero would give.
            obj.attrs.update({
                'geospatial_vertical_max': '0' if min_depth == 0 else str(-min_depth),
                'geospatial_vertical_min': str(-max_depth)})



 TimeRangeCB (cfg)

Compute time values range

Exported source
class TimeRangeCB(Callback):
    "Compute time values range"
    def __init__(self, cfg): fc.store_attr()
    def __call__(self, obj):
        time = pd.concat(obj.dfs)['time']
        # Convert the numeric extremes to ISO 8601 strings using the time units
        # declared in the configuration (`cfg['units']['time']`).
        start, end = [num2date(t, units=self.cfg['units']['time']).isoformat() 
                      for t in (time.min(), time.max())]
        obj.attrs.update({
            'time_coverage_start': start,
            'time_coverage_end': end})



 ZoteroItem (item_id, cfg)

Fetch an item from a Zotero group library and expose its title, abstract and creators.

Exported source
class ZoteroItem:
    "Fetch an item from a Zotero group library and expose its title, abstract and creators."
    def __init__(self, item_id, cfg):
        # `cfg` must provide the 'lib_id' and 'api_key' entries read by `getItem`.
        self.cfg = cfg
        self.item = self.getItem(item_id)

    def exist(self):
        "Return True when the item was found in the library."
        return self.item is not None

    def getItem(self, item_id):
        "Return the Zotero item `item_id`, or None (after printing a notice) if it is not found."
        zot = zotero.Zotero(self.cfg['lib_id'], 'group', self.cfg['api_key'])
        try:
            return zot.item(item_id)
        except zotero_errors.ResourceNotFound:
            print(f'Item {item_id} does not exist in Zotero library')
            return None

    def title(self):
        "Return the item's title."
        return self.item['data']['title']

    def summary(self):
        "Return the item's abstract (`abstractNote`)."
        return self.item['data']['abstractNote']

    def creator_name(self):
        "Return the item's creators serialized as a JSON string."
        # Creator entries are not uniform (some carry `name`, others
        # `firstName`/`lastName`), so the raw list is kept as JSON.
        return json.dumps(self.item['data']['creators'])

    def __repr__(self):
        return json.dumps(self.item, indent=4)



 ZoteroCB (itemId, cfg)

Retrieve Zotero metadata.

Exported source
# TBD: put it in callback module
class ZoteroCB(Callback):
    "Retrieve Zotero metadata."
    def __init__(self, itemId, cfg): fc.store_attr()
    def __call__(self, obj):
        # Look the item up with the 'zotero' section of the configuration.
        zot_item = ZoteroItem(self.itemId, self.cfg['zotero'])
        # Nothing to copy when the item could not be found.
        if not zot_item.exist():
            return
        # Each attribute name is also the name of the ZoteroItem accessor.
        for name in ('title', 'summary', 'creator_name'):
            accessor = getattr(zot_item, name)
            obj.attrs[name] = accessor()
from marisco.configs import cfg

GlobAttrsFeeder(None, cbs=[
    ZoteroCB('26VMZZ2Q', cfg=cfg())
    ])()
{'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances',
 'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (, which specifies reporting format, database structure, data types and obligatory parameters used for reporting data under Recommendation 26/3.\n\nThe database is updated and quality assured annually by HELCOM MORS EG.',
 'creator_name': '[{"creatorType": "author", "name": "HELCOM MORS"}]'}
GlobAttrsFeeder(None, cbs=[
    ZoteroCB('3W354SQG', cfg=cfg())
    ])()
{'title': 'Radioactivity Monitoring of the Irish Marine Environment 1991 and 1992',
 'summary': '',
 'creator_name': '[{"creatorType": "author", "firstName": "A.", "lastName": "McGarry"}, {"creatorType": "author", "firstName": "S.", "lastName": "Lyons"}, {"creatorType": "author", "firstName": "C.", "lastName": "McEnri"}, {"creatorType": "author", "firstName": "T.", "lastName": "Ryan"}, {"creatorType": "author", "firstName": "M.", "lastName": "O\'Colmain"}, {"creatorType": "author", "firstName": "J.D.", "lastName": "Cunningham"}]'}
GlobAttrsFeeder(None, cbs=[
    ZoteroCB('x', cfg=cfg())
    ])()
Item x does not exist in Zotero library



 KeyValuePairCB (k, v)

Set a single global attribute from a key/value pair.

Exported source
class KeyValuePairCB(Callback):
    "Set the global attribute `k` to the value `v`."
    def __init__(self, k, v):
        fc.store_attr()

    def __call__(self, obj):
        obj.attrs[self.k] = self.v

How to use

dfs = pd.read_pickle('../files/pkl/dfs_test.pkl')
kw = ['oceanography', 'Earth Science > Oceans > Ocean Chemistry> Radionuclides',
      'Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure',
      'Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments',
      'Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes',
      'Earth Science > Oceans > Water Quality > Ocean Contaminants',
      'Earth Science > Biological Classification > Animals/Vertebrates > Fish',
      'Earth Science > Biosphere > Ecosystems > Marine Ecosystems',
      'Earth Science > Biological Classification > Animals/Invertebrates > Mollusks',
      'Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans',
      'Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)']
# Callbacks run in list order, which is also the order of the resulting attributes.
# NOTE(review): the first three callbacks are restored from the attributes shown
# in the output below; confirm the `TimeRangeCB` argument.
feed = GlobAttrsFeeder(dfs, cbs=[
    BboxCB(),
    DepthRangeCB(),
    TimeRangeCB(cfg=CONFIGS),
    ZoteroCB('26VMZZ2Q', cfg=CONFIGS),
    KeyValuePairCB('keywords', ', '.join(kw))
    ])

attrs = feed(); attrs
{'geospatial_lat_min': '29.05',
 'geospatial_lat_max': '65.35',
 'geospatial_lon_min': '9.6333',
 'geospatial_lon_max': '54.0',
 'geospatial_bounds': 'POLYGON ((9.6333 54, 29.05 54, 29.05 65.35, 9.6333 65.35, 9.6333 54))',
 'geospatial_vertical_max': '0',
 'geospatial_vertical_min': '-248.0',
 'time_coverage_start': '1984-01-10T00:00:00',
 'time_coverage_end': '1987-06-28T00:00:00',
 'title': 'Environmental database - Helsinki Commission Monitoring of Radioactive Substances',
 'summary': 'MORS Environment database has been used to collate data resulting from monitoring of environmental radioactivity in the Baltic Sea based on HELCOM Recommendation 26/3.\n\nThe database is structured according to HELCOM Guidelines on Monitoring of Radioactive Substances (, which specifies reporting format, database structure, data types and obligatory parameters used for reporting data under Recommendation 26/3.\n\nThe database is updated and quality assured annually by HELCOM MORS EG.',
 'creator_name': '[{"creatorType": "author", "name": "HELCOM MORS"}]',
 'keywords': 'oceanography, Earth Science > Oceans > Ocean Chemistry> Radionuclides, Earth Science > Human Dimensions > Environmental Impacts > Nuclear Radiation Exposure, Earth Science > Oceans > Ocean Chemistry > Ocean Tracers, Earth Science > Oceans > Marine Sediments, Earth Science > Oceans > Ocean Chemistry, Earth Science > Oceans > Sea Ice > Isotopes, Earth Science > Oceans > Water Quality > Ocean Contaminants, Earth Science > Biological Classification > Animals/Vertebrates > Fish, Earth Science > Biosphere > Ecosystems > Marine Ecosystems, Earth Science > Biological Classification > Animals/Invertebrates > Mollusks, Earth Science > Biological Classification > Animals/Invertebrates > Arthropods > Crustaceans, Earth Science > Biological Classification > Plants > Macroalgae (Seaweeds)'}