Readers

This module provides a unified interface for loading evaluation data from organizational repositories (IOM, UNHCR, etc.) and transforming it into standardized JSON format.

Components:

Purpose:

Transform diverse evaluation data sources into a consistent format for downstream analysis and processing.

The EvalReader interface defines a common contract that all evaluation repository readers (e.g. IOM, UNHCR) must implement:

Core Reader Interface


EvalReader


def EvalReader(
    cfg:dict, # Configuration dict with field mappings and processing rules
):

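Base class for evaluation repository readers. The constructor stores cfg on the instance; subclasses implement read, tfm and to_json, and calling the reader runs read followed by tfm.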

Exported source
class EvalReader:
    "Base contract for evaluation repository readers: `read` the raw data, then `tfm` it"
    def __init__(self, 
                 cfg:dict # Configuration dict with field mappings and processing rules
                ):
        # fastcore helper: saves the constructor arguments (here `cfg`) as attributes
        store_attr()

    def read(self):
        "Load the raw data; must be provided by the subclass"
        raise NotImplementedError

    def tfm(self, df):
        "Transform the raw data returned by `read`; must be provided by the subclass"
        raise NotImplementedError

    def to_json(self, output_path):
        "Serialize the transformed data; must be provided by the subclass"
        raise NotImplementedError

    def __call__(self):
        "Run the pipeline: `read` followed by `tfm`"
        return self.tfm(self.read())

iom_input_cfg


def iom_input_cfg(
    
):
Exported source
def iom_input_cfg():
    "Build a fresh copy of the field-handling configuration for IOM CSV exports"
    date_cols = ['Date of Publication', 'Evaluation Period From Date', 'Evaluation Period To Date']
    # Columns holding several values in one cell, with the separator used to split them
    list_fields = {'Countries Covered': dict(separator=',', clean=True)}
    document_fields = ['Document Subtype', 'File URL', 'File description']
    id_gen = dict(method='md5', fields=['Title', 'Year', 'Project Code'])  # fields to hash
    field_mappings = dict(Title='title', Year='year')  # other mappings to be added here
    return dict(
        date_cols=date_cols,
        string_cols=['Year'],
        list_fields=list_fields,
        document_fields=document_fields,
        id_gen=id_gen,
        field_mappings=field_mappings,
    )

Evaluation


def Evaluation(
    id:str, docs:list, meta:dict
)->None:

An evaluation with rich notebook display

Exported source
@dataclass
class Evaluation:
    "A single evaluation (id, documents, metadata) that renders as a markdown card in notebooks"
    id:str
    docs:list
    meta:dict

    def _repr_markdown_(self):
        "Markdown summary: title, year, commissioner, up to three countries, document count and id"
        lookup = self.meta.get
        title = lookup('Title', 'Untitled')
        year = lookup('Year', 'n/a')
        org = lookup('Evaluation Commissioner', 'Unknown')
        countries = lookup('Countries Covered', [])

        # Show at most three countries and summarise the remainder as a count
        shown = ', '.join(countries[:3]) if countries else 'Not specified'
        extra = len(countries) - 3
        if extra > 0:
            shown += f' (+{extra} more)'

        lines = [
            '',
            f'### {title}',
            f'**Year:** {year} | **Organization:** {org} | **Countries:** {shown}',
            '',
            # the two trailing spaces are a markdown hard line break
            f'**Documents:** {len(self.docs)} available  ',
            f'**ID:** `{self.id}`',
            '',
        ]
        return '\n'.join(lines)

IOM Reader


IOMRepoReader


def IOMRepoReader(
    fname:Path, # Path to the CSV export file
):

Create a reader for an IOM CSV export. The constructor loads the IOM configuration from iom_input_cfg() and stores fname on the instance.

Exported source
class IOMRepoReader(EvalReader):
    "Reader for CSV exports of the IOM evaluation repository"
    def __init__(self, 
                 fname:Path # Path to the CSV export file
                 ): 
        # The base class stores the IOM configuration as `self.cfg`
        super().__init__(iom_input_cfg())
        # fastcore helper: saves the constructor argument `fname` as an attribute
        store_attr()

The read method loads the raw CSV data from the IOM repository export:


IOMRepoReader.read


def read(
    
):
Exported source
@patch
def read(self:IOMRepoReader):
    "Load the raw CSV export at `self.fname` into a DataFrame"
    raw_df = pd.read_csv(self.fname)
    return raw_df
#fname = 'files/test/evaluation-search-export-11_13_2025--18_09_44.csv'
fname = 'files/test/evaluation-search-export-01_27_2026--21_43_30.csv'
reader = IOMRepoReader(fname)
reader.read().iloc[0]
Title                                       EVALUATION OF IOM’S MIGRATION DATA STRATEGY
Year                                                                               2025
Author                                                           IOM CENTRAL EVALUATION
Best Practicesor Lessons Learnt                                                     Yes
Date of Publication                                                          2025-08-11
Donor                                                                               IOM
Evaluation Brief                                                                    Yes
Evaluation Commissioner                                                             IOM
Evaluation Coverage                                                              Global
Evaluation Period From Date                                                  2025-08-20
Evaluation Period To Date                                                    2025-05-31
Executive Summary                                                                   Yes
External Version of the Report                                                       No
Languages                                                                       English
Migration Thematic Areas                                 Organisational policy/strategy
Name of Project(s) Being Evaluated                                                  NaN
Number of Pages Excluding annexes                                                  44.0
Other Documents Included                                                            NaN
Project Code                                                                        NaN
Countries Covered                                                             Worldwide
Regions Covered                                                               HQ Geneva
Relevant Crosscutting Themes                                                        NaN
Report Published                                                                    Yes
Terms of Reference                                                                  Yes
Type of Evaluation Scope                                                       Strategy
Type of Evaluation Timing                                                Not applicable
Type of Evaluator                                                              External
Level of Evaluation                                                         Centralized
Document Subtype                      Evaluation report, Evaluation brief, Annexes, ...
File URL                              https://evaluation.iom.int/sites/g/files/tmzbd...
File description                      Evaluation Report, Evaluation Brief, Annex VI ...
Management response                                                                  No
Date added                                                      Thu, 08/07/2025 - 23:52
Name: 0, dtype: object

Each evaluation needs a unique identifier. Since the CSV doesn’t include one, we generate an MD5 hash from key fields:

Exported source
@patch
def _mk_id(self:IOMRepoReader, 
           row # DataFrame row containing evaluation metadata
          ):
    "Derive an evaluation id: MD5 hex digest of the configured key fields concatenated as text"
    key_fields = self.cfg['id_gen']['fields']
    # Values are stringified and joined with no separator before hashing
    parts = [str(row[name]) for name in key_fields]
    digest = hashlib.md5(''.join(parts).encode('utf-8'))
    return digest.hexdigest()
reader = IOMRepoReader(fname)
df_test = reader.read()
eval_id = reader._mk_id(df_test.iloc[0])
test_eq(len(eval_id), 32)
test_eq(eval_id, '9992310969aa2f428bc8aba29f865cf3')

IOM evaluations can have multiple associated documents (reports, briefs, annexes). The CSV stores these as comma-separated values in three parallel fields. We parse and combine them into structured document records:

Document Handling

Exported source
@patch
def _mk_docs(self:IOMRepoReader, 
             row # DataFrame row with document fields
            ):
    "Parse the three parallel document fields into a list of `dict(subtype, url, desc)` records"
    def _parts(val):
        # A missing cell (NaN/None) must count as empty text: `str(nan)` is the literal
        # 'nan', which would otherwise pass the URL filter below as a bogus document.
        text = '' if (not isinstance(val, str) and pd.isna(val)) else str(val)
        return [p.strip() for p in text.split(', ')]

    stypes = _parts(row['Document Subtype'])
    urls = _parts(row['File URL'])
    descs = _parts(row['File description'])
    # NOTE: a description that itself contains ', ' is split into several parts, and
    # zip() stops at the shortest list, so records can be misaligned or truncated.
    # Entries with an empty URL are dropped.
    return [dict(subtype=st, url=u, desc=d) for st,u,d in zip(stypes,urls,descs) if u]
reader = IOMRepoReader(fname)
df_test = reader.read()
reader._mk_docs(df_test.iloc[0])
[{'subtype': 'Evaluation report',
  'url': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Annex%20VI%20Case%20Study%20-%20RDH%20East%2C%20Horn%20and%20Southern%20Africa.pdf',
  'desc': 'Evaluation Report'},
 {'subtype': 'Evaluation brief',
  'url': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Annex%20VII%20Case%20Study%20-%20RDH%20Asia-Pacific.pdf',
  'desc': 'Evaluation Brief'},
 {'subtype': 'Annexes',
  'url': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Annex%20VIII%20-%20Inception%20Report.pdf',
  'desc': 'Annex VI Case Study - RDH East'},
 {'subtype': 'Annexes',
  'url': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Evaluation%20Brief.pdf',
  'desc': 'Horn and Southern Africa'},
 {'subtype': 'Annexes',
  'url': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/IOM%20MDS%20Evaluation%20Report%20-%20clean_0.pdf',
  'desc': 'Annex VII Case Study - RDH Asia-Pacific'},
 {'subtype': 'Special related reports/documents',
  'url': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Migration%20Data%20Evaluation%20infographics.pdf',
  'desc': 'Annex VIII - Inception Report'}]

Data Processing

Exported source
@patch
def _proc_dates(self:IOMRepoReader, df):
    "Cast the configured date columns to strings; `df` is modified in place and returned"
    date_cols = self.cfg['date_cols']
    as_text = df[date_cols].astype(str)
    df[date_cols] = as_text
    return df
reader = IOMRepoReader(fname)
df_test = reader.read()
df_proc = reader._proc_dates(df_test)
test_eq(df_proc['Date of Publication'].dtype, 'string')
test_eq(df_proc['Evaluation Period From Date'].dtype, 'string')
test_eq(df_proc['Evaluation Period To Date'].dtype, 'string')
df_proc['Date of Publication'].iloc[0]
'2025-08-11'
Exported source
@patch
def _proc_lists(self:IOMRepoReader, df):
    "Split each configured list column into a list of trimmed, non-empty items; `df` is modified in place and returned"
    def _tidy(parts):
        # Trim whitespace around every item and drop the ones left empty
        return [item.strip() for item in parts if item.strip()]

    # Only the 'separator' entry of each field config is consulted here
    for col, opts in self.cfg['list_fields'].items():
        as_text = df[col].fillna('').astype(str)  # missing cells become '' -> empty list
        df[col] = as_text.str.split(opts['separator']).apply(_tidy)
    return df
reader = IOMRepoReader(fname)
df_test = reader.read()
df_proc = reader._proc_lists(df_test)
test_eq(type(df_proc['Countries Covered'].iloc[190]), list)
df_proc['Countries Covered'].iloc[190]
['Austria', 'Greece', 'Italy', 'Malta', 'Poland', 'Romania', 'Spain']

IOMRepoReader.tfm


def tfm(
    df:DataFrame
):

Transform raw dataframe to evaluation objects

Exported source
@patch
def _to_dict(self:IOMRepoReader, row):
    "Split a processed row into `id`, `docs`, and a `meta` dict holding every other column"
    reserved = ('id', 'docs')
    meta = {name: row[name] for name in row.index if name not in reserved}
    return {'id': row['id'], 'docs': row['docs'], 'meta': meta}
Exported source
@patch
def _to_eval(self:IOMRepoReader, row):
    "Build an `Evaluation` from a processed row: `id`, `docs`, and all remaining columns as `meta`"
    # `_to_dict` yields exactly the id/docs/meta keyword arguments `Evaluation` takes
    fields = self._to_dict(row)
    return Evaluation(**fields)
Exported source
@patch
def tfm(self:IOMRepoReader, df:pd.DataFrame):
    "Transform raw dataframe to evaluation objects"
    # Work on a copy: the processing helpers modify their input in place
    work = df.copy()
    work = self._proc_dates(work)
    work = self._proc_lists(work)
    # Row-wise derived columns: hashed id first, then the parsed document records
    work['id'] = work.apply(self._mk_id, axis=1)
    work['docs'] = work.apply(self._mk_docs, axis=1)
    rows = (row for _, row in work.iterrows())
    return [self._to_eval(row) for row in rows]
reader = IOMRepoReader(fname)
evals = reader()
evals[0]

EVALUATION OF IOM’S MIGRATION DATA STRATEGY

Year: 2025 | Organization: IOM | Countries: Worldwide

Documents: 6 available
ID: 9992310969aa2f428bc8aba29f865cf3

Finally, to_json runs the full pipeline and saves the results:


IOMRepoReader.to_json


def to_json(
    out_path:Path
):
Exported source
@patch
def to_json(self:IOMRepoReader, out_path:Path):
    "Run the read/transform pipeline and write the evaluations to `out_path` as a JSON array"
    # Calling the reader runs `read` then `tfm`; each result is flattened to plain id/docs/meta
    records = [{'id': ev.id, 'docs': ev.docs, 'meta': ev.meta} for ev in self()]
    # UTF-8 with ensure_ascii=False keeps non-ASCII characters readable in the file
    with open(out_path, 'w', encoding='utf-8') as fh:
        json.dump(records, fh, indent=4, ensure_ascii=False)
reader = IOMRepoReader(fname)
out_path = Path('files/test/iom_evals_test.json')
reader.to_json(out_path)
out_path.exists()
True

To use the reader:

reader = IOMRepoReader(fname)
evaluations = reader()

The reader returns a list of Evaluation objects (written out by to_json as a list of JSON objects), where each one represents an evaluation with:

  • id: A unique MD5 hash identifier generated from specified fields
  • docs: A list of associated documents, each containing:
    • subtype: Type of evaluation document (e.g. report, brief), taken from the "Document Subtype" column
    • url: Direct link to download the document, taken from the "File URL" column
    • desc: Brief description of the document contents, taken from the "File description" column
  • meta: Additional metadata about the evaluation

Then serialize as json for further use:

reader.to_json('files/test/evaluations.json')

Utilities


load_evals


def load_evals(
    json_file
):

Load evaluations from JSON file

Exported source
def load_evals(json_file):
    "Load evaluations from a JSON file (a list of id/docs/meta objects) into an `L` of `Evaluation`"
    # Decode explicitly as UTF-8 rather than the platform default: `to_json` writes the
    # file as UTF-8 with non-ASCII characters left unescaped.
    raw = Path(json_file).read_text(encoding='utf-8')
    return L([Evaluation(**o) for o in json.loads(raw)])
fname = 'files/test/evaluations.json'
evals = load_evals(fname)
evals[0]

EVALUATION OF IOM’S MIGRATION DATA STRATEGY

Year: 2025 | Organization: IOM | Countries: Worldwide

Documents: 6 available
ID: 9992310969aa2f428bc8aba29f865cf3

Find eval by title or url


in_docs


def in_docs(
    ev:Evaluation, # Evaluation object
    url:str, # URL of an evaluation report
):

Check if a URL is in the documents of an evaluation

Exported source
def in_docs(
    ev:Evaluation, # Evaluation object
    url:str # URL of an evaluation report 
    ):
    "Return True when one of the evaluation's documents has exactly this `url`"
    def _same_url(doc): return doc['url'] == url
    hits = L(ev.docs).filter(_same_url)
    return any(hits)
url = "https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/AAP%20Evaluation%20Report_final_.pdf"
fname = 'files/test/evaluations.json'
evals = load_evals(fname)
ev = first(evals.filter(lambda x: x.id == '6c3c2cf3fa479112967612b0baddab72'))

test_eq(in_docs(ev, url), True)
test_eq(in_docs(ev, "https://fake.url/nothere.pdf"), False)

find_eval


def find_eval(
    evals:list, # List of evaluations
    query:str, # Title or URL of evaluation
    by:str='title', # 'title' or 'url'
):

Find evaluation by title or URL

title = 'Evaluation of IOM Accountability to Affected Populations'
url = "https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/AAP%20Evaluation%20Report_final_.pdf"
test_eq(find_eval(evals, title, by='title').id, '6c3c2cf3fa479112967612b0baddab72')
test_eq(find_eval(evals, url, by='url').id, '6c3c2cf3fa479112967612b0baddab72')
test_eq(find_eval(evals, 'Nonexistent Title', by='title'), None)
test_eq(find_eval(evals, 'https://fake.url/nowhere.pdf', by='url'), None)
find_eval(evals, title, by='title')

Evaluation of IOM Accountability to Affected Populations

Year: 2025 | Organization: IOM | Countries: Worldwide

Documents: 4 available
ID: 6c3c2cf3fa479112967612b0baddab72