Readers

This module provides a unified interface for loading evaluation data from organizational repositories (IOM, UNHCR, etc.) and transforming it into standardized JSON format.

Components:

EvalReader: Base interface for all repository readers
IOMRepoReader: Reads IOM evaluation CSV exports from evaluation.iom.int/evaluation-search-pdf
load_evals: Loads processed evaluation JSON files

Purpose:

Transform diverse evaluation data sources into a consistent format for downstream analysis and processing.

The EvalReader interface defines a common contract that all evaluation repository readers (e.g. IOM, UNHCR) must implement:

Core Reader Interface

source

EvalReader


def EvalReader(
    cfg:dict, # Configuration dict with field mappings and processing rules
):

Initialize self. See help(type(self)) for accurate signature.

Exported source

class EvalReader:
    """Base interface for evaluation repository readers.

    `read`, `tfm` and `to_json` are placeholders that raise
    `NotImplementedError` until a subclass overrides them. Calling an
    instance runs `read` and passes its result to `tfm`.
    """

    def __init__(self, 
                 cfg:dict # Configuration dict with field mappings and processing rules
                ):
        # Keeps the constructor arguments on the instance (e.g. `self.cfg`)
        store_attr()

    def read(self):
        "Not implemented here; subclasses must override."
        raise NotImplementedError

    def tfm(self, df):
        "Not implemented here; subclasses must override."
        raise NotImplementedError

    def to_json(self, output_path):
        "Not implemented here; subclasses must override."
        raise NotImplementedError

    def __call__(self):
        "Run `read` and return the result of `tfm` applied to its output."
        return self.tfm(self.read())

source

iom_input_cfg


def iom_input_cfg(
    
):

Exported source

def iom_input_cfg():
    "Return a fresh configuration dict for processing IOM CSV export columns."
    date_cols = ['Date of Publication', 'Evaluation Period From Date', 'Evaluation Period To Date']
    # Columns holding several values in one cell, with the separator used to split them
    list_fields = {'Countries Covered': dict(separator=',', clean=True)}
    id_gen = dict(method='md5', fields=['Title', 'Year', 'Project Code'])  # fields to hash
    field_mappings = dict(Title='title', Year='year')  # other mappings
    return dict(
        date_cols=date_cols,
        string_cols=['Year'],
        list_fields=list_fields,
        document_fields=['Document Subtype', 'File URL', 'File description'],
        id_gen=id_gen,
        field_mappings=field_mappings,
    )

source

Evaluation


def Evaluation(
    id:str, docs:list, meta:dict
)->None:

An evaluation with rich notebook display

Exported source

@dataclass
class Evaluation:
    "An evaluation with rich notebook display"
    id:str     # evaluation identifier
    docs:list  # document records attached to the evaluation
    meta:dict  # remaining evaluation metadata, keyed by field name

    def _repr_markdown_(self):
        "Markdown summary: title, year, commissioner, up to three countries, document count and id."
        meta = self.meta
        countries = meta.get('Countries Covered', [])
        if countries:
            country_str = ', '.join(countries[:3])
        else:
            country_str = 'Not specified'
        # Only the first three countries are spelled out; the rest are counted
        extra = len(countries) - 3
        if extra > 0: country_str += f' (+{extra} more)'

        title = meta.get('Title', 'Untitled')
        year = meta.get('Year', 'n/a')
        org = meta.get('Evaluation Commissioner', 'Unknown')
        lines = [
            "",
            f"### {title}",
            f"**Year:** {year} | **Organization:** {org} | **Countries:** {country_str}",
            "",
            # Two trailing spaces force a markdown line break before the ID line
            f"**Documents:** {len(self.docs)} available" + "  ",
            f"**ID:** `{self.id}`",
            "",
        ]
        return "\n".join(lines)

IOM Reader

source

IOMRepoReader


def IOMRepoReader(
    fname:Path, # Path to the CSV export file
):

Initialize self. See help(type(self)) for accurate signature.

Exported source

class IOMRepoReader(EvalReader):
    "Reader for IOM evaluation CSV exports, configured with `iom_input_cfg()`."
    def __init__(self, 
                 fname:Path # Path to the CSV export file
                 ): 
        # The base class receives the IOM-specific configuration
        super().__init__(iom_input_cfg())
        # Keeps `fname` on the instance as `self.fname`
        store_attr()

The read method loads the raw CSV data from the IOM repository export:

source

IOMRepoReader.read


def read(
    
):

Exported source

@patch
def read(self:IOMRepoReader):
    "Load the CSV export at `self.fname` into a DataFrame."
    raw_df = pd.read_csv(self.fname)
    return raw_df

#fname = 'files/test/evaluation-search-export-11_13_2025--18_09_44.csv'
fname = 'files/test/evaluation-search-export-01_27_2026--21_43_30.csv'
reader = IOMRepoReader(fname)
reader.read().iloc[0]

Title                                       EVALUATION OF IOM’S MIGRATION DATA STRATEGY
Year                                                                               2025
Author                                                           IOM CENTRAL EVALUATION
Best Practicesor Lessons Learnt                                                     Yes
Date of Publication                                                          2025-08-11
Donor                                                                               IOM
Evaluation Brief                                                                    Yes
Evaluation Commissioner                                                             IOM
Evaluation Coverage                                                              Global
Evaluation Period From Date                                                  2025-08-20
Evaluation Period To Date                                                    2025-05-31
Executive Summary                                                                   Yes
External Version of the Report                                                       No
Languages                                                                       English
Migration Thematic Areas                                 Organisational policy/strategy
Name of Project(s) Being Evaluated                                                  NaN
Number of Pages Excluding annexes                                                  44.0
Other Documents Included                                                            NaN
Project Code                                                                        NaN
Countries Covered                                                             Worldwide
Regions Covered                                                               HQ Geneva
Relevant Crosscutting Themes                                                        NaN
Report Published                                                                    Yes
Terms of Reference                                                                  Yes
Type of Evaluation Scope                                                       Strategy
Type of Evaluation Timing                                                Not applicable
Type of Evaluator                                                              External
Level of Evaluation                                                         Centralized
Document Subtype                      Evaluation report, Evaluation brief, Annexes, ...
File URL                              https://evaluation.iom.int/sites/g/files/tmzbd...
File description                      Evaluation Report, Evaluation Brief, Annex VI ...
Management response                                                                  No
Date added                                                      Thu, 08/07/2025 - 23:52
Name: 0, dtype: object

df = reader.read()
len(df)

dstrat = df.iloc[0]
dstrat

Title                                       EVALUATION OF IOM’S MIGRATION DATA STRATEGY
Year                                                                               2025
Author                                                           IOM CENTRAL EVALUATION
Best Practicesor Lessons Learnt                                                     Yes
Date of Publication                                                          2025-08-11
Donor                                                                               IOM
Evaluation Brief                                                                    Yes
Evaluation Commissioner                                                             IOM
Evaluation Coverage                                                              Global
Evaluation Period From Date                                                  2025-08-20
Evaluation Period To Date                                                    2025-05-31
Executive Summary                                                                   Yes
External Version of the Report                                                       No
Languages                                                                       English
Migration Thematic Areas                                 Organisational policy/strategy
Name of Project(s) Being Evaluated                                                  NaN
Number of Pages Excluding annexes                                                  44.0
Other Documents Included                                                            NaN
Project Code                                                                        NaN
Countries Covered                                                             Worldwide
Regions Covered                                                               HQ Geneva
Relevant Crosscutting Themes                                                        NaN
Report Published                                                                    Yes
Terms of Reference                                                                  Yes
Type of Evaluation Scope                                                       Strategy
Type of Evaluation Timing                                                Not applicable
Type of Evaluator                                                              External
Level of Evaluation                                                         Centralized
Document Subtype                      Evaluation report, Evaluation brief, Annexes, ...
File URL                              https://evaluation.iom.int/sites/g/files/tmzbd...
File description                      Evaluation Report, Evaluation Brief, Annex VI ...
Management response                                                                  No
Date added                                                      Thu, 08/07/2025 - 23:52
Name: 0, dtype: object

dstrat['Document Subtype']

'Evaluation report, Evaluation brief, Annexes, Annexes, Annexes, Special related reports/documents'

dstrat['File URL']

'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Annex%20VI%20Case%20Study%20-%20RDH%20East%2C%20Horn%20and%20Southern%20Africa.pdf,   https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Annex%20VII%20Case%20Study%20-%20RDH%20Asia-Pacific.pdf,   https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Annex%20VIII%20-%20Inception%20Report.pdf,   https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Evaluation%20Brief.pdf,   https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/IOM%20MDS%20Evaluation%20Report%20-%20clean_0.pdf,   https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Migration%20Data%20Evaluation%20infographics.pdf'

dstrat['File description']

'Evaluation Report, Evaluation Brief, Annex VI Case Study - RDH East, Horn and Southern Africa, Annex VII Case Study - RDH Asia-Pacific, Annex VIII - Inception Report, Infographics'

Each evaluation needs a unique identifier. Since the CSV doesn’t include one, we generate an MD5 hash from key fields:

Exported source

@patch
def _mk_id(self:IOMRepoReader, 
           row # DataFrame row containing evaluation metadata
          ):
    "Hash the configured `id_gen` fields of `row` into a hex digest identifier."
    id_cfg = self.cfg['id_gen']
    # Values are stringified and concatenated without a separator; this exact
    # recipe must stay unchanged so previously generated ids remain valid.
    id_str = ''.join(str(row[f]) for f in id_cfg['fields'])
    # Honour the configured algorithm; 'md5' (the former hard-coded choice) is the fallback
    return hashlib.new(id_cfg.get('method', 'md5'), id_str.encode('utf-8')).hexdigest()

reader = IOMRepoReader(fname)
df_test = reader.read()
eval_id = reader._mk_id(df_test.iloc[0])
test_eq(len(eval_id), 32)
test_eq(eval_id, '9992310969aa2f428bc8aba29f865cf3')

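IOM evaluations can have multiple associated documents (reports, briefs, annexes). The CSV stores these as comma-separated values in three fields (Document Subtype, File URL, File description). We parse and combine them position by position into structured document records. Note that this pairing is only as reliable as the export: as the example output below shows, the URLs are not necessarily listed in the same order as the subtypes and descriptions, and a description that itself contains a comma is split into several entries, so a record's subtype, URL and description may not belong to the same document.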

Document Handling

Exported source

@patch
def _mk_docs(self:IOMRepoReader, 
             row # DataFrame row with document fields
            ):
    "Parse document fields into structured records"
    def _split(val):
        # A missing cell (None / float NaN) is treated as empty text; stringifying
        # it would yield the literal 'nan' and a bogus document record.
        is_missing = val is None or (isinstance(val, float) and val != val)
        text = '' if is_missing else str(val)
        return [part.strip() for part in text.split(', ')]

    stypes = _split(row['Document Subtype'])
    urls = _split(row['File URL'])
    descs = _split(row['File description'])
    # NOTE(review): the three fields are paired purely by position and `zip`
    # stops at the shortest list. Descriptions containing ', ' or fields listed
    # in a different order will therefore be misaligned — confirm against the export.
    return [dict(subtype=st, url=u, desc=d) for st,u,d in zip(stypes,urls,descs) if u]

reader = IOMRepoReader(fname)
df_test = reader.read()
reader._mk_docs(df_test.iloc[0])

[{'subtype': 'Evaluation report',
  'url': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Annex%20VI%20Case%20Study%20-%20RDH%20East%2C%20Horn%20and%20Southern%20Africa.pdf',
  'desc': 'Evaluation Report'},
 {'subtype': 'Evaluation brief',
  'url': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Annex%20VII%20Case%20Study%20-%20RDH%20Asia-Pacific.pdf',
  'desc': 'Evaluation Brief'},
 {'subtype': 'Annexes',
  'url': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Annex%20VIII%20-%20Inception%20Report.pdf',
  'desc': 'Annex VI Case Study - RDH East'},
 {'subtype': 'Annexes',
  'url': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Evaluation%20Brief.pdf',
  'desc': 'Horn and Southern Africa'},
 {'subtype': 'Annexes',
  'url': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/IOM%20MDS%20Evaluation%20Report%20-%20clean_0.pdf',
  'desc': 'Annex VII Case Study - RDH Asia-Pacific'},
 {'subtype': 'Special related reports/documents',
  'url': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Migration%20Data%20Evaluation%20infographics.pdf',
  'desc': 'Annex VIII - Inception Report'}]

Data Processing

Exported source

@patch
def _proc_dates(self:IOMRepoReader, df):
    "Cast the configured `date_cols` of `df` to strings (modifying `df`) and return it."
    date_cols = self.cfg['date_cols']
    as_text = df[date_cols].astype(str)
    df[date_cols] = as_text
    return df

reader = IOMRepoReader(fname)
df_test = reader.read()
df_proc = reader._proc_dates(df_test)

test_eq(df_proc['Date of Publication'].dtype, 'string')
test_eq(df_proc['Evaluation Period From Date'].dtype, 'string')
test_eq(df_proc['Evaluation Period To Date'].dtype, 'string')
df_proc['Date of Publication'].iloc[0]

'2025-08-11'

Exported source

@patch
def _proc_lists(self:IOMRepoReader, df):
    "Split each configured `list_fields` column of `df` into lists of stripped, non-empty items."
    def _clean(parts):
        # Drop blank pieces and trim whitespace around the remaining ones
        return [p.strip() for p in parts if p.strip()]

    # Only the 'separator' setting of each field is consulted here
    for col, col_cfg in self.cfg['list_fields'].items():
        text = df[col].fillna('').astype(str)
        df[col] = text.str.split(col_cfg['separator']).apply(_clean)
    return df

reader = IOMRepoReader(fname)
df_test = reader.read()
df_proc = reader._proc_lists(df_test)

test_eq(type(df_proc['Countries Covered'].iloc[190]), list)
df_proc['Countries Covered'].iloc[190]

['Austria', 'Greece', 'Italy', 'Malta', 'Poland', 'Romania', 'Spain']

source

IOMRepoReader.tfm


def tfm(
    df:DataFrame
):

Transform raw dataframe to evaluation objects

Exported source

@patch
def _to_dict(self:IOMRepoReader, row):
    "Convert row to evaluation dict"
    # Every field except the generated 'id' and 'docs' goes into `meta`
    meta = {}
    for col in row.index:
        if col in ('id', 'docs'): continue
        meta[col] = row[col]
    return {'id': row['id'], 'docs': row['docs'], 'meta': meta}

Exported source

@patch
def _to_eval(self:IOMRepoReader, row):
    "Convert row to Evaluation object"
    # `_to_dict` already yields the id/docs/meta split, i.e. exactly the
    # Evaluation fields, so reuse it instead of duplicating the logic.
    return Evaluation(**self._to_dict(row))

Exported source

@patch
def tfm(self:IOMRepoReader, df:pd.DataFrame):
    "Transform raw dataframe to evaluation objects"
    # With no rows, row-wise `apply` does not return a Series, so the column
    # assignments below would fail; an empty export simply has no evaluations.
    if len(df) == 0: return []
    # Work on a copy so the caller's dataframe is left untouched
    df_proc = self._proc_lists(self._proc_dates(df.copy()))
    df_proc['id'] = df_proc.apply(self._mk_id, axis=1)
    df_proc['docs'] = df_proc.apply(self._mk_docs, axis=1)
    return [self._to_eval(row) for _,row in df_proc.iterrows()]

reader = IOMRepoReader(fname)
evals = reader()
evals[0]

EVALUATION OF IOM’S MIGRATION DATA STRATEGY

Year: 2025 | Organization: IOM | Countries: Worldwide

Documents: 6 available
ID: 9992310969aa2f428bc8aba29f865cf3

source

get_report_urls


def get_report_urls(
    
):

Get title->URL lookup dict from IOM UNEG Evaluation API

report_urls = get_report_urls()
dict(list(report_urls.items())[:2])

{'WESTERN BALKANS ASSISTED VOLUNTARY RETURN AND REINTEGRATION PROGRAMME PHASE II': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/WBRR%20Phase%20II%20Final%20Evaluation%20Report%20%2827%20June%202025%29%20%281%29.pdf',
 'Co-Funding Mechanism Internal Evaluation': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/CoFunding%20Mechanism%20Internal%20Evaluation%20Repor_Elise%20Caroline%20Anais.pdf'}

def get_uneg_url(ev, report_urls):
    "Get UNEG report URL for an evaluation"
    title = ev.meta['Title']
    # Replace non-breaking spaces and trim before looking the title up
    key = title.replace('\xa0', ' ').strip()
    return report_urls.get(key)

For instance:

get_uneg_url(evals[0], report_urls)

'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/IOM%20MDS%20Evaluation%20Report%20-%20clean_0_1.pdf'

Finally, to_json runs the full pipeline and saves the results:

source

IOMRepoReader.to_json


def to_json(
    out_path:Path
):

reader = IOMRepoReader(fname)
out_path = Path('files/test/iom_evals_test.json')
reader.to_json(out_path)
out_path.exists()

True

To use the reader:

reader = IOMRepoReader(fname)
evaluations = reader()

The reader produces a list of `Evaluation` objects (which `to_json` saves as JSON objects), where each one represents an evaluation with:

id: A unique MD5 hash identifier generated from specified fields
docs: A list of associated documents, each containing:
- subtype: Type of evaluation document (e.g. report, brief), from the "Document Subtype" column
- url: Direct link to download the document, from the "File URL" column
- desc: Brief description of the document contents, from the "File description" column
meta: Additional metadata about the evaluation

Then serialize as json for further use:

reader.to_json('files/test/evaluations.json')

Utilities

source

load_evals


def load_evals(
    json_file
):

Load evaluations from JSON file

Exported source

def load_evals(json_file):
    "Load evaluations from JSON file"
    raw = Path(json_file).read_text()
    records = json.loads(raw)
    # Each record's keys are passed as the Evaluation constructor arguments
    return L([Evaluation(**rec) for rec in records])

fname = 'files/test/evaluations.json'
evals = load_evals(fname)
evals[0]

EVALUATION OF IOM’S MIGRATION DATA STRATEGY

Year: 2025 | Organization: IOM | Countries: Worldwide

Documents: 7 available
ID: 9992310969aa2f428bc8aba29f865cf3

Find eval

source

in_docs


def in_docs(
    ev:Evaluation, # Evaluation object
    url:str, # URL of an evaluation report
):

Check if a URL is in the documents of an evaluation

Exported source

def in_docs(
    ev:Evaluation, # Evaluation object
    url:str # URL of an evaluation report 
    ):
    "Check if a URL is in the documents of an evaluation" 
    # Stop at the first matching document instead of building a filtered list;
    # missing docs (None) count as "no documents".
    return any(doc['url'] == url for doc in (ev.docs or ()))

url = "https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/AAP%20Evaluation%20Report_final_.pdf"
fname = 'files/test/evaluations.json'
evals = load_evals(fname)
ev = first(evals.filter(lambda x: x.id == '6c3c2cf3fa479112967612b0baddab72'))

test_eq(in_docs(ev, url), True)
test_eq(in_docs(ev, "https://fake.url/nothere.pdf"), False)

source

find_eval


def find_eval(
    evals:list, # List of evaluations
    query:str, # Title or URL of evaluation
    by:str='title', # 'title', 'url' or 'id'
):

Find evaluation by title, URL or id

title = 'Evaluation of IOM Accountability to Affected Populations'
url = "https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/AAP%20Evaluation%20Report_final_.pdf"
test_eq(find_eval(evals, title, by='title').id, '6c3c2cf3fa479112967612b0baddab72')
test_eq(find_eval(evals, url, by='url').id, '6c3c2cf3fa479112967612b0baddab72')
test_eq(find_eval(evals, 'Nonexistent Title', by='title'), None)
test_eq(find_eval(evals, 'https://fake.url/nowhere.pdf', by='url'), None)
test_eq(find_eval(evals, '6c3c2cf3fa479112967612b0baddab72', by='id').meta['Title'], 'Evaluation of IOM Accountability to Affected Populations')

find_eval(evals, title, by='title')

Evaluation of IOM Accountability to Affected Populations

Year: 2025 | Organization: IOM | Countries: Worldwide

Documents: 5 available
ID: 6c3c2cf3fa479112967612b0baddab72

def get_sections(id, data_path='../iomeval/data'):
    "Load report and return extracted sections markdown"
    report = load_report(id, data_path)
    markdown = read_pgs(report.md_path)
    headings = report.selected_headings
    # Without selected headings the whole markdown is returned as-is
    if not headings: return markdown
    return extract_sections(markdown, selected_headings=headings)

source

eval_url


def eval_url(
    ev:Evaluation
):

Get evaluation report URL, preferring UNEG source

eval_url(find_eval(evals, title, by='title'))

'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/AAP%20Evaluation%20Report_final_.pdf'