import pandas as pd
from evaluatr.readers import load_evals
from pathlib import Path
from fastcore.all import *
from rich import print
from matplotlib import pyplot as plt
import seaborn as snsEvaluation reports EDA
IOM Reports Exploratory Data Analysis
Imports
Configs
path = Path("../../_data/output/evaluations.json")print(load_evals(path).filter(lambda x: "Migration" in x['meta']['Title'].lower()))[]
Accessing evaluation report
By ID
idx = '9558482be1b049827f630c1760025c0f'
[o for o in load_evals(path).filter(lambda x: x['id'] == idx)][{'id': '9558482be1b049827f630c1760025c0f',
'docs': [{'Document Subtype': 'Evaluation brief',
'File URL': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/IOM-CREST%20Evaluation-Brief-D3_Thi%20Bao%20Chau%20NGUYEN.pdf',
'File description': 'Evaluation Brief'},
{'Document Subtype': 'Annexes',
'File URL': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/IOM-CREST%20Evaluation-D3_Thi%20Bao%20Chau%20NGUYEN.pdf',
'File description': 'Terms of Reference'},
{'Document Subtype': 'Evaluation report',
'File URL': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Management%20Response_March%202024%20%281%29.docx',
'File description': 'Evaluation Report'},
{'Document Subtype': 'Management response',
'File URL': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/ToR_Final%20Evaluation_CREST_Thi%20Bao%20Chau%20NGUYEN.pdf',
'File description': 'Management Response'}],
'meta': {'Title': 'Final Evaluation of the Enhancing Corporate Responsibility in Eliminating Slavery and Trafficking in Asia (CREST) Project',
'Year': 2023,
'Author': 'UPENDO Consulting Inc.',
'Best Practicesor Lessons Learnt': 'Yes',
'Date of Publication': '2023-08-01',
'Donor': 'Government of Sweden',
'Evaluation Brief': 'Yes',
'Evaluation Commissioner': 'IOM',
'Evaluation Coverage': 'Regional',
'Evaluation Period From Date': 'nan',
'Evaluation Period To Date': 'NaT',
'Executive Summary': 'Yes',
'External Version of the Report': 'Yes',
'Languages': 'English',
'Migration Thematic Areas': 'Assistance to vulnerable migrants',
'Name of Project(s) Being Evaluated': nan,
'Number of Pages Excluding annexes': nan,
'Other Documents Included': nan,
'Project Code': 'LM.0331',
'Countries Covered': ['Bangladesh',
'Cambodia',
'China',
'Hong Kong SAR',
'Hong Kong SAR',
'China',
'Indonesia',
"Lao People's Democratic Republic",
'Malaysia',
'Myanmar',
'Nepal',
'Philippines',
'Republic of Korea',
'Sri Lanka',
'Thailand',
'Viet Nam'],
'Regions Covered': 'RO Bangkok',
'Relevant Crosscutting Themes': 'Accountability to affected populations, Environment, Gender, Rights-based approach',
'Report Published': 'Yes',
'Terms of Reference': 'Yes',
'Type of Evaluation Scope': 'Programme/Project',
'Type of Evaluation Timing': 'Final (at the end of the project/programme)',
'Type of Evaluator': 'External',
'Level of Evaluation': 'Decentralized',
'Document Subtype': 'Evaluation brief, Annexes, Evaluation report, Management response',
'File URL': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/IOM-CREST%20Evaluation-Brief-D3_Thi%20Bao%20Chau%20NGUYEN.pdf, https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/IOM-CREST%20Evaluation-D3_Thi%20Bao%20Chau%20NGUYEN.pdf, https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Management%20Response_March%202024%20%281%29.docx, https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/ToR_Final%20Evaluation_CREST_Thi%20Bao%20Chau%20NGUYEN.pdf',
'File description': 'Evaluation Brief, Terms of Reference, Evaluation Report, Management Response',
'Management response': 'Yes',
'Date added': 'Tue, 10/03/2023 - 17:05',
'Metaevaluation': '2020-24',
'exclude': nan,
'reason': nan}}]
By Title
title = 'Review of the use and follow-up of'
title = "Impact evaluation of the UN Secretary General’s Peacebuilding"
title = "Evaluation of Mental Health and Psychosocial Support in IOM"
[o for o in load_evals(path).filter(lambda x: title.lower() in x['meta']['Title'].lower())][{'id': '22cac1c000836253adc445993e101560',
'docs': [{'Document Subtype': 'Evaluation report',
'File URL': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Final%20Report%20Evaluation%20of%20MHPSS%20in%20IOM%208%202024.pdf',
'File description': 'nan'}],
'meta': {'Title': 'Evaluation of Mental Health and Psychosocial Support in IOM',
'Year': 2024,
'Author': 'IOM CENTRAL EVALUATION',
'Best Practicesor Lessons Learnt': 'Yes',
'Date of Publication': '2024-08-23',
'Donor': 'IOM',
'Evaluation Brief': 'Yes',
'Evaluation Commissioner': 'IOM',
'Evaluation Coverage': 'Global',
'Evaluation Period From Date': 'nan',
'Evaluation Period To Date': 'NaT',
'Executive Summary': nan,
'External Version of the Report': nan,
'Languages': 'English',
'Migration Thematic Areas': 'Migration and Development - mainstreaming migration into development, Migration health (assessment, travel, health promotion, crisis-affected), Migration health policy and partnerships, Multiple thematic overview, Organisational policy/strategy',
'Name of Project(s) Being Evaluated': nan,
'Number of Pages Excluding annexes': nan,
'Other Documents Included': nan,
'Project Code': nan,
'Countries Covered': ['Worldwide'],
'Regions Covered': 'Global',
'Relevant Crosscutting Themes': 'Gender, Rights-based approach',
'Report Published': 'Yes',
'Terms of Reference': 'Yes',
'Type of Evaluation Scope': 'Strategy, Thematic',
'Type of Evaluation Timing': 'Not applicable',
'Type of Evaluator': 'Central/OIG',
'Level of Evaluation': 'Centralized',
'Document Subtype': 'Evaluation report, Evaluation brief',
'File URL': 'https://evaluation.iom.int/sites/g/files/tmzbdl151/files/docs/resources/Final%20Report%20Evaluation%20of%20MHPSS%20in%20IOM%208%202024.pdf',
'File description': nan,
'Management response': nan,
'Date added': 'Mon, 08/26/2024 - 09:29',
'Metaevaluation': nan,
'exclude': nan,
'reason': nan}}]
Counts & Bar Charts per type
import pandas as pd
import json
from pathlib import Path
def create_evaluation_dataframes(json_path):
"""
Create multiple DataFrames optimized for different types of analysis
"""
with open(json_path) as f:
evals = json.load(f)
# Main DataFrame (evaluation level)
eval_records = []
for eval_record in evals:
record = {'id': eval_record['id']}
# Flatten metadata, handling both lists and strings
for key, value in eval_record['meta'].items():
if isinstance(value, list):
record[f"{key}_count"] = len(value)
record[f"{key}_concatenated"] = '; '.join(value) if value else None
record[key] = value # Keep original list for reference
elif isinstance(value, str) and ',' in value and key in ['Migration Thematic Areas', 'Relevant Crosscutting Themes']:
# Handle comma-separated strings as if they were lists
items = [item.strip() for item in value.split(',') if item.strip()]
record[f"{key}_count"] = len(items)
record[f"{key}_concatenated"] = value
record[key] = items # Convert to list for consistency
else:
record[key] = value
record['doc_count'] = len(eval_record['docs'])
eval_records.append(record)
df_main = pd.DataFrame(eval_records)
# Exploded DataFrames for multi-valued fields
exploded_dfs = {}
list_fields = ['Countries Covered', 'Migration Thematic Areas', 'Relevant Crosscutting Themes']
for field in list_fields:
if field in df_main.columns:
# Use the list version we created above
temp_df = df_main[['id', field]].copy()
temp_df = temp_df.dropna()
if len(temp_df) > 0:
# Explode the list column
exploded = temp_df.explode(field)
exploded[field] = exploded[field].str.strip()
exploded_dfs[field] = exploded[['id', field]]
# Documents DataFrame
doc_records = []
for eval_record in evals:
eval_id = eval_record['id']
meta = eval_record['meta']
for doc in eval_record['docs']:
doc_record = {
'eval_id': eval_id,
'title': meta.get('Title'),
'year': meta.get('Year'),
'author': meta.get('Author'),
'donor': meta.get('Donor'),
'doc_subtype': doc['Document Subtype'],
'file_url': doc['File URL'],
'file_description': doc['File description']
}
doc_records.append(doc_record)
df_docs = pd.DataFrame(doc_records)
return {
'main': df_main,
'documents': df_docs,
'exploded': exploded_dfs
}path = Path("../../_data/output/evaluations.json")
dataframes = create_evaluation_dataframes(path)
# Access different DataFrames
df_main = dataframes['main']
df_docs = dataframes['documents']
df_countries = dataframes['exploded']['Countries Covered']
df_themes = dataframes['exploded']['Migration Thematic Areas']
print("Main DataFrame:", df_main.shape)
print("Documents DataFrame:", df_docs.shape)
print("Countries DataFrame:", df_countries.shape)
print("Themes DataFrame:", df_themes.shape)Main DataFrame: (731, 44)
Documents DataFrame: (1405, 8)
Countries DataFrame: (1491, 2)
Themes DataFrame: (1948, 2)
df_main.head()| id | Title | Year | Author | Best Practicesor Lessons Learnt | Date of Publication | Donor | Evaluation Brief | Evaluation Commissioner | Evaluation Coverage | ... | Level of Evaluation | Document Subtype | File URL | File description | Management response | Date added | Metaevaluation | exclude | reason | doc_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1a57974ab89d7280988aa6b706147ce1 | EX-POST EVALUATION OF THE PROJECT: NIGERIA: S... | 2023 | Abderrahim El Moulat | Yes | 2023-05-10 | Government of Germany | Yes | Donor, IOM | Country | ... | Decentralized | Evaluation report, Evaluation brief | https://evaluation.iom.int/sites/g/files/tmzbd... | Evaluation Report , Evaluation Brief | No | Fri, 07/07/2023 - 15:35 | 2020-24 | NaN | NaN | 2 |
| 1 | c660e774d14854e20dc74457712b50ec | FINAL EVALUATION OF THE PROJECT: STRENGTHEN BO... | 2023 | Abderrahim El Moulat | Yes | 2023-02-14 | Government of Canada | Yes | Donor, IOM | Multi-country | ... | Decentralized | Evaluation report, Evaluation brief | https://evaluation.iom.int/sites/g/files/tmzbd... | Evaluation Report , Evaluation Brief | No | Fri, 05/19/2023 - 16:49 | 2020-24 | NaN | NaN | 2 |
| 2 | 2cae361c6779b561af07200e3d4e4051 | Final Evaluation of the project "SUPPORTING TH... | 2022 | Abderrahim El Moulat | Yes | 2022-09-15 | IOM Development Fund | Yes | IOM | Country | ... | Decentralized | Evaluation report, Evaluation brief | https://evaluation.iom.int/sites/g/files/tmzbd... | Evaluation Report , Evaluation Brief | No | Thu, 02/23/2023 - 11:43 | 2020-24 | NaN | NaN | 2 |
| 3 | a9dea21fd254df7759b3936903e0a885 | Finale Internal Evluation: ENHANCING THE CAPAC... | 2022 | Abderrahim El Moulat | Yes | 2022-06-22 | IOM Development Fund | Yes | Donor, IOM | Country | ... | Decentralized | Evaluation brief, Evaluation report | https://evaluation.iom.int/sites/g/files/tmzbd... | Evaluation brief , Evaluation Report | No | Mon, 08/08/2022 - 11:37 | 2020-24 | NaN | NaN | 2 |
| 4 | f0b09b92ea8ad6dddd9623de68a8d278 | Evaluation Finale Interne du Projet "ENGAGEMEN... | 2022 | Abderrahim El Moulat | Yes | 2022-05-17 | IOM Development Fund | Yes | Donor, IOM | Country | ... | Decentralized | Evaluation brief, Evaluation report | https://evaluation.iom.int/sites/g/files/tmzbd... | Evaluation Brief, Evaluation Report | No | Fri, 08/05/2022 - 15:00 | 2020-24 | NaN | NaN | 2 |
5 rows × 44 columns
df_docs.head()| eval_id | title | year | author | donor | doc_subtype | file_url | file_description | |
|---|---|---|---|---|---|---|---|---|
| 0 | 1a57974ab89d7280988aa6b706147ce1 | EX-POST EVALUATION OF THE PROJECT: NIGERIA: S... | 2023 | Abderrahim El Moulat | Government of Germany | Evaluation report | https://evaluation.iom.int/sites/g/files/tmzbd... | Evaluation Report |
| 1 | 1a57974ab89d7280988aa6b706147ce1 | EX-POST EVALUATION OF THE PROJECT: NIGERIA: S... | 2023 | Abderrahim El Moulat | Government of Germany | Evaluation brief | https://evaluation.iom.int/sites/g/files/tmzbd... | Evaluation Brief |
| 2 | c660e774d14854e20dc74457712b50ec | FINAL EVALUATION OF THE PROJECT: STRENGTHEN BO... | 2023 | Abderrahim El Moulat | Government of Canada | Evaluation report | https://evaluation.iom.int/sites/g/files/tmzbd... | Evaluation Report |
| 3 | c660e774d14854e20dc74457712b50ec | FINAL EVALUATION OF THE PROJECT: STRENGTHEN BO... | 2023 | Abderrahim El Moulat | Government of Canada | Evaluation brief | https://evaluation.iom.int/sites/g/files/tmzbd... | Evaluation Brief |
| 4 | 2cae361c6779b561af07200e3d4e4051 | Final Evaluation of the project "SUPPORTING TH... | 2022 | Abderrahim El Moulat | IOM Development Fund | Evaluation report | https://evaluation.iom.int/sites/g/files/tmzbd... | Evaluation Report |
df_countries.head()| id | Countries Covered | |
|---|---|---|
| 0 | 1a57974ab89d7280988aa6b706147ce1 | Nigeria |
| 1 | c660e774d14854e20dc74457712b50ec | Mali |
| 1 | c660e774d14854e20dc74457712b50ec | Niger |
| 2 | 2cae361c6779b561af07200e3d4e4051 | Cabo Verde |
| 3 | a9dea21fd254df7759b3936903e0a885 | Senegal |
df_countries.groupby('id').size().sort_values(ascending=False)id
1eafa00d143cec4b2067c938cd0665ad 18
9558482be1b049827f630c1760025c0f 16
2aacaf6d93836e795bb78009c30fdcfb 13
f6d27662a2d9ac51c9212a03615b5bd7 12
8d6171c5252c8b5f537c8f22d6df7b61 12
..
6b30198a6f8b83c8cbf36ef7d9e8cb29 1
6ba7723382db006326abd2d12db64380 1
6bb4ac298da9a2e13fd85836bfa70a18 1
6c20c87993c52eac06f6c9c20d193041 1
ffa7846d8ad478ed9fa22411e39fc685 1
Length: 731, dtype: int64
Migration Thematic Areas
theme_counts = df_themes.groupby('Migration Thematic Areas').size().sort_values(ascending=False)
plt.figure(figsize=(12, 8))
sns.barplot(x=theme_counts.values, y=theme_counts.index, orient='h')
plt.grid(axis='x', color='white', lw=1.5)
plt.box(False)
plt.title('Migration Thematic Areas', pad=20)
plt.xlabel('Count')
plt.tight_layout()
Cross-cutting Themes
df_crosscutting = dataframes['exploded']['Relevant Crosscutting Themes']
theme_counts = df_crosscutting.groupby('Relevant Crosscutting Themes').size().sort_values(ascending=False)
plt.figure(figsize=(12, 3))
sns.barplot(x=theme_counts.values, y=theme_counts.index, orient='h')
plt.grid(axis='x', color='white', lw=1.5)
plt.box(False)
plt.title('Relevant Crosscutting Themes', pad=20)
plt.xlabel('Count')
plt.tight_layout()
Year
year_counts = df_main['Year'].value_counts().sort_index()
plt.figure(figsize=(10, 3))
sns.barplot(x=year_counts.index, y=year_counts.values)
plt.grid(axis='y', color='white', lw=1.5)
plt.box(False)
plt.title('Number of Evaluation Reports per Year', pad=20)
plt.xlabel('Year')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.tight_layout()
IOM Regional Office
regional_offices = df_main['Regions Covered'].str.split(r',\s*').explode()
# Count reports per regional office
ro_counts = regional_offices.value_counts()
plt.figure(figsize=(12, 3))
sns.barplot(x=ro_counts.values, y=ro_counts.index, orient='h')
plt.grid(axis='x', color='white', lw=1.5)
plt.box(False)
plt.title('Number of Evaluation Reports per Regional Office', pad=20)
plt.xlabel('Count')
plt.tight_layout()
Country covered
n_countries = 50
country_counts = df_countries.groupby('Countries Covered').size().sort_values(ascending=False)
top_X_countries = country_counts.head(n_countries)
plt.figure(figsize=(12, 8))
sns.barplot(x=top_X_countries.values, y=top_X_countries.index, orient='h')
plt.grid(axis='x', color='white', lw=1.5)
plt.box(False)
plt.title(f'Number of Evaluation Reports per Country (Top {n_countries})', pad=20)
plt.xlabel('Count')
plt.tight_layout()
Evaluation brief available?
has_brief = df_main['Evaluation Brief'] == 'Yes'
brief_percentage = (has_brief.sum() / len(df_main)) * 100
print(f"% of reports with Evaluation Brief: {brief_percentage:.1f}% ({has_brief.sum()} out of {len(df_main)} total reports)")% of reports with Evaluation Brief: 32.7% (239 out of 731 total reports)
Type of Report
df_docs['doc_subtype'].value_counts()doc_subtype
Evaluation report 742
Evaluation brief 245
Annexes 166
Management response 142
Special related reports/documents 56
Evaluation summary 48
Internal evaluation report 3
nan 2
External evaluation report 1
Name: count, dtype: int64
Type of Evaluation Scope
df_main['Type of Evaluation Scope'].value_counts()Type of Evaluation Scope
Programme/Project 633
Thematic 27
Programme/Project, Thematic 13
Strategy 10
Synthesis 9
Strategy, Thematic 7
Programme/Project, Strategy 4
Thematic, Strategy 3
Policy 3
Thematic, Programme/Project 3
Policy, Thematic 2
Strategy, Programme/Project 2
Programme/Project, Synthesis 2
Thematic, Synthesis 1
Policy, Strategy 1
Strategy, Policy 1
Policy, Programme/Project, Strategy 1
Thematic, Policy 1
Policy, Programme/Project 1
Programme/Project, Thematic, Policy 1
Name: count, dtype: int64
Type of Evaluation Timing (%)
df_main['Type of Evaluation Timing'].value_counts(normalize=True).mul(100).round(1)Type of Evaluation Timing
Final (at the end of the project/programme) 47.7
Ex-post (after the end of the project/programme) 20.2
Mid-term (during the project's implementation/programme) 19.0
Not applicable 9.8
Not available 2.1
Real-time (at the early stages of project/programme) 0.8
Ex-ante (before the start of a project/programme) 0.4
Name: proportion, dtype: float64
Type of Evaluator (%)
df_main['Type of Evaluator'].value_counts(normalize=True).mul(100).round(1)Type of Evaluator
External 62.8
Internal 25.5
Central/OIG 10.0
Mixed (Internal/OIG and external) 1.6
Name: proportion, dtype: float64
Level of Evaluation (%)
df_main['Level of Evaluation'].value_counts(normalize=True).mul(100).round(1)Level of Evaluation
Decentralized 86.6
Centralized 10.8
Other 2.6
Name: proportion, dtype: float64