Fix and clean markdown headings, and enrich the markdown with figure descriptions, …

This module aims to fix and enrich markdown headings from OCR’d PDF files by:

  1. Fixing heading hierarchy that was corrupted during OCR
  2. Adding page numbers to headings for better navigation
  3. Enriching figure references with descriptive text and creating a table of figures
Exported source
# Module-wide settings: output-file suffix and LLM connection parameters.
cfg = AttrDict(
    fixed_suffix='_fixed',  # appended to the stem of each corrected page file
    lm='gemini/gemini-2.0-flash-exp',  # model identifier handed to dspy.LM
    api_key=GEMINI_API_KEY,
    max_tokens=8192,
    track_usage=False,
)

Fixing Markdown Headings

# Demo: gather the page files of one document, skipping any copies that already carry the fixed suffix.
doc = src_dir / 'abridged_evaluation_report_final_olta_ndoja_pdf'
pages = [p for p in doc.ls(file_exts=".md") if cfg.fixed_suffix not in p.stem] 
# Order pages by the integer after the first underscore of the stem (e.g. page_12 -> 12).
pages = L(pages).sorted(key=lambda p: int(p.stem.split('_')[1])); pages
(#31) [Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_1.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_2.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_3.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_4.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_5.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_6.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_7.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_8.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_9.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_10.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_11.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_12.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_13.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_14.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_15.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_16.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_rep
ort_final_olta_ndoja_pdf/page_17.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_18.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_19.md'),Path('../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf/page_20.md')...]

source

get_hdgs

 get_hdgs (md_txt)
Exported source
def get_hdgs(md_txt):
    "Return every line of `md_txt` that starts with one or more `#` characters"
    heading_re = re.compile(r'^#+.*$', re.MULTILINE)
    return heading_re.findall(md_txt)

source

get_hdgs_with_pages

 get_hdgs_with_pages (pages:list[pathlib.Path])

Get headings and the page number they are on

Type Details
pages list List of pages
Exported source
def get_hdgs_with_pages(
    pages: list[Path] # List of pages
    ):
    "Collect the headings of every page, each tagged with its 1-based page number"
    return [
        {'heading': hdg, 'page': page_nb}
        for page_nb, page in enumerate(pages, start=1)  # position in `pages` is the page number
        for hdg in get_hdgs(page.read_text())
    ]
# Demo: preview the first five heading/page records.
hdgs = get_hdgs_with_pages(pages); hdgs[:5]
[{'heading': '# **PPMi**', 'page': 1},
 {'heading': '# CONTENTS ', 'page': 3},
 {'heading': '# 1. Introduction ', 'page': 4},
 {'heading': '# 2. Background of the JI-HoA ', 'page': 5},
 {'heading': '### 2.1. Context and design of the JI-HoA', 'page': 5}]
# Demo: flat list of all headings across the pages, without page numbers.
toc = L([get_hdgs(p.read_text()) for p in pages]).concat(); toc[:5]
(#5) ['# **PPMi**','# CONTENTS ','# 1. Introduction ','# 2. Background of the JI-HoA ','### 2.1. Context and design of the JI-HoA']

source

format_hdgs

 format_hdgs (hdgs:list[dict])

Format headings with page numbers

Type Details
hdgs list List of headings with page numbers
Exported source
def format_hdgs(
    hdgs: list[dict] # List of headings with page numbers
    ):
    "Render one line per heading, annotated with its page and its rank on that page"
    seen_on_page = {}  # page -> number of headings emitted for that page so far
    out = []
    for entry in hdgs:
        page_nb = entry['page']
        rank = seen_on_page.get(page_nb, 0) + 1
        seen_on_page[page_nb] = rank
        out.append(f"{entry['heading']} (Page {page_nb}, Position {rank})")
    return "\n".join(out)
# Demo: first 500 characters of the formatted heading list (the text fix_md passes to the model).
print(format_hdgs(hdgs)[:500])
# **PPMi** (Page 1, Position 1)
# CONTENTS  (Page 3, Position 1)
# 1. Introduction  (Page 4, Position 1)
# 2. Background of the JI-HoA  (Page 5, Position 1)
### 2.1. Context and design of the JI-HoA (Page 5, Position 2)
# 2.2. External factors affecting the implementation of the JI  (Page 7, Position 1)
# 3. Methodology  (Page 8, Position 1)
# 4. Findings  (Page 10, Position 1)
### 4.1. Relevance (Page 10, Position 2)
### 4.1.1. Relevance of programme activities for migrants, returnees, and comm

source

HeadingResult

 HeadingResult (old:str, page:int, position:int, new:str, changed:bool)

*!!! abstract “Usage Documentation” Models

A base class for creating Pydantic models.

Attributes: class_vars: The names of the class variables defined on the model. private_attributes: Metadata about the private attributes of the model. signature: The synthesized __init__ [Signature][inspect.Signature] of the model.

__pydantic_complete__: Whether model building is completed, or if there are still undefined fields.
__pydantic_core_schema__: The core schema of the model.
__pydantic_custom_init__: Whether the model has a custom `__init__` function.
__pydantic_decorators__: Metadata containing the decorators defined on the model.
    This replaces `Model.__validators__` and `Model.__root_validators__` from Pydantic V1.
__pydantic_generic_metadata__: Metadata for generic models; contains data used for a similar purpose to
    __args__, __origin__, __parameters__ in typing-module generics. May eventually be replaced by these.
__pydantic_parent_namespace__: Parent namespace of the model, used for automatic rebuilding of models.
__pydantic_post_init__: The name of the post-init method for the model, if defined.
__pydantic_root_model__: Whether the model is a [`RootModel`][pydantic.root_model.RootModel].
__pydantic_serializer__: The `pydantic-core` `SchemaSerializer` used to dump instances of the model.
__pydantic_validator__: The `pydantic-core` `SchemaValidator` used to validate instances of the model.

__pydantic_fields__: A dictionary of field names and their corresponding [`FieldInfo`][pydantic.fields.FieldInfo] objects.
__pydantic_computed_fields__: A dictionary of computed field names and their corresponding [`ComputedFieldInfo`][pydantic.fields.ComputedFieldInfo] objects.

__pydantic_extra__: A dictionary containing extra values, if [`extra`][pydantic.config.ConfigDict.extra]
    is set to `'allow'`.
__pydantic_fields_set__: The names of fields explicitly set during instantiation.
__pydantic_private__: Values of private attributes set on the model instance.*
Exported source
# Default LM for the module. The token limit is taken from `cfg` so this LM is built
# with the same settings as the one created inside `fix_md`.
lm = dspy.LM(cfg.lm, api_key=cfg.api_key, max_tokens=cfg.max_tokens)
dspy.configure(lm=lm)
dspy.settings.configure(track_usage=cfg.track_usage)
Exported source
# One heading as reported back by the LLM: original text, location, and corrected form.
# Documented with `#` comments (not a docstring) so the class's `__doc__` is left as it was.
class HeadingResult(BaseModel):
    old: str  # heading text before correction; matched against page lines after stripping
    page: int  # page number, 1-based (position of the page in the sorted page list)
    position: int  # rank of the heading among the headings of its page, starting at 1
    new: str  # heading text to write in place of `old`
    changed: bool  # True if correction was made

source

FixHeadingHierarchy

 FixHeadingHierarchy (headings_with_pages:str,
                      results:List[__main__.HeadingResult])

Fix markdown heading hierarchy by analyzing the document’s numbering patterns:

- Detect numbering scheme (1.2.3, I.A.1, A.1.a, etc.)
- Apply hierarchy levels based on nesting depth: # for top level, ## for second level, ### for third level
- When a section number is lower than a previously seen number at the same level (e.g., seeing ‘2.’ after ‘3.1’), it’s likely a subsection or list item, not a main section
- Unnumbered headings: keep as-is if at document boundaries, treat as subsections if within numbered sections
- Return ALL headings with their corrected form

Exported source
# DSPy signature for the heading-correction task.
# NOTE: DSPy uses a signature's docstring as the instructions given to the model, so the
# docstring below is runtime text — rewording it changes what the LLM is asked to do.
class FixHeadingHierarchy(dspy.Signature):
    """Fix markdown heading hierarchy by analyzing the document's numbering patterns:
    - Detect numbering scheme (1.2.3, I.A.1, A.1.a, etc.)
    - Apply hierarchy levels based on nesting depth: # for top level, ## for second level, ### for third level
    - When a section number is lower than a previously seen number at the same level (e.g., seeing '2.' after '3.1'), it's likely a subsection or list item, not a main section
    - Unnumbered headings: keep as-is if at document boundaries, treat as subsections if within numbered sections
    - Return ALL headings with their corrected form
    """
    
    # Input: the text built by format_hdgs, one "<heading> (Page N, Position M)" line per heading.
    headings_with_pages: str = dspy.InputField(desc="List of headings with page numbers")
    # Output: the instructions ask for every heading back, changed or not.
    results: List[HeadingResult] = dspy.OutputField(desc="All headings with corrections and change status")

source

fix_md

 fix_md (hdgs:list[dict], track_usage:bool=False)

Fix markdown headings

Type Default Details
hdgs list List of headings with page numbers
track_usage bool False
Exported source
def fix_md(
    hdgs: list[dict], # List of headings with page numbers
    track_usage: bool=cfg.track_usage,
    ):
    "Ask the LLM to correct the heading hierarchy and return the DSPy prediction"
    # (Re)configure DSPy globally with the module's model settings before predicting
    dspy.configure(
        lm=dspy.LM(cfg.lm, api_key=cfg.api_key, max_tokens=cfg.max_tokens),
        track_usage=track_usage,
    )
    predictor = dspy.ChainOfThought(FixHeadingHierarchy)
    return predictor(headings_with_pages=format_hdgs(hdgs))
# Demo: run the correction once with usage tracking requested, then inspect the outcome.
result = fix_md(hdgs, track_usage=True)
print("Result:", result)
print("Usage:", result.get_lm_usage())
Result: Prediction(
    reasoning='The provided headings have inconsistent hierarchy. I will correct the hierarchy based on the numbering and content. The numbering scheme appears to be a mix of numbered sections (1, 2, 3, etc.) and subsections (2.1, 2.1.1, etc.). I will use this numbering to determine the appropriate heading level. Unnumbered headings within numbered sections will be treated as subsections. Headings that appear to be list items (e.g., "3. Increase attention...") will be treated as top-level headings.',
    results=[HeadingResult(old='# **PPMi**', page=1, position=1, new='# **PPMi**', changed=False), HeadingResult(old='# CONTENTS', page=3, position=1, new='# CONTENTS', changed=False), HeadingResult(old='# 1. Introduction', page=4, position=1, new='# 1. Introduction', changed=False), HeadingResult(old='# 2. Background of the JI-HoA', page=5, position=1, new='# 2. Background of the JI-HoA', changed=False), HeadingResult(old='### 2.1. Context and design of the JI-HoA', page=5, position=2, new='## 2.1. Context and design of the JI-HoA', changed=False), HeadingResult(old='# 2.2. External factors affecting the implementation of the JI', page=7, position=1, new='## 2.2. External factors affecting the implementation of the JI', changed=True), HeadingResult(old='# 3. Methodology', page=8, position=1, new='# 3. Methodology', changed=False), HeadingResult(old='# 4. Findings', page=10, position=1, new='# 4. Findings', changed=False), HeadingResult(old='### 4.1. Relevance', page=10, position=2, new='## 4.1. Relevance', changed=False), HeadingResult(old='### 4.1.1. Relevance of programme activities for migrants, returnees, and communities', page=10, position=3, new='### 4.1.1. Relevance of programme activities for migrants, returnees, and communities', changed=False), HeadingResult(old='## Overall performance score for relevance: $3.9 / 5$ <br> Robustness score for the evidence: $4.5 / 5$', page=10, position=4, new='#### Overall performance score for relevance: $3.9 / 5$ <br> Robustness score for the evidence: $4.5 / 5$', changed=True), HeadingResult(old='### 4.1.1.1 Needs of migrants', page=10, position=5, new='#### 4.1.1.1 Needs of migrants', changed=False), HeadingResult(old='### 4.1.1.2 Needs of returnees', page=10, position=6, new='#### 4.1.1.2 Needs of returnees', changed=False), HeadingResult(old='# 4.1.1.3 Needs of community members', page=12, position=1, new='#### 4.1.1.3 Needs of community members', changed=True), HeadingResult(old="### 4.1.2. 
Programme's relevance to the needs of stakeholders", page=12, position=2, new="### 4.1.2. Programme's relevance to the needs of stakeholders", changed=False), HeadingResult(old='### 4.1.2.1 Needs of governments', page=12, position=3, new='#### 4.1.2.1 Needs of governments', changed=False), HeadingResult(old='# 4.1.2.2 Needs of other stakeholders', page=13, position=1, new='#### 4.1.2.2 Needs of other stakeholders', changed=True), HeadingResult(old='### 4.2. Coherence', page=13, position=2, new='## 4.2. Coherence', changed=False), HeadingResult(old="# 4.2.1. The JI-HoA's alignment with the objectives and standards of IOM, and objectives of the EU", page=14, position=1, new="### 4.2.1. The JI-HoA's alignment with the objectives and standards of IOM, and objectives of the EU", changed=True), HeadingResult(old='### 4.2.2. Alignment with other initiatives', page=14, position=2, new='### 4.2.2. Alignment with other initiatives', changed=False), HeadingResult(old='# 4.3. Effectiveness', page=16, position=1, new='## 4.3. Effectiveness', changed=True), HeadingResult(old='### 4.3.1. Specific Objective 1: Partner countries and relevant stakeholders developed or strengthened evidence-based return and reintegration procedures', page=16, position=2, new='### 4.3.1. 
Specific Objective 1: Partner countries and relevant stakeholders developed or strengthened evidence-based return and reintegration procedures', changed=False), HeadingResult(old='### 4.3.1.1 Achievement of outputs and results', page=16, position=3, new='#### 4.3.1.1 Achievement of outputs and results', changed=False), HeadingResult(old='## Data availability', page=16, position=4, new='##### Data availability', changed=True), HeadingResult(old='# Capacity of stakeholders', page=17, position=1, new='##### Capacity of stakeholders', changed=True), HeadingResult(old='### 4.3.1.2 Achievement of Specific Objective 1', page=17, position=2, new='#### 4.3.1.2 Achievement of Specific Objective 1', changed=False), HeadingResult(old='# 4.3.2. Specific Objective 2: Safe, humane, dignified voluntary return processes are enhanced along main migration routes', page=18, position=1, new='### 4.3.2. Specific Objective 2: Safe, humane, dignified voluntary return processes are enhanced along main migration routes', changed=True), HeadingResult(old='# 4.3.2.1 Achievement of outputs and results', page=19, position=1, new='#### 4.3.2.1 Achievement of outputs and results', changed=True), HeadingResult(old='## Outreach and awareness', page=19, position=2, new='##### Outreach and awareness', changed=True), HeadingResult(old='## Assistance to stranded migrants', page=19, position=3, new='##### Assistance to stranded migrants', changed=True), HeadingResult(old='# 4.3.2.2 Achievement of the Objective', page=20, position=1, new='#### 4.3.2.2 Achievement of the Objective', changed=True), HeadingResult(old='### 4.3.3. Specific Objective 3: Returnees are sustainably integrated in host communities, and host communities are better able to create living standards that address drivers of migration.', page=20, position=2, new='### 4.3.3. 
Specific Objective 3: Returnees are sustainably integrated in host communities, and host communities are better able to create living standards that address drivers of migration.', changed=False), HeadingResult(old='### 4.3.3.1 Achievement of outputs and results', page=20, position=3, new='#### 4.3.3.1 Achievement of outputs and results', changed=False), HeadingResult(old='## Individual and community-based reintegration', page=20, position=4, new='##### Individual and community-based reintegration', changed=True), HeadingResult(old='# M\\&E systems', page=21, position=1, new='##### M\\&E systems', changed=True), HeadingResult(old='# 4.3.3.2 Achievement of Specific Objective 3', page=22, position=1, new='#### 4.3.3.2 Achievement of Specific Objective 3', changed=True), HeadingResult(old='## Overall achievement of reintegration', page=22, position=2, new='##### Overall achievement of reintegration', changed=True), HeadingResult(old='# Sustainability of reintegration', page=23, position=1, new='##### Sustainability of reintegration', changed=True), HeadingResult(old='### 4.3.4. Functioning of the Integrated Approach', page=23, position=2, new='### 4.3.4. Functioning of the Integrated Approach', changed=False), HeadingResult(old='# 4.4. Efficiency', page=24, position=1, new='## 4.4. Efficiency', changed=True), HeadingResult(old='### 4.4.3. Did the programme receive sufficient resources to achieve its objectives?', page=24, position=2, new='### 4.4.3. Did the programme receive sufficient resources to achieve its objectives?', changed=False), HeadingResult(old='# 4.4.2. Cost-effectiveness and efficiency of the programme', page=25, position=1, new='### 4.4.2. Cost-effectiveness and efficiency of the programme', changed=True), HeadingResult(old='# 4.5. Sustainability', page=26, position=1, new='## 4.5. 
Sustainability', changed=True), HeadingResult(old='## Overall performance score for sustainability: $2.5 / 5$ <br> Robustness score for the evidence: $4 / 5$', page=26, position=2, new='#### Overall performance score for sustainability: $2.5 / 5$ <br> Robustness score for the evidence: $4 / 5$', changed=True), HeadingResult(old='# 5. Conclusions and Recommendations', page=27, position=1, new='# 5. Conclusions and Recommendations', changed=False), HeadingResult(old='### 5.1. Conclusions', page=27, position=2, new='## 5.1. Conclusions', changed=False), HeadingResult(old='# 5.2. Recommendations', page=28, position=1, new='## 5.2. Recommendations', changed=True), HeadingResult(old='# 3. Increase attention on building partnerships with service providers who can function without (significant) funding channelled by IOM.', page=29, position=1, new='# 3. Increase attention on building partnerships with service providers who can function without (significant) funding channelled by IOM.', changed=True), HeadingResult(old='# 5. Explore opportunities to extend the scope of support provided to returnees, with a focus on longer-term integration.', page=30, position=1, new='# 5. Explore opportunities to extend the scope of support provided to returnees, with a focus on longer-term integration.', changed=True)]
)
Usage: {}

source

group_corrections_by_page

 group_corrections_by_page (results:list[__main__.HeadingResult])

Group HeadingResult corrections by page number into dict with page nums as keys

Type Details
results list List of headings with corrections and change status
Exported source
def group_corrections_by_page(
    results: list[HeadingResult], # List of headings with corrections and change status
    ):
    "Bucket corrections under their page number, keeping input order within each page"
    by_page = {}
    for res in results:
        by_page.setdefault(res.page, []).append(res)
    return by_page
# Demo: corrections keyed by page number.
group_corrections_by_page(result.results)
{1: [HeadingResult(old='# **PPMi**', page=1, position=1, new='# **PPMi**', changed=False)],
 3: [HeadingResult(old='# CONTENTS', page=3, position=1, new='# CONTENTS', changed=False)],
 4: [HeadingResult(old='# 1. Introduction', page=4, position=1, new='# 1. Introduction', changed=False)],
 5: [HeadingResult(old='# 2. Background of the JI-HoA', page=5, position=1, new='# 2. Background of the JI-HoA', changed=False),
  HeadingResult(old='### 2.1. Context and design of the JI-HoA', page=5, position=2, new='## 2.1. Context and design of the JI-HoA', changed=False)],
 7: [HeadingResult(old='# 2.2. External factors affecting the implementation of the JI', page=7, position=1, new='## 2.2. External factors affecting the implementation of the JI', changed=True)],
 8: [HeadingResult(old='# 3. Methodology', page=8, position=1, new='# 3. Methodology', changed=False)],
 10: [HeadingResult(old='# 4. Findings', page=10, position=1, new='# 4. Findings', changed=False),
  HeadingResult(old='### 4.1. Relevance', page=10, position=2, new='## 4.1. Relevance', changed=False),
  HeadingResult(old='### 4.1.1. Relevance of programme activities for migrants, returnees, and communities', page=10, position=3, new='### 4.1.1. Relevance of programme activities for migrants, returnees, and communities', changed=False),
  HeadingResult(old='## Overall performance score for relevance: $3.9 / 5$ <br> Robustness score for the evidence: $4.5 / 5$', page=10, position=4, new='#### Overall performance score for relevance: $3.9 / 5$ <br> Robustness score for the evidence: $4.5 / 5$', changed=True),
  HeadingResult(old='### 4.1.1.1 Needs of migrants', page=10, position=5, new='#### 4.1.1.1 Needs of migrants', changed=False),
  HeadingResult(old='### 4.1.1.2 Needs of returnees', page=10, position=6, new='#### 4.1.1.2 Needs of returnees', changed=False)],
 12: [HeadingResult(old='# 4.1.1.3 Needs of community members', page=12, position=1, new='#### 4.1.1.3 Needs of community members', changed=True),
  HeadingResult(old="### 4.1.2. Programme's relevance to the needs of stakeholders", page=12, position=2, new="### 4.1.2. Programme's relevance to the needs of stakeholders", changed=False),
  HeadingResult(old='### 4.1.2.1 Needs of governments', page=12, position=3, new='#### 4.1.2.1 Needs of governments', changed=False)],
 13: [HeadingResult(old='# 4.1.2.2 Needs of other stakeholders', page=13, position=1, new='#### 4.1.2.2 Needs of other stakeholders', changed=True),
  HeadingResult(old='### 4.2. Coherence', page=13, position=2, new='## 4.2. Coherence', changed=False)],
 14: [HeadingResult(old="# 4.2.1. The JI-HoA's alignment with the objectives and standards of IOM, and objectives of the EU", page=14, position=1, new="### 4.2.1. The JI-HoA's alignment with the objectives and standards of IOM, and objectives of the EU", changed=True),
  HeadingResult(old='### 4.2.2. Alignment with other initiatives', page=14, position=2, new='### 4.2.2. Alignment with other initiatives', changed=False)],
 16: [HeadingResult(old='# 4.3. Effectiveness', page=16, position=1, new='## 4.3. Effectiveness', changed=True),
  HeadingResult(old='### 4.3.1. Specific Objective 1: Partner countries and relevant stakeholders developed or strengthened evidence-based return and reintegration procedures', page=16, position=2, new='### 4.3.1. Specific Objective 1: Partner countries and relevant stakeholders developed or strengthened evidence-based return and reintegration procedures', changed=False),
  HeadingResult(old='### 4.3.1.1 Achievement of outputs and results', page=16, position=3, new='#### 4.3.1.1 Achievement of outputs and results', changed=False),
  HeadingResult(old='## Data availability', page=16, position=4, new='##### Data availability', changed=True)],
 17: [HeadingResult(old='# Capacity of stakeholders', page=17, position=1, new='##### Capacity of stakeholders', changed=True),
  HeadingResult(old='### 4.3.1.2 Achievement of Specific Objective 1', page=17, position=2, new='#### 4.3.1.2 Achievement of Specific Objective 1', changed=False)],
 18: [HeadingResult(old='# 4.3.2. Specific Objective 2: Safe, humane, dignified voluntary return processes are enhanced along main migration routes', page=18, position=1, new='### 4.3.2. Specific Objective 2: Safe, humane, dignified voluntary return processes are enhanced along main migration routes', changed=True)],
 19: [HeadingResult(old='# 4.3.2.1 Achievement of outputs and results', page=19, position=1, new='#### 4.3.2.1 Achievement of outputs and results', changed=True),
  HeadingResult(old='## Outreach and awareness', page=19, position=2, new='##### Outreach and awareness', changed=True),
  HeadingResult(old='## Assistance to stranded migrants', page=19, position=3, new='##### Assistance to stranded migrants', changed=True)],
 20: [HeadingResult(old='# 4.3.2.2 Achievement of the Objective', page=20, position=1, new='#### 4.3.2.2 Achievement of the Objective', changed=True),
  HeadingResult(old='### 4.3.3. Specific Objective 3: Returnees are sustainably integrated in host communities, and host communities are better able to create living standards that address drivers of migration.', page=20, position=2, new='### 4.3.3. Specific Objective 3: Returnees are sustainably integrated in host communities, and host communities are better able to create living standards that address drivers of migration.', changed=False),
  HeadingResult(old='### 4.3.3.1 Achievement of outputs and results', page=20, position=3, new='#### 4.3.3.1 Achievement of outputs and results', changed=False),
  HeadingResult(old='## Individual and community-based reintegration', page=20, position=4, new='##### Individual and community-based reintegration', changed=True)],
 21: [HeadingResult(old='# M\\&E systems', page=21, position=1, new='##### M\\&E systems', changed=True)],
 22: [HeadingResult(old='# 4.3.3.2 Achievement of Specific Objective 3', page=22, position=1, new='#### 4.3.3.2 Achievement of Specific Objective 3', changed=True),
  HeadingResult(old='## Overall achievement of reintegration', page=22, position=2, new='##### Overall achievement of reintegration', changed=True)],
 23: [HeadingResult(old='# Sustainability of reintegration', page=23, position=1, new='##### Sustainability of reintegration', changed=True),
  HeadingResult(old='### 4.3.4. Functioning of the Integrated Approach', page=23, position=2, new='### 4.3.4. Functioning of the Integrated Approach', changed=False)],
 24: [HeadingResult(old='# 4.4. Efficiency', page=24, position=1, new='## 4.4. Efficiency', changed=True),
  HeadingResult(old='### 4.4.3. Did the programme receive sufficient resources to achieve its objectives?', page=24, position=2, new='### 4.4.3. Did the programme receive sufficient resources to achieve its objectives?', changed=False)],
 25: [HeadingResult(old='# 4.4.2. Cost-effectiveness and efficiency of the programme', page=25, position=1, new='### 4.4.2. Cost-effectiveness and efficiency of the programme', changed=True)],
 26: [HeadingResult(old='# 4.5. Sustainability', page=26, position=1, new='## 4.5. Sustainability', changed=True),
  HeadingResult(old='## Overall performance score for sustainability: $2.5 / 5$ <br> Robustness score for the evidence: $4 / 5$', page=26, position=2, new='#### Overall performance score for sustainability: $2.5 / 5$ <br> Robustness score for the evidence: $4 / 5$', changed=True)],
 27: [HeadingResult(old='# 5. Conclusions and Recommendations', page=27, position=1, new='# 5. Conclusions and Recommendations', changed=False),
  HeadingResult(old='### 5.1. Conclusions', page=27, position=2, new='## 5.1. Conclusions', changed=False)],
 28: [HeadingResult(old='# 5.2. Recommendations', page=28, position=1, new='## 5.2. Recommendations', changed=True)],
 29: [HeadingResult(old='# 3. Increase attention on building partnerships with service providers who can function without (significant) funding channelled by IOM.', page=29, position=1, new='# 3. Increase attention on building partnerships with service providers who can function without (significant) funding channelled by IOM.', changed=True)],
 30: [HeadingResult(old='# 5. Explore opportunities to extend the scope of support provided to returnees, with a focus on longer-term integration.', page=30, position=1, new='# 5. Explore opportunities to extend the scope of support provided to returnees, with a focus on longer-term integration.', changed=True)]}

source

apply_corrections_to_page

 apply_corrections_to_page (page_nb, corrections, pages_list,
                            suffix='_fixed')

Apply corrections to a page by replacing original headings with corrected versions and page numbers

Type Default Details
page_nb Page number
corrections List of corrections
pages_list List of pages
suffix str _fixed Suffix for the new file
Exported source
def apply_corrections_to_page(
    page_nb, # Page number
    corrections, # List of corrections
    pages_list, # List of pages
    suffix=cfg.fixed_suffix, # Suffix for the new file
    ):
    """Write a corrected copy of one page next to the original file.

    Each line whose stripped text equals the stripped `old` of a pending correction is
    replaced by "<new> .... page <page_nb>". A correction is used at most once, so
    repeated identical headings are matched in the order the corrections are given.
    The result is written to a sibling file whose stem is the original stem plus
    `suffix`; the original file is not modified.

    Raises `IndexError` when `page_nb` is not a valid 1-based index into `pages_list`.
    """
    # Page numbers are 1-based. Without this guard, 0 or a negative number would be
    # accepted by negative indexing and silently rewrite a page counted from the end.
    if not 1 <= page_nb <= len(pages_list):
        raise IndexError(f"page_nb {page_nb} is outside 1..{len(pages_list)}")
    page_file = pages_list[page_nb - 1]
    lines = page_file.read_text().splitlines()
    pending = list(corrections)  # work on a copy: matched corrections are consumed below

    for i, line in enumerate(lines):
        stripped = line.strip()
        for correction in pending:
            if stripped == correction.old.strip():
                lines[i] = f"{correction.new} .... page {page_nb}"
                pending.remove(correction)  # safe: the inner loop is left immediately
                break

    new_file = page_file.with_stem(f"{page_file.stem}{suffix}")
    new_file.write_text('\n'.join(lines))
# Demo: apply only page 5's corrections, so a heading on this page cannot be matched
# by a correction that belongs to another page with the same text.
apply_corrections_to_page(5, group_corrections_by_page(result.results).get(5, []), pages)

source

apply_all_corrections

 apply_all_corrections (results, pages_list)

Apply all corrections to the pages

Details
results List of headings with corrections and change status
pages_list List of pages
Exported source
def apply_all_corrections(
    results, # List of headings with corrections and change status
    pages_list, # List of pages
    ):
    "Write a corrected copy of every page that has at least one entry in `results`"
    for nb, page_corrections in group_corrections_by_page(results).items():
        apply_corrections_to_page(nb, page_corrections, pages_list)
# Demo: write the corrected copy of every page that has entries in the results.
apply_all_corrections(result.results, pages)

source

fix_doc_hdgs

 fix_doc_hdgs (src_dir, force=False)

Process the document directory

Type Default Details
src_dir Path to the folder containing the document
force bool False Whether to overwrite the existing files
Exported source
def fix_doc_hdgs(
    src_dir, # Path to the folder containing the document
    force=False, # Whether to overwrite the existing files
    ):
    "Fix the headings of the markdown pages in `src_dir` and write suffixed copies"
    def _page_number(p):
        # integer after the first underscore of the file stem
        return int(p.stem.split('_')[1])

    folder = Path(src_dir)
    fixed_files = list(folder.glob(f"*{cfg.fixed_suffix}.md"))
    if fixed_files:
        if not force:
            print(f"Found {len(fixed_files)} {cfg.fixed_suffix} files. Use force=True to overwrite.")
            return
        # force=True: remove earlier output so only the original pages are listed below
        for stale in fixed_files:
            stale.delete()
    pages = folder.ls(file_exts=".md").sorted(key=_page_number)
    result = fix_md(get_hdgs_with_pages(pages))
    apply_all_corrections(result.results, pages)
# Demo: reprocess the sample document, replacing any previously written fixed files.
print(doc)
fix_doc_hdgs(doc, force=True)
../_data/md_library/49d2fba781b6a7c0d94577479636ee6f/abridged_evaluation_report_final_olta_ndoja_pdf

Enrich with figures description