"""Intervention/outcome 온톨로지 로더.

Defines a grid of 10 interventions × 25+ outcomes.
Includes the 12 IARC obesity-related cancers.
"""
from __future__ import annotations

import csv
import os
from pathlib import Path
from typing import Dict, List

# Directory the CSV loaders below read from: the "data" folder located one
# level above the directory that contains this module (resolved to an
# absolute path at import time).
DATA_DIR = Path(__file__).resolve().parent.parent / "data"


def _read_csv(path: Path) -> List[Dict[str, str]]:
    """Read a CSV file into a list of per-row dictionaries.

    Args:
        path: Location of the CSV file; its first line is used as the header.

    Returns:
        One dict per data row, keyed by the header names, in file order.
        An empty list when ``path`` does not exist, so a missing data file
        is treated as "no rows" rather than an error.
    """
    if not path.exists():
        return []
    # newline="" hands line-ending handling to the csv module, so newlines
    # embedded in quoted fields are preserved instead of being rewritten by
    # universal-newline translation.
    # "utf-8-sig" decodes plain UTF-8 unchanged but strips a leading BOM,
    # which would otherwise become part of the first header name.
    with path.open(encoding="utf-8-sig", newline="") as f:
        return list(csv.DictReader(f))


def load_interventions() -> List[Dict[str, str]]:
    """Load the intervention ontology table.

    Reads ``intervention_ontology.csv`` from ``DATA_DIR`` via ``_read_csv``
    and returns its rows unchanged.

    NOTE(review): the original author note expects about 10 interventions
    (described as 5 drugs + 5 bariatric + part of 5 lifestyle); that count
    is not enforced here and should be confirmed against the data file.
    """
    ontology_path = DATA_DIR / "intervention_ontology.csv"
    return _read_csv(ontology_path)


def load_outcomes() -> List[Dict[str, str]]:
    """Load the outcome ontology table.

    Reads ``outcome_ontology.csv`` from ``DATA_DIR`` via ``_read_csv`` and
    returns its rows unchanged.

    NOTE(review): the original author note expects 25+ outcomes, including
    the 12 IARC obesity-related cancers; nothing here checks that.
    """
    ontology_path = DATA_DIR / "outcome_ontology.csv"
    return _read_csv(ontology_path)


def load_effects() -> List[Dict[str, str]]:
    """Load the curated effect rows.

    Reads ``effects_sample.csv`` from ``DATA_DIR`` via ``_read_csv`` and
    returns its rows unchanged.

    NOTE(review): the original author note calls this the "6-design effect"
    curation; the set of designs is not validated here.
    """
    effects_path = DATA_DIR / "effects_sample.csv"
    return _read_csv(effects_path)


def intervention_outcome_grid() -> List[Dict[str, str | int]]:
    """Summarize every (intervention, outcome) pair that has effect rows.

    Only pairs that actually occur in the rows from ``load_effects()`` are
    returned; pairs without data are not filled in.

    Returns:
        One dict per distinct pair, in order of first appearance, with keys:
        ``intervention`` and ``outcome`` (str), ``n_designs`` (int, number
        of distinct ``design`` values among the pair's rows) and ``n_rows``
        (int, number of effect rows for the pair).

    Raises:
        KeyError: If an effect row lacks the ``intervention``, ``outcome``
            or ``design`` column.
    """
    # Group effect rows by their (intervention, outcome) key; a plain dict
    # keeps the pairs in first-seen order.
    rows_by_pair = {}
    for row in load_effects():
        pair = (row["intervention"], row["outcome"])
        rows_by_pair.setdefault(pair, []).append(row)
    return [
        {
            "intervention": iv,
            "outcome": oc,
            "n_designs": len({r["design"] for r in rows}),
            "n_rows": len(rows),
        }
        for (iv, oc), rows in rows_by_pair.items()
    ]


def iarc_obesity_related_cancers() -> List[str]:
    """Return the outcome names flagged as IARC obesity-related cancers.

    An outcome row is selected when its ``iarc_obesity_related`` field is
    exactly ``"yes"``; rows where the field is absent or holds any other
    value are skipped. Names are returned in the order of the outcome rows.

    NOTE(review): the original author note refers to the IARC 2016 list of
    12 cancers; the count is not enforced here.
    """
    selected: List[str] = []
    for outcome_row in load_outcomes():
        if outcome_row.get("iarc_obesity_related") != "yes":
            continue
        selected.append(outcome_row["outcome"])
    return selected


def design_types() -> List[str]:
    """Return the study-design labels, as a fresh list on every call.

    NOTE(review): the original author note calls this the "6-design"
    definition, but seven labels are listed; confirm which is intended.
    """
    labels = (
        "RCT",
        "observational",
        "BMI_MR",
        "multivariable_MR",
        "bariatric_natural",
        "within_subject_crossover",
        "DIO_animal",
    )
    return list(labels)


if __name__ == "__main__":
    # Smoke check when run as a script: print the row count of each loader,
    # calling and reporting them one at a time in a fixed order.
    summary = (
        ("interventions", load_interventions),
        ("outcomes", load_outcomes),
        ("IARC cancers", iarc_obesity_related_cancers),
        ("effect rows", load_effects),
        ("unique pairs", intervention_outcome_grid),
    )
    for label, loader in summary:
        print(f"{label}: {len(loader())}")
