"""MASLD-outcome ontology + 데이터 로더 (stdlib only)."""
from __future__ import annotations

import csv
import math
import os
from typing import Dict, List, Optional

# Parent of the directory that contains this file (two dirname() steps up
# from the file's absolute path).
ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Directory the CSV loaders in this module read from.
DATA = os.path.join(ROOT, "data")


def _read_csv(path: str) -> List[Dict[str, str]]:
    """Read a CSV file into a list of row dicts keyed by the header line.

    Args:
        path: Location of the CSV file.

    Returns:
        One dict per data row, in file order. An empty list is returned when
        ``path`` does not exist, so optional data files can simply be absent.
    """
    if not os.path.exists(path):
        return []
    # newline="" lets the csv module do its own line-ending handling, so
    # line breaks inside quoted fields are preserved as written.
    # "utf-8-sig" decodes plain UTF-8 as well, but also drops a leading BOM
    # (as written by spreadsheet exports) that would otherwise become part of
    # the first column name.
    with open(path, "r", encoding="utf-8-sig", newline="") as f:
        return list(csv.DictReader(f))


def load_stages() -> List[Dict[str, str]]:
    """Return the rows of ``masld_stages.csv`` from the data directory.

    Each row is a dict keyed by the CSV header; the list is empty when the
    file does not exist.
    """
    csv_path = os.path.join(DATA, "masld_stages.csv")
    return _read_csv(csv_path)


def load_outcomes() -> List[Dict[str, str]]:
    """Return the rows of ``outcome_ontology.csv`` from the data directory.

    Each row is a dict keyed by the CSV header; the list is empty when the
    file does not exist.
    """
    csv_path = os.path.join(DATA, "outcome_ontology.csv")
    return _read_csv(csv_path)


def load_instruments() -> List[Dict[str, str]]:
    """Return the rows of ``mr_instruments.csv`` from the data directory.

    Each row is a dict keyed by the CSV header; the list is empty when the
    file does not exist.
    """
    csv_path = os.path.join(DATA, "mr_instruments.csv")
    return _read_csv(csv_path)


def load_korean_af() -> List[Dict[str, str]]:
    """Return the rows of ``allele_frequency_korean.csv`` from the data directory.

    Each row is a dict keyed by the CSV header; the list is empty when the
    file does not exist.
    """
    csv_path = os.path.join(DATA, "allele_frequency_korean.csv")
    return _read_csv(csv_path)


def _to_float(s: Optional[str]) -> Optional[float]:
    """Parse a CSV cell as a float, mapping missing values to ``None``.

    Args:
        s: Raw cell text, or ``None`` when the column is absent.

    Returns:
        The parsed number, or ``None`` when the cell is ``None``, empty,
        ``"N/A"``, not parseable as a float, or parses to NaN.
    """
    if s is None or s == "" or s == "N/A":
        return None
    try:
        value = float(s)
    except (TypeError, ValueError):
        return None
    # float() accepts "nan"/"NaN", a common missing-value marker in exported
    # tables; report it as missing like the other markers instead of letting
    # NaN flow into numeric fields.
    if math.isnan(value):
        return None
    return value


def load_effects(path: Optional[str] = None) -> List[Dict[str, object]]:
    """Load the effects CSV and convert its numeric columns to floats.

    Args:
        path: CSV file to read. When omitted (or empty), ``effects_sample.csv``
            in the data directory is used.

    Returns:
        One dict per CSV row. All original columns are kept; the columns
        ``effect_estimate``, ``ci_low``, ``ci_high``, ``sample_size`` and
        ``follow_up_years`` hold a float or ``None``, and ``log_effect`` is
        added: the natural log of ``effect_estimate`` when that is a positive
        float, otherwise ``None``.
    """
    numeric_columns = (
        "effect_estimate",
        "ci_low",
        "ci_high",
        "sample_size",
        "follow_up_years",
    )
    source = path if path else os.path.join(DATA, "effects_sample.csv")

    records: List[Dict[str, object]] = []
    for raw_row in _read_csv(source):
        record: Dict[str, object] = dict(raw_row)
        for column in numeric_columns:
            record[column] = _to_float(raw_row.get(column))

        # The log is only defined for a strictly positive estimate; missing
        # or non-positive estimates get no log-effect.
        estimate = record["effect_estimate"]
        has_log = isinstance(estimate, float) and estimate > 0
        record["log_effect"] = math.log(estimate) if has_log else None

        records.append(record)
    return records


# ─── Ontology helpers ─────────────────────────────────────────────────────────

# Study-design keys; each one has a display label in DESIGN_LABEL.
# NOTE(review): the list order is presumably the intended display/sort
# order — confirm with callers.
DESIGN_ORDER = [
    "observational",
    "MR",
    "RCT",
    "ex_vivo_pcls",
    "within_subject_lifestyle",
]

# Human-readable label for every key listed in DESIGN_ORDER.
DESIGN_LABEL = {
    "observational": "Observational (NHANES·UKB·KoNEHS·NHIS)",
    "MR": "Mendelian Randomization",
    "RCT": "RCT (MAESTRO-NASH·ESSENCE·SYNERGY-NASH 등)",
    "ex_vivo_pcls": "Ex vivo PCLS / animal mechanism",
    "within_subject_lifestyle": "Within-subject lifestyle crossover",
}

# Stage codes (presumably the values found in the ``masld_stage`` column,
# ordered from least to most advanced — TODO confirm).
STAGE_ORDER = ["S0", "S1", "S1_lean", "MASH", "F2", "F3", "F4"]


def filter_effects(
    effects: List[Dict[str, object]],
    stage: Optional[str] = None,
    outcome: Optional[str] = None,
    design: Optional[str] = None,
    ancestry: Optional[str] = None,
    instrument: Optional[str] = None,
) -> List[Dict[str, object]]:
    """Return the effect records that satisfy every supplied criterion.

    A criterion that is ``None`` or empty is ignored. ``stage``, ``outcome``,
    ``design`` and ``instrument`` are matched exactly against the
    ``masld_stage``, ``outcome``, ``design`` and ``mr_instrument`` fields;
    ``ancestry`` is matched case-insensitively, with a missing ancestry
    treated as an empty string.

    When no criterion is active the input list itself is returned (not a
    copy); otherwise a new list is built and the input is left untouched.
    """
    checks = []
    if stage:
        checks.append(lambda rec: rec.get("masld_stage") == stage)
    if outcome:
        checks.append(lambda rec: rec.get("outcome") == outcome)
    if design:
        checks.append(lambda rec: rec.get("design") == design)
    if ancestry:
        checks.append(
            lambda rec: (rec.get("ancestry") or "").lower() == ancestry.lower()
        )
    if instrument:
        checks.append(lambda rec: rec.get("mr_instrument") == instrument)

    if not checks:
        return effects
    # all() stops at the first failing check, so a record is only tested
    # against a later criterion once it has passed the earlier ones.
    return [rec for rec in effects if all(check(rec) for check in checks)]


def unique_pairs(effects: List[Dict[str, object]]):
    """Return the distinct ``(masld_stage, outcome)`` pairs in first-seen order.

    A field missing from a record contributes ``None`` to its pair.
    """
    # dict keys are unique and keep insertion order, which gives an
    # order-preserving de-duplication in one pass.
    pairs = ((rec.get("masld_stage"), rec.get("outcome")) for rec in effects)
    return list(dict.fromkeys(pairs))
