"""Ingestion + synthetic data generation + de-identification module.

REDCap/CSV/EDW exports from hospitals performing bariatric surgery -> standardized in-memory records (lists of dataclass rows).

Tables generated/ingested:
  - patients.csv          : patient baseline (age/sex/BMI/comorbidities/preop status/procedure/ward/surgical PI)
  - intraop.csv           : intraop metrics (laparoscopic/robotic, conversion, perioperative glycemia)
  - pod0_3.csv            : POD0-3 immediate complications (leak/bleeding/VTE/ICU)
  - pod4_30.csv           : POD4-30 complications + readmission
  - pod90_outpt.csv       : POD0-90 outpatient follow-up + weight, HbA1c, BP, lipids, PHQ-9
  - hypo_events.csv       : post-bariatric hypoglycemia + dumping syndrome events

De-identification:
  - patient ID -> SHA-256 hash truncated to 8 hex characters
  - admission/surgery/outpatient dates -> study-day index (date-shifted)
  - free-text fields: enum-coded only
  - audit log returned in IngestReport
"""
from __future__ import annotations

import csv
import hashlib
import os
import random
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

# --------------------------------------------------------------------------- #
# Constants — ERAS Society bariatric protocol & MBSAQIP/ASMBS/KASMBS analog
# --------------------------------------------------------------------------- #

# Procedure codes. The synthetic generator in this module draws from a weight
# table keyed by these same five codes.
PROCEDURES = ["RYGB", "SG", "OAGB", "SADI", "DJB"]
WARDS = ["GS-A", "GS-B", "GS-C", "GS-D"]   # general surgery wards
SURGEONS = ["PI-1", "PI-2", "PI-3", "PI-4", "PI-5"]
# NOTE(review): SEXES and COMORBIDITIES are not referenced by the code visible
# in this module; presumably they are consumed by downstream modules.
SEXES = ["M", "F"]

COMORBIDITIES = ["T2DM", "HTN", "OSA", "MASLD", "DLP", "GERD"]

ANESTHESIA = ["GA-TIVA", "GA-INH"]
APPROACH = ["laparoscopic", "robotic", "open"]   # open is rare / conversion

# POD0-3 immediate complications
# NOTE(review): these labels are longer than the POD03Row field names
# (e.g. "anastomotic_leak" vs. `leak`); confirm which form consumers expect.
POD03_COMPLICATIONS = [
    "anastomotic_leak",
    "staple_leak",
    "bleeding",
    "VTE",
    "ICU_admit",
    "reoperation_acute",
    "stricture_early",
]

# POD4-30 complications (incl. delayed)
POD430_COMPLICATIONS = [
    "marginal_ulcer",
    "stricture_late",
    "dumping_early",
    "dumping_late",
    "post_bariatric_hypo_immediate",
    "wound_infection",
    "SBO",
    "bile_reflux",
    "malabsorption",
    "protein_deficiency",
    "iron_deficiency",
    "vitamin_deficiency",
]

# Values written to HypoEvent.time_bucket / HypoEvent.hypo_type by the
# synthetic generator.
HYPO_TIME_BUCKETS = ["POD0_3", "POD4_30", "POD30_90"]
HYPO_TYPES = ["immediate", "delayed_postprandial", "dumping_early", "dumping_late"]

# Outpatient visit days; these match the visit_pod7/30/60/90 flags of POD90Row.
OUTPT_VISIT_DAYS = [7, 30, 60, 90]
# Readmission reasons; the synthetic generator picks one uniformly at random
# (and forces "leak" when the patient had a leak).
READMIT_REASONS = ["leak", "bleeding", "dehydration", "obstruction",
                   "marginal_ulcer", "hypoglycemia", "other"]

# ERAS Society bariatric protocol elements (binary compliance per patient)
# NOTE(review): the element names below are descriptive labels; the dataclass
# fields use shorter `eras_*` names. No mapping between the two is defined in
# the code visible here.
ERAS_PREOP_ELEMENTS = [
    "vte_prophylaxis_preop",
    "smoking_cessation_4wk",
    "nutrition_assessment",
    "osa_screening",
    "carb_loading_preop",
    "preop_education",
]
ERAS_INTRAOP_ELEMENTS = [
    "laparoscopic_or_robotic",
    "perioperative_glycemia_lt180",
    "ppv_lung_protection",
    "normothermia",
    "antiemetic_prophylaxis",
]
ERAS_POD03_ELEMENTS = [
    "early_mobilization_pod1",
    "early_oral_intake_pod1",
    "vte_prophylaxis_postop",
    "multimodal_analgesia",
    "no_routine_ngt",
]
ERAS_POD430_ELEMENTS = [
    "protein_supplementation_60g",
    "multivitamin_started",
    "iron_b12_panel_pod30",
    "weight_followup_pod30",
    "early_weight_loss_pct_5",
]


# --------------------------------------------------------------------------- #
# Dataclasses
# --------------------------------------------------------------------------- #

@dataclass
class Patient:
    """One row of ``patients.csv``: baseline data for a single patient.

    Holds demographics, comorbidity flags, procedure/ward/surgeon assignment,
    admission/surgery/discharge day indices, and ERAS preop compliance flags.
    """
    patient_id: str                 # de-identified ID (8-hex hash in synthetic data)
    age: int
    sex: str                        # "M" or "F" in synthetic data
    bmi_pre: float
    weight_pre_kg: float
    height_cm: float
    # Comorbidity flags
    t2dm: bool
    htn: bool
    osa: bool
    masld: bool
    dlp: bool
    gerd: bool
    procedure: str
    ward: str
    surgeon: str
    anesthesia: str
    approach: str
    # Day indices; the synthetic generator adds a random date-shift offset
    admit_day: int
    surgery_day: int
    discharge_day: int
    los_days: int                   # discharge_day - surgery_day in synthetic data
    # ERAS preop compliance booleans
    eras_vte_prophy_preop: bool
    eras_smoking_cess_4wk: bool
    eras_nutrition_assess: bool
    eras_osa_screen: bool
    eras_carb_loading: bool
    eras_preop_edu: bool
    # Defaults to False; the synthetic generator assigns it after construction.
    died_30d: bool = False


@dataclass
class IntraopRow:
    """One row of ``intraop.csv``: intraoperative metrics and ERAS flags."""
    patient_id: str
    duration_min: int               # operative duration in minutes
    conversion: bool                # to open
    # ERAS intraop compliance
    eras_lap_or_robot: bool         # generator sets False when converted
    eras_glycemia_lt180: bool
    eras_ppv_lung: bool
    eras_normothermia: bool
    eras_antiemetic: bool
    ebl_ml: int                     # presumably estimated blood loss (mL) — confirm


@dataclass
class POD03Row:
    """One row of ``pod0_3.csv``: POD0-3 complication and ERAS compliance flags."""
    patient_id: str
    # Immediate complication flags
    leak: bool
    staple_leak: bool
    bleeding: bool
    vte: bool
    icu_admit: bool
    reop_acute: bool
    stricture_early: bool
    # ERAS POD0-3 compliance
    eras_mob_pod1: bool
    eras_oral_pod1: bool
    eras_vte_postop: bool
    eras_mm_analgesia: bool
    eras_no_routine_ngt: bool


@dataclass
class POD430Row:
    """One row of ``pod4_30.csv``: POD4-30 complications, ERAS flags, readmission."""
    patient_id: str
    # Complication flags
    marginal_ulcer: bool
    stricture_late: bool
    dumping_early: bool
    dumping_late: bool
    pb_hypo_immediate: bool
    wound_infection: bool
    sbo: bool
    bile_reflux: bool
    malabsorption: bool
    protein_deficiency: bool
    iron_deficiency: bool
    vitamin_deficiency: bool
    early_weight_loss_pct: float    # percent; generator clamps it at >= 0
    # ERAS POD4-30 compliance
    eras_protein_60g: bool
    eras_multivitamin: bool
    eras_iron_b12_pod30: bool
    eras_weight_fu_pod30: bool
    eras_ewl_5pct: bool             # generator sets it to early_weight_loss_pct >= 5.0
    readmit_30d: bool
    # None when there was no readmission (written to CSV as an empty cell)
    readmit_day: Optional[int]
    readmit_reason: Optional[str]


@dataclass
class POD90Row:
    """One row of ``pod90_outpt.csv``: outpatient follow-up through POD90.

    The ``*_pod90`` measurements are None when absent; the synthetic generator
    leaves them None whenever ``visit_pod90`` is False.
    """
    patient_id: str
    # Visit attendance flags
    visit_pod7: bool
    visit_pod30: bool
    visit_pod60: bool
    visit_pod90: bool
    weight_pod90_kg: Optional[float]
    bmi_pod90: Optional[float]
    hba1c_pod90: Optional[float]
    sbp_pod90: Optional[int]
    ldl_pod90: Optional[int]
    # Generator fills this only for patients with OSA who attended POD90
    osa_recovery: Optional[bool]
    phq9_pod90: Optional[int]
    glp1_ra_added: bool
    reop_trigger: bool


@dataclass
class HypoEvent:
    """One row of ``hypo_events.csv``: a single hypoglycemia/dumping event."""
    patient_id: str
    study_day: int                # days since surgery
    time_bucket: str              # POD0_3 / POD4_30 / POD30_90
    hypo_type: str                # immediate / delayed_postprandial / dumping_early / dumping_late
    glucose_mg_dl: float
    severity: str                 # L1 (54-69) / L2 (<54) / L3 (severe needs help)


@dataclass
class IngestReport:
    """Summary returned by generation/loading: row counts plus de-id metadata."""
    # Row counts per table
    n_patients: int
    n_intraop: int
    n_pod03: int
    n_pod430: int
    n_pod90: int
    n_hypo: int
    deid_method: str                # free-text description of the de-id scheme
    # Offset drawn by generate_synthetic; load_all stores -1 because the
    # offset is not recorded in the CSV files.
    date_shift_offset_days: int
    notes: List[str] = field(default_factory=list)


# --------------------------------------------------------------------------- #
# Helpers
# --------------------------------------------------------------------------- #

def _hash_id(raw: str) -> str:
    return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:8]


def _maybe(rng: random.Random, p: float) -> bool:
    return rng.random() < p


def _round(x: float, n: int = 2) -> float:
    return round(x, n)


# --------------------------------------------------------------------------- #
# Synthetic data generation
# --------------------------------------------------------------------------- #

def generate_synthetic(n_patients: int = 400, out_dir: str = "data",
                       seed: int = 42) -> IngestReport:
    """Generate a synthetic cohort and write it as six CSV files.

    Every random draw comes from a single ``random.Random(seed)`` instance,
    and several draws are conditional on earlier results, so the generated
    data depends on the exact order of statements in this function.

    Args:
        n_patients: number of synthetic patients to generate.
        out_dir: directory that receives patients.csv, intraop.csv,
            pod0_3.csv, pod4_30.csv, pod90_outpt.csv and hypo_events.csv;
            created if it does not exist.
        seed: seeds the random generator and is also mixed into the string
            that is hashed to form each patient ID.

    Returns:
        An IngestReport with per-table row counts, the drawn date-shift
        offset, and notes listing the distinct procedures/wards/surgeons and
        the 30-day death count.
    """
    rng = random.Random(seed)
    os.makedirs(out_dir, exist_ok=True)
    # One offset for the whole cohort; added to every admit day below.
    date_shift = rng.randint(180, 360)

    patients: List[Patient] = []
    intraop: List[IntraopRow] = []
    pod03: List[POD03Row] = []
    pod430: List[POD430Row] = []
    pod90: List[POD90Row] = []
    hypo: List[HypoEvent] = []

    # Procedure mix roughly mirrors KASMBS distribution
    proc_weights = {"RYGB": 0.30, "SG": 0.40, "OAGB": 0.15,
                    "SADI": 0.08, "DJB": 0.07}
    # Cumulative weights for inverse-CDF sampling in pick_procedure().
    proc_cum = []
    acc = 0.0
    for p, w in proc_weights.items():
        acc += w
        proc_cum.append((p, acc))

    def pick_procedure() -> str:
        """Sample a procedure code according to proc_weights."""
        u = rng.random()
        for p, c in proc_cum:
            if u <= c:
                return p
        # Fallback in case the cumulative sum falls just short of 1.0.
        return proc_cum[-1][0]

    for i in range(n_patients):
        pid = _hash_id(f"BARI-{i:05d}-{seed}")
        # Age clamped to [19, 70]; BMI clamped to [32, 58].
        age = max(19, min(70, int(rng.gauss(42, 10))))
        sex = "F" if _maybe(rng, 0.62) else "M"
        bmi = _round(max(32.0, min(58.0, rng.gauss(40.5, 4.5))))
        height = _round(rng.gauss(163 if sex == "F" else 173, 7), 1)
        # Weight is derived from BMI and height so the three stay consistent.
        weight = _round(bmi * (height / 100) ** 2, 1)
        proc = pick_procedure()
        ward = rng.choice(WARDS)
        surgeon = rng.choice(SURGEONS)

        t2dm = _maybe(rng, 0.42)
        htn = _maybe(rng, 0.55)
        osa = _maybe(rng, 0.38)
        masld = _maybe(rng, 0.60)
        dlp = _maybe(rng, 0.48)
        gerd = _maybe(rng, 0.30 if proc != "SG" else 0.40)

        anesthesia = rng.choice(ANESTHESIA)
        # RYGB/SG are robotic 30% of the time; all other procedures 10%.
        # "open" is never assigned here.
        if proc in ("RYGB", "SG"):
            approach = "robotic" if _maybe(rng, 0.30) else "laparoscopic"
        else:
            approach = "robotic" if _maybe(rng, 0.10) else "laparoscopic"

        admit = rng.randint(0, 720) + date_shift
        # ERAS LOS targets: SG ~2d, RYGB/OAGB ~3d, SADI ~4d, DJB ~3d
        base_los = {"SG": 2, "RYGB": 3, "OAGB": 3, "SADI": 4, "DJB": 3}[proc]
        los = max(1, int(rng.gauss(base_los, 1.0)))
        surgery_day = admit  # same day
        discharge = surgery_day + los

        # ERAS preop compliance — ward and surgeon both modulate
        ward_boost = {"GS-A": 0.10, "GS-B": 0.05, "GS-C": -0.02, "GS-D": -0.08}[ward]
        surg_boost = {"PI-1": 0.10, "PI-2": 0.05, "PI-3": 0.0,
                      "PI-4": -0.05, "PI-5": -0.08}[surgeon]
        # boost ranges from -0.16 to +0.20; a base rate plus boost may exceed
        # 1.0, in which case _maybe always returns True.
        boost = ward_boost + surg_boost

        eras_vte = _maybe(rng, 0.88 + boost)
        eras_smoke = _maybe(rng, 0.72 + boost)
        eras_nutr = _maybe(rng, 0.85 + boost)
        eras_osa = _maybe(rng, 0.78 + boost)
        eras_carb = _maybe(rng, 0.55 + boost)
        eras_edu = _maybe(rng, 0.80 + boost)

        # died_30d is left at its default here and assigned further below.
        pt = Patient(
            patient_id=pid, age=age, sex=sex, bmi_pre=bmi,
            weight_pre_kg=weight, height_cm=height,
            t2dm=t2dm, htn=htn, osa=osa, masld=masld, dlp=dlp, gerd=gerd,
            procedure=proc, ward=ward, surgeon=surgeon,
            anesthesia=anesthesia, approach=approach,
            admit_day=admit, surgery_day=surgery_day,
            discharge_day=discharge, los_days=los,
            eras_vte_prophy_preop=eras_vte,
            eras_smoking_cess_4wk=eras_smoke,
            eras_nutrition_assess=eras_nutr,
            eras_osa_screen=eras_osa,
            eras_carb_loading=eras_carb,
            eras_preop_edu=eras_edu,
        )
        patients.append(pt)

        # Intraop
        op_dur = int(rng.gauss({"SG": 95, "RYGB": 140, "OAGB": 130,
                                "SADI": 170, "DJB": 145}[proc], 25))
        # approach is never "open" in this generator, so the 0.0 branch is
        # not reached; a conversion does not change Patient.approach.
        conversion = _maybe(rng, 0.015 if approach != "open" else 0.0)
        intraop.append(IntraopRow(
            patient_id=pid,
            duration_min=max(40, op_dur),
            conversion=conversion,
            eras_lap_or_robot=(approach in ("laparoscopic", "robotic") and not conversion),
            eras_glycemia_lt180=_maybe(rng, 0.84 + boost),
            eras_ppv_lung=_maybe(rng, 0.92 + boost),
            eras_normothermia=_maybe(rng, 0.95),
            eras_antiemetic=_maybe(rng, 0.90 + boost),
            ebl_ml=max(10, int(rng.gauss(60, 30))),
        ))

        # POD0-3 complications — procedure-specific baseline rates
        leak_p = {"RYGB": 0.012, "SG": 0.008, "OAGB": 0.014,
                  "SADI": 0.020, "DJB": 0.010}[proc]
        staple_leak_p = {"RYGB": 0.004, "SG": 0.015, "OAGB": 0.006,
                         "SADI": 0.010, "DJB": 0.005}[proc]
        # Higher compliance boost lowers the leak probability.
        leak = _maybe(rng, leak_p * (1.0 - 0.5 * boost))
        staple_leak = _maybe(rng, staple_leak_p * (1.0 - 0.5 * boost))
        bleeding = _maybe(rng, 0.022)
        # Preop VTE prophylaxis compliance cuts the VTE probability to 30%.
        vte = _maybe(rng, 0.010 * (1.0 if not eras_vte else 0.3))
        icu = _maybe(rng, 0.04 + (0.10 if leak or staple_leak else 0.0))
        # Short-circuit: the random draw only happens when there was no leak.
        reop_acute = leak or staple_leak or _maybe(rng, 0.005)
        stricture_early = _maybe(rng, 0.006 if proc == "SG" else 0.003)

        pod03.append(POD03Row(
            patient_id=pid, leak=leak, staple_leak=staple_leak,
            bleeding=bleeding, vte=vte, icu_admit=icu,
            reop_acute=reop_acute, stricture_early=stricture_early,
            eras_mob_pod1=_maybe(rng, 0.85 + boost),
            eras_oral_pod1=_maybe(rng, 0.75 + boost),
            eras_vte_postop=_maybe(rng, 0.92 + boost),
            eras_mm_analgesia=_maybe(rng, 0.80 + boost),
            eras_no_routine_ngt=_maybe(rng, 0.78 + boost),
        ))

        # POD4-30 complications
        marg_ulcer = _maybe(rng, 0.040 if proc == "RYGB" else
                            (0.020 if proc == "OAGB" else 0.010))
        stricture_late = _maybe(rng, 0.020 if proc == "SG" else 0.012)
        dumping_early = _maybe(rng, 0.18 if proc in ("RYGB", "OAGB") else
                               (0.10 if proc == "SG" else 0.12))
        dumping_late = _maybe(rng, 0.05 if proc in ("RYGB", "OAGB") else 0.02)
        pb_hypo_imm = _maybe(rng, 0.06 if proc in ("RYGB", "SG") else 0.04)
        wound_inf = _maybe(rng, 0.018)
        sbo = _maybe(rng, 0.012 if proc in ("RYGB", "OAGB", "SADI") else 0.004)
        bile_reflux = _maybe(rng, 0.05 if proc == "OAGB" else 0.01)
        malabs = _maybe(rng, 0.10 if proc in ("SADI", "DJB") else 0.02)
        prot_def = _maybe(rng, 0.08 if proc in ("SADI", "DJB") else 0.04)
        iron_def = _maybe(rng, 0.10 if proc in ("RYGB", "OAGB", "SADI") else 0.04)
        vit_def = _maybe(rng, 0.12 if proc in ("RYGB", "OAGB", "SADI", "DJB") else 0.05)

        ewl_pct = max(0.0, _round(rng.gauss(8.0, 2.5)))   # %TWL by POD30

        # Readmission probability rises with a leak and with early dumping.
        readmit = _maybe(rng, 0.06 + (0.30 if (leak or staple_leak) else 0.0)
                         + (0.05 if dumping_early else 0.0))
        # NOTE(review): readmit_day is drawn from 2..29 independently of the
        # length of stay, so it can fall before the discharge day.
        readmit_day = rng.randint(2, 29) if readmit else None
        readmit_reason = (rng.choice(READMIT_REASONS) if readmit else None)
        # A leak overrides the randomly chosen reason (the draw still happened).
        if readmit and (leak or staple_leak):
            readmit_reason = "leak"

        pod430.append(POD430Row(
            patient_id=pid, marginal_ulcer=marg_ulcer,
            stricture_late=stricture_late,
            dumping_early=dumping_early, dumping_late=dumping_late,
            pb_hypo_immediate=pb_hypo_imm, wound_infection=wound_inf,
            sbo=sbo, bile_reflux=bile_reflux, malabsorption=malabs,
            protein_deficiency=prot_def, iron_deficiency=iron_def,
            vitamin_deficiency=vit_def, early_weight_loss_pct=ewl_pct,
            eras_protein_60g=_maybe(rng, 0.70 + boost),
            eras_multivitamin=_maybe(rng, 0.82 + boost),
            eras_iron_b12_pod30=_maybe(rng, 0.68 + boost),
            eras_weight_fu_pod30=_maybe(rng, 0.78 + boost),
            eras_ewl_5pct=(ewl_pct >= 5.0),
            readmit_30d=readmit, readmit_day=readmit_day,
            readmit_reason=readmit_reason,
        ))

        # 30-day mortality — very rare but elevated if leak
        died_30 = _maybe(rng, 0.0008 + (0.04 if (leak or staple_leak) else 0.0))
        # pt is already in `patients`; this mutates the stored record.
        pt.died_30d = died_30

        # POD0-90 outpatient follow-up
        v7 = _maybe(rng, 0.78 + boost)
        v30 = _maybe(rng, 0.85 + boost)
        v60 = _maybe(rng, 0.72 + boost)
        v90 = _maybe(rng, 0.68 + boost)
        # POD90 measurements are only drawn when the POD90 visit happened,
        # so the number of random draws per patient varies.
        weight90 = (_round(weight * (1.0 - rng.gauss(0.13, 0.04)), 1)
                    if v90 else None)
        bmi90 = _round(weight90 / (height / 100) ** 2, 1) if weight90 else None
        hba1c90 = (_round(max(4.8, rng.gauss(6.0 if t2dm else 5.4, 0.6)), 1)
                   if v90 else None)
        sbp90 = (int(rng.gauss(120 if htn else 115, 10)) if v90 else None)
        ldl90 = (int(rng.gauss(95 if dlp else 105, 22)) if v90 else None)
        osa_rec = (_maybe(rng, 0.55) if (osa and v90) else None)
        phq9_90 = (max(0, int(rng.gauss(4.5, 3.5))) if v90 else None)
        glp1_added = _maybe(rng, 0.15 if (t2dm and v90) else 0.06)
        reop_trig = _maybe(rng, 0.012 + (0.20 if (leak or staple_leak) else 0.0))

        pod90.append(POD90Row(
            patient_id=pid, visit_pod7=v7, visit_pod30=v30,
            visit_pod60=v60, visit_pod90=v90,
            weight_pod90_kg=weight90, bmi_pod90=bmi90,
            hba1c_pod90=hba1c90, sbp_pod90=sbp90, ldl_pod90=ldl90,
            osa_recovery=osa_rec, phq9_pod90=phq9_90,
            glp1_ra_added=glp1_added, reop_trigger=reop_trig,
        ))

        # Hypoglycemia events
        # Immediate (POD0-3): 1-3 events for patients flagged pb_hypo_imm.
        # NOTE(review): severity is drawn independently of glucose_mg_dl in
        # all event types below, so the two can disagree with the L1/L2
        # glucose ranges noted on HypoEvent.severity.
        if pb_hypo_imm:
            n_ev = rng.randint(1, 3)
            for _ in range(n_ev):
                hypo.append(HypoEvent(
                    patient_id=pid,
                    study_day=rng.randint(0, 3),
                    time_bucket="POD0_3",
                    hypo_type="immediate",
                    glucose_mg_dl=_round(rng.uniform(45, 69), 1),
                    severity=("L1" if rng.random() < 0.7 else "L2"),
                ))
        # Delayed postprandial (POD30-90) — RYGB and OAGB only, 10% each;
        # 1-4 events per affected patient.
        if proc in ("RYGB", "OAGB") and _maybe(rng, 0.10):
            n_ev = rng.randint(1, 4)
            for _ in range(n_ev):
                hypo.append(HypoEvent(
                    patient_id=pid,
                    study_day=rng.randint(30, 89),
                    time_bucket="POD30_90",
                    hypo_type="delayed_postprandial",
                    glucose_mg_dl=_round(rng.uniform(40, 68), 1),
                    severity=("L1" if rng.random() < 0.55 else
                              ("L2" if rng.random() < 0.85 else "L3")),
                ))
        # Dumping early/late within POD4-30: one event per flagged patient.
        if dumping_early:
            hypo.append(HypoEvent(
                patient_id=pid,
                study_day=rng.randint(4, 30),
                time_bucket="POD4_30",
                hypo_type="dumping_early",
                glucose_mg_dl=_round(rng.uniform(60, 90), 1),
                severity="L1",
            ))
        if dumping_late:
            hypo.append(HypoEvent(
                patient_id=pid,
                study_day=rng.randint(15, 30),
                time_bucket="POD4_30",
                hypo_type="dumping_late",
                glucose_mg_dl=_round(rng.uniform(50, 75), 1),
                severity=("L1" if rng.random() < 0.7 else "L2"),
            ))

    # ---- Write CSVs ----
    _write_patients(os.path.join(out_dir, "patients.csv"), patients)
    _write_intraop(os.path.join(out_dir, "intraop.csv"), intraop)
    _write_pod03(os.path.join(out_dir, "pod0_3.csv"), pod03)
    _write_pod430(os.path.join(out_dir, "pod4_30.csv"), pod430)
    _write_pod90(os.path.join(out_dir, "pod90_outpt.csv"), pod90)
    _write_hypo(os.path.join(out_dir, "hypo_events.csv"), hypo)

    return IngestReport(
        n_patients=len(patients),
        n_intraop=len(intraop),
        n_pod03=len(pod03),
        n_pod430=len(pod430),
        n_pod90=len(pod90),
        n_hypo=len(hypo),
        deid_method="SHA-256 trunc(8) + date-shift",
        date_shift_offset_days=date_shift,
        notes=[
            f"procedures: {sorted(set(p.procedure for p in patients))}",
            f"wards: {sorted(set(p.ward for p in patients))}",
            f"surgeons: {sorted(set(p.surgeon for p in patients))}",
            f"died_30d: {sum(1 for p in patients if p.died_30d)} / {len(patients)}",
        ],
    )


# --------------------------------------------------------------------------- #
# CSV writers
# --------------------------------------------------------------------------- #

def _write_csv(path: str, header: List[str], rows: List[List[Any]]) -> None:
    with open(path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(header)
        for r in rows:
            w.writerow(r)


def _b(x: bool) -> int:
    return 1 if x else 0


def _write_patients(path: str, rows: List[Patient]) -> None:
    """Write patient baseline records to ``path`` as CSV.

    Boolean fields are encoded as 0/1; all other fields are written as-is.
    """
    columns = [
        "patient_id", "age", "sex", "bmi_pre", "weight_pre_kg", "height_cm",
        "t2dm", "htn", "osa", "masld", "dlp", "gerd",
        "procedure", "ward", "surgeon", "anesthesia", "approach",
        "admit_day", "surgery_day", "discharge_day", "los_days",
        "eras_vte_prophy_preop", "eras_smoking_cess_4wk",
        "eras_nutrition_assess", "eras_osa_screen",
        "eras_carb_loading", "eras_preop_edu", "died_30d",
    ]
    table = []
    for pt in rows:
        demographics = [pt.patient_id, pt.age, pt.sex, pt.bmi_pre,
                        pt.weight_pre_kg, pt.height_cm]
        comorbidities = [_b(flag) for flag in (pt.t2dm, pt.htn, pt.osa,
                                               pt.masld, pt.dlp, pt.gerd)]
        assignment = [pt.procedure, pt.ward, pt.surgeon,
                      pt.anesthesia, pt.approach]
        timeline = [pt.admit_day, pt.surgery_day,
                    pt.discharge_day, pt.los_days]
        # ERAS preop flags followed by the 30-day mortality flag.
        trailing_flags = [_b(flag) for flag in (
            pt.eras_vte_prophy_preop, pt.eras_smoking_cess_4wk,
            pt.eras_nutrition_assess, pt.eras_osa_screen,
            pt.eras_carb_loading, pt.eras_preop_edu, pt.died_30d,
        )]
        table.append(demographics + comorbidities + assignment
                     + timeline + trailing_flags)
    _write_csv(path, columns, table)


def _write_intraop(path: str, rows: List[IntraopRow]) -> None:
    """Write intraoperative records to ``path`` as CSV (booleans as 0/1)."""
    columns = [
        "patient_id", "duration_min", "conversion",
        "eras_lap_or_robot", "eras_glycemia_lt180",
        "eras_ppv_lung", "eras_normothermia", "eras_antiemetic", "ebl_ml",
    ]
    table = []
    for rec in rows:
        line = [rec.patient_id, rec.duration_min]
        # Conversion flag, then the five ERAS intraop compliance flags.
        for flag in (rec.conversion, rec.eras_lap_or_robot,
                     rec.eras_glycemia_lt180, rec.eras_ppv_lung,
                     rec.eras_normothermia, rec.eras_antiemetic):
            line.append(_b(flag))
        line.append(rec.ebl_ml)
        table.append(line)
    _write_csv(path, columns, table)


def _write_pod03(path: str, rows: List[POD03Row]) -> None:
    """Write POD0-3 records to ``path`` as CSV; every flag is encoded as 0/1."""
    columns = [
        "patient_id", "leak", "staple_leak", "bleeding", "vte",
        "icu_admit", "reop_acute", "stricture_early",
        "eras_mob_pod1", "eras_oral_pod1", "eras_vte_postop",
        "eras_mm_analgesia", "eras_no_routine_ngt",
    ]
    table = []
    for rec in rows:
        complications = (rec.leak, rec.staple_leak, rec.bleeding, rec.vte,
                         rec.icu_admit, rec.reop_acute, rec.stricture_early)
        compliance = (rec.eras_mob_pod1, rec.eras_oral_pod1,
                      rec.eras_vte_postop, rec.eras_mm_analgesia,
                      rec.eras_no_routine_ngt)
        table.append([rec.patient_id]
                     + [_b(flag) for flag in complications + compliance])
    _write_csv(path, columns, table)


def _write_pod430(path: str, rows: List[POD430Row]) -> None:
    """Write POD4-30 records to ``path`` as CSV.

    Flags are encoded as 0/1. A missing ``readmit_day`` (None) and a missing
    or empty ``readmit_reason`` are written as empty cells.
    """
    columns = [
        "patient_id", "marginal_ulcer", "stricture_late", "dumping_early",
        "dumping_late", "pb_hypo_immediate", "wound_infection", "sbo",
        "bile_reflux", "malabsorption", "protein_deficiency",
        "iron_deficiency", "vitamin_deficiency", "early_weight_loss_pct",
        "eras_protein_60g", "eras_multivitamin", "eras_iron_b12_pod30",
        "eras_weight_fu_pod30", "eras_ewl_5pct",
        "readmit_30d", "readmit_day", "readmit_reason",
    ]
    table = []
    for rec in rows:
        complications = (
            rec.marginal_ulcer, rec.stricture_late, rec.dumping_early,
            rec.dumping_late, rec.pb_hypo_immediate, rec.wound_infection,
            rec.sbo, rec.bile_reflux, rec.malabsorption,
            rec.protein_deficiency, rec.iron_deficiency,
            rec.vitamin_deficiency,
        )
        # ERAS POD4-30 compliance flags followed by the readmission flag.
        later_flags = (
            rec.eras_protein_60g, rec.eras_multivitamin,
            rec.eras_iron_b12_pod30, rec.eras_weight_fu_pod30,
            rec.eras_ewl_5pct, rec.readmit_30d,
        )
        day_cell = "" if rec.readmit_day is None else rec.readmit_day
        reason_cell = rec.readmit_reason or ""

        line = [rec.patient_id]
        line.extend(_b(flag) for flag in complications)
        line.append(rec.early_weight_loss_pct)
        line.extend(_b(flag) for flag in later_flags)
        line.append(day_cell)
        line.append(reason_cell)
        table.append(line)
    _write_csv(path, columns, table)


def _write_pod90(path: str, rows: List[POD90Row]) -> None:
    """Write POD90 outpatient records to ``path`` as CSV.

    Required flags are encoded as 0/1. Optional values are written as an
    empty cell when None, as 0/1 when boolean, and unchanged otherwise.
    """
    columns = [
        "patient_id", "visit_pod7", "visit_pod30", "visit_pod60", "visit_pod90",
        "weight_pod90_kg", "bmi_pod90", "hba1c_pod90", "sbp_pod90", "ldl_pod90",
        "osa_recovery", "phq9_pod90", "glp1_ra_added", "reop_trigger",
    ]

    def _cell(value):
        # None -> empty cell; bool -> 0/1; anything else passes through.
        if value is None:
            return ""
        if isinstance(value, bool):
            return 1 if value else 0
        return value

    table = []
    for rec in rows:
        visits = [_b(rec.visit_pod7), _b(rec.visit_pod30),
                  _b(rec.visit_pod60), _b(rec.visit_pod90)]
        measurements = [_cell(value) for value in (
            rec.weight_pod90_kg, rec.bmi_pod90, rec.hba1c_pod90,
            rec.sbp_pod90, rec.ldl_pod90, rec.osa_recovery, rec.phq9_pod90,
        )]
        decisions = [_b(rec.glp1_ra_added), _b(rec.reop_trigger)]
        table.append([rec.patient_id] + visits + measurements + decisions)
    _write_csv(path, columns, table)


def _write_hypo(path: str, rows: List[HypoEvent]) -> None:
    """Write hypoglycemia/dumping events to ``path`` as CSV, one row per event."""
    columns = ["patient_id", "study_day", "time_bucket",
               "hypo_type", "glucose_mg_dl", "severity"]
    table = []
    for event in rows:
        table.append([
            event.patient_id,
            event.study_day,
            event.time_bucket,
            event.hypo_type,
            event.glucose_mg_dl,
            event.severity,
        ])
    _write_csv(path, columns, table)


# --------------------------------------------------------------------------- #
# CSV loaders
# --------------------------------------------------------------------------- #

def _read_csv(path: str) -> List[Dict[str, str]]:
    with open(path, encoding="utf-8") as f:
        return list(csv.DictReader(f))


def _to_bool(x: str) -> bool:
    return str(x).strip() in ("1", "True", "true", "y", "Y")


def _to_int(x: str, default: int = 0) -> int:
    try:
        return int(float(x))
    except (TypeError, ValueError):
        return default


def _to_float(x: str, default: float = 0.0) -> float:
    try:
        return float(x)
    except (TypeError, ValueError):
        return default


def _to_opt_float(x: str) -> Optional[float]:
    if x is None or str(x).strip() == "":
        return None
    try:
        return float(x)
    except ValueError:
        return None


def _to_opt_int(x: str) -> Optional[int]:
    if x is None or str(x).strip() == "":
        return None
    try:
        return int(float(x))
    except ValueError:
        return None


def _to_opt_bool(x: str) -> Optional[bool]:
    """Parse an optional boolean cell.

    None or a blank cell yields None; any other value is passed to
    ``_to_bool``.
    """
    is_blank = x is None or str(x).strip() == ""
    return None if is_blank else _to_bool(x)


def load_all(data_dir: str) -> Tuple[List[Patient], List[IntraopRow],
                                     List[POD03Row], List[POD430Row],
                                     List[POD90Row], List[HypoEvent],
                                     IngestReport]:
    """Load the six CSV tables from ``data_dir`` into dataclass rows.

    Files are read in this order: patients.csv, intraop.csv, pod0_3.csv,
    pod4_30.csv, pod90_outpt.csv, hypo_events.csv. Required columns are
    accessed by key, so a missing column raises ``KeyError``; only the
    ``readmit_day`` and ``readmit_reason`` columns of pod4_30.csv may be
    absent.

    Returns:
        ``(patients, intraop, pod03, pod430, pod90, hypo, report)``. The
        report carries row counts and ``date_shift_offset_days=-1``, because
        the offset is not stored in the CSV files.
    """
    def _rows(filename: str) -> List[Dict[str, str]]:
        # Read one table from the data directory.
        return _read_csv(os.path.join(data_dir, filename))

    patients: List[Patient] = []
    for rec in _rows("patients.csv"):
        patients.append(Patient(
            patient_id=rec["patient_id"],
            age=_to_int(rec["age"]),
            sex=rec["sex"],
            bmi_pre=_to_float(rec["bmi_pre"]),
            weight_pre_kg=_to_float(rec["weight_pre_kg"]),
            height_cm=_to_float(rec["height_cm"]),
            t2dm=_to_bool(rec["t2dm"]),
            htn=_to_bool(rec["htn"]),
            osa=_to_bool(rec["osa"]),
            masld=_to_bool(rec["masld"]),
            dlp=_to_bool(rec["dlp"]),
            gerd=_to_bool(rec["gerd"]),
            procedure=rec["procedure"],
            ward=rec["ward"],
            surgeon=rec["surgeon"],
            anesthesia=rec["anesthesia"],
            approach=rec["approach"],
            admit_day=_to_int(rec["admit_day"]),
            surgery_day=_to_int(rec["surgery_day"]),
            discharge_day=_to_int(rec["discharge_day"]),
            los_days=_to_int(rec["los_days"]),
            eras_vte_prophy_preop=_to_bool(rec["eras_vte_prophy_preop"]),
            eras_smoking_cess_4wk=_to_bool(rec["eras_smoking_cess_4wk"]),
            eras_nutrition_assess=_to_bool(rec["eras_nutrition_assess"]),
            eras_osa_screen=_to_bool(rec["eras_osa_screen"]),
            eras_carb_loading=_to_bool(rec["eras_carb_loading"]),
            eras_preop_edu=_to_bool(rec["eras_preop_edu"]),
            died_30d=_to_bool(rec["died_30d"]),
        ))

    intraop: List[IntraopRow] = []
    for rec in _rows("intraop.csv"):
        intraop.append(IntraopRow(
            patient_id=rec["patient_id"],
            duration_min=_to_int(rec["duration_min"]),
            conversion=_to_bool(rec["conversion"]),
            eras_lap_or_robot=_to_bool(rec["eras_lap_or_robot"]),
            eras_glycemia_lt180=_to_bool(rec["eras_glycemia_lt180"]),
            eras_ppv_lung=_to_bool(rec["eras_ppv_lung"]),
            eras_normothermia=_to_bool(rec["eras_normothermia"]),
            eras_antiemetic=_to_bool(rec["eras_antiemetic"]),
            ebl_ml=_to_int(rec["ebl_ml"]),
        ))

    pod03: List[POD03Row] = []
    for rec in _rows("pod0_3.csv"):
        pod03.append(POD03Row(
            patient_id=rec["patient_id"],
            leak=_to_bool(rec["leak"]),
            staple_leak=_to_bool(rec["staple_leak"]),
            bleeding=_to_bool(rec["bleeding"]),
            vte=_to_bool(rec["vte"]),
            icu_admit=_to_bool(rec["icu_admit"]),
            reop_acute=_to_bool(rec["reop_acute"]),
            stricture_early=_to_bool(rec["stricture_early"]),
            eras_mob_pod1=_to_bool(rec["eras_mob_pod1"]),
            eras_oral_pod1=_to_bool(rec["eras_oral_pod1"]),
            eras_vte_postop=_to_bool(rec["eras_vte_postop"]),
            eras_mm_analgesia=_to_bool(rec["eras_mm_analgesia"]),
            eras_no_routine_ngt=_to_bool(rec["eras_no_routine_ngt"]),
        ))

    pod430: List[POD430Row] = []
    for rec in _rows("pod4_30.csv"):
        pod430.append(POD430Row(
            patient_id=rec["patient_id"],
            marginal_ulcer=_to_bool(rec["marginal_ulcer"]),
            stricture_late=_to_bool(rec["stricture_late"]),
            dumping_early=_to_bool(rec["dumping_early"]),
            dumping_late=_to_bool(rec["dumping_late"]),
            pb_hypo_immediate=_to_bool(rec["pb_hypo_immediate"]),
            wound_infection=_to_bool(rec["wound_infection"]),
            sbo=_to_bool(rec["sbo"]),
            bile_reflux=_to_bool(rec["bile_reflux"]),
            malabsorption=_to_bool(rec["malabsorption"]),
            protein_deficiency=_to_bool(rec["protein_deficiency"]),
            iron_deficiency=_to_bool(rec["iron_deficiency"]),
            vitamin_deficiency=_to_bool(rec["vitamin_deficiency"]),
            early_weight_loss_pct=_to_float(rec["early_weight_loss_pct"]),
            eras_protein_60g=_to_bool(rec["eras_protein_60g"]),
            eras_multivitamin=_to_bool(rec["eras_multivitamin"]),
            eras_iron_b12_pod30=_to_bool(rec["eras_iron_b12_pod30"]),
            eras_weight_fu_pod30=_to_bool(rec["eras_weight_fu_pod30"]),
            eras_ewl_5pct=_to_bool(rec["eras_ewl_5pct"]),
            readmit_30d=_to_bool(rec["readmit_30d"]),
            readmit_day=_to_opt_int(rec.get("readmit_day", "")),
            # A missing or empty reason is normalised to None.
            readmit_reason=rec.get("readmit_reason") or None,
        ))

    pod90: List[POD90Row] = []
    for rec in _rows("pod90_outpt.csv"):
        pod90.append(POD90Row(
            patient_id=rec["patient_id"],
            visit_pod7=_to_bool(rec["visit_pod7"]),
            visit_pod30=_to_bool(rec["visit_pod30"]),
            visit_pod60=_to_bool(rec["visit_pod60"]),
            visit_pod90=_to_bool(rec["visit_pod90"]),
            weight_pod90_kg=_to_opt_float(rec["weight_pod90_kg"]),
            bmi_pod90=_to_opt_float(rec["bmi_pod90"]),
            hba1c_pod90=_to_opt_float(rec["hba1c_pod90"]),
            sbp_pod90=_to_opt_int(rec["sbp_pod90"]),
            ldl_pod90=_to_opt_int(rec["ldl_pod90"]),
            osa_recovery=_to_opt_bool(rec["osa_recovery"]),
            phq9_pod90=_to_opt_int(rec["phq9_pod90"]),
            glp1_ra_added=_to_bool(rec["glp1_ra_added"]),
            reop_trigger=_to_bool(rec["reop_trigger"]),
        ))

    hypo: List[HypoEvent] = []
    for rec in _rows("hypo_events.csv"):
        hypo.append(HypoEvent(
            patient_id=rec["patient_id"],
            study_day=_to_int(rec["study_day"]),
            time_bucket=rec["time_bucket"],
            hypo_type=rec["hypo_type"],
            glucose_mg_dl=_to_float(rec["glucose_mg_dl"]),
            severity=rec["severity"],
        ))

    report = IngestReport(
        n_patients=len(patients),
        n_intraop=len(intraop),
        n_pod03=len(pod03),
        n_pod430=len(pod430),
        n_pod90=len(pod90),
        n_hypo=len(hypo),
        deid_method="SHA-256 trunc(8) + date-shift (loaded)",
        date_shift_offset_days=-1,
        notes=["loaded from CSV (de-id already applied at export)"],
    )
    return patients, intraop, pod03, pod430, pod90, hypo, report
