#!/usr/bin/env python3
"""MASH-DILISurveil-Kor CLI entrypoint.

오프라인 standalone. `python3 main.py --demo` 로 합성 데이터를 생성하고
end-to-end 안전성 시그널 파이프라인을 실행한다.

참고용·연구용. Not for clinical decision.
"""
from __future__ import annotations

import argparse
import csv
import json
import random
import sys
from pathlib import Path
from typing import Dict, List

# Directory that contains this script; main() uses it for the default
# data/ and reports/ locations.
ROOT = Path(__file__).resolve().parent
# Put that directory first on sys.path so the `modules` import below can be
# resolved from it regardless of the caller's working directory.
sys.path.insert(0, str(ROOT))

# Imported after the sys.path tweak above, hence the E402 suppression.
from modules import ingest, hys_law, rucam, class_panel, report  # noqa: E402


# Banner that main() prints before doing any work.
DISCLAIMER = (
    "[DISCLAIMER] 본 도구는 참고용·연구용입니다. 임상적 의사결정·규제 제출에 "
    "그대로 사용해서는 안 됩니다. placebo arm reference는 mock 값입니다."
)


# --------- synthetic data generation ---------
def _gen_lft_row(rng: random.Random, pid: str, arm: str, drug_class: str,
                 weeks: List[int], baseline_alt: float, drug_hepatotoxic_p: float,
                 baseline_tbl: float = 0.7) -> List[Dict]:
    """Simulate one patient's liver-function-test series, one record per visit.

    The patient is randomly assigned at most one liver event:

    * a hepatotoxic event (probability ``drug_hepatotoxic_p``) that raises
      ALT and TBL together, or
    * for non-placebo arms without a hepatotoxic event, an ALT-only
      "Temple corollary" event (probability ``6 * drug_hepatotoxic_p``) in
      which TBL stays close to its baseline.

    An event starts at a week drawn from 12/24/36/48 and affects every visit
    from that week through twelve weeks later (inclusive). All other visits
    fluctuate around the patient's baseline values.

    Args:
        rng: Random source; every draw for this patient comes from it.
        pid: Patient identifier copied into each record.
        arm: Arm label; ``"placebo"`` disables the ALT-only event.
        drug_class: Class label copied into each record.
        weeks: Visit weeks, one output record per entry, in the given order.
        baseline_alt: Patient baseline ALT that visit values are scaled from.
        drug_hepatotoxic_p: Probability of the hepatotoxic event.
        baseline_tbl: Patient baseline TBL.

    Returns:
        A list of dicts with keys ``pid``, ``arm``, ``drug_class``, ``week``,
        ``ALT``, ``AST``, ``ALP``, ``TBL``, ``INR``, ``ALB`` and ``PLT``.
    """
    # NOTE: the order of rng draws determines the output for a given seed, so
    # statements that consume randomness must not be reordered.
    alp = rng.uniform(85, 120)
    albumin_base = rng.uniform(3.8, 4.6)
    platelet_base = rng.uniform(180, 280)
    inr_base = 1.0

    has_hys_event = rng.random() < drug_hepatotoxic_p
    # The ALT-only event is only considered (and its random number only
    # drawn) for drug-arm patients that did not get the hepatotoxic event.
    has_temple_event = False
    if not has_hys_event and arm != "placebo":
        has_temple_event = rng.random() < drug_hepatotoxic_p * 6

    onset = None
    if has_hys_event or has_temple_event:
        onset = rng.choice([12, 24, 36, 48])

    records = []
    for wk in weeks:
        in_event_window = onset is not None and onset <= wk <= onset + 12
        if not in_event_window:
            # Outside an event: values hover around baseline; ALP drifts
            # from its previous value and is floored at 60.
            alt = baseline_alt * rng.uniform(0.6, 1.3)
            tbl = baseline_tbl * rng.uniform(0.8, 1.3)
            ast = alt * rng.uniform(0.7, 1.0)
            alp = max(60, alp + rng.uniform(-8, 8))
        elif has_hys_event:
            # ALT and TBL rise together.
            alt = baseline_alt * rng.uniform(3.5, 6.5)
            tbl = rng.uniform(2.2, 4.5)
            ast = alt * 0.9
            alp = rng.uniform(110, 180)
        else:
            # ALT-only event: TBL stays near its baseline.
            alt = baseline_alt * rng.uniform(3.2, 5.0)
            tbl = baseline_tbl * rng.uniform(0.9, 1.4)
            ast = alt * 0.85
            alp = rng.uniform(100, 140)

        record = {
            "pid": pid, "arm": arm, "drug_class": drug_class, "week": wk,
            "ALT": round(alt, 1),
            "AST": round(ast, 1),
            "ALP": round(alp, 1),
            "TBL": round(tbl, 2),
            # INR, ALB and PLT are per-visit jitter around fixed patient values.
            "INR": round(inr_base + rng.uniform(-0.05, 0.05), 2),
            "ALB": round(albumin_base + rng.uniform(-0.1, 0.1), 2),
            "PLT": round(platelet_base + rng.uniform(-15, 15), 1),
        }
        records.append(record)
    return records


def gen_synthetic_data(out_dir: Path, n: int = 600, seed: int = 42) -> Dict[str, Path]:
    """Generate the seeded synthetic demo dataset and write it under ``out_dir``.

    Files written (``out_dir`` is created if needed):

    * ``synthetic_lft.csv`` - one row per patient visit.
    * ``synthetic_<class>_panel.csv`` - long-format class panels for the THRb,
      FGF21, ACC and FXR arms (the placebo arm has none).
    * ``placebo_reference.csv`` - fixed mock placebo-arm reference table.
    * ``charter_example.yaml`` - fixed example charter text.

    Patients are split evenly over five arms, ``n // 5`` per arm, so any
    remainder of ``n`` is dropped.

    Args:
        out_dir: Destination directory.
        n: Requested total number of patients.
        seed: Seed for the ``random.Random`` instance that drives all draws.

    Returns:
        Mapping of ``"lft"``, ``"placebo"``, ``"charter"`` and
        ``"panel_<class>"`` (lower-cased class name) to the written paths.
    """
    rng = random.Random(seed)
    out_dir.mkdir(parents=True, exist_ok=True)

    def dump_csv(path, columns, records):
        """Write dict ``records`` to ``path`` as UTF-8 CSV with a header row."""
        with path.open("w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=columns)
            writer.writeheader()
            writer.writerows(records)
        return path

    # (arm label, drug class, per-patient hepatotoxic-event probability)
    arm_specs = [
        ("THR-beta", "THRb", 0.025),
        ("FGF21", "FGF21", 0.018),
        ("ACC", "ACC", 0.020),
        ("FXR", "FXR", 0.020),
        ("placebo", "placebo", 0.002),
    ]
    visit_weeks = [0, 4, 8, 12, 24, 36, 48, 60, 72, 84]
    panel_rows: Dict[str, List[Dict]] = {
        cls: [] for cls in ("THRb", "FGF21", "ACC", "FXR")
    }
    patients_per_arm = n // len(arm_specs)

    lft_rows: List[Dict] = []
    for arm, dclass, event_p in arm_specs:
        for idx in range(patients_per_arm):
            pid = f"{dclass[:3].upper()}-{idx:04d}"
            # Per-patient baselines are drawn before the visit series so the
            # random sequence stays the same for a given seed.
            alt0 = rng.uniform(45, 90)
            tbl0 = rng.uniform(0.5, 1.0)
            lft_rows += _gen_lft_row(rng, pid, arm, dclass, visit_weeks,
                                     alt0, event_p, tbl0)
            # Only the four drug classes have a class panel; placebo is skipped.
            if dclass == "THRb":
                _gen_thrb_panel(rng, pid, panel_rows["THRb"])
            elif dclass == "FGF21":
                _gen_fgf21_panel(rng, pid, panel_rows["FGF21"])
            elif dclass == "ACC":
                _gen_acc_panel(rng, pid, panel_rows["ACC"])
            elif dclass == "FXR":
                _gen_fxr_panel(rng, pid, panel_rows["FXR"])

    # LFT visits
    lft_path = dump_csv(
        out_dir / "synthetic_lft.csv",
        ["pid", "arm", "drug_class", "week",
         "ALT", "AST", "ALP", "TBL", "INR", "ALB", "PLT"],
        lft_rows,
    )

    # Class panels, one file per drug class
    panel_paths: Dict[str, Path] = {}
    for cls, records in panel_rows.items():
        panel_paths[cls] = dump_csv(
            out_dir / f"synthetic_{cls.lower()}_panel.csv",
            ["pid", "week", "marker", "value"],
            records,
        )

    # Placebo reference: mock values (original note: simplified from the
    # published MAESTRO-NASH / ENLIGHTEN / SYMMETRY / CONTROL / ESSENCE figures).
    placebo_columns = [
        "trial", "n", "week_horizon",
        "classical_hys_rate", "temple_rate",
        "mean_alt_change_pct"]
    placebo_values = [
        ("MAESTRO-NASH-placebo", 318, 52, 0.003, 0.018, -2.1),
        ("ENLIGHTEN-placebo", 220, 48, 0.000, 0.022, -1.5),
        ("SYMMETRY-placebo", 195, 48, 0.005, 0.030, -3.4),
        ("CONTROL-placebo", 142, 48, 0.000, 0.014, -0.8),
        ("ESSENCE-placebo", 380, 72, 0.004, 0.025, -2.0),
    ]
    placebo_path = dump_csv(
        out_dir / "placebo_reference.csv",
        placebo_columns,
        [dict(zip(placebo_columns, values)) for values in placebo_values],
    )

    # Example charter (static text)
    charter_text = (
        "drug: STUDY-DRUG-001\n"
        "drug_classes:\n"
        "  - THRb\n  - FGF21\n  - ACC\n  - FXR\n"
        "monitoring_windows:\n"
        "  baseline: 0\n"
        "  scheduled_weeks: [4, 12, 24, 48, 72]\n"
        "  unscheduled_trigger:\n"
        "    ALT_x_baseline: 3\n"
        "    TBL_x_uln: 2\n"
        "placebo_arm_reference:\n"
        "  - MAESTRO-NASH-placebo\n  - ENLIGHTEN-placebo\n  - SYMMETRY-placebo\n"
        "  - CONTROL-placebo\n  - ESSENCE-placebo\n"
        "disclaimers:\n"
        "  - 본 차터는 참고용 예시이며 실제 시험 차터를 대체하지 않습니다.\n"
    )
    charter_path = out_dir / "charter_example.yaml"
    charter_path.write_text(charter_text, encoding="utf-8")

    written: Dict[str, Path] = {
        "lft": lft_path,
        "placebo": placebo_path,
        "charter": charter_path,
    }
    for cls, path in panel_paths.items():
        written[f"panel_{cls.lower()}"] = path
    return written


def _gen_thrb_panel(rng, pid, rows):
    """Append one patient's synthetic THRb class-panel records to ``rows``.

    For each of weeks 0, 12, 24 and 48, six long-format records
    (``pid``/``week``/``marker``/``value``) are appended in the order TSH, T3,
    T4, SHBG, HR, prolactin. Week 0 carries the patient's drawn baseline for
    TSH, SHBG and HR; later weeks scale TSH by 0.55-0.85, scale SHBG by
    1.2-1.6 and add 2-10 to HR. T3, T4 and prolactin are drawn fresh at every
    visit.

    Args:
        rng: Random source providing ``uniform``.
        pid: Patient identifier copied into every record.
        rows: List extended in place; nothing is returned.
    """
    tsh_base = rng.uniform(1.2, 2.4)
    # The value of this draw is never used, but the call still advances the
    # generator; removing it would change every later value for a given seed.
    rng.uniform(120, 200)
    shbg_base = rng.uniform(25, 45)
    hr_base = rng.uniform(68, 78)

    def emit(week, marker, value, ndigits):
        rows.append({"pid": pid, "week": week, "marker": marker,
                     "value": round(value, ndigits)})

    for week in (0, 12, 24, 48):
        at_baseline = week == 0
        # Each argument expression is evaluated right before its emit() call,
        # which keeps the draw order TSH, T3, T4, SHBG, HR, prolactin.
        emit(week, "TSH",
             tsh_base * (1.0 if at_baseline else rng.uniform(0.55, 0.85)), 2)
        emit(week, "T3", rng.uniform(95, 130), 1)
        emit(week, "T4", rng.uniform(6.5, 9.5), 2)
        emit(week, "SHBG",
             shbg_base * (1.0 if at_baseline else rng.uniform(1.2, 1.6)), 1)
        emit(week, "HR",
             hr_base + (0 if at_baseline else rng.uniform(2, 10)), 1)
        emit(week, "prolactin", rng.uniform(8, 15), 2)


def _gen_fgf21_panel(rng, pid, rows):
    """Append one patient's synthetic FGF21 class-panel records to ``rows``.

    For each of weeks 0, 12, 24 and 48, five long-format records
    (``pid``/``week``/``marker``/``value``) are appended in the order IGF1,
    P1NP, CTX, uric_acid, GH. Week 0 carries the drawn baselines; at later
    weeks IGF1 is scaled by 0.6-0.85, P1NP by 0.7-0.9, CTX by 1.1-1.4 and
    uric_acid by 1.05-1.25. GH is drawn fresh at every visit.

    Args:
        rng: Random source providing ``uniform``.
        pid: Patient identifier copied into every record.
        rows: List extended in place; nothing is returned.
    """
    igf1_base = rng.uniform(120, 200)
    p1np_base = rng.uniform(40, 70)
    ctx_base = rng.uniform(0.25, 0.55)
    uric_base = rng.uniform(4.5, 6.5)

    for week in (0, 12, 24, 48):
        on_treatment = week != 0
        # Factors are drawn in marker order (IGF1, P1NP, CTX, uric_acid, GH);
        # baseline visits draw only the GH value.
        igf1_factor = rng.uniform(0.6, 0.85) if on_treatment else 1.0
        p1np_factor = rng.uniform(0.7, 0.9) if on_treatment else 1.0
        ctx_factor = rng.uniform(1.1, 1.4) if on_treatment else 1.0
        uric_factor = rng.uniform(1.05, 1.25) if on_treatment else 1.0
        gh_value = rng.uniform(0.3, 3.5)
        rows.extend([
            {"pid": pid, "week": week, "marker": "IGF1",
             "value": round(igf1_base * igf1_factor, 1)},
            {"pid": pid, "week": week, "marker": "P1NP",
             "value": round(p1np_base * p1np_factor, 2)},
            {"pid": pid, "week": week, "marker": "CTX",
             "value": round(ctx_base * ctx_factor, 3)},
            {"pid": pid, "week": week, "marker": "uric_acid",
             "value": round(uric_base * uric_factor, 2)},
            {"pid": pid, "week": week, "marker": "GH",
             "value": round(gh_value, 2)},
        ])


def _gen_acc_panel(rng, pid, rows):
    """Append one patient's synthetic ACC class-panel records to ``rows``.

    For each of weeks 0, 12, 24 and 48 a TG record and an HDL record
    (``pid``/``week``/``marker``/``value``) are appended. Week 0 carries the
    drawn baselines; at later weeks TG is scaled by 1.2-1.7 and HDL by
    0.85-1.0.

    Args:
        rng: Random source providing ``uniform``.
        pid: Patient identifier copied into every record.
        rows: List extended in place; nothing is returned.
    """
    tg_base = rng.uniform(120, 180)
    hdl_base = rng.uniform(40, 55)

    for week in (0, 12, 24, 48):
        baseline_visit = week == 0
        # TG factor is drawn before the HDL factor; week 0 draws neither.
        tg_factor = 1.0 if baseline_visit else rng.uniform(1.2, 1.7)
        hdl_factor = 1.0 if baseline_visit else rng.uniform(0.85, 1.0)
        for marker, value in (("TG", tg_base * tg_factor),
                              ("HDL", hdl_base * hdl_factor)):
            rows.append({"pid": pid, "week": week, "marker": marker,
                         "value": round(value, 1)})


def _gen_fxr_panel(rng, pid, rows):
    """Append one patient's synthetic FXR class-panel records to ``rows``.

    For each of weeks 0, 12, 24 and 48, three long-format records
    (``pid``/``week``/``marker``/``value``) are appended in the order LDL,
    pruritus_VAS, ALP. Week 0 carries the drawn LDL/ALP baselines and a
    pruritus score of 0; at later weeks LDL is scaled by 1.1-1.3, ALP by
    1.0-1.25 and the pruritus score is drawn from 2-7.

    Args:
        rng: Random source providing ``uniform``.
        pid: Patient identifier copied into every record.
        rows: List extended in place; nothing is returned.
    """
    ldl_base = rng.uniform(95, 130)
    alp_base = rng.uniform(95, 130)

    def emit(week, marker, value):
        rows.append({"pid": pid, "week": week, "marker": marker,
                     "value": round(value, 1)})

    for week in (0, 12, 24, 48):
        baseline_visit = week == 0
        # Draw order per visit: LDL factor, pruritus score, ALP factor.
        ldl_factor = 1.0 if baseline_visit else rng.uniform(1.1, 1.3)
        # The baseline score is the integer 0, so round() yields the int 0
        # (not 0.0) for that record.
        itch_score = 0 if baseline_visit else rng.uniform(2, 7)
        alp_factor = 1.0 if baseline_visit else rng.uniform(1.0, 1.25)
        emit(week, "LDL", ldl_base * ldl_factor)
        emit(week, "pruritus_VAS", itch_score)
        emit(week, "ALP", alp_base * alp_factor)


# --------- pipeline ---------
def run_pipeline(data_dir: Path, report_dir: Path, quarter: str = "Q1") -> Dict:
    """Run the analysis pipeline on generated data and write all report files.

    Steps, in order: load the LFT CSV and the four class-panel CSVs from
    ``data_dir``, evaluate Hy's-law cases, derive and score RUCAM inputs,
    evaluate class-panel signals, load the placebo reference, build the DSC
    report, then export every artefact into ``report_dir``.

    Files written into ``report_dir`` (``<q>`` is ``quarter``):
    ``DSC_<q>.json``, ``cases_<q>.csv``, ``DSC_<q>.docx``, ``RMP_<q>.docx``,
    ``DSUR_<q>.docx``, ``PSUR_<q>.docx``, ``supplementary.md`` and
    ``edish_ascii.txt``.

    Args:
        data_dir: Directory holding ``synthetic_lft.csv``,
            ``synthetic_<class>_panel.csv`` and ``placebo_reference.csv``.
        report_dir: Output directory; created here if it does not exist.
        quarter: Label embedded in the report file names and passed to the
            report builder.

    Returns:
        Summary dict with keys ``n_patients``, ``n_cases``, ``hys``,
        ``rucam_categories``, ``placebo_attribution`` and ``class_flags``.
    """
    patients = ingest.load_lft_csv(data_dir / "synthetic_lft.csv")
    # The return value is ignored; presumably load_panel_csv attaches the
    # panel data to `patients` in place - confirm against modules.ingest.
    for cls in ("THRb", "FGF21", "ACC", "FXR"):
        ingest.load_panel_csv(data_dir / f"synthetic_{cls.lower()}_panel.csv",
                              patients, cls)

    cases = hys_law.evaluate_patients(patients)
    rucam_inputs = rucam.derive_inputs_from_cases(cases)
    rucam_results = rucam.evaluate_batch(rucam_inputs, cases)
    class_signals = class_panel.evaluate_all(patients)
    placebo_ref = report.load_placebo_reference(data_dir / "placebo_reference.csv")
    dsc = report.build_dsc_report(cases, rucam_results, class_signals,
                                  placebo_ref, quarter=quarter,
                                  drug_label="STUDY-DRUG-001")

    # Nothing above creates the output directory, and the plain write_text()
    # below needs it to exist, so make sure it does before the first export.
    report_dir.mkdir(parents=True, exist_ok=True)

    report.export_dsc_json(dsc, report_dir / f"DSC_{quarter}.json")
    report.export_cases_csv(cases, rucam_results,
                            report_dir / f"cases_{quarter}.csv")
    # One .docx per document type, all rendered from the same DSC payload.
    for doc_type in ("DSC", "RMP", "DSUR", "PSUR"):
        report.export_docx(dsc, report_dir / f"{doc_type}_{quarter}.docx",
                           doc_type=doc_type)
    report.build_manuscript_supplement(cases, rucam_results, class_signals,
                                       report_dir / "supplementary.md")
    edish_txt = hys_law.render_edish_ascii(cases)
    (report_dir / "edish_ascii.txt").write_text(edish_txt, encoding="utf-8")

    return {
        "n_patients": len(patients),
        "n_cases": len(cases),
        "hys": hys_law.summarize_hys(cases),
        "rucam_categories": dsc["rucam_category_counts"],
        "placebo_attribution": dsc["placebo_attribution"],
        "class_flags": dsc["class_signal_flag_counts"],
    }


def main(argv: List[str] | None = None) -> int:
    """Parse the command line and run the requested generate/analyze steps.

    ``--demo`` generates data and then analyzes it, ``--generate-only`` stops
    after generation, and ``--analyze`` runs the pipeline on existing data.
    With none of these flags the help text is printed. The disclaimer banner
    is printed in every case.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``2`` when analysis is requested but ``synthetic_lft.csv`` is absent
        from the data directory, ``0`` otherwise.
    """
    cli = argparse.ArgumentParser(
        prog="mash-dili-surveil-kor",
        description=(
            "MASH/MASLD trial DILI surveillance: baseline-adjusted Hy's law, "
            "RUCAM/CIOMS causality, class-effect panel, placebo-attributable signal."
        ),
    )
    # Mode flags
    cli.add_argument("--demo", action="store_true",
                     help="합성 데이터 생성 후 end-to-end 파이프라인 실행")
    cli.add_argument("--generate-only", action="store_true",
                     help="합성 데이터만 생성")
    cli.add_argument("--analyze", action="store_true",
                     help="이미 생성된 data/ 로 분석만 수행")
    # Locations and generation parameters
    cli.add_argument("--data-dir", default=str(ROOT / "data"))
    cli.add_argument("--report-dir", default=str(ROOT / "reports"))
    cli.add_argument("--n", type=int, default=600)
    cli.add_argument("--seed", type=int, default=42)
    cli.add_argument("--quarter", default="Q1")
    opts = cli.parse_args(argv)

    print(DISCLAIMER)
    data_dir = Path(opts.data_dir)
    report_dir = Path(opts.report_dir)

    wants_generate = opts.demo or opts.generate_only
    wants_analyze = opts.demo or opts.analyze

    if wants_generate:
        print(f"[INFO] generating synthetic data N={opts.n} seed={opts.seed}")
        written = gen_synthetic_data(data_dir, n=opts.n, seed=opts.seed)
        for label, location in written.items():
            print(f"  - {label}: {location}")
        # --generate-only wins over any analysis flag given alongside it.
        if opts.generate_only:
            return 0

    if not wants_analyze:
        # No mode flag was given: show usage instead of doing nothing silently.
        cli.print_help()
        return 0

    lft_csv = data_dir / "synthetic_lft.csv"
    if not lft_csv.exists():
        print("[ERROR] synthetic_lft.csv missing. Run with --demo first.",
              file=sys.stderr)
        return 2

    print(f"[INFO] running pipeline data={data_dir} report={report_dir}")
    summary = run_pipeline(data_dir, report_dir, quarter=opts.quarter)
    print("[SUMMARY]")
    print(json.dumps(summary, ensure_ascii=False, indent=2))
    return 0


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())