#!/usr/bin/env python3
"""
WtLossSurrogate-Kor (웨이트로스서로게이트코어)
=============================================
Domain   : Obesity (비만대사질환)
Category : 연구 아이디어 생성 (research-hypothesis generation)

A standalone, OFFLINE tool that ingests trial-level (%weight-loss, Δhard-outcome)
effect-size pairs from anti-obesity RCTs and computes by-drug-class TRIAL-LEVEL
SURROGACY (R²_trial, surrogate threshold effect [STE], dose–response surrogacy,
proportion of treatment effect explained [PTE] = weight-mediated fraction) via
weighted meta-regression. It separates weight-mediated vs weight-independent
effects, auto-flags unvalidated surrogate–outcome–class pairs, and generates
validation-study hypotheses with suggested sample sizes.

⚠️  연구용·참고용 (research/reference use only) — not for clinical decision-making.
    All bundled effect sizes are illustrative/synthetic, NOT official readouts.

CLI examples
------------
    python3 main.py --help
    python3 main.py                       # useful default summary
    python3 main.py --surrogacy           # by-class R²_trial / STE / PTE / grade
    python3 main.py --dose-response       # weight-loss bin vs hard benefit
    python3 main.py --paradox             # surrogate-paradox flags
    python3 main.py --gaps                # mined under-validated pairs
    python3 main.py --hypotheses          # validation hypotheses + sample sizes
    python3 main.py --all                 # everything
    python3 main.py --data my.csv --top 10
"""

from __future__ import annotations

import argparse
import math
import sys

import dataio
import surrogacy as sg


# --------------------------------------------------------------------------- #
# Pretty-printing helpers (pure stdlib, no external table deps)
# --------------------------------------------------------------------------- #

def _fmt(v, nd=2, dash="—"):
    """Render a single value as table-cell text.

    Args:
        v: value to render.
        nd: number of decimal places used for floats.
        dash: placeholder returned for ``None`` and float NaN.

    Returns:
        *dash* for ``None`` / NaN, a fixed-point string for other floats,
        and ``str(v)`` for everything else.
    """
    if isinstance(v, float):
        # NaN has no meaningful fixed-point rendering; show the placeholder.
        return dash if math.isnan(v) else f"{v:.{nd}f}"
    return dash if v is None else str(v)


def _table(rows, headers):
    """Lay out *rows* under *headers* as a left-aligned plain-text table.

    Each column is as wide as its longest cell (header included); columns are
    separated by two spaces and a dashed rule follows the header line.

    Args:
        rows: list of row lists; cells are rendered with ``str``.
        headers: list of column-title strings.

    Returns:
        The table as one newline-joined string.
    """
    gap = "  "
    # Header participates in width calculation; with no rows it is the only line.
    grid = [headers] + rows if rows else [headers]
    widths = [max(len(str(cell)) for cell in column) for column in zip(*grid)]

    title_line = gap.join(h.ljust(widths[i]) for i, h in enumerate(headers))
    rule_line = gap.join("-" * widths[i] for i, _ in enumerate(headers))

    lines = [title_line, rule_line]
    lines.extend(
        gap.join(str(cell).ljust(widths[i]) for i, cell in enumerate(row))
        for row in rows
    )
    return "\n".join(lines)


def _hdr(title):
    """Return *title* framed above and below by a 76-character '=' rule.

    The result starts with a newline so the header is separated from any
    preceding output.
    """
    rule = "=" * 76
    return "\n".join(("", rule, f"{title}", rule))


def banner():
    """Print the program banner: tool name, domain line and the disclaimer."""
    heavy_rule = "=" * 76
    light_rule = "-" * 76

    print(heavy_rule)
    print("WtLossSurrogate-Kor (웨이트로스서로게이트코어)")
    print("Obesity (비만대사질환)  |  연구 아이디어 생성 (research-hypothesis)")
    print(light_rule)
    print(sg.DISCLAIMER)
    print(heavy_rule)


# --------------------------------------------------------------------------- #
# Sections
# --------------------------------------------------------------------------- #

def section_summary(df):
    """Print the DATA SUMMARY section for the loaded trials.

    Shows the source path and dropped-row count stored in ``df.attrs`` plus
    the trial / class / outcome counts returned by ``dataio.summary_counts``.
    """
    counts = dataio.summary_counts(df)
    print(_hdr("DATA SUMMARY"))

    print(f"source        : {df.attrs.get('source_path')}")

    n_dropped = df.attrs.get('dropped_rows', 0)
    print(f"trials loaded : {counts['n_trials']}  (dropped {n_dropped} invalid)")

    class_list = ', '.join(counts['classes'])
    print(f"drug classes  : {counts['n_classes']}  -> {class_list}")

    outcome_list = ', '.join(counts['outcomes'])
    print(f"hard outcomes : {counts['n_outcomes']}  -> {outcome_list}")


def _is_missing(v):
    """Return True when *v* is ``None`` or a float NaN (both render as a dash)."""
    return v is None or (isinstance(v, float) and math.isnan(v))


def _fmt_pct(v, nd=1):
    """Format *v* with ``_fmt`` and append '%' only when a value is present."""
    return _fmt(v, nd) + ("" if _is_missing(v) else "%")


def section_surrogacy(df, top=None):
    """Print the trial-level surrogacy table and the GLP1RA → MACE spotlight.

    Results come from ``sg.all_surrogacy(df)``; cells with no trials are
    skipped, the rest are ordered by grade and then by descending R²_trial.

    Args:
        df: trials table passed through to ``sg.all_surrogacy``.
        top: when truthy, only the first *top* sorted rows are shown.
    """
    print(_hdr("TRIAL-LEVEL SURROGACY  (R²_trial / STE / PTE / grade)"))
    print("Surrogate = % body-weight change ; hard outcome on log-HR scale "
          "(negative = benefit).")
    print("STE = % weight-loss at which the predicted benefit becomes credible "
          "(upper 95% band < null).")
    print("PTE = weight-mediated fraction of the hard-outcome benefit.\n")

    res = [r for r in sg.all_surrogacy(df) if r.n_trials > 0]
    # Show informative cells first: by grade strength, then higher R² first.
    # A missing R² (None or NaN) sorts as 0 instead of raising in math.isnan.
    grade_order = {"strong": 0, "moderate": 1, "weak": 2, "invalid": 3,
                   "insufficient": 4}
    res.sort(key=lambda r: (grade_order.get(r.grade, 9),
                            -(0 if _is_missing(r.r2_trial) else r.r2_trial)))
    if top:
        res = res[:top]

    rows = []
    for r in res:
        # The CI is only meaningful when an R² estimate exists.
        ci = (f"[{_fmt(r.r2_ci_low)},{_fmt(r.r2_ci_high)}]"
              if not _is_missing(r.r2_trial) else "—")
        pteflag = "" if r.pte_flag in ("ok", "") else f" ({r.pte_flag})"
        rows.append([
            r.drug_class, r.hard_outcome, str(r.n_trials),
            _fmt(r.r2_trial), ci,
            _fmt_pct(r.ste),  # no dangling "%" after a missing STE
            _fmt(r.pte) + pteflag, r.grade.upper(),
            "PARADOX" if r.paradox else "",
        ])
    print(_table(rows, ["class", "outcome", "k", "R2_trial", "95%CI",
                        "STE", "PTE", "grade", "flag"]))

    # Spotlight the central debate.
    # NOTE(review): the cell is looked up in the (possibly --top-truncated)
    # list, so a small `top` can hide the spotlight — confirm this is intended.
    sel = [r for r in res if r.drug_class == "GLP1RA" and r.hard_outcome == "MACE"]
    if sel:
        r = sel[0]
        print("\nSELECT-style spotlight (GLP1RA → MACE):")
        print(f"  R²_trial={_fmt(r.r2_trial)}  grade={r.grade.upper()}  "
              f"STE={_fmt_pct(r.ste)}  PTE(weight-mediated)={_fmt(r.pte)} "
              f"{('['+r.pte_flag+']') if r.pte_flag not in ('ok','') else ''}")
        if r.pte is not None and r.pte < 0.6:
            print("  -> A large weight-INDEPENDENT (direct) component is implied: "
                  "% weight-loss only partially mediates the CV benefit.")


def section_dose_response(df):
    """Print the dose–response report for every class × outcome combination.

    For each pair, ``sg.dose_response`` is consulted; pairs for which it
    returns ``None`` are skipped. A fallback line is printed when no pair
    produced a report.
    """
    print(_hdr("DOSE–RESPONSE SURROGACY  (does more weight-loss = more hard benefit?)"))
    class_names = sorted(df["drug_class"].unique())
    outcome_names = sorted(df["hard_outcome"].unique())

    reported = 0
    for cls in class_names:
        for out in outcome_names:
            fit = sg.dose_response(df, cls, out)
            if fit is not None:
                reported += 1
                print(f"\n[{cls} → {out}]  verdict: {fit.verdict}")
                stats_line = (
                    f"  linear slope (Δlog-HR per +1% loss) = {_fmt(fit.linear_slope,4)}"
                    f"   quadratic curvature = {_fmt(fit.quad_coef,5)}"
                    f"   nonlinearity p = {_fmt(fit.nonlinearity_p,3)}"
                )
                print(stats_line)
                # Each bin is indexed positionally: label, mean loss, mean log-HR, count.
                bin_rows = []
                for b in fit.bins:
                    bin_rows.append([b[0], _fmt(b[1], 1), _fmt(b[2], 3), str(b[3])])
                bin_table = _table(bin_rows, ["wl_bin", "mean_wl", "mean_logHR", "k"])
                print(_indent(bin_table, 2))

    if reported == 0:
        print("  (no class×outcome cell had >=3 trials for a dose-response fit)")


def _indent(text, n):
    """Prefix every line of *text* with *n* spaces.

    Lines are split with ``str.splitlines``, so a trailing newline in *text*
    is not preserved in the result.
    """
    prefix = " " * n
    indented = [f"{prefix}{line}" for line in text.splitlines()]
    return "\n".join(indented)


def section_paradox(df):
    """Print the cells returned by ``sg.paradox_scan`` with an interpretation note.

    When the scan returns nothing, a single "none detected" line is printed
    instead of a table.
    """
    print(_hdr("SURROGATE-PARADOX FLAGS  (weight improves but hard outcome worsens)"))
    flagged = sg.paradox_scan(df)
    if flagged:
        table_rows = [
            [cell.drug_class, cell.hard_outcome, str(cell.n_trials),
             _fmt(cell.r2_trial), cell.grade.upper()]
            for cell in flagged
        ]
        print(_table(table_rows, ["class", "outcome", "k", "R2_trial", "grade"]))
        note = (
            "\n  Interpretation: in flagged cells the surrogate (weight) moves the 'right' way while the"
            "\n  hard-outcome point estimate trends adverse — surrogacy is INVALID there and must not"
            "\n  be assumed. Treat as a high-priority validation target."
        )
        print(note)
    else:
        print("  none detected.")


def section_hypotheses(df, top=None):
    """Print the validation-study hypotheses mined by ``sg.mine_gaps``.

    Args:
        df: trials table passed through to ``sg.mine_gaps``.
        top: when truthy, only the first *top* hypotheses are printed.
    """
    print(_hdr("VALIDATION-STUDY HYPOTHESES  (mined unvalidated surrogate–outcome–class pairs)"))
    mined = sg.mine_gaps(df)
    if top:
        mined = mined[:top]
    if not mined:
        print("  no gaps mined.")
        return

    for idx, hyp in enumerate(mined, start=1):
        # A falsy per-arm size (None / 0) is reported as "n/a".
        per_arm = "n/a"
        if hyp.suggested_n_per_arm:
            per_arm = f"{hyp.suggested_n_per_arm:,}/arm"
        r2_text = _fmt(hyp.r2_trial)

        print(f"\n  H{idx}  [priority {hyp.priority:.2f}]  {hyp.statement}")
        print(f"      class={hyp.drug_class}  outcome={hyp.hard_outcome}  "
              f"k(existing)={hyp.n_trials}  R²_trial={r2_text}")
        print(f"      suggested validation: ~{hyp.suggested_trials} more trial(s); "
              f"per-arm size ≈ {per_arm} (Schoenfeld, 6% event rate, 80% power)")


def section_gaps(df, top=None):
    """Print the gap map: one table row per item returned by ``sg.mine_gaps``.

    Args:
        df: trials table passed through to ``sg.mine_gaps``.
        top: when truthy, only the first *top* items are listed.
    """
    print(_hdr("GAP MAP  (under-validated / weak surrogate–outcome–class cells)"))
    gaps = sg.mine_gaps(df)
    if top:
        gaps = gaps[:top]
    table_rows = [
        [g.drug_class, g.hard_outcome, str(g.n_trials),
         _fmt(g.r2_trial), f"{g.priority:.2f}", g.reason]
        for g in gaps
    ]
    print(_table(table_rows, ["class", "outcome", "k", "R2_trial", "prio", "reason"]))


# --------------------------------------------------------------------------- #
# CLI
# --------------------------------------------------------------------------- #

def _non_negative_int(text):
    """argparse ``type=`` callable: parse *text* as an int and reject negatives.

    Raises:
        argparse.ArgumentTypeError: if *text* is not an integer or is < 0.
    """
    try:
        value = int(text)
    except ValueError:
        # Keep the wording argparse itself uses for ``type=int`` failures.
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {value}")
    return value


def build_parser():
    """Build the command-line parser.

    Returns:
        An ``argparse.ArgumentParser`` exposing ``--data``, ``--top`` and one
        boolean flag per report section (plus ``--all``).
    """
    p = argparse.ArgumentParser(
        prog="main.py",
        description="WtLossSurrogate-Kor — trial-level surrogacy for anti-obesity "
                    "RCTs (%weight-loss vs hard outcomes). OFFLINE, research/"
                    "reference use only.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="⚠️  연구용·참고용 — not for clinical decision-making.",
    )
    p.add_argument("--data", metavar="PATH", default=None,
                   help="path to a trials CSV (default: bundled demo data).")
    # A negative N would reach `seq[:N]` in the section printers and silently
    # drop rows from the END of the list, so it is rejected at parse time.
    # 0 stays accepted: the printers treat a falsy limit as "no limit".
    p.add_argument("--top", metavar="N", type=_non_negative_int, default=None,
                   help="limit rows/hypotheses shown to N.")
    p.add_argument("--surrogacy", action="store_true",
                   help="by-class R²_trial / STE / PTE / grade table.")
    p.add_argument("--dose-response", dest="dose_response", action="store_true",
                   help="weight-loss bin vs hard benefit (non-linearity report).")
    p.add_argument("--paradox", action="store_true",
                   help="surrogate-paradox flags.")
    p.add_argument("--gaps", action="store_true",
                   help="gap map of under-validated/weak pairs.")
    p.add_argument("--hypotheses", action="store_true",
                   help="validation-study hypotheses + suggested sample sizes.")
    p.add_argument("--all", action="store_true",
                   help="run every section.")
    return p


def main(argv=None):
    """Run the CLI: load the trials, print the banner and the chosen sections.

    Args:
        argv: argument list for the parser; ``None`` lets argparse read
            ``sys.argv``.

    Returns:
        0 on success, 2 when ``dataio.load_trials`` raises ``dataio.DataError``.
    """
    opts = build_parser().parse_args(argv)

    try:
        trials = dataio.load_trials(opts.data)
    except dataio.DataError as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        return 2

    banner()

    picked_section = (opts.surrogacy or opts.dose_response or opts.paradox
                      or opts.gaps or opts.hypotheses)

    if opts.all:
        section_summary(trials)
        section_surrogacy(trials, opts.top)
        section_dose_response(trials)
        section_paradox(trials)
        section_gaps(trials, opts.top)
        section_hypotheses(trials, opts.top)
    elif not picked_section:
        # Bare invocation → useful default summary with built-in row limits.
        section_summary(trials)
        section_surrogacy(trials, opts.top or 8)
        section_paradox(trials)
        section_hypotheses(trials, opts.top or 3)
        print("\n(tip: run with --all, or --surrogacy / --dose-response / "
              "--paradox / --gaps / --hypotheses)")
    else:
        # Explicit flags: run only what was asked for, in a fixed order.
        if opts.surrogacy:
            section_summary(trials)
            section_surrogacy(trials, opts.top)
        if opts.dose_response:
            section_dose_response(trials)
        if opts.paradox:
            section_paradox(trials)
        if opts.gaps:
            section_gaps(trials, opts.top)
        if opts.hypotheses:
            section_hypotheses(trials, opts.top)

    _footer()
    return 0


def _footer():
    """Print the closing disclaimer between two 76-character '-' rules."""
    rule = "-" * 76
    print(f"\n{rule}")
    print(sg.DISCLAIMER)
    print(rule)


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
