#!/usr/bin/env python3
"""
GlyceSurrogate-Kor (글라이스서로게이트코어) -- CLI entry point.

Domain   : DM (당뇨병 / diabetes)
Category : 연구 아이디어 생성 (research-hypothesis generation)

A standalone, OFFLINE tool that ingests trial-level (Delta glycemic surrogate,
Delta hard outcome) effect-size pairs from diabetes RCTs, computes by-drug-class
TRIAL-LEVEL SURROGACY (R2_trial, surrogate threshold effect [STE], proportion of
treatment effect explained [PTE]) via weighted meta-regression, auto-flags weak /
unvalidated surrogate-outcome-class pairs, and generates validation-study hypotheses
with sample-size suggestions.

  RESEARCH / REFERENCE USE ONLY -- NOT FOR CLINICAL DECISION-MAKING.

Usage examples:
  python3 main.py                         # default summary
  python3 main.py --surrogacy             # by-class R2/STE/PTE/grade table
  python3 main.py --paradox               # surrogate-paradox flags
  python3 main.py --gaps                  # mined unvalidated pairs + hypotheses
  python3 main.py --hypotheses --top 5    # alias of --gaps, limited to 5
  python3 main.py --data my_trials.csv --surrogacy
  python3 main.py --all                   # run everything
"""
from __future__ import annotations

import argparse
import os
import sys

import numpy as np
import pandas as pd

# Allow running from anywhere: make the project dir importable.
# The directory holding this file is pushed to the front of sys.path (unless it
# is already listed) so the `surrogacy` import below resolves from that
# directory regardless of the current working directory.
_HERE = os.path.dirname(os.path.abspath(__file__))
if _HERE not in sys.path:
    sys.path.insert(0, _HERE)

# Must come after the sys.path adjustment above, hence the E402 suppression.
import surrogacy as S  # noqa: E402

# Default value of the --data option: data/demo_trials.csv next to this file.
DEFAULT_DATA = os.path.join(_HERE, "data", "demo_trials.csv")

# 78-character rule printed around the header and the closing disclaimer.
BANNER = "=" * 78
# Title block printed at the start of every run; its last line is the
# disclaimer text supplied by the surrogacy module.
HEADER = (
    "GlyceSurrogate-Kor  |  Domain: DM (당뇨병)  |  Category: 연구 아이디어 생성\n"
    "Trial-level glycemic-surrogate validity meta-regression\n"
    "⚠️  " + S.DISCLAIMER
)


def _fmt(x, nd=3, dash="--"):
    if x is None:
        return dash
    try:
        if isinstance(x, float) and not np.isfinite(x):
            return dash
        return f"{x:.{nd}f}"
    except (TypeError, ValueError):
        return str(x)


def print_header():
    """Print the title block framed by a banner rule above and below."""
    print(BANNER, HEADER, BANNER, sep="\n")


# --------------------------------------------------------------------------------------
# Sub-reports
# --------------------------------------------------------------------------------------
def report_surrogacy(results, top=None):
    """Print the per-cell trial-level surrogacy table followed by a legend.

    Args:
        results: Sliceable sequence of result objects exposing ``drug_class``,
            ``surrogate``, ``hard_outcome``, ``n_trials``, ``r2_trial``,
            ``r2_ci_lo``, ``r2_ci_hi``, ``ste``, ``pte``, ``grade`` and
            ``has_paradox``.
        top: When not ``None``, only ``results[:top]`` is printed.
    """
    rule = "-" * 78
    print("\n[ TRIAL-LEVEL SURROGACY BY CLASS x SURROGATE x OUTCOME ]")
    print(rule)
    print(
        f"{'class':<13}{'surr':<6}{'outcome':<17}{'n':>3} "
        f"{'R2':>6} {'R2_CI':>13} {'STE':>8} {'PTE':>6} {'grade':<11}"
    )
    print(rule)

    shown = results[:top] if top is not None else results
    for cell in shown:
        # Pre-render the numeric columns, then pad them into fixed widths.
        ci_txt = f"[{_fmt(cell.r2_ci_lo, 2)},{_fmt(cell.r2_ci_hi, 2)}]"
        r2_txt = _fmt(cell.r2_trial, 3)
        ste_txt = _fmt(cell.ste, 3)
        pte_txt = _fmt(cell.pte, 2)
        identity = (f"{cell.drug_class:<13}{cell.surrogate:<6}"
                    f"{cell.hard_outcome:<17}{cell.n_trials:>3}")
        metrics = f"{r2_txt:>6} {ci_txt:>13} {ste_txt:>8} {pte_txt:>6}"
        row = f"{identity} {metrics} {cell.grade:<11}"
        if cell.has_paradox:
            row = row + "  <-- PARADOX"
        print(row)

    print(rule)
    strong = S.R2_STRONG
    moderate = S.R2_MODERATE
    legend = [
        "R2_trial: weighted meta-regression of log-HR on Delta-surrogate across trials.",
        "STE: surrogate effect at which the 95% prediction band crosses the null (benefit).",
        "PTE: 1 - beta_adjusted/beta_unadjusted (clamped to [0,1]; '--' = undetermined).",
        f"Grade: strong>={strong} / moderate>={moderate} / weak<{moderate} / invalid(paradox).",
    ]
    print("\n".join(legend))


def report_paradox(df):
    """Print every surrogate-paradox flag that ``S.detect_paradox`` finds in ``df``.

    Each flag is read as a mapping with the keys ``trial``, ``drug_class``,
    ``surrogate``, ``delta_surrogate``, ``hard_outcome`` and ``hr``. When no
    flag is returned a single "(none detected)" line is printed instead of
    the table.
    """
    rule = "-" * 78
    print("\n[ SURROGATE-PARADOX FLAGS ]")
    print(rule)
    print("Trials where the glycemic surrogate IMPROVED but the hard outcome WORSENED "
          "(HR>1).\nThis is the ACCORD-style failure of glycemic surrogacy.")
    print(rule)

    flagged = S.detect_paradox(df)
    if flagged:
        print(f"{'trial':<14}{'class':<14}{'surr':<6}{'Δsurr':>8} {'outcome':<17}{'HR':>6}")
        for flag in flagged:
            left = f"{flag['trial']:<14}{flag['drug_class']:<14}{flag['surrogate']:<6}"
            delta_txt = _fmt(flag['delta_surrogate'], 2)
            outcome = flag['hard_outcome']
            hr_txt = _fmt(flag['hr'], 2)
            print(f"{left}{delta_txt:>8} {outcome:<17}{hr_txt:>6}")
        print(rule)
        print(f"  {len(flagged)} paradox flag(s). These render the surrogate INVALID for the "
              "affected class/outcome.")
    else:
        print("  (none detected)")


def report_gaps(df, results, top=None):
    """Print the validation hypotheses mined by ``S.mine_gaps(df, results)``.

    Args:
        df: Trial-level data, passed through to ``S.mine_gaps``.
        results: Per-cell surrogacy results, passed through to ``S.mine_gaps``.
        top: When not ``None``, only the first ``top`` hypotheses are printed.

    Each hypothesis is read as a mapping with the keys ``suggestion``,
    ``kind``, ``drug_class``, ``surrogate``, ``hard_outcome``, ``grade``,
    ``n_trials``, ``hypothesis`` and ``reasons``.
    """
    print("\n[ UNVALIDATED-PAIR MINING + VALIDATION HYPOTHESES ]")
    print("-" * 78)
    hyps = S.mine_gaps(df, results)
    # Decide "nothing mined" BEFORE applying the display limit; otherwise a
    # limit of 0 would wrongly claim that every cell is well-validated.
    if not hyps:
        print("  (no gaps mined -- all observed cells are well-validated)")
        return
    total = len(hyps)
    shown = hyps if top is None else hyps[:top]
    for i, h in enumerate(shown, 1):
        ss = h["suggestion"]
        tag = "WEAK CELL" if h["kind"] == "weak_cell" else "MISSING CELL"
        print(f"\n  H{i}. [{tag}] {h['drug_class']} | {h['surrogate']} -> "
              f"{h['hard_outcome']}  (grade={h['grade']}, n={h['n_trials']})")
        print(f"      Hypothesis : {h['hypothesis']}")
        print(f"      Why flagged: {', '.join(h['reasons'])}")
        print(f"      Suggested  : +{ss['n_additional_trials']} trial(s); "
              f"per-arm n≈{ss['approx_per_arm_n']:,} "
              f"(target |logHR|={ss['target_loghr']}, "
              f"assumed event rate={ss['assumed_event_rate']}, "
              f"~{ss['approx_total_events']} events).")
    print("\n" + "-" * 78)
    # Report how many were generated in total; mention the displayed count
    # only when the limit actually hid some of them.
    count_note = f"{total} validation hypothesis(es) generated"
    if len(shown) < total:
        count_note += f" ({len(shown)} shown)"
    print(f"  {count_note}. Sample sizes are "
          "transparent illustrative heuristics, not formal power calculations.")


def report_summary(df, results):
    """Print the default overview of the dataset and the surrogacy results.

    Args:
        df: Trial-level data with the columns ``trial``, ``drug_class``,
            ``surrogate`` and ``hard_outcome``.
        results: Iterable, sized collection of per-cell result objects
            exposing ``grade``, ``r2_trial``, ``has_paradox``, ``drug_class``,
            ``surrogate`` and ``hard_outcome``.
    """
    n_trials = df["trial"].nunique()
    n_rows = len(df)
    classes = sorted(df["drug_class"].unique())
    # Tally how many cells received each grade.
    grades = {}
    for r in results:
        grades[r.grade] = grades.get(r.grade, 0) + 1
    paradox = S.detect_paradox(df)
    hyps = S.mine_gaps(df, results)

    print("\n[ SUMMARY ]")
    print("-" * 78)
    print(f"  Unique trials      : {n_trials}")
    print(f"  Trial-outcome rows : {n_rows}")
    print(f"  Drug classes       : {', '.join(classes)}")
    print(f"  Surrogacy cells    : {len(results)} "
          f"({df['surrogate'].nunique()} surrogates x "
          f"{df['hard_outcome'].nunique()} outcomes x {len(classes)} classes observed)")
    print("  Grade distribution : " +
          ", ".join(f"{g}={c}" for g, c in sorted(grades.items())))
    print(f"  Paradox flags      : {len(paradox)}")
    print(f"  Validation hypoths : {len(hyps)}")
    # Highlight the strongest VALID and weakest observed cells. "Strongest" ignores
    # paradox cells -- a high R2 in a paradox cell is not a usable surrogate.
    # A missing R2 (None) is skipped as well as NaN/inf: np.isfinite(None)
    # would raise TypeError, while _fmt already treats None as "no value".
    fin = [r for r in results
           if r.r2_trial is not None and np.isfinite(r.r2_trial)]
    valid = [r for r in fin if not r.has_paradox]
    if valid:
        best = max(valid, key=lambda r: r.r2_trial)
        print(f"  Strongest (valid)  : {best.drug_class}/{best.surrogate}->"
              f"{best.hard_outcome}  R2={_fmt(best.r2_trial,3)} ({best.grade})")
    if fin:
        worst = min(fin, key=lambda r: r.r2_trial)
        print(f"  Weakest surrogacy  : {worst.drug_class}/{worst.surrogate}->"
              f"{worst.hard_outcome}  R2={_fmt(worst.r2_trial,3)} ({worst.grade})")
    print("-" * 78)
    print("  Tip: run with --surrogacy, --paradox, --gaps, or --all for detail.")


# --------------------------------------------------------------------------------------
# Main
# --------------------------------------------------------------------------------------
def _non_negative_int(text):
    """argparse ``type=`` callable: parse ``text`` as an integer >= 0.

    Raises:
        argparse.ArgumentTypeError: if ``text`` is not an integer or is
            negative. A negative limit would be used as a slice bound and
            silently drop rows from the END of the output.
    """
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 0:
        raise argparse.ArgumentTypeError(
            f"must be a non-negative integer, got {value}")
    return value


def build_parser():
    """Build the command-line parser for this tool.

    Returns:
        An ``argparse.ArgumentParser`` with the options ``--data``,
        ``--surrogacy``, ``--paradox``, ``--gaps``, ``--hypotheses``,
        ``--all``, ``--top`` and ``--csv-out``.
    """
    p = argparse.ArgumentParser(
        prog="main.py",
        description="GlyceSurrogate-Kor: trial-level glycemic-surrogate validity "
                    "meta-regression and validation-hypothesis generator (DM domain; "
                    "research/reference use only).",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="With no flags, prints a useful summary. ⚠️ Research/reference use only.",
    )
    p.add_argument("--data", metavar="PATH", default=DEFAULT_DATA,
                   help="CSV of trial-level effect sizes (default: bundled demo).")
    p.add_argument("--surrogacy", action="store_true",
                   help="Print by-class R2_trial / STE / PTE / grade table.")
    p.add_argument("--paradox", action="store_true",
                   help="List surrogate-paradox flags (ACCORD-style).")
    p.add_argument("--gaps", action="store_true",
                   help="Mine unvalidated pairs + generate validation hypotheses.")
    p.add_argument("--hypotheses", action="store_true",
                   help="Alias of --gaps.")
    p.add_argument("--all", action="store_true",
                   help="Run summary + surrogacy + paradox + gaps.")
    # Validated so that e.g. "--top -1" is rejected with a usage error
    # instead of being applied as a from-the-end slice.
    p.add_argument("--top", type=_non_negative_int, metavar="N", default=None,
                   help="Limit number of rows / hypotheses shown.")
    p.add_argument("--csv-out", metavar="PATH", default=None,
                   help="Write the surrogacy table to a CSV file.")
    return p


def main(argv=None):
    """Run the CLI: load the data, analyze it, and print the requested reports.

    Args:
        argv: Argument list handed to ``parse_args``; ``None`` makes argparse
            read ``sys.argv``.

    Returns:
        ``0`` on success; ``2`` when the data file is missing, cannot be
        loaded, yields no rows, or when the ``--csv-out`` file cannot be
        written.
    """
    args = build_parser().parse_args(argv)

    if not os.path.exists(args.data):
        print(f"ERROR: data file not found: {args.data}", file=sys.stderr)
        return 2
    try:
        df = S.load_data(args.data)
    except Exception as e:  # noqa: BLE001
        # Top-level boundary: report any loader failure as a clean CLI error.
        print(f"ERROR: failed to load data: {e}", file=sys.stderr)
        return 2
    if df.empty:
        print("ERROR: no usable rows after loading.", file=sys.stderr)
        return 2

    results = S.analyze_all(df)

    print_header()
    print(f"\nData: {args.data}  ({len(df)} rows, {df['trial'].nunique()} trials)")

    want_surr = args.surrogacy or args.all
    want_par = args.paradox or args.all
    want_gap = args.gaps or args.hypotheses or args.all
    any_specific = want_surr or want_par or want_gap

    if not any_specific:
        # No report flag given: fall back to the summary.
        report_summary(df, results)
    else:
        if args.all:
            report_summary(df, results)
        if want_surr:
            report_surrogacy(results, top=args.top)
        if want_par:
            report_paradox(df)
        if want_gap:
            report_gaps(df, results, top=args.top)

    exit_code = 0
    if args.csv_out:
        # An unwritable destination (missing directory, no permission, ...)
        # is reported like the input errors above rather than as a traceback,
        # and the closing disclaimer below is still printed.
        try:
            S.results_to_frame(results).to_csv(args.csv_out, index=False)
        except OSError as e:
            print(f"ERROR: failed to write CSV: {e}", file=sys.stderr)
            exit_code = 2
        else:
            print(f"\nWrote surrogacy table -> {args.csv_out}")

    print("\n" + BANNER)
    print("⚠️  " + S.DISCLAIMER)
    print(BANNER)
    return exit_code


# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())