"""Korean RCT report (Markdown) generator."""

from __future__ import annotations
import pandas as pd
from datetime import datetime


def _df_to_md(df: pd.DataFrame) -> str:
    """Render a DataFrame as a GitHub-flavored markdown table without tabulate.

    Args:
        df: Table to render. ``None`` or an empty frame yields a placeholder.

    Returns:
        The header line, the ``---`` separator line and one line per row,
        joined with newlines, or ``"_(no data)_"`` when there is nothing to
        render. Missing values become empty cells, floats are formatted with
        four significant digits and every other value goes through ``str()``.
        Pipes and line breaks in headers and cells are neutralized so they
        cannot break the table layout.
    """
    if df is None or df.empty:
        return "_(no data)_"

    def _escape(text: str) -> str:
        # A raw "|" would open a new column and a raw line break would end
        # the row, so both are neutralized before the text enters the table.
        return (
            text.replace("|", "\\|")
            .replace("\r\n", " ")
            .replace("\n", " ")
            .replace("\r", " ")
        )

    def _cell(value: object) -> str:
        # pd.isna() returns an array for list-like cells, which cannot be used
        # in a boolean test; only scalar values can be "missing".
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ""
        if isinstance(value, float):
            return f"{value:.4g}"
        return _escape(str(value))

    header = "| " + " | ".join(_escape(str(c)) for c in df.columns) + " |"
    sep = "| " + " | ".join("---" for _ in df.columns) + " |"
    # itertuples() keeps each column's own dtype (iterrows() upcasts integers
    # to float in all-numeric frames, so 123456 would print as 1.235e+05) and
    # it is positional, so duplicate column names are rendered correctly.
    rows = [
        "| " + " | ".join(_cell(v) for v in record) + " |"
        for record in df.itertuples(index=False, name=None)
    ]
    return "\n".join([header, sep, *rows])


def _heatmap_block(df: pd.DataFrame) -> str:
    """Render the per-subject outcome metrics as one compact markdown table.

    Only the known outcome columns that are actually present in ``df`` are
    kept, in a fixed display order. ``None`` or an empty frame yields the
    ``"_(no data)_"`` placeholder.
    """
    nothing_to_show = df is None or df.empty
    if nothing_to_show:
        return "_(no data)_"

    # Fixed display order; columns missing from the frame are simply skipped.
    preferred_order = (
        "subject_id",
        "TIR_70_180_pct",
        "TBR_lt70_pct",
        "TBR_lt54_pct",
        "TAR_gt180_pct",
        "TAR_gt250_pct",
        "GMI_pct",
        "CV_pct",
        "MAGE_mg_dl",
        "hypo_events_ge15min",
        "sensor_wear_pct",
        "days_with_data",
    )
    selected = [name for name in preferred_order if name in df.columns]
    subset = df[selected]
    return _df_to_md(subset)


def _bland_altman_data(df: pd.DataFrame, ref_col: str = "TIR_70_180_pct",
                       comp_col: str = "GMI_pct") -> str:
    """List per-subject (mean, difference) pairs for a Bland-Altman plot.

    Args:
        df: Per-subject outcome table.
        ref_col: Column holding the reference measurement.
        comp_col: Column holding the comparison measurement.

    Returns:
        One markdown bullet per subject of the form
        ``  - subject=<id>, mean=<m>, diff=<d>`` where ``mean`` is the average
        of the two values and ``diff`` is ``ref - comp``. Rows whose values
        are missing or not convertible to ``float`` are skipped. A placeholder
        string is returned when the frame is ``None``/empty, a column is
        absent, or no row yields a usable pair.
    """
    # NOTE(review): the defaults pair TIR with GMI, which look like different
    # quantities; confirm this comparison is the intended one.
    if (df is None or df.empty
            or ref_col not in df.columns or comp_col not in df.columns):
        return "_(insufficient data for Bland-Altman)_"

    # Fall back to the index label so a frame without subject_id still yields
    # pairs instead of having every row silently dropped.
    subjects = df["subject_id"] if "subject_id" in df.columns else df.index

    pairs = []
    for sid, raw_ref, raw_comp in zip(subjects, df[ref_col], df[comp_col]):
        try:
            ref, comp = float(raw_ref), float(raw_comp)
        except (TypeError, ValueError):
            # Non-numeric cell (None, pd.NA, free text): no pair for this row.
            continue
        # float("nan") converts without error, so missing values need an
        # explicit check; otherwise the list fills with "mean=nan, diff=nan".
        if pd.isna(ref) or pd.isna(comp):
            continue
        mean = (ref + comp) / 2.0
        diff = ref - comp
        pairs.append(f"  - subject={sid}, mean={mean:.2f}, diff={diff:.2f}")
    return "\n".join(pairs) if pairs else "_(no numeric pairs)_"


def _group_period_block(df: pd.DataFrame) -> str:
    """Summarize outcomes per inferred treatment group as a markdown table.

    The group is inferred from ``subject_id``: IDs starting with ``A_``/``B_``
    or containing ``GROUP_A``/``GROUP_B`` (case-insensitive) map to that
    group. Any other ID falls back to the parity of the number formed by its
    digits (even -> A, odd -> B, no digits -> A), which is meant for
    synthetic demo data.

    Args:
        df: Per-subject outcome table. The input frame is not modified.

    Returns:
        A markdown table with one row per group holding the subject count,
        TIR mean/SD and, when those columns exist, the GMI and TBR<70 means,
        plus a constant ``period`` label. If ``TIR_70_180_pct`` is missing,
        the first rows of the frame (with the inferred group) are rendered
        instead. A placeholder string is returned for ``None``/empty input or
        when ``subject_id`` is absent.
    """
    if df is None or df.empty:
        return "_(no data)_"
    if "subject_id" not in df.columns:
        return "_(no subject_id column; cannot infer groups)_"

    def infer(sid: object) -> str:
        raw = str(sid)
        label = raw.upper()
        if label.startswith("A_") or "GROUP_A" in label:
            return "A"
        if label.startswith("B_") or "GROUP_B" in label:
            return "B"
        # By index modulo for synth demo: even -> A, odd -> B
        digits = "".join(ch for ch in raw if ch.isdigit())
        try:
            idx = int(digits or "0")
        except ValueError:
            # str.isdigit() also accepts characters such as superscripts
            # that int() cannot parse; treat those IDs as index 0.
            idx = 0
        return "A" if idx % 2 == 0 else "B"

    # assign() returns a new frame, so the caller's DataFrame stays untouched.
    grouped_input = df.assign(group=df["subject_id"].apply(infer))
    if "TIR_70_180_pct" not in grouped_input.columns:
        # Keep the fallback in markdown as well; a plain to_string() dump
        # collapses into one paragraph inside the rendered report.
        return _df_to_md(grouped_input.head())

    # Only aggregate columns that exist: TIR is guaranteed by the check above,
    # the other metrics are optional and used to raise KeyError when missing.
    spec = {
        "n": ("subject_id", "count"),
        "TIR_mean": ("TIR_70_180_pct", "mean"),
        "TIR_sd": ("TIR_70_180_pct", "std"),
    }
    if "GMI_pct" in grouped_input.columns:
        spec["GMI_mean"] = ("GMI_pct", "mean")
    if "TBR_lt70_pct" in grouped_input.columns:
        spec["TBR1_mean"] = ("TBR_lt70_pct", "mean")

    agg = grouped_input.groupby("group").agg(**spec).reset_index()
    agg["period"] = "post-randomization (full window)"
    return _df_to_md(agg)


def build_markdown_report(outcomes_df: pd.DataFrame, gaps_df: pd.DataFrame,
                          power_results: dict | None = None) -> str:
    """Assemble the full RCT report as a single Markdown string.

    Sections: per-subject outcomes, group summary, 0-100 normalized metrics
    for heatmap rendering, Bland-Altman pairs, CGM gaps, an optional sample
    size section and references.

    Args:
        outcomes_df: Per-subject outcome table. ``None`` is treated like an
            empty frame.
        gaps_df: CGM gap table. ``None`` or empty renders a "no gaps" line;
            at most the first 50 rows are shown.
        power_results: Optional sample-size results. When truthy it must
            provide the keys ``delta``, ``sd``, ``alpha``, ``power``,
            ``z_alpha_2``, ``z_beta``, ``n_per_arm``, ``n_total`` and
            ``formula``; section 6 is omitted otherwise.

    Returns:
        The report text, lines joined with ``"\\n"``.

    Raises:
        KeyError: If ``power_results`` is given but lacks a required key.
    """
    from datetime import timedelta, timezone

    max_gap_rows = 50

    # gaps_df already tolerates None; give outcomes_df the same treatment so
    # every section falls back to its "no data" placeholder instead of crashing.
    if outcomes_df is None:
        outcomes_df = pd.DataFrame()

    # The stamp is labeled KST, so compute it in UTC+9 (Korea has no DST)
    # rather than labeling the host's local time as KST.
    kst = timezone(timedelta(hours=9))
    now = datetime.now(kst).strftime("%Y-%m-%d %H:%M KST")

    lines = [
        "# InsuTrialKit-Kor — Korean RCT Report",
        "",
        f"_Generated: {now}_",
        "",
        "> **연구·교육용. 실제 임상의사결정에 사용 금지.**",
        "",
    ]

    lines += [
        "## 1. Per-subject outcomes (ADA 2023 14-metric)",
        "",
        _heatmap_block(outcomes_df),
        "",
    ]

    lines += [
        "## 2. Group × Period summary table",
        "",
        _group_period_block(outcomes_df),
        "",
    ]

    lines += [
        "## 3. Outcome heatmap-ready data (subject × metric)",
        "",
        "Columns are normalized 0–100 within each metric for visual heatmap rendering.",
        "",
    ]
    if not outcomes_df.empty:
        metrics = ["TIR_70_180_pct", "TBR_lt70_pct", "TAR_gt180_pct", "GMI_pct", "CV_pct"]
        present = [m for m in metrics if m in outcomes_df.columns]
        # subject_id is optional here so a frame without it still renders its
        # metrics instead of raising KeyError.
        id_cols = ["subject_id"] if "subject_id" in outcomes_df.columns else []
        norm = outcomes_df[id_cols + present].copy()
        for m in present:
            mn, mx = norm[m].min(), norm[m].max()
            # A constant column has zero range; divide by 1 so it maps to 0
            # instead of producing a division by zero.
            rng = (mx - mn) if (mx - mn) != 0 else 1.0
            norm[m + "_norm"] = ((norm[m] - mn) / rng * 100).round(1)
        cols = id_cols + [m + "_norm" for m in present]
        lines.append(_df_to_md(norm[cols]))
    else:
        lines.append("_(no data)_")
    lines.append("")

    lines += [
        "## 4. Bland-Altman pairs (TIR vs GMI, per-subject)",
        "",
        _bland_altman_data(outcomes_df),
        "",
    ]

    lines += ["## 5. CGM data quality — gaps ≥ 30 min", ""]
    if gaps_df is None or gaps_df.empty:
        lines.append("_No gaps ≥ 30 min detected._")
    else:
        lines.append(_df_to_md(gaps_df.head(max_gap_rows)))
        if len(gaps_df) > max_gap_rows:
            # Make the truncation visible so the table is not mistaken for
            # the complete list of gaps.
            lines.append("")
            lines.append(f"_Showing first {max_gap_rows} of {len(gaps_df)} gaps._")
    lines.append("")

    if power_results:
        lines += [
            "## 6. Sample size calculation",
            "",
            f"- ΔTIR = {power_results['delta']}%, SD = {power_results['sd']}%, "
            f"α = {power_results['alpha']}, power = {power_results['power']}",
            f"- z_(1-α/2) = {power_results['z_alpha_2']}, z_(1-β) = {power_results['z_beta']}",
            f"- **N per arm = {power_results['n_per_arm']}**, total = {power_results['n_total']}",
            f"- Formula: `{power_results['formula']}`",
            "",
        ]

    # The references keep number 7 even when section 6 is omitted, so the
    # heading stays the same across reports.
    lines += [
        "## 7. References",
        "",
        "- ADA 2023 Standards of Care, Diabetes Technology chapter",
        "- Battelino T et al., Diabetes Care 2019 / ATTD 2022 consensus on CGM TIR",
        "- Bergenstal RM et al., Diabetes Care 2018 (GMI formula)",
        "",
        "---",
        "_End of report._",
    ]
    return "\n".join(lines)
