"""main.py — HepatoSeahorse entrypoint.

Modes:
  streamlit run main.py        -> interactive UI
  python3 main.py --demo       -> CLI demo (offline; no streamlit required)
  python3 main.py --regen      -> regenerate synthetic CSVs
"""

from __future__ import annotations

import argparse
import os
import sys
from typing import List, Tuple

import pandas as pd

# Absolute path of the directory that contains this file.
HERE = os.path.dirname(os.path.abspath(__file__))
# Location of the bundled synthetic plate files: <HERE>/data/synthetic.
DATA_DIR = os.path.join(HERE, "data", "synthetic")

# allow importing local modules: put this file's directory first on sys.path
# so the project imports that follow resolve regardless of the working directory
sys.path.insert(0, HERE)

from seahorse_parser import parse_plate, list_plate_dir, detect_protocol  # noqa: E402
from qc import run_well_qc, summarize_qc                                  # noqa: E402
from bioenergetics import (                                               # noqa: E402
    compute_plate_params,
    substrate_dependence,
    phenotype_quadrant,
    anova_by_group,
    tukey_hsd_fallback,
    korean_summary,
)


# Exact file name -> protocol label.  Used by _analyze_plate as a fallback
# when the parsed plate metadata reports the protocol as "Unknown".
PROTOCOL_LABEL = {
    "mito_stress.csv": "Mito Stress",
    "glycolysis_stress.csv": "Glycolysis Stress",
    "fao_assay.csv": "FAO Assay",
    "atp_rate.csv": "ATP Rate Assay",
}


# ---------------------------------------------------------------------------
# common analysis pipeline
# ---------------------------------------------------------------------------


def _analyze_plate(path: str) -> dict:
    """Parse one plate file and run every analysis step on it.

    The protocol is taken from the parsed plate metadata; when that reads
    "Unknown" the file's base name is looked up in ``PROTOCOL_LABEL``
    instead (still "Unknown" if the name is not listed).

    Returns a dict with the keys ``path``, ``plate``, ``protocol``, ``qc``,
    ``qc_summary``, ``params``, ``substrate``, ``anova`` and ``tukey``.
    """
    parsed = parse_plate(path)

    resolved_protocol = parsed.meta.protocol
    if resolved_protocol == "Unknown":
        # fall back to the filename hint
        resolved_protocol = PROTOCOL_LABEL.get(os.path.basename(path), "Unknown")

    well_qc = run_well_qc(parsed.df, resolved_protocol)
    well_params = phenotype_quadrant(
        compute_plate_params(parsed.df, resolved_protocol)
    )
    substrate_table = substrate_dependence(well_params)

    # Cohort statistics: basal OCR compared across drug groups.
    response, factor = "basal_ocr", "drug"
    anova_result = anova_by_group(well_params, response, factor)
    tukey_result = tukey_hsd_fallback(well_params, response, factor)

    artifacts = {"path": path, "plate": parsed, "protocol": resolved_protocol}
    artifacts["qc"] = well_qc
    artifacts["qc_summary"] = summarize_qc(well_qc)
    artifacts["params"] = well_params
    artifacts["substrate"] = substrate_table
    artifacts["anova"] = anova_result
    artifacts["tukey"] = tukey_result
    return artifacts


# ---------------------------------------------------------------------------
# CLI demo
# ---------------------------------------------------------------------------


def _print_plate_report(art: dict) -> None:
    """Print the text report for one analysed plate.

    ``art`` is the artifact dict returned by ``_analyze_plate``.  Sections
    whose data is empty or unavailable are silently omitted.
    """
    meta = art["plate"].meta
    print(f"plate: {meta.plate_id}")
    print(f"  detected protocol: {art['protocol']}")
    print(f"  injections: {meta.injections}")
    print(f"  wells: {meta.n_wells}, timepoints: {meta.n_timepoints}")
    print(f"  cell types: {meta.cell_types}")
    print(f"  drugs: {meta.drugs}")

    qc = art["qc_summary"]
    print(f"  QC: Pass={qc['Pass']}, Borderline={qc['Borderline']}, "
          f"Excludable={qc['Excludable']}, total={qc['total']}")

    params = art["params"]
    if not params.empty:
        print("  parameter means (across wells):")
        summary_columns = (
            "basal_ocr", "atp_linked_ocr", "maximal_ocr",
            "spare_capacity", "proton_leak", "non_mito_ocr",
            "basal_ecar", "glycolytic_capacity",
            "glycolytic_reserve", "basal_atp_rate",
            "max_atp_rate",
        )
        for col in summary_columns:
            # A plate may not carry every parameter column; skip the absent
            # ones instead of letting a KeyError abort the whole demo.
            if col not in params.columns:
                continue
            v = pd.to_numeric(params[col], errors="coerce").dropna()
            if len(v):
                print(f"    {col:24s}: {v.mean():.2f} ± {v.std():.2f} (n={len(v)})")

        if "phenotype" in params.columns:
            vc = params["phenotype"].value_counts().to_dict()
            print(f"  phenotype quadrant: {vc}")

    sub = art["substrate"]
    if not sub.empty:
        print("  substrate dependence (HepG2 / vehicle):")
        nan = float("nan")  # placeholder printed for columns a row lacks
        for _, row in sub.iterrows():
            ct = row.get("cell_type", "")
            dr = row.get("drug", "")
            print(f"    [{ct}|{dr}] FAO={row.get('fao_contribution', nan):.2f}, "
                  f"CPT1-dep={row.get('cpt1_dependent_fao', nan):.2f}, "
                  f"Gln-dep={row.get('gln_dependence', nan):.2f}, "
                  f"Pyr-dep={row.get('pyruvate_dependence', nan):.2f}, "
                  f"flex_idx={row.get('flexibility_index', nan):.3f}")

    an = art["anova"]
    if an.get("ok"):
        print(f"  ANOVA basal_OCR ~ drug: F={an['F']:.2f}, p={an['p']:.4f}")
        print(f"    group means: {an['means']}")

    tk = art["tukey"]
    if isinstance(tk, pd.DataFrame) and not tk.empty and "p_adj" in tk.columns:
        sig = tk[tk["p_adj"] < 0.05]
        print(f"  Tukey HSD pairs p<0.05: n={len(sig)}")
        # Only the first five significant pairs are listed.
        for _, row in sig.head(5).iterrows():
            print(f"    {row['group1']} vs {row['group2']}: "
                  f"diff={row['mean_diff']:.2f}, p={row['p_adj']:.4f}")

    # Korean manuscript summary preview (first 8 lines)
    ks = korean_summary(params, art["protocol"])
    print("\n  -- 한국어 요약 미리보기 --")
    for line in ks.splitlines()[:8]:
        print("  " + line)


def cli_demo() -> int:
    """Run the analysis over all synthetic plate files and print a summary.

    If ``DATA_DIR`` is missing or holds no plate files, ``regen()`` is called
    first.  A plate whose analysis raises is reported with an ``[error]``
    line and skipped; the remaining plates are still processed.

    Returns:
        1 when no plate files are available even after regeneration,
        otherwise 0.
    """
    banner = "=" * 68
    print(banner)
    print(" HepatoSeahorse — CLI demo (offline)")
    print(banner)

    if not os.path.isdir(DATA_DIR) or not list_plate_dir(DATA_DIR):
        print(f"[info] synthetic data not found in {DATA_DIR}; generating...")
        regen()

    files = list_plate_dir(DATA_DIR)
    if not files:
        print("[error] no plate files available")
        return 1

    print(f"\nFound {len(files)} plate files:")
    for f in files:
        print(f"  - {os.path.basename(f)}")

    for f in files:
        print("\n" + "-" * 68)
        try:
            art = _analyze_plate(f)
        except Exception as e:
            print(f"[error] {f}: {e}")
            continue
        _print_plate_report(art)

    print("\n" + banner)
    print(" demo complete — 본 결과는 연구·참고용. 임상 의사결정 사용 금지.")
    print(banner)
    return 0


# ---------------------------------------------------------------------------
# regenerate synthetic data
# ---------------------------------------------------------------------------


def regen() -> int:
    """Regenerate the synthetic CSVs by running ``synthetic_generator.main()``.

    The ``data`` directory next to this file is put on ``sys.path`` so the
    ``synthetic_generator`` module can be imported from it.

    Returns:
        0 on success; 1 if importing or running the generator raised, in
        which case an ``[error]`` line is printed instead of propagating.
    """
    try:
        generator_dir = os.path.join(HERE, "data")
        # regen() can be called many times in one process (CLI fallback,
        # Streamlit button); add the directory once rather than stacking a
        # duplicate sys.path entry on every call.
        if generator_dir not in sys.path:
            sys.path.insert(0, generator_dir)
        import synthetic_generator
        synthetic_generator.main()
        return 0
    except Exception as e:
        print(f"[error] regen failed: {e}")
        return 1


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------


def streamlit_app() -> None:
    """Render the Streamlit UI for a single plate.

    The sidebar offers either a bundled synthetic plate or an uploaded
    CSV/Excel file.  The chosen file is run through ``_analyze_plate`` and
    the results are shown in six tabs (QC, parameters, phenotype map,
    substrate dependence, cohort stats, Korean export).

    If streamlit cannot be imported, a message is printed and the function
    returns without rendering anything.  An analysis failure is shown with
    ``st.error`` and likewise ends the run.
    """
    try:
        import streamlit as st
    except Exception as e:
        print(f"streamlit unavailable: {e}")
        return

    st.set_page_config(page_title="HepatoSeahorse", layout="wide")
    st.title("HepatoSeahorse")
    st.caption("연구·참고용 — in vitro 세포실험 데이터 사후 분석용, 임상 의사결정용 아님")

    # Path of the temporary copy of an uploaded file (None for built-in
    # plates); removed again once the analysis has run.
    uploaded_tmp = None

    with st.sidebar:
        st.header("Data Source")
        mode = st.radio("Source", ["Synthetic (built-in)", "Upload CSV/Excel"])
        if mode == "Synthetic (built-in)":
            files = list_plate_dir(DATA_DIR)
            if not files:
                if st.button("Generate synthetic data"):
                    regen()
                    # st.experimental_rerun is gone from current Streamlit
                    # releases; prefer st.rerun and fall back to the old
                    # name only on versions that predate it.
                    rerun = getattr(st, "rerun", None) or st.experimental_rerun
                    rerun()
                st.warning(f"no plate files in {DATA_DIR}")
                return
            path = st.selectbox("Plate", files, format_func=os.path.basename)
        else:
            up = st.file_uploader("Plate file (.csv / .xlsx)",
                                   type=["csv", "xlsx", "xls"])
            if not up:
                st.info("Drop a Wave-exported Excel or CSV file.")
                return
            import tempfile
            suffix = os.path.splitext(up.name)[1]
            # Close the handle before the parser opens the file by name; an
            # open NamedTemporaryFile cannot be reopened on every platform.
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                # getvalue() returns the whole upload regardless of the
                # stream position left behind by an earlier read.
                tmp.write(up.getvalue())
            path = uploaded_tmp = tmp.name

    try:
        art = _analyze_plate(path)
    except Exception as e:
        st.error(f"failed: {e}")
        return
    finally:
        # The script reruns on every widget interaction; without cleanup each
        # rerun would leave another temp file behind.
        # NOTE(review): assumes parse_plate has loaded the data into memory
        # by the time _analyze_plate returns — confirm.
        if uploaded_tmp is not None:
            try:
                os.unlink(uploaded_tmp)
            except OSError:
                pass

    meta = art["plate"].meta
    st.subheader(f"Plate: {meta.plate_id}")
    c1, c2, c3, c4 = st.columns(4)
    c1.metric("Detected protocol", art["protocol"])
    c2.metric("Wells", meta.n_wells)
    c3.metric("Timepoints", meta.n_timepoints)
    c4.metric("Injections", len(meta.injections))

    # str() keeps the join safe if an injection entry is not already a string.
    st.markdown("**Injection sequence:** " + " → ".join(map(str, meta.injections)))

    tabs = st.tabs(["QC", "Parameters", "Phenotype Map",
                    "Substrate Dependence", "Cohort Stats", "Korean Export"])

    with tabs[0]:
        st.dataframe(art["qc"])
        s = art["qc_summary"]
        st.write({
            "Pass": s["Pass"], "Borderline": s["Borderline"],
            "Excludable": s["Excludable"], "total": s["total"],
        })

    with tabs[1]:
        st.dataframe(art["params"])

    with tabs[2]:
        params = art["params"]
        if not params.empty:
            if {"basal_ocr", "basal_ecar"}.issubset(params.columns):
                # Thresholds come from the frame's attrs when present,
                # otherwise the column medians are used.
                ocr_thr = params.attrs.get("ocr_threshold", params["basal_ocr"].median())
                ecar_thr = params.attrs.get("ecar_threshold", params["basal_ecar"].median())
                st.write(f"OCR threshold: {ocr_thr:.2f}, ECAR threshold: {ecar_thr:.2f}")
                st.scatter_chart(params, x="basal_ecar", y="basal_ocr",
                                  color="phenotype" if "phenotype" in params.columns else None)
                if "phenotype" in params.columns:
                    st.write(params["phenotype"].value_counts().rename("count"))
            else:
                # Without both axes the chart cannot be drawn; say so rather
                # than raising and preventing the later tabs from rendering.
                st.info("Phenotype map requires basal_ocr and basal_ecar columns.")

    with tabs[3]:
        sub = art["substrate"]
        if sub.empty:
            st.info("Substrate dependence requires multiple substrate conditions (FAO assay).")
        else:
            st.dataframe(sub)

    with tabs[4]:
        an = art["anova"]
        if an.get("ok"):
            st.write(f"ANOVA basal_OCR ~ drug: F={an['F']:.2f}, p={an['p']:.4f}")
            st.write({"group means": an["means"], "n_per_group": an["n_per_group"]})
        else:
            st.info(f"ANOVA not available: {an.get('reason', '')}")
        tk = art["tukey"]
        if isinstance(tk, pd.DataFrame) and not tk.empty:
            st.dataframe(tk)

    with tabs[5]:
        ks = korean_summary(art["params"], art["protocol"])
        st.markdown(ks)
        st.download_button("Download (Markdown)", ks,
                          file_name=f"{meta.plate_id}_korean_summary.md")


# ---------------------------------------------------------------------------
# main
# ---------------------------------------------------------------------------


def main() -> int:
    """Command-line entry point; returns the process exit code.

    ``--regen`` takes precedence over ``--demo``.  With neither flag a short
    usage hint is printed and 0 is returned.  Unrecognised arguments are
    ignored rather than rejected.
    """
    cli = argparse.ArgumentParser(description="HepatoSeahorse")
    flag_help = (
        ("--demo", "CLI demo (offline, no streamlit)"),
        ("--regen", "regenerate synthetic CSVs"),
    )
    for flag, help_text in flag_help:
        cli.add_argument(flag, action="store_true", help=help_text)
    options, _unknown = cli.parse_known_args()

    if options.regen:
        return regen()
    if options.demo:
        return cli_demo()

    # no flag given: show how to launch each mode
    usage_lines = (
        "HepatoSeahorse",
        "  streamlit run main.py    # interactive UI",
        "  python3 main.py --demo   # CLI demo (offline)",
        "  python3 main.py --regen  # regenerate synthetic data",
    )
    for usage_line in usage_lines:
        print(usage_line)
    return 0


# Launch dispatch.  `streamlit run main.py` executes this file as `__main__`
# with sys.argv rewritten to the script path and its own arguments, so
# neither `__name__` nor sys.argv distinguishes the UI launch from a plain
# `python3 main.py`; the Streamlit runtime is queried instead.
def _running_under_streamlit() -> bool:
    """Return True when this process hosts an active Streamlit runtime."""
    if "streamlit" not in sys.modules:
        # `streamlit run` has imported streamlit before the script executes,
        # so a plain CLI run can skip the optional (and slow) import.
        return False
    try:
        from streamlit import runtime
        return bool(runtime.exists())
    except Exception:
        # Streamlit present but without a usable runtime API: treat the run
        # as CLI rather than failing at import time.
        return False


if __name__ == "__main__":
    if _running_under_streamlit():
        # Errors are left to propagate so Streamlit can display them instead
        # of the page silently staying blank.
        streamlit_app()
    else:
        sys.exit(main())