"""
MASLDRetractionWatch-Kor — Streamlit MVP

Run:
    streamlit run app.py

All data is offline mock/synthetic. No external API calls.
"""
from __future__ import annotations

import io
import json
import os
from datetime import datetime

import pandas as pd
import streamlit as st

import core

# Browser-tab title, icon and wide layout for the whole app.
_APP_NAME = "MASLDRetractionWatch-Kor"
st.set_page_config(page_title=_APP_NAME, page_icon="🧪", layout="wide")

# ---------- header ----------
# Page heading, a one-line scope description, and a warning banner whose
# text is supplied by core.DISCLAIMER.
st.title(_APP_NAME)
_scope_caption = (
    "MASLD/MASH·NAFLD·NASH·hepatic fibrosis 분야 retraction / correction / "
    "Expression of Concern 통합 surveillance (오프라인 mock data)"
)
st.caption(_scope_caption)
st.warning(core.DISCLAIMER, icon="⚠️")

# ---------- load data once ----------
@st.cache_data
def _load_all():
    """Load every dataset the app needs in a single cached call.

    Returns:
        A 4-tuple ``(records_all, records_masld, sample_sr, guidelines)``:
        the output of ``core.load_retractions()``, that same output passed
        through ``core.filter_and_dedup``, the sample systematic review, and
        the guidelines.
    """
    everything = core.load_retractions()
    # Tuple elements are evaluated left to right, so the loaders run in the
    # order they are listed here.
    return (
        everything,
        core.filter_and_dedup(everything),
        core.load_sample_systematic_review(),
        core.load_guidelines(),
    )


# Unpack the cached datasets into the module-level names used by the rest of
# the script.
records_all, records_masld, sample_sr, guidelines = _load_all()


# ---------- sidebar filters ----------
def _retraction_year(record):
    """Return the first four characters of ``retraction_date`` as an int.

    Returns ``None`` when the field is missing, cannot be sliced, or is not
    parseable as an integer, so a single bad record cannot crash the sidebar.
    """
    try:
        return int(record.get("retraction_date")[:4])
    except (TypeError, ValueError):
        return None


st.sidebar.header("필터")
sources_sel = st.sidebar.multiselect(
    "Source", core.SOURCES, default=core.SOURCES,
)
# Option lists are derived from the MASLD-filtered records; records without
# the field are grouped under "Unknown". Everything is selected by default.
types_all = sorted({r.get("type", "Unknown") for r in records_masld})
types_sel = st.sidebar.multiselect("유형(Type)", types_all, default=types_all)
journals_all = sorted({r.get("journal", "Unknown") for r in records_masld})
journals_sel = st.sidebar.multiselect("저널", journals_all, default=journals_all)

# Year bounds come only from records with a usable date; with none at all,
# fall back to the fixed 2020-2026 window.
_known_years = [
    y for y in map(_retraction_year, records_masld) if y is not None
]
year_min = min(_known_years, default=2020)
year_max = max(_known_years, default=2026)
if year_min < year_max:
    year_range = st.sidebar.slider(
        "Retraction 연도", year_min, year_max, (year_min, year_max)
    )
else:
    # NOTE(review): Streamlit's slider is understood to require
    # min_value < max_value — confirm against the installed version. When all
    # dated records share one year there is nothing to choose, so show the
    # year and use the degenerate range directly.
    st.sidebar.caption(f"Retraction 연도: {year_min}")
    year_range = (year_min, year_max)

def _passes(r):
    """Return True when record ``r`` satisfies every active sidebar filter.

    Reads the module-level selections ``sources_sel``, ``types_sel``,
    ``journals_sel`` and ``year_range``.

    Args:
        r: A record mapping; the keys read are ``source``, ``type``,
            ``journal`` and ``retraction_date``.

    Returns:
        ``True`` if the record's source, type and journal are among the
        selected values and its retraction year lies inside ``year_range``
        (inclusive on both ends); ``False`` otherwise.
    """
    if r.get("source") not in sources_sel:
        return False
    # Use the same "Unknown" fallback the sidebar uses when it builds the
    # type/journal option lists, so records missing the field are matched by
    # the "Unknown" choice instead of being excluded unconditionally.
    if r.get("type", "Unknown") not in types_sel:
        return False
    if r.get("journal", "Unknown") not in journals_sel:
        return False
    try:
        year = int(r.get("retraction_date", "0000")[:4])
    except (TypeError, ValueError):
        # Non-sliceable or non-numeric date: treated as year 0.
        year = 0
    return year_range[0] <= year <= year_range[1]


# Records that survive every sidebar filter.
records = list(filter(_passes, records_masld))

st.sidebar.markdown("---")
# Record counts at three stages: everything loaded, after the MASLD filter,
# and after the current sidebar selections.
for _metric_label, _metric_rows in (
    ("총 record (mock)", records_all),
    ("MASLD 필터 후", records_masld),
    ("현재 필터 적용", records),
):
    st.sidebar.metric(_metric_label, len(_metric_rows))

# ---------- tabs ----------
# One tab per feature area; the handles are used by the `with tabN:` sections
# that follow.
_TAB_TITLES = [
    "1. 통합 Ingest + MASLD 필터",
    "2. SR/MA Cross-reference",
    "3. Author/Lab 누적 flag",
    "4. Journal/Publisher/사유 분포",
    "5. 주간 다이제스트 + 가이드라인 sanity",
]
tab1, tab2, tab3, tab4, tab5 = st.tabs(_TAB_TITLES)

# ---------- Tab 1 ----------
with tab1:
    # Tab 1: headline metrics, source distribution, keyword rule and the
    # record table for the sidebar-filtered records.
    st.subheader("5개 source 통합 + MASLD/MASH 키워드 필터")
    summary = core.overall_summary(records)
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("필터 후 record", summary["n_total"])
    col2.metric("Retraction", summary["by_type"].get("Retraction", 0))
    col3.metric("Correction", summary["by_type"].get("Correction", 0))
    col4.metric("Expression of Concern", summary["by_type"].get("Expression of Concern", 0))

    st.markdown("**Source 분포 (mock)**")
    st.bar_chart(pd.Series(summary["by_source"]))

    st.markdown("**MASLD 키워드 매칭 룰**")
    st.code(", ".join(core.MASLD_KEYWORDS))

    def _join_authors(value):
        """Render one ``authors`` cell as a comma-separated string.

        A list/tuple is joined with ", "; a plain string is kept as-is
        (joining it would split it into characters); anything else, such as
        a missing value, becomes an empty string.
        """
        if isinstance(value, str):
            return value
        if isinstance(value, (list, tuple)):
            return ", ".join(str(name) for name in value)
        return ""

    st.markdown("**Record table**")
    if records:
        # reindex returns a new frame and fills a column that no record
        # carries with missing values, where df[[...]] would raise KeyError;
        # the assignment below therefore never writes to a slice of another
        # frame.
        df = pd.DataFrame(records).reindex(columns=[
            "id", "retraction_date", "type", "journal", "publisher",
            "reason", "title", "authors", "source", "pubpeer_threads",
        ])
        df["authors"] = df["authors"].apply(_join_authors)
        st.dataframe(df, use_container_width=True, hide_index=True)
    else:
        st.info("필터 조건에 맞는 record 없음.")

# ---------- Tab 2 ----------
with tab2:
    # Tab 2: cross-reference a review's included papers against the
    # MASLD-filtered retraction records (not the sidebar-filtered subset).
    st.subheader("Systematic review / meta-analysis included paper cross-reference")
    st.caption("included paper PMID/DOI를 retraction DB와 cross-reference하여 영향 평가.")

    st.markdown("**샘플 systematic review (mock)**")
    st.write(f"제목: {sample_sr.get('title')}")
    st.write(f"등록: {sample_sr.get('registration')}")
    st.write(f"검색일: {sample_sr.get('search_date')}")

    uploaded = st.file_uploader(
        "Included paper list 업로드 (JSON: [{\"pmid\":..., \"doi\":...}, ...]) — 없으면 샘플 사용",
        type=["json"],
    )
    # Start from the bundled sample; it is replaced only by an upload that
    # parses and has the expected shape.
    included = sample_sr.get("included_papers", [])
    if uploaded is not None:
        try:
            uploaded_data = json.load(uploaded)
            # Accept either a bare list or an object wrapping the list under
            # "included_papers".
            if isinstance(uploaded_data, dict):
                candidate = uploaded_data.get("included_papers", [])
            else:
                candidate = uploaded_data
            # Entries are read with .get() further down, so each one must be
            # a JSON object; reject scalars and lists of non-objects here
            # rather than failing later.
            if not isinstance(candidate, list) or not all(
                isinstance(paper, dict) for paper in candidate
            ):
                raise ValueError("expected a JSON list of objects")
        except ValueError as e:
            # JSON syntax and text-decoding errors are ValueError subclasses.
            st.error(f"업로드 파일 파싱 실패: {e}")
        else:
            included = candidate

    result = core.cross_reference_review(included, records_masld)
    c1, c2, c3 = st.columns(3)
    c1.metric("Included paper 수", result["n_included"])
    c2.metric("영향 받은 paper", result["n_affected"])
    # Guard the percentage against an empty included list.
    pct = (100.0 * result["n_affected"] / result["n_included"]) if result["n_included"] else 0
    c3.metric("영향 비율", f"{pct:.1f}%")

    if result["severity_breakdown"]:
        st.markdown("**Severity 분포**")
        st.write(result["severity_breakdown"])

    st.markdown("**Highlight — retracted/corrected/EoC included paper**")
    if result["affected"]:
        rows = []
        for a in result["affected"]:
            # Each hit pairs the included paper with its matching record.
            inc, rec = a["included"], a["record"]
            rows.append({
                "label": inc.get("label"),
                "pmid": inc.get("pmid"),
                "type": rec.get("type"),
                "journal": rec.get("journal"),
                "reason": rec.get("reason"),
                "retraction_date": rec.get("retraction_date"),
            })
        st.dataframe(pd.DataFrame(rows), use_container_width=True, hide_index=True)
    else:
        st.success("영향 받은 included paper 없음.")

    st.markdown("**Cochrane RoB2 / PRISMA / sensitivity analysis 보조 권고**")
    for s in result["suggestions"]:
        st.write(f"- {s}")

# ---------- Tab 3 ----------
with tab3:
    # Tab 3: cumulative counts per author and per affiliation, computed over
    # the MASLD-filtered records, plus overlap with guideline members.
    st.subheader("Author / lab / affiliation 단위 retraction 누적 + 패턴 detection")

    author_rows = core.author_cumulative(records_masld)
    aff_rows = core.affiliation_cumulative(records_masld)

    st.markdown("**Author 단위 누적 (전체 MASLD DB 기준)**")
    # Flatten each author entry into one display row; list-valued fields are
    # joined into comma-separated text.
    author_table = []
    for entry in author_rows:
        author_table.append({
            "author": entry["author"],
            "count": entry["count"],
            "flag": entry["flag"],
            "ORCID": ", ".join(entry["orcid"]) if entry["orcid"] else "",
            "affiliations": ", ".join(entry["affiliations"]),
            "type_breakdown": entry["type_breakdown"],
            "time_cluster": entry["time_cluster"],
        })
    df_a = pd.DataFrame(author_table)

    # Background CSS per flag value; flags not listed get no styling.
    _FLAG_CSS = {
        "red": "background-color: #ffcccc",
        "yellow": "background-color: #fff3b0",
        "green": "",
    }

    def _color(row):
        """Return one CSS string per cell so the whole row shares a colour."""
        css = _FLAG_CSS.get(row["flag"], "")
        return [css for _ in range(len(row))]

    styled_authors = df_a.style.apply(_color, axis=1)
    st.dataframe(styled_authors, use_container_width=True, hide_index=True)

    st.markdown("**Affiliation 단위 누적**")
    df_af = pd.DataFrame(aff_rows)
    st.dataframe(df_af, use_container_width=True, hide_index=True)

    st.markdown("**KASL/AASLD/EASL 가이드라인 committee 인사 cross-reference**")
    overlap = core.guideline_member_overlap(author_rows)
    for hit in overlap:
        expander_title = f"{hit['guideline']} — flagged {len(hit['flagged_members'])}건"
        members = hit["flagged_members"]
        with st.expander(expander_title):
            if not members:
                st.write("교차 없음")
            for member in members:
                st.write(
                    f"- **{member['author']}** ({member['flag']}) — "
                    f"누적 {member['count']}건, {member['time_cluster']}"
                )

# ---------- Tab 4 ----------
with tab4:
    # Tab 4: distributions and time series for the sidebar-filtered records.
    st.subheader("Journal / publisher / 사유 분포 + 시계열")
    col1, col2 = st.columns(2)
    with col1:
        st.markdown("**저널별**")
        st.bar_chart(pd.Series(core.journal_distribution(records)))
        st.markdown("**Publisher별**")
        st.bar_chart(pd.Series(core.publisher_distribution(records)))
    with col2:
        st.markdown("**사유별**")
        st.bar_chart(pd.Series(core.reason_distribution(records)))
        st.markdown("**유형(Type)별**")
        st.bar_chart(pd.Series(core.type_distribution(records)))

    st.markdown("**연도별 시계열 (retraction_date 기준)**")
    ts = core.yearly_timeseries(records)
    if ts:
        # ts is loaded as two columns (year, count) and indexed by year.
        st.line_chart(pd.DataFrame(ts, columns=["year", "count"]).set_index("year"))
    else:
        st.info("시계열 데이터 없음.")

    st.markdown("**Publication → retraction lag 통계 (일)**")
    st.json(core.lag_distribution(records))

    st.markdown("**저널별 연도별 retraction 카운트**")
    jy = core.journal_yearly_rate(records)
    # jy is iterated as {journal: [(year, count), ...]}; flatten it to one
    # row per (journal, year) pair.
    rows = [
        {"journal": j, "year": y, "count": c}
        for j, series in jy.items()
        for y, c in series
    ] if jy else []
    if rows:
        df_jy = pd.DataFrame(rows)
        # These are counts, so repeated (journal, year) pairs must add up;
        # pivot_table's default aggregation would average them instead.
        pivot = df_jy.pivot_table(
            index="year", columns="journal", values="count",
            aggfunc="sum", fill_value=0,
        )
        st.line_chart(pivot)
    else:
        # Also reached when every journal's series is empty, which would
        # otherwise build a column-less frame and fail in pivot_table.
        st.info("데이터 부족.")

# ---------- Tab 5 ----------
with tab5:
    # Tab 5: weekly digest, per-guideline sanity report, and export buttons.
    # Both reports are built from the MASLD-filtered records.
    st.subheader("주간 다이제스트 + KASL/AASLD/EASL 가이드라인 sanity check")

    digest = core.weekly_digest_markdown(records_masld, top_n=15)
    st.markdown(digest)

    st.markdown("---")
    st.markdown("### 가이드라인 sanity check")
    gs = core.guideline_sanity_report(guidelines, records_masld)
    for g in gs:
        expander_title = (
            f"{g['society']} — {g['guideline']} "
            f"({g['year']}) — 인용 {g['n_cited']} 중 영향 {g['n_affected']}"
        )
        with st.expander(expander_title):
            if not g["affected"]:
                st.success("인용 paper 중 retraction/EoC/correction 없음.")
            for a in g["affected"]:
                affected_line = (
                    f"- **{a['type']}** | PMID {a['pmid']} | {a['journal']} "
                    f"| {a['retraction_date']} | 사유: {a['reason']}"
                )
                st.write(affected_line)
                st.caption(a["title"])

    st.markdown("---")
    st.markdown("### Export")
    col_md, col_docx = st.columns(2)
    with col_md:
        # Markdown export: the digest text itself, with today's date in the
        # file name.
        md_name = f"masld_retraction_digest_{datetime.now().strftime('%Y%m%d')}.md"
        st.download_button(
            "다이제스트 .md 다운로드",
            data=digest.encode("utf-8"),
            file_name=md_name,
            mime="text/markdown",
        )
    with col_docx:
        if st.button("docx 리포트 생성"):
            # The report is written next to this script, then read back so it
            # can be offered as a download.
            app_dir = os.path.dirname(os.path.abspath(__file__))
            docx_name = f"masld_retraction_report_{datetime.now().strftime('%Y%m%d')}.docx"
            out_path = os.path.join(app_dir, docx_name)
            try:
                core.export_docx_report(digest, gs, out_path)
                with open(out_path, "rb") as f:
                    docx_bytes = f.read()
                    st.download_button(
                        "docx 다운로드",
                        data=docx_bytes,
                        file_name=docx_name,
                        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                    )
                st.success(f"저장됨: {out_path}")
            except Exception as e:
                # UI boundary: report any export failure instead of crashing
                # the page.
                st.error(f"docx export 실패: {e}")