#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
DMChronoTx-Hypo (디엠크로노테라피하이포)
당뇨병 chronotherapy 3D ontology + 미탐색 cell ranking + 한국어 hypothesis card
+ STROBE-RCT/SPIRIT protocol skeleton + IIT/grant proposal generator

Standalone Python CLI. 표준 라이브러리만 사용. 외부 네트워크 호출 없음.

본 도구는 연구·참고용 mock simulation 입니다.
임상 진료에 직접 사용하지 마십시오.
"""

import argparse
import json
import math
import os
import random
import sys
from datetime import datetime

# ---------------------------------------------------------------------------
# 데이터 로드
# ---------------------------------------------------------------------------
# Absolute directory containing this script; DATA_DIR is its "data" subdirectory.
SCRIPT_DIR = os.path.split(os.path.abspath(__file__))[0]
DATA_DIR = os.path.join(SCRIPT_DIR, "data")

# User-facing research-use-only notice (Korean text, emitted verbatim at runtime).
DISCLAIMER = (
    "[안내] 본 출력은 합성 mock 데이터에 기반한 연구·참고용 자료입니다. "
    "임상 진료, 처방, 환자 상담에 직접 사용하지 마십시오. "
    "실제 chronotherapy 적용은 PubMed/Europe PMC/ClinicalTrials.gov "
    "최신 1차 문헌 검토와 IRB 승인 후에만 가능합니다."
)


def load_json(filename):
    """Read ``filename`` from DATA_DIR as UTF-8 text and return the parsed JSON.

    Raises whatever ``open`` / ``json.load`` raise (missing file, invalid JSON).
    """
    target = os.path.join(DATA_DIR, filename)
    with open(target, encoding="utf-8") as handle:
        payload = json.load(handle)
    return payload


def load_all():
    """Load every bundled dataset and return them in one dict.

    Keys of the result are the dataset names used throughout the module
    ("drugs", "time_splits", "outcomes", "corpus", "cohorts", "labels",
    "guidelines"); files are read in that order via ``load_json``.
    """
    sources = (
        ("drugs", "drugs.json"),
        ("time_splits", "time_splits.json"),
        ("outcomes", "outcomes.json"),
        ("corpus", "corpus.json"),
        ("cohorts", "korean_cohorts.json"),
        ("labels", "labels.json"),
        ("guidelines", "guidelines.json"),
    )
    return {key: load_json(filename) for key, filename in sources}


# ---------------------------------------------------------------------------
# 1. Chronotherapy literature ETL + entity 정규화
# ---------------------------------------------------------------------------
def normalize_corpus(data):
    """Mock RxNorm/MeSH normalization of the literature corpus.

    Each article in ``data["corpus"]`` is matched to a ``data["drugs"]`` entry
    by case-insensitive drug name. Matched articles are returned as flat dicts
    carrying the catalogue's canonical name, Korean name, RxNorm and MeSH
    values; articles whose drug is not in the catalogue are skipped.
    """
    catalogue = {}
    for entry in data["drugs"]:
        # Later entries overwrite earlier ones that share a lower-cased name.
        catalogue[entry["name"].lower()] = entry

    records = []
    for article in data["corpus"]:
        entry = catalogue.get(article["drug"].lower())
        if entry is not None:
            records.append(dict(
                pmid=article["pmid"],
                year=article["year"],
                drug=entry["name"],
                drug_kor=entry["kor"],
                rxnorm=entry["rxnorm"],
                mesh=entry["mesh"],
                time_split=article["time_split"],
                outcome=article["outcome"],
                title=article["title"],
                journal=article.get("journal", ""),
            ))
    return records


# ---------------------------------------------------------------------------
# 2. 3D ontology + 미탐색 cell ranking
# ---------------------------------------------------------------------------
def build_3d_ontology(data, normalized):
    """Build one cell per (drug, time_split, outcome) combination.

    For every cell the number of normalized articles hitting that exact
    combination is stored as ``published``. ``expected`` is the outcome's
    ``expected_studies`` spread evenly over all drug × split pairs, floored at
    0.05. ``novelty`` is ``1 - published / expected`` clamped at 0. Both
    floats are rounded to three decimals. Cells are emitted drug-major, then
    split, then outcome.
    """
    drug_names = [entry["name"] for entry in data["drugs"]]
    split_ids = [entry["id"] for entry in data["time_splits"]]
    outcome_ids = [entry["id"] for entry in data["outcomes"]]
    expected_by_outcome = {
        entry["id"]: entry["expected_studies"] for entry in data["outcomes"]
    }

    # Count published articles per exact (drug, split, outcome) triple.
    tally = {}
    for record in normalized:
        triple = (record["drug"], record["time_split"], record["outcome"])
        if triple in tally:
            tally[triple] += 1
        else:
            tally[triple] = 1

    # Number of drug × split pairs an outcome's expected studies are divided over.
    # Only used inside make_cell, which never runs when either list is empty.
    slots = float(len(drug_names) * len(split_ids))

    def make_cell(drug, split, outcome):
        hits = tally.get((drug, split, outcome), 0)
        per_cell = max(0.05, expected_by_outcome[outcome] / slots)
        # per_cell is already >= 0.05, so the division below is always safe.
        gap = max(0.0, 1.0 - (hits / per_cell))
        return {
            "drug": drug,
            "time_split": split,
            "outcome": outcome,
            "published": hits,
            "expected": round(per_cell, 3),
            "novelty": round(gap, 3),
        }

    return [
        make_cell(drug, split, outcome)
        for drug in drug_names
        for split in split_ids
        for outcome in outcome_ids
    ]


# Drug names treated as once-weekly injectables by plausibility_score.
WEEKLY_GLP1 = {"Semaglutide", "Dulaglutide", "Tirzepatide"}


def plausibility_score(cell, data):
    """Heuristic clinical-plausibility weight for a drug × time_split × outcome cell.

    Returns 0.5 when the cell's drug is not in ``data["drugs"]``. Otherwise a
    starting value is chosen (0.95 / 0.2 / 0.5 depending on once-weekly
    suitability), raised to the highest matching "floor" rule, and finally
    lowered to the lowest matching "cap" rule.
    """
    record = None
    for candidate in data["drugs"]:
        if candidate["name"] == cell["drug"]:
            record = candidate
            break
    if record is None:
        return 0.5

    name, cls = record["name"], record["class"]
    split, outcome = cell["time_split"], cell["outcome"]

    # Starting value: only the listed weekly agents suit once_weekly dosing;
    # any other GLP1RA-class drug on a weekly schedule is marked implausible.
    if split == "once_weekly" and name in WEEKLY_GLP1:
        score = 0.95
    elif split == "once_weekly" and cls in ("GLP1RA", "GIP/GLP1RA"):
        score = 0.2
    else:
        score = 0.5

    # (rule applies?, minimum score) — the score is raised to every matching floor.
    floors = (
        # once-daily GLP1RA: time-of-day splits are worth exploring
        (name in ("Liraglutide", "Lixisenatide") and split in ("AM", "PM", "bedtime"), 0.85),
        # SGLT2i: evening/bedtime dosing against nocturnal outcomes
        (cls == "SGLT2i" and split in ("PM", "bedtime")
         and outcome in ("BP_dipping", "nocturnal_hypo", "TIR"), 0.9),
        # basal insulin: bedtime vs morning comparisons
        (cls == "BasalInsulin" and split in ("bedtime", "AM", "morning_vs_evening"), 0.9),
        # statin: night-only dosing against LDL / BP dipping
        (cls == "Statin" and split == "night_only"
         and outcome in ("LDL_diurnal", "BP_dipping"), 0.85),
        # sulfonylurea: nocturnal hypoglycaemia, whatever the split
        (cls == "Sulfonylurea" and outcome == "nocturnal_hypo", 0.85),
        # ARB / ACEi: bedtime dosing against BP dipping
        (cls in ("ARB", "ACEi") and split == "bedtime" and outcome == "BP_dipping", 0.85),
        # alpha-glucosidase inhibitor: pre-meal dosing
        (cls == "AlphaGlucosidase" and split == "pre_meal_30", 0.85),
        # shift-work-adjusted schedule: any class
        (split == "shift_work_adjusted", 0.8),
        # TZD: evening dosing
        (cls == "TZD" and split in ("PM", "bedtime"), 0.7),
        # DPP4i: evening / split dosing
        (cls == "DPP4i" and split in ("PM", "twice_daily_split", "morning_vs_evening"), 0.75),
        # rapid-acting insulin: meal-anchored dosing
        (cls == "RapidInsulin" and split in ("pre_meal_30", "with_meal"), 0.85),
        # dopamine agonist: morning dosing
        (cls == "DopamineAgonist" and split == "AM", 0.8),
    )
    for applies, floor in floors:
        if applies:
            score = max(score, floor)

    # (rule applies?, maximum score) — sanity caps for class/outcome mismatches,
    # applied after all floors so they always win.
    caps = (
        (cls == "Statin" and outcome in ("HbA1c", "TIR", "MAGE", "hypo_rate"), 0.3),
        (cls in ("ACEi", "ARB", "CCB", "Thiazide", "MRA")
         and outcome in ("HbA1c", "TIR", "TBR", "MAGE", "FPG", "PPG"), 0.4),
    )
    for applies, cap in caps:
        if applies:
            score = min(score, cap)

    return score


def rank_unexplored(cells, data, top_n=10, diversify=True):
    """Score cells by novelty × plausibility and return the best ``top_n``.

    Every dict in ``cells`` is updated in place with ``plausibility`` and
    ``combined_score`` (both rounded to three decimals). Cells are ordered by
    (combined_score, plausibility), highest first.

    With ``diversify=False`` the first ``top_n`` of that ordering are returned.
    Otherwise the result is assembled in three passes over the ordering:
      1. at most one positive-score cell per drug;
      2. a second positive-score cell per drug, if room remains;
      3. any remaining cells, in order, until ``top_n`` is reached.
    """
    for cell in cells:
        plaus = round(plausibility_score(cell, data), 3)
        cell["plausibility"] = plaus
        cell["combined_score"] = round(cell["novelty"] * plaus, 3)

    def sort_key(cell):
        return cell["combined_score"], cell["plausibility"]

    ranked = list(cells)
    ranked.sort(key=sort_key, reverse=True)
    if not diversify:
        return ranked[:top_n]

    chosen = []
    per_drug = {}
    seen_pairs = set()

    # Pass 1: one cell per drug (and therefore per drug/split pair).
    for cell in ranked:
        if len(chosen) >= top_n:
            break
        drug = cell["drug"]
        pair = (drug, cell["time_split"])
        if per_drug.get(drug, 0) >= 1 or pair in seen_pairs:
            continue
        if cell["combined_score"] <= 0:
            continue
        chosen.append(cell)
        per_drug[drug] = per_drug.get(drug, 0) + 1
        seen_pairs.add(pair)

    # Pass 2: allow each drug a second entry.
    for cell in ranked:
        if len(chosen) >= top_n:
            break
        if cell in chosen:
            continue
        drug = cell["drug"]
        if per_drug.get(drug, 0) >= 2 or cell["combined_score"] <= 0:
            continue
        chosen.append(cell)
        per_drug[drug] = per_drug.get(drug, 0) + 1

    # Pass 3: fill whatever room is left, regardless of drug or score.
    for cell in ranked:
        if len(chosen) >= top_n:
            break
        if cell not in chosen:
            chosen.append(cell)

    return chosen[:top_n]


# ---------------------------------------------------------------------------
# 3. Korean cohort suitability + 진료환경 feasibility cross-link
# ---------------------------------------------------------------------------
def korean_cohort_match(cell, data):
    """Return up to three chronotherapy-relevant cohorts, best feasibility first.

    Each cohort's ``feasibility_score`` is adjusted for the cell:
      * +0.05 (capped at 1.0) for hypoglycaemia/DKA outcomes when the cohort has claims data;
      * -0.2 (floored at 0.0) for TIR/TBR/TAR/MAGE outcomes when the cohort lacks EMR data;
      * +0.1 (capped at 1.0) for the shift-work split when the cohort has lifestyle data.
    The adjusted value is rounded to three decimals in the result.
    """
    def adjusted(cohort):
        value = cohort["feasibility_score"]
        if cell["outcome"] in ("nocturnal_hypo", "hypo_rate", "DKA") and "claims" in cohort["data_types"]:
            value = min(1.0, value + 0.05)
        if cell["outcome"] in ("TIR", "TBR", "TAR", "MAGE") and "EMR" not in cohort["data_types"]:
            value = max(0.0, value - 0.2)
        if cell["time_split"] == "shift_work_adjusted" and "lifestyle" in cohort["data_types"]:
            value = min(1.0, value + 0.1)
        return value

    candidates = [
        {
            "cohort_id": cohort["id"],
            "cohort_name": cohort["name"],
            "n": cohort["n"],
            "feasibility": round(adjusted(cohort), 3),
            "notes": cohort["notes"],
        }
        for cohort in data["cohorts"]
        if cohort["chronotherapy_relevant"]
    ]
    # Stable sort: ties keep their order from data["cohorts"].
    best_first = sorted(candidates, key=lambda item: item["feasibility"], reverse=True)
    return best_first[:3]


def practice_feasibility(cell, data):
    """Mock feasibility scores for the Korean practice setting.

    Returns a dict with three component scores — insurance coverage (by drug
    class), expected adherence and sleep-schedule alignment (both by time
    split) — plus ``overall``, their mean rounded to three decimals. A drug
    missing from ``data["drugs"]`` is treated as class "Unknown".
    """
    drug = None
    for entry in data["drugs"]:
        if entry["name"] == cell["drug"]:
            drug = entry
            break
    drug_class = "Unknown" if not drug else drug["class"]
    split = cell["time_split"]

    # Insurance coverage: 0.9 is the mock default assumption.
    if drug_class == "GIP/GLP1RA":
        insurance = 0.7   # newer agents, partly non-reimbursed
    elif drug_class == "Amylin":
        insurance = 0.4   # not marketed domestically
    elif drug_class == "DopamineAgonist" and drug["name"] == "Bromocriptine QR":
        insurance = 0.5   # chronotherapy indication not approved domestically
    else:
        insurance = 0.9

    adherence_by_split = {
        "twice_daily_split": 0.65,
        "shift_work_adjusted": 0.65,
        "once_weekly": 0.95,
        "bedtime": 0.75,
    }
    adherence = adherence_by_split.get(split, 0.85)

    sleep_by_split = {
        "bedtime": 0.7,
        "night_only": 0.7,
        "shift_work_adjusted": 0.5,
    }
    sleep_align = sleep_by_split.get(split, 0.8)

    mean_score = (insurance + adherence + sleep_align) / 3.0
    return {
        "insurance_coverage": insurance,
        "adherence_estimate": adherence,
        "sleep_schedule_align": sleep_align,
        "overall": round(mean_score, 3),
    }


# ---------------------------------------------------------------------------
# 4. FDA/EMA/MFDS 라벨 cross-link + 가이드라인 cross-link
# ---------------------------------------------------------------------------
def label_lookup(drug_name, data):
    """Return the label record for ``drug_name`` (case-insensitive match).

    When ``data["labels"]`` has no matching entry, a placeholder record with
    "정보없음" for the fda/ema/mfds fields is returned instead.
    """
    hit = next(
        (entry for entry in data["labels"]
         if entry["drug"].lower() == drug_name.lower()),
        None,
    )
    if hit is not None:
        return hit
    return {"drug": drug_name, "fda": "정보없음", "ema": "정보없음", "mfds": "정보없음"}


def guideline_summary(data):
    """Project each guideline onto its Korean name, year and chronotherapy remarks."""
    summary = []
    for guideline in data["guidelines"]:
        summary.append({
            "name": guideline["kor"],
            "year": guideline["year"],
            "remarks": guideline["chronotherapy_remarks"],
        })
    return summary


# ---------------------------------------------------------------------------
# 5. 한국어 hypothesis card + protocol skeleton + grant proposal
# ---------------------------------------------------------------------------
def hypothesis_card(cell, data):
    """Render the Korean hypothesis card for one scored cell as a single string.

    The card lists the drug identifiers, the time split and its rationale, the
    outcome, the cell's novelty metrics, a templated hypothesis statement, the
    label cross-link, the top cohort matches and the practice-feasibility
    scores, followed by the disclaimer. ``cell`` must already carry
    ``plausibility`` and ``combined_score``. Raises StopIteration when the
    cell's drug is not in ``data["drugs"]``.
    """
    drug = next(filter(lambda entry: entry["name"] == cell["drug"], data["drugs"]))

    # Fall back to the raw ids when the split / outcome has no definition entry.
    split_def = {"label": cell["time_split"], "rationale": ""}
    for entry in data["time_splits"]:
        if entry["id"] == cell["time_split"]:
            split_def = entry
            break
    outcome_def = {"label": cell["outcome"], "unit": ""}
    for entry in data["outcomes"]:
        if entry["id"] == cell["outcome"]:
            outcome_def = entry
            break

    label = label_lookup(drug["name"], data)
    cohorts = korean_cohort_match(cell, data)
    feas = practice_feasibility(cell, data)

    rule = "=" * 70
    card = [
        rule,
        f"[가설 카드] {drug['kor']} × {split_def['label']} × {outcome_def['label']}",
        rule,
        f"약물 (영문): {drug['name']} ({drug['class']})",
        f"  - RxNorm: {drug['rxnorm']} | MeSH: {drug['mesh']} | MFDS: {drug['mfds_code']}",
        f"시각 split: {split_def['label']}",
        f"  근거: {split_def['rationale']}",
        f"Outcome: {outcome_def['label']} ({outcome_def.get('unit', '')})",
        "",
        (f"[Novelty] published={cell['published']} expected≈{cell['expected']} "
         f"novelty={cell['novelty']} plausibility={cell['plausibility']} "
         f"combined={cell['combined_score']}"),
        "",
        "[가설 진술]",
        (f"  {drug['kor']}을(를) {split_def['label']}에 투여하면, "
         f"표준 시각 투여 대비 {outcome_def['label']}이(가) "
         f"임상적으로 의미있는 차이를 보일 것이다."),
        "",
        "[FDA / EMA / MFDS 라벨 cross-link]",
        f"  - FDA  : {label['fda']}",
        f"  - EMA  : {label['ema']}",
        f"  - MFDS : {label['mfds']}",
        "",
        "[한국 cohort 적합성 top 3]",
    ]
    for cohort in cohorts:
        card += [
            f"  - {cohort['cohort_name']} (n={cohort['n']:,}) feasibility={cohort['feasibility']}",
            f"      {cohort['notes']}",
        ]
    card += [
        "",
        "[한국 진료환경 feasibility]",
        f"  - 보험급여: {feas['insurance_coverage']}",
        f"  - 환자 adherence 예상: {feas['adherence_estimate']}",
        f"  - 수면스케줄 정합성: {feas['sleep_schedule_align']}",
        f"  - 종합: {feas['overall']}",
        "",
        DISCLAIMER,
    ]
    return "\n".join(card)


def protocol_skeleton(cell, data):
    """Render a templated SPIRIT-style trial protocol skeleton for one cell.

    Only the drug, time-split label and outcome label/unit vary; every other
    line is fixed template text. The result is one newline-joined string
    ending with the disclaimer. Raises StopIteration when the cell's drug is
    not in ``data["drugs"]``.
    """
    drug = next(filter(lambda entry: entry["name"] == cell["drug"], data["drugs"]))

    # Fall back to the raw ids when the split / outcome has no definition entry.
    split_def = {"label": cell["time_split"]}
    for entry in data["time_splits"]:
        if entry["id"] == cell["time_split"]:
            split_def = entry
            break
    outcome_def = {"label": cell["outcome"], "unit": ""}
    for entry in data["outcomes"]:
        if entry["id"] == cell["outcome"]:
            outcome_def = entry
            break

    title = (f"제목: {drug['kor']} {split_def['label']} chronotherapy의 "
             f"{outcome_def['label']} 효과: 무작위 배정 교차/평행 임상시험")

    # (section heading, body lines); each section is followed by one blank line.
    sections = [
        ("[1. Background & Rationale (SPIRIT 6a)]", [
            f"  - 약물: {drug['name']} ({drug['class']})",
            (f"  - 미충족 의료 수요: {split_def['label']} 투여 시 "
             f"{outcome_def['label']} 변화에 대한 한국인 RCT 근거 부족"),
        ]),
        ("[2. Objectives & Hypotheses (SPIRIT 7)]", [
            f"  - Primary: 12주 시점 {outcome_def['label']}의 군간 차이",
            "  - Secondary: 안전성 (저혈당·DKA·hyperkalemia), QoL, adherence",
        ]),
        ("[3. Study Design (SPIRIT 8)]", [
            "  - 다기관 무작위 배정, 평행/교차 (washout 2주), open-label or PROBE",
            "  - Arm A: 표준 시각 (라벨 권고)  vs  Arm B: 시험 시각 (chronotherapy)",
        ]),
        ("[4. Participants (SPIRIT 10)]", [
            "  - Inclusion: 만 19-75세, T2DM, HbA1c 7.0-10.0%, 안정적 약물 4주↑",
            "  - Exclusion: T1DM, eGFR<30, 임신, 교대근무 (별도 sub-study)",
        ]),
        ("[5. Interventions (SPIRIT 11)]", [
            f"  - {drug['kor']} 시각 split 적용. 동일 용량 유지.",
        ]),
        ("[6. Outcomes (SPIRIT 12)]", [
            f"  - Primary: {outcome_def['label']} ({outcome_def.get('unit', '')})",
            "  - 측정 시점: 0주, 4주, 12주, 24주",
        ]),
        ("[7. Sample Size (SPIRIT 14)]", [
            "  - 양측 α=0.05, power 80%, 효과크기 d=0.4 가정 → 군당 100명, 총 200명",
        ]),
        ("[8. Randomisation & Blinding (SPIRIT 16)]", [
            "  - 1:1 블록 무작위 (블록 4), 중앙 무작위, outcome assessor blinding",
        ]),
        ("[9. Data Collection & Management (SPIRIT 18)]", [
            "  - eCRF, CGM raw data, NHIS-HEALS·K-CURE cross-link 동의 옵션",
        ]),
        ("[10. Statistical Methods (SPIRIT 20)]", [
            "  - ITT 분석. ANCOVA (baseline 보정). subgroup: chronotype, age, eGFR.",
        ]),
        ("[11. Ethics & Dissemination (SPIRIT 24-31)]", [
            "  - IRB 승인 필수. CRIS·ClinicalTrials.gov 등록. STROBE 보고지침 준수.",
        ]),
    ]

    rule = "=" * 70
    out = [rule, "[STROBE-RCT / SPIRIT 호환 Protocol Skeleton]", rule, title, ""]
    for heading, body in sections:
        out.append(heading)
        out.extend(body)
        out.append("")
    out.append(DISCLAIMER)
    return "\n".join(out)


def proposal_abstract(cell, data):
    """Render a templated Korean grant / IIT proposal abstract for one cell.

    The drug, time-split label and outcome label are substituted into fixed
    template text; the result is one newline-joined string ending with the
    disclaimer. Raises StopIteration when the cell's drug is not in
    ``data["drugs"]``.
    """
    drug = next(filter(lambda entry: entry["name"] == cell["drug"], data["drugs"]))

    # Fall back to the raw ids when the split / outcome has no definition entry.
    split_def = {"label": cell["time_split"]}
    for entry in data["time_splits"]:
        if entry["id"] == cell["time_split"]:
            split_def = entry
            break
    outcome_def = {"label": cell["outcome"], "unit": ""}
    for entry in data["outcomes"]:
        if entry["id"] == cell["outcome"]:
            outcome_def = entry
            break

    rule = "=" * 70
    parts = [rule, "[KHIDI / NRF / NIH / 제약 IIT 호환 Proposal Abstract]", rule]
    parts += [
        (f"과제명: {drug['kor']} {split_def['label']} chronotherapy의 "
         f"한국인 제2형 당뇨병에서 {outcome_def['label']} 효과 검증"),
        "",
    ]
    parts += [
        "[연구 배경 및 필요성]",
        (f"  {drug['kor']}({drug['name']}, {drug['class']})은 표준 시각 처방 라벨이 "
         f"존재하지만, {split_def['label']} 시각의 chronotherapy 효과는 한국인 "
         "데이터에서 거의 검증되지 않았다. 본 연구는 시간생물학적 기전과 한국 "
         "환자의 식사·수면 패턴을 반영해 새로운 dosing window를 탐색한다."),
        "",
    ]
    parts += [
        "[연구 목표]",
        f"  1) 표준 시각 vs {split_def['label']} 군 간 {outcome_def['label']} 차이 검증",
        "  2) 한국인 chronotype·교대근무 sub-population 효과 분석",
        "  3) NHIS-HEALS / K-CURE 기반 외부 generalizability 평가",
        "",
    ]
    parts += [
        "[연구 방법 요약]",
        "  다기관 RCT (n=200) + 후향 NHIS-HEALS validation cohort (n≥10,000)",
        "",
    ]
    parts += [
        "[기대 효과 및 활용]",
        "  - KDA·ADA·EASD 가이드라인 chronotherapy section 근거 제공",
        "  - MFDS 의약품 라벨 dose timing 부록 update 근거",
        "  - 한국인 individualized chronotherapy 알고리즘 1차 prototype",
        "",
    ]
    parts += [
        "[지원처 적합성]",
        "  - KHIDI / NRF: 한국인 cohort 활용·국내 진료환경 적합성",
        "  - NIH (R03/R21): novelty score 기반 unexplored hypothesis 강조",
        ("  - Novo Nordisk / Eli Lilly / Boehringer Ingelheim IIT: "
         "상품 chronotherapy 신적응 탐색"),
        "",
    ]
    parts.append(DISCLAIMER)
    return "\n".join(parts)


# ---------------------------------------------------------------------------
# Drug-specific gap view
# ---------------------------------------------------------------------------
def drug_gap_view(drug_query, cells, data):
    """Render the top-10 chronotherapy gap table for one drug.

    Args:
        drug_query: English drug name (case-insensitive) or Korean drug name
            (exact match); surrounding whitespace is ignored.
        cells: Ontology cells; ``combined_score`` is used for ordering when
            present, otherwise ``novelty``.
        data: Loaded datasets; ``data["drugs"]`` is the drug catalogue.

    Returns:
        The rendered table as one string, or a one-line "[경고] ..." warning
        when the query matches no catalogue drug that has cells.
    """
    not_found = f"[경고] '{drug_query}'에 해당하는 약물을 찾지 못했습니다."

    query = drug_query.strip()
    if not query:
        # A blank query must not match catalogue entries that have an empty Korean name.
        return not_found
    query_l = query.lower()

    # Resolve the query against the catalogue once, instead of rescanning every
    # drug for every cell. Keys are lower-cased English names; first entry wins.
    catalogue = {}
    for entry in data["drugs"]:
        catalogue.setdefault(entry["name"].lower(), entry)
    targets = {
        entry["name"].lower()
        for entry in data["drugs"]
        if entry["name"].lower() == query_l or entry["kor"] == query
    }

    matched = [c for c in cells if c["drug"].lower() in targets]
    if not matched:
        return not_found

    # The header describes the drug of the first matched cell. It is looked up
    # case-insensitively, so a cell/catalogue casing mismatch cannot raise.
    drug = catalogue[matched[0]["drug"].lower()]
    matched.sort(key=lambda x: (x.get("combined_score", x["novelty"])), reverse=True)
    top = matched[:10]

    lines = []
    lines.append("=" * 70)
    lines.append(f"[{drug['kor']} ({drug['name']}) chronotherapy gap top 10]")
    lines.append("=" * 70)
    lines.append(f"{'시각 split':<25} {'outcome':<22} {'pub':<5} {'nov':<5} {'plaus':<6} {'comb':<5}")
    lines.append("-" * 70)
    for c in top:
        lines.append(f"{c['time_split']:<25} {c['outcome']:<22} "
                     f"{c['published']:<5} {c['novelty']:<5} "
                     f"{c.get('plausibility', '-'):<6} "
                     f"{c.get('combined_score', '-'):<5}")
    lines.append("")
    lab = label_lookup(drug["name"], data)
    lines.append(f"[라벨 권고] FDA: {lab['fda']} | EMA: {lab['ema']} | MFDS: {lab['mfds']}")
    lines.append("")
    lines.append(DISCLAIMER)
    return "\n".join(lines)


# ---------------------------------------------------------------------------
# Top unexplored cells output
# ---------------------------------------------------------------------------
def render_top_table(top_cells, data):
    """Render the ranked cells as a fixed-width text table ending with the disclaimer.

    Drug, split and outcome ids are shown by their display labels from
    ``data`` when available (raw id otherwise), truncated to 16 characters.
    Cells must already carry ``plausibility`` and ``combined_score``.
    """
    drug_labels = dict((d["name"], d["kor"]) for d in data["drugs"])
    split_labels = dict((s["id"], s["label"]) for s in data["time_splits"])
    outcome_labels = dict((o["id"], o["label"]) for o in data["outcomes"])

    rule = "=" * 80
    out = [
        rule,
        "[Top 미탐색 chronotherapy cell ranking]",
        rule,
        f"{'#':<3} {'약물':<18} {'시각 split':<18} {'outcome':<18} {'pub':<4} {'nov':<5} {'plaus':<6} {'comb':<5}",
        "-" * 80,
    ]
    for rank, cell in enumerate(top_cells, start=1):
        drug_text = drug_labels.get(cell["drug"], cell["drug"])[:16]
        split_text = split_labels.get(cell["time_split"], cell["time_split"])[:16]
        outcome_text = outcome_labels.get(cell["outcome"], cell["outcome"])[:16]
        out.append(
            f"{rank:<3} {drug_text:<18} {split_text:<18} {outcome_text:<18} "
            f"{cell['published']:<4} {cell['novelty']:<5} "
            f"{cell['plausibility']:<6} {cell['combined_score']:<5}"
        )
    out += ["", DISCLAIMER]
    return "\n".join(out)


# ---------------------------------------------------------------------------
# Stats summary
# ---------------------------------------------------------------------------
def stats_summary(data, normalized, cells):
    """Render dataset counts and the per-guideline chronotherapy remarks as text.

    Counts cover each loaded dataset, the normalized corpus and the ontology
    cells (split into cells with at least one publication and cells with
    none). The result is one newline-joined string ending with the disclaimer.
    """
    covered = len([c for c in cells if c["published"] > 0])
    rule = "=" * 70

    out = [
        rule,
        "[DMChronoTx-Hypo 데이터 요약]",
        rule,
        f"약물 수            : {len(data['drugs'])}",
        f"시각 split 수      : {len(data['time_splits'])}",
        f"outcome 수         : {len(data['outcomes'])}",
        f"mock corpus abstr  : {len(data['corpus'])}",
        f"정규화 abstract    : {len(normalized)}",
        f"3D ontology cells  : {len(cells):,}",
        f"  published cells  : {covered}",
        f"  미탐색 cells     : {len(cells) - covered}",
        f"한국 cohort 수     : {len(data['cohorts'])}",
        f"라벨 cross-link    : {len(data['labels'])}",
        f"가이드라인 cross-link: {len(data['guidelines'])}",
        "",
        "[가이드라인 chronotherapy 권고 요약]",
    ]
    for guideline in guideline_summary(data):
        out.append(f"  - {guideline['name']} ({guideline['year']})")
        out.extend(f"      · {remark}" for remark in guideline["remarks"])
    out += ["", DISCLAIMER]
    return "\n".join(out)


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
def build_parser():
    """Create the argparse parser for the DMChronoTx-Hypo command line.

    Options are registered in a fixed order so ``--help`` lists them
    consistently: --top, --drug, --card, --card-rank, --protocol, --proposal,
    --stats, --seed.
    """
    parser = argparse.ArgumentParser(
        prog="DMChronoTx-Hypo",
        description=(
            "당뇨병 chronotherapy hypothesis generator (mock).\n"
            "약물 × 시각 split × outcome 3D ontology에서 미탐색 cell을 ranking하고, "
            "한국어 hypothesis card / STROBE-RCT/SPIRIT protocol skeleton / "
            "IIT proposal abstract를 생성합니다.\n\n"
            "주의: 본 도구는 합성 mock 데이터 기반 연구·참고용 도구이며, "
            "임상 진료에 직접 사용해서는 안 됩니다."
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # (flag, add_argument keyword arguments), in display order.
    option_specs = [
        ("--top", {"type": int, "default": 0,
                   "help": "상위 N개 미탐색 cell 출력"}),
        ("--drug", {"type": str, "default": None,
                    "help": "특정 약물의 chronotherapy gap top 10 출력 (영문/한글 모두 가능)"}),
        ("--card", {"action": "store_true",
                    "help": "top 1 cell 한국어 hypothesis card 출력 (--card-rank 으로 순위 변경)"}),
        ("--card-rank", {"type": int, "default": 1,
                         "help": "hypothesis card 출력할 순위 (기본 1)"}),
        ("--protocol", {"action": "store_true",
                        "help": "top 1 cell STROBE-RCT/SPIRIT protocol skeleton 출력"}),
        ("--proposal", {"action": "store_true",
                        "help": "top 1 cell 한국어 IIT/grant proposal abstract 출력"}),
        ("--stats", {"action": "store_true",
                     "help": "데이터 요약 통계 출력"}),
        ("--seed", {"type": int, "default": 20260508,
                    "help": "재현성용 random seed"}),
    ]
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser


def main(argv=None):
    """Run the DMChronoTx-Hypo command-line interface.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when the datasets cannot be
        loaded, 1 when --card / --protocol / --proposal was requested but
        there is no ranked cell to render.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    random.seed(args.seed)

    try:
        data = load_all()
    except Exception as e:  # top-level boundary: report the failure instead of a traceback
        print(f"[오류] 데이터 로드 실패: {e}", file=sys.stderr)
        return 2

    normalized = normalize_corpus(data)
    cells = build_3d_ontology(data, normalized)
    # Rank deep enough for --top and for --card-rank, so a rank beyond 50 is
    # honoured instead of silently falling back to rank 1.
    pool_size = max(50, args.top + 5, args.card_rank)
    top_cells_full = rank_unexplored(cells, data, top_n=pool_size)

    exit_code = 0
    any_action = False

    if args.stats:
        any_action = True
        print(stats_summary(data, normalized, cells))
        print()

    if args.top > 0:
        any_action = True
        print(render_top_table(top_cells_full[: args.top], data))
        print()

    if args.drug:
        any_action = True
        all_scored = rank_unexplored(cells, data, top_n=len(cells), diversify=False)
        print(drug_gap_view(args.drug, all_scored, data))
        print()

    if args.card or args.protocol or args.proposal:
        any_action = True
        if not top_cells_full:
            # Nothing was ranked (e.g. empty datasets): skip rather than index an empty list.
            print("[경고] ranking된 cell이 없어 card/protocol/proposal 출력을 건너뜁니다.",
                  file=sys.stderr)
            exit_code = 1
        else:
            # Resolve the requested rank once; all three renderers share the same cell.
            idx = args.card_rank - 1
            if not 0 <= idx < len(top_cells_full):
                print(f"[경고] --card-rank {args.card_rank} 은(는) 유효 범위 "
                      f"(1~{len(top_cells_full)}) 밖입니다. 1순위 cell을 사용합니다.",
                      file=sys.stderr)
                idx = 0
            target = top_cells_full[idx]

            if args.card:
                print(hypothesis_card(target, data))
                print()
            if args.protocol:
                print(protocol_skeleton(target, data))
                print()
            if args.proposal:
                print(proposal_abstract(target, data))
                print()

    if not any_action:
        # Default behaviour when no option is given: stats followed by the top 5.
        print(stats_summary(data, normalized, cells))
        print()
        print(render_top_table(top_cells_full[:5], data))
        print()
        print("[힌트] python3 main.py --help 로 모든 옵션을 확인하세요.")

    return exit_code


# Script entry point: exit the process with main()'s return value as the status.
if __name__ == "__main__":
    raise SystemExit(main())
