#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
ObesityGrantIITRadar (오베시티그랜트아이아이티레이더)
- 비만대사질환 funding/grant/IIT call multi-source 자동 알림 + 적합도 매칭 MVP
- mock data 기반 데모. 외부 네트워크 호출 없음.

서브커맨드:
  ingest      Multi-source grant data 적재 (시뮬레이션)
  match       연구자 적합도 매칭
  deadlines   마감일 임박 grant 분류
  iit-watch   기업 IIT call + ClinicalTrials.gov sub-investigator 추적
  digest      한국어 weekly digest + Telegram/Slack alert 시뮬레이션
"""

import argparse
import json
import os
import sys
from datetime import date, datetime
from pathlib import Path

# ============================================================
# Path configuration
# ============================================================
# Directory that contains this script; every data path is resolved from it.
ROOT = Path(__file__).resolve().parent
DATA_DIR = ROOT / "data"

# JSON input files read by the load_* helpers below.
GRANTS_FILE = DATA_DIR / "grants.json"
RESEARCHERS_FILE = DATA_DIR / "researchers.json"
CT_GOV_FILE = DATA_DIR / "clinicaltrials_gov_subinvestigator.json"
COMPANY_IIT_FILE = DATA_DIR / "company_iit_calls.json"
MENTORS_FILE = DATA_DIR / "mentors.json"

TODAY = date(2026, 5, 7)  # fixed reference date (today = 2026-05-07)

# Disclaimer text emitted together with the subcommand output.
DISCLAIMER = (
    "[디스클레이머] 본 도구는 mock data 기반 데모 MVP입니다. "
    "참고용이며 실제 grant 신청 전 공고 원문 및 연구자 본인 검토 필수."
)


# ============================================================
# 데이터 로드
# ============================================================
def load_json(path: Path):
    """Parse the UTF-8 JSON file at ``path`` and return the decoded object.

    If the file does not exist, an error line is written to stderr and the
    process exits with status 1.
    """
    if path.exists():
        raw = path.read_text(encoding="utf-8")
        return json.loads(raw)
    print(f"[ERROR] 데이터 파일 없음: {path}", file=sys.stderr)
    sys.exit(1)


def load_all_grants():
    """Return the parsed contents of ``GRANTS_FILE``."""
    grants = load_json(GRANTS_FILE)
    return grants


def load_researchers():
    """Return the parsed contents of ``RESEARCHERS_FILE``."""
    researchers = load_json(RESEARCHERS_FILE)
    return researchers


def load_ct_gov():
    """Return the parsed contents of ``CT_GOV_FILE``."""
    trials = load_json(CT_GOV_FILE)
    return trials


def load_company_iit():
    """Return the parsed contents of ``COMPANY_IIT_FILE``."""
    calls = load_json(COMPANY_IIT_FILE)
    return calls


def load_mentors():
    """Return the parsed contents of ``MENTORS_FILE``."""
    mentors = load_json(MENTORS_FILE)
    return mentors


# ============================================================
# 유틸
# ============================================================
def parse_date(s: str) -> date:
    """Convert a ``YYYY-MM-DD`` string into a ``date``.

    Raises:
        ValueError: if ``s`` does not match the ``%Y-%m-%d`` format.
    """
    stamp = datetime.strptime(s, "%Y-%m-%d")
    return stamp.date()


def days_until(d_str: str) -> int:
    """Return the number of days from ``TODAY`` to the date in ``d_str``.

    The result is negative when the date lies before ``TODAY``.
    """
    remaining = parse_date(d_str) - TODAY
    return remaining.days


def stage_label(days: int) -> str:
    """Map a days-remaining count onto its deadline stage label.

    Negative counts yield the "past deadline" label; counts up to 30 yield the
    label of the smallest threshold (1, 3, 7, 14, 30) that covers them; larger
    counts yield ``D-<days>``.
    """
    if days < 0:
        return "지난마감"
    for threshold in (1, 3, 7, 14, 30):
        if days <= threshold:
            return f"D-{threshold}"
    return f"D-{days}"


def jaccard(a, b) -> float:
    """Return the case-insensitive Jaccard similarity of two string collections.

    Args:
        a: Iterable of strings.
        b: Iterable of strings.

    Returns:
        ``|A ∩ B| / |A ∪ B|`` computed on the lower-cased items, or ``0.0``
        when both collections are empty.
    """
    sa = {x.lower() for x in a}
    sb = {y.lower() for y in b}
    union = sa | sb
    # An empty union means both inputs were empty; this single guard also
    # protects the division below.
    if not union:
        return 0.0
    return len(sa & sb) / len(union)


def tokenize(text: str):
    """Split ``text`` into lower-cased tokens of at least two characters.

    A character belongs to a token when it is alphanumeric or its code point
    is above 127; every other character acts as a separator. Falsy input
    yields an empty list.
    """
    if not text:
        return []
    # Replace each separator with a plain space, lower-casing kept characters
    # one at a time, then cut on the literal space only (non-ASCII whitespace
    # is a token character here and must not split).
    normalized = "".join(
        ch.lower() if (ch.isalnum() or ord(ch) > 127) else " "
        for ch in text
    )
    return [piece for piece in normalized.split(" ") if len(piece) >= 2]


# ============================================================
# 적합도 매칭
# ============================================================
def expertise_match(grant, researcher) -> float:
    """Score the overlap between grant keywords and researcher sub-domains.

    Returns the ``jaccard`` similarity of ``grant["domain_keywords"]`` and
    ``researcher["sub_domains"]``; a missing key counts as an empty list.
    """
    grant_terms = grant.get("domain_keywords", [])
    researcher_terms = researcher.get("sub_domains", [])
    return jaccard(grant_terms, researcher_terms)


def publication_relevance(grant, researcher) -> float:
    """Return the share of grant terms found in the researcher's paper titles.

    Grant terms are the tokens of ``title_en`` and ``title_kr`` plus the
    lower-cased ``domain_keywords``. Publication terms are the tokens of each
    publication's ``title``. The result is in ``[0.0, 1.0]`` and is ``0.0``
    when either term set is empty.
    """
    wanted = set(tokenize(grant.get("title_en", "")))
    wanted.update(tokenize(grant.get("title_kr", "")))
    wanted.update(kw.lower() for kw in grant.get("domain_keywords", []))
    if not wanted:
        return 0.0

    published = {
        tok
        for paper in researcher.get("publications", [])
        for tok in tokenize(paper.get("title", ""))
    }
    if not published:
        return 0.0

    hits = len(wanted & published)
    return min(1.0, hits / max(1, len(wanted)))


def site_capability_match(grant, researcher) -> float:
    """Return the fraction of required site capabilities the researcher has.

    Reads ``grant["site_capability_required"]`` (list of names) and
    ``researcher["site_capability"]`` (name -> truthy flag). A grant without
    requirements scores ``1.0``.
    """
    required = grant.get("site_capability_required", [])
    if not required:
        return 1.0
    available = researcher.get("site_capability", {})
    satisfied = [name for name in required if available.get(name, False)]
    return len(satisfied) / len(required)


def drug_experience_match(grant, researcher) -> float:
    """Return the fraction of required drugs the researcher has experience with.

    Compares ``grant["drug_experience_required"]`` against
    ``researcher["drug_experience"]`` case-insensitively. A grant without
    requirements scores ``1.0``.
    """
    required = grant.get("drug_experience_required", [])
    if not required:
        return 1.0
    known = {name.lower() for name in researcher.get("drug_experience", [])}
    matched = [name for name in required if name.lower() in known]
    return len(matched) / len(required)


def fitness_score(grant, researcher) -> dict:
    """Combine the component matches into a weighted 0-100 fitness score.

    Weights: expertise 0.40, publications 0.20, site capability 0.15, drug
    experience 0.15, ``korean_site_recruiting_feasibility`` 0.10 (read from
    the researcher, default 0.5).

    Returns:
        A dict with ``score_0_100`` (rounded to 1 decimal) followed by each
        component rounded to 3 decimals.
    """
    parts = {
        "expertise_match": expertise_match(grant, researcher),
        "publication_relevance": publication_relevance(grant, researcher),
        "site_capability_match": site_capability_match(grant, researcher),
        "drug_experience_match": drug_experience_match(grant, researcher),
        "korean_site_recruiting_feasibility": float(
            researcher.get("korean_site_recruiting_feasibility", 0.5)
        ),
    }
    # Written out term by term so the floating-point summation order is fixed.
    weighted = (
        0.40 * parts["expertise_match"]
        + 0.20 * parts["publication_relevance"]
        + 0.15 * parts["site_capability_match"]
        + 0.15 * parts["drug_experience_match"]
        + 0.10 * parts["korean_site_recruiting_feasibility"]
    )
    report = {"score_0_100": round(weighted * 100, 1)}
    report.update((name, round(value, 3)) for name, value in parts.items())
    return report


# ============================================================
# 멘토 매칭
# ============================================================
def find_mentor(grant, mentors):
    """Pick the mentor whose expertise shares the most keywords with the grant.

    Keywords are compared case-insensitively. On a tie the earliest mentor
    wins. Returns ``None`` when no mentor shares at least one keyword.
    """
    wanted = {kw.lower() for kw in grant.get("domain_keywords", [])}
    pool = list(mentors)
    hits = [
        len(wanted & {skill.lower() for skill in person.get("expertise", [])})
        for person in pool
    ]
    top_hits = max(hits, default=0)
    if top_hits == 0:
        return None
    # list.index returns the first occurrence, i.e. the earliest best mentor.
    return pool[hits.index(top_hits)]


# ============================================================
# 서브커맨드: ingest
# ============================================================
def cmd_ingest(args):
    """Simulate ingestion: load grants, filter by source, print a summary.

    Args:
        args: Parsed CLI namespace. ``args.source`` is one of the aliases in
            ``keymap``, any other source token, or ``"all"`` (also used when
            the value is empty).

    Prints the per-source row counts, a grant_id uniqueness check, and the
    number of rows missing a deadline or a budget.
    """
    import re  # local import: only this subcommand needs it

    grants = load_all_grants()
    src = args.source.lower() if args.source else "all"
    if src == "all":
        rows = grants
    else:
        # Map CLI aliases to the token expected inside the grant's source.
        keymap = {
            "khidi": "KHIDI",
            "nrf": "NRF",
            "nih": "NIH",
            "eu-horizon": "EU-Horizon",
            "amed": "AMED",
            "company-iit": "IIT",
            "kda": "KDA",
            "ksso": "KSSO",
            "kasl": "KASL",
            "mfds": "MFDS",
            "mohw": "복지부",
            "mrc": "MRC",
            "cihr": "CIHR",
            "nhmrc": "NHMRC",
            "bmbf": "BMBF",
        }
        token = keymap.get(src, src.upper()).lower()
        # Match the token only when it is not embedded in a longer ASCII
        # alphanumeric run; a plain substring test made "mrc" also select
        # "NHMRC" grants.
        pattern = re.compile(
            r"(?<![0-9a-z])" + re.escape(token) + r"(?![0-9a-z])"
        )
        rows = [g for g in grants if pattern.search(g.get("source", "").lower())]

    print(f"[INGEST] source={src} 적재 건수={len(rows)} (전체 {len(grants)})")
    print("-" * 70)
    by_src = {}
    for g in rows:
        by_src[g["source"]] = by_src.get(g["source"], 0) + 1
    for s, n in sorted(by_src.items(), key=lambda x: -x[1]):
        print(f"  {s:25s} {n:4d}건")
    print("-" * 70)
    # Report "OK" only when the IDs really are unique; otherwise show how
    # many rows share an ID with another row.
    unique_ids = len({g["grant_id"] for g in rows})
    duplicates = len(rows) - unique_ids
    status = "OK" if duplicates == 0 else f"중복 {duplicates}건"
    print(f"디덥/정규화: ID 유일성 {status} ({unique_ids} unique)")
    # Sanity check on extracted deadline / budget fields.
    no_deadline = [g for g in rows if not g.get("deadline")]
    no_budget = [g for g in rows if not g.get("budget")]
    print(f"마감일 결손={len(no_deadline)} / 예산 결손={len(no_budget)}")
    print(DISCLAIMER)


# ============================================================
# 서브커맨드: match
# ============================================================
def cmd_match(args):
    """Print the top-N open grants ranked by fitness for one researcher.

    Args:
        args: Parsed CLI namespace with ``researcher`` (researcher_id) and
            ``top`` (number of results; missing or non-positive values fall
            back to 10).

    Exits with status 2 when the researcher_id is unknown. Grants without a
    deadline or with a deadline before ``TODAY`` are skipped.
    """
    researchers = load_researchers()
    grants = load_all_grants()
    mentors = load_mentors()

    rid = args.researcher
    r = next((x for x in researchers if x["researcher_id"] == rid), None)
    if not r:
        print(f"[ERROR] researcher_id={rid} 없음. 사용 가능: " +
              ", ".join(x["researcher_id"] for x in researchers), file=sys.stderr)
        sys.exit(2)

    # A negative value would otherwise slice from the end of the ranking.
    top_n = args.top if args.top and args.top > 0 else 10
    scored = []
    for g in grants:
        deadline = g.get("deadline")
        if not deadline:
            # Records without a deadline cannot be ranked by time.
            continue
        d = days_until(deadline)
        if d < 0:
            continue
        s = fitness_score(g, r)
        scored.append((s["score_0_100"], d, s, g))
    scored.sort(key=lambda x: -x[0])
    top = scored[:top_n]

    print(f"=== 적합도 매칭: {r['name_kr']} ({r['name_en']}, {r['affiliation']}) ===")
    print(f"career_stage={r['career_stage']} / sub_domains={','.join(r['sub_domains'])}")
    print(f"site_capability={[k for k,v in r['site_capability'].items() if v]}")
    print(f"drug_experience={r['drug_experience']}")
    print("-" * 70)
    for rank, (sc, d, det, g) in enumerate(top, 1):
        mentor = find_mentor(g, mentors)
        budget = g.get("budget") or "-"  # placeholder when the budget is absent
        print(f"{rank:2d}. [{sc:5.1f}/100] {g['grant_id']} ({g['source']}) {stage_label(d)}")
        print(f"    제목: {g['title_kr']}")
        print(f"    예산: {budget}  마감: {g['deadline']} ({d}일 남음)  funding_rate: {g['funding_rate_history']*100:.0f}%")
        print(f"    분해: expertise={det['expertise_match']} pub={det['publication_relevance']} "
              f"site={det['site_capability_match']} drug={det['drug_experience_match']} "
              f"recruit={det['korean_site_recruiting_feasibility']}")
        if mentor:
            print(f"    멘토 추천: {mentor['name_kr']} ({mentor['affiliation']})")
        print()
    print(DISCLAIMER)


# ============================================================
# 서브커맨드: deadlines
# ============================================================
def cmd_deadlines(args):
    """List grants whose deadline falls within the window, grouped by stage.

    Args:
        args: Parsed CLI namespace; ``args.days`` is the window in days
            (``None`` means 30; ``0`` means "due today only").

    Grants without a deadline, already past, or beyond the window are
    skipped. Grants more than 30 days out (only reachable with a window
    above 30) are listed in the last bucket.
    """
    grants = load_all_grants()
    # ``args.days or 30`` would silently turn an explicit 0 into 30.
    days_window = 30 if args.days is None else args.days

    stages = ["D-1", "D-3", "D-7", "D-14", "D-30"]
    grouped = {stage: [] for stage in stages}
    for g in grants:
        deadline = g.get("deadline")
        if not deadline:
            continue
        d = days_until(deadline)
        if d < 0 or d > days_window:
            continue
        # Reuse stage_label so the bucketing cannot drift from the labels
        # used elsewhere; its "D-<n>" labels for n > 30 fall into "D-30".
        grouped.get(stage_label(d), grouped["D-30"]).append((d, g))

    print(f"=== 마감일 임박 Grant (기준일 {TODAY}, 윈도우 {days_window}일) ===")
    for stage in stages:
        items = grouped[stage]
        items.sort(key=lambda x: x[0])
        print(f"\n[{stage}] {len(items)}건")
        for d, g in items:
            # The budget is printed as stored (no hard-coded currency sign)
            # and replaced by "-" when absent.
            budget = str(g.get("budget") or "-")[:18]
            print(f"  ({d:2d}d) {g['grant_id']:25s} {g['source']:20s} {budget:18s} {g['title_kr']}")
    print()
    print(DISCLAIMER)


# ============================================================
# 서브커맨드: iit-watch
# ============================================================
def cmd_iit_watch(args):
    """Print open company IIT calls and open sub-investigator trial slots.

    Args:
        args: Parsed CLI namespace; ``args.country`` is a case-insensitive
            substring filter on each trial's ``country`` (empty = no filter).

    IIT calls are shown soonest deadline first, excluding calls whose
    deadline is before ``TODAY``. Trials are shown only when
    ``sub_investigator_open`` is truthy.
    """
    calls = load_company_iit()
    trials = load_ct_gov()
    country_filter = (args.country or "").lower()

    print("=== 기업 IIT call 알림 ===")
    dated_calls = [(days_until(call["deadline"]), call) for call in calls]
    open_calls = sorted(
        (pair for pair in dated_calls if pair[0] >= 0),
        key=lambda pair: pair[0],
    )
    for d, c in open_calls:
        print(f"  [{stage_label(d):5s}] {c['call_id']:25s} {c['sponsor_company']:15s} "
              f"{c['drug']:30s} 마감: {c['deadline']} ({d}d)")
        print(f"          type: {c['call_type']}  자격: {c['eligibility']}")

    print()
    print("=== ClinicalTrials.gov sub-investigator 추적 ===")
    if country_filter:
        in_country = [t for t in trials if country_filter in t.get("country", "").lower()]
    else:
        in_country = trials
    rows = [t for t in in_country if t.get("sub_investigator_open")]
    print(f"필터 country={country_filter or '(all)'} / sub-inv open={len(rows)}건")
    for r in rows:
        print(f"  {r['nct_id']:14s} [{r['phase']}] {r['sponsor']:20s} {r['drug']:30s} "
              f"{r['recruitment_status']:25s} {r['title']}")

    print()
    print(DISCLAIMER)


# ============================================================
# 서브커맨드: digest
# ============================================================
def category_of(source: str) -> str:
    """Classify a grant ``source`` string into a digest category.

    Matching is case-insensitive and checked in a fixed order: domestic
    government, domestic societies, NIH, EU, AMED, company IIT, other
    overseas funders; anything else is the catch-all category.

    "eu" is matched only as a whole word (or a word starting with "europ"),
    because a raw substring test also hits words such as "pharmaceutical"
    and would pull company IIT sources into the EU category.
    """
    s = source.lower()
    # Words = runs of alphanumeric characters (hyphens, spaces, etc. split).
    words = "".join(ch if ch.isalnum() else " " for ch in s).split()

    if "khidi" in s or "nrf" in s or "복지부" in s or "mfds" in s:
        return "국내 정부 (KHIDI/NRF/복지부/MFDS)"
    if "kda" in s or "ksso" in s or "kasl" in s:
        return "국내 학회 (KDA/KSSO/KASL)"
    if "nih" in s:
        return "NIH (미국)"
    if "horizon" in s or any(w == "eu" or w.startswith("europ") for w in words):
        return "EU Horizon"
    if "amed" in s:
        return "AMED (일본)"
    if "iit" in s:
        return "기업 IIT call"
    if "cihr" in s or "nhmrc" in s or "mrc" in s or "bmbf" in s:
        return "기타 해외 (CIHR/NHMRC/MRC/BMBF)"
    return "기타"


def cmd_digest(args):
    """Build the weekly Markdown digest, print it, and save it to digest.md.

    Args:
        args: Parsed CLI namespace. ``args.researcher`` limits the digest to
            one researcher_id (all researchers when empty). ``args.korean``
            is accepted but does not change the output: the digest body is
            printed to stdout either way.

    For each researcher the open grants are ranked by fitness score plus a
    deadline boost (+5 within 7 days, +2 within 14 days); the top 10 are
    grouped by category. Simulated Telegram/Slack alert lines are printed
    when any top grant is due within 7 days. Unknown researcher ids are
    reported on stderr and skipped.
    """
    researchers = load_researchers()
    grants = load_all_grants()
    mentors = load_mentors()

    target_ids = [args.researcher] if args.researcher else [r["researcher_id"] for r in researchers]

    # Days-to-deadline does not depend on the researcher, so compute it once
    # and drop grants that have no deadline or are already past it.
    active = []
    for g in grants:
        deadline = g.get("deadline")
        if not deadline:
            continue
        d = days_until(deadline)
        if d >= 0:
            active.append((d, g))

    out_lines = [
        "# ObesityGrantIITRadar — 한국어 Weekly Digest",
        "",
        f"- 발행일: {TODAY}",
        f"- 대상 연구자: {len(target_ids)}명 / 활성 grant: {len(active)}건",
        "",
    ]

    for rid in target_ids:
        r = next((x for x in researchers if x["researcher_id"] == rid), None)
        if not r:
            # Previously skipped silently, which produced an empty digest
            # with no hint about the mistyped id.
            print(f"[WARN] researcher_id={rid} 없음 — digest에서 제외", file=sys.stderr)
            continue
        out_lines.append("---")
        out_lines.append(f"## {r['name_kr']} ({r['affiliation']}, {r['career_stage']})")
        out_lines.append("")
        scored = []
        for d, g in active:
            s = fitness_score(g, r)
            # Deadline weighting: boost grants that are due soon.
            boost = 0.0
            if d <= 7:
                boost = 5.0
            elif d <= 14:
                boost = 2.0
            scored.append((s["score_0_100"] + boost, s["score_0_100"], d, g))
        scored.sort(key=lambda x: -x[0])
        top = scored[:10]

        # Group the top entries by category, keeping rank order inside each.
        by_cat = {}
        for total, raw, d, g in top:
            cat = category_of(g["source"])
            by_cat.setdefault(cat, []).append((total, raw, d, g))

        for cat in sorted(by_cat):
            out_lines.append(f"### {cat}")
            out_lines.append("")
            for total, raw, d, g in by_cat[cat]:
                mentor = find_mentor(g, mentors)
                mentor_str = f" / 멘토: {mentor['name_kr']}" if mentor else ""
                budget = g.get("budget") or "-"  # placeholder when absent
                out_lines.append(
                    f"- **[{stage_label(d)}]** `{g['grant_id']}` — {g['title_kr']}  "
                    f"(적합도 {raw:.1f}/100, 마감 {g['deadline']}, 예산 {budget}, "
                    f"funding_rate {g['funding_rate_history']*100:.0f}%{mentor_str})"
                )
            out_lines.append("")

        # Alert simulation for grants due within 7 days.
        critical = [t for t in top if t[2] <= 7]
        if critical:
            print(f"[TELEGRAM SIMULATED] researcher: {r['name_kr']} / "
                  f"{len(critical)}건 마감 임박 (D-7 이내) — 최상위: "
                  f"{critical[0][3]['grant_id']} ({critical[0][3]['title_kr']})")
            print(f"[SLACK SIMULATED] #grant-radar > {r['name_kr']} 적합 grant {len(top)}건, "
                  f"D-7 이내 {len(critical)}건. 자세한 내용은 digest.md 참조.")

    out_lines.append("---")
    out_lines.append(f"_{DISCLAIMER}_")

    digest_text = "\n".join(out_lines)

    # Both branches of the former ``if args.korean`` printed the same text.
    print(digest_text)

    # Also persist the digest next to the script.
    out_path = ROOT / "digest.md"
    with open(out_path, "w", encoding="utf-8") as f:
        f.write(digest_text)
    print(f"\n[INFO] digest 저장: {out_path}", file=sys.stderr)


# ============================================================
# argparse
# ============================================================
def build_parser():
    """Create the top-level argument parser with its five subcommands.

    Each subparser stores its handler in ``func`` via ``set_defaults`` so the
    caller can dispatch with ``args.func(args)``. A subcommand is required.
    """
    parser = argparse.ArgumentParser(
        prog="ObesityGrantIITRadar",
        description="비만대사질환 funding/grant/IIT call multi-source 자동 알림 + 적합도 매칭 MVP (mock data 기반)"
    )
    commands = parser.add_subparsers(dest="cmd", required=True)

    # ingest
    ingest = commands.add_parser("ingest", help="multi-source grant ETL 시뮬레이션")
    ingest.add_argument(
        "--source",
        default="all",
        help="khidi|nrf|nih|eu-horizon|amed|company-iit|kda|ksso|kasl|mfds|mohw|mrc|cihr|nhmrc|bmbf|all",
    )
    ingest.set_defaults(func=cmd_ingest)

    # match
    match_cmd = commands.add_parser("match", help="연구자 적합도 매칭")
    match_cmd.add_argument("--researcher", required=True, help="researcher_id (예: R001)")
    match_cmd.add_argument("--top", type=int, default=10, help="상위 N개")
    match_cmd.set_defaults(func=cmd_match)

    # deadlines
    deadlines = commands.add_parser("deadlines", help="마감일 임박 grant 분류 (D-30/D-14/D-7/D-3/D-1)")
    deadlines.add_argument("--days", type=int, default=30, help="윈도우 일수 (default 30)")
    deadlines.set_defaults(func=cmd_deadlines)

    # iit-watch
    iit_watch = commands.add_parser("iit-watch", help="기업 IIT call + ClinicalTrials.gov sub-investigator 추적")
    iit_watch.add_argument("--country", default="korea", help="필터링할 국가 (예: korea)")
    iit_watch.set_defaults(func=cmd_iit_watch)

    # digest
    digest = commands.add_parser("digest", help="한국어 weekly digest + Telegram/Slack alert 시뮬레이션")
    digest.add_argument("--korean", action="store_true", help="한국어 본문 출력")
    digest.add_argument("--researcher", help="특정 researcher_id 만 (없으면 전체)")
    digest.set_defaults(func=cmd_digest)

    return parser


def main():
    """CLI entry point: parse the command line and run the chosen subcommand."""
    namespace = build_parser().parse_args()
    namespace.func(namespace)


if __name__ == "__main__":
    main()