#!/usr/bin/env python3
"""DMPatentLandscape-Kor CLI entry point.

USPTO·EPO·KIPO·JPO·CNIPA·WIPO 당뇨 관련 특허를 family 단위로 통합하여
landscape view·만료/PTE/SPC 카운트다운·한국 출원인 동향·weekly digest를
한국어로 제공한다.

* lazy import: --help는 pandas/streamlit/plotly 없이도 동작한다.
* 외부 네트워크 호출 0.
* 본 도구는 연구·참고용이며, 실제 IP/FTO 결정은 변리사 자문 필수.
"""

from __future__ import annotations

import argparse
import os
import sys
from typing import Optional


# Directory containing this script; the data file is resolved relative to it
# so the CLI works regardless of the current working directory.
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
# Location of the JSON patent dataset read by _lazy_load_patents().
DATA_PATH = os.path.join(PROJECT_ROOT, "data", "patents.json")

# Name fragments used by _is_korean_applicant(): an assignee is flagged when
# any of these appears in it as a case-insensitive substring.
KOREAN_APPLICANTS_HINTS = [
    "LG", "엘지", "HK inno.N", "에이치케이이노엔", "종근당", "동아ST", "동아에스티",
    "대웅", "한미", "일동", "녹십자", "삼성바이오에피스", "셀트리온", "SK바이오팜",
    "에스케이바이오팜", "카카오헬스케어", "메디센서", "Hanmi", "Samsung", "Celltrion",
]

# Name fragments used by _is_global_top_assignee(), matched the same way
# (case-insensitive substring).
GLOBAL_TOP_ASSIGNEE_HINTS = [
    "Novo Nordisk", "Eli Lilly", "Sanofi", "Merck", "Boehringer", "AstraZeneca",
    "Bristol-Myers Squibb", "Dexcom", "Abbott", "Medtronic", "Takeda", "Janssen",
    "Pfizer", "Novo", "Innovent",
]

# User-facing disclaimer text: printed by the commands, embedded in the
# digest, and used as the argparse epilog.
DISCLAIMER = (
    "[디스클레이머] 본 도구는 연구·참고용이며, 실제 IP/FTO 결정은 변리사 자문 필수. "
    "데이터는 mock/synthetic이며 실제 특허 등록 정보가 아니다."
)


def _lazy_load_patents() -> list:
    """Load and return the parsed contents of the JSON file at DATA_PATH.

    Only the standard library is used (``json`` is imported on demand, no
    pandas). When DATA_PATH does not exist, an error message is written to
    stderr and the process exits with status 2.
    """
    import json

    if os.path.exists(DATA_PATH):
        with open(DATA_PATH, "r", encoding="utf-8") as handle:
            return json.load(handle)

    # Data file is absent: report it and stop the whole CLI.
    print(f"[ERROR] mock data 파일이 없습니다: {DATA_PATH}", file=sys.stderr)
    sys.exit(2)


def _is_korean_applicant(assignee: str) -> bool:
    """Return True when *assignee* contains any entry of KOREAN_APPLICANTS_HINTS.

    The comparison is a case-insensitive substring test. An empty or None
    assignee yields False.
    """
    if assignee:
        for hint in KOREAN_APPLICANTS_HINTS:
            if hint.lower() in assignee.lower():
                return True
    return False


def _is_global_top_assignee(assignee: str) -> bool:
    """Return True when *assignee* contains any entry of GLOBAL_TOP_ASSIGNEE_HINTS.

    The comparison is a case-insensitive substring test. An empty or None
    assignee yields False.
    """
    return bool(assignee) and any(
        hint.lower() in assignee.lower() for hint in GLOBAL_TOP_ASSIGNEE_HINTS
    )


def _days_to(date_str: Optional[str], today_str: str) -> Optional[int]:
    if not date_str:
        return None
    from datetime import date

    try:
        y, m, d = (int(x) for x in date_str.split("-"))
        today_y, today_m, today_d = (int(x) for x in today_str.split("-"))
        return (date(y, m, d) - date(today_y, today_m, today_d)).days
    except Exception:
        return None


def _effective_expiry(rec: dict) -> Optional[str]:
    """단순화된 effective expiry: standard expiry + PTE/SPC + pediatric."""
    base = rec.get("expiry_date")
    if not base:
        return None
    from datetime import date, timedelta

    try:
        y, m, d = (int(x) for x in base.split("-"))
        dt = date(y, m, d)
    except Exception:
        return base
    pte = int(rec.get("pte_days", 0) or 0)
    spc_months = int(rec.get("spc_extension_months", 0) or 0)
    pediatric = bool(rec.get("pediatric_extension"))
    extra_days = pte + spc_months * 30 + (180 if pediatric else 0)
    return (dt + timedelta(days=extra_days)).isoformat()


def cmd_summary(args: argparse.Namespace) -> int:
    """Print record/family totals plus per-office and per-category counts.

    A record counts as Korea-related when its assignee matches
    _is_korean_applicant() or its ``korean_filing`` field is truthy.
    Distributions are listed in descending order of count. Always returns 0.
    """
    records = _lazy_load_patents()

    office_tally: dict = {}
    category_tally: dict = {}
    family_ids = set()
    korean_total = 0
    for rec in records:
        office = rec["office"]
        office_tally[office] = office_tally.get(office, 0) + 1
        category = rec["category"]
        category_tally[category] = category_tally.get(category, 0) + 1
        if _is_korean_applicant(rec["assignee"]) or rec.get("korean_filing"):
            korean_total += 1
        family_ids.add(rec["family_id"])

    def _print_distribution(title, tally, width):
        # Largest bucket first; ties keep first-seen order (stable sort).
        print(title)
        for label, count in sorted(tally.items(), key=lambda kv: kv[1], reverse=True):
            print(f"  {label:{width}s} : {count:3d}")
        print()

    rule = "=" * 60
    print(rule)
    print("DMPatentLandscape-Kor — Summary")
    print(rule)
    print(f"총 patent records: {len(records)}")
    print(f"고유 family 수: {len(family_ids)}")
    print(f"한국 관련 (한국 출원인 또는 한국 출원): {korean_total}")
    print()
    _print_distribution("[Office별 분포]", office_tally, 6)
    _print_distribution("[Category별 분포]", category_tally, 14)
    print(DISCLAIMER)
    return 0


def cmd_top(args: argparse.Namespace) -> int:
    """Print the assignees with the most patent records.

    ``args.top`` gives how many to show (values below 1 are raised to 1).
    Each line is tagged as Korean or global-top when the assignee name
    matches the corresponding hint list; the Korean tag takes precedence.
    Always returns 0.
    """
    records = _lazy_load_patents()
    limit = max(1, int(args.top))

    # Tally records per assignee.
    counts: dict = {}
    for rec in records:
        name = rec["assignee"]
        counts[name] = counts.get(name, 0) + 1
    leaders = sorted(counts.items(), key=lambda kv: kv[1], reverse=True)[:limit]

    rule = "=" * 60
    print(rule)
    print(f"Top {limit} Assignees (patent count)")
    print(rule)
    for rank, (name, total) in enumerate(leaders, start=1):
        if _is_korean_applicant(name):
            tag = " [한국]"
        elif _is_global_top_assignee(name):
            tag = " [global top]"
        else:
            tag = ""
        print(f"  {rank:2d}. {name}  ({total}){tag}")
    print()
    print(DISCLAIMER)
    return 0


def _today_iso() -> str:
    from datetime import date

    return date.today().isoformat()


def _format_digest_md(patents: list, focus: Optional[list], today: str) -> str:
    """Render the weekly digest as a Markdown string.

    Args:
        patents: patent records (dicts) as loaded from the data file.
        focus: optional list of category names; when non-empty only records
            whose ``category`` is in the list are reported.
        today: reference date (``YYYY-MM-DD``) for the expiry countdown.

    Returns:
        Markdown text with four sections: patents whose effective expiry
        falls within the next 365 days (at most 20), recently granted
        patents with a priority date of 2020 or later (at most 10),
        Korea-related filings (at most 15), and sparsely populated
        categories.

    Optional date/text fields that are missing *or* null are treated as
    empty rather than raising or being rendered as ``None``.
    """
    lines = [
        f"# DMPatentLandscape-Kor — Weekly Digest ({today})",
        "",
        DISCLAIMER,
        "",
    ]

    # Category filter.
    if focus:
        filtered = [p for p in patents if p["category"] in focus]
        lines.append(f"**관심영역 필터**: {', '.join(focus)} (총 {len(filtered)}건)")
    else:
        filtered = patents
        lines.append(f"**관심영역**: 전체 (총 {len(filtered)}건)")
    lines.append("")

    # Expiring soon: effective expiry between today and today + 365 days.
    lines.append("## 만료 임박 (effective expiry 기준 D-365 이내)")
    soon = []
    for p in filtered:
        eff = _effective_expiry(p)
        if not eff:
            continue
        d = _days_to(eff, today)
        if d is not None and 0 <= d <= 365:
            soon.append((d, eff, p))
    soon.sort(key=lambda x: x[0])
    if not soon:
        lines.append("- (해당 없음)")
    else:
        for d, eff, p in soon[:20]:
            lines.append(
                f"- **D-{d}** | {p['office']} {p['pub_no']} | {p['drug_ref']} "
                f"({p['category']}) | {p['assignee']} | effective expiry={eff}"
            )
    lines.append("")

    # Recent grants. `or ""` guards against a null priority_date, which would
    # otherwise raise TypeError when compared with a string.
    lines.append("## 최근 grant (priority 기준 2020년 이후)")
    recent = [
        p for p in filtered
        if (p.get("priority_date") or "") >= "2020-01-01" and p.get("grant_date")
    ]
    recent.sort(key=lambda x: x.get("grant_date") or "", reverse=True)
    if not recent:
        # Same empty-section placeholder as the other list sections.
        lines.append("- (해당 없음)")
    else:
        for p in recent[:10]:
            lines.append(
                f"- {p['grant_date']} | {p['office']} {p['pub_no']} | "
                f"{p['drug_ref']} ({p['category']}) | {p['assignee']}"
            )
    lines.append("")

    # Korean applicants / Korean filings, newest application first.
    lines.append("## 한국 출원인 / 한국 출원 동향")
    kr = [p for p in filtered if _is_korean_applicant(p["assignee"]) or p.get("korean_filing")]
    if not kr:
        lines.append("- (해당 없음)")
    else:
        kr.sort(key=lambda x: x.get("app_date") or "", reverse=True)
        for p in kr[:15]:
            # Null values fall back to the same defaults as missing keys.
            app_date = p.get("app_date") or "?"
            claim = p.get("claim1_summary_ko") or ""
            lines.append(
                f"- {app_date} | {p['office']} {p['pub_no']} | "
                f"{p['assignee']} | {p['drug_ref']} ({p['category']})"
            )
            lines.append(f"    - claim 요약: {claim}")
    lines.append("")

    # IP whitespace: categories whose record count is at most a third of the
    # per-category average (with a floor of 1).
    lines.append("## IP whitespace 자동 highlight")
    cat_counts: dict = {}
    for p in filtered:
        cat_counts[p["category"]] = cat_counts.get(p["category"], 0) + 1
    avg = sum(cat_counts.values()) / max(1, len(cat_counts))
    sparse = [(c, n) for c, n in cat_counts.items() if n <= max(1, avg / 3)]
    if not sparse:
        lines.append("- (특별히 sparse한 카테고리 없음)")
    else:
        lines.append("- 평균 대비 1/3 이하 카테고리:")
        for c, n in sorted(sparse, key=lambda x: x[1]):
            lines.append(f"  - **{c}**: {n}건 — 신규 진입 검토 가치 있음")
    lines.append("")
    return "\n".join(lines)


def cmd_export_digest(args: argparse.Namespace) -> int:
    """Write the weekly digest Markdown to ``args.export_digest``.

    ``args.focus`` is an optional comma-separated category list and
    ``args.today`` an optional ``YYYY-MM-DD`` reference date (defaults to
    the current date).

    Returns:
        0 on success; 2 when ``--today`` cannot be parsed as a date. A
        malformed reference date would otherwise silently empty the expiry
        countdown section, because every day difference becomes unknown.
    """
    today = args.today or _today_iso()
    # Validate with the same parser the digest uses for its countdown.
    if _days_to(today, today) is None:
        print(
            f"[ERROR] --today 값은 YYYY-MM-DD 형식이어야 합니다: {today}",
            file=sys.stderr,
        )
        return 2

    patents = _lazy_load_patents()
    focus = None
    if args.focus:
        focus = [x.strip() for x in args.focus.split(",") if x.strip()]
    md = _format_digest_md(patents, focus, today)
    out = args.export_digest
    with open(out, "w", encoding="utf-8") as f:
        f.write(md)
    # Report the UTF-8 encoded size: len(md) counts code points, which is
    # smaller than the byte count for the Korean text in the digest.
    size = len(md.encode("utf-8"))
    print(f"[OK] weekly digest 저장: {out} ({size} bytes)")
    print(DISCLAIMER)
    return 0


def cmd_check(args: argparse.Namespace) -> int:
    """Load the data file and print basic integrity statistics.

    Reports the record count, how many required fields are absent (with up
    to five ``(record index, field)`` examples), and how many families have
    members in more than one distinct office.

    Records lacking ``family_id`` are left out of the family statistics, and
    a missing ``office`` contributes no office to its family, so the check
    reports incomplete records instead of failing with KeyError on them.
    Always returns 0.
    """
    patents = _lazy_load_patents()
    required = ["family_id", "office", "pub_no", "assignee", "category", "claim1_summary_ko"]
    missing = [
        (i, k)
        for i, p in enumerate(patents)
        for k in required
        if k not in p
    ]
    print(f"records: {len(patents)}")
    print(f"missing fields: {len(missing)}")
    if missing:
        print(f"  e.g. {missing[:5]}")

    # family_id -> set of distinct offices seen for that family.
    families: dict = {}
    for p in patents:
        if "family_id" not in p:
            continue  # already counted in `missing`
        offices = families.setdefault(p["family_id"], set())
        if "office" in p:
            offices.add(p["office"])
    # Distinct offices, so two members from the same office are not
    # counted as multi-office.
    multi_office = sum(1 for offices in families.values() if len(offices) > 1)
    print(f"families with multi-office members: {multi_office}/{len(families)}")
    print(DISCLAIMER)
    return 0


def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the CLI.

    Options: ``--summary``, ``--top N``, ``--export-digest PATH``,
    ``--focus``, ``--today`` and ``--check``. The disclaimer text is shown
    as the help epilog.
    """
    parser = argparse.ArgumentParser(
        prog="dm-patent-landscape-kor",
        description=(
            "DMPatentLandscape-Kor: USPTO·EPO·KIPO·JPO·CNIPA·WIPO 당뇨 특허 "
            "landscape watch CLI (mock data, 한국어 weekly digest)."
        ),
        epilog=DISCLAIMER,
    )

    # (flag, add_argument keyword options), registered in help-output order.
    option_table = [
        ("--summary", {
            "action": "store_true",
            "help": "office별·category별 분포와 한국 관련 통계를 출력한다.",
        }),
        ("--top", {
            "type": int,
            "default": 0,
            "metavar": "N",
            "help": "patent count 기준 상위 N개 assignee를 출력한다 (예: --top 10).",
        }),
        ("--export-digest", {
            "type": str,
            "default": None,
            "metavar": "PATH",
            "help": "weekly digest Markdown을 PATH에 저장한다 (예: --export-digest digest.md).",
        }),
        ("--focus", {
            "type": str,
            "default": None,
            "help": "--export-digest와 함께 사용. 콤마 구분 카테고리 (예: GLP-1RA,SGLT2i,CGM).",
        }),
        ("--today", {
            "type": str,
            "default": None,
            "help": "--export-digest 기준일 (YYYY-MM-DD, 기본은 오늘).",
        }),
        ("--check", {
            "action": "store_true",
            "help": "mock data 로드 + 무결성 점검 (필드 완정성, family multi-office 통계).",
        }),
    ]
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    return parser


def main(argv: Optional[list] = None) -> int:
    """Parse *argv* and run every requested command in a fixed order.

    Order: summary, top, export-digest, check. The first command returning
    a non-zero status stops the run and that status is returned. When no
    command is requested, the help text and the disclaimer are printed.
    Returns 0 otherwise.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # (requested?, handler) pairs in execution order.
    dispatch = [
        (args.summary, cmd_summary),
        (args.top and args.top > 0, cmd_top),
        (args.export_digest, cmd_export_digest),
        (args.check, cmd_check),
    ]
    selected = [handler for wanted, handler in dispatch if wanted]

    if not selected:
        parser.print_help()
        print()
        print(DISCLAIMER)
        return 0

    for handler in selected:
        status = handler(args)
        if status != 0:
            return status
    return 0


# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
