"""어제 vs 오늘 snapshot의 diff event 추출.

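Diff event format:
    {
        "event": "new_trial" | "phase_change" | "status_change"
                 | "results_posted" | "ae_added" | "primary_completion_change"
                 | "removed_trial" | "new_fda_item",
        "nct_id": "...",  (CTG diffs only)
        "id": "...",       (FDA diffs only)
        "drug": "...",      (CTG diffs only)
        "sponsor": "...",   (CTG diffs only)
        "mechanism": "...", (CTG diffs only)
        "before": ...,
        "after": ...,
        "detail": "one-line human-readable summary"
    }

FDA diff events carry "category", "drugs", "headline" and "date" in place of
"drug", "sponsor" and "mechanism".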
"""

from __future__ import annotations

from typing import Any, Dict, List


# Per-trial fields that diff_ctg compares between yesterday's and today's
# snapshot; a differing value can produce one field-level diff event.
CTG_FIELDS = ("phase", "status", "results_posted", "primary_completion")


def _ctg_event(event: str, nct: str, trial: Dict[str, Any], before: Any, after: Any,
               detail: str) -> Dict[str, Any]:
    """Build one CTG diff event, copying the identifying fields from ``trial``."""
    return {
        "event": event,
        "nct_id": nct,
        "drug": trial.get("drug", ""),
        "sponsor": trial.get("sponsor", ""),
        "mechanism": trial.get("mechanism", ""),
        "before": before,
        "after": after,
        "detail": detail,
    }


def diff_ctg(yesterday: Dict[str, Any], today: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Compare two CTG snapshots and return the resulting diff events.

    Events are emitted in a deterministic order: new trials (in today's
    snapshot order), removed trials (in yesterday's snapshot order), then, for
    each trial present in both snapshots (in today's snapshot order), its
    field-level changes in ``CTG_FIELDS`` order followed by an ``ae_added``
    event when today's snapshot lists AE terms that yesterday's did not.

    Args:
        yesterday: Previous snapshot. Its ``"trials"`` entry is a list of
            trial dicts, each with an ``"nct_id"``. A missing or ``None``
            list is treated as empty.
        today: Current snapshot, same layout as ``yesterday``.

    Returns:
        A list of event dicts with the keys ``event``, ``nct_id``, ``drug``,
        ``sponsor``, ``mechanism``, ``before``, ``after`` and ``detail``.

    Raises:
        KeyError: If a trial dict has no ``"nct_id"``, or if ``CTG_FIELDS``
            names a field that has no event-name mapping below.
    """
    # Built once instead of once per changed field.
    event_names = {
        "phase": "phase_change",
        "status": "status_change",
        "results_posted": "results_posted",
        "primary_completion": "primary_completion_change",
    }

    # ``or []`` tolerates an explicit null list, matching the ae_terms handling.
    y_index = {t["nct_id"]: t for t in (yesterday.get("trials") or [])}
    t_index = {t["nct_id"]: t for t in (today.get("trials") or [])}

    events: List[Dict[str, Any]] = []

    # Trials that appear only in today's snapshot.
    for nct, trial in t_index.items():
        if nct in y_index:
            continue
        events.append(_ctg_event(
            "new_trial", nct, trial,
            None,
            trial.get("phase", ""),
            f"신규 trial 등록: {trial.get('title', nct)} ({trial.get('phase', '')})",
        ))

    # Trials that appear only in yesterday's snapshot.
    for nct, trial in y_index.items():
        if nct in t_index:
            continue
        events.append(_ctg_event(
            "removed_trial", nct, trial,
            trial.get("status", ""),
            None,
            f"trial 삭제/취하: {trial.get('title', nct)}",
        ))

    # Trials present in both snapshots. Walking today's index (rather than a
    # set intersection) keeps the event order stable from run to run.
    for nct, t in t_index.items():
        if nct not in y_index:
            continue
        y = y_index[nct]

        for field in CTG_FIELDS:
            before, after = y.get(field), t.get(field)
            if before == after:
                continue
            # results_posted is only reported when today's value is truthy
            # (i.e. results became available), not when it is cleared.
            if field == "results_posted" and not after:
                continue
            events.append(_ctg_event(
                event_names[field], nct, t,
                before,
                after,
                _describe_field_change(t, field, before, after),
            ))

        # AE terms are compared as sets; only additions are reported.
        y_ae = set(y.get("ae_terms", []) or [])
        t_ae = set(t.get("ae_terms", []) or [])
        added = t_ae - y_ae
        if added:
            events.append(_ctg_event(
                "ae_added", nct, t,
                sorted(y_ae),
                sorted(t_ae),
                f"새 AE term 추가: {', '.join(sorted(added))}",
            ))

    return events


def _describe_field_change(trial: Dict[str, Any], field: str, before: Any, after: Any) -> str:
    drug = trial.get("drug", "?")
    if field == "phase":
        return f"{drug} phase 변경: {before} -> {after}"
    if field == "status":
        return f"{drug} status 변경: {before} -> {after}"
    if field == "results_posted":
        return f"{drug} results posted (이전 {before} -> 현재 {after})"
    if field == "primary_completion":
        return f"{drug} primary completion 변경: {before} -> {after}"
    return f"{drug} {field} 변경: {before} -> {after}"


def diff_fda(yesterday: Dict[str, Any], today: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return a ``new_fda_item`` event for each FDA item whose id is new today.

    Args:
        yesterday: Previous FDA snapshot. Its ``"items"`` entry is a list of
            item dicts, each with an ``"id"``. A missing or ``None`` list is
            treated as empty.
        today: Current FDA snapshot, same layout as ``yesterday``.

    Returns:
        One event per item in ``today`` whose ``"id"`` does not occur in
        ``yesterday``, in today's snapshot order.

    Raises:
        KeyError: If an item dict has no ``"id"``.
    """
    # ``or []`` tolerates an explicit null list (e.g. JSON ``null``) in the
    # same way the other feeds in this module tolerate null sections.
    known_ids = {item["id"] for item in (yesterday.get("items") or [])}

    events: List[Dict[str, Any]] = []
    for item in (today.get("items") or []):
        item_id = item["id"]
        if item_id in known_ids:
            continue
        headline = item.get("headline", "")
        events.append({
            "event": "new_fda_item",
            "id": item_id,
            "category": item.get("category", ""),
            "drugs": item.get("drugs", []),
            "headline": headline,
            "date": item.get("date", ""),
            "before": None,
            # There is no previous value for a new item; the headline is
            # reported as the "after" state.
            "after": headline,
            "detail": f"신규 FDA 항목: {headline}",
        })
    return events


def collect_all(snapshots: Dict[str, Any]) -> Dict[str, List[Dict[str, Any]]]:
    """Turn a snapshot bundle into per-source event lists.

    The CTG and FDA sections are diffed (``"yesterday"`` vs. ``"today"``) and
    must be present. The EMA, PMDA, IR and AASLD sections are not diffed:
    every listed entry becomes one event, and a missing or falsy section
    yields an empty list.

    Args:
        snapshots: Bundle with the keys ``"ctg"`` and ``"fda"`` (each holding
            ``"yesterday"`` and ``"today"`` snapshots) and optionally
            ``"ema"``, ``"pmda"``, ``"ir"`` (each with ``"items"``) and
            ``"aasld"`` (with ``"abstracts"``).

    Returns:
        A dict mapping ``"ctg"``, ``"fda"``, ``"ema"``, ``"pmda"``, ``"ir"``
        and ``"aasld"`` to their event lists.

    Raises:
        KeyError: If the CTG/FDA sections or their ``"yesterday"``/``"today"``
            snapshots are missing, or if an entry has no ``"id"``.
    """

    def entries(source: str, key: str = "items") -> List[Dict[str, Any]]:
        # A missing or falsy section counts as having no entries.
        section = snapshots.get(source) or {}
        return section.get(key, [])

    def regulator_events(source: str, event_name: str, label: str) -> List[Dict[str, Any]]:
        # EMA and PMDA entries share one layout; only the event name and the
        # label in the detail line differ.
        built: List[Dict[str, Any]] = []
        for entry in entries(source):
            headline = entry.get("headline", "")
            built.append({
                "event": event_name,
                "id": entry["id"],
                "category": entry.get("category", ""),
                "drugs": entry.get("drugs", []),
                "headline": headline,
                "date": entry.get("date", ""),
                "detail": f"{label}: {headline}",
            })
        return built

    collected: Dict[str, List[Dict[str, Any]]] = {}
    collected["ctg"] = diff_ctg(snapshots["ctg"]["yesterday"], snapshots["ctg"]["today"])
    collected["fda"] = diff_fda(snapshots["fda"]["yesterday"], snapshots["fda"]["today"])
    collected["ema"] = regulator_events("ema", "ema_item", "EMA")
    collected["pmda"] = regulator_events("pmda", "pmda_item", "PMDA")

    # Investor-relations items carry a sponsor and a type instead of a category.
    ir_events: List[Dict[str, Any]] = []
    for entry in entries("ir"):
        kind = entry.get("type", "")
        sponsor = entry.get("sponsor", "")
        headline = entry.get("headline", "")
        ir_events.append({
            "event": "ir_item",
            "id": entry["id"],
            "type": kind,
            "sponsor": sponsor,
            "drugs": entry.get("drugs", []),
            "headline": headline,
            "date": entry.get("date", ""),
            "detail": f"{sponsor} {kind}: {headline}",
        })
    collected["ir"] = ir_events

    # AASLD abstracts expose their title under "headline" and have no date.
    aasld_events: List[Dict[str, Any]] = []
    for abstract in entries("aasld", "abstracts"):
        title = abstract.get("title", "")
        topic = abstract.get("topic", "")
        aasld_events.append({
            "event": "aasld_abstract",
            "id": abstract["id"],
            "drugs": abstract.get("drugs", []),
            "headline": title,
            "topic": topic,
            "detail": f"AASLD abstract ({topic}): {title}",
        })
    collected["aasld"] = aasld_events

    return collected


# --- inline unit assertion (called from main when --self-check) -----------
def _self_check_new_trial_detection() -> None:
    """Check that diff_ctg reports a trial that exists only in today's snapshot.

    Builds a one-trial "yesterday" snapshot and a "today" snapshot holding the
    same trial plus a second one (``NCT-B``), then asserts that a
    ``new_trial`` event for ``NCT-B`` is among the results.

    Raises:
        AssertionError: If no such event is produced.
    """
    unchanged = {
        "nct_id": "NCT-A", "drug": "x", "sponsor": "S", "mechanism": "M",
        "phase": "Phase 2", "status": "Recruiting",
        "results_posted": False, "primary_completion": "2026-01",
        "ae_terms": [],
    }
    added = {
        "nct_id": "NCT-B", "drug": "y", "sponsor": "S2", "mechanism": "M2",
        "phase": "Phase 3", "status": "Recruiting",
        "results_posted": False, "primary_completion": "2027-01",
        "ae_terms": [], "title": "new",
    }

    # Each snapshot gets its own copy of the unchanged trial (with its own
    # ae_terms list) so the two snapshots share no mutable state.
    before = {"trials": [{**unchanged, "ae_terms": []}]}
    after = {"trials": [{**unchanged, "ae_terms": []}, added]}

    matches = [
        ev for ev in diff_ctg(before, after)
        if ev["event"] == "new_trial" and ev["nct_id"] == "NCT-B"
    ]
    assert matches, "differ: new_trial event not detected"
