"""diff event를 safety / efficacy / regulatory / pipeline 4개 태그로 분류.

규칙 기반(키워드 매칭). 외부 LLM 미사용.
"""

from __future__ import annotations

from typing import Dict, List


TAGS = ("safety", "efficacy", "regulatory", "pipeline")


# Keyword tables, one per tag. Keys must be lowercase: classify() lowercases
# the searched text before matching, so a key containing an uppercase letter
# could never match.
_SAFETY_KEYS = (
    "safety", "ae", "adverse", "alt", "ast", "lipase", "elevation", "dsmb",
    "alopecia", "hepatic", "signal", "pruritus",
)
_EFFICACY_KEYS = (
    "histology", "regression", "fibrosis", "primary completion", "primary_completion",
    "results posted", "results_posted", "topline", "interim", "readout", "efficacy",
    "completed",
)
_REGULATORY_KEYS = (
    "fda", "ema", "pmda", "chmp", "prac", "approval", "approved", "priority review",
    "adcomm", "label", "designation", "sakigake", "accelerated",
)
_PIPELINE_KEYS = (
    "phase", "enrollment", "milestone", "new trial", "8-k", "8k", "new_trial",
    "removed_trial",
)

# (tag, keys) pairs listed in the same order as TAGS, so tags collected by
# iterating this table are already in canonical order and free of duplicates.
_KEYS_BY_TAG = (
    ("safety", _SAFETY_KEYS),
    ("efficacy", _EFFICACY_KEYS),
    ("regulatory", _REGULATORY_KEYS),
    ("pipeline", _PIPELINE_KEYS),
)

# Purely alphanumeric keys up to this length ("ae", "alt", "ast", "ema",
# "prac", ...) are treated as acronyms and must appear as standalone tokens.
# Plain substring matching made them fire inside ordinary words such as
# "last", "health", "remains" or "practice".
_ACRONYM_MAX_LEN = 4


def _is_acronym(key: str) -> bool:
    """Return True if *key* must be matched as a whole token."""
    return len(key) <= _ACRONYM_MAX_LEN and key.isalnum()


def _keyword_hit(keys: tuple, haystack: str, padded: str) -> bool:
    """Return True if any key in *keys* occurs in the event text.

    *haystack* is the lowercased text. *padded* is the same text with every
    character that is not an ASCII letter or digit replaced by a space, plus
    one leading and one trailing space, so a whole-token test reduces to a
    substring test for ``" key "``.
    """
    for key in keys:
        if _is_acronym(key):
            # Whole-token match; a trailing plural "s" ("AEs") is accepted.
            if f" {key} " in padded or f" {key}s " in padded:
                return True
        elif key in haystack:
            # Longer words and phrases keep substring matching so inflected
            # or compound forms ("elevations", "phase_change") still match.
            return True
    return False


def classify(event: Dict[str, str]) -> List[str]:
    """Return the tags that apply to one event; never an empty list.

    The ``event``, ``category``, ``headline``, ``detail`` and ``topic``
    fields are joined, lowercased and searched for each tag's keywords.
    Short acronym keys match only as standalone tokens; all other keys
    match as substrings. If no keyword matches, a tag is chosen from the
    ``event`` field, defaulting to ``"pipeline"``.

    Args:
        event: Mapping describing one diff event. Missing or ``None``
            fields are ignored.

    Returns:
        Matching tags in the order defined by ``TAGS``.
    """
    fields = ("event", "category", "headline", "detail", "topic")
    values = (event.get(name) for name in fields)
    haystack = " ".join(str(v) for v in values if v is not None).lower()

    # Only ASCII letters/digits count as token characters, so an acronym
    # directly followed by non-ASCII text (e.g. a Korean particle) or by
    # "_" / "-" is still recognised as a separate token.
    padded = " " + "".join(
        ch if ch.isascii() and ch.isalnum() else " " for ch in haystack
    ) + " "

    tags = [
        tag for tag, keys in _KEYS_BY_TAG if _keyword_hit(keys, haystack, padded)
    ]
    if tags:
        return tags

    # No keyword matched: fall back on the event kind.
    ev = event.get("event", "")
    if ev in ("ae_added",):
        return ["safety"]
    if ev in ("results_posted", "aasld_abstract"):
        return ["efficacy"]
    if ev in ("new_fda_item", "ema_item", "pmda_item"):
        return ["regulatory"]
    # new_trial, removed_trial, phase_change, status_change,
    # primary_completion_change, ir_item and any unknown kind.
    return ["pipeline"]


def classify_bucket(events: List[Dict[str, str]]) -> List[Dict[str, object]]:
    """Return shallow copies of *events*, each with a ``"tags"`` entry.

    The input events are not modified; every copy carries the result of
    ``classify`` for the corresponding original event under ``"tags"``.
    """
    return [{**item, "tags": classify(item)} for item in events]
