"""Abstract <-> ClinicalTrials.gov trial linker."""

from . import vocab


def _normalize_drug(name):
    """Return a comparison key for a drug name.

    The name is stripped of surrounding whitespace, lowercased, and has
    every space and hyphen removed. Falsy input (``None``, ``""``) yields
    the empty string.
    """
    if name:
        cleaned = name.strip().lower()
        # Drop the separators that commonly vary between spellings.
        for separator in (" ", "-"):
            cleaned = cleaned.replace(separator, "")
        return cleaned
    return ""


def build_trial_index(trials):
    """Index CTG trials by normalized drug name.

    Each trial is filed under the normalized form of every name in its
    "drugs" list. When a name (stripped and lowercased) equals an alias
    reported by ``vocab.all_aliases()``, the trial is also filed under the
    normalized canonical name for that alias.

    Args:
        trials: Iterable of trial dicts. Each may carry a "drugs" list; a
            trial that gets indexed must have an "nct_id" key.

    Returns:
        Dict mapping normalized drug key -> list of trial dicts, with each
        trial appearing at most once per key (deduplicated by "nct_id"),
        in input order.

    Raises:
        KeyError: If a trial with at least one usable drug name lacks
            "nct_id".
    """
    # Build the alias table once instead of re-scanning all aliases for
    # every drug of every trial. setdefault keeps the first pair seen for an
    # alias, matching a linear scan that stops at the first hit.
    alias_to_canonical = {}
    for alias_lower, canonical in vocab.all_aliases():
        alias_to_canonical.setdefault(alias_lower, canonical)

    index = {}
    seen_ids = {}  # key -> set of nct_ids already filed under that key

    def _add(key, trial):
        # Blank/None names all normalize to ""; filing trials under that
        # key would create a bucket of unrelated trials, so skip it.
        if not key:
            return
        ids_for_key = seen_ids.setdefault(key, set())
        nct_id = trial["nct_id"]
        if nct_id not in ids_for_key:
            ids_for_key.add(nct_id)
            index.setdefault(key, []).append(trial)

    for trial in trials:
        # `or []` also covers an explicit "drugs": None.
        for drug in trial.get("drugs") or []:
            if not drug:
                continue
            _add(_normalize_drug(drug), trial)
            # Alias lookup is on the stripped, lowercased name (spaces and
            # hyphens kept), not on the normalized key.
            canonical = alias_to_canonical.get(drug.strip().lower())
            if canonical is not None:
                _add(_normalize_drug(canonical), trial)
    return index


def link_abstract(abstract, trial_index):
    """Return list of CTG trials matching the abstract's drugs.

    Args:
        abstract: Mapping with an optional "drugs" list of drug names.
        trial_index: Mapping of normalized drug key -> list of trial dicts
            (each with an "nct_id" key).

    Returns:
        List of trial dicts, deduplicated by "nct_id", ordered by the
        abstract's drug order and then by each key's list order in
        ``trial_index``.
    """
    # NOTE(review): drug names are looked up as written (after
    # normalization); aliases are not resolved to canonical names here.
    # Confirm callers pass canonical names if alias matching is expected.
    matches = []
    seen = set()
    # `or []` also covers an explicit "drugs": None.
    for drug in abstract.get("drugs") or []:
        key = _normalize_drug(drug)
        if not key:
            # Blank/None names normalize to ""; matching on that key would
            # link the abstract to unrelated trials.
            continue
        for trial in trial_index.get(key, []):
            nct_id = trial["nct_id"]
            if nct_id not in seen:
                matches.append(trial)
                seen.add(nct_id)
    return matches


def link_all(abstracts, trials):
    """Return copies of the abstracts, each with a 'linked_trials' list.

    The trial index is built once from ``trials``. Every abstract is
    shallow-copied, so the input records are not modified, and the copy
    gets a "linked_trials" entry holding the result of ``link_abstract``.
    """
    trial_index = build_trial_index(trials)

    def _with_links(record):
        # Shallow copy so the caller's record is left untouched.
        enriched = dict(record)
        enriched["linked_trials"] = link_abstract(record, trial_index)
        return enriched

    return [_with_links(record) for record in abstracts]
