"""Controlled vocabulary for DKDPulse.

All keyword sets are case-insensitive (matched after .lower()).
Curated from public clinical knowledge of DKD pharmacology and trial endpoints.
No copyrighted material is reproduced; these are common drug names and
endpoint terms used in the published literature.
"""

# Drug-class vocabulary: class label -> search terms seen in titles/abstracts.
# Every list opens with class-level spellings and then enumerates individual
# agents. The entries are generic / investigational names and development
# codes; no brand names are listed here.
# NOTE(review): retatrutide, mazdutide and cagrisema are filed under
# "dual-GIP-GLP1" although they do not look like strict GIP/GLP-1
# co-agonists — confirm the grouping is intentional.
DRUG_CLASSES = {
    "SGLT2i": [
        # class-level terms
        "sglt2",
        "sglt-2",
        "sodium-glucose cotransporter",
        # individual agents
        "empagliflozin",
        "dapagliflozin",
        "canagliflozin",
        "ertugliflozin",
        "ipragliflozin",
        "luseogliflozin",
        "tofogliflozin",
        "bexagliflozin",
        "sotagliflozin",
    ],
    "MRA": [
        # class-level terms
        "mineralocorticoid receptor antagonist",
        "ns-mra",
        "nsmra",
        # individual agents
        "finerenone",
        "esaxerenone",
        "spironolactone",
        "eplerenone",
        "apararenone",
        "ocedurenone",
        "kbp-5074",
    ],
    "GLP-1RA": [
        # class-level terms
        "glp-1",
        "glp1",
        "glucagon-like peptide-1",
        # individual agents
        "semaglutide",
        "liraglutide",
        "dulaglutide",
        "exenatide",
        "lixisenatide",
        "albiglutide",
        "efpeglenatide",
        "oral semaglutide",
    ],
    "dual-GIP-GLP1": [
        "tirzepatide",
        # class-level terms
        "gip/glp-1",
        "gip-glp1",
        "gip/glp1",
        "dual agonist",
        "twincretin",
        # further agents
        "retatrutide",
        "mazdutide",
        "cagrisema",
    ],
}

# Outcome vocabulary: six clinically meaningful DKD outcomes, each mapped to
# the phrases used to detect it.
# NOTE(review): very short tokens ("acr", "make", "krt") would also hit inside
# ordinary words if the consumer does plain substring matching — confirm the
# matcher is word-boundary aware.
OUTCOMES = {
    "eGFR_slope": [
        "egfr slope",
        "egfr decline",
        "annual egfr",
        "egfr trajectory",
        "kidney function slope",
        "ckd-epi slope",
        "estimated gfr slope",
    ],
    "UACR": [
        "uacr",
        "urine albumin",
        "albumin-to-creatinine",
        "albuminuria",
        "proteinuria",
        "acr",
        "albumin/creatinine ratio",
    ],
    "MAKE": [
        "make",
        "major adverse kidney event",
        "kidney failure",
        "esrd",
        "end-stage renal",
        "dialysis",
        "kidney replacement therapy",
        "krt",
        "doubling of serum creatinine",
        "renal composite",
    ],
    "hypoglycemia": [
        # both US and UK spellings are listed
        "hypoglycemia",
        "hypoglycaemia",
        "low blood glucose",
        "severe hypoglycemia",
        "level 2 hypoglycemia",
    ],
    "hyperkalemia": [
        # both US and UK spellings are listed
        "hyperkalemia",
        "hyperkalaemia",
        "potassium >",
        "serum potassium",
        "elevated potassium",
        "k+ elevation",
    ],
    "all_cause_death": [
        "all-cause mortality",
        "all cause mortality",
        "death from any cause",
        "overall survival",
        "all-cause death",
        "total mortality",
    ],
}

# Phenotype vocabulary: five DKD phenotypes / sub-populations, each mapped to
# the phrases used to detect it. Threshold phrases are stored exactly as
# written here (including the non-ASCII "≥"), so text must use the same
# spelling to match.
PHENOTYPES = {
    "macroalbuminuric_DKD": [
        "macroalbuminuria",
        "macroalbuminuric",
        "uacr >300",
        "uacr ≥300",
        "overt nephropathy",
        "overt proteinuria",
    ],
    "normoalbuminuric_DKD": [
        "normoalbuminuric",
        "normoalbuminuria",
        "non-albuminuric",
        "nonalbuminuric",
        "uacr <30",
        "without albuminuria",
    ],
    "T1DM_DKD": [
        "type 1 diabetes",
        "t1dm",
        "type-1 diabetes",
        "iddm",
        "insulin-dependent diabetes",
    ],
    "T2DM_DKD": [
        "type 2 diabetes",
        "t2dm",
        "type-2 diabetes",
        "niddm",
    ],
    "advanced_CKD": [
        "egfr <30",
        "egfr below 30",
        "ckd stage 4",
        "ckd stage 5",
        "advanced ckd",
        "low egfr",
        "stage 3b",
        "egfr 15-29",
    ],
}

# Guideline / society terms (KDIGO, KDA, ADA and related) whose citation
# boosts novelty. Unlike the dictionaries in this module, this is a flat list.
GUIDELINE_KEYWORDS = [
    "kdigo",
    "kda",
    "korean diabetes association",
    "ada standards",
    "ada guideline",
    "easd",
    "kdoqi",
    "guideline update",
    "consensus statement",
]


def all_drug_keywords():
    """Return a flat ``keyword -> drug class label`` dict from DRUG_CLASSES.

    Keywords are lower-cased before being used as keys. Entries are inserted
    in DRUG_CLASSES order, so a keyword listed under more than one class ends
    up mapped to the class that appears last.
    """
    return {
        term.lower(): label
        for label, terms in DRUG_CLASSES.items()
        for term in terms
    }


def all_outcome_keywords():
    """Return a flat ``keyword -> outcome label`` dict from OUTCOMES.

    Keywords are lower-cased before being used as keys. Entries are inserted
    in OUTCOMES order, so a keyword listed under more than one outcome ends
    up mapped to the outcome that appears last.
    """
    return {
        term.lower(): label
        for label, terms in OUTCOMES.items()
        for term in terms
    }


def all_phenotype_keywords():
    """Return a flat ``keyword -> phenotype label`` dict from PHENOTYPES.

    Keywords are lower-cased before being used as keys. Entries are inserted
    in PHENOTYPES order, so a keyword listed under more than one phenotype
    ends up mapped to the phenotype that appears last.
    """
    return {
        term.lower(): label
        for label, terms in PHENOTYPES.items()
        for term in terms
    }
