"""Discordance bias direction taxonomy — MASLD-specific."""
from __future__ import annotations

from typing import Dict, List, Optional

from .grid import design_summary
from .mediation import masld_mediated_fraction

# Static catalogue of discordance-bias categories.
#
# Each entry is a dict with the same five keys:
#   code        - stable identifier; diagnose_discordance() in this file puts
#                 these values into the "code" field of the flags it returns.
#   name        - short human-readable label.
#   description - explanation of the bias mechanism (mixed Korean/English text).
#   triggers    - symbolic trigger labels. Nothing visible in this file parses
#                 or evaluates them; they read as documentation of when the
#                 bias is suspected.
#   remedy      - suggested analytic mitigation.
#
# The strings are runtime data and must not be edited for cosmetic reasons;
# the "English gloss" comments below translate the Korean parts for reviewers.
BIAS_TAXONOMY: List[Dict[str, object]] = [
    {
        "code": "metabolic_syndrome_confounding",
        "name": "Metabolic syndrome confounding",
        # English gloss: obesity/T2DM/dyslipidemia affect both MASLD and the
        # outcome; observational studies overestimate.
        "description": "비만/T2DM/이상지질혈증이 MASLD와 결과 모두에 영향. 관찰연구 과대추정.",
        "triggers": ["obs_strong AND mr_liver_null", "BMI_polygenic_strong"],
        # English gloss: MVMR using PNPLA3 + BMI + T2DM as simultaneous IVs.
        "remedy": "MVMR with PNPLA3 + BMI + T2DM 동시 IV.",
    },
    {
        "code": "shared_genetic_confounder",
        "name": "Shared genetic confounder",
        # English gloss: the same variant affects both MASLD and metabolic
        # traits (e.g. GCKR); pleiotropy.
        "description": "동일 변이가 MASLD와 metabolic 둘다 영향 (GCKR 등). pleiotropy.",
        "triggers": ["GCKR_strong", "MR_pleiotropy_egger_pval<0.05"],
        "remedy": "MR-Egger / MR-PRESSO / leave-one-out.",
    },
    {
        "code": "reverse_causation",
        "name": "Reverse causation",
        # English gloss: the outcome causes MASLD (e.g. heart failure ->
        # hepatic congestion steatosis).
        "description": "결과가 MASLD를 유발 (예: 심부전→간 울혈 steatosis).",
        "triggers": ["RCT_null AND obs_strong AND temporal_questionable"],
        "remedy": "Within-subject lifestyle crossover, temporal MR.",
    },
    {
        "code": "mr_pleiotropy",
        "name": "MR horizontal pleiotropy",
        # English gloss: the instrument affects the outcome directly through
        # a path other than the exposure.
        "description": "Instrument이 노출 외 경로로 결과에 직접 영향 (TM6SF2 → LDL↓ → CV↓).",
        "triggers": ["TM6SF2_discordant", "instrument_known_pleiotropy"],
        "remedy": "Multi-instrument MR, MR-Egger intercept.",
    },
    {
        "code": "canalization",
        "name": "Developmental canalization",
        # English gloss: difference between lifelong exposure and short-term
        # RCT effects (MR > RCT).
        "description": "평생 노출 vs 단기 RCT 효과 차이 (MR > RCT).",
        "triggers": ["MR_strong AND RCT_attenuated"],
        "remedy": "Bridging animal/human, longer RCT follow-up.",
    },
    {
        "code": "dose_translation",
        "name": "Dose / window of exposure mismatch",
        # English gloss: MR (lifelong, small) vs RCT (short, large) effects
        # are hard to compare.
        "description": "MR (lifelong small) vs RCT (short large) 비교 어려움.",
        "triggers": ["effect_magnitude_discordant"],
        "remedy": "Standardize per SD log-liability scale.",
    },
    {
        "code": "external_validity_korean",
        "name": "External validity — Korean / lean MASLD",
        # English gloss: European baseline allele/BMI distribution differs
        # from Korean lean MASLD.
        "description": "유럽 baseline allele/BMI 분포가 한국 lean MASLD 와 다름.",
        "triggers": ["ancestry_mismatch", "lean_MASLD_subset"],
        "remedy": "KoNEHS·NHIS replication, Korean PNPLA3 layer.",
    },
    {
        "code": "detection_bias_nit",
        "name": "Detection bias — NIT (FIB-4/VCTE)",
        # English gloss: FIB-4 / VCTE cutoff differences -> stage
        # misclassification.
        "description": "FIB-4·VCTE cutoff 차이 → stage misclassification.",
        "triggers": ["nit_cutoff_variance", "vcte_iqr_high"],
        "remedy": "VCTE IQR<30%, FIB-4 ≥1.30 sensitivity.",
    },
    {
        "code": "selection_bias_biopsy",
        "name": "Selection bias — biopsy cohort",
        # English gloss: a biopsied cohort implies clinical suspicion ->
        # limited generalization to the general population.
        "description": "Biopsy 받은 cohort = 임상적 의심 → 일반 인구 일반화 제한.",
        "triggers": ["biopsy_only_cohort"],
        "remedy": "Population-based NIT cohort triangulation.",
    },
    {
        "code": "competing_risk",
        "name": "Competing risk",
        # English gloss: in cirrhosis patients LRC mortality > CV death
        # (competing causes of death).
        "description": "Cirrhosis 환자 LRC mortality > CV death (사망 경합).",
        "triggers": ["cirrhosis_subset", "long_followup"],
        "remedy": "Fine-Gray competing risk model.",
    },
]


def diagnose_discordance(
    effects: List[Dict[str, object]],
    stage: str,
    outcome: str,
) -> Dict[str, object]:
    """Diagnose discordance for one (stage, outcome) pair.

    Applies a fixed set of heuristics to the pooled per-design summary and to
    the individual effect rows, and returns the bias codes that fired.

    Args:
        effects: Effect rows as dicts. This function reads the keys
            ``masld_stage``, ``outcome``, ``mr_instrument``,
            ``effect_estimate``, ``ancestry``, ``biomarker`` and
            ``population`` via ``.get``; missing keys are tolerated.
        stage: MASLD stage label; rows are matched on exact equality.
        outcome: Outcome label; rows are matched on exact equality.

    Returns:
        A dict with:

        * ``stage`` / ``outcome``: echoed inputs.
        * ``n_flags``: number of distinct bias codes flagged.
        * ``flags``: one ``{"code", "trigger"}`` dict per distinct code, in
          first-seen order. When several heuristics raise the same code,
          their trigger texts are joined with ``"; "``.
        * ``taxonomy_codes_referenced``: sorted list of the flagged codes.
        * ``mediation_summary``: ``frac_masld_mediated`` and
          ``frac_metabolic_mediated`` taken from ``masld_mediated_fraction``.

    Raises:
        KeyError: If ``design_summary`` does not provide the
            ``"observational"``, ``"MR"`` and ``"RCT"`` entries (each with
            ``"n"`` and ``"effect"``) that are indexed here.
    """
    summary = design_summary(effects, stage, outcome)
    med = masld_mediated_fraction(effects, stage, outcome)

    obs = summary["observational"]
    mr = summary["MR"]
    rct = summary["RCT"]

    # Rows belonging to this exact (stage, outcome) cell. Computed once and
    # shared by every row-level heuristic below.
    rows = [
        e for e in effects
        if e.get("masld_stage") == stage and e.get("outcome") == outcome
    ]

    flags: List[Dict[str, str]] = []

    # NOTE(review): the thresholds below treat values near 1.0 as "null", so
    # effects are presumably on a ratio scale — confirm against design_summary.
    # A falsy effect (None or 0) is treated as "no estimate available".

    # Strong observational association but near-null MR estimate.
    if obs["n"] > 0 and mr["n"] > 0 and obs["effect"] and mr["effect"]:
        if obs["effect"] > 1.5 and 0.85 < mr["effect"] < 1.20:
            flags.append({
                "code": "metabolic_syndrome_confounding",
                "trigger": f"obs effect {obs['effect']:.2f} (강) vs MR {mr['effect']:.2f} (null)",
            })

    # TM6SF2-instrumented MR pointing the opposite way to the observational
    # estimate. Only the first TM6SF2 row is inspected.
    tm6_rows = [r for r in rows if r.get("mr_instrument") == "TM6SF2"]
    if tm6_rows:
        tm_eff = tm6_rows[0].get("effect_estimate")
        if tm_eff and tm_eff < 0.95 and obs["effect"] and obs["effect"] > 1.10:
            flags.append({
                "code": "mr_pleiotropy",
                "trigger": f"TM6SF2 MR {tm_eff:.2f} (방향 ↓) vs obs {obs['effect']:.2f} (↑) — discordant",
            })

    # Strong MR estimate with an attenuated (near-null) RCT estimate.
    if mr["effect"] and rct["n"] > 0 and rct["effect"]:
        if mr["effect"] > 1.5 and 0.85 < rct["effect"] < 1.15:
            flags.append({
                "code": "canalization",
                "trigger": f"MR {mr['effect']:.2f} 강 vs RCT {rct['effect']:.2f} attenuated (lifelong vs 단기)",
            })

    # External validity: no row for this cell is tagged with Korean ancestry.
    korean = [r for r in rows if (r.get("ancestry") or "").lower() == "korean"]
    if not korean:
        flags.append({
            "code": "external_validity_korean",
            "trigger": "한국 ancestry 자료 없음 — replication 필요",
        })

    # Lean MASLD subphenotype. Deliberately shares the external-validity code
    # with the ancestry check above; both triggers are preserved by the merge
    # step at the end.
    if "lean" in stage.lower() or any("lean" in (r.get("biomarker") or "").lower() for r in rows):
        flags.append({
            "code": "external_validity_korean",
            "trigger": "Lean MASLD subphenotype — 일반 BMI≥25 cohort 일반화 불가",
        })

    # Selection bias: any row whose population text mentions biopsy.
    if any("biopsy" in (r.get("population") or "").lower() for r in rows):
        flags.append({
            "code": "selection_bias_biopsy",
            "trigger": "Biopsy cohort — 일반 인구 일반화 제한",
        })

    # Detection bias: staging based on a non-invasive test (FIB-4 / VCTE).
    if any((r.get("biomarker") or "").upper().startswith(("FIB", "VCTE")) for r in rows):
        flags.append({
            "code": "detection_bias_nit",
            "trigger": "NIT-based staging — cutoff variance check",
        })

    # Competing risk: advanced stages combined with a mortality outcome.
    if stage in ("F3", "F4") and outcome in ("CV death", "all-cause mortality"):
        flags.append({
            "code": "competing_risk",
            "trigger": "Cirrhosis ± advanced fibrosis cohort에서 hepatic mortality 경합",
        })

    # Collapse to one flag per code, keeping first-seen order (dicts preserve
    # insertion order). Later triggers for an already-seen code are appended
    # to the kept flag instead of being discarded, so e.g. the lean-MASLD
    # reason is not lost when the Korean-ancestry flag fired first.
    merged: Dict[str, Dict[str, str]] = {}
    for flag in flags:
        kept = merged.get(flag["code"])
        if kept is None:
            merged[flag["code"]] = dict(flag)
        else:
            kept["trigger"] = f"{kept['trigger']}; {flag['trigger']}"
    dedup_flags = list(merged.values())

    return {
        "stage": stage,
        "outcome": outcome,
        "n_flags": len(dedup_flags),
        "flags": dedup_flags,
        "taxonomy_codes_referenced": sorted(merged),
        "mediation_summary": {
            "frac_masld_mediated": med.get("frac_masld_mediated"),
            "frac_metabolic_mediated": med.get("frac_metabolic_mediated"),
        },
    }


def taxonomy() -> List[Dict[str, object]]:
    """Return an independent copy of the bias taxonomy table.

    The result is a deep copy of ``BIAS_TAXONOMY``: callers may freely mutate
    the returned list, its entry dicts, or the nested ``triggers`` lists
    without affecting the module-level constant. (A shallow ``list(...)``
    copy would still share the entry dicts with the constant.)

    Returns:
        A new list of taxonomy entries equal in value to ``BIAS_TAXONOMY``.
    """
    # Function-scope import so this block does not depend on a file-level
    # import being added elsewhere.
    from copy import deepcopy

    return deepcopy(BIAS_TAXONOMY)