"""Modality csv loaders + cohort integrity checks."""
from __future__ import annotations

import os
from typing import Dict, List

import numpy as np
import pandas as pd

from .schemas import MODALITY_COLUMNS, ALLOWED_MUSCLES, ALLOWED_FIBER_TYPES, ALLOWED_MYOKINES


# Modality names that load_cohort() always loads, in this order. Each name
# maps to "<name>.csv" inside the cohort directory (see _csv_path); a missing
# file for any of them makes load_modality() raise FileNotFoundError.
REQUIRED_MODALITIES = [
    "cohort_meta",
    "body_weight",
    "body_composition",
    "grip_strength",
    "treadmill",
    "microct_muscle",
    "myofiber_hcs",
    "myokine",
]


def _csv_path(cohort_dir: str, name: str) -> str:
    """Return the path of the ``<name>.csv`` file inside *cohort_dir*."""
    filename = "{}.csv".format(name)
    return os.path.join(cohort_dir, filename)


def load_modality(cohort_dir: str, name: str) -> pd.DataFrame:
    """Read one modality csv from *cohort_dir* and validate its columns.

    The file is ``<cohort_dir>/<name>.csv``. Every column listed in
    ``MODALITY_COLUMNS[name]`` must be present; extra columns are allowed.

    Raises:
        FileNotFoundError: the csv file does not exist.
        ValueError: one or more expected columns are absent.
    """
    csv_file = _csv_path(cohort_dir, name)
    if os.path.exists(csv_file):
        frame = pd.read_csv(csv_file)
    else:
        raise FileNotFoundError(f"missing modality file: {csv_file}")

    # Collect every absent column so the error names all of them at once.
    absent = [col for col in MODALITY_COLUMNS[name] if col not in frame.columns]
    if absent:
        raise ValueError(f"{name}: missing columns {absent}")
    return frame


def load_cohort(cohort_dir: str) -> Dict[str, pd.DataFrame]:
    """Load every modality of a cohort directory into a name -> frame dict.

    All entries of ``REQUIRED_MODALITIES`` are loaded unconditionally (so a
    missing or malformed file raises from ``load_modality``). The optional
    ``running_wheel`` and ``exvivo_force`` modalities are loaded only when
    their csv file exists.
    """
    loaded: Dict[str, pd.DataFrame] = {
        required: load_modality(cohort_dir, required)
        for required in REQUIRED_MODALITIES
    }

    # Optional modalities: absence is fine, but a present file is still
    # validated by load_modality.
    for optional in ("running_wheel", "exvivo_force"):
        if os.path.exists(_csv_path(cohort_dir, optional)):
            loaded[optional] = load_modality(cohort_dir, optional)

    return loaded


def _category_issues(df: pd.DataFrame, name: str, column: str, allowed, label: str) -> List[str]:
    """Return issue strings for values of ``df[column]`` that are not in *allowed*.

    *allowed* may be any iterable of permitted values. A missing *column* is
    reported as an issue instead of raising, mirroring how a missing
    ``mouse_id`` column is handled.
    """
    if column not in df.columns:
        return [f"{name}: no {column} column"]
    unexpected = set(df[column].unique()) - set(allowed)
    if not unexpected:
        return []
    # Sort for a deterministic message; key=str keeps mixed types (e.g. NaN
    # alongside strings) from raising during comparison.
    return [f"{name}: unknown {label} {sorted(unexpected, key=str)}"]


def integrity_check(modalities: Dict[str, pd.DataFrame]) -> List[str]:
    """Cross-check loaded modalities and return a list of issue messages.

    Checks performed:
      * every modality other than ``cohort_meta`` has a ``mouse_id`` column
        whose values (compared as strings) all appear in
        ``cohort_meta["mouse_id"]``;
      * ``microct_muscle.muscle``, ``myofiber_hcs.fiber_type`` and
        ``myokine.analyte`` only contain values from the matching
        ``ALLOWED_*`` set (each check runs only if that modality is present).

    Args:
        modalities: mapping of modality name to its DataFrame; must contain
            ``"cohort_meta"`` with a ``mouse_id`` column.

    Returns:
        Human-readable issue strings; an empty list means no problems found.

    Raises:
        KeyError: ``cohort_meta`` (or its ``mouse_id`` column) is missing.
    """
    issues: List[str] = []
    meta = modalities["cohort_meta"]
    known_ids = set(meta["mouse_id"].astype(str).tolist())

    for name, df in modalities.items():
        if name == "cohort_meta":
            continue
        if "mouse_id" not in df.columns:
            issues.append(f"{name}: no mouse_id column")
            continue
        unknown = sorted(set(df["mouse_id"].astype(str).tolist()) - known_ids)
        if unknown:
            # Show at most three ids; the ellipsis appears only when the
            # list was actually truncated.
            suffix = "..." if len(unknown) > 3 else ""
            issues.append(f"{name}: unknown mouse_ids {unknown[:3]}{suffix}")

    # Category checks; each allowed-set is looked up only when its modality
    # is actually present.
    if "microct_muscle" in modalities:
        issues.extend(_category_issues(
            modalities["microct_muscle"], "microct_muscle", "muscle",
            ALLOWED_MUSCLES, "muscles",
        ))

    if "myofiber_hcs" in modalities:
        issues.extend(_category_issues(
            modalities["myofiber_hcs"], "myofiber_hcs", "fiber_type",
            ALLOWED_FIBER_TYPES, "fiber types",
        ))

    if "myokine" in modalities:
        issues.extend(_category_issues(
            modalities["myokine"], "myokine", "analyte",
            ALLOWED_MYOKINES, "analytes",
        ))

    return issues


def write_modality(cohort_dir: str, name: str, df: pd.DataFrame) -> str:
    """Save *df* as ``<cohort_dir>/<name>.csv`` and return the written path.

    The cohort directory is created if needed and the frame's index is not
    written. Used by the demo data writer.
    """
    os.makedirs(name=cohort_dir, exist_ok=True)
    target = _csv_path(cohort_dir, name)
    df.to_csv(target, index=False)
    return target
