"""Vendor-aware perifusion CSV parsers + sample/assay metadata schemas.

Supports column-header heuristics for:
  - BioRep ("BioRep Technologies" / "Time (min)")
  - Brandel SF-06 ("Sample" / "Fraction" / "Time_min")
  - in-house generic ("time_min" / "channel_*" or "ch1..N")

The dataclass schemas are always defined and work offline; when pydantic is
installed, a mirror model (``PerifusionMetaModel``) is also defined for
stricter validation.
"""
from __future__ import annotations

import csv
import io
import os
import re
from dataclasses import dataclass, field, asdict
from typing import Any, Dict, List, Optional, Tuple

# -- optional pydantic --------------------------------------------------------
try:
    from pydantic import BaseModel, Field  # type: ignore

    _HAS_PYDANTIC = True
except Exception:  # pragma: no cover - graceful fallback
    _HAS_PYDANTIC = False

# ---------------------------------------------------------------------------
# Sample / assay metadata
# ---------------------------------------------------------------------------
# Known sample-kind labels. Presumably the accepted values for
# ``PerifusionMeta.sample_kind``; nothing in this part of the module enforces it.
SAMPLE_KINDS = {
    # primary islets
    "primary_human_islet",
    "primary_mouse_islet",
    "primary_rat_islet",
    # cell lines
    "EndoC-bH1",
    "EndoC-bH3",
    "HIT-T15",
    "INS-1_832/13",
    "MIN6",
} | {
    # "<source>-SC-beta_D<day>" labels for both sources at each listed day
    f"{source}-SC-beta_D{day}"
    for source in ("iPSC", "hPSC")
    for day in (14, 21, 28)
}

# Known assay labels (the default ``PerifusionMeta.assay`` is "ELISA").
ASSAY_KINDS = {
    "ELISA",
    "HTRF",
    "Luminex",
}


@dataclass
class GlucoseStep:
    """One stimulus segment in min/min/mM/uM units.

    The units in the summary follow the field order: start time and end time
    in minutes, glucose concentration in mM, and secretagogue concentration
    in uM. No validation is performed here (e.g. that the end time is after
    the start time).
    """

    t_start_min: float  # segment start, minutes
    t_end_min: float  # segment end, minutes
    glucose_mM: float  # glucose concentration during the segment, mM
    # Optional secretagogue label (presumably the compound name) and its
    # concentration in uM; both default to None.
    secretagogue: Optional[str] = None
    secretagogue_uM: Optional[float] = None


@dataclass
class PerifusionMeta:
    """Sample / assay metadata record for a perifusion trace.

    Only ``sample_kind`` and ``sample_id`` are required; every other field
    has a default. Units are encoded in the field-name suffixes. No
    validation is performed on construction.
    """

    # -- sample identity and normalisation quantities
    sample_kind: str
    sample_id: str
    ieq: Optional[float] = None  # islet equivalents
    cell_count: Optional[float] = None
    protein_ug: Optional[float] = None

    # -- fluidics
    flow_rate_ml_min: float = 0.1
    dead_volume_ul: float = 100.0
    tubing_length_cm: float = 30.0
    chamber_id: Optional[str] = None

    # -- assay
    assay: str = "ELISA"
    analyte: str = "insulin"  # insulin / c-peptide / glucagon / proinsulin
    llod_ng_ml: Optional[float] = None
    uloq_ng_ml: Optional[float] = None
    storage_hours: float = 0.0

    # -- stimulus protocol and data origin
    steps: List[GlucoseStep] = field(default_factory=list)
    vendor: str = "in-house"

    def to_dict(self) -> Dict[str, Any]:
        """Return the record as a plain dict, with every step as a dict.

        Keys follow field declaration order. Each entry of ``steps`` must be
        a dataclass instance, because it is converted with ``asdict``.
        """
        return {**asdict(self), "steps": list(map(asdict, self.steps))}


# Pydantic mirror for stricter validation when available.
# The class below is defined only when the optional pydantic import near the
# top of this module succeeded (_HAS_PYDANTIC is True); otherwise the name
# PerifusionMetaModel does not exist, so callers must check before using it.
if _HAS_PYDANTIC:

    # Field names, types and defaults repeat those of the PerifusionMeta
    # dataclass, with one exception: the `steps` list is not mirrored here.
    # NOTE(review): unclear whether omitting `steps` is deliberate - confirm.
    class PerifusionMetaModel(BaseModel):  # type: ignore
        # -- sample identity and normalisation quantities
        sample_kind: str
        sample_id: str
        ieq: Optional[float] = None
        cell_count: Optional[float] = None
        protein_ug: Optional[float] = None
        # -- fluidics
        flow_rate_ml_min: float = 0.1
        dead_volume_ul: float = 100.0
        tubing_length_cm: float = 30.0
        chamber_id: Optional[str] = None
        # -- assay
        assay: str = "ELISA"
        analyte: str = "insulin"
        llod_ng_ml: Optional[float] = None
        uloq_ng_ml: Optional[float] = None
        storage_hours: float = 0.0
        # -- data origin
        vendor: str = "in-house"


# ---------------------------------------------------------------------------
# Vendor detection
# ---------------------------------------------------------------------------
# Vendor labels returned by detect_vendor().
VENDOR_BIOREP = "BioRep"
VENDOR_BRANDEL = "Brandel"
VENDOR_INHOUSE = "in-house"


def detect_vendor(header: List[str], first_lines: List[str]) -> str:
    """Guess which system produced a file from its header and leading lines.

    Args:
        header: cells of the header row.
        first_lines: raw text lines from the start of the file.

    Returns:
        ``VENDOR_BIOREP`` if "biorep" occurs (case-insensitively) anywhere in
        *first_lines*; otherwise ``VENDOR_BRANDEL`` if "brandel" occurs there
        or a header cell equals "fraction" once stripped and lower-cased;
        otherwise ``VENDOR_INHOUSE``.
    """
    preamble = "\n".join(first_lines).lower()

    if "biorep" in preamble:
        return VENDOR_BIOREP

    if "brandel" in preamble:
        return VENDOR_BRANDEL
    for cell in header:
        if cell.strip().lower() == "fraction":
            return VENDOR_BRANDEL

    # Nothing vendor-specific was found: fall back to the in-house label.
    return VENDOR_INHOUSE


# ---------------------------------------------------------------------------
# Loader
# ---------------------------------------------------------------------------
def _read_csv_all_text(path: str) -> Tuple[List[List[str]], List[str]]:
    """Read *path* once and return ``(rows, first_lines)``.

    Args:
        path: path of a UTF-8 encoded CSV file (with or without a BOM).

    Returns:
        A tuple ``(rows, first_lines)`` where ``rows`` is every record that
        ``csv.reader`` yields for the file, and ``first_lines`` is the first
        8 raw text lines, unsplit.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the content is not valid UTF-8.
    """
    # "utf-8-sig" decodes plain UTF-8 exactly like "utf-8" but also drops a
    # leading byte-order mark. Without that, the BOM stays glued to the first
    # line and the first cell, so a first-line "#" comment is not seen as a
    # comment and the first header name carries an invisible character.
    with open(path, "r", encoding="utf-8-sig") as f:
        text = f.read()
    rows = list(csv.reader(io.StringIO(text)))
    first_lines = text.splitlines()[:8]
    return rows, first_lines


def _find_header_row(rows: List[List[str]]) -> int:
    """Return the index of the header row.

    The header is the first non-comment row, among the first 20, that has a
    cell containing "time" (case-insensitive).

    A row counts as a comment when its first cell starts with "#" followed by
    more text. Such lines carry inline metadata, and a key like
    ``storage_time`` would otherwise be mistaken for the time column, making
    the real header and all data rows unparseable. A bare "#" cell is not a
    comment, since it can be an index column name.

    Args:
        rows: parsed CSV records, one list of cells per row.

    Returns:
        The index of the header row, or 0 when no candidate is found.
    """
    for idx, row in enumerate(rows[:20]):
        # Strip a stray BOM too, so a comment on the very first line is still
        # recognised when the file was decoded without BOM handling.
        lead = row[0].lstrip("\ufeff").strip() if row else ""
        if lead.startswith("#") and lead != "#":
            continue
        if any(re.search(r"time", cell, flags=re.I) for cell in row):
            return idx
    return 0


def parse_perifusion_csv(path: str) -> Dict[str, Any]:
    """Parse a perifusion CSV file and return a normalized dict:
        {
          'vendor': str,
          'time_min': List[float],
          'channels': Dict[str, List[float]],   # channel_id -> ng/mL trace
          'meta_hint': Dict[str, Any],          # any inline metadata key=value comments
          'source_file': str,                   # basename of `path`
        }

    Parsing rules:
      - Metadata comes from the leading lines that start with '#' and hold
        ';'-separated ``key=value`` pairs.
      - The time column is the first header cell containing "time"
        (case-insensitive).
      - Every other non-empty header cell is a channel, except cells
        containing "fraction", "sample" or "index".
      - Rows whose time cell is missing or not numeric are skipped.
      - A missing or non-numeric channel cell becomes NaN, so every channel
        trace has the same length as ``time_min``.
      - Repeated channel header names are kept apart by suffixing later
        occurrences with ``_2``, ``_3``, ...

    Raises:
        ValueError: if the file yields no CSV rows, or the detected header
            row has no time column.
    """
    rows, first_lines = _read_csv_all_text(path)
    if not rows:
        raise ValueError(f"empty CSV: {path}")

    # collect comment metadata lines starting with '#'
    meta_hint: Dict[str, Any] = {}
    for line in first_lines:
        if not (line.startswith("#") and "=" in line):
            continue
        for pair in line.lstrip("#").strip().split(";"):
            if "=" in pair:
                k, v = pair.split("=", 1)
                meta_hint[k.strip()] = v.strip()

    hdr_idx = _find_header_row(rows)
    header = [h.strip() for h in rows[hdr_idx]]
    vendor = detect_vendor(header, first_lines)

    # find time column: first header cell mentioning "time"
    time_col = next(
        (i for i, h in enumerate(header) if re.search(r"time", h, flags=re.I)),
        None,
    )
    if time_col is None:
        raise ValueError(f"no time column in {path}")

    # all other columns = channels (skip empty / fraction index)
    channels: Dict[str, List[float]] = {}
    ch_indices: List[Tuple[int, str]] = []
    for i, h in enumerate(header):
        if i == time_col or not h:
            continue
        if re.search(r"fraction|sample|index", h, flags=re.I):
            continue
        # Two columns with the same header must not share one list: their
        # values would interleave and the trace would no longer line up with
        # time_min. Give later duplicates a unique suffixed name instead.
        name = h
        suffix = 2
        while name in channels:
            name = f"{h}_{suffix}"
            suffix += 1
        ch_indices.append((i, name))
        channels[name] = []

    times: List[float] = []
    for row in rows[hdr_idx + 1 :]:
        if not row or not any(c.strip() for c in row):
            continue
        try:
            t = float(row[time_col])
        except (ValueError, IndexError):
            # Not a data row (short row, trailing note, repeated header).
            continue
        times.append(t)
        for i, name in ch_indices:
            try:
                v = float(row[i]) if i < len(row) and row[i].strip() else float("nan")
            except ValueError:
                v = float("nan")
            channels[name].append(v)

    return {
        "vendor": vendor,
        "time_min": times,
        "channels": channels,
        "meta_hint": meta_hint,
        "source_file": os.path.basename(path),
    }


def load_dir(input_dir: str) -> List[Dict[str, Any]]:
    """Parse every ``*.csv`` file directly inside *input_dir*.

    Files are matched by a case-insensitive ``.csv`` suffix and processed in
    sorted name order. Sub-directories are not descended into.

    Args:
        input_dir: directory to scan.

    Returns:
        One ``parse_perifusion_csv`` result per CSV file, in name order; an
        empty list when the directory holds no CSV files.

    Raises:
        OSError: if *input_dir* cannot be listed.
        ValueError: propagated from ``parse_perifusion_csv`` for a file it
            cannot parse.
    """
    parsed: List[Dict[str, Any]] = []
    for name in sorted(os.listdir(input_dir)):
        if not name.lower().endswith(".csv"):
            continue
        full_path = os.path.join(input_dir, name)
        # A directory (or other non-regular entry) may also be named *.csv;
        # handing it to the parser would fail inside open().
        if not os.path.isfile(full_path):
            continue
        parsed.append(parse_perifusion_csv(full_path))
    return parsed