"""
meal_core.py — RodentMealScope 핵심 계산 모듈 (순수 파이썬)

설치류 섭식 이벤트 로그를 식사(meal) 단위로 분해하여 미세구조(microstructure)
지표를 계산한다. Streamlit 없이 단독으로 테스트 가능하도록 계산 로직만 담는다.

도메인: 비만 (Obesity) / 신경내분비 동물실험
용도: 참고용·연구용 (for reference / research use only)

주요 기능
  1. 다중 하드웨어 이벤트 정규화 (BioDAQ / FED3 / 자동급이기 / lickometer / PhenoMaster)
  2. 객관적 식사 기준(meal criterion) 도출 — Tolkamp 로그-생존곡선 변곡점법
  3. 식사 미세구조 지표 (meal size/duration/frequency/IMI/ingestion rate/satiety ratio 등)
  4. 일주기(circadian) 분해 — dark active / light rest, zeitgeber-time bin
  5. 코호트 통계 (ANOVA / 혼합효과) + 기전 분류
"""

from __future__ import annotations

import io
import numpy as np
import pandas as pd

# ----------------------------------------------------------------------
# Common schema
# ----------------------------------------------------------------------
# Standard columns after normalization:
#   animal_id     : animal identifier (str)
#   group         : experimental group (str)
#   timestamp     : pandas datetime
#   intake_amount : amount consumed (g) — for lickometers, licks are converted
#                   to grams (the lick count itself is kept in `licks`)
#   licks         : lick count (optional)
#   bout          : bout/event identifier taken from the raw data (optional)
#   qc_flag       : QC flag string ("" = no issue)

STANDARD_COLUMNS = [
    "animal_id", "group", "timestamp", "intake_amount", "licks", "bout", "qc_flag",
]

SUPPORTED_HARDWARE = [
    "auto",          # infer the hardware from the column names
    "rodentmealscope",  # this tool's own synthetic format
    "biodaq",
    "fed3",
    "feeder",        # generic automatic feeder
    "lickometer",
    "phenomaster",
]

# Conversion factor from lickometer licks to estimated intake in grams
# (described by the original author as a conventional literature value;
# callers can override it).
DEFAULT_LICK_TO_GRAM = 0.0015


# ----------------------------------------------------------------------
# 1. 다중 하드웨어 이벤트 정규화
# ----------------------------------------------------------------------
def _lower_map(cols):
    """Map each normalized column name to its original spelling.

    Normalization lower-cases the name and strips surrounding whitespace.
    When two columns normalize to the same key, the later one wins.
    """
    lookup = {}
    for original in cols:
        lookup[original.lower().strip()] = original
    return lookup


def detect_hardware(df: pd.DataFrame) -> str:
    """Guess the hardware type from the column names of *df*.

    Column names are compared lower-cased and stripped. The checks run in a
    fixed priority order and the first match wins; when nothing matches,
    "rodentmealscope" is returned.
    """
    names = {col.lower().strip() for col in df.columns}

    def has_any(*wanted):
        # True when at least one of the wanted names is a column.
        return not names.isdisjoint(wanted)

    if names >= {"animal_id", "timestamp", "intake_amount"}:
        return "rodentmealscope"
    if has_any("pelletcount", "fed_event") or names >= {"device", "pellets"}:
        return "fed3"
    if has_any("cumulativeintake", "biodaq_cage", "weightchange"):
        return "biodaq"
    if has_any("licks", "lick_count"):
        return "lickometer"
    if names >= {"drink", "feed"}:
        return "phenomaster"
    if has_any("feeder_id", "dispensed_g"):
        return "feeder"
    return "rodentmealscope"


def _pick(colmap, *candidates):
    """Return the real column name for the first candidate key present in *colmap*.

    Candidates are expected in normalized (lower-case) form and are tried in
    the order given. Returns None when none of them is a key of *colmap*.
    """
    return next((colmap[key] for key in candidates if key in colmap), None)


def normalize_events(df: pd.DataFrame,
                     hardware: str = "auto",
                     lick_to_gram: float = DEFAULT_LICK_TO_GRAM,
                     fed3_pellet_g: float = 0.02) -> pd.DataFrame:
    """
    Normalize a feeding-event export from any supported device to the common schema.

    Parameters
    ----------
    df : raw event DataFrame
    hardware : one of SUPPORTED_HARDWARE ("auto" infers it from the column names)
    lick_to_gram : grams per lick, used when intake has to be derived from licks
    fed3_pellet_g : weight (g) of one FED3 pellet

    Returns
    -------
    DataFrame with the columns animal_id, group, timestamp, intake_amount,
    licks, bout, qc_flag plus a helper column ``_hardware`` holding the
    hardware label that was used. Rows whose timestamp could not be parsed
    are dropped; the result is sorted by animal_id, then timestamp, and has
    a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If no timestamp column, or no intake / licks / pellet / (BioDAQ)
        cumulative-intake column can be found.
    """
    if hardware == "auto":
        hardware = detect_hardware(df)

    # Work on a clean positional index so that every derived column lines up
    # row-by-row with the input, whatever index the caller's frame carries
    # (e.g. a filtered frame). The output index is reset at the end anyway.
    df = df.reset_index(drop=True)
    colmap = _lower_map(df.columns)
    out = pd.DataFrame(index=df.index)

    # --- animal id ---
    aid = _pick(colmap, "animal_id", "animal", "subject", "id", "cage",
                "biodaq_cage", "device", "feeder_id", "mouse")
    out["animal_id"] = df[aid].astype(str) if aid is not None else "A01"

    # --- group ---
    grp = _pick(colmap, "group", "treatment", "cohort", "condition", "arm")
    out["group"] = df[grp].astype(str) if grp is not None else "Unassigned"

    # --- timestamp ---
    ts = _pick(colmap, "timestamp", "time", "datetime", "mm:dd:yyyy hh:mm:ss",
               "date_time", "event_time")
    if ts is None:
        raise ValueError("타임스탬프 컬럼을 찾을 수 없습니다 (timestamp/time/datetime).")
    out["timestamp"] = pd.to_datetime(df[ts], errors="coerce")

    # --- intake / licks ---
    intake_col = _pick(colmap, "intake_amount", "intake", "amount", "weightchange",
                       "dispensed_g", "feed", "grams", "delta_g")
    licks_col = _pick(colmap, "licks", "lick_count", "lick")
    pellet_col = _pick(colmap, "pelletcount", "pellets", "pellet")
    cum = _pick(colmap, "cumulativeintake", "cumulative", "cum_intake")
    # BioDAQ exports may carry only a cumulative reading; per-event intake is
    # then the first difference of that reading (computed further below).
    use_cumulative = hardware == "biodaq" and cum is not None and intake_col is None

    if hardware == "fed3" and pellet_col is not None:
        out["intake_amount"] = pd.to_numeric(df[pellet_col], errors="coerce").fillna(0) * fed3_pellet_g
        out["licks"] = np.nan
    elif hardware == "lickometer" and licks_col is not None:
        out["licks"] = pd.to_numeric(df[licks_col], errors="coerce").fillna(0)
        out["intake_amount"] = out["licks"] * lick_to_gram
    elif intake_col is not None:
        out["intake_amount"] = pd.to_numeric(df[intake_col], errors="coerce").fillna(0)
        out["licks"] = (pd.to_numeric(df[licks_col], errors="coerce")
                        if licks_col is not None else np.nan)
    elif licks_col is not None:
        out["licks"] = pd.to_numeric(df[licks_col], errors="coerce").fillna(0)
        out["intake_amount"] = out["licks"] * lick_to_gram
    elif pellet_col is not None:
        out["intake_amount"] = pd.to_numeric(df[pellet_col], errors="coerce").fillna(0) * fed3_pellet_g
        out["licks"] = np.nan
    elif use_cumulative:
        # Cumulative-only export: placeholder, replaced by the differenced
        # cumulative reading below instead of failing with "no intake column".
        out["intake_amount"] = 0.0
        out["licks"] = np.nan
    else:
        raise ValueError("섭취량/licks/pellet 관련 컬럼을 찾을 수 없습니다.")

    # --- bout ---
    bout = _pick(colmap, "bout", "event", "event_id", "fed_event")
    out["bout"] = df[bout].astype(str) if bout is not None else np.arange(len(df)).astype(str)

    # --- BioDAQ cumulative reading -> per-event intake ---
    if use_cumulative:
        cumv = pd.to_numeric(df[cum], errors="coerce")
        # Difference in chronological order within each animal so that an
        # unsorted export does not produce spurious (or clipped) deltas.
        chrono = out.sort_values(["animal_id", "timestamp"]).index
        deltas = cumv.loc[chrono].groupby(out.loc[chrono, "animal_id"]).diff()
        # First reading per animal has no predecessor (-> 0); negative steps
        # are clipped to 0. Assignment re-aligns on the row index.
        out["intake_amount"] = deltas.fillna(0).clip(lower=0)

    out["qc_flag"] = ""
    out = out.dropna(subset=["timestamp"]).sort_values(
        ["animal_id", "timestamp"]).reset_index(drop=True)
    out["_hardware"] = hardware
    return out


def qc_check(df: pd.DataFrame,
             spill_threshold_g: float = 2.0,
             min_amount_g: float = 0.0,
             max_gap_hours: float = 6.0) -> pd.DataFrame:
    """
    Attach QC flags to every event and return a flagged copy of *df*.

    Flags (joined with "|" in the ``qc_flag`` column, "" when none apply):
      - NEGATIVE      : intake below ``min_amount_g``
      - SPILLAGE      : single intake above ``spill_threshold_g`` (suspected spill)
      - DOUBLE_COUNT  : less than 1 s after the same animal's previous event
      - DATA_GAP      : more than ``max_gap_hours`` after the same animal's
                        previous event (suspected acquisition gap)

    Rows are addressed by position, so the frame may carry any index, and
    gaps are measured in chronological order within each animal even when
    the rows are not sorted. The input frame is not modified and its index
    and row order are preserved in the result.
    """
    df = df.copy()
    flags = [[] for _ in range(len(df))]

    amounts = np.asarray(df["intake_amount"], dtype=float)
    # NaN amounts compare False on both tests and therefore get no flag.
    for pos in np.flatnonzero(amounts < min_amount_g - 1e-9):
        flags[pos].append("NEGATIVE")
    for pos in np.flatnonzero(amounts > spill_threshold_g):
        flags[pos].append("SPILLAGE")

    stamps = df["timestamp"].values
    max_gap_s = max_gap_hours * 3600.0
    one_second = np.timedelta64(1, "s")
    # GroupBy.indices yields *positional* row numbers, which is what the
    # `flags` list is indexed by (index labels would be wrong for any frame
    # whose index is not 0..n-1).
    for positions in df.groupby("animal_id").indices.values():
        positions = np.asarray(positions)
        if len(positions) < 2:
            continue
        chrono = positions[np.argsort(stamps[positions], kind="stable")]
        gaps_s = np.diff(stamps[chrono]) / one_second
        for pos, gap in zip(chrono[1:], gaps_s):
            if gap < 1.0:
                flags[pos].append("DOUBLE_COUNT")
            if gap > max_gap_s:
                flags[pos].append("DATA_GAP")

    df["qc_flag"] = ["|".join(f) for f in flags]
    return df


def qc_summary(df: pd.DataFrame) -> pd.DataFrame:
    """Tabulate QC flags: total events, flagged events and one row per flag type.

    Each row has ``flag``, ``count`` and ``pct`` (percentage of all events,
    rounded to 2 decimals; the TOTAL_EVENTS row is always 100.0).
    """
    n_events = len(df)
    denom = max(n_events, 1)  # avoids division by zero on an empty frame
    flag_col = df["qc_flag"]

    n_flagged = (flag_col != "").sum()
    summary = [
        {"flag": "TOTAL_EVENTS", "count": n_events, "pct": 100.0},
        {"flag": "FLAGGED_EVENTS", "count": int(n_flagged),
         "pct": round(100.0 * n_flagged / denom, 2)},
    ]
    for tag in ("SPILLAGE", "NEGATIVE", "DOUBLE_COUNT", "DATA_GAP"):
        hits = flag_col.str.contains(tag).sum()
        summary.append({"flag": tag, "count": int(hits),
                        "pct": round(100.0 * hits / denom, 2)})
    return pd.DataFrame(summary)


# ----------------------------------------------------------------------
# 2. 객관적 식사 기준 도출 — Tolkamp 로그-생존곡선 변곡점법
# ----------------------------------------------------------------------
def intermeal_intervals(df: pd.DataFrame) -> np.ndarray:
    """Return the gaps (minutes) between consecutive events, pooled over all animals.

    Events are ordered by timestamp within each animal before differencing;
    animals with fewer than two events contribute nothing. An empty array is
    returned when no animal has at least two events.
    """
    one_minute = np.timedelta64(1, "m")
    per_animal_stamps = (
        df.loc[labels, "timestamp"].sort_values().values
        for labels in df.groupby("animal_id").groups.values()
    )
    gaps = [
        np.diff(stamps) / one_minute
        for stamps in per_animal_stamps
        if len(stamps) >= 2
    ]
    return np.concatenate(gaps) if gaps else np.array([])


def derive_meal_criterion(intervals_min: np.ndarray,
                          n_bins: int = 60,
                          min_minutes: float = 0.05) -> dict:
    """
    Derive a data-driven meal criterion (the gap that separates two meals)
    from inter-event intervals given in minutes (Tolkamp & Kyriazakis style).

    Idea
    ----
    Intervals are a mixture of short within-meal gaps and long between-meal
    gaps, so the histogram of log10(interval) is expected to be bimodal.
    The valley (antimode) between the two modes is taken as the criterion.
    When fewer than two peaks are found, the point of maximum curvature of
    the log-log survivor curve is used instead.

    Parameters
    ----------
    intervals_min : inter-event intervals in minutes
    n_bins : number of histogram bins over log10(interval)
    min_minutes : intervals at or below this value (and non-finite values)
        are discarded before the analysis

    Returns
    -------
    dict with keys
      criterion_min : criterion in minutes, clipped to [0.5, 60]; stays at the
                      default 10.0 when fewer than 20 usable intervals remain,
                      or when the single-peak path has 30 or fewer intervals
      method        : "log_survivor_antimode (Tolkamp)",
                      "log_survivor_curvature (unimodal fallback)" or
                      "default_fallback"
      log_centers, log_hist : bin centers / counts of the log10 histogram
      surv_x, surv_y : sorted intervals and the empirical survivor values
      antimode_log  : log10 of the criterion before clipping (None if not derived)
      peaks         : log10 bin centers of the retained peaks (3 decimals)
    """
    iv = np.asarray(intervals_min, dtype=float)
    iv = iv[(iv > min_minutes) & np.isfinite(iv)]
    result = {"criterion_min": 10.0, "method": "default_fallback",
              "log_centers": np.array([]), "log_hist": np.array([]),
              "surv_x": np.array([]), "surv_y": np.array([]),
              "antimode_log": None, "peaks": []}
    # Too few intervals for a meaningful histogram: return the defaults.
    if len(iv) < 20:
        return result

    logv = np.log10(iv)
    hist, edges = np.histogram(logv, bins=n_bins)
    centers = 0.5 * (edges[:-1] + edges[1:])
    result["log_centers"] = centers
    result["log_hist"] = hist

    # Empirical survivor function (1 - CDF) of the raw intervals; the value
    # at the largest interval is exactly 0.
    sx = np.sort(iv)
    sy = 1.0 - np.arange(1, len(sx) + 1) / len(sx)
    result["surv_x"] = sx
    result["surv_y"] = sy

    # Smooth the histogram with a width-5 moving average before searching
    # for peaks and valleys.
    kernel = np.ones(5) / 5.0
    smooth = np.convolve(hist.astype(float), kernel, mode="same")

    # Find interior local maxima, then drop noise peaks with a
    # prominence-style filter.
    raw_peaks = []
    for i in range(1, len(smooth) - 1):
        if smooth[i] > smooth[i - 1] and smooth[i] >= smooth[i + 1]:
            raw_peaks.append(i)
    if not raw_peaks:  # no interior local maximum: use the global maximum
        raw_peaks = [int(np.argmax(smooth))]

    prom_thresh = 0.08 * smooth.max()  # peaks rising less than 8% of the global max are dropped
    peaks = []
    for p in raw_peaks:
        # Minima are taken over the whole left / right side of the smoothed
        # histogram (both slices include p itself).
        left_min = smooth[:p + 1].min()
        right_min = smooth[p:].min()
        prominence = smooth[p] - max(left_min, right_min)
        if prominence >= prom_thresh:
            peaks.append(p)
    # Peaks within 3 bins of the previously kept peak are merged, keeping
    # the taller of the two.
    dedup = []
    for p in peaks:
        if dedup and p - dedup[-1] <= 3:
            if smooth[p] > smooth[dedup[-1]]:
                dedup[-1] = p
            continue
        dedup.append(p)
    peaks = dedup
    result["peaks"] = [round(float(centers[p]), 3) for p in peaks]

    if len(peaks) >= 2:
        # Search for the minimum-density region (antimode) between the peak
        # at the shortest interval (within-meal mode) and the peak at the
        # longest interval (between-meal mode). Within-meal gaps are far more
        # numerous, so peaks are chosen by position in time, not by height.
        p1, p2 = peaks[0], peaks[-1]
        seg = smooth[p1:p2 + 1]
        vmin = seg.min()
        # Among bins within a tolerance of the minimum density, take the
        # middle of the longest contiguous run as the antimode.
        tol = max(1e-9, 0.05 * smooth.max())
        low_mask = seg <= vmin + tol
        best_run, cur = (0, 0), []
        # The trailing False sentinel closes a run that reaches the end.
        for i, flag in enumerate(list(low_mask) + [False]):
            if flag:
                cur.append(i)
            else:
                # NOTE: len(cur) is compared with (end - start) of the stored
                # run, i.e. its length minus one, so a later run of equal
                # length replaces the earlier one.
                if len(cur) > best_run[1] - best_run[0]:
                    best_run = (cur[0], cur[-1])
                cur = []
        valley = p1 + int(round(0.5 * (best_run[0] + best_run[1])))
        antimode_log = centers[valley]
        result["antimode_log"] = float(antimode_log)
        result["criterion_min"] = float(10.0 ** antimode_log)
        result["method"] = "log_survivor_antimode (Tolkamp)"
    else:
        # Single peak: use the maximum-curvature point of the log-log
        # survivor curve.
        x = np.log10(sx)
        # The final survivor value is 0, which the clip maps to
        # log10(1e-9) = -9.
        # NOTE(review): that steep last step may dominate |d2| and pull the
        # knee toward the largest interval — confirm this is intended.
        y = np.log10(np.clip(sy, 1e-9, None))
        if len(x) > 30:
            # Resample on a regular 200-point grid, then take the numerical
            # second derivative.
            xs = np.linspace(x.min(), x.max(), 200)
            ys = np.interp(xs, x, y)
            d2 = np.gradient(np.gradient(ys, xs), xs)
            knee = xs[int(np.argmax(np.abs(d2)))]
            result["criterion_min"] = float(10.0 ** knee)
            result["antimode_log"] = float(knee)
            result["method"] = "log_survivor_curvature (unimodal fallback)"

    # Clip to a plausible range (0.5 to 60 minutes).
    result["criterion_min"] = float(np.clip(result["criterion_min"], 0.5, 60.0))
    return result


# ----------------------------------------------------------------------
# 3. 식사 미세구조 — 식사 그룹핑 및 지표 계산
# ----------------------------------------------------------------------
def group_meals(df: pd.DataFrame, criterion_min: float) -> pd.DataFrame:
    """
    Assign every event to a meal.

    Within one animal, an event belongs to the same meal as the previous
    event when the gap between them does not exceed ``criterion_min``
    minutes; a longer gap opens a new meal.

    Returns
    -------
    A copy of the events sorted by animal_id and timestamp (index reset)
    with an added ``meal_id`` column of the form "<animal>_M0000".
    """
    df = df.sort_values(["animal_id", "timestamp"]).reset_index(drop=True)
    threshold_s = criterion_min * 60.0
    one_second = np.timedelta64(1, "s")
    labels = np.empty(len(df), dtype=object)

    for animal, rows in df.groupby("animal_id").groups.items():
        rows = list(rows)
        stamps = df.loc[rows, "timestamp"].values
        # True wherever the gap to the previous event opens a new meal.
        opens_meal = np.diff(stamps) / one_second > threshold_s
        # The first event is always meal 0; each break increments the number.
        meal_numbers = np.concatenate(([0], np.cumsum(opens_meal)))
        for row, number in zip(rows, meal_numbers):
            labels[row] = f"{animal}_M{int(number):04d}"

    df["meal_id"] = labels
    return df


def meal_table(df_with_meals: pd.DataFrame) -> pd.DataFrame:
    """
    Aggregate events into one row per meal.

    Parameters
    ----------
    df_with_meals : event table carrying a ``meal_id`` column plus
        animal_id, group, timestamp and intake_amount.

    Returns
    -------
    DataFrame sorted by animal_id and start with the columns
    animal_id, group, meal_id, start, end, meal_size_g, duration_min,
    n_events, ingestion_rate_g_per_min. An empty input yields an empty
    frame with these columns.

    Notes
    -----
    For a meal of zero duration (e.g. a single event) the ingestion rate
    cannot be computed as size / duration; the meal size itself is stored
    as the rate in that case.
    """
    columns = ["animal_id", "group", "meal_id", "start", "end", "meal_size_g",
               "duration_min", "n_events", "ingestion_rate_g_per_min"]
    rows = []
    for meal_id, events in df_with_meals.groupby("meal_id"):
        events = events.sort_values("timestamp")
        start = events["timestamp"].iloc[0]
        end = events["timestamp"].iloc[-1]
        duration = (end - start).total_seconds() / 60.0
        size = float(events["intake_amount"].sum())
        rows.append({
            "animal_id": events["animal_id"].iloc[0],
            "group": events["group"].iloc[0],
            "meal_id": meal_id,
            "start": start,
            "end": end,
            "meal_size_g": size,
            "duration_min": duration,
            "n_events": len(events),
            # Zero-duration meals fall back to the meal size (see Notes).
            "ingestion_rate_g_per_min": size / duration if duration > 0 else size,
        })
    if not rows:
        # Keep the schema stable so downstream column access does not fail.
        return pd.DataFrame(columns=columns)
    return (pd.DataFrame(rows, columns=columns)
            .sort_values(["animal_id", "start"])
            .reset_index(drop=True))


def microstructure_per_animal(meal_df: pd.DataFrame,
                              study_start=None) -> pd.DataFrame:
    """
    Compute meal-microstructure metrics for each animal from a meal table.

    Metrics
    -------
    total_intake_g           : summed meal size
    n_meals                  : number of meals
    meal_size_mean_g / meal_size_sd_g : mean and sample SD of meal size
                               (SD is 0.0 for a single meal)
    meal_duration_mean_min   : mean meal duration
    meal_freq_per_day        : meals per day over the span from the first meal
                               start to the last meal end (a single meal
                               counts as a span of one day)
    imi_mean_min             : mean intermeal interval (end of meal i to
                               start of meal i+1)
    ingestion_rate_g_per_min : mean ingestion rate (infinite values ignored)
    satiety_ratio_min_per_g  : mean of IMI / size of the preceding meal
    first_meal_latency_min   : minutes from ``study_start`` to the first meal
                               (NaN when ``study_start`` is None)

    The result is sorted by group and animal_id.
    """
    def _rounded(value, digits):
        # NaN is the only value that differs from itself; keep it as NaN.
        return round(value, digits) if value == value else np.nan

    records = []
    for animal, labels in meal_df.groupby("animal_id").groups.items():
        meals = meal_df.loc[labels].sort_values("start").reset_index(drop=True)
        n_meals = len(meals)
        starts, ends, sizes = meals["start"], meals["end"], meals["meal_size_g"]

        if n_meals > 1:
            span_days = (ends.iloc[-1] - starts.iloc[0]).total_seconds() / 86400.0
        else:
            span_days = 1.0
        span_days = max(span_days, 1e-6)

        # Gap from the end of each meal to the start of the next one.
        gaps_min = [
            (next_start - prev_end).total_seconds() / 60.0
            for next_start, prev_end in zip(starts.iloc[1:], ends.iloc[:-1])
        ]
        mean_gap = float(np.mean(gaps_min)) if gaps_min else np.nan

        # Satiety ratio: following gap relative to the preceding meal size
        # (meals of negligible size are skipped).
        ratios = [
            gap / prev_size
            for gap, prev_size in zip(gaps_min, sizes.iloc[:-1])
            if prev_size > 1e-9
        ]
        mean_ratio = float(np.mean(ratios)) if ratios else np.nan

        if study_start is None:
            latency = np.nan
        else:
            latency = (starts.iloc[0] - pd.Timestamp(study_start)).total_seconds() / 60.0

        finite_rates = meals["ingestion_rate_g_per_min"].replace(
            [np.inf, -np.inf], np.nan).dropna()
        size_sd = float(sizes.std(ddof=1)) if n_meals > 1 else 0.0

        records.append({
            "animal_id": animal,
            "group": meals["group"].iloc[0],
            "total_intake_g": round(float(sizes.sum()), 4),
            "n_meals": n_meals,
            "meal_size_mean_g": round(float(sizes.mean()), 4),
            "meal_size_sd_g": round(size_sd, 4),
            "meal_duration_mean_min": round(float(meals["duration_min"].mean()), 3),
            "meal_freq_per_day": round(n_meals / span_days, 3),
            "imi_mean_min": _rounded(mean_gap, 3),
            "ingestion_rate_g_per_min": round(float(finite_rates.mean()), 4),
            "satiety_ratio_min_per_g": _rounded(mean_ratio, 3),
            "first_meal_latency_min": _rounded(latency, 2),
        })
    return pd.DataFrame(records).sort_values(["group", "animal_id"]).reset_index(drop=True)


def microstructure_per_group(per_animal: pd.DataFrame) -> pd.DataFrame:
    """Summarize per-animal metrics by group as mean and SEM, plus the animal count.

    For every metric two columns are produced, ``<metric>_mean`` and
    ``<metric>_sem`` (both rounded to 4 decimals). Non-numeric and missing
    values are ignored; a metric with no usable values yields NaN for both,
    and a single usable value yields an SEM of 0.0.
    """
    metrics = ("total_intake_g", "n_meals", "meal_size_mean_g",
               "meal_duration_mean_min", "meal_freq_per_day", "imi_mean_min",
               "ingestion_rate_g_per_min", "satiety_ratio_min_per_g",
               "first_meal_latency_min")

    def _mean_sem(values):
        # Mean and standard error of the mean for one metric within one group.
        count = len(values)
        if count == 0:
            return np.nan, np.nan
        sem = float(values.std(ddof=1) / np.sqrt(count)) if count > 1 else 0.0
        return round(float(values.mean()), 4), round(sem, 4)

    summaries = []
    for grp, animals in per_animal.groupby("group"):
        row = {"group": grp, "n_animals": len(animals)}
        for metric in metrics:
            usable = pd.to_numeric(animals[metric], errors="coerce").dropna()
            row[f"{metric}_mean"], row[f"{metric}_sem"] = _mean_sem(usable)
        summaries.append(row)
    return pd.DataFrame(summaries)


# ----------------------------------------------------------------------
# 4. 일주기(circadian) 분해
# ----------------------------------------------------------------------
def add_zeitgeber(df: pd.DataFrame,
                  lights_on_hour: int = 7,
                  light_phase_hours: int = 12) -> pd.DataFrame:
    """
    Annotate each event with zeitgeber time (ZT) and light/dark phase.

    ZT0 is lights-on (``lights_on_hour`` on the clock). ZT is computed from
    the hour and minute of the timestamp; events with ZT below
    ``light_phase_hours`` are labelled "light_rest", all others
    "dark_active". The defaults describe a 12:12 light/dark cycle.

    Returns a copy of *df* with the added columns ``zt`` (hours, 0-24),
    ``zt_bin`` (integer hour of ZT) and ``phase``.
    """
    stamps = df["timestamp"]
    clock_hours = stamps.dt.hour + stamps.dt.minute / 60.0
    zt_hours = (clock_hours - lights_on_hour) % 24.0
    in_light = zt_hours < light_phase_hours
    return df.assign(
        zt=zt_hours,
        zt_bin=np.floor(zt_hours).astype(int),
        phase=np.where(in_light, "light_rest", "dark_active"),
    )


def circadian_intake(df_zt: pd.DataFrame) -> pd.DataFrame:
    """Summarize intake per group and phase (light/dark).

    One row per (group, phase) with total intake, event count and mean
    event size. A ``dark_fraction`` column is added per group: the
    "dark_active" total divided by the group's total over all phases
    (NaN when that total is zero). An empty input yields an empty frame.
    """
    summary = pd.DataFrame([
        {
            "group": grp,
            "phase": phase,
            "total_intake_g": round(float(events["intake_amount"].sum()), 4),
            "n_events": len(events),
            "mean_event_g": round(float(events["intake_amount"].mean()), 5),
        }
        for (grp, phase), events in df_zt.groupby(["group", "phase"])
    ])
    if summary.empty:
        return summary

    # Wide table: one row per group, one column per phase (missing -> 0).
    by_phase = summary.pivot_table(index="group", columns="phase",
                                   values="total_intake_g", aggfunc="sum").fillna(0.0)
    group_total = by_phase.sum(axis=1).replace(0, np.nan)
    by_phase["dark_fraction"] = by_phase.get("dark_active", 0.0) / group_total
    return summary.merge(by_phase[["dark_fraction"]].reset_index(),
                         on="group", how="left")


def zt_profile(df_zt: pd.DataFrame) -> pd.DataFrame:
    """Intake profile per group and zeitgeber-time bin.

    For every (group, zt_bin) the total intake is reported together with
    the intake per animal-day, where the divisor is the number of distinct
    animals in the group times the group's recording span in days (at
    least one day). The result is sorted by group and zt_bin.
    """
    by_group = df_zt.groupby("group")
    animals_per_group = by_group["animal_id"].nunique().to_dict()
    days_per_group = {
        grp: max((events["timestamp"].max() - events["timestamp"].min()).total_seconds()
                 / 86400.0, 1.0)
        for grp, events in by_group
    }

    records = []
    for (grp, zt_bin), events in df_zt.groupby(["group", "zt_bin"]):
        consumed = float(events["intake_amount"].sum())
        animal_days = max(animals_per_group.get(grp, 1) * days_per_group.get(grp, 1.0), 1e-6)
        records.append({
            "group": grp,
            "zt_bin": int(zt_bin),
            "intake_g": round(consumed, 4),
            "intake_g_per_animal_day": round(consumed / animal_days, 5),
        })
    return pd.DataFrame(records).sort_values(["group", "zt_bin"]).reset_index(drop=True)


def acute_vs_chronic(df_zt: pd.DataFrame,
                     dosing_time,
                     acute_hours: float = 6.0) -> pd.DataFrame:
    """
    Split intake into windows relative to the dosing time.

      - baseline : events before ``dosing_time``
      - acute    : events from ``dosing_time`` up to and including
                   ``acute_hours`` hours later
      - chronic  : everything after that

    Returns one row per (group, dose_window) with total intake and event
    count, ordered baseline, acute, chronic within each group.
    """
    df = df_zt.copy()
    hours_since_dose = (df["timestamp"] - pd.Timestamp(dosing_time)).dt.total_seconds() / 3600.0

    # Start from "chronic" and overwrite the earlier windows; "baseline" is
    # applied last so that it wins for events before the dose.
    window = pd.Series("chronic", index=df.index, dtype=object)
    window[hours_since_dose <= acute_hours] = "acute"
    window[hours_since_dose < 0] = "baseline"
    df["dose_window"] = window.to_numpy()

    summary = pd.DataFrame([
        {
            "group": grp,
            "dose_window": win,
            "total_intake_g": round(float(events["intake_amount"].sum()), 4),
            "n_events": len(events),
        }
        for (grp, win), events in df.groupby(["group", "dose_window"])
    ])
    if summary.empty:
        return summary

    rank = {"baseline": 0, "acute": 1, "chronic": 2}
    summary["_o"] = summary["dose_window"].map(rank)
    return summary.sort_values(["group", "_o"]).drop(columns="_o").reset_index(drop=True)


# ----------------------------------------------------------------------
# 5. 코호트 통계 + 기전 분류
# ----------------------------------------------------------------------
def anova_oneway(per_animal: pd.DataFrame, metric: str) -> dict:
    """One-way ANOVA of *metric* across groups (scipy ``f_oneway``).

    Only groups with at least two usable (numeric, non-missing) values
    take part. Returns a dict with metric, test, F, p, groups and note;
    F and p are NaN and the note is filled in when fewer than two groups
    qualify.
    """
    from scipy import stats

    samples = {}
    for grp, animals in per_animal.groupby("group"):
        values = pd.to_numeric(animals[metric], errors="coerce").dropna().values
        if len(values) >= 2:
            samples[grp] = values

    outcome = {"metric": metric, "test": "one-way ANOVA",
               "F": np.nan, "p": np.nan, "groups": list(samples),
               "note": "군이 2개 미만이거나 표본 부족"}
    if len(samples) >= 2:
        f_stat, p_value = stats.f_oneway(*samples.values())
        outcome.update(F=round(float(f_stat), 4), p=round(float(p_value), 6), note="")
    return outcome


def anova_table(per_animal: pd.DataFrame, metrics=None) -> pd.DataFrame:
    """Run the one-way ANOVA for several metrics and collect the results.

    Parameters
    ----------
    per_animal : per-animal metric table with a ``group`` column
    metrics : metric column names to test; a default list is used when
        None. Names that are not columns of *per_animal* are skipped.

    Returns
    -------
    DataFrame with the columns metric, F, p, significant_0.05, note — one
    row per tested metric. The columns are present even when no metric
    could be tested, so consumers can filter on ``significant_0.05``
    without checking for an empty result first.
    """
    if metrics is None:
        metrics = ["total_intake_g", "meal_size_mean_g", "meal_freq_per_day",
                   "imi_mean_min", "ingestion_rate_g_per_min",
                   "satiety_ratio_min_per_g", "meal_duration_mean_min"]
    columns = ["metric", "F", "p", "significant_0.05", "note"]
    rows = []
    for m in metrics:
        if m not in per_animal.columns:
            continue
        r = anova_oneway(per_animal, m)
        p = r["p"]
        rows.append({"metric": m, "F": r["F"], "p": p,
                     # NaN p-values (test not run) are never significant.
                     "significant_0.05": bool(p == p and p < 0.05),
                     "note": r["note"]})
    return pd.DataFrame(rows, columns=columns)


def mixed_effects_intake(meal_df: pd.DataFrame):
    """
    Fit a mixed-effects model ``meal_size ~ group`` with animal as random effect.

    Uses statsmodels MixedLM (imported lazily). Always returns a tuple
    ``(text, params)``:
      - on success: the model summary text and the rounded parameters;
      - with fewer than 2 groups or fewer than 3 animals: a "skipped"
        message and an empty dict;
      - on any failure (including statsmodels being unavailable): a message
        containing the error and an empty dict.
    """
    try:
        import statsmodels.formula.api as smf

        data = (meal_df.copy()
                .rename(columns={"meal_size_g": "meal_size"})
                .dropna(subset=["meal_size", "group", "animal_id"]))
        enough_groups = data["group"].nunique() >= 2
        enough_animals = data["animal_id"].nunique() >= 3
        if not (enough_groups and enough_animals):
            return ("혼합효과모형: 군/개체 수 부족으로 생략", {})

        model = smf.mixedlm("meal_size ~ C(group)", data, groups=data["animal_id"])
        fitted = model.fit(method="lbfgs", maxiter=200, disp=False)
        coefficients = {name: round(float(value), 5)
                        for name, value in fitted.params.items()}
        return (fitted.summary().as_text(), coefficients)
    except Exception as e:  # noqa: BLE001  (deliberate best-effort: report, don't raise)
        return (f"혼합효과모형 적합 실패: {e}", {})


def classify_mechanism(per_group: pd.DataFrame,
                       reference_group: str | None = None) -> pd.DataFrame:
    """
    Label each group by how its meal pattern differs from a reference group.

    Percentage changes of mean meal size, meal frequency and total intake
    are computed against the reference group. When ``reference_group`` is
    None, the group with the largest mean total intake is used (or the
    first group if that column is absent).

    Labels, checked in this order:
      - the reference group itself            -> "reference"
      - |size change| and |freq change| < 5 % -> "no clear effect / mixed"
      - size decreased, and frequency did not decrease or decreased by no
        more than size                        -> "meal-size-reducing"
      - frequency decreased by more than the size change
                                              -> "meal-frequency-reducing"
      - size or frequency increased           -> "intake-increasing"
      - anything else                         -> "mixed"

    Returns an empty DataFrame when the reference group is not present.
    """
    df = per_group.copy()
    if reference_group is None:
        if "total_intake_g_mean" in df.columns and len(df):
            reference_group = df.loc[df["total_intake_g_mean"].idxmax(), "group"]
        elif len(df):
            reference_group = df["group"].iloc[0]

    ref = df[df["group"] == reference_group]
    if ref.empty:
        return pd.DataFrame()
    base_size = float(ref["meal_size_mean_g_mean"].iloc[0])
    base_freq = float(ref["meal_freq_per_day_mean"].iloc[0])
    base_intake = float(ref["total_intake_g_mean"].iloc[0])

    def pct_change(value, base):
        # Percentage change against the reference; 0.0 when the base is zero.
        return 100.0 * (value - base) / base if base else 0.0

    def label_for(group, d_size, d_freq):
        if group == reference_group:
            return "reference (대조군)"
        if abs(d_size) < 5.0 and abs(d_freq) < 5.0:
            return "no clear effect / mixed"
        if d_size < 0 and (d_freq >= 0 or abs(d_size) >= abs(d_freq)):
            return "meal-size-reducing (식사 크기 감소형)"
        if d_freq < 0 and abs(d_freq) > abs(d_size):
            return "meal-frequency-reducing (식사 빈도 감소형)"
        if d_size > 0 or d_freq > 0:
            return "intake-increasing (섭취 증가형)"
        return "mixed"

    records = []
    for group, size, freq, intake in zip(df["group"],
                                         df["meal_size_mean_g_mean"],
                                         df["meal_freq_per_day_mean"],
                                         df["total_intake_g_mean"]):
        d_size = pct_change(float(size), base_size)
        d_freq = pct_change(float(freq), base_freq)
        d_intake = pct_change(float(intake), base_intake)
        records.append({
            "group": group,
            "vs_reference": reference_group,
            "meal_size_change_pct": round(d_size, 2),
            "meal_freq_change_pct": round(d_freq, 2),
            "total_intake_change_pct": round(d_intake, 2),
            "mechanism": label_for(group, d_size, d_freq),
        })
    return pd.DataFrame(records)


# ----------------------------------------------------------------------
# 리포트 텍스트 생성 (Method + Result, 한/영)
# ----------------------------------------------------------------------
def build_report(criterion: dict,
                 per_group: pd.DataFrame,
                 anova_df: pd.DataFrame,
                 mechanism_df: pd.DataFrame,
                 lang: str = "ko") -> str:
    """Build the Methods + Results report text.

    Parameters
    ----------
    criterion : dict with ``criterion_min`` and ``method`` (missing keys are
        rendered as nan / empty text)
    per_group : group summary with ``group``, ``n_animals`` and the
        ``*_mean`` metric columns
    anova_df : ANOVA table with ``metric``, ``p`` and ``significant_0.05``;
        None, an empty frame or a frame without ``significant_0.05`` simply
        omits the significance line
    mechanism_df : mechanism classification table, or None / empty to omit
        that section
    lang : "ko" for Korean; any other value produces the English report

    Returns
    -------
    The report as a single newline-joined string.
    """
    crit = criterion.get("criterion_min", float("nan"))
    method_used = criterion.get("method", "")

    # Tolerate a missing or column-less ANOVA table the same way a missing
    # mechanism table is tolerated: the section is left out.
    if anova_df is not None and "significant_0.05" in anova_df.columns:
        sig = anova_df[anova_df["significant_0.05"] == True]  # noqa: E712
    else:
        sig = pd.DataFrame()
    has_mechanism = mechanism_df is not None and len(mechanism_df) > 0

    if lang == "ko":
        lines = []
        lines.append("## Methods (방법)")
        lines.append(
            f"섭식 이벤트 로그를 표준 스키마로 정규화한 뒤, 연속 이벤트 간 "
            f"간격의 로그-생존곡선 변곡점 분석(Tolkamp 방식)으로 식사 기준 "
            f"간격을 {crit:.2f}분으로 도출하였다 (방법: {method_used}). "
            f"이 기준으로 이벤트를 식사 단위로 묶어 식사 크기, 식사 시간, "
            f"식사 빈도, 식사간격(IMI), 섭취속도, 포만비를 개체별·군별로 "
            f"산출하였다. 일주기 분석은 12:12 명암주기 기준 zeitgeber time "
            f"으로 수행하였다. 군간 비교는 일원배치 ANOVA 및 개체를 임의효과로 "
            f"한 혼합효과모형으로 분석하였다.")
        lines.append("")
        lines.append("## Results (결과)")
        for _, r in per_group.iterrows():
            lines.append(
                f"- {r['group']} (n={int(r['n_animals'])}): "
                f"총섭취 {r.get('total_intake_g_mean', float('nan')):.2f}g, "
                f"식사크기 {r.get('meal_size_mean_g_mean', float('nan')):.3f}g, "
                f"식사빈도 {r.get('meal_freq_per_day_mean', float('nan')):.2f}회/일, "
                f"IMI {r.get('imi_mean_min_mean', float('nan')):.1f}분")
        if len(sig):
            lines.append("")
            lines.append("유의한 군간 차이(p<0.05): " +
                         ", ".join(f"{r['metric']} (p={r['p']})"
                                   for _, r in sig.iterrows()))
        if has_mechanism:
            lines.append("")
            lines.append("기전 분류:")
            for _, r in mechanism_df.iterrows():
                lines.append(f"  - {r['group']}: {r['mechanism']} "
                             f"(식사크기 {r['meal_size_change_pct']}%, "
                             f"식사빈도 {r['meal_freq_change_pct']}%)")
        lines.append("")
        lines.append("※ 본 결과는 참고용·연구용이며 임상적 판단 근거로 사용할 수 없다.")
        return "\n".join(lines)
    else:
        lines = []
        lines.append("## Methods")
        lines.append(
            f"Feeding event logs were normalized to a common schema. The meal "
            f"criterion was derived objectively from the log-survivor curve of "
            f"inter-event intervals (Tolkamp method), yielding a meal-grouping "
            f"interval of {crit:.2f} min (method: {method_used}). Events were "
            f"grouped into meals using this criterion, and meal size, duration, "
            f"frequency, intermeal interval (IMI), ingestion rate and satiety "
            f"ratio were computed per animal and per group. Circadian analysis "
            f"used zeitgeber time under a 12:12 LD cycle. Group comparisons used "
            f"one-way ANOVA and a mixed-effects model with animal as random effect.")
        lines.append("")
        lines.append("## Results")
        for _, r in per_group.iterrows():
            lines.append(
                f"- {r['group']} (n={int(r['n_animals'])}): "
                f"total intake {r.get('total_intake_g_mean', float('nan')):.2f} g, "
                f"meal size {r.get('meal_size_mean_g_mean', float('nan')):.3f} g, "
                f"meal frequency {r.get('meal_freq_per_day_mean', float('nan')):.2f}/day, "
                f"IMI {r.get('imi_mean_min_mean', float('nan')):.1f} min")
        if len(sig):
            lines.append("")
            lines.append("Significant group differences (p<0.05): " +
                         ", ".join(f"{r['metric']} (p={r['p']})"
                                   for _, r in sig.iterrows()))
        if has_mechanism:
            lines.append("")
            lines.append("Mechanism classification:")
            for _, r in mechanism_df.iterrows():
                lines.append(f"  - {r['group']}: {r['mechanism']} "
                             f"(meal size {r['meal_size_change_pct']}%, "
                             f"meal freq {r['meal_freq_change_pct']}%)")
        lines.append("")
        lines.append("Note: results are for research reference only and must "
                     "not be used as a basis for clinical decisions.")
        return "\n".join(lines)


# ----------------------------------------------------------------------
# 편의 함수: 전체 파이프라인 한 번에
# ----------------------------------------------------------------------
def run_pipeline(raw_df: pd.DataFrame,
                 hardware: str = "auto",
                 criterion_override: float | None = None,
                 lights_on_hour: int = 7) -> dict:
    """
    Run the whole analysis in one call and return every intermediate product.

    Steps: normalize -> QC -> derive meal criterion -> group into meals ->
    per-animal / per-group microstructure -> circadian breakdown -> ANOVA
    and mechanism classification.

    ``criterion_override`` replaces the derived criterion when it is truthy;
    None (or 0) keeps the data-derived value. The earliest normalized
    timestamp is used as the study start for the first-meal latency.

    Returns a dict with the keys normalized, qc_summary, intervals_min,
    criterion (derived result plus ``criterion_used``), meals_events, meals,
    per_animal, per_group, zt_events, circadian, zt_profile, anova, mechanism.
    """
    events = qc_check(normalize_events(raw_df, hardware=hardware))
    qc_table = qc_summary(events)

    gaps = intermeal_intervals(events)
    derived = derive_meal_criterion(gaps)
    # Falsy override (None or 0) falls back to the derived criterion.
    criterion_used = criterion_override or derived["criterion_min"]

    events_with_meals = group_meals(events, criterion_used)
    meals = meal_table(events_with_meals)
    per_animal = microstructure_per_animal(
        meals, study_start=events["timestamp"].min())
    per_group = microstructure_per_group(per_animal)

    zt_events = add_zeitgeber(events, lights_on_hour=lights_on_hour)
    circadian = circadian_intake(zt_events)
    profile = zt_profile(zt_events)

    anova = anova_table(per_animal)
    mechanism = classify_mechanism(per_group)

    results = {"normalized": events, "qc_summary": qc_table, "intervals_min": gaps}
    results["criterion"] = {**derived, "criterion_used": criterion_used}
    results["meals_events"] = events_with_meals
    results["meals"] = meals
    results["per_animal"] = per_animal
    results["per_group"] = per_group
    results["zt_events"] = zt_events
    results["circadian"] = circadian
    results["zt_profile"] = profile
    results["anova"] = anova
    results["mechanism"] = mechanism
    return results


if __name__ == "__main__":
    # Minimal self-check when the module is run directly: print the module
    # banner, the supported hardware labels and the standard schema.
    banner = (
        ("meal_core.py — RodentMealScope core module",),
        ("지원 하드웨어:", ", ".join(SUPPORTED_HARDWARE)),
        ("표준 스키마:", ", ".join(STANDARD_COLUMNS)),
    )
    for parts in banner:
        print(*parts)