Add comprehensive BTC/USDT price analysis framework with 17 modules

Complete statistical analysis pipeline covering: - FFT spectral analysis, wavelet CWT, ACF/PACF autocorrelation - Returns distribution (fat tails, kurtosis=15.65), GARCH volatility modeling - Hurst exponent (H=0.593), fractal dimension, power law corridor - Volume-price causality (Granger), calendar effects, halving cycle analysis - Technical indicator validation (0/21 pass FDR), candlestick pattern testing - Market state clustering (K-Means/GMM), Markov chain transitions - Time series forecasting (ARIMA/Prophet/LSTM benchmarks) - Anomaly detection ensemble (IF+LOF+COPOD, AUC=0.9935) Key finding: volatility is predictable (GARCH persistence=0.973), but price direction is statistically indistinguishable from random walk. Includes REPORT.md with 16-section analysis report and future projections, 70+ charts in output/, and all source modules in src/. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 10:29:54 +08:00
parent 2ed3be1812
commit 277a5f067d
96 changed files with 13218 additions and 0 deletions
--- a/src/anomaly.py
+++ b/src/anomaly.py
@@ -0,0 +1,774 @@
+"""异常检测与前兆模式提取模块
+
+分析内容：
+- 集成异常检测（Isolation Forest + LOF + COPOD，≥2/3 一致判定）
+- GARCH 条件波动率异常检测（标准化残差 > 3）
+- 异常前兆模式提取（Random Forest 分类器）
+- 事件对齐分析（比特币减半等重大事件）
+- 可视化：异常标记价格图、特征分布对比、ROC 曲线、特征重要性
+"""
+
+import matplotlib
+matplotlib.use('Agg')
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import warnings
+from pathlib import Path
+from typing import Optional, Dict, List, Tuple
+
+from sklearn.ensemble import IsolationForest, RandomForestClassifier
+from sklearn.neighbors import LocalOutlierFactor
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import cross_val_predict, StratifiedKFold
+from sklearn.metrics import roc_auc_score, roc_curve
+
+try:
+    from pyod.models.copod import COPOD
+    HAS_COPOD = True
+except ImportError:
+    HAS_COPOD = False
+    print("[警告] pyod 未安装，COPOD 检测将跳过，使用 2/2 一致判定")
+
+
+# ============================================================
+# 1. 检测特征定义
+# ============================================================
+
+# 用于异常检测的特征列
+DETECTION_FEATURES = [
+    'log_return',
+    'abs_return',
+    'volume_ratio',
+    'range_pct',
+    'taker_buy_ratio',
+    'vol_7d',
+]
+
+# 比特币减半及其他重大事件日期
+KNOWN_EVENTS = {
+    '2012-11-28': '第一次减半',
+    '2016-07-09': '第二次减半',
+    '2020-05-11': '第三次减半',
+    '2024-04-20': '第四次减半',
+    '2017-12-17': '2017年牛市顶点',
+    '2018-12-15': '2018年熊市底部',
+    '2020-03-12': '新冠黑色星期四',
+    '2021-04-14': '2021年牛市中期高点',
+    '2021-11-10': '2021年牛市顶点',
+    '2022-06-18': 'Luna/3AC 暴跌',
+    '2022-11-09': 'FTX 崩盘',
+    '2024-01-11': 'BTC ETF 获批',
+}
+
+
+# ============================================================
+# 2. 集成异常检测
+# ============================================================
+
+def prepare_features(df: pd.DataFrame) -> Tuple[pd.DataFrame, np.ndarray]:
+    """
+    准备异常检测特征矩阵
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        含衍生特征的日线数据
+
+    Returns
+    -------
+    features_df : pd.DataFrame
+        特征子集（已去除 NaN）
+    X_scaled : np.ndarray
+        标准化后的特征矩阵
+    """
+    # 选取可用特征
+    available = [f for f in DETECTION_FEATURES if f in df.columns]
+    if len(available) < 3:
+        raise ValueError(f"可用特征不足: {available}，至少需要 3 个")
+
+    features_df = df[available].dropna()
+
+    # 标准化
+    scaler = StandardScaler()
+    X_scaled = scaler.fit_transform(features_df.values)
+
+    return features_df, X_scaled
+
+
+def detect_isolation_forest(X: np.ndarray, contamination: float = 0.05) -> np.ndarray:
+    """Isolation Forest 异常检测"""
+    model = IsolationForest(
+        n_estimators=200,
+        contamination=contamination,
+        random_state=42,
+        n_jobs=-1,
+    )
+    # -1 = 异常, 1 = 正常
+    labels = model.fit_predict(X)
+    return (labels == -1).astype(int)
+
+
+def detect_lof(X: np.ndarray, contamination: float = 0.05) -> np.ndarray:
+    """Local Outlier Factor 异常检测"""
+    model = LocalOutlierFactor(
+        n_neighbors=20,
+        contamination=contamination,
+        novelty=False,
+        n_jobs=-1,
+    )
+    labels = model.fit_predict(X)
+    return (labels == -1).astype(int)
+
+
+def detect_copod(X: np.ndarray, contamination: float = 0.05) -> np.ndarray:
+    """COPOD 异常检测（基于 Copula）"""
+    if not HAS_COPOD:
+        return None
+
+    model = COPOD(contamination=contamination)
+    labels = model.fit_predict(X)
+    return labels.astype(int)
+
+
+def ensemble_anomaly_detection(
+    df: pd.DataFrame,
+    contamination: float = 0.05,
+    min_agreement: int = 2,
+) -> pd.DataFrame:
+    """
+    集成异常检测：要求 ≥ min_agreement / n_methods 一致判定
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        含衍生特征的日线数据
+    contamination : float
+        预期异常比例
+    min_agreement : int
+        最少多少个方法一致才标记为异常
+
+    Returns
+    -------
+    pd.DataFrame
+        添加了各方法检测结果及集成结果的数据
+    """
+    features_df, X_scaled = prepare_features(df)
+
+    print(f"  特征矩阵: {X_scaled.shape[0]} 样本 x {X_scaled.shape[1]} 特征")
+
+    # 执行各方法检测
+    print("  [1/3] Isolation Forest...")
+    if_labels = detect_isolation_forest(X_scaled, contamination)
+
+    print("  [2/3] Local Outlier Factor...")
+    lof_labels = detect_lof(X_scaled, contamination)
+
+    n_methods = 2
+    vote_matrix = np.column_stack([if_labels, lof_labels])
+    method_names = ['iforest', 'lof']
+
+    print("  [3/3] COPOD...")
+    copod_labels = detect_copod(X_scaled, contamination)
+    if copod_labels is not None:
+        vote_matrix = np.column_stack([vote_matrix, copod_labels])
+        method_names.append('copod')
+        n_methods = 3
+    else:
+        print("    COPOD 不可用，使用 2 方法集成")
+
+    # 投票
+    vote_sum = vote_matrix.sum(axis=1)
+    ensemble_label = (vote_sum >= min_agreement).astype(int)
+
+    # 构建结果 DataFrame
+    result = features_df.copy()
+    for i, name in enumerate(method_names):
+        result[f'anomaly_{name}'] = vote_matrix[:, i]
+    result['anomaly_votes'] = vote_sum
+    result['anomaly_ensemble'] = ensemble_label
+
+    # 打印各方法统计
+    print(f"\n  异常检测统计:")
+    for name in method_names:
+        n_anom = result[f'anomaly_{name}'].sum()
+        print(f"    {name:>12}: {n_anom} 个异常 ({n_anom / len(result) * 100:.2f}%)")
+    n_ensemble = ensemble_label.sum()
+    print(f"    {'集成(≥' + str(min_agreement) + ')':>12}: {n_ensemble} 个异常 ({n_ensemble / len(result) * 100:.2f}%)")
+
+    # 方法间重叠度
+    print(f"\n  方法间重叠:")
+    for i in range(len(method_names)):
+        for j in range(i + 1, len(method_names)):
+            overlap = ((vote_matrix[:, i] == 1) & (vote_matrix[:, j] == 1)).sum()
+            n_i = vote_matrix[:, i].sum()
+            n_j = vote_matrix[:, j].sum()
+            if min(n_i, n_j) > 0:
+                jaccard = overlap / ((vote_matrix[:, i] == 1) | (vote_matrix[:, j] == 1)).sum()
+            else:
+                jaccard = 0.0
+            print(f"    {method_names[i]} ∩ {method_names[j]}: "
+                  f"{overlap} 个 (Jaccard={jaccard:.3f})")
+
+    return result
+
+
+# ============================================================
+# 3. GARCH 条件波动率异常
+# ============================================================
+
+def garch_anomaly_detection(
+    df: pd.DataFrame,
+    threshold: float = 3.0,
+) -> pd.Series:
+    """
+    基于 GARCH(1,1) 的条件波动率异常检测
+
+    标准化残差 |ε_t / σ_t| > threshold 的日期标记为异常
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        含 log_return 列的数据
+    threshold : float
+        标准化残差阈值
+
+    Returns
+    -------
+    pd.Series
+        异常标记（1 = 异常，0 = 正常），索引与输入对齐
+    """
+    from arch import arch_model
+
+    returns = df['log_return'].dropna()
+    r_pct = returns * 100  # arch 库使用百分比收益率
+
+    # 拟合 GARCH(1,1)
+    model = arch_model(r_pct, vol='Garch', p=1, q=1, mean='Constant', dist='Normal')
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        result = model.fit(disp='off')
+
+    # 计算标准化残差
+    std_resid = result.resid / result.conditional_volatility
+    anomaly = (std_resid.abs() > threshold).astype(int)
+
+    n_anom = anomaly.sum()
+    print(f"  GARCH 异常: {n_anom} 个 (|标准化残差| > {threshold})")
+    print(f"  GARCH 模型: α={result.params.get('alpha[1]', np.nan):.4f}, "
+          f"β={result.params.get('beta[1]', np.nan):.4f}, "
+          f"持续性={result.params.get('alpha[1]', 0) + result.params.get('beta[1]', 0):.4f}")
+
+    return anomaly
+
+
+# ============================================================
+# 4. 前兆模式提取
+# ============================================================
+
+def extract_precursor_features(
+    df: pd.DataFrame,
+    anomaly_labels: pd.Series,
+    lookback_windows: List[int] = None,
+) -> Tuple[pd.DataFrame, pd.Series]:
+    """
+    提取异常日前若干天的特征作为前兆信号
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        含衍生特征的数据
+    anomaly_labels : pd.Series
+        异常标记（1 = 异常）
+    lookback_windows : list of int
+        向前回溯的天数窗口
+
+    Returns
+    -------
+    X : pd.DataFrame
+        前兆特征矩阵
+    y : pd.Series
+        标签（1 = 后续发生异常, 0 = 正常）
+    """
+    if lookback_windows is None:
+        lookback_windows = [5, 10, 20]
+
+    # 确保对齐
+    common_idx = df.index.intersection(anomaly_labels.index)
+    df_aligned = df.loc[common_idx]
+    labels_aligned = anomaly_labels.loc[common_idx]
+
+    base_features = [f for f in DETECTION_FEATURES if f in df.columns]
+    precursor_features = {}
+
+    for window in lookback_windows:
+        for feat in base_features:
+            if feat not in df_aligned.columns:
+                continue
+            series = df_aligned[feat]
+
+            # 滚动统计作为前兆特征
+            precursor_features[f'{feat}_mean_{window}d'] = series.rolling(window).mean()
+            precursor_features[f'{feat}_std_{window}d'] = series.rolling(window).std()
+            precursor_features[f'{feat}_max_{window}d'] = series.rolling(window).max()
+            precursor_features[f'{feat}_min_{window}d'] = series.rolling(window).min()
+
+            # 趋势特征（最近值 vs 窗口均值的偏离）
+            rolling_mean = series.rolling(window).mean()
+            precursor_features[f'{feat}_deviation_{window}d'] = series - rolling_mean
+
+    X = pd.DataFrame(precursor_features, index=df_aligned.index)
+
+    # 标签: 未来是否出现异常（shift(-1) 使得特征是"之前"的）
+    # 我们用当前特征预测当天是否异常
+    y = labels_aligned
+
+    # 去除 NaN
+    valid_mask = X.notna().all(axis=1) & y.notna()
+    X = X[valid_mask]
+    y = y[valid_mask]
+
+    return X, y
+
+
+def train_precursor_classifier(
+    X: pd.DataFrame,
+    y: pd.Series,
+) -> Dict:
+    """
+    训练前兆模式分类器（Random Forest）
+
+    使用分层 K 折交叉验证评估
+
+    Parameters
+    ----------
+    X : pd.DataFrame
+        前兆特征矩阵
+    y : pd.Series
+        标签
+
+    Returns
+    -------
+    dict
+        AUC、特征重要性等结果
+    """
+    if len(X) < 50 or y.sum() < 10:
+        print(f"  [警告] 样本不足 (n={len(X)}, 正例={y.sum()})，跳过分类器训练")
+        return {}
+
+    # 标准化
+    scaler = StandardScaler()
+    X_scaled = scaler.fit_transform(X)
+
+    # 分层 K 折
+    n_splits = min(5, int(y.sum()))
+    if n_splits < 2:
+        print("  [警告] 正例数过少，无法进行交叉验证")
+        return {}
+
+    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
+
+    clf = RandomForestClassifier(
+        n_estimators=200,
+        max_depth=10,
+        min_samples_split=5,
+        class_weight='balanced',
+        random_state=42,
+        n_jobs=-1,
+    )
+
+    # 交叉验证预测概率
+    try:
+        y_prob = cross_val_predict(clf, X_scaled, y, cv=cv, method='predict_proba')[:, 1]
+        auc = roc_auc_score(y, y_prob)
+    except Exception as e:
+        print(f"  [错误] 交叉验证失败: {e}")
+        return {}
+
+    # 在全量数据上训练获取特征重要性
+    clf.fit(X_scaled, y)
+    importances = pd.Series(clf.feature_importances_, index=X.columns)
+    importances = importances.sort_values(ascending=False)
+
+    # ROC 曲线数据
+    fpr, tpr, thresholds = roc_curve(y, y_prob)
+
+    results = {
+        'auc': auc,
+        'feature_importances': importances,
+        'y_true': y,
+        'y_prob': y_prob,
+        'fpr': fpr,
+        'tpr': tpr,
+    }
+
+    print(f"\n  前兆分类器结果:")
+    print(f"    AUC: {auc:.4f}")
+    print(f"    样本: {len(y)} (异常: {y.sum()}, 正常: {(y == 0).sum()})")
+    print(f"    Top-10 重要特征:")
+    for feat, imp in importances.head(10).items():
+        print(f"      {feat:<40} {imp:.4f}")
+
+    return results
+
+
+# ============================================================
+# 5. 事件对齐分析
+# ============================================================
+
+def align_with_events(
+    anomaly_dates: pd.DatetimeIndex,
+    tolerance_days: int = 5,
+) -> pd.DataFrame:
+    """
+    将异常日期与已知事件对齐
+
+    Parameters
+    ----------
+    anomaly_dates : pd.DatetimeIndex
+        异常日期列表
+    tolerance_days : int
+        容差天数（异常日期与事件日期相差 ≤ tolerance_days 天即视为匹配）
+
+    Returns
+    -------
+    pd.DataFrame
+        匹配结果
+    """
+    matches = []
+
+    for event_date_str, event_name in KNOWN_EVENTS.items():
+        event_date = pd.Timestamp(event_date_str)
+
+        for anom_date in anomaly_dates:
+            diff_days = abs((anom_date - event_date).days)
+            if diff_days <= tolerance_days:
+                matches.append({
+                    'anomaly_date': anom_date,
+                    'event_date': event_date,
+                    'event_name': event_name,
+                    'diff_days': diff_days,
+                })
+
+    if matches:
+        result = pd.DataFrame(matches)
+        print(f"\n  事件对齐 (容差 {tolerance_days} 天):")
+        for _, row in result.iterrows():
+            print(f"    异常 {row['anomaly_date'].strftime('%Y-%m-%d')} ↔ "
+                  f"{row['event_name']} ({row['event_date'].strftime('%Y-%m-%d')}, "
+                  f"差 {row['diff_days']} 天)")
+        return result
+    else:
+        print(f"  [信息] 无异常日期与已知事件匹配 (容差 {tolerance_days} 天)")
+        return pd.DataFrame()
+
+
+# ============================================================
+# 6. 可视化
+# ============================================================
+
+def plot_price_with_anomalies(
+    df: pd.DataFrame,
+    anomaly_result: pd.DataFrame,
+    garch_anomaly: Optional[pd.Series],
+    output_dir: Path,
+):
+    """绘制价格图，标注异常点"""
+    fig, axes = plt.subplots(2, 1, figsize=(16, 10), gridspec_kw={'height_ratios': [3, 1]})
+
+    # 上图：价格 + 异常标记
+    ax1 = axes[0]
+    ax1.plot(df.index, df['close'], linewidth=0.6, color='steelblue', alpha=0.8, label='BTC 收盘价')
+
+    # 集成异常
+    ensemble_anom = anomaly_result[anomaly_result['anomaly_ensemble'] == 1]
+    if not ensemble_anom.empty:
+        # 获取异常日期对应的收盘价
+        anom_prices = df.loc[df.index.isin(ensemble_anom.index), 'close']
+        ax1.scatter(anom_prices.index, anom_prices.values,
+                    color='red', s=30, zorder=5, label=f'集成异常 (n={len(anom_prices)})',
+                    alpha=0.7, edgecolors='darkred', linewidths=0.5)
+
+    # GARCH 异常
+    if garch_anomaly is not None:
+        garch_anom_dates = garch_anomaly[garch_anomaly == 1].index
+        garch_prices = df.loc[df.index.isin(garch_anom_dates), 'close']
+        if not garch_prices.empty:
+            ax1.scatter(garch_prices.index, garch_prices.values,
+                        color='orange', s=20, zorder=4, marker='^',
+                        label=f'GARCH 异常 (n={len(garch_prices)})',
+                        alpha=0.7, edgecolors='darkorange', linewidths=0.5)
+
+    ax1.set_ylabel('价格 (USDT)', fontsize=12)
+    ax1.set_title('BTC 价格与异常检测结果', fontsize=14)
+    ax1.legend(fontsize=10, loc='upper left')
+    ax1.grid(True, alpha=0.3)
+    ax1.set_yscale('log')
+
+    # 下图：成交量 + 异常标记
+    ax2 = axes[1]
+    if 'volume' in df.columns:
+        ax2.bar(df.index, df['volume'], width=1, color='steelblue', alpha=0.4, label='成交量')
+        if not ensemble_anom.empty:
+            anom_vol = df.loc[df.index.isin(ensemble_anom.index), 'volume']
+            ax2.bar(anom_vol.index, anom_vol.values, width=1, color='red', alpha=0.7, label='异常日成交量')
+    ax2.set_ylabel('成交量', fontsize=12)
+    ax2.set_xlabel('日期', fontsize=12)
+    ax2.legend(fontsize=10)
+    ax2.grid(True, alpha=0.3)
+
+    fig.tight_layout()
+    fig.savefig(output_dir / 'anomaly_price_chart.png', dpi=150, bbox_inches='tight')
+    plt.close(fig)
+    print(f"  [保存] {output_dir / 'anomaly_price_chart.png'}")
+
+
+def plot_anomaly_feature_distributions(
+    anomaly_result: pd.DataFrame,
+    output_dir: Path,
+):
+    """绘制异常日 vs 正常日的特征分布对比"""
+    features_to_plot = [f for f in DETECTION_FEATURES if f in anomaly_result.columns]
+    n_feats = len(features_to_plot)
+    if n_feats == 0:
+        print("  [警告] 无可绘制特征")
+        return
+
+    n_cols = 3
+    n_rows = (n_feats + n_cols - 1) // n_cols
+
+    fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 4 * n_rows))
+    axes = np.array(axes).flatten()
+
+    normal = anomaly_result[anomaly_result['anomaly_ensemble'] == 0]
+    anomaly = anomaly_result[anomaly_result['anomaly_ensemble'] == 1]
+
+    for idx, feat in enumerate(features_to_plot):
+        ax = axes[idx]
+
+        # 正常分布
+        vals_normal = normal[feat].dropna()
+        vals_anomaly = anomaly[feat].dropna()
+
+        ax.hist(vals_normal, bins=50, density=True, alpha=0.6,
+                color='steelblue', label=f'正常 (n={len(vals_normal)})', edgecolor='white', linewidth=0.3)
+        if len(vals_anomaly) > 0:
+            ax.hist(vals_anomaly, bins=30, density=True, alpha=0.6,
+                    color='red', label=f'异常 (n={len(vals_anomaly)})', edgecolor='white', linewidth=0.3)
+
+        ax.set_title(feat, fontsize=11)
+        ax.legend(fontsize=8)
+        ax.grid(True, alpha=0.3)
+
+    # 隐藏多余子图
+    for idx in range(n_feats, len(axes)):
+        axes[idx].set_visible(False)
+
+    fig.suptitle('异常日 vs 正常日 特征分布对比', fontsize=14, y=1.02)
+    fig.tight_layout()
+    fig.savefig(output_dir / 'anomaly_feature_distributions.png', dpi=150, bbox_inches='tight')
+    plt.close(fig)
+    print(f"  [保存] {output_dir / 'anomaly_feature_distributions.png'}")
+
+
+def plot_precursor_roc(precursor_results: Dict, output_dir: Path):
+    """绘制前兆分类器 ROC 曲线"""
+    if not precursor_results or 'fpr' not in precursor_results:
+        print("  [警告] 无前兆分类器结果，跳过 ROC 曲线")
+        return
+
+    fig, ax = plt.subplots(figsize=(8, 8))
+
+    fpr = precursor_results['fpr']
+    tpr = precursor_results['tpr']
+    auc = precursor_results['auc']
+
+    ax.plot(fpr, tpr, color='steelblue', linewidth=2,
+            label=f'Random Forest (AUC = {auc:.4f})')
+    ax.plot([0, 1], [0, 1], 'k--', linewidth=1, label='随机基线')
+
+    ax.set_xlabel('假阳性率 (FPR)', fontsize=12)
+    ax.set_ylabel('真阳性率 (TPR)', fontsize=12)
+    ax.set_title('异常前兆分类器 ROC 曲线', fontsize=14)
+    ax.legend(fontsize=11)
+    ax.grid(True, alpha=0.3)
+    ax.set_xlim([-0.02, 1.02])
+    ax.set_ylim([-0.02, 1.02])
+
+    fig.savefig(output_dir / 'precursor_roc_curve.png', dpi=150, bbox_inches='tight')
+    plt.close(fig)
+    print(f"  [保存] {output_dir / 'precursor_roc_curve.png'}")
+
+
+def plot_feature_importance(precursor_results: Dict, output_dir: Path, top_n: int = 20):
+    """绘制前兆特征重要性条形图"""
+    if not precursor_results or 'feature_importances' not in precursor_results:
+        print("  [警告] 无特征重要性数据，跳过")
+        return
+
+    importances = precursor_results['feature_importances'].head(top_n)
+
+    fig, ax = plt.subplots(figsize=(10, max(6, top_n * 0.35)))
+
+    colors = plt.cm.RdYlBu_r(np.linspace(0.2, 0.8, len(importances)))
+    ax.barh(range(len(importances)), importances.values[::-1],
+            color=colors[::-1], edgecolor='white', linewidth=0.5)
+    ax.set_yticks(range(len(importances)))
+    ax.set_yticklabels(importances.index[::-1], fontsize=9)
+    ax.set_xlabel('特征重要性', fontsize=12)
+    ax.set_title(f'异常前兆 Top-{top_n} 特征重要性 (Random Forest)', fontsize=13)
+    ax.grid(True, alpha=0.3, axis='x')
+
+    fig.savefig(output_dir / 'precursor_feature_importance.png', dpi=150, bbox_inches='tight')
+    plt.close(fig)
+    print(f"  [保存] {output_dir / 'precursor_feature_importance.png'}")
+
+
+# ============================================================
+# 7. 结果打印
+# ============================================================
+
+def print_anomaly_summary(
+    anomaly_result: pd.DataFrame,
+    garch_anomaly: Optional[pd.Series],
+    precursor_results: Dict,
+):
+    """打印异常检测汇总"""
+    print("\n" + "=" * 70)
+    print("异常检测结果汇总")
+    print("=" * 70)
+
+    # 集成异常统计
+    n_total = len(anomaly_result)
+    n_ensemble = anomaly_result['anomaly_ensemble'].sum()
+    print(f"\n  总样本数: {n_total}")
+    print(f"  集成异常数: {n_ensemble} ({n_ensemble / n_total * 100:.2f}%)")
+
+    # 各方法统计
+    method_cols = [c for c in anomaly_result.columns if c.startswith('anomaly_') and c != 'anomaly_ensemble' and c != 'anomaly_votes']
+    for col in method_cols:
+        method_name = col.replace('anomaly_', '')
+        n_anom = anomaly_result[col].sum()
+        print(f"    {method_name:>12}: {n_anom} ({n_anom / n_total * 100:.2f}%)")
+
+    # GARCH 异常
+    if garch_anomaly is not None:
+        n_garch = garch_anomaly.sum()
+        print(f"    {'GARCH':>12}: {n_garch} ({n_garch / len(garch_anomaly) * 100:.2f}%)")
+
+        # 集成异常与 GARCH 异常的重叠
+        common_idx = anomaly_result.index.intersection(garch_anomaly.index)
+        if len(common_idx) > 0:
+            ensemble_set = set(anomaly_result.loc[common_idx][anomaly_result.loc[common_idx, 'anomaly_ensemble'] == 1].index)
+            garch_set = set(garch_anomaly[garch_anomaly == 1].index)
+            overlap = len(ensemble_set & garch_set)
+            print(f"\n  集成 ∩ GARCH 重叠: {overlap} 个")
+
+    # 前兆分类器
+    if precursor_results and 'auc' in precursor_results:
+        print(f"\n  前兆分类器 AUC: {precursor_results['auc']:.4f}")
+        print(f"  Top-5 前兆特征:")
+        for feat, imp in precursor_results['feature_importances'].head(5).items():
+            print(f"    {feat:<40} {imp:.4f}")
+
+
+# ============================================================
+# 8. 主入口
+# ============================================================
+
+def run_anomaly_analysis(
+    df: pd.DataFrame,
+    output_dir: str = "output/anomaly",
+) -> Dict:
+    """
+    异常检测与前兆模式分析主函数
+
+    Parameters
+    ----------
+    df : pd.DataFrame
+        日线数据（已通过 add_derived_features 添加衍生特征）
+    output_dir : str
+        图表输出目录
+
+    Returns
+    -------
+    dict
+        包含所有分析结果的字典
+    """
+    output_dir = Path(output_dir)
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    print("=" * 70)
+    print("BTC 异常检测与前兆模式分析")
+    print("=" * 70)
+    print(f"数据范围: {df.index.min()} ~ {df.index.max()}")
+    print(f"样本数量: {len(df)}")
+
+    # 设置中文字体
+    plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
+    plt.rcParams['axes.unicode_minus'] = False
+
+    # --- 集成异常检测 ---
+    print("\n>>> [1/5] 执行集成异常检测...")
+    anomaly_result = ensemble_anomaly_detection(df, contamination=0.05, min_agreement=2)
+
+    # --- GARCH 条件波动率异常 ---
+    print("\n>>> [2/5] 执行 GARCH 条件波动率异常检测...")
+    garch_anomaly = None
+    try:
+        garch_anomaly = garch_anomaly_detection(df, threshold=3.0)
+    except Exception as e:
+        print(f"  [错误] GARCH 异常检测失败: {e}")
+
+    # --- 事件对齐 ---
+    print("\n>>> [3/5] 执行事件对齐分析...")
+    ensemble_anom_dates = anomaly_result[anomaly_result['anomaly_ensemble'] == 1].index
+    event_alignment = align_with_events(ensemble_anom_dates, tolerance_days=5)
+
+    # --- 前兆模式提取 ---
+    print("\n>>> [4/5] 提取前兆模式并训练分类器...")
+    precursor_results = {}
+    try:
+        X_precursor, y_precursor = extract_precursor_features(
+            df, anomaly_result['anomaly_ensemble'], lookback_windows=[5, 10, 20]
+        )
+        print(f"  前兆特征矩阵: {X_precursor.shape[0]} 样本 x {X_precursor.shape[1]} 特征")
+        precursor_results = train_precursor_classifier(X_precursor, y_precursor)
+    except Exception as e:
+        print(f"  [错误] 前兆模式提取失败: {e}")
+
+    # --- 可视化 ---
+    print("\n>>> [5/5] 生成可视化图表...")
+    plot_price_with_anomalies(df, anomaly_result, garch_anomaly, output_dir)
+    plot_anomaly_feature_distributions(anomaly_result, output_dir)
+    plot_precursor_roc(precursor_results, output_dir)
+    plot_feature_importance(precursor_results, output_dir)
+
+    # --- 汇总打印 ---
+    print_anomaly_summary(anomaly_result, garch_anomaly, precursor_results)
+
+    print("\n" + "=" * 70)
+    print("异常检测与前兆模式分析完成！")
+    print(f"图表已保存至: {output_dir.resolve()}")
+    print("=" * 70)
+
+    return {
+        'anomaly_result': anomaly_result,
+        'garch_anomaly': garch_anomaly,
+        'event_alignment': event_alignment,
+        'precursor_results': precursor_results,
+    }
+
+
+# ============================================================
+# 独立运行入口
+# ============================================================
+
+if __name__ == '__main__':
+    from src.data_loader import load_daily
+    from src.preprocessing import add_derived_features
+
+    df = load_daily()
+    df = add_derived_features(df)
+    run_anomaly_analysis(df)