Add comprehensive BTC/USDT price analysis framework with 17 modules

Complete statistical analysis pipeline covering:
- FFT spectral analysis, wavelet CWT, ACF/PACF autocorrelation
- Returns distribution (fat tails, kurtosis=15.65), GARCH volatility modeling
- Hurst exponent (H=0.593), fractal dimension, power law corridor
- Volume-price causality (Granger), calendar effects, halving cycle analysis
- Technical indicator validation (0/21 pass FDR), candlestick pattern testing
- Market state clustering (K-Means/GMM), Markov chain transitions
- Time series forecasting (ARIMA/Prophet/LSTM benchmarks)
- Anomaly detection ensemble (IF+LOF+COPOD, AUC=0.9935)

Key finding: volatility is predictable (GARCH persistence=0.973),
but price direction is statistically indistinguishable from random walk.

Includes REPORT.md with 16-section analysis report and future projections,
70+ charts in output/, and all source modules in src/.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-03 10:29:54 +08:00
parent 2ed3be1812
commit 277a5f067d
96 changed files with 13218 additions and 0 deletions

774
src/anomaly.py Normal file
View File

@@ -0,0 +1,774 @@
"""异常检测与前兆模式提取模块
分析内容:
- 集成异常检测Isolation Forest + LOF + COPOD≥2/3 一致判定)
- GARCH 条件波动率异常检测(标准化残差 > 3
- 异常前兆模式提取Random Forest 分类器)
- 事件对齐分析(比特币减半等重大事件)
- 可视化异常标记价格图、特征分布对比、ROC 曲线、特征重要性
"""
import matplotlib
matplotlib.use('Agg')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
from pathlib import Path
from typing import Optional, Dict, List, Tuple
from sklearn.ensemble import IsolationForest, RandomForestClassifier
from sklearn.neighbors import LocalOutlierFactor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_predict, StratifiedKFold
from sklearn.metrics import roc_auc_score, roc_curve
try:
from pyod.models.copod import COPOD
HAS_COPOD = True
except ImportError:
HAS_COPOD = False
print("[警告] pyod 未安装COPOD 检测将跳过,使用 2/2 一致判定")
# ============================================================
# 1. 检测特征定义
# ============================================================
# 用于异常检测的特征列
DETECTION_FEATURES = [
'log_return',
'abs_return',
'volume_ratio',
'range_pct',
'taker_buy_ratio',
'vol_7d',
]
# 比特币减半及其他重大事件日期
KNOWN_EVENTS = {
'2012-11-28': '第一次减半',
'2016-07-09': '第二次减半',
'2020-05-11': '第三次减半',
'2024-04-20': '第四次减半',
'2017-12-17': '2017年牛市顶点',
'2018-12-15': '2018年熊市底部',
'2020-03-12': '新冠黑色星期四',
'2021-04-14': '2021年牛市中期高点',
'2021-11-10': '2021年牛市顶点',
'2022-06-18': 'Luna/3AC 暴跌',
'2022-11-09': 'FTX 崩盘',
'2024-01-11': 'BTC ETF 获批',
}
# ============================================================
# 2. 集成异常检测
# ============================================================
def prepare_features(df: pd.DataFrame) -> Tuple[pd.DataFrame, np.ndarray]:
"""
准备异常检测特征矩阵
Parameters
----------
df : pd.DataFrame
含衍生特征的日线数据
Returns
-------
features_df : pd.DataFrame
特征子集(已去除 NaN
X_scaled : np.ndarray
标准化后的特征矩阵
"""
# 选取可用特征
available = [f for f in DETECTION_FEATURES if f in df.columns]
if len(available) < 3:
raise ValueError(f"可用特征不足: {available},至少需要 3 个")
features_df = df[available].dropna()
# 标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(features_df.values)
return features_df, X_scaled
def detect_isolation_forest(X: np.ndarray, contamination: float = 0.05) -> np.ndarray:
"""Isolation Forest 异常检测"""
model = IsolationForest(
n_estimators=200,
contamination=contamination,
random_state=42,
n_jobs=-1,
)
# -1 = 异常, 1 = 正常
labels = model.fit_predict(X)
return (labels == -1).astype(int)
def detect_lof(X: np.ndarray, contamination: float = 0.05) -> np.ndarray:
"""Local Outlier Factor 异常检测"""
model = LocalOutlierFactor(
n_neighbors=20,
contamination=contamination,
novelty=False,
n_jobs=-1,
)
labels = model.fit_predict(X)
return (labels == -1).astype(int)
def detect_copod(X: np.ndarray, contamination: float = 0.05) -> np.ndarray:
"""COPOD 异常检测(基于 Copula"""
if not HAS_COPOD:
return None
model = COPOD(contamination=contamination)
labels = model.fit_predict(X)
return labels.astype(int)
def ensemble_anomaly_detection(
df: pd.DataFrame,
contamination: float = 0.05,
min_agreement: int = 2,
) -> pd.DataFrame:
"""
集成异常检测:要求 ≥ min_agreement / n_methods 一致判定
Parameters
----------
df : pd.DataFrame
含衍生特征的日线数据
contamination : float
预期异常比例
min_agreement : int
最少多少个方法一致才标记为异常
Returns
-------
pd.DataFrame
添加了各方法检测结果及集成结果的数据
"""
features_df, X_scaled = prepare_features(df)
print(f" 特征矩阵: {X_scaled.shape[0]} 样本 x {X_scaled.shape[1]} 特征")
# 执行各方法检测
print(" [1/3] Isolation Forest...")
if_labels = detect_isolation_forest(X_scaled, contamination)
print(" [2/3] Local Outlier Factor...")
lof_labels = detect_lof(X_scaled, contamination)
n_methods = 2
vote_matrix = np.column_stack([if_labels, lof_labels])
method_names = ['iforest', 'lof']
print(" [3/3] COPOD...")
copod_labels = detect_copod(X_scaled, contamination)
if copod_labels is not None:
vote_matrix = np.column_stack([vote_matrix, copod_labels])
method_names.append('copod')
n_methods = 3
else:
print(" COPOD 不可用,使用 2 方法集成")
# 投票
vote_sum = vote_matrix.sum(axis=1)
ensemble_label = (vote_sum >= min_agreement).astype(int)
# 构建结果 DataFrame
result = features_df.copy()
for i, name in enumerate(method_names):
result[f'anomaly_{name}'] = vote_matrix[:, i]
result['anomaly_votes'] = vote_sum
result['anomaly_ensemble'] = ensemble_label
# 打印各方法统计
print(f"\n 异常检测统计:")
for name in method_names:
n_anom = result[f'anomaly_{name}'].sum()
print(f" {name:>12}: {n_anom} 个异常 ({n_anom / len(result) * 100:.2f}%)")
n_ensemble = ensemble_label.sum()
print(f" {'集成(≥' + str(min_agreement) + ')':>12}: {n_ensemble} 个异常 ({n_ensemble / len(result) * 100:.2f}%)")
# 方法间重叠度
print(f"\n 方法间重叠:")
for i in range(len(method_names)):
for j in range(i + 1, len(method_names)):
overlap = ((vote_matrix[:, i] == 1) & (vote_matrix[:, j] == 1)).sum()
n_i = vote_matrix[:, i].sum()
n_j = vote_matrix[:, j].sum()
if min(n_i, n_j) > 0:
jaccard = overlap / ((vote_matrix[:, i] == 1) | (vote_matrix[:, j] == 1)).sum()
else:
jaccard = 0.0
print(f" {method_names[i]}{method_names[j]}: "
f"{overlap} 个 (Jaccard={jaccard:.3f})")
return result
# ============================================================
# 3. GARCH 条件波动率异常
# ============================================================
def garch_anomaly_detection(
df: pd.DataFrame,
threshold: float = 3.0,
) -> pd.Series:
"""
基于 GARCH(1,1) 的条件波动率异常检测
标准化残差 |ε_t / σ_t| > threshold 的日期标记为异常
Parameters
----------
df : pd.DataFrame
含 log_return 列的数据
threshold : float
标准化残差阈值
Returns
-------
pd.Series
异常标记1 = 异常0 = 正常),索引与输入对齐
"""
from arch import arch_model
returns = df['log_return'].dropna()
r_pct = returns * 100 # arch 库使用百分比收益率
# 拟合 GARCH(1,1)
model = arch_model(r_pct, vol='Garch', p=1, q=1, mean='Constant', dist='Normal')
with warnings.catch_warnings():
warnings.simplefilter("ignore")
result = model.fit(disp='off')
# 计算标准化残差
std_resid = result.resid / result.conditional_volatility
anomaly = (std_resid.abs() > threshold).astype(int)
n_anom = anomaly.sum()
print(f" GARCH 异常: {n_anom} 个 (|标准化残差| > {threshold})")
print(f" GARCH 模型: α={result.params.get('alpha[1]', np.nan):.4f}, "
f"β={result.params.get('beta[1]', np.nan):.4f}, "
f"持续性={result.params.get('alpha[1]', 0) + result.params.get('beta[1]', 0):.4f}")
return anomaly
# ============================================================
# 4. 前兆模式提取
# ============================================================
def extract_precursor_features(
df: pd.DataFrame,
anomaly_labels: pd.Series,
lookback_windows: List[int] = None,
) -> Tuple[pd.DataFrame, pd.Series]:
"""
提取异常日前若干天的特征作为前兆信号
Parameters
----------
df : pd.DataFrame
含衍生特征的数据
anomaly_labels : pd.Series
异常标记1 = 异常)
lookback_windows : list of int
向前回溯的天数窗口
Returns
-------
X : pd.DataFrame
前兆特征矩阵
y : pd.Series
标签1 = 后续发生异常, 0 = 正常)
"""
if lookback_windows is None:
lookback_windows = [5, 10, 20]
# 确保对齐
common_idx = df.index.intersection(anomaly_labels.index)
df_aligned = df.loc[common_idx]
labels_aligned = anomaly_labels.loc[common_idx]
base_features = [f for f in DETECTION_FEATURES if f in df.columns]
precursor_features = {}
for window in lookback_windows:
for feat in base_features:
if feat not in df_aligned.columns:
continue
series = df_aligned[feat]
# 滚动统计作为前兆特征
precursor_features[f'{feat}_mean_{window}d'] = series.rolling(window).mean()
precursor_features[f'{feat}_std_{window}d'] = series.rolling(window).std()
precursor_features[f'{feat}_max_{window}d'] = series.rolling(window).max()
precursor_features[f'{feat}_min_{window}d'] = series.rolling(window).min()
# 趋势特征(最近值 vs 窗口均值的偏离)
rolling_mean = series.rolling(window).mean()
precursor_features[f'{feat}_deviation_{window}d'] = series - rolling_mean
X = pd.DataFrame(precursor_features, index=df_aligned.index)
# 标签: 未来是否出现异常shift(-1) 使得特征是"之前"的)
# 我们用当前特征预测当天是否异常
y = labels_aligned
# 去除 NaN
valid_mask = X.notna().all(axis=1) & y.notna()
X = X[valid_mask]
y = y[valid_mask]
return X, y
def train_precursor_classifier(
X: pd.DataFrame,
y: pd.Series,
) -> Dict:
"""
训练前兆模式分类器Random Forest
使用分层 K 折交叉验证评估
Parameters
----------
X : pd.DataFrame
前兆特征矩阵
y : pd.Series
标签
Returns
-------
dict
AUC、特征重要性等结果
"""
if len(X) < 50 or y.sum() < 10:
print(f" [警告] 样本不足 (n={len(X)}, 正例={y.sum()}),跳过分类器训练")
return {}
# 标准化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
# 分层 K 折
n_splits = min(5, int(y.sum()))
if n_splits < 2:
print(" [警告] 正例数过少,无法进行交叉验证")
return {}
cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
clf = RandomForestClassifier(
n_estimators=200,
max_depth=10,
min_samples_split=5,
class_weight='balanced',
random_state=42,
n_jobs=-1,
)
# 交叉验证预测概率
try:
y_prob = cross_val_predict(clf, X_scaled, y, cv=cv, method='predict_proba')[:, 1]
auc = roc_auc_score(y, y_prob)
except Exception as e:
print(f" [错误] 交叉验证失败: {e}")
return {}
# 在全量数据上训练获取特征重要性
clf.fit(X_scaled, y)
importances = pd.Series(clf.feature_importances_, index=X.columns)
importances = importances.sort_values(ascending=False)
# ROC 曲线数据
fpr, tpr, thresholds = roc_curve(y, y_prob)
results = {
'auc': auc,
'feature_importances': importances,
'y_true': y,
'y_prob': y_prob,
'fpr': fpr,
'tpr': tpr,
}
print(f"\n 前兆分类器结果:")
print(f" AUC: {auc:.4f}")
print(f" 样本: {len(y)} (异常: {y.sum()}, 正常: {(y == 0).sum()})")
print(f" Top-10 重要特征:")
for feat, imp in importances.head(10).items():
print(f" {feat:<40} {imp:.4f}")
return results
# ============================================================
# 5. 事件对齐分析
# ============================================================
def align_with_events(
anomaly_dates: pd.DatetimeIndex,
tolerance_days: int = 5,
) -> pd.DataFrame:
"""
将异常日期与已知事件对齐
Parameters
----------
anomaly_dates : pd.DatetimeIndex
异常日期列表
tolerance_days : int
容差天数(异常日期与事件日期相差 ≤ tolerance_days 天即视为匹配)
Returns
-------
pd.DataFrame
匹配结果
"""
matches = []
for event_date_str, event_name in KNOWN_EVENTS.items():
event_date = pd.Timestamp(event_date_str)
for anom_date in anomaly_dates:
diff_days = abs((anom_date - event_date).days)
if diff_days <= tolerance_days:
matches.append({
'anomaly_date': anom_date,
'event_date': event_date,
'event_name': event_name,
'diff_days': diff_days,
})
if matches:
result = pd.DataFrame(matches)
print(f"\n 事件对齐 (容差 {tolerance_days} 天):")
for _, row in result.iterrows():
print(f" 异常 {row['anomaly_date'].strftime('%Y-%m-%d')}"
f"{row['event_name']} ({row['event_date'].strftime('%Y-%m-%d')}, "
f"{row['diff_days']} 天)")
return result
else:
print(f" [信息] 无异常日期与已知事件匹配 (容差 {tolerance_days} 天)")
return pd.DataFrame()
# ============================================================
# 6. 可视化
# ============================================================
def plot_price_with_anomalies(
df: pd.DataFrame,
anomaly_result: pd.DataFrame,
garch_anomaly: Optional[pd.Series],
output_dir: Path,
):
"""绘制价格图,标注异常点"""
fig, axes = plt.subplots(2, 1, figsize=(16, 10), gridspec_kw={'height_ratios': [3, 1]})
# 上图:价格 + 异常标记
ax1 = axes[0]
ax1.plot(df.index, df['close'], linewidth=0.6, color='steelblue', alpha=0.8, label='BTC 收盘价')
# 集成异常
ensemble_anom = anomaly_result[anomaly_result['anomaly_ensemble'] == 1]
if not ensemble_anom.empty:
# 获取异常日期对应的收盘价
anom_prices = df.loc[df.index.isin(ensemble_anom.index), 'close']
ax1.scatter(anom_prices.index, anom_prices.values,
color='red', s=30, zorder=5, label=f'集成异常 (n={len(anom_prices)})',
alpha=0.7, edgecolors='darkred', linewidths=0.5)
# GARCH 异常
if garch_anomaly is not None:
garch_anom_dates = garch_anomaly[garch_anomaly == 1].index
garch_prices = df.loc[df.index.isin(garch_anom_dates), 'close']
if not garch_prices.empty:
ax1.scatter(garch_prices.index, garch_prices.values,
color='orange', s=20, zorder=4, marker='^',
label=f'GARCH 异常 (n={len(garch_prices)})',
alpha=0.7, edgecolors='darkorange', linewidths=0.5)
ax1.set_ylabel('价格 (USDT)', fontsize=12)
ax1.set_title('BTC 价格与异常检测结果', fontsize=14)
ax1.legend(fontsize=10, loc='upper left')
ax1.grid(True, alpha=0.3)
ax1.set_yscale('log')
# 下图:成交量 + 异常标记
ax2 = axes[1]
if 'volume' in df.columns:
ax2.bar(df.index, df['volume'], width=1, color='steelblue', alpha=0.4, label='成交量')
if not ensemble_anom.empty:
anom_vol = df.loc[df.index.isin(ensemble_anom.index), 'volume']
ax2.bar(anom_vol.index, anom_vol.values, width=1, color='red', alpha=0.7, label='异常日成交量')
ax2.set_ylabel('成交量', fontsize=12)
ax2.set_xlabel('日期', fontsize=12)
ax2.legend(fontsize=10)
ax2.grid(True, alpha=0.3)
fig.tight_layout()
fig.savefig(output_dir / 'anomaly_price_chart.png', dpi=150, bbox_inches='tight')
plt.close(fig)
print(f" [保存] {output_dir / 'anomaly_price_chart.png'}")
def plot_anomaly_feature_distributions(
anomaly_result: pd.DataFrame,
output_dir: Path,
):
"""绘制异常日 vs 正常日的特征分布对比"""
features_to_plot = [f for f in DETECTION_FEATURES if f in anomaly_result.columns]
n_feats = len(features_to_plot)
if n_feats == 0:
print(" [警告] 无可绘制特征")
return
n_cols = 3
n_rows = (n_feats + n_cols - 1) // n_cols
fig, axes = plt.subplots(n_rows, n_cols, figsize=(5 * n_cols, 4 * n_rows))
axes = np.array(axes).flatten()
normal = anomaly_result[anomaly_result['anomaly_ensemble'] == 0]
anomaly = anomaly_result[anomaly_result['anomaly_ensemble'] == 1]
for idx, feat in enumerate(features_to_plot):
ax = axes[idx]
# 正常分布
vals_normal = normal[feat].dropna()
vals_anomaly = anomaly[feat].dropna()
ax.hist(vals_normal, bins=50, density=True, alpha=0.6,
color='steelblue', label=f'正常 (n={len(vals_normal)})', edgecolor='white', linewidth=0.3)
if len(vals_anomaly) > 0:
ax.hist(vals_anomaly, bins=30, density=True, alpha=0.6,
color='red', label=f'异常 (n={len(vals_anomaly)})', edgecolor='white', linewidth=0.3)
ax.set_title(feat, fontsize=11)
ax.legend(fontsize=8)
ax.grid(True, alpha=0.3)
# 隐藏多余子图
for idx in range(n_feats, len(axes)):
axes[idx].set_visible(False)
fig.suptitle('异常日 vs 正常日 特征分布对比', fontsize=14, y=1.02)
fig.tight_layout()
fig.savefig(output_dir / 'anomaly_feature_distributions.png', dpi=150, bbox_inches='tight')
plt.close(fig)
print(f" [保存] {output_dir / 'anomaly_feature_distributions.png'}")
def plot_precursor_roc(precursor_results: Dict, output_dir: Path):
"""绘制前兆分类器 ROC 曲线"""
if not precursor_results or 'fpr' not in precursor_results:
print(" [警告] 无前兆分类器结果,跳过 ROC 曲线")
return
fig, ax = plt.subplots(figsize=(8, 8))
fpr = precursor_results['fpr']
tpr = precursor_results['tpr']
auc = precursor_results['auc']
ax.plot(fpr, tpr, color='steelblue', linewidth=2,
label=f'Random Forest (AUC = {auc:.4f})')
ax.plot([0, 1], [0, 1], 'k--', linewidth=1, label='随机基线')
ax.set_xlabel('假阳性率 (FPR)', fontsize=12)
ax.set_ylabel('真阳性率 (TPR)', fontsize=12)
ax.set_title('异常前兆分类器 ROC 曲线', fontsize=14)
ax.legend(fontsize=11)
ax.grid(True, alpha=0.3)
ax.set_xlim([-0.02, 1.02])
ax.set_ylim([-0.02, 1.02])
fig.savefig(output_dir / 'precursor_roc_curve.png', dpi=150, bbox_inches='tight')
plt.close(fig)
print(f" [保存] {output_dir / 'precursor_roc_curve.png'}")
def plot_feature_importance(precursor_results: Dict, output_dir: Path, top_n: int = 20):
"""绘制前兆特征重要性条形图"""
if not precursor_results or 'feature_importances' not in precursor_results:
print(" [警告] 无特征重要性数据,跳过")
return
importances = precursor_results['feature_importances'].head(top_n)
fig, ax = plt.subplots(figsize=(10, max(6, top_n * 0.35)))
colors = plt.cm.RdYlBu_r(np.linspace(0.2, 0.8, len(importances)))
ax.barh(range(len(importances)), importances.values[::-1],
color=colors[::-1], edgecolor='white', linewidth=0.5)
ax.set_yticks(range(len(importances)))
ax.set_yticklabels(importances.index[::-1], fontsize=9)
ax.set_xlabel('特征重要性', fontsize=12)
ax.set_title(f'异常前兆 Top-{top_n} 特征重要性 (Random Forest)', fontsize=13)
ax.grid(True, alpha=0.3, axis='x')
fig.savefig(output_dir / 'precursor_feature_importance.png', dpi=150, bbox_inches='tight')
plt.close(fig)
print(f" [保存] {output_dir / 'precursor_feature_importance.png'}")
# ============================================================
# 7. 结果打印
# ============================================================
def print_anomaly_summary(
anomaly_result: pd.DataFrame,
garch_anomaly: Optional[pd.Series],
precursor_results: Dict,
):
"""打印异常检测汇总"""
print("\n" + "=" * 70)
print("异常检测结果汇总")
print("=" * 70)
# 集成异常统计
n_total = len(anomaly_result)
n_ensemble = anomaly_result['anomaly_ensemble'].sum()
print(f"\n 总样本数: {n_total}")
print(f" 集成异常数: {n_ensemble} ({n_ensemble / n_total * 100:.2f}%)")
# 各方法统计
method_cols = [c for c in anomaly_result.columns if c.startswith('anomaly_') and c != 'anomaly_ensemble' and c != 'anomaly_votes']
for col in method_cols:
method_name = col.replace('anomaly_', '')
n_anom = anomaly_result[col].sum()
print(f" {method_name:>12}: {n_anom} ({n_anom / n_total * 100:.2f}%)")
# GARCH 异常
if garch_anomaly is not None:
n_garch = garch_anomaly.sum()
print(f" {'GARCH':>12}: {n_garch} ({n_garch / len(garch_anomaly) * 100:.2f}%)")
# 集成异常与 GARCH 异常的重叠
common_idx = anomaly_result.index.intersection(garch_anomaly.index)
if len(common_idx) > 0:
ensemble_set = set(anomaly_result.loc[common_idx][anomaly_result.loc[common_idx, 'anomaly_ensemble'] == 1].index)
garch_set = set(garch_anomaly[garch_anomaly == 1].index)
overlap = len(ensemble_set & garch_set)
print(f"\n 集成 ∩ GARCH 重叠: {overlap}")
# 前兆分类器
if precursor_results and 'auc' in precursor_results:
print(f"\n 前兆分类器 AUC: {precursor_results['auc']:.4f}")
print(f" Top-5 前兆特征:")
for feat, imp in precursor_results['feature_importances'].head(5).items():
print(f" {feat:<40} {imp:.4f}")
# ============================================================
# 8. 主入口
# ============================================================
def run_anomaly_analysis(
df: pd.DataFrame,
output_dir: str = "output/anomaly",
) -> Dict:
"""
异常检测与前兆模式分析主函数
Parameters
----------
df : pd.DataFrame
日线数据(已通过 add_derived_features 添加衍生特征)
output_dir : str
图表输出目录
Returns
-------
dict
包含所有分析结果的字典
"""
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
print("=" * 70)
print("BTC 异常检测与前兆模式分析")
print("=" * 70)
print(f"数据范围: {df.index.min()} ~ {df.index.max()}")
print(f"样本数量: {len(df)}")
# 设置中文字体
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'SimHei', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False
# --- 集成异常检测 ---
print("\n>>> [1/5] 执行集成异常检测...")
anomaly_result = ensemble_anomaly_detection(df, contamination=0.05, min_agreement=2)
# --- GARCH 条件波动率异常 ---
print("\n>>> [2/5] 执行 GARCH 条件波动率异常检测...")
garch_anomaly = None
try:
garch_anomaly = garch_anomaly_detection(df, threshold=3.0)
except Exception as e:
print(f" [错误] GARCH 异常检测失败: {e}")
# --- 事件对齐 ---
print("\n>>> [3/5] 执行事件对齐分析...")
ensemble_anom_dates = anomaly_result[anomaly_result['anomaly_ensemble'] == 1].index
event_alignment = align_with_events(ensemble_anom_dates, tolerance_days=5)
# --- 前兆模式提取 ---
print("\n>>> [4/5] 提取前兆模式并训练分类器...")
precursor_results = {}
try:
X_precursor, y_precursor = extract_precursor_features(
df, anomaly_result['anomaly_ensemble'], lookback_windows=[5, 10, 20]
)
print(f" 前兆特征矩阵: {X_precursor.shape[0]} 样本 x {X_precursor.shape[1]} 特征")
precursor_results = train_precursor_classifier(X_precursor, y_precursor)
except Exception as e:
print(f" [错误] 前兆模式提取失败: {e}")
# --- 可视化 ---
print("\n>>> [5/5] 生成可视化图表...")
plot_price_with_anomalies(df, anomaly_result, garch_anomaly, output_dir)
plot_anomaly_feature_distributions(anomaly_result, output_dir)
plot_precursor_roc(precursor_results, output_dir)
plot_feature_importance(precursor_results, output_dir)
# --- 汇总打印 ---
print_anomaly_summary(anomaly_result, garch_anomaly, precursor_results)
print("\n" + "=" * 70)
print("异常检测与前兆模式分析完成!")
print(f"图表已保存至: {output_dir.resolve()}")
print("=" * 70)
return {
'anomaly_result': anomaly_result,
'garch_anomaly': garch_anomaly,
'event_alignment': event_alignment,
'precursor_results': precursor_results,
}
# ============================================================
# 独立运行入口
# ============================================================
if __name__ == '__main__':
from src.data_loader import load_daily
from src.preprocessing import add_derived_features
df = load_daily()
df = add_derived_features(df)
run_anomaly_analysis(df)