fix: 全面修复代码质量和报告准确性问题
代码修复 (16 个模块): - GARCH 模型统一改用 t 分布 + 收敛检查 (returns/volatility/anomaly) - KS 检验替换为 Lilliefors 检验 (returns) - 修复数据泄漏: StratifiedKFold→TimeSeriesSplit, scaler 逐折 fit (anomaly) - 前兆标签 shift(-1) 预测次日异常 (anomaly) - PSD 归一化加入采样频率和单边谱×2 (fft) - AR(1) 红噪声基线经验缩放 (fft) - 盒计数法独立 x/y 归一化, MF-DFA q=0 (fractal) - ADF 平稳性检验 + 移除双重 Bonferroni (causality) - R/S Hurst 添加 R² 拟合优度 (hurst) - Prophet 递推预测避免信息泄露 (time_series) - IC 计算过滤零信号, 中性形态 hit_rate=NaN (indicators/patterns) - 聚类阈值自适应化 (clustering) - 日历效应前后半段稳健性检查 (calendar) - 证据评分标准文本与代码对齐 (visualization) - 核心管道 NaN/空值防护 (data_loader/preprocessing/main) 报告修复 (docs/REPORT.md, 15 处): - 标度指数 H_scaling 与 Hurst 指数消歧 - GBM 6 个月概率锥数值重算 - CLT 限定、减半措辞弱化、情景概率逻辑修正 - GPD 形状参数解读修正、异常 AUC 证据降级 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -17,7 +17,7 @@ import warnings
|
||||
from pathlib import Path
|
||||
from typing import Optional, List, Tuple, Dict
|
||||
|
||||
from statsmodels.tsa.stattools import grangercausalitytests
|
||||
from statsmodels.tsa.stattools import grangercausalitytests, adfuller
|
||||
|
||||
from src.data_loader import load_hourly
|
||||
from src.preprocessing import log_returns, add_derived_features
|
||||
@@ -46,7 +46,20 @@ TEST_LAGS = [1, 2, 3, 5, 10]
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 2. 单对 Granger 因果检验
|
||||
# 2. ADF 平稳性检验辅助函数
|
||||
# ============================================================
|
||||
|
||||
def _check_stationarity(series, name: str, alpha: float = 0.05):
    """Run an ADF unit-root test and difference the series when it looks non-stationary.

    Args:
        series: pandas Series to test; NaN values are dropped before testing.
        name: Label used in the console messages.
        alpha: Significance level. A p-value above it is treated as
            "non-stationary" (the unit-root null is not rejected).

    Returns:
        A ``(series, differenced)`` tuple. When the ADF p-value exceeds
        ``alpha`` the first-differenced series (NaNs dropped) and ``True``
        are returned; otherwise the input series is returned unchanged
        together with ``False``. If the ADF test itself cannot be run, the
        input series is returned unchanged with ``False``.
    """
    try:
        result = adfuller(series.dropna(), autolag='AIC')
    except ValueError as exc:
        # adfuller raises ValueError for inputs it cannot test (e.g. a
        # constant series). Report it and fall back to the raw series so a
        # single degenerate column does not abort the whole analysis.
        print(f" [警告] {name} ADF 检验失败 ({exc}),保留原序列")
        return series, False

    p_value = result[1]  # second element of the adfuller result is the p-value
    if p_value > alpha:
        print(f" [注意] {name} 非平稳 (ADF p={p_value:.4f}),使用差分序列")
        return series.diff().dropna(), True
    return series, False
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 3. 单对 Granger 因果检验
|
||||
# ============================================================
|
||||
|
||||
def granger_test_pair(
|
||||
@@ -87,6 +100,15 @@ def granger_test_pair(
|
||||
print(f" [警告] {cause} → {effect}: 样本量不足 ({len(data)}),跳过")
|
||||
return []
|
||||
|
||||
# ADF 平稳性检验,非平稳则取差分
|
||||
effect_series, effect_diffed = _check_stationarity(data[effect], effect)
|
||||
cause_series, cause_diffed = _check_stationarity(data[cause], cause)
|
||||
if effect_diffed or cause_diffed:
|
||||
data = pd.concat([effect_series, cause_series], axis=1).dropna()
|
||||
if len(data) < max_lag + 20:
|
||||
print(f" [警告] {cause} → {effect}: 差分后样本量不足 ({len(data)}),跳过")
|
||||
return []
|
||||
|
||||
results = []
|
||||
try:
|
||||
# 执行检验,maxlag 取最大值,一次获取所有滞后
|
||||
@@ -578,14 +600,10 @@ def run_causality_analysis(
|
||||
|
||||
# --- 因果关系网络图 ---
|
||||
print("\n>>> [4/4] 绘制因果关系网络图...")
|
||||
# 使用所有结果(含跨时间尺度)
|
||||
# 使用所有结果(含跨时间尺度),直接使用各组已做的 Bonferroni 校正结果,
|
||||
# 不再重复校正(各组检验已独立校正,合并后再校正会导致双重惩罚)
|
||||
if not all_results.empty:
|
||||
# 重新做一次 Bonferroni 校正(因为合并后总检验数增加)
|
||||
all_corrected = apply_bonferroni(all_results.drop(
|
||||
columns=['bonferroni_alpha', 'significant_raw', 'significant_corrected'],
|
||||
errors='ignore'
|
||||
), alpha=0.05)
|
||||
plot_causal_network(all_corrected, output_dir)
|
||||
plot_causal_network(all_results, output_dir)
|
||||
else:
|
||||
print(" [警告] 无可用结果,跳过网络图")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user