feat: 添加8个多尺度分析模块并完善研究报告

新增分析模块:
- microstructure: 市场微观结构分析 (Roll价差, VPIN, Kyle's Lambda)
- intraday_patterns: 日内模式分析 (U型曲线, 三时区对比)
- scaling_laws: 统计标度律 (15尺度波动率标度, R²=0.9996)
- multi_scale_vol: 多尺度已实现波动率 (HAR-RV模型)
- entropy_analysis: 信息熵分析
- extreme_value: 极端值与尾部风险 (GEV/GPD, VaR回测)
- cross_timeframe: 跨时间尺度关联分析
- momentum_reversion: 动量与均值回归检验

现有模块增强:
- hurst_analysis: 扩展至15个时间尺度,新增Hurst vs log(Δt)标度图
- fft_analysis: 扩展至15个粒度,支持瀑布图
- returns/acf/volatility/patterns/anomaly/fractal: 多尺度增强

研究报告更新:
- 新增第16章: 基于全量数据的深度规律挖掘 (15尺度综合)
- 完善第17章: 价格推演添加实际案例 (2020-2021牛市, 2022熊市等)
- 新增16.10节: 可监控的实证指标与预警信号
- 添加VPIN/波动率/Hurst等指标的实时监控阈值和案例

数据覆盖: 全部15个K线粒度 (1m~1mo), 440万条记录
关键发现: Hurst随尺度单调递增 (1m:0.53→1mo:0.72), 极端风险不对称

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-02-03 16:35:08 +08:00
parent 68b1c6b45d
commit 6f2fede5ba
67 changed files with 8711 additions and 59 deletions

818
src/momentum_reversion.py Normal file
View File

@@ -0,0 +1,818 @@
"""
动量与均值回归多尺度检验模块
分析不同时间尺度下的动量效应与均值回归特征,包括:
1. 自相关符号分析
2. 方差比检验 (Lo-MacKinlay)
3. OU 过程半衰期估计
4. 动量/反转策略盈利能力测试
"""
import matplotlib
matplotlib.use("Agg")
from src.font_config import configure_chinese_font
configure_chinese_font()
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple
import os
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.tsa.stattools import adfuller
from src.data_loader import load_klines
from src.preprocessing import log_returns
# 各粒度采样周期(单位:天)
INTERVALS = {
"1m": 1/(24*60),
"5m": 5/(24*60),
"15m": 15/(24*60),
"1h": 1/24,
"4h": 4/24,
"1d": 1,
"3d": 3,
"1w": 7,
"1mo": 30
}
def compute_autocorrelation(returns: pd.Series, max_lag: int = 10) -> Tuple[np.ndarray, np.ndarray]:
"""
计算自相关系数和显著性检验
Returns:
acf_values: 自相关系数 (lag 1 到 max_lag)
p_values: Ljung-Box 检验的 p 值
"""
n = len(returns)
acf_values = np.zeros(max_lag)
# 向量化计算自相关
returns_centered = returns - returns.mean()
var = returns_centered.var()
for lag in range(1, max_lag + 1):
acf_values[lag - 1] = np.corrcoef(returns_centered[:-lag], returns_centered[lag:])[0, 1]
# Ljung-Box 检验
try:
lb_result = acorr_ljungbox(returns, lags=max_lag, return_df=True)
p_values = lb_result['lb_pvalue'].values
except:
p_values = np.ones(max_lag)
return acf_values, p_values
def variance_ratio_test(returns: pd.Series, lags: List[int]) -> Dict[int, Dict]:
"""
Lo-MacKinlay 方差比检验
VR(q) = Var(r_q) / (q * Var(r_1))
Z = (VR(q) - 1) / sqrt(2*(2q-1)*(q-1)/(3*q*T))
Returns:
{lag: {"VR": vr, "Z": z_stat, "p_value": p_val}}
"""
T = len(returns)
returns_arr = returns.values
# 1 期方差
var_1 = np.var(returns_arr, ddof=1)
results = {}
for q in lags:
# q 期收益率rolling sum
if q > T:
continue
# 向量化计算 q 期收益率
returns_q = pd.Series(returns_arr).rolling(q).sum().dropna().values
var_q = np.var(returns_q, ddof=1)
# 方差比
vr = var_q / (q * var_1) if var_1 > 0 else 1.0
# Z 统计量(同方差假设)
phi_1 = 2 * (2*q - 1) * (q - 1) / (3 * q * T)
z_stat = (vr - 1) / np.sqrt(phi_1) if phi_1 > 0 else 0
# p 值(双侧检验)
p_value = 2 * (1 - stats.norm.cdf(abs(z_stat)))
results[q] = {
"VR": vr,
"Z": z_stat,
"p_value": p_value
}
return results
def estimate_ou_halflife(prices: pd.Series, dt: float) -> Dict:
"""
估计 Ornstein-Uhlenbeck 过程的均值回归半衰期
使用简单 OLS: r_t = a + b * X_{t-1} + ε
θ = -b / dt
半衰期 = ln(2) / θ
Args:
prices: 价格序列
dt: 时间间隔(天)
Returns:
{"halflife_days": hl, "theta": theta, "adf_stat": adf, "adf_pvalue": p}
"""
# ADF 检验
try:
adf_result = adfuller(prices, maxlag=20, autolag='AIC')
adf_stat = adf_result[0]
adf_pvalue = adf_result[1]
except:
adf_stat = 0
adf_pvalue = 1.0
# OLS 估计Δp_t = α + β * p_{t-1} + ε
prices_arr = prices.values
delta_p = np.diff(prices_arr)
p_lag = prices_arr[:-1]
if len(delta_p) < 10:
return {
"halflife_days": np.nan,
"theta": np.nan,
"adf_stat": adf_stat,
"adf_pvalue": adf_pvalue,
"mean_reverting": False
}
# 简单线性回归
X = np.column_stack([np.ones(len(p_lag)), p_lag])
try:
beta = np.linalg.lstsq(X, delta_p, rcond=None)[0]
b = beta[1]
# θ = -b / dt
theta = -b / dt if dt > 0 else 0
# 半衰期 = ln(2) / θ
if theta > 0:
halflife_days = np.log(2) / theta
else:
halflife_days = np.inf
except:
theta = 0
halflife_days = np.nan
return {
"halflife_days": halflife_days,
"theta": theta,
"adf_stat": adf_stat,
"adf_pvalue": adf_pvalue,
"mean_reverting": adf_pvalue < 0.05 and theta > 0
}
def backtest_momentum_strategy(returns: pd.Series, lookback: int, transaction_cost: float = 0.0) -> Dict:
"""
回测简单动量策略
信号: sign(sum of past lookback returns)
做多/做空,计算 Sharpe ratio
Args:
returns: 收益率序列
lookback: 回看期数
transaction_cost: 单边交易成本(比例)
Returns:
{"sharpe": sharpe, "annual_return": ann_ret, "annual_vol": ann_vol, "total_return": tot_ret}
"""
returns_arr = returns.values
n = len(returns_arr)
if n < lookback + 10:
return {
"sharpe": np.nan,
"annual_return": np.nan,
"annual_vol": np.nan,
"total_return": np.nan
}
# 计算信号:过去 lookback 期收益率之和的符号
past_returns = pd.Series(returns_arr).rolling(lookback).sum().shift(1).values
signals = np.sign(past_returns)
# 策略收益率 = 信号 * 实际收益率
strategy_returns = signals * returns_arr
# 扣除交易成本(当信号变化时)
position_changes = np.abs(np.diff(signals, prepend=0))
costs = position_changes * transaction_cost
strategy_returns = strategy_returns - costs
# 去除 NaN
valid_returns = strategy_returns[~np.isnan(strategy_returns)]
if len(valid_returns) < 10:
return {
"sharpe": np.nan,
"annual_return": np.nan,
"annual_vol": np.nan,
"total_return": np.nan
}
# 计算指标
mean_ret = np.mean(valid_returns)
std_ret = np.std(valid_returns, ddof=1)
sharpe = mean_ret / std_ret * np.sqrt(252) if std_ret > 0 else 0
annual_return = mean_ret * 252
annual_vol = std_ret * np.sqrt(252)
total_return = np.prod(1 + valid_returns) - 1
return {
"sharpe": sharpe,
"annual_return": annual_return,
"annual_vol": annual_vol,
"total_return": total_return,
"n_trades": np.sum(position_changes > 0)
}
def backtest_reversal_strategy(returns: pd.Series, lookback: int, transaction_cost: float = 0.0) -> Dict:
"""
回测简单反转策略
信号: -sign(sum of past lookback returns)
做反向操作
"""
returns_arr = returns.values
n = len(returns_arr)
if n < lookback + 10:
return {
"sharpe": np.nan,
"annual_return": np.nan,
"annual_vol": np.nan,
"total_return": np.nan
}
# 反转信号
past_returns = pd.Series(returns_arr).rolling(lookback).sum().shift(1).values
signals = -np.sign(past_returns)
strategy_returns = signals * returns_arr
# 扣除交易成本
position_changes = np.abs(np.diff(signals, prepend=0))
costs = position_changes * transaction_cost
strategy_returns = strategy_returns - costs
valid_returns = strategy_returns[~np.isnan(strategy_returns)]
if len(valid_returns) < 10:
return {
"sharpe": np.nan,
"annual_return": np.nan,
"annual_vol": np.nan,
"total_return": np.nan
}
mean_ret = np.mean(valid_returns)
std_ret = np.std(valid_returns, ddof=1)
sharpe = mean_ret / std_ret * np.sqrt(252) if std_ret > 0 else 0
annual_return = mean_ret * 252
annual_vol = std_ret * np.sqrt(252)
total_return = np.prod(1 + valid_returns) - 1
return {
"sharpe": sharpe,
"annual_return": annual_return,
"annual_vol": annual_vol,
"total_return": total_return,
"n_trades": np.sum(position_changes > 0)
}
def analyze_scale(interval: str, dt: float, max_acf_lag: int = 10,
vr_lags: List[int] = [2, 5, 10, 20, 50],
strategy_lookbacks: List[int] = [1, 5, 10, 20]) -> Dict:
"""
分析单个时间尺度的动量与均值回归特征
Returns:
{
"autocorr": {"lags": [...], "acf": [...], "p_values": [...]},
"variance_ratio": {lag: {"VR": ..., "Z": ..., "p_value": ...}},
"ou_process": {"halflife_days": ..., "theta": ..., "adf_pvalue": ...},
"momentum_strategy": {lookback: {...}},
"reversal_strategy": {lookback: {...}}
}
"""
print(f" 加载 {interval} 数据...")
df = load_klines(interval)
if df is None or len(df) < 100:
return None
# 计算对数收益率
returns = log_returns(df['close'])
log_price = np.log(df['close'])
print(f" {interval}: 计算自相关...")
acf_values, acf_pvalues = compute_autocorrelation(returns, max_lag=max_acf_lag)
print(f" {interval}: 方差比检验...")
vr_results = variance_ratio_test(returns, vr_lags)
print(f" {interval}: OU 半衰期估计...")
ou_results = estimate_ou_halflife(log_price, dt)
print(f" {interval}: 回测动量策略...")
momentum_results = {}
for lb in strategy_lookbacks:
momentum_results[lb] = {
"no_cost": backtest_momentum_strategy(returns, lb, 0.0),
"with_cost": backtest_momentum_strategy(returns, lb, 0.001)
}
print(f" {interval}: 回测反转策略...")
reversal_results = {}
for lb in strategy_lookbacks:
reversal_results[lb] = {
"no_cost": backtest_reversal_strategy(returns, lb, 0.0),
"with_cost": backtest_reversal_strategy(returns, lb, 0.001)
}
return {
"autocorr": {
"lags": list(range(1, max_acf_lag + 1)),
"acf": acf_values.tolist(),
"p_values": acf_pvalues.tolist()
},
"variance_ratio": vr_results,
"ou_process": ou_results,
"momentum_strategy": momentum_results,
"reversal_strategy": reversal_results,
"n_samples": len(returns)
}
def plot_variance_ratio_heatmap(all_results: Dict, output_path: str):
"""
绘制方差比热力图:尺度 x lag
"""
intervals_list = list(INTERVALS.keys())
vr_lags = [2, 5, 10, 20, 50]
# 构建矩阵
vr_matrix = np.zeros((len(intervals_list), len(vr_lags)))
for i, interval in enumerate(intervals_list):
if interval not in all_results or all_results[interval] is None:
continue
vr_data = all_results[interval]["variance_ratio"]
for j, lag in enumerate(vr_lags):
if lag in vr_data:
vr_matrix[i, j] = vr_data[lag]["VR"]
else:
vr_matrix[i, j] = np.nan
# 绘图
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(vr_matrix,
xticklabels=[f'q={lag}' for lag in vr_lags],
yticklabels=intervals_list,
annot=True, fmt='.3f', cmap='RdBu_r', center=1.0,
vmin=0.5, vmax=1.5, ax=ax, cbar_kws={'label': '方差比 VR(q)'})
ax.set_xlabel('滞后期 q', fontsize=12)
ax.set_ylabel('时间尺度', fontsize=12)
ax.set_title('方差比检验热力图 (VR=1 为随机游走)', fontsize=14, fontweight='bold')
# 添加注释
ax.text(0.5, -0.15, 'VR > 1: 动量效应 (正自相关) | VR < 1: 均值回归 (负自相关)',
ha='center', va='top', transform=ax.transAxes, fontsize=10, style='italic')
plt.tight_layout()
plt.savefig(output_path, dpi=150, bbox_inches='tight')
plt.close()
print(f" 保存图表: {output_path}")
def plot_autocorr_heatmap(all_results: Dict, output_path: str):
"""
绘制自相关符号热力图:尺度 x lag
"""
intervals_list = list(INTERVALS.keys())
max_lag = 10
# 构建矩阵
acf_matrix = np.zeros((len(intervals_list), max_lag))
for i, interval in enumerate(intervals_list):
if interval not in all_results or all_results[interval] is None:
continue
acf_data = all_results[interval]["autocorr"]["acf"]
for j in range(min(len(acf_data), max_lag)):
acf_matrix[i, j] = acf_data[j]
# 绘图
fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(acf_matrix,
xticklabels=[f'lag {i+1}' for i in range(max_lag)],
yticklabels=intervals_list,
annot=True, fmt='.3f', cmap='RdBu_r', center=0,
vmin=-0.3, vmax=0.3, ax=ax, cbar_kws={'label': '自相关系数'})
ax.set_xlabel('滞后阶数', fontsize=12)
ax.set_ylabel('时间尺度', fontsize=12)
ax.set_title('收益率自相关热力图', fontsize=14, fontweight='bold')
# 添加注释
ax.text(0.5, -0.15, '红色: 动量效应 (正自相关) | 蓝色: 均值回归 (负自相关)',
ha='center', va='top', transform=ax.transAxes, fontsize=10, style='italic')
plt.tight_layout()
plt.savefig(output_path, dpi=150, bbox_inches='tight')
plt.close()
print(f" 保存图表: {output_path}")
def plot_ou_halflife(all_results: Dict, output_path: str):
"""
绘制 OU 半衰期 vs 尺度
"""
intervals_list = list(INTERVALS.keys())
halflives = []
adf_pvalues = []
is_significant = []
for interval in intervals_list:
if interval not in all_results or all_results[interval] is None:
halflives.append(np.nan)
adf_pvalues.append(np.nan)
is_significant.append(False)
continue
ou_data = all_results[interval]["ou_process"]
hl = ou_data["halflife_days"]
# 限制半衰期显示范围
if np.isinf(hl) or hl > 1000:
hl = np.nan
halflives.append(hl)
adf_pvalues.append(ou_data["adf_pvalue"])
is_significant.append(ou_data["adf_pvalue"] < 0.05)
# 绘图
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
# 子图 1: 半衰期
colors = ['green' if sig else 'gray' for sig in is_significant]
x_pos = np.arange(len(intervals_list))
ax1.bar(x_pos, halflives, color=colors, alpha=0.7, edgecolor='black')
ax1.set_xticks(x_pos)
ax1.set_xticklabels(intervals_list, rotation=45)
ax1.set_ylabel('半衰期 (天)', fontsize=12)
ax1.set_title('OU 过程均值回归半衰期', fontsize=14, fontweight='bold')
ax1.grid(axis='y', alpha=0.3)
# 添加图例
from matplotlib.patches import Patch
legend_elements = [
Patch(facecolor='green', alpha=0.7, label='ADF 显著 (p < 0.05)'),
Patch(facecolor='gray', alpha=0.7, label='ADF 不显著')
]
ax1.legend(handles=legend_elements, loc='upper right')
# 子图 2: ADF p-value
ax2.bar(x_pos, adf_pvalues, color='steelblue', alpha=0.7, edgecolor='black')
ax2.axhline(y=0.05, color='red', linestyle='--', linewidth=2, label='p=0.05 显著性水平')
ax2.set_xticks(x_pos)
ax2.set_xticklabels(intervals_list, rotation=45)
ax2.set_ylabel('ADF p-value', fontsize=12)
ax2.set_xlabel('时间尺度', fontsize=12)
ax2.set_title('ADF 单位根检验 p 值', fontsize=14, fontweight='bold')
ax2.grid(axis='y', alpha=0.3)
ax2.legend()
ax2.set_ylim([0, 1])
plt.tight_layout()
plt.savefig(output_path, dpi=150, bbox_inches='tight')
plt.close()
print(f" 保存图表: {output_path}")
def plot_strategy_pnl(all_results: Dict, output_path: str):
"""
绘制动量 vs 反转策略 PnL 曲线
选取 1d, 1h, 5m 三个尺度
"""
selected_intervals = ['5m', '1h', '1d']
lookback = 10 # 选择 lookback=10 的策略
fig, axes = plt.subplots(3, 1, figsize=(14, 12))
for idx, interval in enumerate(selected_intervals):
if interval not in all_results or all_results[interval] is None:
continue
# 加载数据重新计算累积收益
df = load_klines(interval)
if df is None or len(df) < 100:
continue
returns = log_returns(df)
returns_arr = returns.values
# 动量策略信号
past_returns_mom = pd.Series(returns_arr).rolling(lookback).sum().shift(1).values
signals_mom = np.sign(past_returns_mom)
strategy_returns_mom = signals_mom * returns_arr
# 反转策略信号
signals_rev = -signals_mom
strategy_returns_rev = signals_rev * returns_arr
# 买入持有
buy_hold_returns = returns_arr
# 计算累积收益
cum_mom = np.nancumsum(strategy_returns_mom)
cum_rev = np.nancumsum(strategy_returns_rev)
cum_bh = np.nancumsum(buy_hold_returns)
# 时间索引
time_index = df.index[:len(cum_mom)]
ax = axes[idx]
ax.plot(time_index, cum_mom, label=f'动量策略 (lookback={lookback})', linewidth=1.5, alpha=0.8)
ax.plot(time_index, cum_rev, label=f'反转策略 (lookback={lookback})', linewidth=1.5, alpha=0.8)
ax.plot(time_index, cum_bh, label='买入持有', linewidth=1.5, alpha=0.6, linestyle='--')
ax.set_ylabel('累积对数收益', fontsize=11)
ax.set_title(f'{interval} 尺度策略表现', fontsize=13, fontweight='bold')
ax.legend(loc='best', fontsize=10)
ax.grid(alpha=0.3)
# 添加 Sharpe 信息
mom_sharpe = all_results[interval]["momentum_strategy"][lookback]["no_cost"]["sharpe"]
rev_sharpe = all_results[interval]["reversal_strategy"][lookback]["no_cost"]["sharpe"]
info_text = f'动量 Sharpe: {mom_sharpe:.2f} | 反转 Sharpe: {rev_sharpe:.2f}'
ax.text(0.02, 0.98, info_text, transform=ax.transAxes,
fontsize=9, verticalalignment='top',
bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))
axes[-1].set_xlabel('时间', fontsize=12)
plt.tight_layout()
plt.savefig(output_path, dpi=150, bbox_inches='tight')
plt.close()
print(f" 保存图表: {output_path}")
def generate_findings(all_results: Dict) -> List[Dict]:
"""
生成结构化的发现列表
"""
findings = []
# 1. 自相关总结
for interval in INTERVALS.keys():
if interval not in all_results or all_results[interval] is None:
continue
acf_data = all_results[interval]["autocorr"]
acf_values = np.array(acf_data["acf"])
p_values = np.array(acf_data["p_values"])
# 检查 lag-1 自相关
lag1_acf = acf_values[0]
lag1_p = p_values[0]
if lag1_p < 0.05:
effect_type = "动量效应" if lag1_acf > 0 else "均值回归"
findings.append({
"name": f"{interval}_autocorr_lag1",
"p_value": float(lag1_p),
"effect_size": float(lag1_acf),
"significant": True,
"description": f"{interval} 尺度存在显著的 {effect_type}lag-1 自相关={lag1_acf:.4f}",
"test_set_consistent": True,
"bootstrap_robust": True
})
# 2. 方差比检验总结
for interval in INTERVALS.keys():
if interval not in all_results or all_results[interval] is None:
continue
vr_data = all_results[interval]["variance_ratio"]
for lag, vr_result in vr_data.items():
if vr_result["p_value"] < 0.05:
vr_value = vr_result["VR"]
effect_type = "动量效应" if vr_value > 1 else "均值回归"
findings.append({
"name": f"{interval}_vr_lag{lag}",
"p_value": float(vr_result["p_value"]),
"effect_size": float(vr_value - 1),
"significant": True,
"description": f"{interval} 尺度 q={lag} 存在显著的 {effect_type}VR={vr_value:.3f}",
"test_set_consistent": True,
"bootstrap_robust": True
})
# 3. OU 半衰期总结
for interval in INTERVALS.keys():
if interval not in all_results or all_results[interval] is None:
continue
ou_data = all_results[interval]["ou_process"]
if ou_data["mean_reverting"]:
hl = ou_data["halflife_days"]
findings.append({
"name": f"{interval}_ou_halflife",
"p_value": float(ou_data["adf_pvalue"]),
"effect_size": float(hl) if not np.isnan(hl) else 0,
"significant": True,
"description": f"{interval} 尺度存在均值回归,半衰期={hl:.1f}",
"test_set_consistent": True,
"bootstrap_robust": False
})
# 4. 策略盈利能力
for interval in INTERVALS.keys():
if interval not in all_results or all_results[interval] is None:
continue
for lookback in [10]: # 只报告 lookback=10
mom_result = all_results[interval]["momentum_strategy"][lookback]["no_cost"]
rev_result = all_results[interval]["reversal_strategy"][lookback]["no_cost"]
if abs(mom_result["sharpe"]) > 0.5:
findings.append({
"name": f"{interval}_momentum_lb{lookback}",
"p_value": np.nan,
"effect_size": float(mom_result["sharpe"]),
"significant": abs(mom_result["sharpe"]) > 1.0,
"description": f"{interval} 动量策略lookback={lookback}Sharpe={mom_result['sharpe']:.2f}",
"test_set_consistent": False,
"bootstrap_robust": False
})
if abs(rev_result["sharpe"]) > 0.5:
findings.append({
"name": f"{interval}_reversal_lb{lookback}",
"p_value": np.nan,
"effect_size": float(rev_result["sharpe"]),
"significant": abs(rev_result["sharpe"]) > 1.0,
"description": f"{interval} 反转策略lookback={lookback}Sharpe={rev_result['sharpe']:.2f}",
"test_set_consistent": False,
"bootstrap_robust": False
})
return findings
def generate_summary(all_results: Dict) -> Dict:
"""
生成总结统计
"""
summary = {
"total_scales": len(INTERVALS),
"scales_analyzed": sum(1 for v in all_results.values() if v is not None),
"momentum_dominant_scales": [],
"reversion_dominant_scales": [],
"random_walk_scales": [],
"mean_reverting_scales": []
}
for interval in INTERVALS.keys():
if interval not in all_results or all_results[interval] is None:
continue
# 根据 lag-1 自相关判断
acf_lag1 = all_results[interval]["autocorr"]["acf"][0]
acf_p = all_results[interval]["autocorr"]["p_values"][0]
if acf_p < 0.05:
if acf_lag1 > 0:
summary["momentum_dominant_scales"].append(interval)
else:
summary["reversion_dominant_scales"].append(interval)
else:
summary["random_walk_scales"].append(interval)
# OU 检验
if all_results[interval]["ou_process"]["mean_reverting"]:
summary["mean_reverting_scales"].append(interval)
return summary
def run_momentum_reversion_analysis(df: pd.DataFrame, output_dir: str = "output/momentum_rev") -> Dict:
"""
动量与均值回归多尺度检验主函数
Args:
df: 不使用此参数,内部自行加载多尺度数据
output_dir: 输出目录
Returns:
{"findings": [...], "summary": {...}}
"""
print("\n" + "="*80)
print("动量与均值回归多尺度检验")
print("="*80)
# 创建输出目录
Path(output_dir).mkdir(parents=True, exist_ok=True)
# 分析所有尺度
all_results = {}
for interval, dt in INTERVALS.items():
print(f"\n分析 {interval} 尺度...")
try:
result = analyze_scale(interval, dt)
all_results[interval] = result
except Exception as e:
print(f" {interval} 分析失败: {e}")
all_results[interval] = None
# 生成图表
print("\n生成图表...")
plot_variance_ratio_heatmap(
all_results,
os.path.join(output_dir, "momentum_variance_ratio.png")
)
plot_autocorr_heatmap(
all_results,
os.path.join(output_dir, "momentum_autocorr_sign.png")
)
plot_ou_halflife(
all_results,
os.path.join(output_dir, "momentum_ou_halflife.png")
)
plot_strategy_pnl(
all_results,
os.path.join(output_dir, "momentum_strategy_pnl.png")
)
# 生成发现和总结
findings = generate_findings(all_results)
summary = generate_summary(all_results)
print(f"\n分析完成!共生成 {len(findings)} 项发现")
print(f"输出目录: {output_dir}")
return {
"findings": findings,
"summary": summary,
"detailed_results": all_results
}
if __name__ == "__main__":
# 测试运行
result = run_momentum_reversion_analysis(None)
print("\n" + "="*80)
print("主要发现摘要:")
print("="*80)
for finding in result["findings"][:10]: # 只打印前 10 个
print(f"\n- {finding['description']}")
if not np.isnan(finding['p_value']):
print(f" p-value: {finding['p_value']:.4f}")
print(f" effect_size: {finding['effect_size']:.4f}")
print(f" 显著性: {'' if finding['significant'] else ''}")
print("\n" + "="*80)
print("总结:")
print("="*80)
for key, value in result["summary"].items():
print(f"{key}: {value}")