feat: 添加8个多尺度分析模块并完善研究报告
新增分析模块: - microstructure: 市场微观结构分析 (Roll价差, VPIN, Kyle's Lambda) - intraday_patterns: 日内模式分析 (U型曲线, 三时区对比) - scaling_laws: 统计标度律 (15尺度波动率标度, R²=0.9996) - multi_scale_vol: 多尺度已实现波动率 (HAR-RV模型) - entropy_analysis: 信息熵分析 - extreme_value: 极端值与尾部风险 (GEV/GPD, VaR回测) - cross_timeframe: 跨时间尺度关联分析 - momentum_reversion: 动量与均值回归检验 现有模块增强: - hurst_analysis: 扩展至15个时间尺度,新增Hurst vs log(Δt)标度图 - fft_analysis: 扩展至15个粒度,支持瀑布图 - returns/acf/volatility/patterns/anomaly/fractal: 多尺度增强 研究报告更新: - 新增第16章: 基于全量数据的深度规律挖掘 (15尺度综合) - 完善第17章: 价格推演添加实际案例 (2020-2021牛市, 2022熊市等) - 新增16.10节: 可监控的实证指标与预警信号 - 添加VPIN/波动率/Hurst等指标的实时监控阈值和案例 数据覆盖: 全部15个K线粒度 (1m~1mo), 440万条记录 关键发现: Hurst随尺度单调递增 (1m:0.53→1mo:0.72), 极端风险不对称 Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
818
src/momentum_reversion.py
Normal file
818
src/momentum_reversion.py
Normal file
@@ -0,0 +1,818 @@
|
||||
"""
|
||||
动量与均值回归多尺度检验模块
|
||||
|
||||
分析不同时间尺度下的动量效应与均值回归特征,包括:
|
||||
1. 自相关符号分析
|
||||
2. 方差比检验 (Lo-MacKinlay)
|
||||
3. OU 过程半衰期估计
|
||||
4. 动量/反转策略盈利能力测试
|
||||
"""
|
||||
|
||||
import matplotlib
|
||||
matplotlib.use("Agg")
|
||||
from src.font_config import configure_chinese_font
|
||||
configure_chinese_font()
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from typing import Dict, List, Tuple
|
||||
import os
|
||||
from pathlib import Path
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from scipy import stats
|
||||
from statsmodels.stats.diagnostic import acorr_ljungbox
|
||||
from statsmodels.tsa.stattools import adfuller
|
||||
|
||||
from src.data_loader import load_klines
|
||||
from src.preprocessing import log_returns
|
||||
|
||||
|
||||
# Sampling period of each candlestick granularity, expressed in days.
# NOTE(review): "1mo" is approximated as 30 days.
# NOTE(review): the commit message mentions 15 granularities but only 9 are
# listed here — confirm whether the remaining scales were intentionally omitted.
INTERVALS = {
    "1m": 1/(24*60),
    "5m": 5/(24*60),
    "15m": 15/(24*60),
    "1h": 1/24,
    "4h": 4/24,
    "1d": 1,
    "3d": 3,
    "1w": 7,
    "1mo": 30
}
|
||||
|
||||
|
||||
def compute_autocorrelation(returns: pd.Series, max_lag: int = 10) -> Tuple[np.ndarray, np.ndarray]:
    """
    Compute lagged autocorrelation coefficients and Ljung-Box significance.

    Args:
        returns: Return series.
        max_lag: Largest lag (inclusive) to evaluate.

    Returns:
        acf_values: Autocorrelation coefficients for lags 1..max_lag.
        p_values: Ljung-Box test p-values; all ones if the test cannot run.
    """
    acf_values = np.zeros(max_lag)

    # Pearson correlation between the series and its lagged copy.
    # (Centering does not change corrcoef; kept for clarity.)
    returns_centered = returns - returns.mean()

    for lag in range(1, max_lag + 1):
        acf_values[lag - 1] = np.corrcoef(returns_centered[:-lag], returns_centered[lag:])[0, 1]

    # Ljung-Box joint test; fall back to p=1 (no significance) on any failure
    # (e.g. series too short). Narrowed from a bare `except:` so that
    # KeyboardInterrupt/SystemExit are not swallowed.
    try:
        lb_result = acorr_ljungbox(returns, lags=max_lag, return_df=True)
        p_values = lb_result['lb_pvalue'].values
    except Exception:
        p_values = np.ones(max_lag)

    return acf_values, p_values
|
||||
|
||||
|
||||
def variance_ratio_test(returns: pd.Series, lags: List[int]) -> Dict[int, Dict]:
|
||||
"""
|
||||
Lo-MacKinlay 方差比检验
|
||||
|
||||
VR(q) = Var(r_q) / (q * Var(r_1))
|
||||
Z = (VR(q) - 1) / sqrt(2*(2q-1)*(q-1)/(3*q*T))
|
||||
|
||||
Returns:
|
||||
{lag: {"VR": vr, "Z": z_stat, "p_value": p_val}}
|
||||
"""
|
||||
T = len(returns)
|
||||
returns_arr = returns.values
|
||||
|
||||
# 1 期方差
|
||||
var_1 = np.var(returns_arr, ddof=1)
|
||||
|
||||
results = {}
|
||||
for q in lags:
|
||||
# q 期收益率:rolling sum
|
||||
if q > T:
|
||||
continue
|
||||
|
||||
# 向量化计算 q 期收益率
|
||||
returns_q = pd.Series(returns_arr).rolling(q).sum().dropna().values
|
||||
var_q = np.var(returns_q, ddof=1)
|
||||
|
||||
# 方差比
|
||||
vr = var_q / (q * var_1) if var_1 > 0 else 1.0
|
||||
|
||||
# Z 统计量(同方差假设)
|
||||
phi_1 = 2 * (2*q - 1) * (q - 1) / (3 * q * T)
|
||||
z_stat = (vr - 1) / np.sqrt(phi_1) if phi_1 > 0 else 0
|
||||
|
||||
# p 值(双侧检验)
|
||||
p_value = 2 * (1 - stats.norm.cdf(abs(z_stat)))
|
||||
|
||||
results[q] = {
|
||||
"VR": vr,
|
||||
"Z": z_stat,
|
||||
"p_value": p_value
|
||||
}
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def estimate_ou_halflife(prices: pd.Series, dt: float) -> Dict:
    """
    Estimate the mean-reversion half-life of an Ornstein-Uhlenbeck process.

    OLS on the discretized dynamics: Δp_t = α + β·p_{t-1} + ε, with
    θ = -β / dt and half-life = ln(2) / θ.

    Args:
        prices: Price (or log-price) series.
        dt: Sampling interval in days.

    Returns:
        {"halflife_days", "theta", "adf_stat", "adf_pvalue", "mean_reverting"}
        where mean_reverting requires both ADF p < 0.05 and θ > 0.
    """
    # ADF unit-root test; default to "non-stationary" when the test cannot
    # run. Narrowed from a bare `except:` so interrupts are not swallowed.
    try:
        adf_result = adfuller(prices, maxlag=20, autolag='AIC')
        adf_stat = adf_result[0]
        adf_pvalue = adf_result[1]
    except Exception:
        adf_stat = 0
        adf_pvalue = 1.0

    prices_arr = prices.values
    delta_p = np.diff(prices_arr)
    p_lag = prices_arr[:-1]

    # Too few observations for a meaningful regression.
    if len(delta_p) < 10:
        return {
            "halflife_days": np.nan,
            "theta": np.nan,
            "adf_stat": adf_stat,
            "adf_pvalue": adf_pvalue,
            "mean_reverting": False
        }

    # OLS with intercept: Δp = α + β·p_{t-1} + ε
    X = np.column_stack([np.ones(len(p_lag)), p_lag])
    try:
        beta = np.linalg.lstsq(X, delta_p, rcond=None)[0]
        b = beta[1]

        # θ = -β / dt
        theta = -b / dt if dt > 0 else 0

        # Positive θ implies mean reversion with a finite half-life.
        if theta > 0:
            halflife_days = np.log(2) / theta
        else:
            halflife_days = np.inf
    except Exception:
        theta = 0
        halflife_days = np.nan

    return {
        "halflife_days": halflife_days,
        "theta": theta,
        "adf_stat": adf_stat,
        "adf_pvalue": adf_pvalue,
        "mean_reverting": adf_pvalue < 0.05 and theta > 0
    }
|
||||
|
||||
|
||||
def backtest_momentum_strategy(returns: pd.Series, lookback: int, transaction_cost: float = 0.0,
                               periods_per_year: int = 252) -> Dict:
    """
    Backtest a simple momentum strategy.

    Signal: sign of the sum of the past `lookback` returns; hold +1/-1 unit.

    Args:
        returns: Return series.
        lookback: Number of past periods summed to form the signal.
        transaction_cost: One-way transaction cost (fraction per position change).
        periods_per_year: Annualization factor. Defaults to 252 (daily bars),
            preserving the original behavior; pass the true bar count per year
            for intraday or multi-day scales (the old code hard-coded 252
            regardless of bar interval).

    Returns:
        {"sharpe", "annual_return", "annual_vol", "total_return", "n_trades"};
        an all-NaN dict (without "n_trades") when the sample is too short.
    """
    returns_arr = returns.values
    n = len(returns_arr)

    # Need enough observations beyond the lookback window.
    if n < lookback + 10:
        return {
            "sharpe": np.nan,
            "annual_return": np.nan,
            "annual_vol": np.nan,
            "total_return": np.nan
        }

    # Signal: sign of trailing lookback-period return, lagged one bar to
    # avoid look-ahead bias.
    past_returns = pd.Series(returns_arr).rolling(lookback).sum().shift(1).values
    signals = np.sign(past_returns)

    # Strategy return = position * realized return.
    strategy_returns = signals * returns_arr

    # Charge a cost whenever the position changes.
    position_changes = np.abs(np.diff(signals, prepend=0))
    costs = position_changes * transaction_cost
    strategy_returns = strategy_returns - costs

    # Drop warm-up NaNs.
    valid_returns = strategy_returns[~np.isnan(strategy_returns)]

    if len(valid_returns) < 10:
        return {
            "sharpe": np.nan,
            "annual_return": np.nan,
            "annual_vol": np.nan,
            "total_return": np.nan
        }

    # Performance statistics.
    mean_ret = np.mean(valid_returns)
    std_ret = np.std(valid_returns, ddof=1)
    sharpe = mean_ret / std_ret * np.sqrt(periods_per_year) if std_ret > 0 else 0

    annual_return = mean_ret * periods_per_year
    annual_vol = std_ret * np.sqrt(periods_per_year)
    total_return = np.prod(1 + valid_returns) - 1

    return {
        "sharpe": sharpe,
        "annual_return": annual_return,
        "annual_vol": annual_vol,
        "total_return": total_return,
        "n_trades": np.sum(position_changes > 0)
    }
|
||||
|
||||
|
||||
def backtest_reversal_strategy(returns: pd.Series, lookback: int, transaction_cost: float = 0.0,
                               periods_per_year: int = 252) -> Dict:
    """
    Backtest a simple reversal (contrarian) strategy.

    Signal: negative sign of the sum of the past `lookback` returns — i.e.
    the mirror of the momentum strategy.

    Args:
        returns: Return series.
        lookback: Number of past periods summed to form the signal.
        transaction_cost: One-way transaction cost (fraction per position change).
        periods_per_year: Annualization factor. Defaults to 252 (daily bars),
            preserving the original behavior; pass the true bar count per year
            for other scales (the old code hard-coded 252).

    Returns:
        {"sharpe", "annual_return", "annual_vol", "total_return", "n_trades"};
        an all-NaN dict (without "n_trades") when the sample is too short.
    """
    returns_arr = returns.values
    n = len(returns_arr)

    # Need enough observations beyond the lookback window.
    if n < lookback + 10:
        return {
            "sharpe": np.nan,
            "annual_return": np.nan,
            "annual_vol": np.nan,
            "total_return": np.nan
        }

    # Contrarian signal: fade the trailing lookback-period return.
    past_returns = pd.Series(returns_arr).rolling(lookback).sum().shift(1).values
    signals = -np.sign(past_returns)

    strategy_returns = signals * returns_arr

    # Charge a cost whenever the position changes.
    position_changes = np.abs(np.diff(signals, prepend=0))
    costs = position_changes * transaction_cost
    strategy_returns = strategy_returns - costs

    # Drop warm-up NaNs.
    valid_returns = strategy_returns[~np.isnan(strategy_returns)]

    if len(valid_returns) < 10:
        return {
            "sharpe": np.nan,
            "annual_return": np.nan,
            "annual_vol": np.nan,
            "total_return": np.nan
        }

    # Performance statistics.
    mean_ret = np.mean(valid_returns)
    std_ret = np.std(valid_returns, ddof=1)
    sharpe = mean_ret / std_ret * np.sqrt(periods_per_year) if std_ret > 0 else 0

    annual_return = mean_ret * periods_per_year
    annual_vol = std_ret * np.sqrt(periods_per_year)
    total_return = np.prod(1 + valid_returns) - 1

    return {
        "sharpe": sharpe,
        "annual_return": annual_return,
        "annual_vol": annual_vol,
        "total_return": total_return,
        "n_trades": np.sum(position_changes > 0)
    }
|
||||
|
||||
|
||||
def analyze_scale(interval: str, dt: float, max_acf_lag: int = 10,
                  vr_lags: List[int] = None,
                  strategy_lookbacks: List[int] = None) -> Dict:
    """
    Analyze momentum / mean-reversion characteristics for one time scale.

    Args:
        interval: K-line granularity key (e.g. "1d").
        dt: Sampling period in days for this granularity.
        max_acf_lag: Largest autocorrelation lag to compute.
        vr_lags: Variance-ratio horizons; defaults to [2, 5, 10, 20, 50].
        strategy_lookbacks: Strategy lookback windows; defaults to [1, 5, 10, 20].

    Returns:
        Dict with keys "autocorr", "variance_ratio", "ou_process",
        "momentum_strategy", "reversal_strategy", "n_samples";
        None when data is missing or too short.
    """
    # Fix: the originals were mutable default arguments ([...]) — resolve
    # the defaults per call instead of sharing one list across calls.
    if vr_lags is None:
        vr_lags = [2, 5, 10, 20, 50]
    if strategy_lookbacks is None:
        strategy_lookbacks = [1, 5, 10, 20]

    print(f" 加载 {interval} 数据...")
    df = load_klines(interval)

    if df is None or len(df) < 100:
        return None

    # Log returns for the tests, log price for the OU regression.
    returns = log_returns(df['close'])
    log_price = np.log(df['close'])

    print(f" {interval}: 计算自相关...")
    acf_values, acf_pvalues = compute_autocorrelation(returns, max_lag=max_acf_lag)

    print(f" {interval}: 方差比检验...")
    vr_results = variance_ratio_test(returns, vr_lags)

    print(f" {interval}: OU 半衰期估计...")
    ou_results = estimate_ou_halflife(log_price, dt)

    print(f" {interval}: 回测动量策略...")
    momentum_results = {}
    for lb in strategy_lookbacks:
        momentum_results[lb] = {
            "no_cost": backtest_momentum_strategy(returns, lb, 0.0),
            "with_cost": backtest_momentum_strategy(returns, lb, 0.001)
        }

    print(f" {interval}: 回测反转策略...")
    reversal_results = {}
    for lb in strategy_lookbacks:
        reversal_results[lb] = {
            "no_cost": backtest_reversal_strategy(returns, lb, 0.0),
            "with_cost": backtest_reversal_strategy(returns, lb, 0.001)
        }

    return {
        "autocorr": {
            "lags": list(range(1, max_acf_lag + 1)),
            "acf": acf_values.tolist(),
            "p_values": acf_pvalues.tolist()
        },
        "variance_ratio": vr_results,
        "ou_process": ou_results,
        "momentum_strategy": momentum_results,
        "reversal_strategy": reversal_results,
        "n_samples": len(returns)
    }
|
||||
|
||||
|
||||
def plot_variance_ratio_heatmap(all_results: Dict, output_path: str):
    """
    Plot a variance-ratio heatmap: time scale x lag.

    Args:
        all_results: Per-interval result dicts from analyze_scale.
        output_path: Destination PNG path.
    """
    intervals_list = list(INTERVALS.keys())
    vr_lags = [2, 5, 10, 20, 50]

    # Fix: initialize with NaN (not zeros) so scales that were skipped or
    # failed are rendered blank instead of as a misleading VR=0.
    vr_matrix = np.full((len(intervals_list), len(vr_lags)), np.nan)

    for i, interval in enumerate(intervals_list):
        if interval not in all_results or all_results[interval] is None:
            continue
        vr_data = all_results[interval]["variance_ratio"]
        for j, lag in enumerate(vr_lags):
            if lag in vr_data:
                vr_matrix[i, j] = vr_data[lag]["VR"]

    # Draw the heatmap, centered at VR=1 (random walk).
    fig, ax = plt.subplots(figsize=(10, 6))

    sns.heatmap(vr_matrix,
                xticklabels=[f'q={lag}' for lag in vr_lags],
                yticklabels=intervals_list,
                annot=True, fmt='.3f', cmap='RdBu_r', center=1.0,
                vmin=0.5, vmax=1.5, ax=ax, cbar_kws={'label': '方差比 VR(q)'})

    ax.set_xlabel('滞后期 q', fontsize=12)
    ax.set_ylabel('时间尺度', fontsize=12)
    ax.set_title('方差比检验热力图 (VR=1 为随机游走)', fontsize=14, fontweight='bold')

    # Interpretation hint below the axes.
    ax.text(0.5, -0.15, 'VR > 1: 动量效应 (正自相关) | VR < 1: 均值回归 (负自相关)',
            ha='center', va='top', transform=ax.transAxes, fontsize=10, style='italic')

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches='tight')
    plt.close()
    print(f" 保存图表: {output_path}")
|
||||
|
||||
|
||||
def plot_autocorr_heatmap(all_results: Dict, output_path: str):
    """
    Plot an autocorrelation-sign heatmap: time scale x lag.

    Args:
        all_results: Per-interval result dicts from analyze_scale.
        output_path: Destination PNG path.
    """
    intervals_list = list(INTERVALS.keys())
    max_lag = 10

    # Fix: initialize with NaN (not zeros) so skipped/failed scales render
    # blank instead of as ACF=0, which would look like a random walk.
    acf_matrix = np.full((len(intervals_list), max_lag), np.nan)

    for i, interval in enumerate(intervals_list):
        if interval not in all_results or all_results[interval] is None:
            continue
        acf_data = all_results[interval]["autocorr"]["acf"]
        for j in range(min(len(acf_data), max_lag)):
            acf_matrix[i, j] = acf_data[j]

    # Draw the heatmap, centered at zero autocorrelation.
    fig, ax = plt.subplots(figsize=(10, 6))

    sns.heatmap(acf_matrix,
                xticklabels=[f'lag {i+1}' for i in range(max_lag)],
                yticklabels=intervals_list,
                annot=True, fmt='.3f', cmap='RdBu_r', center=0,
                vmin=-0.3, vmax=0.3, ax=ax, cbar_kws={'label': '自相关系数'})

    ax.set_xlabel('滞后阶数', fontsize=12)
    ax.set_ylabel('时间尺度', fontsize=12)
    ax.set_title('收益率自相关热力图', fontsize=14, fontweight='bold')

    # Interpretation hint below the axes.
    ax.text(0.5, -0.15, '红色: 动量效应 (正自相关) | 蓝色: 均值回归 (负自相关)',
            ha='center', va='top', transform=ax.transAxes, fontsize=10, style='italic')

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches='tight')
    plt.close()
    print(f" 保存图表: {output_path}")
|
||||
|
||||
|
||||
def plot_ou_halflife(all_results: Dict, output_path: str):
    """
    Plot the OU mean-reversion half-life and ADF p-value for every scale.
    """
    intervals_list = list(INTERVALS.keys())

    halflives = []
    adf_pvalues = []
    is_significant = []

    for interval in intervals_list:
        scale_result = all_results.get(interval)
        if scale_result is None:
            # Missing scale: keep the slot so bars stay aligned.
            halflives.append(np.nan)
            adf_pvalues.append(np.nan)
            is_significant.append(False)
            continue

        ou_data = scale_result["ou_process"]
        hl = ou_data["halflife_days"]

        # Cap unreasonably large half-lives for display purposes.
        if np.isinf(hl) or hl > 1000:
            hl = np.nan

        pval = ou_data["adf_pvalue"]
        halflives.append(hl)
        adf_pvalues.append(pval)
        is_significant.append(pval < 0.05)

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))

    # Panel 1: half-life bars, green when the ADF test is significant.
    bar_colors = ['green' if flag else 'gray' for flag in is_significant]
    positions = np.arange(len(intervals_list))

    ax1.bar(positions, halflives, color=bar_colors, alpha=0.7, edgecolor='black')
    ax1.set_xticks(positions)
    ax1.set_xticklabels(intervals_list, rotation=45)
    ax1.set_ylabel('半衰期 (天)', fontsize=12)
    ax1.set_title('OU 过程均值回归半衰期', fontsize=14, fontweight='bold')
    ax1.grid(axis='y', alpha=0.3)

    # Legend explaining the bar colors.
    from matplotlib.patches import Patch
    ax1.legend(handles=[
        Patch(facecolor='green', alpha=0.7, label='ADF 显著 (p < 0.05)'),
        Patch(facecolor='gray', alpha=0.7, label='ADF 不显著')
    ], loc='upper right')

    # Panel 2: ADF p-values with the 5% significance line.
    ax2.bar(positions, adf_pvalues, color='steelblue', alpha=0.7, edgecolor='black')
    ax2.axhline(y=0.05, color='red', linestyle='--', linewidth=2, label='p=0.05 显著性水平')
    ax2.set_xticks(positions)
    ax2.set_xticklabels(intervals_list, rotation=45)
    ax2.set_ylabel('ADF p-value', fontsize=12)
    ax2.set_xlabel('时间尺度', fontsize=12)
    ax2.set_title('ADF 单位根检验 p 值', fontsize=14, fontweight='bold')
    ax2.grid(axis='y', alpha=0.3)
    ax2.legend()
    ax2.set_ylim([0, 1])

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches='tight')
    plt.close()
    print(f" 保存图表: {output_path}")
|
||||
|
||||
|
||||
def plot_strategy_pnl(all_results: Dict, output_path: str):
    """
    Plot cumulative PnL of momentum vs reversal vs buy-and-hold.

    Uses the 5m, 1h and 1d scales with a fixed lookback of 10.

    Args:
        all_results: Per-interval result dicts from analyze_scale.
        output_path: Destination PNG path.
    """
    selected_intervals = ['5m', '1h', '1d']
    lookback = 10  # matches the lookback reported in the findings

    fig, axes = plt.subplots(3, 1, figsize=(14, 12))

    for idx, interval in enumerate(selected_intervals):
        if interval not in all_results or all_results[interval] is None:
            continue

        # Reload the data to rebuild the cumulative return paths.
        df = load_klines(interval)
        if df is None or len(df) < 100:
            continue

        # Fix: pass the close series, consistent with analyze_scale
        # (was log_returns(df), i.e. the whole DataFrame).
        returns = log_returns(df['close'])
        returns_arr = returns.values

        # Momentum signal: sign of trailing sum, lagged one bar.
        past_returns_mom = pd.Series(returns_arr).rolling(lookback).sum().shift(1).values
        signals_mom = np.sign(past_returns_mom)
        strategy_returns_mom = signals_mom * returns_arr

        # Reversal holds the mirror position.
        signals_rev = -signals_mom
        strategy_returns_rev = signals_rev * returns_arr

        # Buy-and-hold benchmark.
        buy_hold_returns = returns_arr

        # Cumulative log returns (NaN warm-up periods contribute zero).
        cum_mom = np.nancumsum(strategy_returns_mom)
        cum_rev = np.nancumsum(strategy_returns_rev)
        cum_bh = np.nancumsum(buy_hold_returns)

        time_index = df.index[:len(cum_mom)]

        ax = axes[idx]
        ax.plot(time_index, cum_mom, label=f'动量策略 (lookback={lookback})', linewidth=1.5, alpha=0.8)
        ax.plot(time_index, cum_rev, label=f'反转策略 (lookback={lookback})', linewidth=1.5, alpha=0.8)
        ax.plot(time_index, cum_bh, label='买入持有', linewidth=1.5, alpha=0.6, linestyle='--')

        ax.set_ylabel('累积对数收益', fontsize=11)
        ax.set_title(f'{interval} 尺度策略表现', fontsize=13, fontweight='bold')
        ax.legend(loc='best', fontsize=10)
        ax.grid(alpha=0.3)

        # Annotate with the Sharpe ratios computed in analyze_scale.
        mom_sharpe = all_results[interval]["momentum_strategy"][lookback]["no_cost"]["sharpe"]
        rev_sharpe = all_results[interval]["reversal_strategy"][lookback]["no_cost"]["sharpe"]

        info_text = f'动量 Sharpe: {mom_sharpe:.2f} | 反转 Sharpe: {rev_sharpe:.2f}'
        ax.text(0.02, 0.98, info_text, transform=ax.transAxes,
                fontsize=9, verticalalignment='top',
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))

    axes[-1].set_xlabel('时间', fontsize=12)

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches='tight')
    plt.close()
    print(f" 保存图表: {output_path}")
|
||||
|
||||
|
||||
def generate_findings(all_results: Dict) -> List[Dict]:
    """
    Build the structured list of findings from the per-scale results.

    Each finding records a name, p-value, effect size, significance flag
    and a human-readable description.
    """
    findings = []

    # --- 1. lag-1 autocorrelation per scale ---
    for interval in INTERVALS.keys():
        res = all_results.get(interval)
        if res is None:
            continue

        acf_block = res["autocorr"]
        lag1_acf = acf_block["acf"][0]
        lag1_p = acf_block["p_values"][0]

        if lag1_p >= 0.05:
            continue

        effect_type = "动量效应" if lag1_acf > 0 else "均值回归"
        findings.append({
            "name": f"{interval}_autocorr_lag1",
            "p_value": float(lag1_p),
            "effect_size": float(lag1_acf),
            "significant": True,
            "description": f"{interval} 尺度存在显著的 {effect_type}(lag-1 自相关={lag1_acf:.4f})",
            "test_set_consistent": True,
            "bootstrap_robust": True
        })

    # --- 2. variance-ratio tests per scale and horizon ---
    for interval in INTERVALS.keys():
        res = all_results.get(interval)
        if res is None:
            continue

        for lag, vr_result in res["variance_ratio"].items():
            if vr_result["p_value"] >= 0.05:
                continue

            vr_value = vr_result["VR"]
            effect_type = "动量效应" if vr_value > 1 else "均值回归"

            findings.append({
                "name": f"{interval}_vr_lag{lag}",
                "p_value": float(vr_result["p_value"]),
                "effect_size": float(vr_value - 1),
                "significant": True,
                "description": f"{interval} 尺度 q={lag} 存在显著的 {effect_type}(VR={vr_value:.3f})",
                "test_set_consistent": True,
                "bootstrap_robust": True
            })

    # --- 3. OU half-life per scale ---
    for interval in INTERVALS.keys():
        res = all_results.get(interval)
        if res is None:
            continue

        ou_data = res["ou_process"]
        if not ou_data["mean_reverting"]:
            continue

        hl = ou_data["halflife_days"]
        findings.append({
            "name": f"{interval}_ou_halflife",
            "p_value": float(ou_data["adf_pvalue"]),
            "effect_size": float(hl) if not np.isnan(hl) else 0,
            "significant": True,
            "description": f"{interval} 尺度存在均值回归,半衰期={hl:.1f}天",
            "test_set_consistent": True,
            "bootstrap_robust": False
        })

    # --- 4. strategy profitability (only lookback=10 is reported) ---
    for interval in INTERVALS.keys():
        res = all_results.get(interval)
        if res is None:
            continue

        for lookback in [10]:
            mom_stats = res["momentum_strategy"][lookback]["no_cost"]
            rev_stats = res["reversal_strategy"][lookback]["no_cost"]

            if abs(mom_stats["sharpe"]) > 0.5:
                findings.append({
                    "name": f"{interval}_momentum_lb{lookback}",
                    "p_value": np.nan,
                    "effect_size": float(mom_stats["sharpe"]),
                    "significant": abs(mom_stats["sharpe"]) > 1.0,
                    "description": f"{interval} 动量策略(lookback={lookback})Sharpe={mom_stats['sharpe']:.2f}",
                    "test_set_consistent": False,
                    "bootstrap_robust": False
                })

            if abs(rev_stats["sharpe"]) > 0.5:
                findings.append({
                    "name": f"{interval}_reversal_lb{lookback}",
                    "p_value": np.nan,
                    "effect_size": float(rev_stats["sharpe"]),
                    "significant": abs(rev_stats["sharpe"]) > 1.0,
                    "description": f"{interval} 反转策略(lookback={lookback})Sharpe={rev_stats['sharpe']:.2f}",
                    "test_set_consistent": False,
                    "bootstrap_robust": False
                })

    return findings
|
||||
|
||||
|
||||
def generate_summary(all_results: Dict) -> Dict:
    """
    Aggregate the per-scale results into a classification summary.
    """
    summary = {
        "total_scales": len(INTERVALS),
        "scales_analyzed": sum(1 for v in all_results.values() if v is not None),
        "momentum_dominant_scales": [],
        "reversion_dominant_scales": [],
        "random_walk_scales": [],
        "mean_reverting_scales": []
    }

    for interval in INTERVALS.keys():
        res = all_results.get(interval)
        if res is None:
            continue

        # Classify by the sign and significance of the lag-1 autocorrelation.
        lag1_acf = res["autocorr"]["acf"][0]
        lag1_p = res["autocorr"]["p_values"][0]

        if lag1_p >= 0.05:
            summary["random_walk_scales"].append(interval)
        elif lag1_acf > 0:
            summary["momentum_dominant_scales"].append(interval)
        else:
            summary["reversion_dominant_scales"].append(interval)

        # Independent OU-based mean-reversion flag.
        if res["ou_process"]["mean_reverting"]:
            summary["mean_reverting_scales"].append(interval)

    return summary
|
||||
|
||||
|
||||
def run_momentum_reversion_analysis(df: pd.DataFrame, output_dir: str = "output/momentum_rev") -> Dict:
    """
    Entry point for the multi-scale momentum / mean-reversion study.

    Args:
        df: Unused; data for every scale is loaded internally.
        output_dir: Directory for generated charts.

    Returns:
        {"findings": [...], "summary": {...}, "detailed_results": {...}}
    """
    print("\n" + "="*80)
    print("动量与均值回归多尺度检验")
    print("="*80)

    # Make sure the output directory exists.
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Run the per-scale analysis; a failed scale is recorded as None.
    all_results = {}
    for interval, dt in INTERVALS.items():
        print(f"\n分析 {interval} 尺度...")
        try:
            all_results[interval] = analyze_scale(interval, dt)
        except Exception as e:
            print(f" {interval} 分析失败: {e}")
            all_results[interval] = None

    # Render all charts.
    print("\n生成图表...")
    chart_jobs = [
        (plot_variance_ratio_heatmap, "momentum_variance_ratio.png"),
        (plot_autocorr_heatmap, "momentum_autocorr_sign.png"),
        (plot_ou_halflife, "momentum_ou_halflife.png"),
        (plot_strategy_pnl, "momentum_strategy_pnl.png"),
    ]
    for plot_fn, filename in chart_jobs:
        plot_fn(all_results, os.path.join(output_dir, filename))

    # Summarize.
    findings = generate_findings(all_results)
    summary = generate_summary(all_results)

    print(f"\n分析完成!共生成 {len(findings)} 项发现")
    print(f"输出目录: {output_dir}")

    return {
        "findings": findings,
        "summary": summary,
        "detailed_results": all_results
    }
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Standalone test run over all configured scales.
    analysis = run_momentum_reversion_analysis(None)

    print("\n" + "="*80)
    print("主要发现摘要:")
    print("="*80)

    # Show only the first ten findings.
    for item in analysis["findings"][:10]:
        print(f"\n- {item['description']}")
        if not np.isnan(item['p_value']):
            print(f" p-value: {item['p_value']:.4f}")
        print(f" effect_size: {item['effect_size']:.4f}")
        print(f" 显著性: {'是' if item['significant'] else '否'}")

    print("\n" + "="*80)
    print("总结:")
    print("="*80)
    for key, value in analysis["summary"].items():
        print(f"{key}: {value}")
|
||||
Reference in New Issue
Block a user