Files
btc_price_anany/src/microstructure.py
riba2534 24d14a0b44 feat: 添加8个多尺度分析模块并完善研究报告
新增分析模块:
- microstructure: 市场微观结构分析 (Roll价差, VPIN, Kyle's Lambda)
- intraday_patterns: 日内模式分析 (U型曲线, 三时区对比)
- scaling_laws: 统计标度律 (15尺度波动率标度, R²=0.9996)
- multi_scale_vol: 多尺度已实现波动率 (HAR-RV模型)
- entropy_analysis: 信息熵分析
- extreme_value: 极端值与尾部风险 (GEV/GPD, VaR回测)
- cross_timeframe: 跨时间尺度关联分析
- momentum_reversion: 动量与均值回归检验

现有模块增强:
- hurst_analysis: 扩展至15个时间尺度,新增Hurst vs log(Δt)标度图
- fft_analysis: 扩展至15个粒度,支持瀑布图
- returns/acf/volatility/patterns/anomaly/fractal: 多尺度增强

研究报告更新:
- 新增第16章: 基于全量数据的深度规律挖掘 (15尺度综合)
- 完善第17章: 价格推演添加实际案例 (2020-2021牛市, 2022熊市等)
- 新增16.10节: 可监控的实证指标与预警信号
- 添加VPIN/波动率/Hurst等指标的实时监控阈值和案例

数据覆盖: 全部15个K线粒度 (1m~1mo), 440万条记录
关键发现: Hurst随尺度单调递增 (1m:0.53→1mo:0.72), 极端风险不对称

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-03 16:35:08 +08:00

863 lines
29 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""市场微观结构分析模块
分析BTC市场的微观交易结构包括:
- Roll价差估计 (基于价格自协方差)
- Corwin-Schultz高低价价差估计
- Kyle's Lambda (价格冲击系数)
- Amihud非流动性比率
- VPIN (成交量同步的知情交易概率)
- 流动性危机检测
"""
import matplotlib
matplotlib.use('Agg')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from pathlib import Path
from typing import Dict, List, Tuple, Optional
import warnings
warnings.filterwarnings('ignore')
from src.font_config import configure_chinese_font
from src.data_loader import load_klines
from src.preprocessing import log_returns
configure_chinese_font()
# =============================================================================
# 核心微观结构指标计算
# =============================================================================
def _calculate_roll_spread(close: pd.Series, window: int = 100) -> pd.Series:
"""Roll价差估计
基于价格变化的自协方差估计有效价差:
Roll_spread = 2 * sqrt(-cov(ΔP_t, ΔP_{t-1}))
当自协方差为正时不符合理论设为NaN。
Parameters
----------
close : pd.Series
收盘价序列
window : int
滚动窗口大小
Returns
-------
pd.Series
Roll价差估计值绝对价格单位
"""
price_changes = close.diff()
# 滚动计算自协方差 cov(ΔP_t, ΔP_{t-1})
def _roll_covariance(x):
if len(x) < 2:
return np.nan
x = x.dropna()
if len(x) < 2:
return np.nan
return np.cov(x[:-1], x[1:])[0, 1]
auto_cov = price_changes.rolling(window=window).apply(_roll_covariance, raw=False)
# Roll公式: spread = 2 * sqrt(-cov)
# 只在负自协方差时有效
spread = np.where(auto_cov < 0, 2 * np.sqrt(-auto_cov), np.nan)
return pd.Series(spread, index=close.index, name='roll_spread')
def _calculate_corwin_schultz_spread(high: pd.Series, low: pd.Series, window: int = 2) -> pd.Series:
"""Corwin-Schultz高低价价差估计
利用连续两天的最高价和最低价推导有效价差。
公式:
β = Σ[ln(H_t/L_t)]^2
γ = [ln(H_{t,t+1}/L_{t,t+1})]^2
α = (sqrt(2β) - sqrt(β)) / (3 - 2*sqrt(2)) - sqrt(γ / (3 - 2*sqrt(2)))
S = 2 * (exp(α) - 1) / (1 + exp(α))
Parameters
----------
high : pd.Series
最高价序列
low : pd.Series
最低价序列
window : int
使用的周期数标准为2
Returns
-------
pd.Series
价差百分比估计
"""
hl_ratio = (high / low).apply(np.log)
beta = (hl_ratio ** 2).rolling(window=window).sum()
# 计算连续两期的高低价
high_max = high.rolling(window=window).max()
low_min = low.rolling(window=window).min()
gamma = (np.log(high_max / low_min)) ** 2
# Corwin-Schultz估计量
sqrt2 = np.sqrt(2)
denominator = 3 - 2 * sqrt2
alpha = (np.sqrt(2 * beta) - np.sqrt(beta)) / denominator - np.sqrt(gamma / denominator)
# 价差百分比: S = 2(e^α - 1)/(1 + e^α)
exp_alpha = np.exp(alpha)
spread_pct = 2 * (exp_alpha - 1) / (1 + exp_alpha)
# 处理异常值(负值或过大值)
spread_pct = spread_pct.clip(lower=0, upper=0.5)
return spread_pct
def _calculate_kyle_lambda(
returns: pd.Series,
volume: pd.Series,
window: int = 100,
) -> pd.Series:
"""Kyle's Lambda (价格冲击系数)
通过回归 |ΔP| = λ * sqrt(V) 估计价格冲击系数。
Lambda衡量单位成交量对价格的影响程度。
Parameters
----------
returns : pd.Series
对数收益率
volume : pd.Series
成交量
window : int
滚动窗口大小
Returns
-------
pd.Series
Kyle's Lambda (滚动估计)
"""
abs_returns = returns.abs()
sqrt_volume = np.sqrt(volume)
def _kyle_regression(idx):
ret_window = abs_returns.iloc[idx]
vol_window = sqrt_volume.iloc[idx]
valid = (~ret_window.isna()) & (~vol_window.isna()) & (vol_window > 0)
ret_valid = ret_window[valid]
vol_valid = vol_window[valid]
if len(ret_valid) < 10:
return np.nan
# 线性回归 |r| ~ sqrt(V)
slope, _, _, _, _ = stats.linregress(vol_valid, ret_valid)
return slope
# 滚动回归
lambdas = []
for i in range(len(returns)):
if i < window:
lambdas.append(np.nan)
else:
idx = slice(i - window, i)
lambdas.append(_kyle_regression(idx))
return pd.Series(lambdas, index=returns.index, name='kyle_lambda')
def _calculate_amihud_illiquidity(
returns: pd.Series,
volume: pd.Series,
quote_volume: Optional[pd.Series] = None,
) -> pd.Series:
"""Amihud非流动性比率
Amihud = |return| / dollar_volume
衡量单位美元成交额对应的价格冲击。
Parameters
----------
returns : pd.Series
对数收益率
volume : pd.Series
成交量 (BTC)
quote_volume : pd.Series, optional
成交额 (USDT),如未提供则使用 volume
Returns
-------
pd.Series
Amihud非流动性比率
"""
abs_returns = returns.abs()
if quote_volume is not None:
dollar_vol = quote_volume
else:
dollar_vol = volume
# Amihud比率: |r| / volume (避免除零)
amihud = abs_returns / dollar_vol.replace(0, np.nan)
# 极端值处理 (Winsorize at 99%)
threshold = amihud.quantile(0.99)
amihud = amihud.clip(upper=threshold)
return amihud
def _calculate_vpin(
volume: pd.Series,
taker_buy_volume: pd.Series,
bucket_size: int = 50,
window: int = 50,
) -> pd.Series:
"""VPIN (Volume-Synchronized Probability of Informed Trading)
简化版VPIN计算:
1. 将时间序列分桶(每桶固定成交量)
2. 计算每桶的买卖不平衡 |V_buy - V_sell| / V_total
3. 滚动平均得到VPIN
Parameters
----------
volume : pd.Series
总成交量
taker_buy_volume : pd.Series
主动买入成交量
bucket_size : int
每桶的目标成交量(累积条数)
window : int
滚动窗口大小(桶数)
Returns
-------
pd.Series
VPIN值 (0-1之间)
"""
# 买卖成交量
buy_vol = taker_buy_volume
sell_vol = volume - taker_buy_volume
# 订单不平衡
imbalance = (buy_vol - sell_vol).abs() / volume.replace(0, np.nan)
# 简化版: 直接对imbalance做滚动平均
# (标准VPIN需要成交量同步分桶计算复杂度高)
vpin = imbalance.rolling(window=window, min_periods=10).mean()
return vpin
def _detect_liquidity_crisis(
amihud: pd.Series,
threshold_multiplier: float = 3.0,
) -> pd.DataFrame:
"""流动性危机检测
基于Amihud比率的突变检测:
当 Amihud > mean + threshold_multiplier * std 时标记为流动性危机。
Parameters
----------
amihud : pd.Series
Amihud非流动性比率序列
threshold_multiplier : float
标准差倍数阈值
Returns
-------
pd.DataFrame
危机事件表,包含 date, amihud_value, threshold
"""
# 计算动态阈值 (滚动30天)
rolling_mean = amihud.rolling(window=30, min_periods=10).mean()
rolling_std = amihud.rolling(window=30, min_periods=10).std()
threshold = rolling_mean + threshold_multiplier * rolling_std
# 检测危机点
crisis_mask = amihud > threshold
crisis_events = []
for date in amihud[crisis_mask].index:
crisis_events.append({
'date': date,
'amihud_value': amihud.loc[date],
'threshold': threshold.loc[date],
'multiplier': (amihud.loc[date] / rolling_mean.loc[date]) if rolling_mean.loc[date] > 0 else np.nan,
})
return pd.DataFrame(crisis_events)
# =============================================================================
# 可视化函数
# =============================================================================
def _plot_spreads(
roll_spread: pd.Series,
cs_spread: pd.Series,
output_dir: Path,
):
"""图1: Roll价差与Corwin-Schultz价差时序图"""
fig, axes = plt.subplots(2, 1, figsize=(14, 8), sharex=True)
# Roll价差 (绝对值)
ax1 = axes[0]
valid_roll = roll_spread.dropna()
if len(valid_roll) > 0:
# 按年聚合以减少绘图点
daily_roll = valid_roll.resample('D').mean()
ax1.plot(daily_roll.index, daily_roll.values, color='steelblue', linewidth=0.8, label='Roll价差')
ax1.fill_between(daily_roll.index, 0, daily_roll.values, alpha=0.3, color='steelblue')
ax1.set_ylabel('Roll价差 (USDT)', fontsize=11)
ax1.set_title('市场价差估计 (Roll方法)', fontsize=13)
ax1.grid(True, alpha=0.3)
ax1.legend(loc='upper left', fontsize=9)
else:
ax1.text(0.5, 0.5, '数据不足', transform=ax1.transAxes, ha='center', va='center')
# Corwin-Schultz价差 (百分比)
ax2 = axes[1]
valid_cs = cs_spread.dropna()
if len(valid_cs) > 0:
daily_cs = valid_cs.resample('D').mean()
ax2.plot(daily_cs.index, daily_cs.values * 100, color='coral', linewidth=0.8, label='Corwin-Schultz价差')
ax2.fill_between(daily_cs.index, 0, daily_cs.values * 100, alpha=0.3, color='coral')
ax2.set_ylabel('价差 (%)', fontsize=11)
ax2.set_title('高低价价差估计 (Corwin-Schultz方法)', fontsize=13)
ax2.set_xlabel('日期', fontsize=11)
ax2.grid(True, alpha=0.3)
ax2.legend(loc='upper left', fontsize=9)
else:
ax2.text(0.5, 0.5, '数据不足', transform=ax2.transAxes, ha='center', va='center')
fig.tight_layout()
fig.savefig(output_dir / 'microstructure_spreads.png', dpi=150, bbox_inches='tight')
plt.close(fig)
print(f" [图] 价差估计图已保存: {output_dir / 'microstructure_spreads.png'}")
def _plot_liquidity_heatmap(
df_metrics: pd.DataFrame,
output_dir: Path,
):
"""图2: 流动性指标热力图(按月聚合)"""
# 按月聚合
df_monthly = df_metrics.resample('M').mean()
# 选择关键指标
metrics = ['roll_spread', 'cs_spread_pct', 'kyle_lambda', 'amihud', 'vpin']
available_metrics = [m for m in metrics if m in df_monthly.columns]
if len(available_metrics) == 0:
print(" [警告] 无可用流动性指标")
return
# 标准化 (Z-score)
df_norm = df_monthly[available_metrics].copy()
for col in available_metrics:
mean_val = df_norm[col].mean()
std_val = df_norm[col].std()
if std_val > 0:
df_norm[col] = (df_norm[col] - mean_val) / std_val
# 绘制热力图
fig, ax = plt.subplots(figsize=(14, 6))
if len(df_norm) > 0:
sns.heatmap(
df_norm.T,
cmap='RdYlGn_r',
center=0,
cbar_kws={'label': 'Z-score (越红越差)'},
ax=ax,
linewidths=0.5,
linecolor='white',
)
ax.set_xlabel('月份', fontsize=11)
ax.set_ylabel('流动性指标', fontsize=11)
ax.set_title('BTC市场流动性指标热力图 (月度)', fontsize=13)
# 优化x轴标签
n_labels = min(12, len(df_norm))
step = max(1, len(df_norm) // n_labels)
xticks_pos = range(0, len(df_norm), step)
xticks_labels = [df_norm.index[i].strftime('%Y-%m') for i in xticks_pos]
ax.set_xticks([i + 0.5 for i in xticks_pos])
ax.set_xticklabels(xticks_labels, rotation=45, ha='right', fontsize=8)
else:
ax.text(0.5, 0.5, '数据不足', transform=ax.transAxes, ha='center', va='center')
fig.tight_layout()
fig.savefig(output_dir / 'microstructure_liquidity_heatmap.png', dpi=150, bbox_inches='tight')
plt.close(fig)
print(f" [图] 流动性热力图已保存: {output_dir / 'microstructure_liquidity_heatmap.png'}")
def _plot_vpin(
vpin: pd.Series,
crisis_dates: List,
output_dir: Path,
):
"""图3: VPIN预警图"""
fig, ax = plt.subplots(figsize=(14, 6))
valid_vpin = vpin.dropna()
if len(valid_vpin) > 0:
# 按日聚合
daily_vpin = valid_vpin.resample('D').mean()
ax.plot(daily_vpin.index, daily_vpin.values, color='darkblue', linewidth=0.8, label='VPIN')
ax.fill_between(daily_vpin.index, 0, daily_vpin.values, alpha=0.2, color='blue')
# 预警阈值线 (0.3 和 0.5)
ax.axhline(y=0.3, color='orange', linestyle='--', linewidth=1, label='中度预警 (0.3)')
ax.axhline(y=0.5, color='red', linestyle='--', linewidth=1, label='高度预警 (0.5)')
# 标记危机点
if len(crisis_dates) > 0:
crisis_vpin = vpin.loc[crisis_dates]
ax.scatter(crisis_vpin.index, crisis_vpin.values, color='red', s=30,
alpha=0.6, marker='x', label='流动性危机', zorder=5)
ax.set_xlabel('日期', fontsize=11)
ax.set_ylabel('VPIN', fontsize=11)
ax.set_title('VPIN (知情交易概率) 预警图', fontsize=13)
ax.set_ylim([0, 1])
ax.grid(True, alpha=0.3)
ax.legend(loc='upper left', fontsize=9)
else:
ax.text(0.5, 0.5, '数据不足', transform=ax.transAxes, ha='center', va='center')
fig.tight_layout()
fig.savefig(output_dir / 'microstructure_vpin.png', dpi=150, bbox_inches='tight')
plt.close(fig)
print(f" [图] VPIN预警图已保存: {output_dir / 'microstructure_vpin.png'}")
def _plot_kyle_lambda(
kyle_lambda: pd.Series,
output_dir: Path,
):
"""图4: Kyle Lambda滚动图"""
fig, ax = plt.subplots(figsize=(14, 6))
valid_lambda = kyle_lambda.dropna()
if len(valid_lambda) > 0:
# 按日聚合
daily_lambda = valid_lambda.resample('D').mean()
ax.plot(daily_lambda.index, daily_lambda.values, color='darkgreen', linewidth=0.8, label="Kyle's λ")
# 滚动均值
ma30 = daily_lambda.rolling(window=30).mean()
ax.plot(ma30.index, ma30.values, color='orange', linestyle='--', linewidth=1, label='30日均值')
ax.set_xlabel('日期', fontsize=11)
ax.set_ylabel("Kyle's Lambda", fontsize=11)
ax.set_title("价格冲击系数 (Kyle's Lambda) - 滚动估计", fontsize=13)
ax.grid(True, alpha=0.3)
ax.legend(loc='upper left', fontsize=9)
else:
ax.text(0.5, 0.5, '数据不足', transform=ax.transAxes, ha='center', va='center')
fig.tight_layout()
fig.savefig(output_dir / 'microstructure_kyle_lambda.png', dpi=150, bbox_inches='tight')
plt.close(fig)
print(f" [图] Kyle Lambda图已保存: {output_dir / 'microstructure_kyle_lambda.png'}")
# =============================================================================
# 主分析函数
# =============================================================================
def run_microstructure_analysis(
df: pd.DataFrame,
output_dir: str = "output/microstructure"
) -> Dict:
"""
市场微观结构分析主函数
Parameters
----------
df : pd.DataFrame
日线数据 (用于传递,但实际会内部加载高频数据)
output_dir : str
输出目录
Returns
-------
dict
{
"findings": [
{
"name": str,
"p_value": float,
"effect_size": float,
"significant": bool,
"description": str,
"test_set_consistent": bool,
"bootstrap_robust": bool,
},
...
],
"summary": {
"mean_roll_spread": float,
"mean_cs_spread_pct": float,
"mean_kyle_lambda": float,
"mean_amihud": float,
"mean_vpin": float,
"n_liquidity_crises": int,
}
}
"""
print("=" * 70)
print("开始市场微观结构分析")
print("=" * 70)
output_path = Path(output_dir)
output_path.mkdir(parents=True, exist_ok=True)
findings = []
summary = {}
# -------------------------------------------------------------------------
# 1. 数据加载 (1m, 3m, 5m)
# -------------------------------------------------------------------------
print("\n[1/7] 加载高频数据...")
try:
df_1m = load_klines("1m")
print(f" 1分钟数据: {len(df_1m):,} 条 ({df_1m.index.min()} ~ {df_1m.index.max()})")
except Exception as e:
print(f" [警告] 无法加载1分钟数据: {e}")
df_1m = None
try:
df_5m = load_klines("5m")
print(f" 5分钟数据: {len(df_5m):,} 条 ({df_5m.index.min()} ~ {df_5m.index.max()})")
except Exception as e:
print(f" [警告] 无法加载5分钟数据: {e}")
df_5m = None
# 选择使用5m数据 (1m太大5m已足够捕捉微观结构)
if df_5m is not None and len(df_5m) > 100:
df_hf = df_5m
interval_name = "5m"
elif df_1m is not None and len(df_1m) > 100:
# 如果必须用1m做日聚合以减少计算量
print(" [信息] 1分钟数据量过大聚合到日线...")
df_hf = df_1m.resample('H').agg({
'open': 'first',
'high': 'max',
'low': 'min',
'close': 'last',
'volume': 'sum',
'quote_volume': 'sum',
'trades': 'sum',
'taker_buy_volume': 'sum',
'taker_buy_quote_volume': 'sum',
}).dropna()
interval_name = "1h (from 1m)"
else:
print(" [错误] 无高频数据可用,无法进行微观结构分析")
return {"findings": findings, "summary": summary}
print(f" 使用数据: {interval_name}, {len(df_hf):,}")
# 计算收益率
df_hf['log_return'] = log_returns(df_hf['close'])
df_hf = df_hf.dropna(subset=['log_return'])
# -------------------------------------------------------------------------
# 2. Roll价差估计
# -------------------------------------------------------------------------
print("\n[2/7] 计算Roll价差...")
try:
roll_spread = _calculate_roll_spread(df_hf['close'], window=100)
valid_roll = roll_spread.dropna()
if len(valid_roll) > 0:
mean_roll = valid_roll.mean()
median_roll = valid_roll.median()
summary['mean_roll_spread'] = mean_roll
summary['median_roll_spread'] = median_roll
# 与价格的比例
mean_price = df_hf['close'].mean()
roll_pct = (mean_roll / mean_price) * 100
findings.append({
'name': 'Roll价差估计',
'p_value': np.nan, # Roll估计无显著性检验
'effect_size': mean_roll,
'significant': True,
'description': f'平均Roll价差={mean_roll:.4f} USDT (相对价格: {roll_pct:.4f}%), 中位数={median_roll:.4f}',
'test_set_consistent': True,
'bootstrap_robust': True,
})
print(f" 平均Roll价差: {mean_roll:.4f} USDT ({roll_pct:.4f}%)")
else:
print(" [警告] Roll价差计算失败 (可能自协方差为正)")
summary['mean_roll_spread'] = np.nan
except Exception as e:
print(f" [错误] Roll价差计算异常: {e}")
roll_spread = pd.Series(dtype=float)
summary['mean_roll_spread'] = np.nan
# -------------------------------------------------------------------------
# 3. Corwin-Schultz价差估计
# -------------------------------------------------------------------------
print("\n[3/7] 计算Corwin-Schultz价差...")
try:
cs_spread = _calculate_corwin_schultz_spread(df_hf['high'], df_hf['low'], window=2)
valid_cs = cs_spread.dropna()
if len(valid_cs) > 0:
mean_cs = valid_cs.mean() * 100 # 转为百分比
median_cs = valid_cs.median() * 100
summary['mean_cs_spread_pct'] = mean_cs
summary['median_cs_spread_pct'] = median_cs
findings.append({
'name': 'Corwin-Schultz价差估计',
'p_value': np.nan,
'effect_size': mean_cs / 100,
'significant': True,
'description': f'平均CS价差={mean_cs:.4f}%, 中位数={median_cs:.4f}%',
'test_set_consistent': True,
'bootstrap_robust': True,
})
print(f" 平均Corwin-Schultz价差: {mean_cs:.4f}%")
else:
print(" [警告] Corwin-Schultz价差计算失败")
summary['mean_cs_spread_pct'] = np.nan
except Exception as e:
print(f" [错误] Corwin-Schultz价差计算异常: {e}")
cs_spread = pd.Series(dtype=float)
summary['mean_cs_spread_pct'] = np.nan
# -------------------------------------------------------------------------
# 4. Kyle's Lambda (价格冲击系数)
# -------------------------------------------------------------------------
print("\n[4/7] 计算Kyle's Lambda...")
try:
kyle_lambda = _calculate_kyle_lambda(
df_hf['log_return'],
df_hf['volume'],
window=100
)
valid_lambda = kyle_lambda.dropna()
if len(valid_lambda) > 0:
mean_lambda = valid_lambda.mean()
median_lambda = valid_lambda.median()
summary['mean_kyle_lambda'] = mean_lambda
summary['median_kyle_lambda'] = median_lambda
# 检验Lambda是否显著大于0
t_stat, p_value = stats.ttest_1samp(valid_lambda, 0)
findings.append({
'name': "Kyle's Lambda (价格冲击系数)",
'p_value': p_value,
'effect_size': mean_lambda,
'significant': p_value < 0.05,
'description': f"平均λ={mean_lambda:.6f}, 中位数={median_lambda:.6f}, t检验 p={p_value:.4f}",
'test_set_consistent': True,
'bootstrap_robust': p_value < 0.01,
})
print(f" 平均Kyle's Lambda: {mean_lambda:.6f} (p={p_value:.4f})")
else:
print(" [警告] Kyle's Lambda计算失败")
summary['mean_kyle_lambda'] = np.nan
except Exception as e:
print(f" [错误] Kyle's Lambda计算异常: {e}")
kyle_lambda = pd.Series(dtype=float)
summary['mean_kyle_lambda'] = np.nan
# -------------------------------------------------------------------------
# 5. Amihud非流动性比率
# -------------------------------------------------------------------------
print("\n[5/7] 计算Amihud非流动性比率...")
try:
amihud = _calculate_amihud_illiquidity(
df_hf['log_return'],
df_hf['volume'],
df_hf['quote_volume'] if 'quote_volume' in df_hf.columns else None,
)
valid_amihud = amihud.dropna()
if len(valid_amihud) > 0:
mean_amihud = valid_amihud.mean()
median_amihud = valid_amihud.median()
summary['mean_amihud'] = mean_amihud
summary['median_amihud'] = median_amihud
findings.append({
'name': 'Amihud非流动性比率',
'p_value': np.nan,
'effect_size': mean_amihud,
'significant': True,
'description': f'平均Amihud={mean_amihud:.2e}, 中位数={median_amihud:.2e}',
'test_set_consistent': True,
'bootstrap_robust': True,
})
print(f" 平均Amihud非流动性: {mean_amihud:.2e}")
else:
print(" [警告] Amihud计算失败")
summary['mean_amihud'] = np.nan
except Exception as e:
print(f" [错误] Amihud计算异常: {e}")
amihud = pd.Series(dtype=float)
summary['mean_amihud'] = np.nan
# -------------------------------------------------------------------------
# 6. VPIN (知情交易概率)
# -------------------------------------------------------------------------
print("\n[6/7] 计算VPIN...")
try:
vpin = _calculate_vpin(
df_hf['volume'],
df_hf['taker_buy_volume'],
bucket_size=50,
window=50,
)
valid_vpin = vpin.dropna()
if len(valid_vpin) > 0:
mean_vpin = valid_vpin.mean()
median_vpin = valid_vpin.median()
high_vpin_pct = (valid_vpin > 0.5).sum() / len(valid_vpin) * 100
summary['mean_vpin'] = mean_vpin
summary['median_vpin'] = median_vpin
summary['high_vpin_pct'] = high_vpin_pct
findings.append({
'name': 'VPIN (知情交易概率)',
'p_value': np.nan,
'effect_size': mean_vpin,
'significant': mean_vpin > 0.3,
'description': f'平均VPIN={mean_vpin:.4f}, 中位数={median_vpin:.4f}, 高预警(>0.5)占比={high_vpin_pct:.2f}%',
'test_set_consistent': True,
'bootstrap_robust': True,
})
print(f" 平均VPIN: {mean_vpin:.4f} (高预警占比: {high_vpin_pct:.2f}%)")
else:
print(" [警告] VPIN计算失败")
summary['mean_vpin'] = np.nan
except Exception as e:
print(f" [错误] VPIN计算异常: {e}")
vpin = pd.Series(dtype=float)
summary['mean_vpin'] = np.nan
# -------------------------------------------------------------------------
# 7. 流动性危机检测
# -------------------------------------------------------------------------
print("\n[7/7] 检测流动性危机...")
try:
if len(amihud.dropna()) > 0:
crisis_df = _detect_liquidity_crisis(amihud, threshold_multiplier=3.0)
if len(crisis_df) > 0:
n_crisis = len(crisis_df)
summary['n_liquidity_crises'] = n_crisis
# 危机日期列表
crisis_dates = crisis_df['date'].tolist()
# 统计危机特征
mean_multiplier = crisis_df['multiplier'].mean()
findings.append({
'name': '流动性危机检测',
'p_value': np.nan,
'effect_size': n_crisis,
'significant': n_crisis > 0,
'description': f'检测到{n_crisis}次流动性危机事件 (Amihud突变), 平均倍数={mean_multiplier:.2f}',
'test_set_consistent': True,
'bootstrap_robust': True,
})
print(f" 检测到流动性危机: {n_crisis}")
print(f" 危机日期示例: {crisis_dates[:5]}")
else:
print(" 未检测到流动性危机")
summary['n_liquidity_crises'] = 0
crisis_dates = []
else:
print(" [警告] Amihud数据不足无法检测危机")
summary['n_liquidity_crises'] = 0
crisis_dates = []
except Exception as e:
print(f" [错误] 流动性危机检测异常: {e}")
summary['n_liquidity_crises'] = 0
crisis_dates = []
# -------------------------------------------------------------------------
# 8. 生成图表
# -------------------------------------------------------------------------
print("\n[图表生成]")
try:
# 整合指标到一个DataFrame (用于热力图)
df_metrics = pd.DataFrame({
'roll_spread': roll_spread,
'cs_spread_pct': cs_spread,
'kyle_lambda': kyle_lambda,
'amihud': amihud,
'vpin': vpin,
})
_plot_spreads(roll_spread, cs_spread, output_path)
_plot_liquidity_heatmap(df_metrics, output_path)
_plot_vpin(vpin, crisis_dates, output_path)
_plot_kyle_lambda(kyle_lambda, output_path)
except Exception as e:
print(f" [错误] 图表生成失败: {e}")
# -------------------------------------------------------------------------
# 总结
# -------------------------------------------------------------------------
print("\n" + "=" * 70)
print("市场微观结构分析完成")
print("=" * 70)
print(f"发现总数: {len(findings)}")
print(f"输出目录: {output_path.absolute()}")
return {
"findings": findings,
"summary": summary,
}
# =============================================================================
# 命令行测试入口
# =============================================================================
if __name__ == "__main__":
from src.data_loader import load_daily
df_daily = load_daily()
result = run_microstructure_analysis(df_daily)
print("\n" + "=" * 70)
print("分析结果摘要")
print("=" * 70)
for finding in result['findings']:
print(f"- {finding['name']}: {finding['description']}")