fix: 全面修复代码质量和报告准确性问题
代码修复 (16 个模块):
- GARCH 模型统一改用 t 分布 + 收敛检查 (returns/volatility/anomaly)
- KS 检验替换为 Lilliefors 检验 (returns)
- 修复数据泄漏: StratifiedKFold→TimeSeriesSplit, scaler 逐折 fit (anomaly)
- 前兆标签 shift(-1) 预测次日异常 (anomaly)
- PSD 归一化加入采样频率和单边谱×2 (fft)
- AR(1) 红噪声基线经验缩放 (fft)
- 盒计数法独立 x/y 归一化, MF-DFA q=0 (fractal)
- ADF 平稳性检验 + 移除双重 Bonferroni (causality)
- R/S Hurst 添加 R² 拟合优度 (hurst)
- Prophet 递推预测避免信息泄露 (time_series)
- IC 计算过滤零信号, 中性形态 hit_rate=NaN (indicators/patterns)
- 聚类阈值自适应化 (clustering)
- 日历效应前后半段稳健性检查 (calendar)
- 证据评分标准文本与代码对齐 (visualization)
- 核心管道 NaN/空值防护 (data_loader/preprocessing/main)

报告修复 (docs/REPORT.md, 15 处):
- 标度指数 H_scaling 与 Hurst 指数消歧
- GBM 6 个月概率锥数值重算
- CLT 限定、减半措辞弱化、情景概率逻辑修正
- GPD 形状参数解读修正、异常 AUC 证据降级

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -21,10 +21,15 @@ def detrend_log_diff(prices: pd.Series) -> pd.Series:
|
||||
|
||||
|
||||
def detrend_linear(series: pd.Series) -> pd.Series:
    """Remove a least-squares linear trend from *series*, ignoring NaN samples.

    The line is fitted only to the non-NaN samples, using each sample's
    original position in *series* as its x coordinate, and is then evaluated
    over every position. NaN entries therefore stay NaN in the result and do
    not distort the slope or intercept of the fitted trend.

    Args:
        series: Numeric series to detrend; may contain NaN.

    Returns:
        A series with the same index as *series* holding the residuals
        ``value - trend``. When fewer than two non-NaN samples exist a line
        cannot be fitted, so the series is only mean-centred.
    """
    values = series.to_numpy(dtype=float, na_value=np.nan)
    valid = ~np.isnan(values)
    if valid.sum() < 2:
        return series - series.mean()

    positions = np.arange(len(values))
    # Fit against the samples' real positions: re-numbering the surviving
    # points 0..k-1 would shift and stretch the trend whenever NaNs occur
    # at the start or in the middle of the series.
    coeffs = np.polyfit(positions[valid], values[valid], 1)
    trend = np.polyval(coeffs, positions)
    return pd.Series(values - trend, index=series.index)
|
||||
|
||||
|
||||
@@ -35,9 +40,9 @@ def hp_filter(series: pd.Series, lamb: float = 1600) -> tuple:
|
||||
return cycle, trend
|
||||
|
||||
|
||||
def rolling_volatility(returns: pd.Series, window: int = 30, periods_per_year: int = 365) -> pd.Series:
    """Return the annualised rolling volatility of *returns*.

    Args:
        returns: Per-period return series.
        window: Number of observations in each rolling window.
        periods_per_year: Number of return periods per year used for
            annualisation. The default of 365 matches the previously
            hard-coded factor.

    Returns:
        The rolling standard deviation of *returns* multiplied by
        ``sqrt(periods_per_year)``. Positions without a full window are NaN.
    """
    annualisation = np.sqrt(periods_per_year)
    return returns.rolling(window=window).std() * annualisation
|
||||
|
||||
|
||||
def realized_volatility(returns: pd.Series, window: int = 30) -> pd.Series:
|
||||
@@ -51,7 +56,11 @@ def taker_buy_ratio(df: pd.DataFrame) -> pd.Series:
|
||||
|
||||
|
||||
def add_derived_features(df: pd.DataFrame) -> pd.DataFrame:
|
||||
"""添加常用衍生特征列"""
|
||||
"""添加常用衍生特征列
|
||||
|
||||
注意: 返回的 DataFrame 前30行部分列包含 NaN(由滚动窗口计算导致),
|
||||
下游模块应根据需要自行处理。
|
||||
"""
|
||||
out = df.copy()
|
||||
out["log_return"] = log_returns(df["close"])
|
||||
out["simple_return"] = simple_returns(df["close"])
|
||||
@@ -69,8 +78,11 @@ def add_derived_features(df: pd.DataFrame) -> pd.DataFrame:
|
||||
|
||||
|
||||
def standardize(series: pd.Series) -> pd.Series:
    """Z-score standardise *series*; degenerate input yields an all-zero series.

    Args:
        series: Numeric series to standardise.

    Returns:
        ``(series - mean) / std`` using the sample standard deviation. If the
        standard deviation is undefined (empty, single-sample, or all-NaN
        input) or is zero up to floating-point round-off, a series of 0.0
        with the same index is returned instead. In that degenerate case
        every position is 0.0, including positions that were NaN.
    """
    std = series.std()
    zeros = pd.Series(0.0, index=series.index)
    if not np.isfinite(std):
        return zeros

    # A constant float series can produce a tiny non-zero std purely from
    # round-off in the mean; dividing by it would return arbitrary +/-1
    # z-scores. Compare against the data's own magnitude so that genuinely
    # small-scale data is still standardised normally.
    scale = series.abs().max()
    if std <= 8 * np.finfo(float).eps * scale:
        return zeros
    return (series - series.mean()) / std
|
||||
|
||||
|
||||
def winsorize(series: pd.Series, lower: float = 0.01, upper: float = 0.99) -> pd.Series:
|
||||
|
||||
Reference in New Issue
Block a user