From 0bb65c7b6bd558b68fcca1cc6c2c95e743284874 Mon Sep 17 00:00:00 2001 From: zengbin93 Date: Sat, 20 Jan 2024 17:24:45 +0800 Subject: [PATCH] =?UTF-8?q?0.9.41=20=E6=96=B0=E5=A2=9E=E8=87=AA=E7=9B=B8?= =?UTF-8?q?=E5=85=B3=E5=88=86=E6=9E=90=E7=BB=84=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 1 + czsc/__init__.py | 2 + czsc/utils/st_components.py | 157 ++++++++++++++++++++++++++++++++++-- docs/requirements.txt | 3 +- requirements.txt | 3 +- 5 files changed, 156 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 5b0079112..799014a87 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ * 已经开始用czsc库进行量化研究的朋友,欢迎[加入飞书群](https://applink.feishu.cn/client/chat/chatter/add_by_link?link_token=0bak668e-7617-452c-b935-94d2c209e6cf),快点击加入吧! * [B站视频教程合集(持续更新...)](https://space.bilibili.com/243682308/channel/series) +* [CZSC小圈子](https://s0cqcxuy3p.feishu.cn/wiki/wikcnwXSk9mWnki1b6URPhLA2Hc) * [CZSC代码库QA](https://zbczsc.streamlit.app/) diff --git a/czsc/__init__.py b/czsc/__init__.py index 83f486e59..204cd49aa 100644 --- a/czsc/__init__.py +++ b/czsc/__init__.py @@ -106,6 +106,8 @@ show_factor_layering, show_symbol_factor_layering, show_weight_backtest, + show_ts_rolling_corr, + show_ts_self_corr, ) from czsc.utils.bi_info import ( diff --git a/czsc/utils/st_components.py b/czsc/utils/st_components.py index f40c36a98..f90bb0825 100644 --- a/czsc/utils/st_components.py +++ b/czsc/utils/st_components.py @@ -1,8 +1,11 @@ import czsc +import hashlib import numpy as np import pandas as pd import streamlit as st import plotly.express as px +import statsmodels.api as sm +import plotly.graph_objects as go from sklearn.linear_model import LinearRegression @@ -73,7 +76,13 @@ def _stats(df_, type_='持有日'): def show_monthly_return(df, ret_col='total', title="月度累计收益", **kwargs): - """展示指定列的月度累计收益""" + """展示指定列的月度累计收益 + + :param df: pd.DataFrame,数据源 + :param ret_col: str,收益列名 + :param title: str,标题 + :param kwargs: + """ if not df.index.dtype == 'datetime64[ns]': df['dt'] = pd.to_datetime(df['dt']) df.set_index('dt', inplace=True) @@ -82,7 +91,9 @@ def show_monthly_return(df, ret_col='total', title="月度累计收益", **kwarg df = df.copy().fillna(0) df.sort_index(inplace=True, ascending=True) - st.subheader(title, divider="rainbow") + if title: + st.subheader(title, divider="rainbow") + monthly = df[[ret_col]].resample('M').sum() monthly['year'] = monthly.index.year monthly['month'] = monthly.index.month @@ -135,8 +146,10 @@ def show_sectional_ic(df, x_col, y_col, method='pearson', **kwargs): dfm = pd.pivot_table(dfm, index='year', columns='month', values='ic') col4.write("月度IC分析结果:") - col4.dataframe(dfm.style.background_gradient(cmap='RdYlGn_r', axis=None).format('{:.4f}', na_rep='MISS'), - use_container_width=True) + col4.dataframe( + dfm.style.background_gradient(cmap='RdYlGn_r', axis=None).format('{:.4f}', na_rep='MISS'), + use_container_width=True, + ) if kwargs.get("show_factor_histgram", False): fig = px.histogram(df, x=x_col, marginal="box", title="因子数据分布图") @@ -186,7 +199,7 @@ def show_factor_layering(df, x_col, y_col='n1b', **kwargs): """ n = kwargs.get("n", 10) - if df[y_col].max() > 100: # 收益率单位为BP, 转换为万分之一 + if df[y_col].max() > 100: # 收益率单位为BP, 转换为万分之一 df[y_col] = df[y_col] / 10000 df = czsc.feture_cross_layering(df, x_col, n=n) @@ -241,7 +254,7 @@ def show_symbol_factor_layering(df, x_col, y_col='n1b', **kwargs): """ df = df.copy() n = kwargs.get("n", 10) - if df[y_col].max() > 100: # 如果收益率单位为BP, 转换为万分之一 + if df[y_col].max() > 100: # 如果收益率单位为BP, 转换为万分之一 df[y_col] = df[y_col] / 10000 if f'{x_col}分层' not in df.columns: @@ -389,6 +402,134 @@ def show_splited_daily(df, ret_col, **kwargs): dfv = dfv.background_gradient(cmap='RdYlGn', subset=['盈亏平衡点']) dfv = dfv.background_gradient(cmap='RdYlGn_r', subset=['日胜率']) dfv = dfv.background_gradient(cmap='RdYlGn_r', subset=['非零覆盖']) - dfv = dfv.format({'盈亏平衡点': '{:.2f}', '年化波动率': '{:.2%}', '最大回撤': '{:.2%}', '卡玛': '{:.2f}', '年化': '{:.2%}', - '夏普': '{:.2f}', '非零覆盖': '{:.2%}', '日胜率': '{:.2%}', '绝对收益': '{:.2%}'}) + dfv = dfv.format( + { + '盈亏平衡点': '{:.2f}', + '年化波动率': '{:.2%}', + '最大回撤': '{:.2%}', + '卡玛': '{:.2f}', + '年化': '{:.2%}', + '夏普': '{:.2f}', + '非零覆盖': '{:.2%}', + '日胜率': '{:.2%}', + '绝对收益': '{:.2%}', + } + ) st.dataframe(dfv, use_container_width=True) + + +def show_ts_rolling_corr(df, col1, col2, **kwargs): + """时序上按 rolling 的方式计算相关系数 + + :param df: pd.DataFrame, 必须包含列 dt 和 col1, col2 + :param col1: str, df 中的列名 + :param col2: str, df 中的列名 + :param kwargs: + + - min_periods: int, 最小滑动窗口长度 + - window: int, 滑动窗口长度,0 表示按 expanding 方式滑动 + - corr_method: str, 相关系数计算方法,可选 pearson, kendall, spearman + - sub_title: str, 子标题 + """ + if col1 not in df.columns or col2 not in df.columns: + st.error(f"列 {col1} 或 {col2} 不存在,请重新输入") + return + + if not isinstance(df.index, pd.DatetimeIndex): + df['dt'] = pd.to_datetime(df['dt']) + df = df.set_index('dt') + + df = df[[col1, col2]].copy() + if df.isnull().sum().sum() > 0: + st.dataframe(df[df.isnull().sum(axis=1) > 0]) + st.error(f"列 {col1} 或 {col2} 中存在缺失值,请先处理缺失值") + return + + sub_title = kwargs.get('sub_title', None) + if sub_title: + st.subheader(sub_title, divider="rainbow", anchor=hashlib.md5(sub_title.encode('utf-8')).hexdigest()[:8]) + + min_periods = kwargs.get('min_periods', None) + window = kwargs.get('window', None) + corr_method = kwargs.get('corr_method', 'pearson') + + if not window or window <= 0: + method = 'expanding' + corr_result = df[col1].expanding(min_periods=min_periods).corr(df[col2], pairwise=True) + else: + method = 'rolling' + corr_result = df[col1].rolling(window=window, min_periods=min_periods).corr(df[col2], pairwise=True) + + corr_result = corr_result.dropna() + corr_result = corr_result.rename('corr') + line = go.Scatter(x=corr_result.index, y=corr_result, mode='lines', name='corr') + layout = go.Layout( + title=f'滑动({method})相关系数', + xaxis=dict(title=''), + yaxis=dict(title='corr'), + annotations=[ + dict( + x=0.0, + y=1.05, + showarrow=False, + xref="paper", + yref="paper", + font=dict(size=12), + text=f"滑动窗口长度:{window},最小滑动窗口长度:{min_periods},相关系数计算方法:{corr_method}", + ) + ], + ) + fig = go.Figure(data=[line], layout=layout) + st.plotly_chart(fig, use_container_width=True) + + +def show_ts_self_corr(df, col, **kwargs): + """展示时序上单因子的自相关性分析结果,贡献者:guo + + :param df: pd.DataFrame, 必须包含列 dt 和 col + :param col: str, df 中的列名 + """ + if not isinstance(df.index, pd.DatetimeIndex): + df['dt'] = pd.to_datetime(df['dt']) + df = df.set_index('dt') + df = df.sort_index(ascending=True) + + if df[col].isnull().sum() > 0: + st.dataframe(df[df[col].isnull()]) + st.error(f"列 {col} 中存在缺失值,请先处理缺失值") + return + + col1, col2 = st.columns(2) + + with col1: + sub_title = f"自相关系数分析({col})" + st.subheader(sub_title, divider="rainbow", anchor=hashlib.md5(sub_title.encode('utf-8')).hexdigest()[:8]) + c1, c2, c3 = st.columns([2, 2, 1]) + nlags = int(c1.number_input('最大滞后阶数', value=20, min_value=1, max_value=100, step=1)) + method = c2.selectbox('选择分析方法', ['acf', 'pacf'], index=0) + + if method == 'acf': + acf_result, conf_int = sm.tsa.acf(df[[col]].copy(), nlags=nlags, alpha=0.05, missing='raise') + else: + acf_result, conf_int = sm.tsa.pacf(df[[col]].copy(), nlags=nlags, alpha=0.05) + + bar = go.Bar(x=list(range(len(acf_result))), y=acf_result, name='自相关系数') + upper = go.Scatter(x=list(range(len(acf_result))), y=conf_int[:, 1], mode='lines', name='95%置信区间上界') + lower = go.Scatter(x=list(range(len(acf_result))), y=conf_int[:, 0], mode='lines', name='95%置信区间下界') + layout = go.Layout(title=method.upper(), xaxis=dict(title='滞后阶数'), yaxis=dict(title='自相关系数')) + fig = go.Figure(data=[bar, upper, lower], layout=layout) + st.plotly_chart(fig, use_container_width=True) + + with col2: + sub_title = f"滞后N阶滑动相关性({col})" + st.subheader(sub_title, divider="rainbow", anchor=hashlib.md5(sub_title.encode('utf-8')).hexdigest()[:8]) + c1, c2, c3, c4 = st.columns(4) + min_periods = int(c1.number_input('最小滑动窗口长度', value=20, min_value=0, step=1)) + window = int(c2.number_input('滑动窗口长度', value=0, step=1, help='0 表示按 expanding 方式滑动')) + corr_method = c3.selectbox('相关系数计算方法', ['pearson', 'kendall', 'spearman']) + n = int(c4.number_input('自相关滞后阶数', value=1, min_value=1, step=1)) + + df[f"{col}_lag{n}"] = df[col].shift(-n) + df.dropna(subset=[f"{col}_lag{n}"], inplace=True) + + show_ts_rolling_corr(df, col, f"{col}_lag{n}", min_periods=min_periods, window=window, corr_method=corr_method) diff --git a/docs/requirements.txt b/docs/requirements.txt index eea0f073e..88d3f6e1d 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -28,4 +28,5 @@ parse>=1.19.0 lightgbm>=4.0.0 streamlit redis -oss2 \ No newline at end of file +oss2 +statsmodels \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f999f0c79..10fbcd5c6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,4 +22,5 @@ parse>=1.19.0 lightgbm>=4.0.0 streamlit redis -oss2 \ No newline at end of file +oss2 +statsmodels \ No newline at end of file