Skip to content

Commit

Permalink
V0.9.35 更新一批代码 (#176)
Browse files Browse the repository at this point in the history
* 0.9.35 start coding

* 0.9.35 优化 streamlit 组件

* 0.9.35 update

* 0.9.35 优化 DataClient 缓存目录结构

* 0.9.35 fix bug

* 0.9.35 update

* 0.9.35 data client 支持自动重试机制

* 0.9.35 streamlit 组件默认不缓存

* 0.9.35 新增 get_all_weights 方法用于一次性获取所有权重数据

* 0.9.35 fix bug

* 0.9.35 新增 get_all_weights 方便一次性获取全部历史
  • Loading branch information
zengbin93 authored Nov 11, 2023
1 parent 06420db commit 57b25a5
Show file tree
Hide file tree
Showing 8 changed files with 113 additions and 38 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: Python package

on:
push:
branches: [ master, V0.9.34 ]
branches: [ master, V0.9.35 ]
pull_request:
branches: [ master ]

Expand Down
4 changes: 2 additions & 2 deletions czsc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,10 +110,10 @@
feture_cross_layering,
)

__version__ = "0.9.34"
__version__ = "0.9.35"
__author__ = "zengbin93"
__email__ = "[email protected]"
__date__ = "20231022"
__date__ = "20231104"



Expand Down
48 changes: 45 additions & 3 deletions czsc/traders/rwc.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
class RedisWeightsClient:
"""策略持仓权重收发客户端"""

version = "V231014"
version = "V231111"

def __init__(self, strategy_name, redis_url, **kwargs):
"""
Expand Down Expand Up @@ -269,10 +269,17 @@ def get_last_weights(self, symbols=None, ignore_zero=True):
dfw['dt'] = pd.to_datetime(dfw['dt'])
if ignore_zero:
dfw = dfw[dfw['weight'] != 0].copy().reset_index(drop=True)
dfw = dfw.sort_values(['dt', 'symbol']).reset_index(drop=True)
return dfw

def get_hist_weights(self, symbol, sdt, edt) -> pd.DataFrame:
"""获取单个品种的持仓权重历史数据"""
"""获取单个品种的持仓权重历史数据
:param symbol: str, 品种代码
:param sdt: str, 开始时间, eg: 20210924 10:19:00
:param edt: str, 结束时间, eg: 20220924 10:19:00
:return: pd.DataFrame
"""
start_score = pd.to_datetime(sdt).strftime('%Y%m%d%H%M%S')
end_score = pd.to_datetime(edt).strftime('%Y%m%d%H%M%S')
model_key = f'{self.key_prefix}:{self.strategy_name}:{symbol}'
Expand All @@ -289,7 +296,7 @@ def get_hist_weights(self, symbol, sdt, edt) -> pd.DataFrame:

weights = []
for i in range(len(key_list)):
dt = pd.to_datetime(key_list[i].split(":")[-1]) # type: ignore
dt = pd.to_datetime(key_list[i].split(":")[-1])
weight, price, ref = rows[i]
weight = weight if weight is None else float(weight)
price = price if price is None else float(price)
Expand All @@ -298,5 +305,40 @@ def get_hist_weights(self, symbol, sdt, edt) -> pd.DataFrame:
except Exception:
ref = ref
weights.append((self.strategy_name, symbol, dt, weight, price, ref))

dfw = pd.DataFrame(weights, columns=['strategy_name', 'symbol', 'dt', 'weight', 'price', 'ref'])
dfw = dfw.sort_values('dt').reset_index(drop=True)
return dfw

def get_all_weights(self, sdt=None, edt=None, ignore_zero=True) -> pd.DataFrame:
    """Fetch all stored weight records of this strategy in one pass.

    Every key matching ``{key_prefix}:{strategy_name}:*:*`` is listed, the
    hashes are read through a single pipeline, and the weights are reshaped
    so that each timestamp carries a weight for every symbol: a symbol with
    no record at a timestamp inherits its previous weight (forward fill),
    and 0 before its first record.

    :param sdt: str, optional start time, eg: 20210924 10:19:00; rows earlier
        than this are dropped
    :param edt: str, optional end time, eg: 20220924 10:19:00; rows later
        than this are dropped
    :param ignore_zero: bool, drop rows whose weight equals 0
    :return: pd.DataFrame with columns ``dt``, ``symbol``, ``weight`` sorted by
        ``dt`` then ``symbol``; an empty DataFrame when no record is found
    """
    keys = self.get_keys(f"{self.key_prefix}:{self.strategy_name}:*:*")
    if keys is None or len(keys) == 0:  # type: ignore
        return pd.DataFrame()

    # Keep only keys whose last segment is 14 characters long -- presumably the
    # %Y%m%d%H%M%S timestamp suffix of a historical record (TODO confirm).
    keys = [x for x in keys if len(x.split(":")[-1]) == 14]  # type: ignore
    if not keys:
        # Nothing left after filtering: return the same empty result as above
        # instead of failing on a column lookup further down.
        return pd.DataFrame()

    with self.r.pipeline() as pipe:
        for key in keys:
            pipe.hgetall(key)
        rows = pipe.execute()

    # A hash that no longer exists comes back empty; skip those so the frame
    # below is guaranteed to have the columns it is indexed by.
    rows = [row for row in rows if row]
    if not rows:
        return pd.DataFrame()

    df = pd.DataFrame(rows)
    df['dt'] = pd.to_datetime(df['dt'])
    df['weight'] = df['weight'].astype(float)
    df = df.sort_values(['dt', 'symbol']).reset_index(drop=True)

    # Wide table (dt x symbol), forward-filled so every timestamp has a weight
    # for every symbol, then melted back to long format.
    df1 = pd.pivot_table(df, index='dt', columns='symbol', values='weight').sort_index().ffill().fillna(0)
    df1 = pd.melt(df1.reset_index(), id_vars='dt', value_vars=df1.columns, value_name='weight')  # type: ignore
    if ignore_zero:
        df1 = df1[df1['weight'] != 0].reset_index(drop=True)
    if sdt:
        df1 = df1[df1['dt'] >= pd.to_datetime(sdt)].reset_index(drop=True)
    if edt:
        df1 = df1[df1['dt'] <= pd.to_datetime(edt)].reset_index(drop=True)
    df1 = df1.sort_values(['dt', 'symbol']).reset_index(drop=True)
    return df1
24 changes: 17 additions & 7 deletions czsc/utils/data_client.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import os
import shutil
import hashlib
import requests
import pandas as pd
from time import time
from pathlib import Path
from loguru import logger
from functools import partial
from tenacity import retry, stop_after_attempt, wait_fixed


def set_url_token(token, url):
Expand All @@ -32,7 +34,9 @@ def get_url_token(url):


class DataClient:
def __init__(self, token=None, url='http://api.tushare.pro', timeout=30, **kwargs):
__version__ = "V231109"

def __init__(self, token=None, url='http://api.tushare.pro', timeout=300, **kwargs):
"""数据接口客户端,支持缓存,默认缓存路径为 ~/.quant_data_cache;兼容Tushare数据接口
:param token: str API接口TOKEN,用于用户认证
Expand All @@ -47,35 +51,41 @@ def __init__(self, token=None, url='http://api.tushare.pro', timeout=30, **kwarg
self.__token = token or get_url_token(url)
self.__http_url = url
self.__timeout = timeout
self.__url_hash = hashlib.md5(str(url).encode('utf-8')).hexdigest()[:8]
assert self.__token, "请设置czsc_token凭证码,如果没有请联系管理员申请"
self.cache_path = Path(kwargs.get("cache_path", os.path.expanduser("~/.quant_data_cache")))
self.cache_path.mkdir(exist_ok=True, parents=True)
logger.info(f"数据缓存路径:{self.cache_path}")
logger.info(f"数据URL: {url} 数据缓存路径:{self.cache_path}")
if kwargs.get("clear_cache", False):
self.clear_cache()

def clear_cache(self):
"""清空缓存"""
for file in self.cache_path.glob("*.pkl"):
file.unlink()
shutil.rmtree(self.cache_path)
logger.info(f"{self.cache_path} 路径下的数据缓存已清空")

@retry(stop=stop_after_attempt(3), wait=wait_fixed(10), reraise=True)
def post_request(self, api_name, fields='', **kwargs):
"""执行API数据查询
:param api_name: str, 查询接口名称
:param fields: str, 查询字段
:param kwargs: dict, 查询参数
- ttl: int, 缓存有效期,单位秒,-1表示不过期
:return: pd.DataFrame
"""
stime = time()
if api_name in ['__getstate__', '__setstate__']:
return pd.DataFrame()

ttl = int(kwargs.pop("ttl", -1))
req_params = {'api_name': api_name, 'token': self.__token, 'params': kwargs, 'fields': fields}
hash_key = hashlib.md5(str(req_params).encode('utf-8')).hexdigest()
file_cache = self.cache_path / f"{hash_key}.pkl"
if file_cache.exists():
path = self.cache_path / f"{self.__url_hash}_{api_name}"
path.mkdir(exist_ok=True, parents=True)
file_cache = path / f"{hashlib.md5(str(req_params).encode('utf-8')).hexdigest()}.pkl"
if file_cache.exists() and (ttl == -1 or time() - file_cache.stat().st_mtime < ttl):
df = pd.read_pickle(file_cache)
logger.info(f"缓存命中 | API:{api_name};参数:{kwargs};数据量:{df.shape}")
return df
Expand Down
42 changes: 26 additions & 16 deletions czsc/utils/st_components.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def _stats(df_, type_='持有日'):
stats.append(col_stats)
stats = pd.DataFrame(stats).set_index('日收益名称')
fmt_cols = ['年化', '夏普', '最大回撤', '卡玛', '年化波动率', '非零覆盖', '日胜率', '盈亏平衡点']
stats = stats.style.background_gradient(cmap='RdYlGn_r', axis=None).format('{:.4f}', subset=fmt_cols)
stats = stats.style.background_gradient(cmap='RdYlGn_r', axis=None, subset=fmt_cols).format('{:.4f}')
return stats

with st.container():
Expand Down Expand Up @@ -63,7 +63,7 @@ def show_correlation(df, cols=None, method='pearson', **kwargs):
"""
cols = cols or df.columns.to_list()
dfr = df[cols].corr(method=method)
dfr['total'] = dfr.sum(axis=1) - 1
dfr['average'] = (dfr.sum(axis=1) - 1) / (len(cols) - 1)
dfr = dfr.style.background_gradient(cmap='RdYlGn_r', axis=None).format('{:.4f}', na_rep='MISS')
st.dataframe(dfr, use_container_width=kwargs.get("use_container_width", True))

Expand Down Expand Up @@ -144,24 +144,32 @@ def show_factor_layering(df, x_col, y_col='n1b', **kwargs):
if df[y_col].max() > 100: # 收益率单位为BP, 转换为万分之一
df[y_col] = df[y_col] / 10000

def _layering(x):
return pd.qcut(x, q=n, labels=False, duplicates='drop')
df[f'{x_col}分层'] = df.groupby('dt')[x_col].transform(_layering)
df = czsc.feture_cross_layering(df, x_col, n=n)

mr = df.groupby(["dt", f'{x_col}分层'])[y_col].mean().reset_index()
mrr = mr.pivot(index='dt', columns=f'{x_col}分层', values=y_col).fillna(0)
mrr.columns = [f'第{str(i).zfill(2)}层' for i in range(1, n + 1)]

tabs = st.tabs(["分层收益率", "多空组合"])
with tabs[0]:
show_daily_return(mrr)
czsc.show_daily_return(mrr)

with tabs[1]:
long = kwargs.get("long", f"第{n}层")
short = kwargs.get("short", "第01层")
st.write(f"多头:{long},空头:{short}")
mrr['多空组合'] = (mrr[long] - mrr[short]) / 2
show_daily_return(mrr[['多空组合']])
layering_cols = mrr.columns.to_list()
with st.form(key="factor_form"):
col1, col2 = st.columns(2)
long = col1.multiselect("多头组合", layering_cols, default=[], key="factor_long")
short = col2.multiselect("空头组合", layering_cols, default=[], key="factor_short")
submit = st.form_submit_button("多空组合快速测试")

if not submit:
st.warning("请设置多空组合")
st.stop()

dfr = mrr.copy()
dfr['多头'] = dfr[long].mean(axis=1)
dfr['空头'] = -dfr[short].mean(axis=1)
dfr['多空'] = (dfr['多头'] + dfr['空头']) / 2
czsc.show_daily_return(dfr[['多头', '空头', '多空']])


def show_symbol_factor_layering(df, x_col, y_col='n1b', **kwargs):
Expand Down Expand Up @@ -194,7 +202,7 @@ def show_symbol_factor_layering(df, x_col, y_col='n1b', **kwargs):
if f'{x_col}分层' not in df.columns:
# 如果因子分层列不存在,先计算因子分层
if df[x_col].nunique() > n:
czsc.normlize_ts_feature(df, x_col, n=n)
czsc.normalize_ts_feature(df, x_col, n=n)
else:
# 如果因子值的取值数量小于分层数量,直接使用因子独立值排序作为分层
x_rank = sorted(df[x_col].unique())
Expand Down Expand Up @@ -225,7 +233,6 @@ def show_symbol_factor_layering(df, x_col, y_col='n1b', **kwargs):
show_daily_return(dfr[['多头', '空头', '多空']])


@st.cache_data(ttl=3600 * 24)
def show_weight_backtest(dfw, **kwargs):
"""展示权重回测结果
Expand All @@ -246,13 +253,15 @@ def show_weight_backtest(dfw, **kwargs):
- fee: 单边手续费,单位为BP,默认为2BP
"""
fee = kwargs.get("fee", 2)
digits = kwargs.get("digits", 2)
if (dfw.isnull().sum().sum() > 0) or (dfw.isna().sum().sum() > 0):
st.warning("数据中存在空值,请检查数据后再试")
st.warning("show_weight_backtest :: 持仓权重数据中存在空值,请检查数据后再试;空值数据如下:")
st.dataframe(dfw[dfw.isnull().sum(axis=1) > 0], use_container_width=True)
st.stop()

from czsc.traders.weight_backtest import WeightBacktest

wb = WeightBacktest(dfw, fee=fee / 10000)
wb = WeightBacktest(dfw, fee_rate=fee / 10000, digits=digits)
stat = wb.results['绩效评价']

st.divider()
Expand All @@ -270,3 +279,4 @@ def show_weight_backtest(dfw, **kwargs):
dret = wb.results['品种等权日收益']
dret.index = pd.to_datetime(dret.index)
show_daily_return(dret, legend_only_cols=dfw['symbol'].unique().tolist())
return wb
12 changes: 10 additions & 2 deletions czsc/utils/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,24 @@ def daily_performance(daily_returns):

if len(daily_returns) == 0 or np.std(daily_returns) == 0 or all(x == 0 for x in daily_returns):
return {"年化": 0, "夏普": 0, "最大回撤": 0, "卡玛": 0, "日胜率": 0,
"年化波动率": 0, "非零覆盖": 0, "盈亏平衡点": 0}
"年化波动率": 0, "非零覆盖": 0, "盈亏平衡点": 0, "最大新高时间": 0}

annual_returns = np.sum(daily_returns) / len(daily_returns) * 252
sharpe_ratio = np.mean(daily_returns) / np.std(daily_returns) * np.sqrt(252)
cum_returns = np.cumsum(daily_returns)
max_drawdown = np.max(np.maximum.accumulate(cum_returns) - cum_returns)
dd = np.maximum.accumulate(cum_returns) - cum_returns
max_drawdown = np.max(dd)
kama = annual_returns / max_drawdown if max_drawdown != 0 else 10
win_pct = len(daily_returns[daily_returns > 0]) / len(daily_returns)
annual_volatility = np.std(daily_returns) * np.sqrt(252)
none_zero_cover = len(daily_returns[daily_returns != 0]) / len(daily_returns)

# 计算最大新高时间
high_index = [i for i, x in enumerate(dd) if x == 0]
max_interval = 0
for i in range(len(high_index) - 1):
max_interval = max(max_interval, high_index[i + 1] - high_index[i])

sta = {
"年化": round(annual_returns, 4),
"夏普": round(sharpe_ratio, 2),
Expand All @@ -76,6 +83,7 @@ def daily_performance(daily_returns):
"年化波动率": round(annual_volatility, 4),
"非零覆盖": round(none_zero_cover, 4),
"盈亏平衡点": round(cal_break_even_point(daily_returns), 4),
"最大新高时间": max_interval,
}
return sta

Expand Down
4 changes: 2 additions & 2 deletions examples/test_offline/test_data_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

def test_tushare_pro():
# czsc.set_url_token("******", url="http://api.tushare.pro")
dc = czsc.DataClient(url="http://api.tushare.pro", cache_path="tushare_data")
df = dc.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date')
dc = czsc.DataClient(url="http://api.tushare.pro")
df = dc.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date', ttl=5)
try:
df = dc.stock_basic_1(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date')
except Exception as e:
Expand Down
15 changes: 10 additions & 5 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,20 +95,25 @@ def test_daily_performance():

# Test case 1: empty daily returns
result = daily_performance([])
assert result == {"年化": 0, "夏普": 0, "最大回撤": 0, "卡玛": 0, "日胜率": 0, "年化波动率": 0, "非零覆盖": 0, "盈亏平衡点": 0}
assert result == {"年化": 0, "夏普": 0, "最大回撤": 0, "卡玛": 0, "日胜率": 0,
"年化波动率": 0, "非零覆盖": 0, "盈亏平衡点": 0, '最大新高时间': 0}

# Test case 2: daily returns with zero standard deviation
result = daily_performance([1, 1, 1, 1, 1])
assert result == {"年化": 0, "夏普": 0, "最大回撤": 0, "卡玛": 0, "日胜率": 0, "年化波动率": 0, "非零覆盖": 0, "盈亏平衡点": 0}
assert result == {"年化": 0, "夏普": 0, "最大回撤": 0, "卡玛": 0, "日胜率": 0,
"年化波动率": 0, "非零覆盖": 0, "盈亏平衡点": 0, '最大新高时间': 0}

# Test case 3: daily returns with all zeros
result = daily_performance([0, 0, 0, 0, 0])
assert result == {"年化": 0, "夏普": 0, "最大回撤": 0, "卡玛": 0, "日胜率": 0, "年化波动率": 0, "非零覆盖": 0, "盈亏平衡点": 0}
assert result == {"年化": 0, "夏普": 0, "最大回撤": 0, "卡玛": 0, "日胜率": 0,
"年化波动率": 0, "非零覆盖": 0, "盈亏平衡点": 0, '最大新高时间': 0}

# Test case 4: normal daily returns
daily_returns = np.array([0.01, 0.02, -0.01, 0.03, 0.02, -0.02, 0.01, -0.01, 0.02, 0.01])
result = daily_performance(daily_returns)
assert result == {'年化': 2.016, '夏普': 8.27, '最大回撤': 0.02, '卡玛': 100.8, '日胜率': 0.7, '年化波动率': 0.2439, '非零覆盖': 1.0, '盈亏平衡点': 0.7}
assert result == {'年化': 2.016, '夏普': 8.27, '最大回撤': 0.02, '卡玛': 100.8, '日胜率': 0.7,
'年化波动率': 0.2439, '非零覆盖': 1.0, '盈亏平衡点': 0.7, '最大新高时间': 4}

result = daily_performance([0.01, 0.02, -0.01, 0.03, 0.02, -0.02, 0.01, -0.01, 0.02, 0.01])
assert result == {'年化': 2.016, '夏普': 8.27, '最大回撤': 0.02, '卡玛': 100.8, '日胜率': 0.7, '年化波动率': 0.2439, '非零覆盖': 1.0, '盈亏平衡点': 0.7}
assert result == {'年化': 2.016, '夏普': 8.27, '最大回撤': 0.02, '卡玛': 100.8, '日胜率': 0.7,
'年化波动率': 0.2439, '非零覆盖': 1.0, '盈亏平衡点': 0.7, '最大新高时间': 4}

0 comments on commit 57b25a5

Please sign in to comment.