Skip to content

Commit

Permalink
0.9.42 disk cache 新增源码变动识别和支持文件类型
Browse files Browse the repository at this point in the history
  • Loading branch information
zengbin93 committed Jan 28, 2024
1 parent a80f107 commit b3f9987
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 4 deletions.
22 changes: 18 additions & 4 deletions czsc/utils/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
import os
import time
import dill
import json
import shutil
import hashlib
import json
import inspect
import pandas as pd
from pathlib import Path
from loguru import logger
Expand Down Expand Up @@ -94,6 +95,10 @@ def get(self, k: str, suffix: str = "pkl") -> Any:
res = pd.read_csv(file, encoding='utf-8')
elif suffix == "xlsx":
res = pd.read_excel(file)
elif suffix == "feather":
res = pd.read_feather(file)
elif suffix == "parquet":
res = pd.read_parquet(file)
else:
raise ValueError(f"suffix {suffix} not supported")
return res
Expand Down Expand Up @@ -132,6 +137,16 @@ def set(self, k: str, v: Any, suffix: str = "pkl"):
raise ValueError("suffix xlsx only support pd.DataFrame")
v.to_excel(file, index=False)

elif suffix == "feather":
if not isinstance(v, pd.DataFrame):
raise ValueError("suffix feather only support pd.DataFrame")
v.to_feather(file)

elif suffix == "parquet":
if not isinstance(v, pd.DataFrame):
raise ValueError("suffix parquet only support pd.DataFrame")
v.to_parquet(file)

else:
raise ValueError(f"suffix {suffix} not supported")

Expand All @@ -150,15 +165,14 @@ def disk_cache(path: str, suffix: str = "pkl", ttl: int = -1):
:param suffix: 缓存文件后缀,支持 pkl, json, txt, csv, xlsx, feather, parquet
:param ttl: 缓存文件有效期,单位:秒
"""
assert suffix in ["pkl", "json", "txt", "csv", "xlsx"], "suffix not supported"

def decorator(func):
nonlocal path
_c = DiskCache(path=Path(path) / func.__name__)

def cached_func(*args, **kwargs):
hash_str = f"{func.__name__}{args}{kwargs}"
k = hashlib.md5(hash_str.encode('utf-8')).hexdigest().upper()[:8]
code_str = inspect.getsource(func)
k = hashlib.md5((code_str + hash_str).encode('utf-8')).hexdigest().upper()[:8]
k = f"{k}_{func.__name__}"

if _c.is_found(k, suffix=suffix, ttl=ttl):
Expand Down
40 changes: 40 additions & 0 deletions test/test_utils_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,34 @@ def run_func_x(x):
return x * 2


@disk_cache(path=temp_path, suffix="txt", ttl=100)
def run_func_text(x):
    """Return a greeting string built from ``x``.

    The decorator is configured with ``suffix="txt"`` and ``ttl=100``.
    """
    greeting = f"hello {x}"
    return greeting


@disk_cache(path=temp_path, suffix="json", ttl=100)
def run_func_json(x):
    """Return a small dict with fixed keys ``a`` and ``b`` plus ``x``.

    The decorator is configured with ``suffix="json"`` and ``ttl=100``.
    """
    payload = {"a": 1, "b": 2}
    # Insert "x" last so the key order matches a single literal a, b, x.
    payload["x"] = x
    return payload


@disk_cache(path=temp_path, suffix="xlsx", ttl=100)
def run_func_y(x):
    """Return a three-row DataFrame with columns ``a``, ``b`` and ``x``.

    Column ``x`` holds the argument repeated on every row. The decorator is
    configured with ``suffix="xlsx"`` and ``ttl=100``.
    """
    columns = {'a': [1, 2, 3], 'b': [4, 5, 6], 'x': [x] * 3}
    return pd.DataFrame(columns)


@disk_cache(path=temp_path, suffix="feather", ttl=100)
def run_feather(x):
    """Return a three-row DataFrame with columns ``a``, ``b`` and ``x``.

    Column ``x`` holds the argument repeated on every row. The decorator is
    configured with ``suffix="feather"`` and ``ttl=100``.
    """
    columns = {'a': [1, 2, 3], 'b': [4, 5, 6], 'x': [x] * 3}
    return pd.DataFrame(columns)


@disk_cache(path=temp_path, suffix="parquet", ttl=100)
def run_parquet(x):
    """Return a three-row DataFrame with columns ``a``, ``b`` and ``x``.

    Column ``x`` holds the argument repeated on every row. The decorator is
    configured with ``suffix="parquet"`` and ``ttl=100``.
    """
    columns = {'a': [1, 2, 3], 'b': [4, 5, 6], 'x': [x] * 3}
    return pd.DataFrame(columns)


def test_disk_cache():
# Call the function
result = run_func_x(5)
Expand All @@ -31,6 +53,24 @@ def test_disk_cache():
# Check if the output is still correct
assert result == 10

# Call the txt-cached function twice with the same argument
result = run_func_text(6)
result = run_func_text(6)
assert result == "hello 6"

# Call the json-cached function twice with the same argument
result = run_func_json(7)
result = run_func_json(7)
assert result == {"a": 1, "b": 2, "x": 7}

result = run_feather(8)
result = run_feather(8)
assert isinstance(result, pd.DataFrame)

result = run_parquet(9)
result = run_parquet(9)
assert isinstance(result, pd.DataFrame)

# Check if the cache file exists
files = os.listdir(os.path.join(temp_path, "run_func_x"))
assert len(files) == 1
Expand Down

0 comments on commit b3f9987

Please sign in to comment.