diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 49d55bf9f..01aaba7ba 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -28,8 +28,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -e . - pip install -r test_requirements.txt + pip install -e ".[test]" - name: Test with pytest run: | diff --git a/README.md b/README.md index 29854890b..1d99640cb 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ The following collection shows Milvus versions and recommended PyMilvus versions | 2.1.\* | 2.1.3 | | 2.2.\* | 2.2.15 | | 2.3.\* | 2.3.7 | +| 2.4.\* | 2.4.0 | ## Installation @@ -35,6 +36,7 @@ You can install PyMilvus via `pip` or `pip3` for Python 3.8+: ```shell $ pip3 install pymilvus +$ pip3 install pymilvus[model] # for milvus-model ``` You can install a specific version of PyMilvus by: @@ -83,15 +85,28 @@ make lint Q5. How to fix the coding styles? -Q5 +A5 ```shell make format ``` +Q6. How to run unittests? + +A6 +```shell +$ pip install ".[test]" +$ make unittest +``` +Q7. `zsh: no matches found: pymilvus[model]` in mac, how do I solve this? 
+ +A7 +```shell +$ pip install "pymilvus[model]" +``` ## Documentation -Documentation is available online: https://milvus.io/api-reference/pymilvus/v2.3.x/About.md +Documentation is available online: https://milvus.io/api-reference/pymilvus/v2.4.x/About.md ## Developing package releases diff --git a/examples/bfloat16_example.py b/examples/bfloat16_example.py index 47fc1040a..162dd2139 100644 --- a/examples/bfloat16_example.py +++ b/examples/bfloat16_example.py @@ -69,4 +69,4 @@ def bf16_vector_search(): hello_milvus.drop() if __name__ == "__main__": - bf16_vector_search() \ No newline at end of file + bf16_vector_search() diff --git a/examples/collection.py b/examples/collection.py index b3a7f9455..217bd0a4c 100644 --- a/examples/collection.py +++ b/examples/collection.py @@ -14,7 +14,8 @@ import random import numpy as np -from sklearn import preprocessing +import pandas + import string from pymilvus.orm import db @@ -100,30 +101,24 @@ def gen_binary_schema(): return default_schema -def gen_float_vectors(num, dim, is_normal=True): - vectors = [[random.random() for _ in range(dim)] for _ in range(num)] - vectors = preprocessing.normalize(vectors, axis=1, norm='l2') - return vectors.tolist() +def gen_float_vectors(num, dim): + return [[random.random() for _ in range(dim)] for _ in range(num)] -def gen_float_data(nb, is_normal=False): - vectors = gen_float_vectors(nb, default_dim, is_normal) +def gen_float_data(nb): entities = [ [i for i in range(nb)], [float(i) for i in range(nb)], - vectors + gen_float_vectors(nb, default_dim), ] return entities -def gen_dataframe(nb, is_normal=False): - import pandas - import numpy - - vectors = gen_float_vectors(nb, default_dim, is_normal) +def gen_dataframe(nb): + vectors = gen_float_vectors(nb, default_dim) data = { "int64": [i for i in range(nb)], - "float": numpy.array([i for i in range(nb)], dtype=numpy.float32), + "float": np.array([i for i in range(nb)], dtype=np.float32), "float_vector": vectors } diff --git 
a/examples/partition.py b/examples/partition.py index 111ce332d..3c6bb30ce 100644 --- a/examples/partition.py +++ b/examples/partition.py @@ -16,7 +16,6 @@ ) import random -from sklearn import preprocessing import string default_dim = 128 @@ -72,18 +71,11 @@ def gen_default_fields(auto_id=True): return default_schema -def gen_vectors(num, dim, is_normal=True): - vectors = [[random.random() for _ in range(dim)] for _ in range(num)] - vectors = preprocessing.normalize(vectors, axis=1, norm='l2') - return vectors.tolist() - - -def gen_data(nb, is_normal=False): - vectors = gen_vectors(nb, default_dim, is_normal) +def gen_data(nb): entities = [ [i for i in range(nb)], [float(i) for i in range(nb)], - vectors + [[random.random() for _ in range(default_dim)] for _ in range(nb)], ] return entities diff --git a/examples/role_and_privilege.py b/examples/role_and_privilege.py index c16f4d695..3fb8af49e 100644 --- a/examples/role_and_privilege.py +++ b/examples/role_and_privilege.py @@ -2,7 +2,6 @@ from pymilvus.orm.role import Role import random -from sklearn import preprocessing _CONNECTION = "demo" _FOO_CONNECTION = "foo_connection" @@ -79,18 +78,15 @@ def has_collection(collection_name, connection=_CONNECTION): default_nb = 1000 -def gen_float_vectors(num, dim, is_normal=True): - vectors = [[random.random() for _ in range(dim)] for _ in range(num)] - vectors = preprocessing.normalize(vectors, axis=1, norm='l2') - return vectors.tolist() +def gen_float_vectors(num, dim): + return [[random.random() for _ in range(dim)] for _ in range(num)] -def gen_float_data(nb, is_normal=False): - vectors = gen_float_vectors(nb, default_dim, is_normal) +def gen_float_data(nb): entities = [ [i for i in range(nb)], [float(i) for i in range(nb)], - vectors + gen_float_vectors(nb, default_dim), ] return entities diff --git a/pyproject.toml b/pyproject.toml index 3a540c8d2..697ab28f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies=[ "minio>=7.0.0", "pyarrow>=12.0.0", 
"azure-storage-blob", + "scipy", ] classifiers=[ @@ -45,6 +46,15 @@ model = [ "milvus-model>=0.1.0", ] +test = [ + "pytest>=5.3.4", + "pytest-cov>=2.8.1", + "pytest-timeout>=1.3.4", + "grpcio-testing", + "ruff>=0.3.3", + "black", +] + [tool.setuptools.dynamic] version = { attr = "_version_helper.version"} @@ -52,7 +62,7 @@ version = { attr = "_version_helper.version"} [tool.black] line-length = 100 -target-version = ['py37'] +target-version = ['py38'] include = '\.pyi?$' extend-ignore = ["E203", "E501"] # 'extend-exclude' excludes files or directories in addition to the defaults diff --git a/requirements.txt b/requirements.txt index 1cecf2ea6..84648e5fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,7 +14,6 @@ six==1.16.0 toml==0.10.2 ujson>=2.0.0 urllib3==1.26.18 -sklearn==0.0 m2r==0.3.1 scipy>=1.9.3 Sphinx==4.0.0 diff --git a/test_requirements.txt b/test_requirements.txt deleted file mode 100644 index 870364987..000000000 --- a/test_requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -pytest>=5.3.4 -pytest-cov>=2.8.1 -pytest-timeout>=1.3.4 -grpcio-testing==1.60.0 -sklearn==0.0 -ruff>=0.2.0 -black -tensorflow;python_version <'3.12' -# for python3.12 support -tf-nightly;python_version == '3.12' diff --git a/tests/test_connections.py b/tests/test_connections.py index fc5b23975..0e9c94453 100644 --- a/tests/test_connections.py +++ b/tests/test_connections.py @@ -3,6 +3,7 @@ import pytest import pymilvus +from pymilvus import * from unittest import mock from pymilvus import connections diff --git a/tests/test_types.py b/tests/test_types.py index fbda5b6b5..1fe685fe6 100644 --- a/tests/test_types.py +++ b/tests/test_types.py @@ -13,7 +13,14 @@ from pymilvus.client.constants import DEFAULT_RESOURCE_GROUP from pymilvus.exceptions import InvalidConsistencyLevel from pymilvus.client.types import ( - get_consistency_level, Shard, Group, Replica, ConsistencyLevel + get_consistency_level, + Shard, + Group, + Replica, + ConsistencyLevel, +) +from pymilvus.orm.types 
import ( + infer_dtype_bydata, ) from pymilvus.grpc_gen import common_pb2 @@ -21,95 +28,30 @@ import pytest import pandas as pd import numpy as np -import tensorflow as tf +# from ml_dtypes import bfloat16 -@pytest.mark.xfail +@pytest.mark.skip("please fix me") class TestTypes: - def test_map_numpy_dtype_to_datatype(self): - data1 = { - 'double': [2.0], - 'float32': [np.float32(1.0)], - 'double2': [np.float64(1.0)], - 'int8': [np.int8(1)], - 'int16': [2], - 'int32': [4], - 'int64': [8], - 'bool': [True], - 'float_vec': [np.array([1.1, 1.2])], - } - - df = pd.DataFrame(data1) - - wants1 = [ - DataType.DOUBLE, - DataType.DOUBLE, - DataType.DOUBLE, - DataType.INT64, - DataType.INT64, - DataType.INT64, - DataType.INT64, - DataType.BOOL, - DataType.UNKNOWN, - ] - - ret1 = [map_numpy_dtype_to_datatype(x) for x in df.dtypes] - assert ret1 == wants1 - - df2 = pd.DataFrame(data=[1, 2, 3], columns=['a'], - dtype=np.int8) - assert DataType.INT8 == map_numpy_dtype_to_datatype(df2.dtypes[0]) - - df2 = pd.DataFrame(data=[1, 2, 3], columns=['a'], - dtype=np.int16) - assert DataType.INT16 == map_numpy_dtype_to_datatype(df2.dtypes[0]) - - df2 = pd.DataFrame(data=[1, 2, 3], columns=['a'], - dtype=np.int32) - assert DataType.INT32 == map_numpy_dtype_to_datatype(df2.dtypes[0]) - - df2 = pd.DataFrame(data=[1, 2, 3], columns=['a'], - dtype=np.int64) - assert DataType.INT64 == map_numpy_dtype_to_datatype(df2.dtypes[0]) - - def test_infer_dtype_bydata(self): - data1 = [ - [1], - [True], - [1.0, 2.0], - ["abc"], - bytes("abc", encoding='ascii'), - 1, - True, - "abc", - np.int8(1), - np.int16(1), - [np.int8(1)], - [np.float16(1.0)], - [tf.bfloat16(1.0)] - ] - - wants = [ - DataType.FLOAT_VECTOR, - DataType.UNKNOWN, - DataType.FLOAT_VECTOR, - DataType.UNKNOWN, - DataType.BINARY_VECTOR, - DataType.INT64, - DataType.BOOL, - DataType.STRING, - DataType.INT8, - DataType.INT16, - DataType.FLOAT_VECTOR, - DataType.FLOAT16_VECTOR, - DataType.BFLOAT16_VECTOR, - ] - - actual = [] - for d in data1: - 
actual.append(infer_dtype_bydata(d)) - - assert actual == wants + @pytest.mark.parametrize("input_expect", [ + ([1], DataType.FLOAT_VECTOR), + ([True], DataType.UNKNOWN), + ([1.0, 2.0], DataType.FLOAT_VECTOR), + (["abc"], DataType.UNKNOWN), + (bytes("abc", encoding='ascii'), DataType.BINARY_VECTOR), + (1, DataType.INT64), + (True, DataType.BOOL), + ("abc", DataType.VARCHAR), + (np.int8(1), DataType.INT8), + (np.int16(1), DataType.INT16), + ([np.int8(1)], DataType.FLOAT_VECTOR), + ([np.float16(1.0)], DataType.FLOAT16_VECTOR), + # ([np.array([1, 1], dtype=bfloat16)], DataType.BFLOAT16_VECTOR), + ]) + def test_infer_dtype_bydata(self, input_expect): + data, expect = input_expect + got = infer_dtype_bydata(data) + assert got == expect class TestConsistencyLevel: diff --git a/tests/utils.py b/tests/utils.py index ec2279193..93d4db724 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,6 +1,6 @@ import random import pandas -from sklearn import preprocessing +import numpy as np from pymilvus import DataType @@ -69,10 +69,8 @@ def gen_schema(): return collection_schema -def gen_vectors(num, dim, is_normal=True): - vectors = [[random.random() for _ in range(dim)] for _ in range(num)] - vectors = preprocessing.normalize(vectors, axis=1, norm='l2') - return vectors.tolist() +def gen_vectors(num, dim): + return [[random.random() for _ in range(dim)] for _ in range(num)] def gen_int_attr(row_num): @@ -80,12 +78,11 @@ def gen_int_attr(row_num): # pandas.DataFrame -def gen_pd_data(nb, is_normal=False): - import numpy - vectors = gen_vectors(nb, default_dim, is_normal) +def gen_pd_data(nb): + vectors = gen_vectors(nb, default_dim) datas = { "int64": [i for i in range(nb)], - "float": numpy.array([i for i in range(nb)], dtype=numpy.float32), + "float": np.array([i for i in range(nb)], dtype=np.float32), default_float_vec_field_name: vectors } data = pandas.DataFrame(datas) @@ -93,8 +90,8 @@ def gen_pd_data(nb, is_normal=False): # list or tuple data -def gen_list_data(nb, 
is_normal=False): - vectors = gen_vectors(nb, default_dim, is_normal) +def gen_list_data(nb): + vectors = gen_vectors(nb, default_dim) datas = [[i for i in range(nb)], [float(i) for i in range(nb)], vectors] return datas