enhance: tidy dependencies in tests (#1996)

1. Remove the tensorflow dependency; the tests that needed it are skipped anyway 2. Remove the sklearn dependency (legacy code) 3. Add pymilvus[test] to minimize the test env, and remove test_requirements.txt 4. Update the README to the latest state --------- Signed-off-by: yangxuan <[email protected]>
milvus-io · Mar 22, 2024 · b9a10c9 · b9a10c9
1 parent c6c162e
commit b9a10c9
Show file tree

Hide file tree

Showing 12 changed files with 83 additions and 147 deletions.
diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
@@ -28,8 +28,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -e .
-          pip install -r test_requirements.txt
+          pip install -e ".[test]"
 
       - name: Test with pytest
         run: |

diff --git a/README.md b/README.md
@@ -27,6 +27,7 @@ The following collection shows Milvus versions and recommended PyMilvus versions
 | 2.1.\* | 2.1.3 |
 | 2.2.\* | 2.2.15 |
 | 2.3.\* | 2.3.7 |
+| 2.4.\* | 2.4.0 |
 
 
 ## Installation
@@ -35,6 +36,7 @@ You can install PyMilvus via `pip` or `pip3` for Python 3.8+:
 
 ```shell
 $ pip3 install pymilvus
+$ pip3 install pymilvus[model] # for milvus-model
 ```
 
 You can install a specific version of PyMilvus by:
@@ -83,15 +85,28 @@ make lint
 
 Q5. How to fix the coding styles?
 
-Q5
+A5
 ```shell
 make format
 ```
 
+Q6. How to run unittests?
+
+A6
+```shell
+$ pip install ".[test]"
+$ make unittest
+```
+Q7. I get `zsh: no matches found: pymilvus[model]` on macOS. How do I solve this?
+
+A7
+```shell
+$ pip install "pymilvus[model]"
+```
 
 ## Documentation
 
-Documentation is available online: https://milvus.io/api-reference/pymilvus/v2.3.x/About.md
+Documentation is available online: https://milvus.io/api-reference/pymilvus/v2.4.x/About.md
 
 ## Developing package releases
 

diff --git a/examples/bfloat16_example.py b/examples/bfloat16_example.py
@@ -69,4 +69,4 @@ def bf16_vector_search():
     hello_milvus.drop()
 
 if __name__ == "__main__":
-    bf16_vector_search()
+    bf16_vector_search()
diff --git a/examples/collection.py b/examples/collection.py
@@ -14,7 +14,8 @@
 
 import random
 import numpy as np
-from sklearn import preprocessing
+import pandas
+
 import string
 
 from pymilvus.orm import db
@@ -100,30 +101,24 @@ def gen_binary_schema():
     return default_schema
 
 
-def gen_float_vectors(num, dim, is_normal=True):
-    vectors = [[random.random() for _ in range(dim)] for _ in range(num)]
-    vectors = preprocessing.normalize(vectors, axis=1, norm='l2')
-    return vectors.tolist()
+def gen_float_vectors(num, dim):
+    return [[random.random() for _ in range(dim)] for _ in range(num)]
 
 
-def gen_float_data(nb, is_normal=False):
-    vectors = gen_float_vectors(nb, default_dim, is_normal)
+def gen_float_data(nb):
     entities = [
         [i for i in range(nb)],
         [float(i) for i in range(nb)],
-        vectors
+        gen_float_vectors(nb, default_dim),
     ]
     return entities
 
 
-def gen_dataframe(nb, is_normal=False):
-    import pandas
-    import numpy
-
-    vectors = gen_float_vectors(nb, default_dim, is_normal)
+def gen_dataframe(nb):
+    vectors = gen_float_vectors(nb, default_dim)
     data = {
         "int64": [i for i in range(nb)],
-        "float": numpy.array([i for i in range(nb)], dtype=numpy.float32),
+        "float": np.array([i for i in range(nb)], dtype=np.float32),
         "float_vector": vectors
     }
 

diff --git a/examples/partition.py b/examples/partition.py
@@ -16,7 +16,6 @@
 )
 
 import random
-from sklearn import preprocessing
 import string
 
 default_dim = 128
@@ -72,18 +71,11 @@ def gen_default_fields(auto_id=True):
     return default_schema
 
 
-def gen_vectors(num, dim, is_normal=True):
-    vectors = [[random.random() for _ in range(dim)] for _ in range(num)]
-    vectors = preprocessing.normalize(vectors, axis=1, norm='l2')
-    return vectors.tolist()
-
-
-def gen_data(nb, is_normal=False):
-    vectors = gen_vectors(nb, default_dim, is_normal)
+def gen_data(nb):  # returns [ids, float scalars, random float vectors] for nb rows
     entities = [
         [i for i in range(nb)],
         [float(i) for i in range(nb)],
-        vectors
+        [[random.random() for _ in range(default_dim)] for _ in range(nb)],  # nb vectors, default_dim floats each
     ]
     return entities
 

diff --git a/examples/role_and_privilege.py b/examples/role_and_privilege.py
@@ -2,7 +2,6 @@
 from pymilvus.orm.role import Role
 
 import random
-from sklearn import preprocessing
 
 _CONNECTION = "demo"
 _FOO_CONNECTION = "foo_connection"
@@ -79,18 +78,15 @@ def has_collection(collection_name, connection=_CONNECTION):
 default_nb = 1000
 
 
-def gen_float_vectors(num, dim, is_normal=True):
-    vectors = [[random.random() for _ in range(dim)] for _ in range(num)]
-    vectors = preprocessing.normalize(vectors, axis=1, norm='l2')
-    return vectors.tolist()
+def gen_float_vectors(num, dim):
+    return [[random.random() for _ in range(dim)] for _ in range(num)]
 
 
-def gen_float_data(nb, is_normal=False):
-    vectors = gen_float_vectors(nb, default_dim, is_normal)
+def gen_float_data(nb):
     entities = [
         [i for i in range(nb)],
         [float(i) for i in range(nb)],
-        vectors
+        gen_float_vectors(nb, default_dim),
     ]
     return entities
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -28,6 +28,7 @@ dependencies=[
     "minio>=7.0.0",
     "pyarrow>=12.0.0",
     "azure-storage-blob",
+    "scipy",
 ]
 
 classifiers=[
@@ -45,14 +46,23 @@ model = [
     "milvus-model>=0.1.0",
 ]
 
+test = [
+    "pytest>=5.3.4",
+    "pytest-cov>=2.8.1",
+    "pytest-timeout>=1.3.4",
+    "grpcio-testing",
+    "ruff>=0.3.3",
+    "black",
+]
+
 [tool.setuptools.dynamic]
 version = { attr = "_version_helper.version"}
 
 [tool.setuptools_scm]
 
 [tool.black]
 line-length = 100
-target-version = ['py37']
+target-version = ['py38']
 include = '\.pyi?$'
 extend-ignore = ["E203", "E501"]
 # 'extend-exclude' excludes files or directories in addition to the defaults

diff --git a/requirements.txt b/requirements.txt
@@ -14,7 +14,6 @@ six==1.16.0
 toml==0.10.2
 ujson>=2.0.0
 urllib3==1.26.18
-sklearn==0.0
 m2r==0.3.1
 scipy>=1.9.3
 Sphinx==4.0.0

diff --git a/test_requirements.txt b/test_requirements.txt
diff --git a/tests/test_connections.py b/tests/test_connections.py
@@ -3,6 +3,7 @@
 
 import pytest
 import pymilvus
+from pymilvus import *
 from unittest import mock
 
 from pymilvus import connections

diff --git a/tests/test_types.py b/tests/test_types.py
@@ -13,103 +13,45 @@
 from pymilvus.client.constants import DEFAULT_RESOURCE_GROUP
 from pymilvus.exceptions import InvalidConsistencyLevel
 from pymilvus.client.types import (
-    get_consistency_level, Shard, Group, Replica, ConsistencyLevel
+    get_consistency_level,
+    Shard,
+    Group,
+    Replica,
+    ConsistencyLevel,
+)
+from pymilvus.orm.types import (
+    infer_dtype_bydata,
 )
 
 from pymilvus.grpc_gen import common_pb2
 
 import pytest
 import pandas as pd
 import numpy as np
-import tensorflow as tf
+#  from ml_dtypes import bfloat16
 
 
-@pytest.mark.xfail
+@pytest.mark.skip("please fix me")
 class TestTypes:
-    def test_map_numpy_dtype_to_datatype(self):
-        data1 = {
-            'double': [2.0],
-            'float32': [np.float32(1.0)],
-            'double2': [np.float64(1.0)],
-            'int8': [np.int8(1)],
-            'int16': [2],
-            'int32': [4],
-            'int64': [8],
-            'bool': [True],
-            'float_vec': [np.array([1.1, 1.2])],
-        }
-
-        df = pd.DataFrame(data1)
-
-        wants1 = [
-            DataType.DOUBLE,
-            DataType.DOUBLE,
-            DataType.DOUBLE,
-            DataType.INT64,
-            DataType.INT64,
-            DataType.INT64,
-            DataType.INT64,
-            DataType.BOOL,
-            DataType.UNKNOWN,
-        ]
-
-        ret1 = [map_numpy_dtype_to_datatype(x) for x in df.dtypes]
-        assert ret1 == wants1
-
-        df2 = pd.DataFrame(data=[1, 2, 3], columns=['a'],
-                           dtype=np.int8)
-        assert DataType.INT8 == map_numpy_dtype_to_datatype(df2.dtypes[0])
-
-        df2 = pd.DataFrame(data=[1, 2, 3], columns=['a'],
-                           dtype=np.int16)
-        assert DataType.INT16 == map_numpy_dtype_to_datatype(df2.dtypes[0])
-
-        df2 = pd.DataFrame(data=[1, 2, 3], columns=['a'],
-                           dtype=np.int32)
-        assert DataType.INT32 == map_numpy_dtype_to_datatype(df2.dtypes[0])
-
-        df2 = pd.DataFrame(data=[1, 2, 3], columns=['a'],
-                           dtype=np.int64)
-        assert DataType.INT64 == map_numpy_dtype_to_datatype(df2.dtypes[0])
-
-    def test_infer_dtype_bydata(self):
-        data1 = [
-            [1],
-            [True],
-            [1.0, 2.0],
-            ["abc"],
-            bytes("abc", encoding='ascii'),
-            1,
-            True,
-            "abc",
-            np.int8(1),
-            np.int16(1),
-            [np.int8(1)],
-            [np.float16(1.0)],
-            [tf.bfloat16(1.0)]
-        ]
-
-        wants = [
-            DataType.FLOAT_VECTOR,
-            DataType.UNKNOWN,
-            DataType.FLOAT_VECTOR,
-            DataType.UNKNOWN,
-            DataType.BINARY_VECTOR,
-            DataType.INT64,
-            DataType.BOOL,
-            DataType.STRING,
-            DataType.INT8,
-            DataType.INT16,
-            DataType.FLOAT_VECTOR,
-            DataType.FLOAT16_VECTOR,
-            DataType.BFLOAT16_VECTOR,
-        ]
-
-        actual = []
-        for d in data1:
-            actual.append(infer_dtype_bydata(d))
-
-        assert actual == wants
+    @pytest.mark.parametrize("input_expect", [
+        ([1], DataType.FLOAT_VECTOR),
+        ([True], DataType.UNKNOWN),
+        ([1.0, 2.0], DataType.FLOAT_VECTOR),
+        (["abc"], DataType.UNKNOWN),
+        (bytes("abc", encoding='ascii'), DataType.BINARY_VECTOR),
+        (1, DataType.INT64),
+        (True, DataType.BOOL),
+        ("abc", DataType.VARCHAR),
+        (np.int8(1), DataType.INT8),
+        (np.int16(1), DataType.INT16),
+        ([np.int8(1)], DataType.FLOAT_VECTOR),
+        ([np.float16(1.0)], DataType.FLOAT16_VECTOR),
+        #  ([np.array([1, 1], dtype=bfloat16)], DataType.BFLOAT16_VECTOR),
+    ])
+    def test_infer_dtype_bydata(self, input_expect):
+        data, expect = input_expect
+        got = infer_dtype_bydata(data)
+        assert got == expect
 
 
 class TestConsistencyLevel: