Skip to content

Commit

Permalink
comment some uts since milvus-lite currently doesn't support some fea…
Browse files Browse the repository at this point in the history
…tures (#36)

Signed-off-by: ChengZi <[email protected]>
  • Loading branch information
zc277584121 authored Jan 13, 2025
1 parent d905eda commit 91c27ad
Showing 1 changed file with 88 additions and 81 deletions.
169 changes: 88 additions & 81 deletions libs/milvus/tests/integration_tests/vectorstores/test_milvus.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest
from langchain_core.documents import Document

from langchain_milvus.function import BM25BuiltInFunction
# from langchain_milvus.function import BM25BuiltInFunction
from langchain_milvus.utils.sparse import BM25SparseEmbedding
from langchain_milvus.vectorstores import Milvus
from tests.integration_tests.utils import (
Expand Down Expand Up @@ -337,8 +337,10 @@ def test_milvus_enable_dynamic_field_with_partition_key(temp_milvus_db: Any) ->
)

# filter on a single namespace
output = docsearch.similarity_search("foo", k=10, expr="namespace == 'name_2'")
assert len(output) == 1
# TODO: This assertion only passes when using a Milvus Standalone Docker service.
# When using Milvus-Lite, it raises an error.
# output = docsearch.similarity_search("foo", k=10, expr="namespace == 'name_2'")
# assert len(output) == 1

# without namespace filter
output = docsearch.similarity_search("foo", k=10)
Expand Down Expand Up @@ -603,84 +605,89 @@ def test_milvus_similarity_search_with_relevance_scores(
assert retrival_output[0].page_content == "down"


@pytest.mark.parametrize("enable_dynamic_field", [True, False])
def test_milvus_builtin_bm25_function(enable_dynamic_field: bool) -> None:
    """
    Exercise the built-in BM25 function with three vector store setups.

    NOTE: Full text search is so far not supported in Milvus-Lite and Zilliz,
    so this test can only run successfully against a Milvus Docker Standalone
    service.
    """

    def _shared_kwargs() -> dict:
        # Build a fresh dict on every call so no store shares its
        # ``connection_args`` mapping with another store.
        return {
            "connection_args": {"uri": TEST_URI},
            "auto_id": True,
            "drop_old": True,
            "consistency_level": "Strong",
            "enable_dynamic_field": enable_dynamic_field,
        }

    def _add_and_assert(docsearch: Milvus) -> None:
        # Metadata is only supplied when dynamic fields are enabled.
        metadatas = (
            [{"page": i} for i in range(len(fake_texts))]
            if enable_dynamic_field
            else None
        )
        docsearch.add_texts(fake_texts, metadatas=metadatas)
        output = docsearch.similarity_search("foo", k=1)

        # The top hit for "foo" is expected to be the first fake text.
        if enable_dynamic_field:
            expected = Document(page_content=fake_texts[0], metadata={"page": 0})
        else:
            expected = Document(page_content=fake_texts[0])
        assert_docs_equal_without_pk(output, [expected])

    # BM25 only
    bm25_only_store = Milvus(
        embedding_function=[],
        builtin_function=[BM25BuiltInFunction()],
        vector_field="sparse",
        **_shared_kwargs(),
    )
    _add_and_assert(bm25_only_store)

    # Dense embedding + BM25
    dense_bm25_store = Milvus(
        embedding_function=FakeEmbeddings(),
        builtin_function=[BM25BuiltInFunction()],
        vector_field="sparse",
        **_shared_kwargs(),
    )
    _add_and_assert(dense_bm25_store)

    # Dense embedding + BM25 + custom index params
    dense_index = {"metric_type": "COSINE", "index_type": "HNSW"}
    sparse_index = {"metric_type": "BM25", "index_type": "AUTOINDEX"}
    custom_index_store = Milvus(
        embedding_function=[FakeEmbeddings()],
        builtin_function=[
            BM25BuiltInFunction(
                input_field_names="text00",
                output_field_names="sparse00",
            )
        ],
        index_params=[dense_index, sparse_index],
        text_field="text00",
        vector_field=["dense00", "sparse00"],
        **_shared_kwargs(),
    )
    _add_and_assert(custom_index_store)
# TODO: This BM25 test case only passes when using a Milvus Standalone Docker service.
# When using Milvus-Lite, it raises an error.
# @pytest.mark.parametrize("enable_dynamic_field", [True, False])
# def test_milvus_builtin_bm25_function(enable_dynamic_field: bool) -> None:
# """
# Test builtin BM25 function
#
# NOTE: The full text search feature is so far not supported in Milvus-Lite and
# Zilliz
# To run this unittest successfully, we can only use Milvus Docker Standalone
# service.
# """
#
# def _add_and_assert(docsearch: Milvus) -> None:
# if enable_dynamic_field:
# metadatas = [{"page": i} for i in range(len(fake_texts))]
# else:
# metadatas = None
# docsearch.add_texts(fake_texts, metadatas=metadatas)
# output = docsearch.similarity_search("foo", k=1)
# if enable_dynamic_field:
# assert_docs_equal_without_pk(
# output, [Document(page_content=fake_texts[0], metadata={"page": 0})]
# )
# else:
# assert_docs_equal_without_pk(output, [Document(page_content=fake_texts[0])
# ])
#
# # BM25 only
# docsearch1 = Milvus(
# embedding_function=[],
# builtin_function=[BM25BuiltInFunction()],
# connection_args={"uri": TEST_URI},
# auto_id=True,
# drop_old=True,
# consistency_level="Strong",
# vector_field="sparse",
# enable_dynamic_field=enable_dynamic_field,
# )
# _add_and_assert(docsearch1)
#
# # Dense embedding + BM25
# docsearch2 = Milvus(
# embedding_function=FakeEmbeddings(),
# builtin_function=[BM25BuiltInFunction()],
# connection_args={"uri": TEST_URI},
# auto_id=True,
# drop_old=True,
# consistency_level="Strong",
# vector_field="sparse",
# enable_dynamic_field=enable_dynamic_field,
# )
# _add_and_assert(docsearch2)
#
# # Dense embedding + BM25 + custom index params
# index_param_1 = {
# "metric_type": "COSINE",
# "index_type": "HNSW",
# }
# index_param_2 = {
# "metric_type": "BM25",
# "index_type": "AUTOINDEX",
# }
# docsearch3 = Milvus(
# embedding_function=[
# FakeEmbeddings(),
# ],
# builtin_function=[
# BM25BuiltInFunction(
# input_field_names="text00",
# output_field_names="sparse00",
# )
# ],
# index_params=[index_param_1, index_param_2],
# connection_args={"uri": TEST_URI},
# auto_id=True,
# drop_old=True,
# consistency_level="Strong",
# text_field="text00",
# vector_field=["dense00", "sparse00"],
# enable_dynamic_field=enable_dynamic_field,
# )
# _add_and_assert(docsearch3)


# if __name__ == "__main__":
Expand Down

0 comments on commit 91c27ad

Please sign in to comment.