diff --git a/ragstack-e2e-tests/e2e_tests/langchain/test_compatibility_rag.py b/ragstack-e2e-tests/e2e_tests/langchain/test_compatibility_rag.py index b2f1a7981..26afec743 100644 --- a/ragstack-e2e-tests/e2e_tests/langchain/test_compatibility_rag.py +++ b/ragstack-e2e-tests/e2e_tests/langchain/test_compatibility_rag.py @@ -15,6 +15,7 @@ run_conversational_rag, ) from e2e_tests.langchain.trulens import run_trulens_evaluation +from e2e_tests.test_utils import get_local_resource_path from langchain.chat_models import ChatOpenAI, AzureChatOpenAI, ChatVertexAI, BedrockChat from langchain.embeddings import ( @@ -79,7 +80,8 @@ def openai_embedding(): @pytest.fixture def azure_openai_llm(): return AzureChatOpenAI( - azure_deployment=get_required_env("AZURE_OPEN_AI_CHAT_MODEL_DEPLOYMENT"), + azure_deployment=get_required_env( + "AZURE_OPEN_AI_CHAT_MODEL_DEPLOYMENT"), openai_api_base=get_required_env("AZURE_OPEN_AI_ENDPOINT"), openai_api_key=get_required_env("AZURE_OPEN_AI_KEY"), openai_api_type="azure", @@ -89,7 +91,8 @@ def azure_openai_llm(): @pytest.fixture def azure_openai_embedding(): - model_and_deployment = get_required_env("AZURE_OPEN_AI_EMBEDDINGS_MODEL_DEPLOYMENT") + model_and_deployment = get_required_env( + "AZURE_OPEN_AI_EMBEDDINGS_MODEL_DEPLOYMENT") return AzureOpenAIEmbeddings( model=model_and_deployment, deployment=model_and_deployment, @@ -212,7 +215,8 @@ def test_rag(test_case, vector_store, embedding, llm, request, record_property): def _run_test(test_case: str, vector_store_context, embedding, llm, record_property): - vector_store = vector_store_context.new_langchain_vector_store(embedding=embedding) + vector_store = vector_store_context.new_langchain_vector_store( + embedding=embedding) if test_case == "rag_custom_chain": run_rag_custom_chain( vector_store=vector_store, llm=llm, record_property=record_property @@ -320,7 +324,8 @@ def embed_query(self, text: str) -> List[float]: image=img, contextual_text="Coffee Maker Part" ) - documents = enhanced_vector_store.search_documents(embeddings.image_embedding, 3) + documents = enhanced_vector_store.search_documents( + embeddings.image_embedding, 3) image_message = { "type": "image_url", "image_url": {"url": query_image_path}, @@ -341,12 +346,6 @@ def embed_query(self, text: str) -> List[float]: assert "Coffee Machine Ultra Cool" in response.content -def get_local_resource_path(filename: str): - dirname = os.path.dirname(__file__) - e2e_tests_dir = os.path.dirname(dirname) - return os.path.join(e2e_tests_dir, "resources", filename) - - @pytest.mark.parametrize("chat", ["vertex_gemini_pro_llm", "gemini_pro_llm"]) def test_chat(chat, request, record_property): set_current_test_info( diff --git a/ragstack-e2e-tests/e2e_tests/llama_index/environment.py b/ragstack-e2e-tests/e2e_tests/llama_index/environment.py new file mode 100644 index 000000000..178491352 --- /dev/null +++ b/ragstack-e2e-tests/e2e_tests/llama_index/environment.py @@ -0,0 +1,75 @@ +import logging +from typing import List + +import pytest + +from e2e_tests.conftest import ( + get_required_env, + is_astra, +) +from llama_index import ( + ServiceContext, + StorageContext, +) +from llama_index.embeddings import BaseEmbedding +from llama_index.llms import OpenAI, LLM +from llama_index.node_parser import SimpleNodeParser +from llama_index.vector_stores import AstraDBVectorStore + +from e2e_tests.test_utils import skip_test_due_to_implementation_not_supported +from e2e_tests.test_utils.astradb_vector_store_handler import AstraDBVectorStoreHandler +from e2e_tests.test_utils.vector_store_handler import VectorStoreImplementation + + +class Environment: + def __init__( + self, vectorstore: AstraDBVectorStore, llm: LLM, embedding: BaseEmbedding + ): + self.vectorstore = vectorstore + self.llm = llm + self.embedding = embedding + self.service_context = ServiceContext.from_defaults( + embed_model=self.embedding, llm=self.llm + ) + basic_node_parser = SimpleNodeParser.from_defaults( + chunk_size=100000000, include_prev_next_rel=False, include_metadata=True + ) + self.service_context_no_splitting = ServiceContext.from_defaults( + embed_model=self.embedding, + llm=self.llm, + transformations=[basic_node_parser], + ) + self.storage_context = StorageContext.from_defaults(vector_store=vectorstore) + +@pytest.fixture +def environment() -> Environment: + if not is_astra: + skip_test_due_to_implementation_not_supported("astradb") + embeddings = MockEmbeddings() + handler = AstraDBVectorStoreHandler(VectorStoreImplementation.ASTRADB) + vector_db = handler.before_test().new_llamaindex_vector_store(embedding_dimension=3) + llm = OpenAI( + api_key=get_required_env("OPEN_AI_KEY"), + model="gpt-3.5-turbo-16k", + streaming=False, + temperature=0, + ) + yield Environment(vectorstore=vector_db, llm=llm, embedding=embeddings) + handler.after_test() + + +class MockEmbeddings(BaseEmbedding): + def _get_query_embedding(self, query: str) -> List[float]: + return self.mock_embedding(query) + + async def _aget_query_embedding(self, query: str) -> List[float]: + return self.mock_embedding(query) + + def _get_text_embedding(self, text: str) -> List[float]: + return self.mock_embedding(text) + + @staticmethod + def mock_embedding(text: str): + res = [len(text) / 2, len(text) / 5, len(text) / 10] + logging.debug("mock_embedding for " + text + " : " + str(res)) + return res \ No newline at end of file diff --git a/ragstack-e2e-tests/e2e_tests/llama_index/test_astra.py b/ragstack-e2e-tests/e2e_tests/llama_index/test_astra.py index 046526416..27e6a81ea 100644 --- a/ragstack-e2e-tests/e2e_tests/llama_index/test_astra.py +++ b/ragstack-e2e-tests/e2e_tests/llama_index/test_astra.py @@ -1,22 +1,10 @@ -import logging -from typing import List - import pytest from httpx import ConnectError, HTTPStatusError -from e2e_tests.conftest import ( - get_required_env, - is_astra, -) from llama_index import ( - ServiceContext, - StorageContext, VectorStoreIndex, Document, ) -from llama_index.embeddings import BaseEmbedding -from llama_index.llms import OpenAI, LLM -from llama_index.node_parser import SimpleNodeParser from llama_index.schema import NodeWithScore from llama_index.vector_stores import ( AstraDBVectorStore, @@ -24,31 +12,7 @@ ExactMatchFilter, ) -from e2e_tests.test_utils import skip_test_due_to_implementation_not_supported -from e2e_tests.test_utils.astradb_vector_store_handler import AstraDBVectorStoreHandler -from e2e_tests.test_utils.vector_store_handler import VectorStoreImplementation - - -class Environment: - def __init__( - self, vectorstore: AstraDBVectorStore, llm: LLM, embedding: BaseEmbedding - ): - self.vectorstore = vectorstore - self.llm = llm - self.embedding = embedding - self.service_context = ServiceContext.from_defaults( - embed_model=self.embedding, llm=self.llm - ) - basic_node_parser = SimpleNodeParser.from_defaults( - chunk_size=100000000, include_prev_next_rel=False, include_metadata=True - ) - self.service_context_no_splitting = ServiceContext.from_defaults( - embed_model=self.embedding, - llm=self.llm, - transformations=[basic_node_parser], - ) - self.storage_context = StorageContext.from_defaults(vector_store=vectorstore) - +from e2e_tests.llama_index.environment import Environment def test_basic_vector_search(environment: Environment): print("Running test_basic_vector_search") @@ -219,37 +183,3 @@ def test_vector_search_with_metadata(environment: Environment): # commenting this part, as the delete is not working, maybe it is a problem with document ids ? # documents = index.as_retriever().retrieve("RAGStack") # assert len(documents) == 0 - - -@pytest.fixture -def environment() -> Environment: - if not is_astra: - skip_test_due_to_implementation_not_supported("astradb") - embeddings = MockEmbeddings() - handler = AstraDBVectorStoreHandler(VectorStoreImplementation.ASTRADB) - vector_db = handler.before_test().new_llamaindex_vector_store(embedding_dimension=3) - llm = OpenAI( - api_key=get_required_env("OPEN_AI_KEY"), - model="gpt-3.5-turbo-16k", - streaming=False, - temperature=0, - ) - yield Environment(vectorstore=vector_db, llm=llm, embedding=embeddings) - handler.after_test() - - -class MockEmbeddings(BaseEmbedding): - def _get_query_embedding(self, query: str) -> List[float]: - return self.mock_embedding(query) - - async def _aget_query_embedding(self, query: str) -> List[float]: - return self.mock_embedding(query) - - def _get_text_embedding(self, text: str) -> List[float]: - return self.mock_embedding(text) - - @staticmethod - def mock_embedding(text: str): - res = [len(text) / 2, len(text) / 5, len(text) / 10] - logging.debug("mock_embedding for " + text + " : " + str(res)) - return res diff --git a/ragstack-e2e-tests/e2e_tests/llama_index/test_compatibility_rag.py b/ragstack-e2e-tests/e2e_tests/llama_index/test_compatibility_rag.py index 958bf5fe6..f17b2a3e3 100644 --- a/ragstack-e2e-tests/e2e_tests/llama_index/test_compatibility_rag.py +++ b/ragstack-e2e-tests/e2e_tests/llama_index/test_compatibility_rag.py @@ -33,6 +33,7 @@ ) from vertexai.vision_models import MultiModalEmbeddingModel, Image +from e2e_tests.test_utils import get_local_resource_path from e2e_tests.test_utils.vector_store_handler import ( VectorStoreImplementation, VectorStoreTestContext, @@ -68,7 +69,8 @@ def openai_embedding(): @pytest.fixture def azure_openai_llm(): return "azure-openai", AzureOpenAI( - azure_deployment=get_required_env("AZURE_OPEN_AI_CHAT_MODEL_DEPLOYMENT"), + azure_deployment=get_required_env( + "AZURE_OPEN_AI_CHAT_MODEL_DEPLOYMENT"), azure_endpoint=get_required_env("AZURE_OPEN_AI_ENDPOINT"), api_key=get_required_env("AZURE_OPEN_AI_KEY"), api_version="2023-07-01-preview", @@ -77,7 +79,8 @@ def azure_openai_llm(): @pytest.fixture def azure_openai_embedding(): - model_and_deployment = get_required_env("AZURE_OPEN_AI_EMBEDDINGS_MODEL_DEPLOYMENT") + model_and_deployment = get_required_env( + "AZURE_OPEN_AI_EMBEDDINGS_MODEL_DEPLOYMENT") return ( "azure-openai", 1536, @@ -183,8 +186,10 @@ def huggingface_hub_embedding(): ], ) def test_rag(vector_store, embedding, llm, request): - embedding_name, embedding_dimensions, embedding = request.getfixturevalue(embedding) - vector_store_context: VectorStoreTestContext = request.getfixturevalue(vector_store) + embedding_name, embedding_dimensions, embedding = request.getfixturevalue( + embedding) + vector_store_context: VectorStoreTestContext = request.getfixturevalue( + vector_store) llm_name, llm = request.getfixturevalue(llm) set_current_test_info( "llama_index::rag", @@ -194,7 +199,8 @@ def test_rag(vector_store, embedding, llm, request): embedding_dimension=embedding_dimensions ) storage_context = StorageContext.from_defaults(vector_store=vector_store) - service_context = ServiceContext.from_defaults(llm=llm, embed_model=embedding) + service_context = ServiceContext.from_defaults( + llm=llm, embed_model=embedding) documents = [ Document( @@ -331,7 +337,8 @@ def test_multimodal(vector_store, embedding, llm, request): image=img, contextual_text="Coffee Maker Part" ) - documents = enhanced_vector_store.search_documents(embeddings.image_embedding, 3) + documents = enhanced_vector_store.search_documents( + embeddings.image_embedding, 3) docs_str = ", ".join([f"'{p}'" for p in documents]) prompt = f"Tell me which one of these products it is part of. Only include product from the ones below: {docs_str}." logging.info(f"Prompt: {prompt}") @@ -339,12 +346,6 @@ def test_multimodal(vector_store, embedding, llm, request): assert "Coffee Machine Ultra Cool" in response -def get_local_resource_path(filename: str): - dirname = os.path.dirname(__file__) - e2e_tests_dir = os.path.dirname(dirname) - return os.path.join(e2e_tests_dir, "resources", filename) - - @pytest.mark.parametrize( "chat", ["gemini_pro_llm", "vertex_gemini_pro_llm"], diff --git a/ragstack-e2e-tests/e2e_tests/llama_index/test_llama_parse.py b/ragstack-e2e-tests/e2e_tests/llama_index/test_llama_parse.py index c3b19c44a..a4d3f4950 100644 --- a/ragstack-e2e-tests/e2e_tests/llama_index/test_llama_parse.py +++ b/ragstack-e2e-tests/e2e_tests/llama_index/test_llama_parse.py @@ -1,8 +1,8 @@ from llama_parse import LlamaParse from llama_index import VectorStoreIndex -from e2e_tests.llama_index.test_astra import Environment, environment -from e2e_tests.llama_index.test_compatibility_rag import get_local_resource_path +from e2e_tests.llama_index.environment import Environment +from e2e_tests.test_utils import get_local_resource_path def test_llamaparse_as_text_with_vector_search(environment: Environment): diff --git a/ragstack-e2e-tests/e2e_tests/test_utils/__init__.py b/ragstack-e2e-tests/e2e_tests/test_utils/__init__.py index c8cd4401c..db826ebbf 100644 --- a/ragstack-e2e-tests/e2e_tests/test_utils/__init__.py +++ b/ragstack-e2e-tests/e2e_tests/test_utils/__init__.py @@ -18,6 +18,12 @@ def get_required_env(name) -> str: return value +def get_local_resource_path(filename: str): + dirname = os.path.dirname(__file__) + e2e_tests_dir = os.path.dirname(dirname) + return os.path.join(e2e_tests_dir, "resources", filename) + + def random_string() -> str: return str(uuid.uuid4()).split("-")[0]