Skip to content

Commit

Permalink
made recommended changes
Browse files Browse the repository at this point in the history
  • Loading branch information
epinzur committed Feb 12, 2024
1 parent d3f39bb commit af0dca2
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 95 deletions.
19 changes: 9 additions & 10 deletions ragstack-e2e-tests/e2e_tests/langchain/test_compatibility_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
run_conversational_rag,
)
from e2e_tests.langchain.trulens import run_trulens_evaluation
from e2e_tests.test_utils import get_local_resource_path

from langchain.chat_models import ChatOpenAI, AzureChatOpenAI, ChatVertexAI, BedrockChat
from langchain.embeddings import (
Expand Down Expand Up @@ -79,7 +80,8 @@ def openai_embedding():
@pytest.fixture
def azure_openai_llm():
return AzureChatOpenAI(
azure_deployment=get_required_env("AZURE_OPEN_AI_CHAT_MODEL_DEPLOYMENT"),
azure_deployment=get_required_env(
"AZURE_OPEN_AI_CHAT_MODEL_DEPLOYMENT"),
openai_api_base=get_required_env("AZURE_OPEN_AI_ENDPOINT"),
openai_api_key=get_required_env("AZURE_OPEN_AI_KEY"),
openai_api_type="azure",
Expand All @@ -89,7 +91,8 @@ def azure_openai_llm():

@pytest.fixture
def azure_openai_embedding():
model_and_deployment = get_required_env("AZURE_OPEN_AI_EMBEDDINGS_MODEL_DEPLOYMENT")
model_and_deployment = get_required_env(
"AZURE_OPEN_AI_EMBEDDINGS_MODEL_DEPLOYMENT")
return AzureOpenAIEmbeddings(
model=model_and_deployment,
deployment=model_and_deployment,
Expand Down Expand Up @@ -212,7 +215,8 @@ def test_rag(test_case, vector_store, embedding, llm, request, record_property):


def _run_test(test_case: str, vector_store_context, embedding, llm, record_property):
vector_store = vector_store_context.new_langchain_vector_store(embedding=embedding)
vector_store = vector_store_context.new_langchain_vector_store(
embedding=embedding)
if test_case == "rag_custom_chain":
run_rag_custom_chain(
vector_store=vector_store, llm=llm, record_property=record_property
Expand Down Expand Up @@ -320,7 +324,8 @@ def embed_query(self, text: str) -> List[float]:
image=img, contextual_text="Coffee Maker Part"
)

documents = enhanced_vector_store.search_documents(embeddings.image_embedding, 3)
documents = enhanced_vector_store.search_documents(
embeddings.image_embedding, 3)
image_message = {
"type": "image_url",
"image_url": {"url": query_image_path},
Expand All @@ -341,12 +346,6 @@ def embed_query(self, text: str) -> List[float]:
assert "Coffee Machine Ultra Cool" in response.content


def get_local_resource_path(filename: str):
dirname = os.path.dirname(__file__)
e2e_tests_dir = os.path.dirname(dirname)
return os.path.join(e2e_tests_dir, "resources", filename)


@pytest.mark.parametrize("chat", ["vertex_gemini_pro_llm", "gemini_pro_llm"])
def test_chat(chat, request, record_property):
set_current_test_info(
Expand Down
75 changes: 75 additions & 0 deletions ragstack-e2e-tests/e2e_tests/llama_index/environment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import logging
from typing import List

import pytest

from e2e_tests.conftest import (
get_required_env,
is_astra,
)
from llama_index import (
ServiceContext,
StorageContext,
)
from llama_index.embeddings import BaseEmbedding
from llama_index.llms import OpenAI, LLM
from llama_index.node_parser import SimpleNodeParser
from llama_index.vector_stores import AstraDBVectorStore

from e2e_tests.test_utils import skip_test_due_to_implementation_not_supported
from e2e_tests.test_utils.astradb_vector_store_handler import AstraDBVectorStoreHandler
from e2e_tests.test_utils.vector_store_handler import VectorStoreImplementation


class Environment:
    """Aggregates the shared llama-index objects a test needs: the vector
    store, the LLM, the embedding model, and the service/storage contexts
    built from them."""

    def __init__(
        self, vectorstore: AstraDBVectorStore, llm: LLM, embedding: BaseEmbedding
    ):
        """Wire service and storage contexts around the given components.

        :param vectorstore: Astra DB vector store used for persistence.
        :param llm: language model shared by both service contexts.
        :param embedding: embedding model shared by both service contexts.
        """
        self.vectorstore = vectorstore
        self.llm = llm
        self.embedding = embedding
        self.service_context = ServiceContext.from_defaults(
            embed_model=self.embedding, llm=self.llm
        )
        # Effectively-unlimited chunk size so each document is ingested as a
        # single node (no splitting) by this alternate context.
        basic_node_parser = SimpleNodeParser.from_defaults(
            chunk_size=100000000, include_prev_next_rel=False, include_metadata=True
        )
        self.service_context_no_splitting = ServiceContext.from_defaults(
            embed_model=self.embedding,
            llm=self.llm,
            transformations=[basic_node_parser],
        )
        self.storage_context = StorageContext.from_defaults(vector_store=vectorstore)

@pytest.fixture
def environment() -> Environment:
    """Pytest fixture yielding a fully wired :class:`Environment` backed by
    Astra DB, with a mock embedding model and a real OpenAI LLM.

    Skips the test when the suite is not running against Astra; tears the
    vector store handler down after the test completes.
    """
    # NOTE(review): this is a generator fixture, so the annotation would more
    # accurately be Iterator[Environment] — confirm before changing.
    if not is_astra:
        skip_test_due_to_implementation_not_supported("astradb")
    embeddings = MockEmbeddings()
    handler = AstraDBVectorStoreHandler(VectorStoreImplementation.ASTRADB)
    # dimension 3 matches the 3-component vectors MockEmbeddings produces
    vector_db = handler.before_test().new_llamaindex_vector_store(embedding_dimension=3)
    llm = OpenAI(
        api_key=get_required_env("OPEN_AI_KEY"),
        model="gpt-3.5-turbo-16k",
        streaming=False,
        temperature=0,
    )
    yield Environment(vectorstore=vector_db, llm=llm, embedding=embeddings)
    handler.after_test()


class MockEmbeddings(BaseEmbedding):
    """Deterministic embedding stub: maps a string to a 3-dimensional vector
    derived from its length, so tests run without a real embedding model."""

    def _get_query_embedding(self, query: str) -> List[float]:
        return self.mock_embedding(query)

    async def _aget_query_embedding(self, query: str) -> List[float]:
        return self.mock_embedding(query)

    def _get_text_embedding(self, text: str) -> List[float]:
        return self.mock_embedding(text)

    async def _aget_text_embedding(self, text: str) -> List[float]:
        # Async counterpart of _get_text_embedding, mirroring the query pair.
        return self.mock_embedding(text)

    @staticmethod
    def mock_embedding(text: str) -> List[float]:
        """Return a length-derived 3-component vector for *text*."""
        res = [len(text) / 2, len(text) / 5, len(text) / 10]
        # Lazy %-style args: the message is only built if DEBUG is enabled,
        # instead of eagerly concatenating strings on every call.
        logging.debug("mock_embedding for %s : %s", text, res)
        return res
72 changes: 1 addition & 71 deletions ragstack-e2e-tests/e2e_tests/llama_index/test_astra.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,18 @@
import logging
from typing import List

import pytest
from httpx import ConnectError, HTTPStatusError

from e2e_tests.conftest import (
get_required_env,
is_astra,
)
from llama_index import (
ServiceContext,
StorageContext,
VectorStoreIndex,
Document,
)
from llama_index.embeddings import BaseEmbedding
from llama_index.llms import OpenAI, LLM
from llama_index.node_parser import SimpleNodeParser
from llama_index.schema import NodeWithScore
from llama_index.vector_stores import (
AstraDBVectorStore,
MetadataFilters,
ExactMatchFilter,
)

from e2e_tests.test_utils import skip_test_due_to_implementation_not_supported
from e2e_tests.test_utils.astradb_vector_store_handler import AstraDBVectorStoreHandler
from e2e_tests.test_utils.vector_store_handler import VectorStoreImplementation


class Environment:
def __init__(
self, vectorstore: AstraDBVectorStore, llm: LLM, embedding: BaseEmbedding
):
self.vectorstore = vectorstore
self.llm = llm
self.embedding = embedding
self.service_context = ServiceContext.from_defaults(
embed_model=self.embedding, llm=self.llm
)
basic_node_parser = SimpleNodeParser.from_defaults(
chunk_size=100000000, include_prev_next_rel=False, include_metadata=True
)
self.service_context_no_splitting = ServiceContext.from_defaults(
embed_model=self.embedding,
llm=self.llm,
transformations=[basic_node_parser],
)
self.storage_context = StorageContext.from_defaults(vector_store=vectorstore)

from e2e_tests.llama_index.environment import Environment

def test_basic_vector_search(environment: Environment):
print("Running test_basic_vector_search")
Expand Down Expand Up @@ -219,37 +183,3 @@ def test_vector_search_with_metadata(environment: Environment):
# NOTE: skipping this check for now — delete is not working; possibly an issue with document IDs.
# documents = index.as_retriever().retrieve("RAGStack")
# assert len(documents) == 0


@pytest.fixture
def environment() -> Environment:
if not is_astra:
skip_test_due_to_implementation_not_supported("astradb")
embeddings = MockEmbeddings()
handler = AstraDBVectorStoreHandler(VectorStoreImplementation.ASTRADB)
vector_db = handler.before_test().new_llamaindex_vector_store(embedding_dimension=3)
llm = OpenAI(
api_key=get_required_env("OPEN_AI_KEY"),
model="gpt-3.5-turbo-16k",
streaming=False,
temperature=0,
)
yield Environment(vectorstore=vector_db, llm=llm, embedding=embeddings)
handler.after_test()


class MockEmbeddings(BaseEmbedding):
def _get_query_embedding(self, query: str) -> List[float]:
return self.mock_embedding(query)

async def _aget_query_embedding(self, query: str) -> List[float]:
return self.mock_embedding(query)

def _get_text_embedding(self, text: str) -> List[float]:
return self.mock_embedding(text)

@staticmethod
def mock_embedding(text: str):
res = [len(text) / 2, len(text) / 5, len(text) / 10]
logging.debug("mock_embedding for " + text + " : " + str(res))
return res
25 changes: 13 additions & 12 deletions ragstack-e2e-tests/e2e_tests/llama_index/test_compatibility_rag.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
)
from vertexai.vision_models import MultiModalEmbeddingModel, Image

from e2e_tests.test_utils import get_local_resource_path
from e2e_tests.test_utils.vector_store_handler import (
VectorStoreImplementation,
VectorStoreTestContext,
Expand Down Expand Up @@ -68,7 +69,8 @@ def openai_embedding():
@pytest.fixture
def azure_openai_llm():
return "azure-openai", AzureOpenAI(
azure_deployment=get_required_env("AZURE_OPEN_AI_CHAT_MODEL_DEPLOYMENT"),
azure_deployment=get_required_env(
"AZURE_OPEN_AI_CHAT_MODEL_DEPLOYMENT"),
azure_endpoint=get_required_env("AZURE_OPEN_AI_ENDPOINT"),
api_key=get_required_env("AZURE_OPEN_AI_KEY"),
api_version="2023-07-01-preview",
Expand All @@ -77,7 +79,8 @@ def azure_openai_llm():

@pytest.fixture
def azure_openai_embedding():
model_and_deployment = get_required_env("AZURE_OPEN_AI_EMBEDDINGS_MODEL_DEPLOYMENT")
model_and_deployment = get_required_env(
"AZURE_OPEN_AI_EMBEDDINGS_MODEL_DEPLOYMENT")
return (
"azure-openai",
1536,
Expand Down Expand Up @@ -183,8 +186,10 @@ def huggingface_hub_embedding():
],
)
def test_rag(vector_store, embedding, llm, request):
embedding_name, embedding_dimensions, embedding = request.getfixturevalue(embedding)
vector_store_context: VectorStoreTestContext = request.getfixturevalue(vector_store)
embedding_name, embedding_dimensions, embedding = request.getfixturevalue(
embedding)
vector_store_context: VectorStoreTestContext = request.getfixturevalue(
vector_store)
llm_name, llm = request.getfixturevalue(llm)
set_current_test_info(
"llama_index::rag",
Expand All @@ -194,7 +199,8 @@ def test_rag(vector_store, embedding, llm, request):
embedding_dimension=embedding_dimensions
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embedding)
service_context = ServiceContext.from_defaults(
llm=llm, embed_model=embedding)

documents = [
Document(
Expand Down Expand Up @@ -331,20 +337,15 @@ def test_multimodal(vector_store, embedding, llm, request):
image=img, contextual_text="Coffee Maker Part"
)

documents = enhanced_vector_store.search_documents(embeddings.image_embedding, 3)
documents = enhanced_vector_store.search_documents(
embeddings.image_embedding, 3)
docs_str = ", ".join([f"'{p}'" for p in documents])
prompt = f"Tell me which one of these products it is part of. Only include product from the ones below: {docs_str}."
logging.info(f"Prompt: {prompt}")
response = llm_complete_fn(resolved_llm, prompt, query_image_path)
assert "Coffee Machine Ultra Cool" in response


def get_local_resource_path(filename: str):
dirname = os.path.dirname(__file__)
e2e_tests_dir = os.path.dirname(dirname)
return os.path.join(e2e_tests_dir, "resources", filename)


@pytest.mark.parametrize(
"chat",
["gemini_pro_llm", "vertex_gemini_pro_llm"],
Expand Down
4 changes: 2 additions & 2 deletions ragstack-e2e-tests/e2e_tests/llama_index/test_llama_parse.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from llama_parse import LlamaParse
from llama_index import VectorStoreIndex

from e2e_tests.llama_index.test_astra import Environment, environment
from e2e_tests.llama_index.test_compatibility_rag import get_local_resource_path
from e2e_tests.llama_index.environment import Environment
from e2e_tests.test_utils import get_local_resource_path


def test_llamaparse_as_text_with_vector_search(environment: Environment):
Expand Down
6 changes: 6 additions & 0 deletions ragstack-e2e-tests/e2e_tests/test_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ def get_required_env(name) -> str:
return value


def get_local_resource_path(filename: str):
    """Return the absolute path of *filename* inside the e2e_tests
    ``resources`` directory (one level above this package)."""
    e2e_tests_dir = os.path.dirname(os.path.dirname(__file__))
    return os.path.join(e2e_tests_dir, "resources", filename)


def random_string() -> str:
return str(uuid.uuid4()).split("-")[0]

Expand Down

0 comments on commit af0dca2

Please sign in to comment.