Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ERROR: 'Result' object has no attribute 'top_k' #35

Open
ashwinzyx opened this issue Oct 1, 2024 · 0 comments
Open

ERROR: 'Result' object has no attribute 'top_k' #35

ashwinzyx opened this issue Oct 1, 2024 · 0 comments

Comments

@ashwinzyx
Copy link

Facing issue with cross-encoder rerankers

Loading TransformerRanker model mixedbread-ai/mxbai-rerank-large-v1
No device set
Using device mps
No dtype set
Using dtype torch.float16
Loaded model mixedbread-ai/mxbai-rerank-large-v1
Using device mps.
Using dtype torch.float16.
/Users/ashwinaravind/.pyenv/versions/3.12.5/lib/python3.12/site-packages/langchain/hub.py:86: DeprecationWarning: The `langchainhub sdk` is deprecated.
Please use the `langsmith sdk` instead:
  pip install langsmith
Use the `pull_prompt` method.
  res_dict = client.pull_repo(owner_repo_commit)
/Users/ashwinaravind/.pyenv/versions/3.12.5/lib/python3.12/site-packages/posthog/client.py:356: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).
  timestamp = datetime.utcnow().replace(tzinfo=tzutc())
Number of requested results 100 is greater than number of elements in index 1, updating n_results = 1
/Users/ashwinaravind/.pyenv/versions/3.12.5/lib/python3.12/site-packages/posthog/request.py:40: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).
  body["sentAt"] = datetime.utcnow().replace(tzinfo=tzutc()).isoformat()
An error occurred: 'Result' object has no attribute 'top_k'

The sample code that reproduces the error is below:

from langchain_openai import ChatOpenAI

from langchain_community.document_loaders import *
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain_chroma import Chroma
import chromadb

from rerankers import Reranker
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever
import os
from operator import itemgetter
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda
from langchain.retrievers import MergerRetriever,EnsembleRetriever
from langchain.retrievers.document_compressors import DocumentCompressorPipeline

def rag_pipeline():
    """Build a RAG chain over a web page and answer one query.

    Loads a page with WebBaseLoader, splits it on markdown headers,
    indexes the chunks in Chroma, wraps the retriever with a
    cross-encoder reranking compressor from the `rerankers` package,
    then runs a prompt | llm | parser chain and prints the result.

    Returns nothing; on failure it prints the full traceback plus a
    short error summary instead of raising.
    """
    try:
        def format_docs(docs):
            # Join retrieved documents into a single newline-separated string.
            return "\n".join(doc.page_content for doc in docs)

        llm = ChatOpenAI(model='gpt-4o-mini')

        loader = WebBaseLoader('https://ashwinaravind.github.io/')
        docs = loader.load()

        embedding = OpenAIEmbeddings(model='text-embedding-3-small')

        headers_to_split_on = [
            ("#", "Header 1"),
            ("##", "Header 2"),
            ("###", "Header 3"),
        ]
        splitter = MarkdownHeaderTextSplitter(
            headers_to_split_on=headers_to_split_on, strip_headers=False
        )
        splits = splitter.split_text(docs[0].page_content)
        c = Chroma.from_documents(
            documents=splits,
            embedding=embedding,
            collection_name='testindex-ragbuilder-1727764109091',
            client_settings=chromadb.config.Settings(allow_reset=True),
        )
        # Single Chroma retriever wrapped in an EnsembleRetriever, kept for
        # parity with the multi-retriever setup this snippet was reduced from.
        base_retriever = c.as_retriever(search_type='similarity', search_kwargs={'k': 100})
        retriever = EnsembleRetriever(retrievers=[base_retriever])
        # Cross-encoder reranker exposed as a LangChain document compressor;
        # the "'Result' object has no attribute 'top_k'" error surfaces when
        # this compressor runs during retrieval.
        ranker = Reranker(model_name='mixedbread-ai/mxbai-rerank-large-v1', model_type='cross-encoder')
        compressor = ranker.as_langchain_compressor(k=1)
        retriever = ContextualCompressionRetriever(
            base_retriever=retriever, base_compressor=compressor
        )
        prompt = hub.pull("rlm/rag-prompt")
        rag_chain = (
            RunnableParallel(context=retriever, question=RunnablePassthrough())
            .assign(context=itemgetter("context") | RunnableLambda(format_docs))
            .assign(answer=prompt | llm | StrOutputParser())
        )
        print(rag_chain.invoke('How does rag work'))
    except Exception as e:
        # Print the full traceback first so the failing call site inside the
        # reranker/compressor stack is visible, not just the message text.
        import traceback
        traceback.print_exc()
        print(f"An error occurred: {e}")


if __name__ == "__main__":
    rag_pipeline()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant