diff --git "a/pages/1_\360\237\222\254_basic_chatbot.py" "b/pages/1_\360\237\222\254_basic_chatbot.py" index 3b98bad..07741a3 100644 --- "a/pages/1_\360\237\222\254_basic_chatbot.py" +++ "b/pages/1_\360\237\222\254_basic_chatbot.py" @@ -16,7 +16,7 @@ def __init__(self): self.llm = utils.configure_llm() def setup_chain(self): - chain = ConversationChain(llm=self.llm, verbose=True) + chain = ConversationChain(llm=self.llm, verbose=False) return chain @utils.enable_chat_history @@ -33,6 +33,7 @@ def main(self): ) response = result["response"] st.session_state.messages.append({"role": "assistant", "content": response}) + utils.print_qa(BasicChatbot, user_query, response) if __name__ == "__main__": obj = BasicChatbot() diff --git "a/pages/2_\342\255\220_context_aware_chatbot.py" "b/pages/2_\342\255\220_context_aware_chatbot.py" index 30f1314..4a91bea 100644 --- "a/pages/2_\342\255\220_context_aware_chatbot.py" +++ "b/pages/2_\342\255\220_context_aware_chatbot.py" @@ -19,7 +19,7 @@ def __init__(self): @st.cache_resource def setup_chain(_self): memory = ConversationBufferMemory() - chain = ConversationChain(llm=_self.llm, memory=memory, verbose=True) + chain = ConversationChain(llm=_self.llm, memory=memory, verbose=False) return chain @utils.enable_chat_history @@ -36,6 +36,7 @@ def main(self): ) response = result["response"] st.session_state.messages.append({"role": "assistant", "content": response}) + utils.print_qa(ContextChatbot, user_query, response) if __name__ == "__main__": obj = ContextChatbot() diff --git "a/pages/3_\360\237\214\220_chatbot_with_internet_access.py" "b/pages/3_\360\237\214\220_chatbot_with_internet_access.py" index d658a64..a4ef7e6 100644 --- "a/pages/3_\360\237\214\220_chatbot_with_internet_access.py" +++ "b/pages/3_\360\237\214\220_chatbot_with_internet_access.py" @@ -38,7 +38,7 @@ def setup_agent(_self): # Setup LLM and Agent memory = ConversationBufferMemory(memory_key="chat_history") agent = create_react_agent(_self.llm, tools, prompt) - agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory, verbose=True) + agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory, verbose=False) return agent_executor, memory @utils.enable_chat_history @@ -56,6 +56,8 @@ def main(self): response = result["output"] st.session_state.messages.append({"role": "assistant", "content": response}) st.write(response) + utils.print_qa(InternetChatbot, user_query, response) + if __name__ == "__main__": obj = InternetChatbot() diff --git "a/pages/4_\360\237\223\204_chat_with_your_documents.py" "b/pages/4_\360\237\223\204_chat_with_your_documents.py" index 9c155f2..a17e8b7 100644 --- "a/pages/4_\360\237\223\204_chat_with_your_documents.py" +++ "b/pages/4_\360\237\223\204_chat_with_your_documents.py" @@ -8,7 +8,6 @@ from langchain_community.document_loaders import PyPDFLoader from langchain_community.vectorstores import DocArrayInMemorySearch from langchain_text_splitters import RecursiveCharacterTextSplitter -from langchain_community.embeddings.fastembed import FastEmbedEmbeddings st.set_page_config(page_title="ChatPDF", page_icon="📄") @@ -16,11 +15,12 @@ st.write('Has access to custom documents and can respond to user queries by referring to the content within those documents') st.write('[![view source code ](https://img.shields.io/badge/view_source_code-gray?logo=github)](https://github.com/shashankdeshpande/langchain-chatbot/blob/master/pages/4_%F0%9F%93%84_chat_with_your_documents.py)') -class CustomDataChatbot: +class CustomDocChatbot: def __init__(self): 
diff --git "a/pages/4_\360\237\223\204_chat_with_your_documents.py" "b/pages/4_\360\237\223\204_chat_with_your_documents.py"
index 9c155f2..a17e8b7 100644
--- "a/pages/4_\360\237\223\204_chat_with_your_documents.py"
+++ "b/pages/4_\360\237\223\204_chat_with_your_documents.py"
@@ -8,7 +8,6 @@
 from langchain_community.document_loaders import PyPDFLoader
 from langchain_community.vectorstores import DocArrayInMemorySearch
 from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
 
 st.set_page_config(page_title="ChatPDF", page_icon="📄")
 
@@ -16,11 +15,12 @@
 st.write('Has access to custom documents and can respond to user queries by referring to the content within those documents')
 st.write('[![view source code ](https://img.shields.io/badge/view_source_code-gray?logo=github)](https://github.com/shashankdeshpande/langchain-chatbot/blob/master/pages/4_%F0%9F%93%84_chat_with_your_documents.py)')
 
-class CustomDataChatbot:
+class CustomDocChatbot:
 
     def __init__(self):
         utils.sync_st_session()
         self.llm = utils.configure_llm()
+        self.embedding_model = utils.configure_embedding_model()
 
     def save_file(self, file):
         folder = 'tmp'
@@ -41,16 +41,13 @@ def setup_qa_chain(self, uploaded_files):
             loader = PyPDFLoader(file_path)
             docs.extend(loader.load())
 
-        # Split documents
+        # Split documents and store in vector db
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=1000,
             chunk_overlap=200
         )
         splits = text_splitter.split_documents(docs)
-
-        # Create embeddings and store in vectordb
-        embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")
-        vectordb = DocArrayInMemorySearch.from_documents(splits, embeddings)
+        vectordb = DocArrayInMemorySearch.from_documents(splits, self.embedding_model)
 
         # Define retriever
         retriever = vectordb.as_retriever(
@@ -71,7 +68,7 @@ def setup_qa_chain(self, uploaded_files):
             retriever=retriever,
             memory=memory,
             return_source_documents=True,
-            verbose=True
+            verbose=False
         )
         return qa_chain
 
@@ -99,6 +96,7 @@ def main(self):
                 )
                 response = result["answer"]
                 st.session_state.messages.append({"role": "assistant", "content": response})
+                utils.print_qa(CustomDocChatbot, user_query, response)
 
                 # to show references
                 for idx, doc in enumerate(result['source_documents'],1):
@@ -109,5 +107,5 @@
                         st.caption(doc.page_content)
 
 if __name__ == "__main__":
-    obj = CustomDataChatbot()
+    obj = CustomDocChatbot()
     obj.main()
\ No newline at end of file
diff --git "a/pages/5_\360\237\233\242_chat_with_sql_db.py" "b/pages/5_\360\237\233\242_chat_with_sql_db.py"
index 716a8e9..34df83a 100644
--- "a/pages/5_\360\237\233\242_chat_with_sql_db.py"
+++ "b/pages/5_\360\237\233\242_chat_with_sql_db.py"
@@ -37,7 +37,7 @@ def setup_sql_agent(_self, db):
             llm=_self.llm,
             db=db,
             top_k=10,
-            verbose=True,
+            verbose=False,
             agent_type="openai-tools",
             handle_parsing_errors=True,
             handle_sql_errors=True
@@ -86,6 +86,8 @@ def main(self):
                 response = result["output"]
                 st.session_state.messages.append({"role": "assistant", "content": response})
                 st.write(response)
+                utils.print_qa(SqlChatbot, user_query, response)
+
 
 if __name__ == "__main__":
     obj = SqlChatbot()
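The document page above (and the website page below) now pull their embedding model from a single shared helper instead of constructing `FastEmbedEmbeddings` per page. A minimal sketch of why the `@st.cache_resource` wrapper from the `utils.py` hunk below matters: Streamlit builds the model once per process and hands every caller the same instance on each rerun. The direct `return` here is a slight condensation of the diff's helper body:

```python
# Minimal sketch: @st.cache_resource turns the embedding model into a
# process-wide singleton, so reruns and multiple pages reuse one
# FastEmbed model instead of re-initialising it every time.
import streamlit as st
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

@st.cache_resource
def configure_embedding_model():
    return FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")

emb_a = configure_embedding_model()
emb_b = configure_embedding_model()
assert emb_a is emb_b  # same cached instance on both calls
```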
diff --git "a/pages/6_\360\237\224\227_chat_with_website.py" "b/pages/6_\360\237\224\227_chat_with_website.py"
index 44e9d7d..4b3025a 100644
--- "a/pages/6_\360\237\224\227_chat_with_website.py"
+++ "b/pages/6_\360\237\224\227_chat_with_website.py"
@@ -12,7 +12,6 @@
 from langchain_core.documents.base import Document
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.vectorstores import DocArrayInMemorySearch
-from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
 
 st.set_page_config(page_title="ChatWebsite", page_icon="🔗")
 st.header('Chat with Website')
@@ -24,6 +23,7 @@ class ChatbotWeb:
     def __init__(self):
         utils.sync_st_session()
         self.llm = utils.configure_llm()
+        self.embedding_model = utils.configure_embedding_model()
 
     def scrape_website(self, url):
         content = ""
@@ -39,7 +39,7 @@ def scrape_website(self, url):
             traceback.print_exc()
         return content
 
-    # @st.cache_resource(show_spinner='Analyzing webpage', ttl=3600)
+    @st.cache_resource(show_spinner='Analyzing webpage', ttl=3600)
     def setup_vectordb(_self, websites):
         # Scrape and load documents
         docs = []
@@ -50,16 +50,13 @@ def setup_vectordb(_self, websites):
                 )
             )
 
-        # Split documents
+        # Split documents and store in vector db
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=1000,
             chunk_overlap=200
         )
         splits = text_splitter.split_documents(docs)
-
-        # Create embeddings and store in vectordb
-        embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")
-        vectordb = DocArrayInMemorySearch.from_documents(splits, embeddings)
+        vectordb = DocArrayInMemorySearch.from_documents(splits, _self.embedding_model)
         return vectordb
 
     def setup_qa_chain(self, vectordb):
@@ -83,7 +80,7 @@ def setup_qa_chain(self, vectordb):
             retriever=retriever,
             memory=memory,
             return_source_documents=True,
-            verbose=True
+            verbose=False
         )
         return qa_chain
 
@@ -132,6 +129,7 @@ def main(self):
                 )
                 response = result["answer"]
                 st.session_state.messages.append({"role": "assistant", "content": response})
+                utils.print_qa(ChatbotWeb, user_query, response)
 
                 # to show references
                 for idx, doc in enumerate(result['source_documents'],1):
@@ -139,6 +137,7 @@
                     ref_title = f":blue[Reference {idx}: *{url}*]"
                     with st.popover(ref_title):
                         st.caption(doc.page_content)
+
 
 if __name__ == "__main__":
     obj = ChatbotWeb()
diff --git a/utils.py b/utils.py
index 25d46ed..54f3c15 100644
--- a/utils.py
+++ b/utils.py
@@ -4,6 +4,7 @@
 from datetime import datetime
 from langchain_openai import ChatOpenAI
 from langchain_community.chat_models import ChatOllama
+from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
 
 #decorator
 def enable_chat_history(func):
@@ -91,6 +92,15 @@ def configure_llm():
         llm = ChatOpenAI(model_name=model, temperature=0, streaming=True, api_key=openai_api_key)
     return llm
 
+def print_qa(cls, question, answer):
+    log_str = "Usecase: {}\nQuestion: {}\nAnswer: {}\n" + "------"*10
+    print(log_str.format(cls.__name__, question, answer))
+
+@st.cache_resource
+def configure_embedding_model():
+    embedding_model = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")
+    return embedding_model
+
 def sync_st_session():
     for k, v in st.session_state.items():
         st.session_state[k] = v
\ No newline at end of file
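Taken together, the retrieval pages now build their vector stores through the shared, cached embedding model. A hypothetical end-to-end sketch of the new wiring, with names taken from this diff; the PDF path is illustrative only, and the retriever's search settings (which each page configures itself) are left at defaults:

```python
# Hypothetical sketch of the post-change wiring (path is illustrative).
import utils
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_text_splitters import RecursiveCharacterTextSplitter

embedding_model = utils.configure_embedding_model()  # cached, shared

docs = PyPDFLoader("tmp/sample.pdf").load()          # illustrative path
splits = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=200
).split_documents(docs)

# One shared embedding model feeds every page's in-memory vector db
vectordb = DocArrayInMemorySearch.from_documents(splits, embedding_model)
retriever = vectordb.as_retriever()
```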