Skip to content

Commit

Permalink
minor changes
Browse files Browse the repository at this point in the history
  • Loading branch information
shashankdeshpande committed Aug 6, 2024
1 parent 5c6ca93 commit aa121d1
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 21 deletions.
3 changes: 2 additions & 1 deletion pages/1_💬_basic_chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def __init__(self):
self.llm = utils.configure_llm()

def setup_chain(self):
chain = ConversationChain(llm=self.llm, verbose=True)
chain = ConversationChain(llm=self.llm, verbose=False)
return chain

@utils.enable_chat_history
Expand All @@ -33,6 +33,7 @@ def main(self):
)
response = result["response"]
st.session_state.messages.append({"role": "assistant", "content": response})
utils.print_qa(BasicChatbot, user_query, response)

if __name__ == "__main__":
obj = BasicChatbot()
Expand Down
3 changes: 2 additions & 1 deletion pages/2_⭐_context_aware_chatbot.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self):
@st.cache_resource
def setup_chain(_self):
memory = ConversationBufferMemory()
chain = ConversationChain(llm=_self.llm, memory=memory, verbose=True)
chain = ConversationChain(llm=_self.llm, memory=memory, verbose=False)
return chain

@utils.enable_chat_history
Expand All @@ -36,6 +36,7 @@ def main(self):
)
response = result["response"]
st.session_state.messages.append({"role": "assistant", "content": response})
utils.print_qa(ContextChatbot, user_query, response)

if __name__ == "__main__":
obj = ContextChatbot()
Expand Down
4 changes: 3 additions & 1 deletion pages/3_🌐_chatbot_with_internet_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def setup_agent(_self):
# Setup LLM and Agent
memory = ConversationBufferMemory(memory_key="chat_history")
agent = create_react_agent(_self.llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory, verbose=True)
agent_executor = AgentExecutor(agent=agent, tools=tools, memory=memory, verbose=False)
return agent_executor, memory

@utils.enable_chat_history
Expand All @@ -56,6 +56,8 @@ def main(self):
response = result["output"]
st.session_state.messages.append({"role": "assistant", "content": response})
st.write(response)
utils.print_qa(InternetChatbot, user_query, response)


if __name__ == "__main__":
obj = InternetChatbot()
Expand Down
16 changes: 7 additions & 9 deletions pages/4_📄_chat_with_your_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings


st.set_page_config(page_title="ChatPDF", page_icon="📄")
st.header('Chat with your documents (Basic RAG)')
st.write('Has access to custom documents and can respond to user queries by referring to the content within those documents')
st.write('[![view source code ](https://img.shields.io/badge/view_source_code-gray?logo=github)](https://github.com/shashankdeshpande/langchain-chatbot/blob/master/pages/4_%F0%9F%93%84_chat_with_your_documents.py)')

class CustomDataChatbot:
class CustomDocChatbot:

def __init__(self):
utils.sync_st_session()
self.llm = utils.configure_llm()
self.embedding_model = utils.configure_embedding_model()

def save_file(self, file):
folder = 'tmp'
Expand All @@ -41,16 +41,13 @@ def setup_qa_chain(self, uploaded_files):
loader = PyPDFLoader(file_path)
docs.extend(loader.load())

# Split documents
# Split documents and store in vector db
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200
)
splits = text_splitter.split_documents(docs)

# Create embeddings and store in vectordb
embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")
vectordb = DocArrayInMemorySearch.from_documents(splits, embeddings)
vectordb = DocArrayInMemorySearch.from_documents(splits, self.embedding_model)

# Define retriever
retriever = vectordb.as_retriever(
Expand All @@ -71,7 +68,7 @@ def setup_qa_chain(self, uploaded_files):
retriever=retriever,
memory=memory,
return_source_documents=True,
verbose=True
verbose=False
)
return qa_chain

Expand Down Expand Up @@ -99,6 +96,7 @@ def main(self):
)
response = result["answer"]
st.session_state.messages.append({"role": "assistant", "content": response})
utils.print_qa(CustomDocChatbot, user_query, response)

# to show references
for idx, doc in enumerate(result['source_documents'],1):
Expand All @@ -109,5 +107,5 @@ def main(self):
st.caption(doc.page_content)

if __name__ == "__main__":
obj = CustomDataChatbot()
obj = CustomDocChatbot()
obj.main()
4 changes: 3 additions & 1 deletion pages/5_🛢_chat_with_sql_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def setup_sql_agent(_self, db):
llm=_self.llm,
db=db,
top_k=10,
verbose=True,
verbose=False,
agent_type="openai-tools",
handle_parsing_errors=True,
handle_sql_errors=True
Expand Down Expand Up @@ -86,6 +86,8 @@ def main(self):
response = result["output"]
st.session_state.messages.append({"role": "assistant", "content": response})
st.write(response)
utils.print_qa(SqlChatbot, user_query, response)


if __name__ == "__main__":
obj = SqlChatbot()
Expand Down
15 changes: 7 additions & 8 deletions pages/6_🔗_chat_with_website.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from langchain_core.documents.base import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

st.set_page_config(page_title="ChatWebsite", page_icon="🔗")
st.header('Chat with Website')
Expand All @@ -24,6 +23,7 @@ class ChatbotWeb:
def __init__(self):
utils.sync_st_session()
self.llm = utils.configure_llm()
self.embedding_model = utils.configure_embedding_model()

def scrape_website(self, url):
content = ""
Expand All @@ -39,7 +39,7 @@ def scrape_website(self, url):
traceback.print_exc()
return content

# @st.cache_resource(show_spinner='Analyzing webpage', ttl=3600)
@st.cache_resource(show_spinner='Analyzing webpage', ttl=3600)
def setup_vectordb(_self, websites):
# Scrape and load documents
docs = []
Expand All @@ -50,16 +50,13 @@ def setup_vectordb(_self, websites):
)
)

# Split documents
# Split documents and store in vector db
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200
)
splits = text_splitter.split_documents(docs)

# Create embeddings and store in vectordb
embeddings = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")
vectordb = DocArrayInMemorySearch.from_documents(splits, embeddings)
vectordb = DocArrayInMemorySearch.from_documents(splits, _self.embedding_model)
return vectordb

def setup_qa_chain(self, vectordb):
Expand All @@ -83,7 +80,7 @@ def setup_qa_chain(self, vectordb):
retriever=retriever,
memory=memory,
return_source_documents=True,
verbose=True
verbose=False
)
return qa_chain

Expand Down Expand Up @@ -132,13 +129,15 @@ def main(self):
)
response = result["answer"]
st.session_state.messages.append({"role": "assistant", "content": response})
utils.print_qa(ChatbotWeb, user_query, response)

# to show references
for idx, doc in enumerate(result['source_documents'],1):
url = os.path.basename(doc.metadata['source'])
ref_title = f":blue[Reference {idx}: *{url}*]"
with st.popover(ref_title):
st.caption(doc.page_content)


if __name__ == "__main__":
obj = ChatbotWeb()
Expand Down
10 changes: 10 additions & 0 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime
from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings

#decorator
def enable_chat_history(func):
Expand Down Expand Up @@ -91,6 +92,15 @@ def configure_llm():
llm = ChatOpenAI(model_name=model, temperature=0, streaming=True, api_key=openai_api_key)
return llm

def print_qa(cls, question, answer):
    """Log one question/answer exchange to stdout, tagged with the chatbot class name.

    Args:
        cls: chatbot class whose __name__ labels the use case.
        question: the user's query string.
        answer: the assistant's response string.
    """
    separator = "------" * 10
    print(f"Usecase: {cls.__name__}\nQuestion: {question}\nAnswer: {answer}\n{separator}")

@st.cache_resource
def configure_embedding_model():
    """Build the shared FastEmbed embedding model (cached once per server process).

    Returns:
        A FastEmbedEmbeddings instance for the "BAAI/bge-small-en-v1.5" model.
    """
    return FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")

def sync_st_session():
    """Re-assign every Streamlit session_state entry to itself.

    NOTE(review): presumably a workaround so widget-backed state survives
    page switches in a multipage app — confirm against Streamlit docs.
    """
    for key in list(st.session_state.keys()):
        st.session_state[key] = st.session_state[key]

0 comments on commit aa121d1

Please sign in to comment.