added langchain chatbots

felipauskas · Jul 8, 2023 · 5c33221 · 5c33221
1 parent a65da74
commit 5c33221
Show file tree

Hide file tree

Showing 8 changed files with 316 additions and 1 deletion.
diff --git a/Home.py b/Home.py
@@ -0,0 +1,22 @@
+import streamlit as st
+
+st.set_page_config(
+    page_title="Langchain Chatbot",
+    page_icon='💬',
+    layout='wide'
+)
+
+st.header("Chatbot Implementations with Langchain")
+st.write("""
+Langchain is a powerful framework designed to streamline the development of applications using Language Models (LLMs). It provides a comprehensive integration of various components, simplifying the process of assembling them to create robust applications.
+
+Leveraging the power of Langchain, the creation of chatbots becomes effortless. Here are a few examples of chatbot implementations catering to different use cases:
+
+- **Basic Chatbot**: Engage in interactive conversations with the LLM.
+- **Chatbot with Internet Access**: An internet-enabled chatbot capable of answering user queries about recent events.
+- **Chat with your documents** Empower the chatbot with the ability to access custom documents, enabling it to provide answers to user queries based on the referenced information.
+
+To explore sample usage of each chatbot, please navigate to the corresponding chatbot section.
+
+GitHub Repository: https://github.com/shashankdeshpande/langchain-chatbot
+""")
diff --git a/README.md b/README.md
@@ -1 +1,20 @@
-# llm-bot
+# Chatbot Implementations with Langchain + Streamlit
+
+[![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://share.streamlit.io/shashankdeshpande/langchain-chatbot/Home.py)
+
+Langchain is a powerful framework designed to streamline the development of applications using Large Language Models (LLMs). It provides a comprehensive integration of various components, simplifying the process of assembling them to create robust applications.
+
+## Sample chatbots
+Here are a few examples of chatbot implementations using Langchain and Streamlit:
+-  **Basic Chatbot**: Engage in interactive conversations with the LLM.
+-  **Chatbot with Internet Access**: An internet-enabled chatbot capable of answering user queries about recent events.
+-  **Chat with your documents**: Empower the chatbot with the ability to access custom documents, enabling it to provide answers to user queries based on the referenced information.
+
+## Website
+WIP
+
+## Running locally
+```shell
+# Run main streamlit app
+$ streamlit run Home.py
+```
diff --git a/pages/1_💬_basic_chatbot.py b/pages/1_💬_basic_chatbot.py
@@ -0,0 +1,41 @@
+import utils
+import streamlit as st
+from streaming import StreamHandler
+
+from langchain.llms import OpenAI
+from langchain.chains import ConversationChain
+
+st.set_page_config(page_title="Chatbot", page_icon="💬")
+st.header('Basic Chatbot')
+st.write('Allows users to interact with the LLM')
+with st.expander("Implementation details"):
+    st.markdown("""
+    - LLM - [OpenAI](https://python.langchain.com/docs/ecosystem/integrations/openai#llm)
+    - Chain - [ConversationChain](https://github.com/hwchase17/langchain/blob/1d649b127eb10c426f9b9a67cbd1fe6ec8e6befa/langchain/chains/conversation/base.py#L12)
+    """)
+
+class Basic:
+
+    def __init__(self):
+        utils.configure_openai_api_key()
+        self.openai_model = "gpt-3.5-turbo"
+
+    def setup_chain(self):
+        llm = OpenAI(model_name=self.openai_model, temperature=0, streaming=True)
+        chain = ConversationChain(llm=llm, verbose=True)
+        return chain
+
+    @utils.enable_chat_history
+    def main(self):
+        chain = self.setup_chain()
+        user_query = st.chat_input(placeholder="Ask me anything!")
+        if user_query:
+            utils.display_msg(user_query, 'user')
+            with st.chat_message("assistant"):
+                st_cb = StreamHandler(st.empty())
+                response = chain.run(user_query, callbacks=[st_cb])
+                st.session_state.messages.append({"role": "assistant", "content": response})
+
+if __name__ == "__main__":
+    obj = Basic()
+    obj.main()
diff --git a/pages/2_🌐_chatbot_with_internet_access.py b/pages/2_🌐_chatbot_with_internet_access.py
@@ -0,0 +1,62 @@
+import utils
+import streamlit as st
+
+from langchain.agents import AgentType
+from langchain.chat_models import ChatOpenAI
+from langchain.tools import DuckDuckGoSearchRun
+from langchain.agents import initialize_agent, Tool
+from langchain.callbacks import StreamlitCallbackHandler
+
+st.set_page_config(page_title="ChatWeb", page_icon="🌐")
+st.header('Chatbot with Internet Access')
+st.write('Equipped with internet access, enables users to ask questions about recent events')
+with st.expander("Implementation details"):
+    st.markdown("""
+    - LLM - [OpenAI](https://python.langchain.com/docs/ecosystem/integrations/openai#llm)
+    - Tools - [DuckDuckGoSearch](https://python.langchain.com/docs/modules/agents/tools/integrations/ddg)
+    - Agent - [ReAct](https://python.langchain.com/docs/modules/agents/agent_types/react)
+    """)
+
+class ChatbotTools:
+
+    def __init__(self):
+        utils.configure_openai_api_key()
+        self.openai_model = "gpt-3.5-turbo"
+
+    def setup_agent(self):
+        # Define tool
+        ddg_search = DuckDuckGoSearchRun()
+        tools = [
+            Tool(
+                name="DuckDuckGoSearch",
+                func=ddg_search.run,
+                description="Useful for when you need to answer questions about current events. You should ask targeted questions",
+            )
+        ]
+
+        # Setup LLM and Agent
+        llm = ChatOpenAI(model_name=self.openai_model, streaming=True)
+        agent = initialize_agent(
+            tools=tools,
+            llm=llm,
+            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
+            handle_parsing_errors=True,
+            verbose=True
+        )
+        return agent
+
+    @utils.enable_chat_history
+    def main(self):
+        agent = self.setup_agent()
+        user_query = st.chat_input(placeholder="Ask me anything!")
+        if user_query:
+            utils.display_msg(user_query, 'user')
+            with st.chat_message("assistant"):
+                st_cb = StreamlitCallbackHandler(st.container())
+                response = agent.run(user_query, callbacks=[st_cb])
+                st.session_state.messages.append({"role": "assistant", "content": response})
+                st.write(response)
+
+if __name__ == "__main__":
+    obj = ChatbotTools()
+    obj.main()
diff --git a/pages/3_📄_chat_with_your_documents.py b/pages/3_📄_chat_with_your_documents.py
@@ -0,0 +1,105 @@
+import os
+import utils
+import streamlit as st
+from streaming import StreamHandler
+
+from langchain.chat_models import ChatOpenAI
+from langchain.document_loaders import PyPDFLoader
+from langchain.memory import ConversationBufferMemory
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.chains import ConversationalRetrievalChain
+from langchain.vectorstores import DocArrayInMemorySearch
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+st.set_page_config(page_title="ChatPDF", page_icon="📄")
+st.header('Chat with your documents')
+st.write('Has access to custom documents and can respond to user queries by referring to the content within those documents')
+with st.expander("Implementation details"):
+    st.markdown("""
+    - LLM - [OpenAI](https://python.langchain.com/docs/ecosystem/integrations/openai#llm)
+    - Document Loader - [PyPDFLoader](https://python.langchain.com/docs/modules/data_connection/document_loaders/how_to/pdf#using-pypdf)
+    - Document Splitter - [RecursiveCharacterTextSplitter](https://python.langchain.com/docs/modules/data_connection/document_transformers/text_splitters/recursive_text_splitter)
+    - Embeddings - [HuggingFaceEmbeddings](https://python.langchain.com/docs/modules/data_connection/text_embedding/integrations/huggingfacehub)
+    - Vector store - [DocArrayInMemorySearch](https://python.langchain.com/docs/modules/data_connection/vectorstores/integrations/docarray_in_memory)
+    - Document Retriever - [Vector store-backed retriever: Maximum Marginal Relevance](https://python.langchain.com/docs/modules/data_connection/retrievers/how_to/vectorstore#maximum-marginal-relevance-retrieval)
+    - Memory - [ConversationBufferMemory](https://python.langchain.com/docs/modules/memory/how_to/buffer)
+    - Chain - [ConversationalRetrievalChain](https://python.langchain.com/docs/modules/agents/agent_types/react)
+    """)
+
+class CustomDataChatbot:
+
+    def __init__(self):
+        utils.configure_openai_api_key()
+        self.openai_model = "gpt-3.5-turbo"
+
+    def save_file(self, file):
+        folder = 'tmp'
+        if not os.path.exists(folder):
+            os.makedirs(folder)
+
+        file_path = f'./{folder}/{file.name}'
+        with open(file_path, 'wb') as f:
+            f.write(file.getvalue())
+        return file_path
+
+    @st.spinner('Analyzing documents..')
+    def setup_qa_chain(self, uploaded_files):
+        # Load documents
+        docs = []
+        for file in uploaded_files:
+            file_path = self.save_file(file)
+            loader = PyPDFLoader(file_path)
+            docs.extend(loader.load())
+
+        # Split documents
+        text_splitter = RecursiveCharacterTextSplitter(
+            chunk_size=1500,
+            chunk_overlap=200
+        )
+        splits = text_splitter.split_documents(docs)
+
+        # Create embeddings and store in vectordb
+        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
+        vectordb = DocArrayInMemorySearch.from_documents(splits, embeddings)
+
+        # Define retriever
+        retriever = vectordb.as_retriever(
+            search_type='mmr',
+            search_kwargs={'k':2, 'fetch_k':4}
+        )
+
+        # Setup memory for contextual conversation        
+        memory = ConversationBufferMemory(
+            memory_key='chat_history',
+            return_messages=True
+        )
+
+        # Setup LLM and QA chain
+        llm = ChatOpenAI(model_name=self.openai_model, temperature=0, streaming=True)
+        qa_chain = ConversationalRetrievalChain.from_llm(llm, retriever=retriever, memory=memory, verbose=True)
+        return qa_chain
+
+    @utils.enable_chat_history
+    def main(self):
+
+        # User Inputs
+        uploaded_files = st.sidebar.file_uploader(label='Upload PDF files', type=['pdf'], accept_multiple_files=True)
+        if not uploaded_files:
+            st.error("Please upload PDF documents to continue!")
+            st.stop()
+
+        user_query = st.chat_input(placeholder="Ask me anything!")
+
+        if uploaded_files and user_query:
+            qa_chain = self.setup_qa_chain(uploaded_files)
+
+            utils.display_msg(user_query, 'user')
+
+            with st.chat_message("assistant"):
+                st_cb = StreamHandler(st.empty())
+                response = qa_chain.run(user_query, callbacks=[st_cb])
+                st.session_state.messages.append({"role": "assistant", "content": response})
+
+if __name__ == "__main__":
+    obj = CustomDataChatbot()
+    obj.main()
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,3 @@
+langchain==0.0.228
+openai==0.27.8
+streamlit==1.24.0
diff --git a/streaming.py b/streaming.py
@@ -0,0 +1,11 @@
+from langchain.callbacks.base import BaseCallbackHandler
+
+class StreamHandler(BaseCallbackHandler):
+
+    def __init__(self, container, initial_text=""):
+        self.container = container
+        self.text = initial_text
+
+    def on_llm_new_token(self, token: str, **kwargs):
+        self.text += token
+        self.container.markdown(self.text)
diff --git a/utils.py b/utils.py
@@ -0,0 +1,52 @@
+import os
+import random
+import streamlit as st
+
+#decorator
+def enable_chat_history(func):
+    if os.environ.get("OPENAI_API_KEY"):
+
+        # to clear chat history after swtching chatbot
+        current_page = func.__qualname__
+        if "current_page" not in st.session_state:
+            st.session_state["current_page"] = current_page
+        if st.session_state["current_page"] != current_page:
+            try:
+                del st.session_state["messages"]
+            except:
+                pass
+
+        # to show chat history on ui
+        if "messages" not in st.session_state:
+            st.session_state["messages"] = [{"role": "assistant", "content": "How can I help you?"}]
+        for msg in st.session_state["messages"]:
+            st.chat_message(msg["role"]).write(msg["content"])
+
+    def execute(*args, **kwargs):
+        func(*args, **kwargs)
+    return execute
+
+def display_msg(msg, author):
+    """Method to display message on the UI
+
+    Args:
+        msg (str): message to display
+        author (str): author of the message -user/assistant
+    """
+    st.session_state.messages.append({"role": author, "content": msg})
+    st.chat_message(author).write(msg)
+
+def configure_openai_api_key():
+    openai_api_key = st.sidebar.text_input(
+        label="OpenAI API Key",
+        type="password",
+        value=os.environ.get("OPENAI_API_KEY",""),
+        placeholder="sk-..."
+        )
+    if openai_api_key:
+        os.environ["OPENAI_API_KEY"] = openai_api_key
+    else:
+        st.error("Please add your OpenAI API key to continue.")
+        st.info("Obtain your key from this link: https://platform.openai.com/account/api-keys")
+        st.stop()
+    return openai_api_key