chat.py
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain import OpenAI, VectorDBQA
from langchain.document_loaders import DirectoryLoader
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
import os
import nltk
import config
import logging
# Initialize logging with the specified configuration
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler(config.LOGS_FILE),
        logging.StreamHandler(),
    ],
)
LOGGER = logging.getLogger(__name__)
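# NOTE: this module assumes that config.py defines FILE_DIR, LOGS_FILE, OPENAI_API_KEY,
# PERSIST_DIR, prompt_template, and k; all of these attributes are referenced below.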
# Load documents from the specified directory using a DirectoryLoader object
loader = DirectoryLoader(config.FILE_DIR, glob='*.pdf')
documents = loader.load()
# Split the documents into chunks of 1,000 characters with no overlap using a CharacterTextSplitter object
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
# Create a vector store from the chunks using an OpenAIEmbeddings object and a Chroma object
embeddings = OpenAIEmbeddings(openai_api_key=config.OPENAI_API_KEY)
docsearch = Chroma.from_documents(texts, embeddings)
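# Note: the Chroma index is built every time this module is imported; no persist_directory
# is passed here, and the persist_directory parameter of answer() below is not used when building it.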
# Define answer generation function
def answer(prompt: str, persist_directory: str = config.PERSIST_DIR) -> str:
    # Log a message indicating that the function has started
    LOGGER.info(f"Start answering based on prompt: {prompt}.")
    # Create a prompt template using a template from the config module and input variables
    # representing the context and question.
    prompt_template = PromptTemplate(
        template=config.prompt_template, input_variables=["context", "question"]
    )
    # Load a QA chain using an OpenAI object, a chain type, and a prompt template.
    doc_chain = load_qa_chain(
        llm=OpenAI(
            openai_api_key=config.OPENAI_API_KEY,
            model_name="text-davinci-003",
            temperature=0,
            max_tokens=300,
        ),
        chain_type="stuff",
        prompt=prompt_template,
    )
    # Log a message indicating the number of chunks to be considered when answering the user's query.
    LOGGER.info(f"The top {config.k} chunks are considered to answer the user's query.")
    # Create a VectorDBQA object using a vector store, a QA chain, and a number of chunks to consider.
    qa = VectorDBQA(vectorstore=docsearch, combine_documents_chain=doc_chain, k=config.k)
    # Call the VectorDBQA object to generate an answer to the prompt.
    result = qa({"query": prompt})
    answer = result["result"]
    # Log a message indicating the answer that was generated
    LOGGER.info(f"The returned answer is: {answer}")
    # Log a message indicating that the function has finished and return the answer.
    LOGGER.info("Answering module over.")
    return answer
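
As a usage illustration (not part of the original file), answer() could be called directly once config.py provides the attributes referenced above; the __main__ guard and the sample question below are assumptions added for demonstration only, not the author's entry point.

if __name__ == "__main__":
    # Hypothetical usage sketch, not in the original chat.py; the question is illustrative.
    sample_question = "What are the main topics covered in these documents?"
    print(answer(sample_question))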