From 5ec3b90c7e0c810568a9a069da779ea045e9da88 Mon Sep 17 00:00:00 2001 From: DJ Papzin Date: Tue, 15 Aug 2023 14:27:37 +0200 Subject: [PATCH] Initial commit: Starting the LangChain & Vector Databases in Production course with Activeloop --- 01_basic.py | 13 ++++ 02_chain.py | 19 ++++++ 03_track_usage.py | 9 +++ 04_few_short.py | 58 ++++++++++++++++++ 05_question_answering.py | 31 ++++++++++ 06_text_summarization.py | 14 +++++ 07_movies_finder.py | 21 +++++++ 08_chat_models.py | 14 +++++ 09_news_summarizer.py | 64 ++++++++++++++++++++ 10_gpt4all.py | 18 ++++++ 11_chain_prompting.py | 40 ++++++++++++ 12_news_summarizer_extended copy.py | 79 ++++++++++++++++++++++++ 13_news_summarizer_ output_parsers.py | 58 ++++++++++++++++++ 14. indexes_retrievers.py | 87 +++++++++++++++++++++++++++ my_file.txt | 13 ++++ 15 files changed, 538 insertions(+) create mode 100644 01_basic.py create mode 100644 02_chain.py create mode 100644 03_track_usage.py create mode 100644 04_few_short.py create mode 100644 05_question_answering.py create mode 100644 06_text_summarization.py create mode 100644 07_movies_finder.py create mode 100644 08_chat_models.py create mode 100644 09_news_summarizer.py create mode 100644 10_gpt4all.py create mode 100644 11_chain_prompting.py create mode 100644 12_news_summarizer_extended copy.py create mode 100644 13_news_summarizer_ output_parsers.py create mode 100644 14. indexes_retrievers.py create mode 100644 my_file.txt diff --git a/01_basic.py b/01_basic.py new file mode 100644 index 0000000..d97fd38 --- /dev/null +++ b/01_basic.py @@ -0,0 +1,13 @@ +from langchain.llms import OpenAI +from dotenv import load_dotenv + +load_dotenv() + +# Call the LLM +llm = OpenAI(model="text-davinci-003", temperature=0.9) + +# The Prompt +prompt = "Suggest a personalized workout routine for someone looking to improve cardiovascular endurance and prefers outdoor activities." 
+ +# pass the prompt to the LLM +print(llm(prompt)) \ No newline at end of file diff --git a/02_chain.py b/02_chain.py new file mode 100644 index 0000000..a104d85 --- /dev/null +++ b/02_chain.py @@ -0,0 +1,19 @@ +from langchain.prompts import PromptTemplate +from langchain.llms import OpenAI +from langchain.chains import LLMChain +from dotenv import load_dotenv + +load_dotenv() + +llm = OpenAI(model="text-davinci-003", temperature=0.9) +prompt = PromptTemplate( +    input_variables=["product"], +    template="What is a good name for a company that makes {product}?", +) + +chain = LLMChain(llm=llm, prompt=prompt, verbose=True) + +product = chain.run(input("Enter your product name: ")) + +# Run the chain only specifying the input variable. +print(product) \ No newline at end of file diff --git a/03_track_usage.py b/03_track_usage.py new file mode 100644 index 0000000..6a859df --- /dev/null +++ b/03_track_usage.py @@ -0,0 +1,9 @@ +from langchain.llms import OpenAI +from langchain.callbacks import get_openai_callback + +llm = OpenAI(model_name="text-davinci-003", n=2, best_of=2) + +with get_openai_callback() as cb: +    result = llm("Tell me a joke") +    cost = cb.total_cost +    print("$",round(cost, 5)) \ No newline at end of file diff --git a/04_few_short.py b/04_few_short.py new file mode 100644 index 0000000..a59110f --- /dev/null +++ b/04_few_short.py @@ -0,0 +1,58 @@ +from langchain import PromptTemplate +from langchain import FewShotPromptTemplate +from langchain.chat_models import ChatOpenAI +from langchain import LLMChain + +# create our list of example dictionaries +examples = [ +    { +        "query": "What's the weather like?", +        "answer": "It's raining cats and dogs, better bring an umbrella!" +    }, { +        "query": "How old are you?", +        "answer": "Age is just a number, but I'm timeless." 
+    } +] + +# create an example template +example_template = """ +User: {query} +AI: {answer} +""" + +# create a prompt example from above template +example_prompt = PromptTemplate( +    input_variables=["query", "answer"], +    template=example_template +) + +# now break our previous prompt into a prefix and suffix +# the prefix is our instructions +prefix = """The following are excerpts from conversations with an AI +assistant. The assistant is known for its humor and wit, providing +entertaining and amusing responses to users' questions. Here are some +examples: +""" +# and the suffix our user input and output indicator +suffix = """ +User: {query} +AI: """ + +# now create the few-shot prompt template +few_shot_prompt_template = FewShotPromptTemplate( +    examples=examples, +    example_prompt=example_prompt, +    prefix=prefix, +    suffix=suffix, +    input_variables=["query"], +    example_separator="\n\n" +) + +# load the model +chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.9) + +chain = LLMChain(llm=chat, prompt=few_shot_prompt_template, verbose=True) + +ask_question = chain.run(input("Ask your question: ")) + +print(ask_question) \ No newline at end of file diff --git a/05_question_answering.py b/05_question_answering.py new file mode 100644 index 0000000..e3b03f0 --- /dev/null +++ b/05_question_answering.py @@ -0,0 +1,31 @@ +from langchain import PromptTemplate +from langchain import HuggingFaceHub, LLMChain +from dotenv import load_dotenv + +load_dotenv() + +template = """Question: {question} + +Answer: """ +prompt = PromptTemplate( +        template=template, +    input_variables=['question'] +    ) + +# user question +question =input("") + +# initialize Hub LLM +hub_llm = HuggingFaceHub( +    repo_id='google/flan-t5-large', +    model_kwargs={'temperature':0} +) + +# create prompt template > LLM chain +llm_chain = LLMChain( +    prompt=prompt, +    llm=hub_llm +) + +# run the chain on the question typed in by the user +print(llm_chain.run(question)) \ No newline at end of file diff --git 
a/06_text_summarization.py b/06_text_summarization.py new file mode 100644 index 0000000..39b2a7f --- /dev/null +++ b/06_text_summarization.py @@ -0,0 +1,14 @@ +from langchain.chat_models import ChatOpenAI +from langchain.chains import LLMChain +from langchain.prompts import PromptTemplate + +llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) + +summarization_template = "Summarize the following text to one sentence: {text}" +summarization_prompt = PromptTemplate(input_variables=["text"], template=summarization_template) +summarization_chain = LLMChain(llm=llm, prompt=summarization_prompt, verbose=True) + +text = "LangChain provides many modules that can be used to build language model applications. Modules can be combined to create more complex applications, or be used individually for simple applications. The most basic building block of LangChain is calling an LLM on some input. Let’s walk through a simple example of how to do this. For this purpose, let’s pretend we are building a service that generates a company name based on what the company makes." +summarized_text = summarization_chain.predict(text=text) + +print(summarized_text) \ No newline at end of file diff --git a/07_movies_finder.py b/07_movies_finder.py new file mode 100644 index 0000000..d0ee4a5 --- /dev/null +++ b/07_movies_finder.py @@ -0,0 +1,21 @@ +from langchain.chat_models import ChatOpenAI +from langchain.prompts.chat import ( + ChatPromptTemplate, + SystemMessagePromptTemplate, + HumanMessagePromptTemplate, +) + +# Before executing the following code, make sure to have +# your OpenAI key saved in the “OPENAI_API_KEY” environment variable. +chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) + +template = "You are an assistant that helps users find information about movies." +system_message_prompt = SystemMessagePromptTemplate.from_template(template) +human_template = "Find information about the movie {movie_title}." 
+human_message_prompt = HumanMessagePromptTemplate.from_template(human_template) + +chat_prompt = ChatPromptTemplate.from_messages([system_message_prompt, human_message_prompt]) + +response = chat(chat_prompt.format_prompt(movie_title="50 shades of grey").to_messages()) + +print(response.content) \ No newline at end of file diff --git a/08_chat_models.py b/08_chat_models.py new file mode 100644 index 0000000..6ef9c7a --- /dev/null +++ b/08_chat_models.py @@ -0,0 +1,14 @@ +from langchain.chat_models import ChatOpenAI +from langchain.schema import ( + HumanMessage, + SystemMessage +) + +chat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0) + +messages = [ + SystemMessage(content="You are a helpful assistant that translates English to French."), + HumanMessage(content="Translate the following sentence: I love programming.") +] + +chat(messages) \ No newline at end of file diff --git a/09_news_summarizer.py b/09_news_summarizer.py new file mode 100644 index 0000000..5ff9557 --- /dev/null +++ b/09_news_summarizer.py @@ -0,0 +1,64 @@ +# Import necessary libraries +import json +from dotenv import load_dotenv +import requests +from newspaper import Article +from langchain.schema import HumanMessage +from langchain.chat_models import ChatOpenAI + +# Load environment variables +load_dotenv() + +# Set headers for requests +headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36' +} + +# Specify the URL of the article to summarize +article_url = "https://www.artificialintelligence-news.com/2022/01/25/meta-claims-new-ai-supercomputer-will-set-records/" + +# Create a session +session = requests.Session() + +# Fetch the article +try: + response = session.get(article_url, headers=headers, timeout=10) + + if response.status_code == 200: + article = Article(article_url) + article.download() + article.parse() + + # print(f"Title: {article.title}") + # print(f"Text: {article.text}") + + 
else: + print(f"Failed to fetch article at {article_url}") +except Exception as e: + print(f"Error occurred while fetching article at {article_url}: {e}") + +# Load the model +chat = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0) + +# Prepare the prompt +template = """You are an advanced AI assistant that summarizes online articles into bulleted lists. + +Here's the article you need to summarize. + +================== +Title: {article_title} + +{article_text} +================== + +Now, provide a summarized version of the article in a bulleted list format. +""" + +# format prompt +prompt = template.format(article_title=article.title, article_text=article.text) + + +# Generate summary +messages = [HumanMessage(content=prompt)] +summary = chat(messages) +print(summary.content) diff --git a/10_gpt4all.py b/10_gpt4all.py new file mode 100644 index 0000000..280226b --- /dev/null +++ b/10_gpt4all.py @@ -0,0 +1,18 @@ +# Import modules +from langchain.llms import GPT4All +from langchain import PromptTemplate, LLMChain +# from langchain.callbacks.base import CallbackManager +from langchain.callbacks.manager import CallbackManager +from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler + +template = """Question: {question} + +Answer: Let's think step by step.""" +prompt = PromptTemplate(template=template, input_variables=["question"]) + +callback_manager = CallbackManager([StreamingStdOutCallbackHandler()]) +llm = GPT4All(model="E:/Backup/Documents/Softwares/GPT4All/Models/ggml-wizardLM-7B.q4_2.bin", callback_manager=callback_manager, verbose=True) +llm_chain = LLMChain(prompt=prompt, llm=llm) + +question = "What happens when it rains somewhere?" 
+llm_chain.run(question) \ No newline at end of file diff --git a/11_chain_prompting.py b/11_chain_prompting.py new file mode 100644 index 0000000..209f604 --- /dev/null +++ b/11_chain_prompting.py @@ -0,0 +1,40 @@ +from langchain import PromptTemplate, LLMChain +from langchain.llms import OpenAI + +# Initialize LLM +llm = OpenAI(model_name="text-davinci-003", temperature=0) + +# Prompt 1 +template_question = """What is the name of the famous scientist who developed the theory of general relativity? +Answer: """ +prompt_question = PromptTemplate( + template=template_question, + input_variables=[]) + +# Prompt 2 +template_fact = """Provide a brief description of {scientist}'s theory of general relativity. +Answer: """ +prompt_fact = PromptTemplate( + input_variables=["scientist"], + template=template_fact) + +# Create the LLMChain for the first prompt +chain_question = LLMChain(llm=llm, prompt=prompt_question) + +# Run the LLMChain for the first prompt with an empty dictionary +response_question = chain_question.run({}) + +# Extract the scientist's name from the response +scientist = response_question.strip() + +# Create the LLMChain for the second prompt +chain_fact = LLMChain(llm=llm, prompt=prompt_fact) + +# Input data for the second prompt +input_data = {"scientist": scientist} + +# Run the LLMChain for the second prompt +response_fact = chain_fact.run(input_data) + +print("Scientist:", scientist) +print("Fact:", response_fact) diff --git a/12_news_summarizer_extended copy.py b/12_news_summarizer_extended copy.py new file mode 100644 index 0000000..dc7ea4f --- /dev/null +++ b/12_news_summarizer_extended copy.py @@ -0,0 +1,79 @@ +import os +import json +from dotenv import load_dotenv +import requests +from newspaper import Article +from langchain.schema import ( + HumanMessage +) +from langchain.chat_models import ChatOpenAI + +load_dotenv() + +headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) 
Chrome/89.0.4389.82 Safari/537.36' +} + +article_url = "https://www.artificialintelligence-news.com/2022/01/25/meta-claims-new-ai-supercomputer-will-set-records/" + +session = requests.Session() + +try: + response = session.get(article_url, headers=headers, timeout=10) + + if response.status_code == 200: + article = Article(article_url) + article.download() + article.parse() + + # print(f"Title: {article.title}") + # print(f"Text: {article.text}") + else: + print(f"Failed to fetch article at {article_url}") +except Exception as e: + print(f"Error occurred while fetching article at {article_url}: {e}") + +# we get the article data from the scraping part +article_title = article.title +article_text = article.text + +# prepare template for prompt +template = """ +As an advanced AI, you've been tasked to summarize online articles into bulleted points. Here are a few examples of how you've done this in the past: + +Example 1: +Original Article: 'The Effects of Climate Change +Summary: +- Climate change is causing a rise in global temperatures. +- This leads to melting ice caps and rising sea levels. +- Resulting in more frequent and severe weather conditions. + +Example 2: +Original Article: 'The Evolution of Artificial Intelligence +Summary: +- Artificial Intelligence (AI) has developed significantly over the past decade. +- AI is now used in multiple fields such as healthcare, finance, and transportation. +- The future of AI is promising but requires careful regulation. + +Now, here's the article you need to summarize: + +================== +Title: {article_title} + +{article_text} +================== + +Please provide a summarized version of the article in a bulleted list format. 
+""" + +# Format the Prompt +prompt = template.format(article_title=article.title, article_text=article.text) + +messages = [HumanMessage(content=prompt)] + +# load the model +chat = ChatOpenAI(model_name="gpt-3.5-turbo-16k", temperature=0.0) + +# generate summary +summary = chat(messages) +print(summary.content) \ No newline at end of file diff --git a/13_news_summarizer_ output_parsers.py b/13_news_summarizer_ output_parsers.py new file mode 100644 index 0000000..88cbd69 --- /dev/null +++ b/13_news_summarizer_ output_parsers.py @@ -0,0 +1,58 @@ +from langchain.output_parsers import PydanticOutputParser +from pydantic import validator +from pydantic import BaseModel, Field +from typing import List +from langchain.prompts import PromptTemplate +from langchain.llms import OpenAI + + + +# create output parser class +class ArticleSummary(BaseModel): + title: str = Field(description="Title of the article") + summary: List[str] = Field(description="Bulleted list summary of the article") + + # validating whether the generated summary has at least three lines + @validator('summary', allow_reuse=True) + def has_three_or_more_lines(cls, list_of_lines): + if len(list_of_lines) < 3: + raise ValueError("Generated summary has less than three bullet points!") + return list_of_lines + +# set up output parser +parser = PydanticOutputParser(pydantic_object=ArticleSummary) + +# create prompt template +# notice that we are specifying the "partial_variables" parameter +template = """ +You are a very good assistant that summarizes online articles. + +Here's the article you want to summarize. 
+ +================== +Title: {article_title} + +{article_text} +================== + +{format_instructions} +""" + +prompt = PromptTemplate( + template=template, + input_variables=["article_title", "article_text"], + partial_variables={"format_instructions": parser.get_format_instructions()} +) + +# Format the prompt using the article title and text obtained from scraping +formatted_prompt = prompt.format_prompt(article_title=article_title, article_text=article_text) + +# instantiate model class +model = OpenAI(model_name="text-davinci-003", temperature=0.0) + +# Use the model to generate a summary +output = model(formatted_prompt.to_string()) + +# Parse the output into the Pydantic model +parsed_output = parser.parse(output) +print(parsed_output) \ No newline at end of file diff --git a/14. indexes_retrievers.py b/14. indexes_retrievers.py new file mode 100644 index 0000000..13ddeab --- /dev/null +++ b/14. indexes_retrievers.py @@ -0,0 +1,87 @@ +from langchain.document_loaders import TextLoader +from langchain.text_splitter import CharacterTextSplitter +from langchain.embeddings import OpenAIEmbeddings +from langchain.vectorstores import DeepLake +from langchain.chains import RetrievalQA +from langchain.llms import OpenAI +from langchain.retrievers import ContextualCompressionRetriever +from langchain.retrievers.document_compressors import LLMChainExtractor +from dotenv import load_dotenv + +load_dotenv() + + +# text to write to a local file +# taken from https://www.theverge.com/2023/3/14/23639313/google-ai-language-model-palm-api-challenge-openai +text = """Google opens up its AI language model PaLM to challenge OpenAI and GPT-3 +Google is offering developers access to one of its most advanced AI language models: PaLM. 
+The search giant is launching an API for PaLM alongside a number of AI enterprise tools +it says will help businesses “generate text, images, code, videos, audio, and more from +simple natural language prompts.” + +PaLM is a large language model, or LLM, similar to the GPT series created by OpenAI or +Meta’s LLaMA family of models. Google first announced PaLM in April 2022. Like other LLMs, +PaLM is a flexible system that can potentially carry out all sorts of text generation and +editing tasks. You could train PaLM to be a conversational chatbot like ChatGPT, for +example, or you could use it for tasks like summarizing text or even writing code. +(It’s similar to features Google also announced today for its Workspace apps like Google +Docs and Gmail.) +""" + +# write text to local file +with open("my_file.txt", "w") as file: + file.write(text) + +# use TextLoader to load text from local file +loader = TextLoader("my_file.txt") +docs_from_file = loader.load() + +# create a text splitter +text_splitter = CharacterTextSplitter(chunk_size=400, chunk_overlap=20) + +# split documents into chunks +docs = text_splitter.split_documents(docs_from_file) + +# Before executing the following code, make sure to have +# your OpenAI key saved in the “OPENAI_API_KEY” environment variable. +embeddings = OpenAIEmbeddings(model="text-embedding-ada-002") + +# create Deep Lake dataset +# TODO: use your organization id here. 
(by default, org id is your username) +my_activeloop_org_id = "langchain_course_deeplake" +my_activeloop_dataset_name = "langchain_course_indexers_retrievers" +dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}" +db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings) + +# add documents to our Deep Lake dataset +db.add_documents(docs) + +# create retriever from db +retriever = db.as_retriever() + +# create a retrieval chain +qa_chain = RetrievalQA.from_chain_type( +    llm=OpenAI(model="text-davinci-003"), +    chain_type="stuff", +    retriever=retriever +) + +query = "How Google plans to challenge OpenAI?" +response = qa_chain.run(query) +print(response) + +# create GPT3 wrapper +llm = OpenAI(model="text-davinci-003", temperature=0) + +# create compressor for the retriever +compressor = LLMChainExtractor.from_llm(llm) +compression_retriever = ContextualCompressionRetriever( +    base_compressor=compressor, +    base_retriever=retriever +) + +# retrieving compressed documents +retrieved_docs = compression_retriever.get_relevant_documents( +    "How Google plans to challenge OpenAI?" +) +print(retrieved_docs[0].page_content) \ No newline at end of file diff --git a/my_file.txt b/my_file.txt new file mode 100644 index 0000000..715d79a --- /dev/null +++ b/my_file.txt @@ -0,0 +1,13 @@ +Google opens up its AI language model PaLM to challenge OpenAI and GPT-3 +Google is offering developers access to one of its most advanced AI language models: PaLM. +The search giant is launching an API for PaLM alongside a number of AI enterprise tools +it says will help businesses "generate text, images, code, videos, audio, and more from +simple natural language prompts." + +PaLM is a large language model, or LLM, similar to the GPT series created by OpenAI or +Meta's LLaMA family of models. Google first announced PaLM in April 2022. Like other LLMs, +PaLM is a flexible system that can potentially carry out all sorts of text generation and +editing tasks. 
You could train PaLM to be a conversational chatbot like ChatGPT, for +example, or you could use it for tasks like summarizing text or even writing code. +(It's similar to features Google also announced today for its Workspace apps like Google +Docs and Gmail.)