From 378828a2a7964b8716c9f04734043ddb421686e1 Mon Sep 17 00:00:00 2001 From: danilyef Date: Tue, 3 Sep 2024 08:29:27 +0200 Subject: [PATCH] main.py changed --- homework_3/pr7/main.py | 56 ------------------------------------------ 1 file changed, 56 deletions(-) diff --git a/homework_3/pr7/main.py b/homework_3/pr7/main.py index 962519b..c3a8b0c 100644 --- a/homework_3/pr7/main.py +++ b/homework_3/pr7/main.py @@ -1,15 +1,3 @@ -''' -Before starting the script, create a virtual environment: - -1. cd /path/to/your/project -2. python -m venv env -3. source env/bin/activate -5. pip install -r requirements.txt - -After these steps start script from cmd: -5. python main.py -''' - from datasets import load_dataset import lancedb @@ -20,48 +8,6 @@ from sentence_transformers import SentenceTransformer import typer -""" -# Load the Rick and Morty Transcript dataset -dataset = load_dataset("Prarabdha/Rick_and_Morty_Transcript") - -# Remove columns -dataset = dataset.remove_columns(['Unnamed: 0', 'episode no.']) - -# Dataset to pandas -dataset = dataset['train'].to_pandas() - - -# Connect to the LanceDB database -db = lancedb.connect("/tmp/db") - -# Initialize the sentence transformer model for embedding -model = get_registry().get("sentence-transformers").create(name="BAAI/bge-small-en-v1.5", device="cpu") - - -# This class defines the schema for storing quotes in LanceDB -class Quotes(LanceModel): - # Field for the speaker's name - speaker: str - # Field for the dialogue text, which will be used as the source for embedding - dialouge: str = model.SourceField() - # Field for the vector embedding of the dialogue, with dimensions matching the model - vector: Vector(model.ndims()) = model.VectorField() - - -# Create a table in the LanceDB database with the Quotes schema -table = db.create_table("rick_and_morty", schema=Quotes) - -# Add the dataset to the table -table.add(dataset) - -# Perform a semantic search query on the table -query = table.search("What is the purpose of existence?").limit(5).to_df() - -# Print the query results -print(query) -""" - -###################################### app = typer.Typer() @@ -121,5 +67,3 @@ def search_index(query: str, top_n: int = 2): app() -# python main.py create-index -# python main.py search-index "Who are you?" --top-n 2 \ No newline at end of file