diff --git a/.github/workflows/ci.yml b/.github/workflows/ci_homework_6.yml
similarity index 72%
rename from .github/workflows/ci.yml
rename to .github/workflows/ci_homework_6.yml
index 1c13d0b..7956582 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci_homework_6.yml
@@ -1,7 +1,7 @@
-name: ci
+name: ci-homework-6
on:
- push:
+ push:
pull_request:
jobs:
@@ -16,6 +16,6 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
- pip install -r main_requirements.txt
+ pip install -r homework_6/requirements.txt
- name: Run tests
- run: pytest
\ No newline at end of file
+ run: pytest homework_6 -v --tb=long
\ No newline at end of file
diff --git a/.github/workflows/ci_homework_9.yml b/.github/workflows/ci_homework_9.yml
new file mode 100644
index 0000000..22a4457
--- /dev/null
+++ b/.github/workflows/ci_homework_9.yml
@@ -0,0 +1,21 @@
+# CI workflow for homework 9: installs the homework's pinned dependencies
+# and runs its pytest suite.
+name: ci-homework-9
+
+on:
+ # Runs on every push and every pull request (no branch or path filters).
+ push:
+ pull_request:
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ # Quoted so the version is read as a string, not a YAML float.
+ python-version: '3.11'
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install -r homework_9/requirements.txt
+ - name: Run tests
+ # Collect only the homework_9 tree; verbose output with long tracebacks.
+ run: pytest homework_9 -v --tb=long
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5730778
--- /dev/null
+++ b/README.md
@@ -0,0 +1,22 @@
+# How to start:
+
+#### create a virtual environment in the root folder:
+```bash
+cd /path/to/your/root/folder
+python -m venv env
+```
+
+#### activate the virtual environment:
+```bash
+source env/bin/activate
+```
+
+#### upgrade pip:
+```bash
+python -m pip install --upgrade pip
+```
+
+#### install requirements:
+```bash
+pip install -r main_requirements.txt
+```
diff --git a/homework_6/pr2/tests/data/__init__.py b/homework_6/pr3/__init__.py
similarity index 100%
rename from homework_6/pr2/tests/data/__init__.py
rename to homework_6/pr3/__init__.py
diff --git a/homework_6/pr3/report_model.html b/homework_6/pr3/report_model.html
deleted file mode 100644
index fd2844e..0000000
--- a/homework_6/pr3/report_model.html
+++ /dev/null
@@ -1,1091 +0,0 @@
-
-
-
-
- report.html
-
-
-
-
- report.html
- Report generated on 17-Sep-2024 at 08:29:45 by pytest-html
- v4.1.1
-
-
-
-
-
- |
- |
-
-
-
-
-
- No results found. Check the filters.
- |
-
-
-
-
-
-
-
-
-
-
-
-
Summary
-
-
-
10 tests took 00:00:07.
-
(Un)check the boxes to filter the results.
-
-
-
-
-
-
-
-
-
-
-
-
- Result |
- Test |
- Duration |
- Links |
-
-
-
-
-
-
\ No newline at end of file
diff --git a/homework_6/pr3/tests/model/test_model.py b/homework_6/pr3/tests/model/test_model.py
deleted file mode 100644
index bf2fe77..0000000
--- a/homework_6/pr3/tests/model/test_model.py
+++ /dev/null
@@ -1,61 +0,0 @@
-import pytest
-from project.model import TextClassifier
-
-
-@pytest.fixture
-def classifier():
- return TextClassifier()
-
-
-def test_predict(classifier):
- text = "This movie is great!"
-
- assert classifier.predict(text) in ["POSITIVE", "NEGATIVE"]
-
-
-def test_predict_proba(classifier):
- text = "The weather is nice today."
- probability = classifier.predict_proba(text)
-
- assert 0 <= probability <= 1
-
-
-def test_initialization():
- custom_model = "distilbert-base-uncased"
- classifier = TextClassifier(model_name=custom_model)
-
- assert classifier.tokenizer.name_or_path == custom_model
- assert classifier.model.name_or_path == custom_model
-
-
-# Behavior tests
-@pytest.mark.parametrize("text_1, text_2, expected_sentiment", [
- ("This movie is great!", "This movie is blody awesome!", "POSITIVE"),
- ("This movie is terrible!", "This movie is disappointing!", "NEGATIVE"),
- ("Movie delivers an exciting and refreshing take on its genre, featuring compelling characters, sharp dialogue, and a plot that keeps you hooked, all wrapped in stunning visuals and a dynamic soundtrack.",
- "Movie is is disgustingly good, with outrageously captivating performances and a ridiculously well-executed plot that grabs you from the start. The visuals are absurdly stunning, and the soundtrack is almost unfairly perfect, making it an insanely enjoyable watch from beginning to end.",
- "POSITIVE")
-])
-def test_invariance(classifier, text_1, text_2, expected_sentiment):
- assert classifier.predict(text_1) == expected_sentiment
- assert classifier.predict(text_1) == classifier.predict(text_2)
-
-
-@pytest.mark.parametrize("text_1, text_2", [
- ("Movie is a visually stunning and emotionally gripping film, with outstanding performances and a well-crafted story that keeps you engaged from start to finish.",
- "Movie is visually impressive but falls flat with a lackluster story and underwhelming performances, making it hard to stay engaged from start to finish."),
- ("Movie is an engaging and heartwarming film, with strong performances and a captivating story that draws you in, beautifully blending emotion, humor, and stunning visuals for a thoroughly enjoyable experience.",
- "Movie tries to be engaging, but weak performances and a disjointed story leave it feeling flat, lacking the emotional depth or humor needed to make it truly enjoyable.")
-])
-def test_directional(classifier, text_1, text_2):
- assert classifier.predict(text_1) == "POSITIVE"
- assert classifier.predict(text_2) == "NEGATIVE"
-
-
-@pytest.mark.parametrize("text, expected_sentiment", [
- ("This movie is great!", "POSITIVE"),
- ("I hate this movie!", "NEGATIVE")
-])
-def test_minimum_functionality(classifier, text, expected_sentiment):
- assert classifier.predict(text) == expected_sentiment
-
diff --git a/homework_6/pr4/requirements.txt b/homework_6/pr4/requirements.txt
deleted file mode 100644
index 231fd18..0000000
--- a/homework_6/pr4/requirements.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-wandb==0.18.1
-transformers==4.44.1
-
diff --git a/homework_6/requirements.txt b/homework_6/requirements.txt
new file mode 100644
index 0000000..a77d646
--- /dev/null
+++ b/homework_6/requirements.txt
@@ -0,0 +1,7 @@
+wandb==0.18.1
+pandas==2.2.1
+transformers==4.44.1
+pytest==8.3.0
+torch==2.2.1
+great-expectations==0.15.25
+pyarrow==17.0.0
diff --git a/homework_6/pr3/tests/__init__.py b/homework_6/tests/__init__.py
similarity index 100%
rename from homework_6/pr3/tests/__init__.py
rename to homework_6/tests/__init__.py
diff --git a/homework_6/pr3/tests/model/__init__.py b/homework_6/tests/data/__init__.py
similarity index 100%
rename from homework_6/pr3/tests/model/__init__.py
rename to homework_6/tests/data/__init__.py
diff --git a/homework_6/pr2/tests/data/test_data.py b/homework_6/tests/data/test_data.py
similarity index 100%
rename from homework_6/pr2/tests/data/test_data.py
rename to homework_6/tests/data/test_data.py
diff --git a/homework_9/pr1/README.md b/homework_9/pr1/README.md
index 9a7fc76..3f69e3f 100644
--- a/homework_9/pr1/README.md
+++ b/homework_9/pr1/README.md
@@ -6,5 +6,5 @@ streamlit run project/main.py
## Tests
```bash
-pytest tests/test.py
+pytest ../tests/streamlit_func/test_streamlit.py
```
\ No newline at end of file
diff --git a/homework_9/pr1/project/__init__.py b/homework_9/pr1/__init__.py
similarity index 100%
rename from homework_9/pr1/project/__init__.py
rename to homework_9/pr1/__init__.py
diff --git a/homework_9/pr1/tests/__init__.py b/homework_9/pr1/func_st/__init__.py
similarity index 100%
rename from homework_9/pr1/tests/__init__.py
rename to homework_9/pr1/func_st/__init__.py
diff --git a/homework_9/pr1/project/main.py b/homework_9/pr1/func_st/main.py
similarity index 70%
rename from homework_9/pr1/project/main.py
rename to homework_9/pr1/func_st/main.py
index 8bc302f..70f1387 100644
--- a/homework_9/pr1/project/main.py
+++ b/homework_9/pr1/func_st/main.py
@@ -1,8 +1,7 @@
import streamlit as st
from utils import Model
-
-model = Model(tokenizer_name="distilbert-base-uncased-finetuned-sst-2-english", model_name="distilbert-base-uncased-finetuned-sst-2-english")
+model = Model(model_name='distilbert-base-uncased-finetuned-sst-2-english')
# Create the Streamlit app title
st.title('Sentiment Analysis with DistilBERT')
@@ -17,11 +16,8 @@
if analyze_button and user_input:
# Get the prediction
label = model.predict(user_input)
- probabilities = model.predict_proba(user_input)
+ score = model.predict_proba(user_input)
- # Get probability score for predicted class
- score = probabilities[1] if label == "POSITIVE" else probabilities[0]
-
# Display results
st.write("### Results:")
label_color = "green" if label == "POSITIVE" else "red"
diff --git a/homework_9/pr1/func_st/utils.py b/homework_9/pr1/func_st/utils.py
new file mode 100644
index 0000000..7565235
--- /dev/null
+++ b/homework_9/pr1/func_st/utils.py
@@ -0,0 +1,26 @@
+from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
+import torch
+
+class Model:
+ """Sentiment classifier wrapping a DistilBERT tokenizer and sequence-classification model."""
+ def __init__(self, model_name="distilbert-base-uncased-finetuned-sst-2-english"):
+ """Load the tokenizer and model for ``model_name`` and put the model in evaluation mode."""
+ self.tokenizer = DistilBertTokenizer.from_pretrained(model_name)
+ self.model = DistilBertForSequenceClassification.from_pretrained(model_name)
+ self.model.eval()
+
+ def predict(self, text):
+ """Return the label string of the highest-scoring class for ``text``.
+
+ The class index is the argmax of the logits and is mapped to a label
+ through ``self.model.config.id2label``.
+ """
+ inputs = self.tokenizer(
+ text, return_tensors="pt", truncation=True, padding=True
+ )
+ # Inference only: skip gradient tracking.
+ with torch.no_grad():
+ outputs = self.model(**inputs)
+ # .item() assumes a single prediction -- TODO confirm callers never pass a batch.
+ predicted_class_id = torch.argmax(outputs.logits, dim=1).item()
+ return self.model.config.id2label[predicted_class_id]
+
+ def predict_proba(self, text):
+ """Return the softmax probability of the top-scoring class for ``text``.
+
+ Only the maximum probability is returned, not the per-class distribution.
+ """
+ # NOTE(review): repeats the tokenization and forward pass done in
+ # predict(); a caller that needs both label and score runs the model twice.
+ inputs = self.tokenizer(
+ text, return_tensors="pt", truncation=True, padding=True
+ )
+ with torch.no_grad():
+ outputs = self.model(**inputs)
+ probabilities = torch.softmax(outputs.logits, dim=1)
+ # squeeze() drops size-1 dimensions before taking the max -- assumes one input text.
+ return probabilities.squeeze().max().item()
diff --git a/homework_9/pr1/project/utils.py b/homework_9/pr1/project/utils.py
deleted file mode 100644
index a64c512..0000000
--- a/homework_9/pr1/project/utils.py
+++ /dev/null
@@ -1,22 +0,0 @@
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import torch
-
-class Model:
- def __init__(self,tokenizer_name,model_name):
- self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
- self.model = AutoModelForSequenceClassification.from_pretrained(model_name)
-
- def predict(self, text):
- inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
- with torch.no_grad():
- outputs = self.model(**inputs)
- prediction = torch.nn.functional.softmax(outputs.logits, dim=-1)
- label_id = prediction.argmax().item()
- return "POSITIVE" if label_id == 1 else "NEGATIVE"
-
- def predict_proba(self, text):
- inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
- with torch.no_grad():
- outputs = self.model(**inputs)
- probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
- return probabilities[0].tolist() # Return probabilities for all classes
\ No newline at end of file
diff --git a/homework_9/pr2/README.md b/homework_9/pr2/README.md
new file mode 100644
index 0000000..bd4d38c
--- /dev/null
+++ b/homework_9/pr2/README.md
@@ -0,0 +1,10 @@
+## Run
+```bash
+pip install -r ../requirements.txt
+python func_gradio/main.py
+```
+
+## Tests
+```bash
+pytest ../tests/gradio_func/test_gradio.py
+```
\ No newline at end of file
diff --git a/homework_9/pr2/__init__.py b/homework_9/pr2/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/homework_9/pr2/func_gradio/__init__.py b/homework_9/pr2/func_gradio/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/homework_9/pr2/func_gradio/main.py b/homework_9/pr2/func_gradio/main.py
new file mode 100644
index 0000000..1ef4334
--- /dev/null
+++ b/homework_9/pr2/func_gradio/main.py
@@ -0,0 +1,33 @@
+import gradio as gr
+from utils import Model
+
+
+# Build the classifier once at import time; analyze_sentiment reuses this instance.
+model = Model(model_name="distilbert-base-uncased-finetuned-sst-2-english")
+
+def analyze_sentiment(text):
+    """Classify ``text`` and build the two values shown by the Gradio UI.
+
+    Args:
+        text: The user-supplied string to analyze.
+
+    Returns:
+        A ``(colored_label, score)`` tuple: an HTML snippet with the predicted
+        label (green for "POSITIVE", red otherwise) and the confidence
+        formatted to three decimal places.
+    """
+    # Get the prediction
+    label = model.predict(text)
+    probability = model.predict_proba(text)
+
+    # Format probability score
+    score = f"{probability:.3f}"
+
+    # Add color to label. Both branches of the previous conditional produced
+    # the bare label, so the HTML output was never actually colored.
+    label_color = "green" if label == "POSITIVE" else "red"
+    colored_label = f"<span style='color: {label_color}'>{label}</span>"
+
+    return colored_label, score
+
+# Create the Gradio interface: one text input routed through analyze_sentiment.
+demo = gr.Interface(
+ fn=analyze_sentiment,
+ inputs=gr.Textbox(label="Enter text to analyze:", value="I love this app!"),
+ # Two outputs, in the same order as analyze_sentiment's return tuple:
+ # the label markup first, then the formatted score.
+ outputs=[
+ gr.HTML(label="Sentiment"),
+ gr.Textbox(label="Confidence"),
+ ],
+ title="Sentiment Analysis with DistilBERT",
+ description="Analyze the sentiment of your text using DistilBERT"
+)
+
+# Launch only when run as a script, so importing this module does not start the UI.
+if __name__ == "__main__":
+ demo.launch()
\ No newline at end of file
diff --git a/homework_9/pr2/func_gradio/utils.py b/homework_9/pr2/func_gradio/utils.py
new file mode 100644
index 0000000..7565235
--- /dev/null
+++ b/homework_9/pr2/func_gradio/utils.py
@@ -0,0 +1,26 @@
+from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
+import torch
+
+class Model:
+ """Sentiment classifier wrapping a DistilBERT tokenizer and sequence-classification model."""
+ def __init__(self, model_name="distilbert-base-uncased-finetuned-sst-2-english"):
+ """Load the tokenizer and model for ``model_name`` and put the model in evaluation mode."""
+ self.tokenizer = DistilBertTokenizer.from_pretrained(model_name)
+ self.model = DistilBertForSequenceClassification.from_pretrained(model_name)
+ self.model.eval()
+
+ def predict(self, text):
+ """Return the label string of the highest-scoring class for ``text``.
+
+ The class index is the argmax of the logits and is mapped to a label
+ through ``self.model.config.id2label``.
+ """
+ inputs = self.tokenizer(
+ text, return_tensors="pt", truncation=True, padding=True
+ )
+ # Inference only: skip gradient tracking.
+ with torch.no_grad():
+ outputs = self.model(**inputs)
+ # .item() assumes a single prediction -- TODO confirm callers never pass a batch.
+ predicted_class_id = torch.argmax(outputs.logits, dim=1).item()
+ return self.model.config.id2label[predicted_class_id]
+
+ def predict_proba(self, text):
+ """Return the softmax probability of the top-scoring class for ``text``.
+
+ Only the maximum probability is returned, not the per-class distribution.
+ """
+ # NOTE(review): repeats the tokenization and forward pass done in
+ # predict(); a caller that needs both label and score runs the model twice.
+ inputs = self.tokenizer(
+ text, return_tensors="pt", truncation=True, padding=True
+ )
+ with torch.no_grad():
+ outputs = self.model(**inputs)
+ probabilities = torch.softmax(outputs.logits, dim=1)
+ # squeeze() drops size-1 dimensions before taking the max -- assumes one input text.
+ return probabilities.squeeze().max().item()
diff --git a/homework_9/pr1/requirements.txt b/homework_9/requirements.txt
similarity index 74%
rename from homework_9/pr1/requirements.txt
rename to homework_9/requirements.txt
index 1e24323..1a121ad 100644
--- a/homework_9/pr1/requirements.txt
+++ b/homework_9/requirements.txt
@@ -3,4 +3,5 @@ numpy==1.26.4
pandas==2.2.1
torch==2.2.1
streamlit==1.39.0
-pytest==8.3.0
\ No newline at end of file
+pytest==8.3.0
+gradio==4.44.1
\ No newline at end of file
diff --git a/homework_9/tests/__init__.py b/homework_9/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/homework_9/tests/gradio_func/__init__.py b/homework_9/tests/gradio_func/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/homework_9/pr1/tests/test.py b/homework_9/tests/gradio_func/test_gradio.py
similarity index 56%
rename from homework_9/pr1/tests/test.py
rename to homework_9/tests/gradio_func/test_gradio.py
index afbb3ae..3be5980 100644
--- a/homework_9/pr1/tests/test.py
+++ b/homework_9/tests/gradio_func/test_gradio.py
@@ -1,10 +1,9 @@
import pytest
-from project.utils import Model
+from pr2.func_gradio.utils import Model
@pytest.fixture
def model():
return Model(
- tokenizer_name="distilbert-base-uncased-finetuned-sst-2-english",
model_name="distilbert-base-uncased-finetuned-sst-2-english"
)
@@ -20,25 +19,21 @@ def test_model_prediction(model):
def test_model_probabilities(model):
# Test probability output format
text = "This is a test."
- probabilities = model.predict_proba(text)
+ probability = model.predict_proba(text)
- # Check if probabilities is a tuple/list of length 2
- assert len(probabilities) == 2
+ # Check if probability is a float
+ assert isinstance(probability, float)
- # Check if probabilities sum to approximately 1
- assert abs(sum(probabilities) - 1.0) < 1e-6
-
- # Check if probabilities are between 0 and 1
- assert all(0 <= p <= 1 for p in probabilities)
+ # Check if probability is between 0 and 1
+ assert 0 <= probability <= 1
def test_model_consistency(model):
# Test if multiple predictions on same text are consistent
text = "This is amazing!"
first_prediction = model.predict(text)
- first_probabilities = model.predict_proba(text)
+ first_probability = model.predict_proba(text)
# Test multiple times to ensure consistency
for _ in range(3):
assert model.predict(text) == first_prediction
- new_probabilities = model.predict_proba(text)
- assert all(abs(a - b) < 1e-6 for a, b in zip(first_probabilities, new_probabilities))
\ No newline at end of file
+ assert abs(model.predict_proba(text) - first_probability) < 1e-6
\ No newline at end of file
diff --git a/homework_9/tests/streamlit_func/__init__.py b/homework_9/tests/streamlit_func/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/homework_9/tests/streamlit_func/test_streamlit.py b/homework_9/tests/streamlit_func/test_streamlit.py
new file mode 100644
index 0000000..914de79
--- /dev/null
+++ b/homework_9/tests/streamlit_func/test_streamlit.py
@@ -0,0 +1,39 @@
+import pytest
+from pr1.func_st.utils import Model
+
+@pytest.fixture(scope="module")
+def model():
+    """Provide one shared ``Model`` instance to every test in this module.
+
+    Module scope loads the tokenizer and weights once per module instead of
+    once per test. Sharing is safe here because the tests only call
+    ``predict`` and ``predict_proba`` and never modify the instance.
+    """
+    return Model(
+        model_name="distilbert-base-uncased-finetuned-sst-2-english"
+    )
+
+def test_model_prediction(model):
+ """One positive and one negative sentence must map to the matching label strings."""
+ # Test positive sentiment
+ positive_text = "I love this movie!"
+ assert model.predict(positive_text) == "POSITIVE"
+
+ # Test negative sentiment
+ negative_text = "I hate this movie!"
+ assert model.predict(negative_text) == "NEGATIVE"
+
+def test_model_probabilities(model):
+ """predict_proba must return a single float within [0, 1]."""
+ # Test probability output format
+ text = "This is a test."
+ probability = model.predict_proba(text)
+
+ # Check if probability is a float
+ assert isinstance(probability, float)
+
+ # Check if probability is between 0 and 1
+ # NOTE(review): the value is the max softmax probability, so for a
+ # two-class model it should never fall below 0.5 -- confirm before tightening.
+ assert 0 <= probability <= 1
+
+def test_model_consistency(model):
+ """Repeated calls on the same text must give the same label and the same score."""
+ # Test if multiple predictions on same text are consistent
+ text = "This is amazing!"
+ first_prediction = model.predict(text)
+ first_probability = model.predict_proba(text)
+
+ # Test multiple times to ensure consistency
+ for _ in range(3):
+ assert model.predict(text) == first_prediction
+ # Scores are compared with a 1e-6 absolute tolerance rather than exact equality.
+ assert abs(model.predict_proba(text) - first_probability) < 1e-6
\ No newline at end of file