diff --git a/.github/workflows/ci.yml b/.github/workflows/ci_homework_6.yml similarity index 72% rename from .github/workflows/ci.yml rename to .github/workflows/ci_homework_6.yml index 1c13d0b..7956582 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci_homework_6.yml @@ -1,7 +1,7 @@ -name: ci +name: ci-homework-6 on: - push: + push: pull_request: jobs: @@ -16,6 +16,6 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r main_requirements.txt + pip install -r homework_6/requirements.txt - name: Run tests - run: pytest \ No newline at end of file + run: pytest homework_6 -v --tb=long \ No newline at end of file diff --git a/.github/workflows/ci_homework_9.yml b/.github/workflows/ci_homework_9.yml new file mode 100644 index 0000000..22a4457 --- /dev/null +++ b/.github/workflows/ci_homework_9.yml @@ -0,0 +1,21 @@ +name: ci-homework-9 + +on: + push: + pull_request: + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r homework_9/requirements.txt + - name: Run tests + run: pytest homework_9 -v --tb=long \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..5730778 --- /dev/null +++ b/README.md @@ -0,0 +1,22 @@ +# How to start: + +#### create virtual environment in the root folder: +```bash +cd /path/to/your/root/folder +python -m venv env +``` + +#### activate virtual environment: +```bash +source env/bin/activate +``` + +#### upgrade pip: +```bash +python -m pip install --upgrade pip +``` + +#### install requirements: +```bash +pip install -r main_requirements.txt +``` diff --git a/homework_6/pr2/tests/data/__init__.py b/homework_6/pr3/__init__.py similarity index 100% rename from homework_6/pr2/tests/data/__init__.py rename to homework_6/pr3/__init__.py diff --git a/homework_6/pr3/report_model.html b/homework_6/pr3/report_model.html deleted file mode 100644 index fd2844e..0000000 --- a/homework_6/pr3/report_model.html +++ /dev/null @@ -1,1091 +0,0 @@ - - - - - report.html - - - - -

report.html

-

Report generated on 17-Sep-2024 at 08:29:45 by pytest-html - v4.1.1

-
-

Environment

-
-
- - - - - -
-
-

Summary

-
-
-

10 tests took 00:00:07.

-

(Un)check the boxes to filter the results.

-
- -
-
-
-
- - 0 Failed, - - 10 Passed, - - 0 Skipped, - - 0 Expected failures, - - 0 Unexpected passes, - - 0 Errors, - - 0 Reruns -
-
-  /  -
-
-
-
-
-
-
-
- - - - - - - - - -
ResultTestDurationLinks
- - - \ No newline at end of file diff --git a/homework_6/pr3/tests/model/test_model.py b/homework_6/pr3/tests/model/test_model.py deleted file mode 100644 index bf2fe77..0000000 --- a/homework_6/pr3/tests/model/test_model.py +++ /dev/null @@ -1,61 +0,0 @@ -import pytest -from project.model import TextClassifier - - -@pytest.fixture -def classifier(): - return TextClassifier() - - -def test_predict(classifier): - text = "This movie is great!" - - assert classifier.predict(text) in ["POSITIVE", "NEGATIVE"] - - -def test_predict_proba(classifier): - text = "The weather is nice today." - probability = classifier.predict_proba(text) - - assert 0 <= probability <= 1 - - -def test_initialization(): - custom_model = "distilbert-base-uncased" - classifier = TextClassifier(model_name=custom_model) - - assert classifier.tokenizer.name_or_path == custom_model - assert classifier.model.name_or_path == custom_model - - -# Behavior tests -@pytest.mark.parametrize("text_1, text_2, expected_sentiment", [ - ("This movie is great!", "This movie is blody awesome!", "POSITIVE"), - ("This movie is terrible!", "This movie is disappointing!", "NEGATIVE"), - ("Movie delivers an exciting and refreshing take on its genre, featuring compelling characters, sharp dialogue, and a plot that keeps you hooked, all wrapped in stunning visuals and a dynamic soundtrack.", - "Movie is is disgustingly good, with outrageously captivating performances and a ridiculously well-executed plot that grabs you from the start. The visuals are absurdly stunning, and the soundtrack is almost unfairly perfect, making it an insanely enjoyable watch from beginning to end.", - "POSITIVE") -]) -def test_invariance(classifier, text_1, text_2, expected_sentiment): - assert classifier.predict(text_1) == expected_sentiment - assert classifier.predict(text_1) == classifier.predict(text_2) - - -@pytest.mark.parametrize("text_1, text_2", [ - ("Movie is a visually stunning and emotionally gripping film, with outstanding performances and a well-crafted story that keeps you engaged from start to finish.", - "Movie is visually impressive but falls flat with a lackluster story and underwhelming performances, making it hard to stay engaged from start to finish."), - ("Movie is an engaging and heartwarming film, with strong performances and a captivating story that draws you in, beautifully blending emotion, humor, and stunning visuals for a thoroughly enjoyable experience.", - "Movie tries to be engaging, but weak performances and a disjointed story leave it feeling flat, lacking the emotional depth or humor needed to make it truly enjoyable.") -]) -def test_directional(classifier, text_1, text_2): - assert classifier.predict(text_1) == "POSITIVE" - assert classifier.predict(text_2) == "NEGATIVE" - - -@pytest.mark.parametrize("text, expected_sentiment", [ - ("This movie is great!", "POSITIVE"), - ("I hate this movie!", "NEGATIVE") -]) -def test_minimum_functionality(classifier, text, expected_sentiment): - assert classifier.predict(text) == expected_sentiment - diff --git a/homework_6/pr4/requirements.txt b/homework_6/pr4/requirements.txt deleted file mode 100644 index 231fd18..0000000 --- a/homework_6/pr4/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -wandb==0.18.1 -transformers==4.44.1 - diff --git a/homework_6/requirements.txt b/homework_6/requirements.txt new file mode 100644 index 0000000..a77d646 --- /dev/null +++ b/homework_6/requirements.txt @@ -0,0 +1,7 @@ +wandb==0.18.1 +pandas==2.2.1 +transformers==4.44.1 +pytest==8.3.0 +torch==2.2.1 +great-expectations==0.15.25 +pyarrow==17.0.0 diff --git a/homework_6/pr3/tests/__init__.py b/homework_6/tests/__init__.py similarity index 100% rename from homework_6/pr3/tests/__init__.py rename to homework_6/tests/__init__.py diff --git a/homework_6/pr3/tests/model/__init__.py b/homework_6/tests/data/__init__.py similarity index 100% rename from homework_6/pr3/tests/model/__init__.py rename to homework_6/tests/data/__init__.py diff --git a/homework_6/pr2/tests/data/test_data.py b/homework_6/tests/data/test_data.py similarity index 100% rename from homework_6/pr2/tests/data/test_data.py rename to homework_6/tests/data/test_data.py diff --git a/homework_9/pr1/README.md b/homework_9/pr1/README.md index 9a7fc76..3f69e3f 100644 --- a/homework_9/pr1/README.md +++ b/homework_9/pr1/README.md @@ -6,5 +6,5 @@ streamlit run project/main.py ## Tests ```bash -pytest tests/test.py +pytest tests/test_model.py ``` \ No newline at end of file diff --git a/homework_9/pr1/project/__init__.py b/homework_9/pr1/__init__.py similarity index 100% rename from homework_9/pr1/project/__init__.py rename to homework_9/pr1/__init__.py diff --git a/homework_9/pr1/tests/__init__.py b/homework_9/pr1/func_st/__init__.py similarity index 100% rename from homework_9/pr1/tests/__init__.py rename to homework_9/pr1/func_st/__init__.py diff --git a/homework_9/pr1/project/main.py b/homework_9/pr1/func_st/main.py similarity index 70% rename from homework_9/pr1/project/main.py rename to homework_9/pr1/func_st/main.py index 8bc302f..70f1387 100644 --- a/homework_9/pr1/project/main.py +++ b/homework_9/pr1/func_st/main.py @@ -1,8 +1,7 @@ import streamlit as st from utils import Model - -model = Model(tokenizer_name="distilbert-base-uncased-finetuned-sst-2-english", model_name="distilbert-base-uncased-finetuned-sst-2-english") +model = Model(model_name='distilbert-base-uncased-finetuned-sst-2-english') # Create the Streamlit app title st.title('Sentiment Analysis with DistilBERT') @@ -17,11 +16,8 @@ if analyze_button and user_input: # Get the prediction label = model.predict(user_input) - probabilities = model.predict_proba(user_input) + score = model.predict_proba(user_input) - # Get probability score for predicted class - score = probabilities[1] if label == "POSITIVE" else probabilities[0] - # Display results st.write("### Results:") label_color = "green" if label == "POSITIVE" else "red" diff --git a/homework_9/pr1/func_st/utils.py b/homework_9/pr1/func_st/utils.py new file mode 100644 index 0000000..7565235 --- /dev/null +++ b/homework_9/pr1/func_st/utils.py @@ -0,0 +1,26 @@ +from transformers import DistilBertTokenizer, DistilBertForSequenceClassification +import torch + +class Model: + def __init__(self, model_name="distilbert-base-uncased-finetuned-sst-2-english"): + self.tokenizer = DistilBertTokenizer.from_pretrained(model_name) + self.model = DistilBertForSequenceClassification.from_pretrained(model_name) + self.model.eval() + + def predict(self, text): + inputs = self.tokenizer( + text, return_tensors="pt", truncation=True, padding=True + ) + with torch.no_grad(): + outputs = self.model(**inputs) + predicted_class_id = torch.argmax(outputs.logits, dim=1).item() + return self.model.config.id2label[predicted_class_id] + + def predict_proba(self, text): + inputs = self.tokenizer( + text, return_tensors="pt", truncation=True, padding=True + ) + with torch.no_grad(): + outputs = self.model(**inputs) + probabilities = torch.softmax(outputs.logits, dim=1) + return probabilities.squeeze().max().item() diff --git a/homework_9/pr1/project/utils.py b/homework_9/pr1/project/utils.py deleted file mode 100644 index a64c512..0000000 --- a/homework_9/pr1/project/utils.py +++ /dev/null @@ -1,22 +0,0 @@ -from transformers import AutoTokenizer, AutoModelForSequenceClassification -import torch - -class Model: - def __init__(self,tokenizer_name,model_name): - self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) - self.model = AutoModelForSequenceClassification.from_pretrained(model_name) - - def predict(self, text): - inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512) - with torch.no_grad(): - outputs = self.model(**inputs) - prediction = torch.nn.functional.softmax(outputs.logits, dim=-1) - label_id = prediction.argmax().item() - return "POSITIVE" if label_id == 1 else "NEGATIVE" - - def predict_proba(self, text): - inputs = self.tokenizer(text, return_tensors="pt", truncation=True, max_length=512) - with torch.no_grad(): - outputs = self.model(**inputs) - probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1) - return probabilities[0].tolist() # Return probabilities for all classes \ No newline at end of file diff --git a/homework_9/pr2/README.md b/homework_9/pr2/README.md new file mode 100644 index 0000000..bd4d38c --- /dev/null +++ b/homework_9/pr2/README.md @@ -0,0 +1,10 @@ +## Run +```bash +pip install -r requirements.txt +python project/main.py +``` + +## Tests +```bash +pytest tests/test_model.py +``` \ No newline at end of file diff --git a/homework_9/pr2/__init__.py b/homework_9/pr2/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/homework_9/pr2/func_gradio/__init__.py b/homework_9/pr2/func_gradio/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/homework_9/pr2/func_gradio/main.py b/homework_9/pr2/func_gradio/main.py new file mode 100644 index 0000000..1ef4334 --- /dev/null +++ b/homework_9/pr2/func_gradio/main.py @@ -0,0 +1,33 @@ +import gradio as gr +from utils import Model + + +model = Model(model_name="distilbert-base-uncased-finetuned-sst-2-english") + +def analyze_sentiment(text): + # Get the prediction + label = model.predict(text) + probability = model.predict_proba(text) + + # Format probability score + score = f"{probability:.3f}" + + # Add color to label + colored_label = f"{label}" if label == "POSITIVE" else f"{label}" + + return colored_label, score + +# Create the Gradio interface +demo = gr.Interface( + fn=analyze_sentiment, + inputs=gr.Textbox(label="Enter text to analyze:", value="I love this app!"), + outputs=[ + gr.HTML(label="Sentiment"), + gr.Textbox(label="Confidence"), + ], + title="Sentiment Analysis with DistilBERT", + description="Analyze the sentiment of your text using DistilBERT" +) + +if __name__ == "__main__": + demo.launch() \ No newline at end of file diff --git a/homework_9/pr2/func_gradio/utils.py b/homework_9/pr2/func_gradio/utils.py new file mode 100644 index 0000000..7565235 --- /dev/null +++ b/homework_9/pr2/func_gradio/utils.py @@ -0,0 +1,26 @@ +from transformers import DistilBertTokenizer, DistilBertForSequenceClassification +import torch + +class Model: + def __init__(self, model_name="distilbert-base-uncased-finetuned-sst-2-english"): + self.tokenizer = DistilBertTokenizer.from_pretrained(model_name) + self.model = DistilBertForSequenceClassification.from_pretrained(model_name) + self.model.eval() + + def predict(self, text): + inputs = self.tokenizer( + text, return_tensors="pt", truncation=True, padding=True + ) + with torch.no_grad(): + outputs = self.model(**inputs) + predicted_class_id = torch.argmax(outputs.logits, dim=1).item() + return self.model.config.id2label[predicted_class_id] + + def predict_proba(self, text): + inputs = self.tokenizer( + text, return_tensors="pt", truncation=True, padding=True + ) + with torch.no_grad(): + outputs = self.model(**inputs) + probabilities = torch.softmax(outputs.logits, dim=1) + return probabilities.squeeze().max().item() diff --git a/homework_9/pr1/requirements.txt b/homework_9/requirements.txt similarity index 74% rename from homework_9/pr1/requirements.txt rename to homework_9/requirements.txt index 1e24323..1a121ad 100644 --- a/homework_9/pr1/requirements.txt +++ b/homework_9/requirements.txt @@ -3,4 +3,5 @@ numpy==1.26.4 pandas==2.2.1 torch==2.2.1 streamlit==1.39.0 -pytest==8.3.0 \ No newline at end of file +pytest==8.3.0 +gradio==4.44.1 \ No newline at end of file diff --git a/homework_9/tests/__init__.py b/homework_9/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/homework_9/tests/gradio_func/__init__.py b/homework_9/tests/gradio_func/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/homework_9/pr1/tests/test.py b/homework_9/tests/gradio_func/test_gradio.py similarity index 56% rename from homework_9/pr1/tests/test.py rename to homework_9/tests/gradio_func/test_gradio.py index afbb3ae..3be5980 100644 --- a/homework_9/pr1/tests/test.py +++ b/homework_9/tests/gradio_func/test_gradio.py @@ -1,10 +1,9 @@ import pytest -from project.utils import Model +from pr2.func_gradio.utils import Model @pytest.fixture def model(): return Model( - tokenizer_name="distilbert-base-uncased-finetuned-sst-2-english", model_name="distilbert-base-uncased-finetuned-sst-2-english" ) @@ -20,25 +19,21 @@ def test_model_prediction(model): def test_model_probabilities(model): # Test probability output format text = "This is a test." - probabilities = model.predict_proba(text) + probability = model.predict_proba(text) - # Check if probabilities is a tuple/list of length 2 - assert len(probabilities) == 2 + # Check if probability is a float + assert isinstance(probability, float) - # Check if probabilities sum to approximately 1 - assert abs(sum(probabilities) - 1.0) < 1e-6 - - # Check if probabilities are between 0 and 1 - assert all(0 <= p <= 1 for p in probabilities) + # Check if probability is between 0 and 1 + assert 0 <= probability <= 1 def test_model_consistency(model): # Test if multiple predictions on same text are consistent text = "This is amazing!" first_prediction = model.predict(text) - first_probabilities = model.predict_proba(text) + first_probability = model.predict_proba(text) # Test multiple times to ensure consistency for _ in range(3): assert model.predict(text) == first_prediction - new_probabilities = model.predict_proba(text) - assert all(abs(a - b) < 1e-6 for a, b in zip(first_probabilities, new_probabilities)) \ No newline at end of file + assert abs(model.predict_proba(text) - first_probability) < 1e-6 \ No newline at end of file diff --git a/homework_9/tests/streamlit_func/__init__.py b/homework_9/tests/streamlit_func/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/homework_9/tests/streamlit_func/test_streamlit.py b/homework_9/tests/streamlit_func/test_streamlit.py new file mode 100644 index 0000000..914de79 --- /dev/null +++ b/homework_9/tests/streamlit_func/test_streamlit.py @@ -0,0 +1,39 @@ +import pytest +from pr1.func_st.utils import Model + +@pytest.fixture +def model(): + return Model( + model_name="distilbert-base-uncased-finetuned-sst-2-english" + ) + +def test_model_prediction(model): + # Test positive sentiment + positive_text = "I love this movie!" + assert model.predict(positive_text) == "POSITIVE" + + # Test negative sentiment + negative_text = "I hate this movie!" + assert model.predict(negative_text) == "NEGATIVE" + +def test_model_probabilities(model): + # Test probability output format + text = "This is a test." + probability = model.predict_proba(text) + + # Check if probability is a float + assert isinstance(probability, float) + + # Check if probability is between 0 and 1 + assert 0 <= probability <= 1 + +def test_model_consistency(model): + # Test if multiple predictions on same text are consistent + text = "This is amazing!" + first_prediction = model.predict(text) + first_probability = model.predict_proba(text) + + # Test multiple times to ensure consistency + for _ in range(3): + assert model.predict(text) == first_prediction + assert abs(model.predict_proba(text) - first_probability) < 1e-6 \ No newline at end of file