-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #40 from danilyef/readme_branch
README.md 2
- Loading branch information
Showing
26 changed files
with
273 additions
and
128 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
import pytest | ||
from project.processing import Cleaner | ||
|
||
|
||
@pytest.fixture | ||
def cleaner(): | ||
return Cleaner() | ||
|
||
|
||
def test_lower_case(cleaner): | ||
assert cleaner.lower_case("HELLO WORLD") == "hello world" | ||
|
||
|
||
def test_tokenize_numbers(cleaner): | ||
assert ( | ||
cleaner.tokenize_numbers("There are 123 apples and 456 oranges") | ||
== "There are NUMBER apples and NUMBER oranges" | ||
) | ||
|
||
|
||
def test_remove_emails(cleaner): | ||
assert ( | ||
cleaner.remove_emails("Contact us at [email protected] or [email protected]") | ||
== "Contact us at or " | ||
) | ||
|
||
|
||
def test_remove_square_brackets(cleaner): | ||
assert cleaner.remove_square_brackets("This is [hidden] text") == "This is text" | ||
|
||
|
||
def test_remove_round_brackets(cleaner): | ||
assert ( | ||
cleaner.remove_round_brackets("This is (parenthetical) text") == "This is text" | ||
) | ||
|
||
|
||
def test_remove_urls(cleaner): | ||
text = "Visit https://www.example.com or www.test.org for more info" | ||
assert cleaner.remove_urls(text) == "Visit or for more info" | ||
|
||
|
||
def test_remove_whitespace(cleaner): | ||
assert cleaner.remove_whitespace("Too many spaces") == "Too many spaces" | ||
|
||
|
||
def test_clean(cleaner): | ||
text = """HELLO WORLD! [Hidden text] (Parenthetical text) | ||
Visit https://www.example.com or contact [email protected] | ||
There are 123 apples and 456 oranges""" | ||
|
||
expected = ( | ||
"hello world! visit or contact there are NUMBER apples and NUMBER oranges" | ||
) | ||
assert cleaner.clean(text) == expected | ||
|
||
|
||
def test_clean_with_multiple_urls_and_emails(cleaner): | ||
text = """Check out http://www.example.com and https://test.org. Contact us at [email protected] or [email protected]""" | ||
|
||
expected = "check out and . contact us at or " | ||
assert cleaner.clean(text) == expected | ||
|
||
|
||
def test_clean_with_nested_brackets(cleaner): | ||
text = "This is [nested (bracket)] text" | ||
expected = "this is text" | ||
assert cleaner.clean(text) == expected | ||
|
||
|
||
def test_clean_with_multiple_whitespace_types(cleaner): | ||
text = "Too many\tspaces\nand\rline\fbreaks" | ||
expected = "too many spaces and line breaks" | ||
assert cleaner.clean(text) == expected |
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import pytest | ||
from project.model import TextClassifier | ||
|
||
|
||
@pytest.fixture | ||
def classifier(): | ||
return TextClassifier() | ||
|
||
|
||
def test_predict(classifier): | ||
text = "This movie is great!" | ||
|
||
assert classifier.predict(text) in ["POSITIVE", "NEGATIVE"] | ||
|
||
|
||
def test_predict_proba(classifier): | ||
text = "The weather is nice today." | ||
probability = classifier.predict_proba(text) | ||
|
||
assert 0 <= probability <= 1 | ||
|
||
|
||
def test_initialization(): | ||
custom_model = "distilbert-base-uncased" | ||
classifier = TextClassifier(model_name=custom_model) | ||
|
||
assert classifier.tokenizer.name_or_path == custom_model | ||
assert classifier.model.name_or_path == custom_model | ||
|
||
|
||
# Behavior tests | ||
@pytest.mark.parametrize("text_1, text_2, expected_sentiment", [ | ||
("This movie is great!", "This movie is blody awesome!", "POSITIVE"), | ||
("This movie is terrible!", "This movie is disappointing!", "NEGATIVE"), | ||
("Movie delivers an exciting and refreshing take on its genre, featuring compelling characters, sharp dialogue, and a plot that keeps you hooked, all wrapped in stunning visuals and a dynamic soundtrack.", | ||
"Movie is is disgustingly good, with outrageously captivating performances and a ridiculously well-executed plot that grabs you from the start. The visuals are absurdly stunning, and the soundtrack is almost unfairly perfect, making it an insanely enjoyable watch from beginning to end.", | ||
"POSITIVE") | ||
]) | ||
def test_invariance(classifier, text_1, text_2, expected_sentiment): | ||
assert classifier.predict(text_1) == expected_sentiment | ||
assert classifier.predict(text_1) == classifier.predict(text_2) | ||
|
||
|
||
@pytest.mark.parametrize("text_1, text_2", [ | ||
("Movie is a visually stunning and emotionally gripping film, with outstanding performances and a well-crafted story that keeps you engaged from start to finish.", | ||
"Movie is visually impressive but falls flat with a lackluster story and underwhelming performances, making it hard to stay engaged from start to finish."), | ||
("Movie is an engaging and heartwarming film, with strong performances and a captivating story that draws you in, beautifully blending emotion, humor, and stunning visuals for a thoroughly enjoyable experience.", | ||
"Movie tries to be engaging, but weak performances and a disjointed story leave it feeling flat, lacking the emotional depth or humor needed to make it truly enjoyable.") | ||
]) | ||
def test_directional(classifier, text_1, text_2): | ||
assert classifier.predict(text_1) == "POSITIVE" | ||
assert classifier.predict(text_2) == "NEGATIVE" | ||
|
||
|
||
@pytest.mark.parametrize("text, expected_sentiment", [ | ||
("This movie is great!", "POSITIVE"), | ||
("I hate this movie!", "NEGATIVE") | ||
]) | ||
def test_minimum_functionality(classifier, text, expected_sentiment): | ||
assert classifier.predict(text) == expected_sentiment |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Homework 7: Kubeflow + Airflow pipelines | ||
|
||
## Tasks: | ||
|
||
- PR1: Write a README with instructions on deploying Kubeflow pipelines. | ||
- PR2: Write a Kubeflow training pipeline. | ||
- PR3: Write a Kubeflow inference pipeline. | ||
- PR4: Write a README with instructions on how to deploy Airflow. | ||
- PR5: Write an Airflow training pipeline. | ||
- PR6: Write an Airflow inference pipeline. | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Homework 8: Orchestration & Dagster | ||
|
||
|
||
## Tasks: | ||
|
||
- PR1: Write a Dagster training pipeline. | ||
- PR2: Write a Dagster inference pipeline. | ||
|
||
|
||
### Notes: | ||
|
||
|
||
- All PRs are implemented in the pipelines folder. | ||
|
||
- How to run: | ||
1. Go to the `project` folder. | ||
|
||
2. To run the training pipeline: `dagster dev -f training_pipeline.py` | ||
3. To run the inference pipeline: `dagster dev -f inference_pipeline.py` |
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file.
File renamed without changes.
File renamed without changes.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.