Merge pull request #40 from danilyef/readme_branch

README.md 2
danilyef · Dec 17, 2024 · 4a7f4b5 · 4a7f4b5
2 parents bdc47e3 + 3dc7483
commit 4a7f4b5
Show file tree

Hide file tree

Showing 26 changed files with 273 additions and 128 deletions.
diff --git a/homework_6/README.md b/homework_6/README.md
@@ -7,6 +7,9 @@
 - PR3: Write tests for model, tests should be runnable from CI.
 - PR4: Write code to store your model in model management with W&B.
 - PR5: Write code to use LIT for your model, in the case of other domains (CV, audio, tabular) find and use a similar tool.
-- PR6: Write code to test LLM API (select any LLM - OpenAI, VertexAI, etc).
 
 
+### PR1, PR2, PR3
+
+These are implemented in the `project` folder.
+
diff --git a/homework_6/pr3/__init__.py → homework_6/project/__init__.py b/homework_6/pr3/__init__.py → homework_6/project/__init__.py
diff --git a/homework_6/pr2/data.parquet → homework_6/project/data.parquet b/homework_6/pr2/data.parquet → homework_6/project/data.parquet
diff --git a/homework_6/pr3/project/main.py → homework_6/project/main.py b/homework_6/pr3/project/main.py → homework_6/project/main.py
diff --git a/homework_6/pr3/project/model.py → homework_6/project/model.py b/homework_6/pr3/project/model.py → homework_6/project/model.py
diff --git a/homework_6/pr3/project/processing.py → homework_6/project/processing.py b/homework_6/pr3/project/processing.py → homework_6/project/processing.py
diff --git a/homework_6/pr3/project/__init__.py → homework_6/tests/code/__init__.py b/homework_6/pr3/project/__init__.py → homework_6/tests/code/__init__.py
diff --git a/homework_6/tests/code/test_processing.py b/homework_6/tests/code/test_processing.py
@@ -0,0 +1,74 @@
+import pytest
+from project.processing import Cleaner
+
+
+@pytest.fixture
+def cleaner():
+    return Cleaner()
+
+
+def test_lower_case(cleaner):
+    assert cleaner.lower_case("HELLO WORLD") == "hello world"
+
+
+def test_tokenize_numbers(cleaner):
+    assert (
+        cleaner.tokenize_numbers("There are 123 apples and 456 oranges")
+        == "There are NUMBER apples and NUMBER oranges"
+    )
+
+
+def test_remove_emails(cleaner):
+    assert (
+        cleaner.remove_emails("Contact us at [email protected] or [email protected]")
+        == "Contact us at  or "
+    )
+
+
+def test_remove_square_brackets(cleaner):
+    assert cleaner.remove_square_brackets("This is [hidden] text") == "This is  text"
+
+
+def test_remove_round_brackets(cleaner):
+    assert (
+        cleaner.remove_round_brackets("This is (parenthetical) text") == "This is  text"
+    )
+
+
+def test_remove_urls(cleaner):
+    text = "Visit https://www.example.com or www.test.org for more info"
+    assert cleaner.remove_urls(text) == "Visit  or  for more info"
+
+
+def test_remove_whitespace(cleaner):
+    assert cleaner.remove_whitespace("Too    many    spaces") == "Too many spaces"
+
+
+def test_clean(cleaner):
+    text = """HELLO WORLD! [Hidden text] (Parenthetical text)
+    Visit https://www.example.com or contact [email protected]
+    There are 123 apples and 456 oranges"""
+
+    expected = (
+        "hello world! visit or contact there are NUMBER apples and NUMBER oranges"
+    )
+    assert cleaner.clean(text) == expected
+
+
+def test_clean_with_multiple_urls_and_emails(cleaner):
+    text = """Check out http://www.example.com and https://test.org. Contact us at [email protected] or [email protected]"""
+
+    expected = "check out and . contact us at or "
+    assert cleaner.clean(text) == expected
+
+
+def test_clean_with_nested_brackets(cleaner):
+    text = "This is [nested (bracket)] text"
+    expected = "this is text"
+    assert cleaner.clean(text) == expected
+
+
+def test_clean_with_multiple_whitespace_types(cleaner):
+    text = "Too    many\tspaces\nand\rline\fbreaks"
+    expected = "too many spaces and line breaks"
+    assert cleaner.clean(text) == expected
diff --git a/homework_8/pr1/__init__.py → homework_6/tests/model/__init__.py b/homework_8/pr1/__init__.py → homework_6/tests/model/__init__.py
diff --git a/homework_6/tests/model/test_model.py b/homework_6/tests/model/test_model.py
@@ -0,0 +1,60 @@
+import pytest
+from project.model import TextClassifier
+
+
+@pytest.fixture
+def classifier():
+    return TextClassifier()
+
+
+def test_predict(classifier):
+    text = "This movie is great!"
+
+    assert classifier.predict(text) in ["POSITIVE", "NEGATIVE"]
+
+
+def test_predict_proba(classifier):
+    text = "The weather is nice today."
+    probability = classifier.predict_proba(text)
+
+    assert 0 <= probability <= 1
+
+
+def test_initialization():
+    custom_model = "distilbert-base-uncased"
+    classifier = TextClassifier(model_name=custom_model)
+
+    assert classifier.tokenizer.name_or_path == custom_model
+    assert classifier.model.name_or_path == custom_model
+
+
+# Behavior tests
+@pytest.mark.parametrize("text_1, text_2, expected_sentiment", [
+    ("This movie is great!", "This movie is blody awesome!", "POSITIVE"),
+    ("This movie is terrible!", "This movie is disappointing!", "NEGATIVE"),
+    ("Movie delivers an exciting and refreshing take on its genre, featuring compelling characters, sharp dialogue, and a plot that keeps you hooked, all wrapped in stunning visuals and a dynamic soundtrack.",
+     "Movie is is disgustingly good, with outrageously captivating performances and a ridiculously well-executed plot that grabs you from the start. The visuals are absurdly stunning, and the soundtrack is almost unfairly perfect, making it an insanely enjoyable watch from beginning to end.",
+     "POSITIVE")
+])
+def test_invariance(classifier, text_1, text_2, expected_sentiment):
+    assert classifier.predict(text_1) == expected_sentiment
+    assert classifier.predict(text_1) == classifier.predict(text_2)
+
+
+@pytest.mark.parametrize("text_1, text_2", [
+    ("Movie is a visually stunning and emotionally gripping film, with outstanding performances and a well-crafted story that keeps you engaged from start to finish.",
+     "Movie  is visually impressive but falls flat with a lackluster story and underwhelming performances, making it hard to stay engaged from start to finish."),
+    ("Movie is an engaging and heartwarming film, with strong performances and a captivating story that draws you in, beautifully blending emotion, humor, and stunning visuals for a thoroughly enjoyable experience.",
+     "Movie tries to be engaging, but weak performances and a disjointed story leave it feeling flat, lacking the emotional depth or humor needed to make it truly enjoyable.")
+])
+def test_directional(classifier, text_1, text_2):
+    assert classifier.predict(text_1) == "POSITIVE"
+    assert classifier.predict(text_2) == "NEGATIVE"
+
+
+@pytest.mark.parametrize("text, expected_sentiment", [
+    ("This movie is great!", "POSITIVE"),
+    ("I hate this movie!", "NEGATIVE")
+])
+def test_minimum_functionality(classifier, text, expected_sentiment):
+    assert classifier.predict(text) == expected_sentiment
diff --git a/homework_7/README.md b/homework_7/README.md
@@ -0,0 +1,11 @@
+# Homework 7: Kubeflow + AirFlow pipelines
+
+## Tasks:
+
+- PR1: Write a README with instructions on deploying Kubeflow pipelines.
+- PR2: Write a Kubeflow training pipeline.
+- PR3: Write a Kubeflow inference pipeline.
+- PR4: Write a README with instructions on how to deploy Airflow.
+- PR5: Write an Airflow training pipeline.
+- PR6: Write an Airflow inference pipeline.
+
diff --git a/homework_8/README.md b/homework_8/README.md
@@ -0,0 +1,19 @@
+# Homework 8: Orchestration & Dagster 
+
+
+## Tasks:
+
+- PR1: Write a Dagster training pipeline.
+- PR2: Write a Dagster inference pipeline.
+
+
+### Notes:
+
+
+- All PRs are implemented in the pipelines folder.
+
+- How to run:
+    1. Go to the `pipelines` folder.
+
+    2. To run the training pipeline: `dagster dev -f training_pipeline.py`
+    3. To run the inference pipeline: `dagster dev -f inference_pipeline.py`
diff --git a/homework_8/pr1/assets/__init__.py → homework_8/pipelines/__init__.py b/homework_8/pr1/assets/__init__.py → homework_8/pipelines/__init__.py
diff --git a/homework_8/pr1/resources/__init__.py → homework_8/pipelines/assets/__init__.py b/homework_8/pr1/resources/__init__.py → homework_8/pipelines/assets/__init__.py
diff --git a/homework_8/pr1/assets/datasets.py → homework_8/pipelines/assets/datasets.py b/homework_8/pr1/assets/datasets.py → homework_8/pipelines/assets/datasets.py
diff --git a/homework_8/pr1/assets/models.py → homework_8/pipelines/assets/models.py b/homework_8/pr1/assets/models.py → homework_8/pipelines/assets/models.py
diff --git a/homework_8/pr1/inference_pipeline.py → homework_8/pipelines/inference_pipeline.py b/homework_8/pr1/inference_pipeline.py → homework_8/pipelines/inference_pipeline.py
diff --git a/homework_8/pr1/requirements.txt → homework_8/pipelines/requirements.txt b/homework_8/pr1/requirements.txt → homework_8/pipelines/requirements.txt
diff --git a/homework_8/pipelines/resources/__init__.py b/homework_8/pipelines/resources/__init__.py
diff --git a/homework_8/pr1/resources/resources.py → homework_8/pipelines/resources/resources.py b/homework_8/pr1/resources/resources.py → homework_8/pipelines/resources/resources.py
diff --git a/homework_8/pr1/training_pipeline.py → homework_8/pipelines/training_pipeline.py b/homework_8/pr1/training_pipeline.py → homework_8/pipelines/training_pipeline.py
diff --git a/homework_8/pr1/README.md b/homework_8/pr1/README.md
diff --git a/homework_9/pr4/README.md → homework_9/README.md b/homework_9/pr4/README.md → homework_9/README.md
@@ -1,66 +1,158 @@
-## correct home directory
+# Homework 9: API serving
+
+## Tasks:
+
+- PR1: Write a Streamlit UI for serving your model, with tests and CI integration.
+- PR2: Write a Gradio UI for serving your model, with tests and CI integration.
+- PR3: Write a FastAPI server for your model, with tests and CI integration.
+- PR4: Write a Kubernetes deployment YAML (Deployment, Service) for your model's API.
+- PR5: Write a Kubernetes deployment YAML (Deployment, Service) for your model's UI (Streamlit, Gradio).
+
+
+### PR1: Streamlit UI
+
+- **How to run**:
+```bash
+streamlit run main.py
+```
+
+- **How to test**:
+```bash
+pytest tests/streamlit/test_model.py
+```
+
+### PR2: Gradio UI
+
+- **How to run**:
+```bash
+python main.py
+```
+
+- **How to test**:
+```bash
+pytest tests/gradio/test_model.py
+```
+
+### PR3: FastAPI server
+
+- **How to run**:
+```bash
+cd homework_9
+uvicorn pr3.app:app --reload
+```
+
+- **How to test**:
+```bash
+pytest tests/fastapi/test_model.py
+```
+
+### PR4: Kubernetes deployment YAML (Deployment, Service) for your model's API
+
+
+**change to the correct directory**:
 
 ```bash
 cd homework_9/pr4
 ```
 
-## start minikube
+**start minikube**:
 
 ```bash
 minikube start
 eval $(minikube -p minikube docker-env)
 ```
 
 
-## build docker image
+**build docker image**:
 
 ```bash
 docker build -t fastapi-app:latest .
 ```
 
-## deploy to minikube
+**deploy to minikube**:
 
 ```bash
 kubectl apply -f k8s_deployment.yaml
 ```
 
-## get url
+**get url**:
 
 ```bash
 minikube service fastapi-service --url
 ```
 
-
-## test predict
+**test predict**:
 
 ```bash
 curl -X POST -H "Content-Type: application/json" \
     -d '{"text": "this is good"}' \
     http://127.0.0.1:51561/predict
 ```
 
+### PR5: Kubernetes deployment YAML (Deployment, Service) for your model's UI (Streamlit, Gradio)
+
+**change to the correct directory**:
+
+```bash
+cd homework_9/pr5
+```
+
+**start minikube**:
+
+```bash
+minikube start
+eval $(minikube -p minikube docker-env)
+```
+
+
+**build docker image**:
+
+```bash
+docker build -t streamlit-app:latest .
+```
+
+**deploy to minikube**:
+
+```bash
+kubectl apply -f k8s_deployment.yaml
+```
+
+**get url**:
+
+```bash
+minikube service streamlit-service --url
+```
+
+
+**test predict**:
+
+```bash
+# Open the URL printed by the previous command in a browser, e.g. http://192.168.99.100:30000/
+```
+
 
 
+### Useful Information
 
 In Kubernetes, **`type: NodePort`** is used in a Service when you want to access your application from outside the Kubernetes cluster (like your laptop or local browser).
 
 Here’s why you might use it in simple terms:
 
 ---
 
-### **1. Kubernetes Runs on Its Own Network**
+**1. Kubernetes Runs on Its Own Network**
 - Kubernetes creates an internal network for all the Pods.
 - By default, this network isn’t accessible from the outside (e.g., your computer).
 
 ---
 
-### **2. Services Expose Pods**
+**2. Services Expose Pods**
 - A **Service** connects your app (running in Pods) to the outside world.
 - **`type: NodePort`** exposes your app on a specific port on every node in your cluster.
 
 ---
 
-### **3. Why Use `NodePort`?**
+**3. Why Use `NodePort`?**
 - When you set `type: NodePort`, Kubernetes assigns a port (like `30000-32767`) on the node's IP address.
 - You can now access your app by visiting:
   ```
@@ -74,19 +166,19 @@ Here’s why you might use it in simple terms:
 
 ---
 
-### **4. Why Not Use ClusterIP?**
+**4. Why Not Use ClusterIP?**
 - By default, Services use **`type: ClusterIP`**, which only allows access *within* the Kubernetes cluster.
 - This is useful for internal communication between apps but not for external access.
 
 ---
 
-### **5. Why NodePort is Good for Minikube**
+**5. Why NodePort is Good for Minikube**
 - In Minikube, you're running Kubernetes on your local machine.
 - Using `NodePort` is a quick and simple way to test and access your app from your browser or other devices on the same network.
 
 ---
 
-### **In Summary**
+**In Summary**
 - **`type: NodePort`** makes your app accessible outside Kubernetes on a specific port.
 - This is great for testing or development, especially in Minikube. 
 - Later, in production, you might use other Service types (like `LoadBalancer` or `Ingress`) for more advanced routing.
diff --git a/homework_9/pr1/README.md b/homework_9/pr1/README.md