diff --git a/homework_6/README.md b/homework_6/README.md index 309175e..6727da4 100644 --- a/homework_6/README.md +++ b/homework_6/README.md @@ -7,6 +7,9 @@ - PR3: Write tests for model, tests should be runnable from CI. - PR4: Write code to store your model in model management with W&B. - PR5: Write code to use LIT for your model, in the case of other domains (CV, audio, tabular) find and use a similar tool. -- PR6: Write code to test LLM API (select any LLM - OpenAI, VertexAI, etc). +### PR1, PR2, PR3 + +These are implemented in the folder project. + diff --git a/homework_6/pr3/__init__.py b/homework_6/project/__init__.py similarity index 100% rename from homework_6/pr3/__init__.py rename to homework_6/project/__init__.py diff --git a/homework_6/pr2/data.parquet b/homework_6/project/data.parquet similarity index 100% rename from homework_6/pr2/data.parquet rename to homework_6/project/data.parquet diff --git a/homework_6/pr3/project/main.py b/homework_6/project/main.py similarity index 100% rename from homework_6/pr3/project/main.py rename to homework_6/project/main.py diff --git a/homework_6/pr3/project/model.py b/homework_6/project/model.py similarity index 100% rename from homework_6/pr3/project/model.py rename to homework_6/project/model.py diff --git a/homework_6/pr3/project/processing.py b/homework_6/project/processing.py similarity index 100% rename from homework_6/pr3/project/processing.py rename to homework_6/project/processing.py diff --git a/homework_6/pr3/project/__init__.py b/homework_6/tests/code/__init__.py similarity index 100% rename from homework_6/pr3/project/__init__.py rename to homework_6/tests/code/__init__.py diff --git a/homework_6/tests/code/test_processing.py b/homework_6/tests/code/test_processing.py new file mode 100644 index 0000000..66f9928 --- /dev/null +++ b/homework_6/tests/code/test_processing.py @@ -0,0 +1,74 @@ +import pytest +from project.processing import Cleaner + + +@pytest.fixture +def cleaner(): + return Cleaner() + + 
+def test_lower_case(cleaner): + assert cleaner.lower_case("HELLO WORLD") == "hello world" + + +def test_tokenize_numbers(cleaner): + assert ( + cleaner.tokenize_numbers("There are 123 apples and 456 oranges") + == "There are NUMBER apples and NUMBER oranges" + ) + + +def test_remove_emails(cleaner): + assert ( + cleaner.remove_emails("Contact us at info@example.com or support@test.co.uk") + == "Contact us at or " + ) + + +def test_remove_square_brackets(cleaner): + assert cleaner.remove_square_brackets("This is [hidden] text") == "This is text" + + +def test_remove_round_brackets(cleaner): + assert ( + cleaner.remove_round_brackets("This is (parenthetical) text") == "This is text" + ) + + +def test_remove_urls(cleaner): + text = "Visit https://www.example.com or www.test.org for more info" + assert cleaner.remove_urls(text) == "Visit or for more info" + + +def test_remove_whitespace(cleaner): + assert cleaner.remove_whitespace("Too many spaces") == "Too many spaces" + + +def test_clean(cleaner): + text = """HELLO WORLD! [Hidden text] (Parenthetical text) + Visit https://www.example.com or contact info@example.com + There are 123 apples and 456 oranges""" + + expected = ( + "hello world! visit or contact there are NUMBER apples and NUMBER oranges" + ) + assert cleaner.clean(text) == expected + + +def test_clean_with_multiple_urls_and_emails(cleaner): + text = """Check out http://www.example.com and https://test.org. Contact us at info@example.com or support@test.co.uk""" + + expected = "check out and . 
contact us at or " + assert cleaner.clean(text) == expected + + +def test_clean_with_nested_brackets(cleaner): + text = "This is [nested (bracket)] text" + expected = "this is text" + assert cleaner.clean(text) == expected + + +def test_clean_with_multiple_whitespace_types(cleaner): + text = "Too many\tspaces\nand\rline\fbreaks" + expected = "too many spaces and line breaks" + assert cleaner.clean(text) == expected \ No newline at end of file diff --git a/homework_8/pr1/__init__.py b/homework_6/tests/model/__init__.py similarity index 100% rename from homework_8/pr1/__init__.py rename to homework_6/tests/model/__init__.py diff --git a/homework_6/tests/model/test_model.py b/homework_6/tests/model/test_model.py new file mode 100644 index 0000000..933be57 --- /dev/null +++ b/homework_6/tests/model/test_model.py @@ -0,0 +1,60 @@ +import pytest +from project.model import TextClassifier + + +@pytest.fixture +def classifier(): + return TextClassifier() + + +def test_predict(classifier): + text = "This movie is great!" + + assert classifier.predict(text) in ["POSITIVE", "NEGATIVE"] + + +def test_predict_proba(classifier): + text = "The weather is nice today." 
+ probability = classifier.predict_proba(text) + + assert 0 <= probability <= 1 + + +def test_initialization(): + custom_model = "distilbert-base-uncased" + classifier = TextClassifier(model_name=custom_model) + + assert classifier.tokenizer.name_or_path == custom_model + assert classifier.model.name_or_path == custom_model + + +# Behavior tests +@pytest.mark.parametrize("text_1, text_2, expected_sentiment", [ + ("This movie is great!", "This movie is blody awesome!", "POSITIVE"), + ("This movie is terrible!", "This movie is disappointing!", "NEGATIVE"), + ("Movie delivers an exciting and refreshing take on its genre, featuring compelling characters, sharp dialogue, and a plot that keeps you hooked, all wrapped in stunning visuals and a dynamic soundtrack.", + "Movie is is disgustingly good, with outrageously captivating performances and a ridiculously well-executed plot that grabs you from the start. The visuals are absurdly stunning, and the soundtrack is almost unfairly perfect, making it an insanely enjoyable watch from beginning to end.", + "POSITIVE") +]) +def test_invariance(classifier, text_1, text_2, expected_sentiment): + assert classifier.predict(text_1) == expected_sentiment + assert classifier.predict(text_1) == classifier.predict(text_2) + + +@pytest.mark.parametrize("text_1, text_2", [ + ("Movie is a visually stunning and emotionally gripping film, with outstanding performances and a well-crafted story that keeps you engaged from start to finish.", + "Movie is visually impressive but falls flat with a lackluster story and underwhelming performances, making it hard to stay engaged from start to finish."), + ("Movie is an engaging and heartwarming film, with strong performances and a captivating story that draws you in, beautifully blending emotion, humor, and stunning visuals for a thoroughly enjoyable experience.", + "Movie tries to be engaging, but weak performances and a disjointed story leave it feeling flat, lacking the emotional depth or humor 
needed to make it truly enjoyable.") +]) +def test_directional(classifier, text_1, text_2): + assert classifier.predict(text_1) == "POSITIVE" + assert classifier.predict(text_2) == "NEGATIVE" + + +@pytest.mark.parametrize("text, expected_sentiment", [ + ("This movie is great!", "POSITIVE"), + ("I hate this movie!", "NEGATIVE") +]) +def test_minimum_functionality(classifier, text, expected_sentiment): + assert classifier.predict(text) == expected_sentiment \ No newline at end of file diff --git a/homework_7/README.md b/homework_7/README.md new file mode 100644 index 0000000..6a7e4b0 --- /dev/null +++ b/homework_7/README.md @@ -0,0 +1,11 @@ +# Homework 7: Kubeflow + AirFlow pipelines + +## Tasks: + +- RP1: Write a README with instructions on deploying Kubeflow pipelines. +- PR2: Write a Kubeflow training pipeline. +- PR3: Write a Kubeflow inference pipeline. +- RP4: Write a README with instructions on how to deploy Airflow. +- PR5: Write an Airflow training pipeline. +- PR6: Write an Airflow inference pipeline. + diff --git a/homework_8/README.md b/homework_8/README.md new file mode 100644 index 0000000..7748c23 --- /dev/null +++ b/homework_8/README.md @@ -0,0 +1,19 @@ +# Homework 8: Orchestration & Dagster + + +## Tasks: + +- PR1: Write a Dagster training pipeline. +- PR2: Write a Dagster inference pipeline. + + +### Notes: + + +- All PRs are implemented in the pipelines folder. + +- How to run: + 1. Go to the folder project. + + 2. To run the training pipeline: `dagster dev -f training_pipeline.py` + 3. 
To run the inference pipeline: `dagster dev -f inference_pipeline.py` \ No newline at end of file diff --git a/homework_8/pr1/assets/__init__.py b/homework_8/pipelines/__init__.py similarity index 100% rename from homework_8/pr1/assets/__init__.py rename to homework_8/pipelines/__init__.py diff --git a/homework_8/pr1/resources/__init__.py b/homework_8/pipelines/assets/__init__.py similarity index 100% rename from homework_8/pr1/resources/__init__.py rename to homework_8/pipelines/assets/__init__.py diff --git a/homework_8/pr1/assets/datasets.py b/homework_8/pipelines/assets/datasets.py similarity index 100% rename from homework_8/pr1/assets/datasets.py rename to homework_8/pipelines/assets/datasets.py diff --git a/homework_8/pr1/assets/models.py b/homework_8/pipelines/assets/models.py similarity index 100% rename from homework_8/pr1/assets/models.py rename to homework_8/pipelines/assets/models.py diff --git a/homework_8/pr1/inference_pipeline.py b/homework_8/pipelines/inference_pipeline.py similarity index 100% rename from homework_8/pr1/inference_pipeline.py rename to homework_8/pipelines/inference_pipeline.py diff --git a/homework_8/pr1/requirements.txt b/homework_8/pipelines/requirements.txt similarity index 100% rename from homework_8/pr1/requirements.txt rename to homework_8/pipelines/requirements.txt diff --git a/homework_8/pipelines/resources/__init__.py b/homework_8/pipelines/resources/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/homework_8/pr1/resources/resources.py b/homework_8/pipelines/resources/resources.py similarity index 100% rename from homework_8/pr1/resources/resources.py rename to homework_8/pipelines/resources/resources.py diff --git a/homework_8/pr1/training_pipeline.py b/homework_8/pipelines/training_pipeline.py similarity index 100% rename from homework_8/pr1/training_pipeline.py rename to homework_8/pipelines/training_pipeline.py diff --git a/homework_8/pr1/README.md b/homework_8/pr1/README.md deleted file mode 
100644 index c71e753..0000000 --- a/homework_8/pr1/README.md +++ /dev/null @@ -1,4 +0,0 @@ -1. Go to the folder project. - -2. To run the training pipeline: `dagster dev -f training_pipeline.py` -3. To run the inference pipeline: `dagster dev -f inference_pipeline.py` diff --git a/homework_9/pr4/README.md b/homework_9/README.md similarity index 50% rename from homework_9/pr4/README.md rename to homework_9/README.md index e05f9d4..6a536c2 100644 --- a/homework_9/pr4/README.md +++ b/homework_9/README.md @@ -1,10 +1,61 @@ -## correct home directory +# Homework 9: API serving + +## Tasks: + +- PR1: Write a Streamlit UI for serving your model, with tests and CI integration. +- PR2: Write a Gradio UI for serving your model, with tests and CI integration. +- PR3: Write a FastAPI server for your model, with tests and CI integration. +- PR4: Write a Kubernetes deployment YAML (Deployment, Service) for your model's API. +- PR5: Write a Kubernetes deployment YAML (Deployment, Service) for your model's UI (Streamlit, Gradio). + + +### PR1: Streamlit UI + +- **How to run**: +```bash +streamlit run main.py +``` + +- **How to test**: +```bash +pytest tests/streamlit/test_model.py +``` + +### PR2: Gradio UI + +- **How to run**: +```bash +python main.py +``` + +- **How to test**: +```bash +pytest tests/gradio/test_model.py +``` + +### PR3: FastAPI server + +- **How to run**: +```bash +cd homework_9 +uvicorn pr3.app:app --reload +``` + +- **How to test**: +```bash +pytest tests/fastapi/test_model.py +``` + +### PR4: Kubernetes deployment YAML (Deployment, Service) for your model's API + + +**correct home directory**: ```bash cd homework_9/pr4 ``` -## start minikube +**start minikube**: ```bash minikube start @@ -12,26 +63,25 @@ eval $(minikube -p minikube docker-env) ``` -## build docker image +**build docker image**: ```bash docker build -t fastapi-app:latest . 
``` -## deploy to minikube +**deploy to minikube**: ```bash kubectl apply -f k8s_deployment.yaml ``` -## get url +**get url**: ```bash minikube service fastapi-service --url ``` - -## test predict +**test predict**: ```bash curl -X POST -H "Content-Type: application/json" \ @@ -39,8 +89,50 @@ curl -X POST -H "Content-Type: application/json" \ http://127.0.0.1:51561/predict ``` +### PR5: Kubernetes deployment YAML (Deployment, Service) for your model's UI (Streamlit, Gradio) + +**correct home directory**: + +```bash +cd homework_9/pr5 +``` + +**start minikube**: + +```bash +minikube start +eval $(minikube -p minikube docker-env) +``` + + +**build docker image**: + +```bash +docker build -t streamlit-app:latest . +``` + +**deploy to minikube**: + +```bash +kubectl apply -f k8s_deployment.yaml +``` + +**get url**: + +```bash +minikube service streamlit-service --url +``` + + +**test predict**: + +```bash +new url: http://192.168.99.100:30000/ (or other) +``` + +### Useful Information In Kubernetes, **`type: NodePort`** is used in a Service when you want to access your application from outside the Kubernetes cluster (like your laptop or local browser). @@ -48,19 +140,19 @@ Here’s why you might use it in simple terms: --- -### **1. Kubernetes Runs on Its Own Network** +**1. Kubernetes Runs on Its Own Network** - Kubernetes creates an internal network for all the Pods. - By default, this network isn’t accessible from the outside (e.g., your computer). --- -### **2. Services Expose Pods** +**2. Services Expose Pods** - A **Service** connects your app (running in Pods) to the outside world. - **`type: NodePort`** exposes your app on a specific port on every node in your cluster. --- -### **3. Why Use `NodePort`?** +**3. Why Use `NodePort`?** - When you set `type: NodePort`, Kubernetes assigns a port (like `30000-32767`) on the node's IP address. - You can now access your app by visiting: ``` @@ -74,19 +166,19 @@ Here’s why you might use it in simple terms: --- -### **4. 
Why Not Use ClusterIP?** +**4. Why Not Use ClusterIP?** - By default, Services use **`type: ClusterIP`**, which only allows access *within* the Kubernetes cluster. - This is useful for internal communication between apps but not for external access. --- -### **5. Why NodePort is Good for Minikube** +**5. Why NodePort is Good for Minikube** - In Minikube, you're running Kubernetes on your local machine. - Using `NodePort` is a quick and simple way to test and access your app from your browser or other devices on the same network. --- -### **In Summary** +**In Summary** - **`type: NodePort`** makes your app accessible outside Kubernetes on a specific port. - This is great for testing or development, especially in Minikube. - Later, in production, you might use other Service types (like `LoadBalancer` or `Ingress`) for more advanced routing. \ No newline at end of file diff --git a/homework_9/pr1/README.md b/homework_9/pr1/README.md deleted file mode 100644 index bb03f81..0000000 --- a/homework_9/pr1/README.md +++ /dev/null @@ -1,10 +0,0 @@ -## Run -```bash -pip install -r requirements.txt -streamlit run main.py -``` - -## Tests -```bash -pytest tests/test_model.py -``` \ No newline at end of file diff --git a/homework_9/pr2/README.md b/homework_9/pr2/README.md deleted file mode 100644 index c0be38b..0000000 --- a/homework_9/pr2/README.md +++ /dev/null @@ -1,10 +0,0 @@ -## Run -```bash -pip install -r requirements.txt -python main.py -``` - -## Tests -```bash -pytest tests/test_model.py -``` \ No newline at end of file diff --git a/homework_9/pr5/README.md b/homework_9/pr5/README.md deleted file mode 100644 index 13d23aa..0000000 --- a/homework_9/pr5/README.md +++ /dev/null @@ -1,90 +0,0 @@ -## correct home directory - -```bash -cd homework_9/pr5 -``` - -## start minikube - -```bash -minikube start -eval $(minikube -p minikube docker-env) -``` - - -## build docker image - -```bash -docker build -t streamlit-app:latest . 
-``` - -## deploy to minikube - -```bash -kubectl apply -f k8s_deployment.yaml -``` - -## get url - -```bash -minikube service streamlit-service --url -``` - - -## test predict - -```bash -new url: http://192.168.99.100:30000/ (or other) -``` - - - - -In Kubernetes, **`type: NodePort`** is used in a Service when you want to access your application from outside the Kubernetes cluster (like your laptop or local browser). - -Here’s why you might use it in simple terms: - ---- - -### **1. Kubernetes Runs on Its Own Network** -- Kubernetes creates an internal network for all the Pods. -- By default, this network isn’t accessible from the outside (e.g., your computer). - ---- - -### **2. Services Expose Pods** -- A **Service** connects your app (running in Pods) to the outside world. -- **`type: NodePort`** exposes your app on a specific port on every node in your cluster. - ---- - -### **3. Why Use `NodePort`?** -- When you set `type: NodePort`, Kubernetes assigns a port (like `30000-32767`) on the node's IP address. -- You can now access your app by visiting: - ``` - http://<NodeIP>:<NodePort> - ``` - For example: - ``` - http://192.168.99.100:30000 - ``` - Here, `192.168.99.100` is the Minikube node's IP, and `30000` is the NodePort. - ---- - -### **4. Why Not Use ClusterIP?** -- By default, Services use **`type: ClusterIP`**, which only allows access *within* the Kubernetes cluster. -- This is useful for internal communication between apps but not for external access. - ---- - -### **5. Why NodePort is Good for Minikube** -- In Minikube, you're running Kubernetes on your local machine. -- Using `NodePort` is a quick and simple way to test and access your app from your browser or other devices on the same network. - ---- - -### **In Summary** -- **`type: NodePort`** makes your app accessible outside Kubernetes on a specific port. -- This is great for testing or development, especially in Minikube. 
-- Later, in production, you might use other Service types (like `LoadBalancer` or `Ingress`) for more advanced routing. \ No newline at end of file