Skip to content

Commit

Permalink
Merge pull request #40 from danilyef/readme_branch
Browse files Browse the repository at this point in the history
README.md 2
  • Loading branch information
danilyef authored Dec 17, 2024
2 parents bdc47e3 + 3dc7483 commit 4a7f4b5
Show file tree
Hide file tree
Showing 26 changed files with 273 additions and 128 deletions.
5 changes: 4 additions & 1 deletion homework_6/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
- PR3: Write tests for model, tests should be runnable from CI.
- PR4: Write code to store your model in model management with W&B.
- PR5: Write code to use LIT for your model, in the case of other domains (CV, audio, tabular) find and use a similar tool.
- PR6: Write code to test LLM API (select any LLM - OpenAI, VertexAI, etc).


### PR1, PR2, PR3

These are implemented in the `project` folder.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
74 changes: 74 additions & 0 deletions homework_6/tests/code/test_processing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import pytest
from project.processing import Cleaner


@pytest.fixture
def cleaner():
    """Build the ``Cleaner`` under test; pytest creates a new one per test."""
    instance = Cleaner()
    return instance


def test_lower_case(cleaner):
    """An all-caps phrase passed to ``lower_case`` must come back lowercased."""
    lowered = cleaner.lower_case("HELLO WORLD")
    assert lowered == "hello world"


def test_tokenize_numbers(cleaner):
    """Both digit runs in the sentence are expected to become ``NUMBER``."""
    sentence = "There are 123 apples and 456 oranges"
    wanted = "There are NUMBER apples and NUMBER oranges"
    assert cleaner.tokenize_numbers(sentence) == wanted


def test_remove_emails(cleaner):
    """Compare ``remove_emails`` output with the address-free sentence."""
    # NOTE(review): the address literals look like placeholders left by
    # e-mail obfuscation, and the expected spacing may have been collapsed;
    # confirm both against the repository copy of this test.
    message = "Contact us at [email protected] or [email protected]"
    stripped = cleaner.remove_emails(message)
    assert stripped == "Contact us at or "


def test_remove_square_brackets(cleaner):
    """The ``[hidden]`` segment must be absent from the returned text."""
    bracketed = "This is [hidden] text"
    result = cleaner.remove_square_brackets(bracketed)
    assert result == "This is text"


def test_remove_round_brackets(cleaner):
    """The ``(parenthetical)`` segment must be absent from the returned text."""
    bracketed = "This is (parenthetical) text"
    result = cleaner.remove_round_brackets(bracketed)
    assert result == "This is text"


def test_remove_urls(cleaner):
    """Both the ``https://`` link and the bare ``www.`` link must be removed."""
    raw = "Visit https://www.example.com or www.test.org for more info"
    without_links = cleaner.remove_urls(raw)
    assert without_links == "Visit or for more info"


def test_remove_whitespace(cleaner):
    """Check the text returned by ``remove_whitespace`` for a spaced phrase."""
    # NOTE(review): input and expected value are identical as shown here;
    # the input presumably contained repeated spaces -- confirm in the repo.
    spaced = "Too many spaces"
    normalised = cleaner.remove_whitespace(spaced)
    assert normalised == "Too many spaces"


def test_clean(cleaner):
    """Run ``clean`` on a three-line sample and compare with a fixed string."""
    # The continuation lines of the literal start at column 0 on purpose:
    # any leading spaces would become part of the string under test.
    raw_sample = """HELLO WORLD! [Hidden text] (Parenthetical text)
Visit https://www.example.com or contact [email protected]
There are 123 apples and 456 oranges"""
    cleaned = cleaner.clean(raw_sample)
    assert cleaned == (
        "hello world! visit or contact there are NUMBER apples and NUMBER oranges"
    )


def test_clean_with_multiple_urls_and_emails(cleaner):
    """``clean`` is checked on a sentence holding two links and two addresses."""
    sample = "Check out http://www.example.com and https://test.org. Contact us at [email protected] or [email protected]"
    wanted = "check out and . contact us at or "
    cleaned = cleaner.clean(sample)
    assert cleaned == wanted


def test_clean_with_nested_brackets(cleaner):
    """Round brackets nested inside square brackets must vanish after ``clean``."""
    cleaned = cleaner.clean("This is [nested (bracket)] text")
    assert cleaned == "this is text"


def test_clean_with_multiple_whitespace_types(cleaner):
    """Tab, newline, carriage return and form feed should all become spaces."""
    mixed_separators = "Too many\tspaces\nand\rline\fbreaks"
    cleaned = cleaner.clean(mixed_separators)
    assert cleaned == "too many spaces and line breaks"
File renamed without changes.
60 changes: 60 additions & 0 deletions homework_6/tests/model/test_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
import pytest
from project.model import TextClassifier


@pytest.fixture(scope="module")
def classifier():
    """Provide one shared ``TextClassifier`` for every test in this module.

    The fixture is module-scoped so the classifier is constructed once,
    instead of once per test and once per parametrized case as with the
    default function scope.

    NOTE(review): this assumes ``predict`` / ``predict_proba`` do not mutate
    the instance -- confirm against ``project.model``.
    """
    return TextClassifier()


def test_predict(classifier):
    """``predict`` must answer with one of the two sentiment labels."""
    allowed_labels = ["POSITIVE", "NEGATIVE"]
    label = classifier.predict("This movie is great!")
    assert label in allowed_labels


def test_predict_proba(classifier):
    """The value returned by ``predict_proba`` must lie within [0, 1]."""
    score = classifier.predict_proba("The weather is nice today.")
    assert 0 <= score <= 1


def test_initialization():
    """A custom ``model_name`` must show up on both the tokenizer and the model."""
    checkpoint = "distilbert-base-uncased"
    clf = TextClassifier(model_name=checkpoint)

    # Tokenizer first, then model -- same order as the two original asserts.
    for component in (clf.tokenizer, clf.model):
        assert component.name_or_path == checkpoint


# Behavior tests
@pytest.mark.parametrize(
    ("text_1", "text_2", "expected_sentiment"),
    [
        ("This movie is great!", "This movie is blody awesome!", "POSITIVE"),
        ("This movie is terrible!", "This movie is disappointing!", "NEGATIVE"),
        (
            "Movie delivers an exciting and refreshing take on its genre, featuring compelling characters, sharp dialogue, and a plot that keeps you hooked, all wrapped in stunning visuals and a dynamic soundtrack.",
            "Movie is is disgustingly good, with outrageously captivating performances and a ridiculously well-executed plot that grabs you from the start. The visuals are absurdly stunning, and the soundtrack is almost unfairly perfect, making it an insanely enjoyable watch from beginning to end.",
            "POSITIVE",
        ),
    ],
)
def test_invariance(classifier, text_1, text_2, expected_sentiment):
    """Two differently worded reviews of the same polarity must share a label."""
    first_label = classifier.predict(text_1)
    assert first_label == expected_sentiment
    # The first text is predicted again here, exactly as the original test did.
    assert classifier.predict(text_1) == classifier.predict(text_2)


@pytest.mark.parametrize(
    ("text_1", "text_2"),
    [
        (
            "Movie is a visually stunning and emotionally gripping film, with outstanding performances and a well-crafted story that keeps you engaged from start to finish.",
            "Movie is visually impressive but falls flat with a lackluster story and underwhelming performances, making it hard to stay engaged from start to finish.",
        ),
        (
            "Movie is an engaging and heartwarming film, with strong performances and a captivating story that draws you in, beautifully blending emotion, humor, and stunning visuals for a thoroughly enjoyable experience.",
            "Movie tries to be engaging, but weak performances and a disjointed story leave it feeling flat, lacking the emotional depth or humor needed to make it truly enjoyable.",
        ),
    ],
)
def test_directional(classifier, text_1, text_2):
    """The first review of each pair must be POSITIVE, the second NEGATIVE."""
    praise_label = classifier.predict(text_1)
    assert praise_label == "POSITIVE"
    critique_label = classifier.predict(text_2)
    assert critique_label == "NEGATIVE"


@pytest.mark.parametrize(
    ("text", "expected_sentiment"),
    [
        ("This movie is great!", "POSITIVE"),
        ("I hate this movie!", "NEGATIVE"),
    ],
)
def test_minimum_functionality(classifier, text, expected_sentiment):
    """Short, unambiguous reviews must receive the obvious label."""
    predicted = classifier.predict(text)
    assert predicted == expected_sentiment
11 changes: 11 additions & 0 deletions homework_7/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Homework 7: Kubeflow + AirFlow pipelines

## Tasks:

- PR1: Write a README with instructions on deploying Kubeflow pipelines.
- PR2: Write a Kubeflow training pipeline.
- PR3: Write a Kubeflow inference pipeline.
- PR4: Write a README with instructions on how to deploy Airflow.
- PR5: Write an Airflow training pipeline.
- PR6: Write an Airflow inference pipeline.

19 changes: 19 additions & 0 deletions homework_8/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Homework 8: Orchestration & Dagster


## Tasks:

- PR1: Write a Dagster training pipeline.
- PR2: Write a Dagster inference pipeline.


### Notes:


- All PRs are implemented in the pipelines folder.

- How to run:
1. Go to the `project` folder.

2. To run the training pipeline: `dagster dev -f training_pipeline.py`
3. To run the inference pipeline: `dagster dev -f inference_pipeline.py`
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file.
File renamed without changes.
File renamed without changes.
4 changes: 0 additions & 4 deletions homework_8/pr1/README.md

This file was deleted.

118 changes: 105 additions & 13 deletions homework_9/pr4/README.md → homework_9/README.md
Original file line number Diff line number Diff line change
@@ -1,66 +1,158 @@
## correct home directory
# Homework 9: API serving

## Tasks:

- PR1: Write a Streamlit UI for serving your model, with tests and CI integration.
- PR2: Write a Gradio UI for serving your model, with tests and CI integration.
- PR3: Write a FastAPI server for your model, with tests and CI integration.
- PR4: Write a Kubernetes deployment YAML (Deployment, Service) for your model's API.
- PR5: Write a Kubernetes deployment YAML (Deployment, Service) for your model's UI (Streamlit, Gradio).


### PR1: Streamlit UI

- **How to run**:
```bash
streamlit run main.py
```

- **How to test**:
```bash
pytest tests/streamlit/test_model.py
```

### PR2: Gradio UI

- **How to run**:
```bash
python main.py
```

- **How to test**:
```bash
pytest tests/gradio/test_model.py
```

### PR3: FastAPI server

- **How to run**:
```bash
cd homework_9
uvicorn pr3.app:app --reload
```

- **How to test**:
```bash
pytest tests/fastapi/test_model.py
```

### PR4: Kubernetes deployment YAML (Deployment, Service) for your model's API


**change to the PR4 directory**:

```bash
cd homework_9/pr4
```

## start minikube
**start minikube**:

```bash
minikube start
eval $(minikube -p minikube docker-env)
```


## build docker image
**build docker image**:

```bash
docker build -t fastapi-app:latest .
```

## deploy to minikube
**deploy to minikube**:

```bash
kubectl apply -f k8s_deployment.yaml
```

## get url
**get url**:

```bash
minikube service fastapi-service --url
```


## test predict
**test predict**:

```bash
curl -X POST -H "Content-Type: application/json" \
-d '{"text": "this is good"}' \
http://127.0.0.1:51561/predict
```

### PR5: Kubernetes deployment YAML (Deployment, Service) for your model's UI (Streamlit, Gradio)

**change to the PR5 directory**:

```bash
cd homework_9/pr5
```

**start minikube**:

```bash
minikube start
eval $(minikube -p minikube docker-env)
```


**build docker image**:

```bash
docker build -t streamlit-app:latest .
```

**deploy to minikube**:

```bash
kubectl apply -f k8s_deployment.yaml
```

**get url**:

```bash
minikube service streamlit-service --url
```


**test predict**:

```bash
# Open the URL printed by the previous command in a browser, e.g. http://192.168.99.100:30000/
```



### Useful Information

In Kubernetes, **`type: NodePort`** is used in a Service when you want to access your application from outside the Kubernetes cluster (like your laptop or local browser).

Here’s why you might use it in simple terms:

---

### **1. Kubernetes Runs on Its Own Network**
**1. Kubernetes Runs on Its Own Network**
- Kubernetes creates an internal network for all the Pods.
- By default, this network isn’t accessible from the outside (e.g., your computer).

---

### **2. Services Expose Pods**
**2. Services Expose Pods**
- A **Service** connects your app (running in Pods) to the outside world.
- **`type: NodePort`** exposes your app on a specific port on every node in your cluster.

---

### **3. Why Use `NodePort`?**
**3. Why Use `NodePort`?**
- When you set `type: NodePort`, Kubernetes assigns a port (by default from the range `30000-32767`) on the node's IP address.
- You can now access your app by visiting:
```
Expand All @@ -74,19 +166,19 @@ Here’s why you might use it in simple terms:

---

### **4. Why Not Use ClusterIP?**
**4. Why Not Use ClusterIP?**
- By default, Services use **`type: ClusterIP`**, which only allows access *within* the Kubernetes cluster.
- This is useful for internal communication between apps but not for external access.

---

### **5. Why NodePort is Good for Minikube**
**5. Why NodePort is Good for Minikube**
- In Minikube, you're running Kubernetes on your local machine.
- Using `NodePort` is a quick and simple way to test and access your app from your browser or other devices on the same network.

---

### **In Summary**
**In Summary**
- **`type: NodePort`** makes your app accessible outside Kubernetes on a specific port.
- This is great for testing or development, especially in Minikube.
- Later, in production, you might use other Service types (like `LoadBalancer` or `Ingress`) for more advanced routing.
10 changes: 0 additions & 10 deletions homework_9/pr1/README.md

This file was deleted.

Loading

0 comments on commit 4a7f4b5

Please sign in to comment.