From d8316a666e465122f5f4b81c033ec90ae62afe30 Mon Sep 17 00:00:00 2001 From: danilyef Date: Tue, 17 Dec 2024 18:40:31 +0100 Subject: [PATCH] README.md --- README.md | 72 ++++++++++- homework_2/README.md | 26 ++-- homework_2/{task1 => pr1}/Dockerfile.run | 0 homework_2/{task1 => pr1}/Dockerfile.web | 0 homework_2/{task1 => pr1}/app.py | 0 homework_2/{task1 => pr1}/dockerignore | 0 homework_2/{task1 => pr1}/requirements.txt | 0 homework_2/{task1 => pr1}/run.sh | 0 homework_2/{task1 => pr1}/web.sh | 0 .../{task2 => pr3}/deployment_service.yaml | 0 homework_2/{task2 => pr3}/job.yaml | 0 homework_2/{task2 => pr3}/pod.yaml | 0 homework_3/README.md | 18 +++ homework_3/pr2/tests.py | 11 -- homework_3/pr3/README.md | 25 ---- homework_3/pr3/main.py | 15 --- .../inference_time_comparison.jpg | Bin homework_3/pr4/main.py | 90 +++++++++++++ homework_3/pr4/requirements.txt | 6 + homework_3/pr5/README.md | 25 ---- homework_3/pr5/main.py | 122 +++++++----------- homework_3/pr5/requirements.txt | 9 +- homework_3/pr6/README.md | 25 ---- homework_3/pr6/main.py | 99 +++++++------- homework_3/pr6/requirements.txt | 8 +- homework_3/pr7/README.md | 26 ---- homework_3/pr7/main.py | 69 ---------- homework_3/pr7/requirements.txt | 5 - homework_4/README.md | 74 +++++++++++ homework_4/pr2/README.md | 30 ----- homework_4/pr4/README.md | 22 ---- homework_5/{pr6 => }/README.md | 28 ++-- homework_5/pr1/README.md | 26 ---- homework_5/{pr5 => pr4}/main.py | 0 homework_5/{pr5 => pr4}/model.py | 0 homework_5/{pr6 => pr5}/accelerate_ddp.py | 0 homework_5/{pr6 => pr5}/accelerate_screen.png | Bin homework_5/{pr6 => pr5}/config_acc.yaml | 0 homework_5/{pr6 => pr5}/pytorch_ddp.py | 0 homework_5/{pr6 => pr5}/pytorch_screen.png | Bin homework_5/{pr6 => pr5}/ray_ddp.py | 0 homework_5/{pr6 => pr5}/ray_screen.png | Bin homework_5/{pr6 => pr5}/requirements.txt | 0 homework_6/README.md | 12 ++ intro.jpg | Bin 0 -> 21828 bytes 45 files changed, 405 insertions(+), 438 deletions(-) rename homework_2/{task1 => 
pr1}/Dockerfile.run (100%) rename homework_2/{task1 => pr1}/Dockerfile.web (100%) rename homework_2/{task1 => pr1}/app.py (100%) rename homework_2/{task1 => pr1}/dockerignore (100%) rename homework_2/{task1 => pr1}/requirements.txt (100%) rename homework_2/{task1 => pr1}/run.sh (100%) rename homework_2/{task1 => pr1}/web.sh (100%) rename homework_2/{task2 => pr3}/deployment_service.yaml (100%) rename homework_2/{task2 => pr3}/job.yaml (100%) rename homework_2/{task2 => pr3}/pod.yaml (100%) create mode 100644 homework_3/README.md delete mode 100644 homework_3/pr3/README.md rename homework_3/{pr5 => pr4}/inference_time_comparison.jpg (100%) create mode 100644 homework_3/pr4/main.py create mode 100644 homework_3/pr4/requirements.txt delete mode 100644 homework_3/pr5/README.md delete mode 100644 homework_3/pr6/README.md delete mode 100644 homework_3/pr7/README.md delete mode 100644 homework_3/pr7/main.py delete mode 100644 homework_3/pr7/requirements.txt create mode 100644 homework_4/README.md delete mode 100644 homework_4/pr2/README.md delete mode 100644 homework_4/pr4/README.md rename homework_5/{pr6 => }/README.md (56%) delete mode 100644 homework_5/pr1/README.md rename homework_5/{pr5 => pr4}/main.py (100%) rename homework_5/{pr5 => pr4}/model.py (100%) rename homework_5/{pr6 => pr5}/accelerate_ddp.py (100%) rename homework_5/{pr6 => pr5}/accelerate_screen.png (100%) rename homework_5/{pr6 => pr5}/config_acc.yaml (100%) rename homework_5/{pr6 => pr5}/pytorch_ddp.py (100%) rename homework_5/{pr6 => pr5}/pytorch_screen.png (100%) rename homework_5/{pr6 => pr5}/ray_ddp.py (100%) rename homework_5/{pr6 => pr5}/ray_screen.png (100%) rename homework_5/{pr6 => pr5}/requirements.txt (100%) create mode 100644 homework_6/README.md create mode 100644 intro.jpg diff --git a/README.md b/README.md index 5730778..331c1c4 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,84 @@ -# How to start: +# Machine Learning in Production -#### create virtual environment in the root folder: 
+![image](intro.jpg) + + +The **Machine Learning in Production Course** is a comprehensive curriculum designed to equip learners with the knowledge and practical skills needed to build, deploy, and manage machine learning systems at scale. The course combines theoretical insights with hands-on assignments to prepare participants for real-world challenges in MLOps (Machine Learning Operations). Below is an overview of the key topics covered in this course: + +### **Course Modules** + +1. **MLOps Introduction** + - Fundamentals of MLOps and its importance in modern machine learning workflows. + +2. **Infrastructure Setup** + - Setting up infrastructure for machine learning projects. + - Focus on tools, cloud platforms, and deployment environments. + +3. **Data Storage and Processing** + - Best practices for managing data at scale. + - Storage strategies, data preprocessing, and pipelines. + +4. **Versioning and Labeling** + - Version control for datasets and models. + - Effective labeling and validation strategies. + +5. **Training and Experimentation** + - Designing robust training pipelines and running experiments. + - Tools for tracking metrics and improving model performance. + +6. **Testing and CI/CD** + - Implementing testing strategies for machine learning systems. + - Continuous Integration and Continuous Deployment for ML projects. + +7. **Orchestration with Kubeflow and Airflow** + - Automating workflows using orchestration tools like Kubeflow and Airflow. + +8. **Orchestration with Dagster** + - Advanced orchestration techniques with Dagster. + +9. **Serving Basics** + - Fundamentals of serving machine learning models via APIs. + +10. **Inference Servers** + - Understanding inference servers and optimizing their performance. + +11. **Advanced Serving Features and Benchmarking** + - Advanced serving techniques and benchmarking model performance. + +12. 
**Scaling Infrastructure and Models** + - Techniques for scaling machine learning models and infrastructure to handle production workloads. + +13. **Monitoring and Observability** + - Tools and techniques for monitoring ML systems in production. + - Implementing observability to track model health and data quality. + +14. **Tools, LLMs, and Data Moats** + - Exploring state-of-the-art tools and methodologies. + - Leveraging large language models (LLMs) and building competitive data strategies. + +15. **ML Platforms** + - Overview of ML platforms and their role in scaling machine learning operations. + + +### How to start: + +1. **Create virtual environment in the root folder:** ```bash cd /path/to/your/root/folder python -m venv env ``` -#### activate virtual environment: +2. **Activate virtual environment:** ```bash source env/bin/activate ``` -#### upgrade pip: +3. **Upgrade pip:** ```bash python -m pip install --upgrade pip ``` -#### install requirements: +4. **Install requirements:** ```bash pip install -r main_requirements.txt ``` diff --git a/homework_2/README.md b/homework_2/README.md index a290ed9..998cfbd 100644 --- a/homework_2/README.md +++ b/homework_2/README.md @@ -1,20 +1,18 @@ -# Homework 2 +# Homework 2: Infrastructure setup This repository contains tasks related to Docker, Kubernetes, and GitHub Actions. -## Structure -The repository is organized into the following directories: +## Tasks: -- `homework_2/task1/`: Contains Docker-related tasks. -- `homework_2/task2/`: Contains Kubernetes-related tasks. -- `.github/workflows/`: Contains the GitHub Actions workflows. +- PR1: Write a dummy Dockerfile with a simple server and push it to your Docker Hub or GitHub Docker registry. +- PR2: Write a CI/CD pipeline with GitHub Actions that does this for each PR. +- PR3: Write YAML definitions for a Pod, Deployment, Service, and Job with your Docker image. Use minikube/kind for testing it. Install the k9s tool. 
### Task 1: Docker -#### PR1: `homework_2/task1` - -This task involves working with Docker. The following scripts are available: +- folder: `homework_2/pr1` +- This task involves working with Docker. The following scripts are available: 1. **First Docker Container:** - **Purpose**: Builds and runs a task that prints output. @@ -35,15 +33,13 @@ This task involves working with Docker. The following scripts are available: ### Task 2: GitHub Actions -#### PR2: `.github/workflows` - -This directory contains GitHub Actions workflows used for CI/CD automation. +- folder: `.github/workflows` +- This directory contains GitHub Actions workflows used for CI/CD automation. ### Task 3: Kubernetes -#### PR3: `homework_2/task2` - -This task involves working with Kubernetes resources. +- folder: `homework_2/pr3` +- This task involves working with Kubernetes resources. 1. **Pod**: - **Command**: diff --git a/homework_2/task1/Dockerfile.run b/homework_2/pr1/Dockerfile.run similarity index 100% rename from homework_2/task1/Dockerfile.run rename to homework_2/pr1/Dockerfile.run diff --git a/homework_2/task1/Dockerfile.web b/homework_2/pr1/Dockerfile.web similarity index 100% rename from homework_2/task1/Dockerfile.web rename to homework_2/pr1/Dockerfile.web diff --git a/homework_2/task1/app.py b/homework_2/pr1/app.py similarity index 100% rename from homework_2/task1/app.py rename to homework_2/pr1/app.py diff --git a/homework_2/task1/dockerignore b/homework_2/pr1/dockerignore similarity index 100% rename from homework_2/task1/dockerignore rename to homework_2/pr1/dockerignore diff --git a/homework_2/task1/requirements.txt b/homework_2/pr1/requirements.txt similarity index 100% rename from homework_2/task1/requirements.txt rename to homework_2/pr1/requirements.txt diff --git a/homework_2/task1/run.sh b/homework_2/pr1/run.sh similarity index 100% rename from homework_2/task1/run.sh rename to homework_2/pr1/run.sh diff --git a/homework_2/task1/web.sh b/homework_2/pr1/web.sh similarity 
index 100% rename from homework_2/task1/web.sh rename to homework_2/pr1/web.sh diff --git a/homework_2/task2/deployment_service.yaml b/homework_2/pr3/deployment_service.yaml similarity index 100% rename from homework_2/task2/deployment_service.yaml rename to homework_2/pr3/deployment_service.yaml diff --git a/homework_2/task2/job.yaml b/homework_2/pr3/job.yaml similarity index 100% rename from homework_2/task2/job.yaml rename to homework_2/pr3/job.yaml diff --git a/homework_2/task2/pod.yaml b/homework_2/pr3/pod.yaml similarity index 100% rename from homework_2/task2/pod.yaml rename to homework_2/pr3/pod.yaml diff --git a/homework_3/README.md b/homework_3/README.md new file mode 100644 index 0000000..81f9576 --- /dev/null +++ b/homework_3/README.md @@ -0,0 +1,18 @@ +# Homework 3: Storage and Processing + +## Tasks: + +- PR1: Write README instructions detailing how to deploy MinIO with the following options: Local, Docker, Kubernetes (K8S)-based. +- PR2: Develop a CRUD Python client for MinIO and accompany it with comprehensive tests. +- PR3: Write code to benchmark various Pandas formats in terms of data saving/loading, focusing on load time and save time. +- PR4: Create code to benchmark inference performance using single and multiple processes, and report the differences in time. +- PR5: Develop code for converting your dataset into the StreamingDataset format. +- PR6: Write code for transforming your dataset into a vector format, and utilize VectorDB for ingestion and querying. + + +### PR6: example + +```bash +python main.py create-index +python main.py search-index "Who are you?" --top-n 2 +``` \ No newline at end of file diff --git a/homework_3/pr2/tests.py b/homework_3/pr2/tests.py index e639c85..20c76bb 100644 --- a/homework_3/pr2/tests.py +++ b/homework_3/pr2/tests.py @@ -1,14 +1,3 @@ -''' -Before starting the script, create a virtual environment: - -1. cd /path/to/your/project -2. python -m venv env -3. source env/bin/activate -4. 
pip install -r requirements.txt - -After these steps start script from cmd: -5. python tests.py -''' from minio import Minio from minio.error import S3Error import pytest diff --git a/homework_3/pr3/README.md b/homework_3/pr3/README.md deleted file mode 100644 index 07f7d86..0000000 --- a/homework_3/pr3/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# PR3: Write code to benchmark various Pandas formats in terms of data saving/loading - -## Setup - -1. Clone the repository: - ``` - git clone https://github.com/danilyef/machine_learning_in_production.git - cd homework_3/pr3 - ``` - -2. Create and activate a virtual environment: - ``` - python -m venv env - source env/bin/activate - ``` - -3. Install the required packages: - ``` - pip install -r requirements.txt - ``` - -4. Run the main script: - ``` - python main.py - ``` \ No newline at end of file diff --git a/homework_3/pr3/main.py b/homework_3/pr3/main.py index 78c4bb5..c288901 100644 --- a/homework_3/pr3/main.py +++ b/homework_3/pr3/main.py @@ -1,18 +1,3 @@ -''' -Before starting the script, create a virtual environment: - -1. cd /path/to/your/project -2. python -m venv env -3. source env/bin/activate -4. pip install -r requirements.txt - -After these steps start script from cmd: -5. python main.py -''' - - - - import pandas as pd import numpy as np import time diff --git a/homework_3/pr5/inference_time_comparison.jpg b/homework_3/pr4/inference_time_comparison.jpg similarity index 100% rename from homework_3/pr5/inference_time_comparison.jpg rename to homework_3/pr4/inference_time_comparison.jpg diff --git a/homework_3/pr4/main.py b/homework_3/pr4/main.py new file mode 100644 index 0000000..e139998 --- /dev/null +++ b/homework_3/pr4/main.py @@ -0,0 +1,90 @@ +''' +Before starting the script, create a virtual environment: + +1. cd /path/to/your/project +2. python -m venv env +3. source env/bin/activate +4. pip install -r requirements.txt + +After these steps start script from cmd: +5. 
python main.py +''' +import time +import multiprocessing as mp +from multiprocessing import Pool, cpu_count +from sklearn.linear_model import LinearRegression +from sklearn.datasets import make_regression +from sklearn.model_selection import train_test_split +import numpy as np +import matplotlib.pyplot as plt + + +# Prepare a sample dataset and model for benchmarking +def create_model_and_data(): + # Create a synthetic regression dataset with 100 features + X, y = make_regression(n_samples=200000, n_features=100, noise=0.1, random_state=42) + + # Split into training and testing sets + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.99, random_state=42) + + # Train a simple Linear Regression model + model = LinearRegression() + model.fit(X_train, y_train) + + return model, X_test + +# Single inference task using the sklearn model +def inference_task(args): + model, data = args + time.sleep(0.005) + # Simulate model inference (predicting the data) + return model.predict(data) + + +def single_process_inference(model, batches): + start_time = time.time() + + for batch in batches: + inference_task((model, batch)) + + elapsed_time = time.time() - start_time + return elapsed_time + + +def multiple_process_inference(model, batches, num_processes=16): + start_time = time.time() + + with mp.Pool(processes=num_processes) as pool: + pool.map(inference_task, [(model, batch) for batch in batches]) + + elapsed_time = time.time() - start_time + return elapsed_time + + +if __name__ == '__main__': + model, X_test = create_model_and_data() + + batch_sizes = [100, 2000] + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 6)) + + colors = ['#1f77b4', '#ff7f0e'] + + for i, batch_size in enumerate(batch_sizes): + num_batches = len(X_test) // batch_size + (1 if len(X_test) % batch_size != 0 else 0) + data_batches = np.array_split(X_test, num_batches) + + single_process_time = single_process_inference(model, data_batches) + multiple_process_time = 
multiple_process_inference(model, data_batches) + + methods = ['Single Process', 'Multiple Processes'] + times = [single_process_time, multiple_process_time] + + ax = ax1 if i == 0 else ax2 + ax.bar(methods, times, color=colors) + ax.set_title(f'Inference Time Comparison (Batch Size: {batch_size})') + ax.set_xlabel('Method') + ax.set_ylabel('Time (seconds)') + + plt.tight_layout() + plt.savefig('inference_time_comparison.jpg') + plt.close(fig) diff --git a/homework_3/pr4/requirements.txt b/homework_3/pr4/requirements.txt new file mode 100644 index 0000000..f76f088 --- /dev/null +++ b/homework_3/pr4/requirements.txt @@ -0,0 +1,6 @@ +pandas==2.2.2 +numpy===1.26.4 +pyarrow==17.0.0 +matplotlib==3.8.4 +tables==3.9.2 +scikit-learn==1.5.1 \ No newline at end of file diff --git a/homework_3/pr5/README.md b/homework_3/pr5/README.md deleted file mode 100644 index ed3878e..0000000 --- a/homework_3/pr5/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# PR5: Create code to benchmark inference performance using single and multiple processes, and report the differences in time. - -## Setup - -1. Clone the repository: - ``` - git clone https://github.com/danilyef/machine_learning_in_production.git - cd homework_3/pr5 - ``` - -2. Create and activate a virtual environment: - ``` - python -m venv env - source env/bin/activate - ``` - -3. Install the required packages: - ``` - pip install -r requirements.txt - ``` - -4. Run the main script: - ``` - python main.py - ``` \ No newline at end of file diff --git a/homework_3/pr5/main.py b/homework_3/pr5/main.py index e139998..ba8c489 100644 --- a/homework_3/pr5/main.py +++ b/homework_3/pr5/main.py @@ -1,90 +1,64 @@ -''' -Before starting the script, create a virtual environment: +import datasets +import pandas as pd +import torch +from torch.utils.data import IterableDataset, DataLoader -1. cd /path/to/your/project -2. python -m venv env -3. source env/bin/activate -4. pip install -r requirements.txt -After these steps start script from cmd: -5. 
python main.py -''' -import time -import multiprocessing as mp -from multiprocessing import Pool, cpu_count -from sklearn.linear_model import LinearRegression -from sklearn.datasets import make_regression -from sklearn.model_selection import train_test_split -import numpy as np -import matplotlib.pyplot as plt +# Huggingface example +def create_data(num_rows: int = 1000): + return { + 'id': range(1, num_rows + 1), + 'name': [f'Person_{i}' for i in range(1, num_rows + 1)], + 'age': [20 + i % 60 for i in range(num_rows)], + 'score': [round(50 + 50 * torch.rand(1).item(), 2) for _ in range(num_rows)] + } +def create_iterable_dataset(data): + dataset = datasets.Dataset.from_pandas(pd.DataFrame(data)) + return dataset.to_iterable_dataset() -# Prepare a sample dataset and model for benchmarking -def create_model_and_data(): - # Create a synthetic regression dataset with 100 features - X, y = make_regression(n_samples=200000, n_features=100, noise=0.1, random_state=42) - - # Split into training and testing sets - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.99, random_state=42) - - # Train a simple Linear Regression model - model = LinearRegression() - model.fit(X_train, y_train) - - return model, X_test -# Single inference task using the sklearn model -def inference_task(args): - model, data = args - time.sleep(0.005) - # Simulate model inference (predicting the data) - return model.predict(data) +def main_iterable_dataset(): + # Create a dataset with 1000 rows + data = create_data(num_rows=1000) + # Convert the data to an iterable dataset + dataset = create_iterable_dataset(data) + # Iterate through the dataset and print the first 10 records + for i, record in enumerate(dataset): + print(f"Record {i + 1}:", record) + if i == 9: + break # Stop after printing 10 records -def single_process_inference(model, batches): - start_time = time.time() - for batch in batches: - inference_task((model, batch)) +class StreamingDataset(IterableDataset): + def 
__init__(self, data): + self.data = data - elapsed_time = time.time() - start_time - return elapsed_time + def __iter__(self): + for i in range(len(self.data['id'])): + yield {key: self.data[key][i] for key in self.data} -def multiple_process_inference(model, batches, num_processes=16): - start_time = time.time() +def main_streaming_dataset(): + # Create the data + data = create_data() - with mp.Pool(processes=num_processes) as pool: - pool.map(inference_task, [(model, batch) for batch in batches]) + # Create the streaming dataset + dataset = StreamingDataset(data) - elapsed_time = time.time() - start_time - return elapsed_time + # Create a DataLoader for batching + dataloader = DataLoader(dataset, batch_size=32) + # Example + for i, batch in enumerate(dataloader): + print(batch) + print('-' * 50) + if i == 1: + break -if __name__ == '__main__': - model, X_test = create_model_and_data() - batch_sizes = [100, 2000] - fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 6)) - - colors = ['#1f77b4', '#ff7f0e'] - - for i, batch_size in enumerate(batch_sizes): - num_batches = len(X_test) // batch_size + (1 if len(X_test) % batch_size != 0 else 0) - data_batches = np.array_split(X_test, num_batches) - - single_process_time = single_process_inference(model, data_batches) - multiple_process_time = multiple_process_inference(model, data_batches) - - methods = ['Single Process', 'Multiple Processes'] - times = [single_process_time, multiple_process_time] - - ax = ax1 if i == 0 else ax2 - ax.bar(methods, times, color=colors) - ax.set_title(f'Inference Time Comparison (Batch Size: {batch_size})') - ax.set_xlabel('Method') - ax.set_ylabel('Time (seconds)') - - plt.tight_layout() - plt.savefig('inference_time_comparison.jpg') - plt.close(fig) +if __name__ == "__main__": + main_iterable_dataset() + print('-' * 50) + main_streaming_dataset() diff --git a/homework_3/pr5/requirements.txt b/homework_3/pr5/requirements.txt index f76f088..e3cc535 100644 --- 
a/homework_3/pr5/requirements.txt +++ b/homework_3/pr5/requirements.txt @@ -1,6 +1,3 @@ -pandas==2.2.2 -numpy===1.26.4 -pyarrow==17.0.0 -matplotlib==3.8.4 -tables==3.9.2 -scikit-learn==1.5.1 \ No newline at end of file +datasets==2.16.0 +torch==2.1.1 +pandas==2.1.1 \ No newline at end of file diff --git a/homework_3/pr6/README.md b/homework_3/pr6/README.md deleted file mode 100644 index 5aa7571..0000000 --- a/homework_3/pr6/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# PR6: Develop code for converting your dataset into the StreamingDataset format. - -## Setup - -1. Clone the repository: - ``` - git clone https://github.com/danilyef/machine_learning_in_production.git - cd homework_3/pr6 - ``` - -2. Create and activate a virtual environment: - ``` - python -m venv env - source env/bin/activate - ``` - -3. Install the required packages: - ``` - pip install -r requirements.txt - ``` - -4. Run the main script: - ``` - python main.py - ``` \ No newline at end of file diff --git a/homework_3/pr6/main.py b/homework_3/pr6/main.py index ba8c489..c3a8b0c 100644 --- a/homework_3/pr6/main.py +++ b/homework_3/pr6/main.py @@ -1,64 +1,69 @@ -import datasets -import pandas as pd -import torch -from torch.utils.data import IterableDataset, DataLoader +from datasets import load_dataset +import lancedb +from lancedb.pydantic import LanceModel, Vector +from lancedb.embeddings import get_registry -# Huggingface example -def create_data(num_rows: int = 1000): - return { - 'id': range(1, num_rows + 1), - 'name': [f'Person_{i}' for i in range(1, num_rows + 1)], - 'age': [20 + i % 60 for i in range(num_rows)], - 'score': [round(50 + 50 * torch.rand(1).item(), 2) for _ in range(num_rows)] - } +from datasets import load_dataset +from sentence_transformers import SentenceTransformer +import typer -def create_iterable_dataset(data): - dataset = datasets.Dataset.from_pandas(pd.DataFrame(data)) - return dataset.to_iterable_dataset() +app = typer.Typer() -def main_iterable_dataset(): - # Create a 
dataset with 1000 rows - data = create_data(num_rows=1000) - # Convert the data to an iterable dataset - dataset = create_iterable_dataset(data) - # Iterate through the dataset and print the first 10 records - for i, record in enumerate(dataset): - print(f"Record {i + 1}:", record) - if i == 9: - break # Stop after printing 10 records +@app.command() +def create_index(): + dataset = load_dataset("Prarabdha/Rick_and_Morty_Transcript") + dataset = dataset.remove_columns(['Unnamed: 0', 'episode no.']) + # Select 100 records randomly + sample_size = 2000 + dataset = dataset['train'].shuffle(seed=42).select(range(sample_size)) + # Create vector embeddings + docs = [d['dialouge'] for d in dataset] -class StreamingDataset(IterableDataset): - def __init__(self, data): - self.data = data + model = SentenceTransformer('BAAI/bge-small-en-v1.5') + embeddings = model.encode(docs, show_progress_bar=True) - def __iter__(self): - for i in range(len(self.data['id'])): - yield {key: self.data[key][i] for key in self.data} + # create data for index + data = [ + { + "id": idx, + "vector": embeddings[idx], + "speaker": dataset[idx]['speaker'], + "dialouge": dataset[idx]["dialouge"], + } + for idx in range(len(dataset)) + ] + # Create index + db = lancedb.connect("/tmp/db") + table = db.create_table('Index', data=data, mode="overwrite") + table.create_index() -def main_streaming_dataset(): - # Create the data - data = create_data() + typer.echo("Index created successfully!") - # Create the streaming dataset - dataset = StreamingDataset(data) +@app.command() +def search_index(query: str, top_n: int = 2): + db = lancedb.connect("/tmp/db") + table = db.open_table('Index') + + model = SentenceTransformer('BAAI/bge-small-en-v1.5') + query_embedding = model.encode(query) - # Create a DataLoader for batching - dataloader = DataLoader(dataset, batch_size=32) + results = table.search(query_embedding).limit(top_n).to_list() + typer.echo('Results:') + + - # Example - for i, batch in 
enumerate(dataloader): - print(batch) - print('-' * 50) - if i == 1: - break + for result in results: + typer.echo(result["speaker"]) + typer.echo(result["dialouge"]) + typer.echo() if __name__ == "__main__": - main_iterable_dataset() - print('-' * 50) - main_streaming_dataset() + app() + + diff --git a/homework_3/pr6/requirements.txt b/homework_3/pr6/requirements.txt index e3cc535..1dcc6b9 100644 --- a/homework_3/pr6/requirements.txt +++ b/homework_3/pr6/requirements.txt @@ -1,3 +1,5 @@ -datasets==2.16.0 -torch==2.1.1 -pandas==2.1.1 \ No newline at end of file +typer==0.9.0 +datasets==2.15.0 +sentence-transformers==2.2.2 +lance==1.2.1 +lancedb==0.6.4 \ No newline at end of file diff --git a/homework_3/pr7/README.md b/homework_3/pr7/README.md deleted file mode 100644 index 7320191..0000000 --- a/homework_3/pr7/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# PR7: Write code for transforming your dataset into a vector format, and utilize VectorDB for ingestion and querying. - -## Setup - -1. Clone the repository: - ``` - git clone https://github.com/danilyef/machine_learning_in_production.git - cd homework_3/pr7 - ``` - -2. Create and activate a virtual environment: - ``` - python -m venv env - source env/bin/activate - ``` - -3. Install the required packages: - ``` - pip install -r requirements.txt - ``` - -4. Run the main script: - ``` - python main.py create-index - python main.py search-index "Who are you?" 
--top-n 2 - ``` \ No newline at end of file diff --git a/homework_3/pr7/main.py b/homework_3/pr7/main.py deleted file mode 100644 index c3a8b0c..0000000 --- a/homework_3/pr7/main.py +++ /dev/null @@ -1,69 +0,0 @@ - -from datasets import load_dataset -import lancedb -from lancedb.pydantic import LanceModel, Vector -from lancedb.embeddings import get_registry - -from datasets import load_dataset -from sentence_transformers import SentenceTransformer -import typer - - -app = typer.Typer() - -@app.command() -def create_index(): - dataset = load_dataset("Prarabdha/Rick_and_Morty_Transcript") - dataset = dataset.remove_columns(['Unnamed: 0', 'episode no.']) - - # Select 100 records randomly - sample_size = 2000 - dataset = dataset['train'].shuffle(seed=42).select(range(sample_size)) - - # Create vector embeddings - docs = [d['dialouge'] for d in dataset] - - model = SentenceTransformer('BAAI/bge-small-en-v1.5') - embeddings = model.encode(docs, show_progress_bar=True) - - # create data for index - data = [ - { - "id": idx, - "vector": embeddings[idx], - "speaker": dataset[idx]['speaker'], - "dialouge": dataset[idx]["dialouge"], - } - for idx in range(len(dataset)) - ] - - # Create index - db = lancedb.connect("/tmp/db") - table = db.create_table('Index', data=data, mode="overwrite") - table.create_index() - - typer.echo("Index created successfully!") - -@app.command() -def search_index(query: str, top_n: int = 2): - db = lancedb.connect("/tmp/db") - table = db.open_table('Index') - - model = SentenceTransformer('BAAI/bge-small-en-v1.5') - query_embedding = model.encode(query) - - results = table.search(query_embedding).limit(top_n).to_list() - typer.echo('Results:') - - - - for result in results: - - typer.echo(result["speaker"]) - typer.echo(result["dialouge"]) - typer.echo() - -if __name__ == "__main__": - app() - - diff --git a/homework_3/pr7/requirements.txt b/homework_3/pr7/requirements.txt deleted file mode 100644 index 1dcc6b9..0000000 --- 
a/homework_3/pr7/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -typer==0.9.0 -datasets==2.15.0 -sentence-transformers==2.2.2 -lance==1.2.1 -lancedb==0.6.4 \ No newline at end of file diff --git a/homework_4/README.md b/homework_4/README.md new file mode 100644 index 0000000..21f1a15 --- /dev/null +++ b/homework_4/README.md @@ -0,0 +1,74 @@ +# Homework 4: Versioning and labeling + +## Tasks: + +- PR1: Commit your data with DVC into the GitHub repo. +- PR2: Write code to deploy a labeling tool (e.g., Label Studio, Argilla), including README instructions. +- PR3: Write code to generate a synthetic dataset with ChatGPT. +- PR4: Write code to test your data after labeling (can use Cleanlab or Deepchecks) + + +### PR1: example + +```bash +# Add the dataset to DVC tracking +dvc add data/dataset.parquet + +# Commit the .gitignore and .dvc files +git add data/.gitignore data/dataset.parquet.dvc +git commit -m "Add dataset.parquet to DVC tracking" + +# Push the data to remote storage +dvc push +``` + +### PR2: Instructions + +You have texts in German. These are emails sent to the telecommunications company X, which must be categorized into the following categories: + +**Annotation Instructions:** + +- Bussines inquiry: everything that is related to the business of the company from company X business partners. +- Collection request: everything that is related to the collection of the debt for company X from private clients. Could be proof of payment or emails about +reopening closed accounts. +- Service request: everything that is related to the service of the company X. +- Cancelation: everything that is related to the cancelation of the service (contract termination, order cancellation etc.) of the company X. +- Billing: everything that is related to the billing problems. Explanation of the bill, sending a copy of the bill, +disputing a bill etc. 
+- Technical issue: everything that is related to the technical issues (networking problems, internet problems, website problems, etc.) of the company X. +- Payment method: everything that is related to the changes in the payment methods of the clients. +- Documents: inquiries about sending copies of the contracts to clients. +- Other: everything that is not related to the above categories, including spam. + + +**General Annotation Guidelines:** + +- Read the entire email carefully before categorizing. +- Choose only one category per email. +- If an email could fit multiple categories, select the most prominent or primary issue addressed. +- Pay attention to key phrases and context clues that indicate the email's purpose. +- If unsure, use the "Other" category. +- Be consistent in your categorization approach across all emails. +- If you encounter recurring themes that don't fit the existing categories, make a note for potential future category additions. +- Ignore salutations, signatures, and other non-content elements when determining the category. +- Consider the sender's intent and the main action they're requesting or information they're seeking. + +### PR4: results + +**Duplicate Issues** + +- Cleanlab identified 6 duplicate issues in our dataset. +- All of them belong to category 4 or category 8. + +**Label Issues** + +- Cleanlab identified 4 label issues in our dataset. +- They all have a score below 0.20 (which is quite low). +- Mislabeled emails belong to category 4 or category 2. +- Detailed analysis of label issues can be found in `label_issues_scores.csv` and `label_issues.csv` + +**Outlier Issues** + +- Cleanlab identified 1 outlier issue in our dataset. +- It belongs to category 1 and has a score lower than 0.20. 
+- Detailed analysis of outlier issues can be found in `outlier_issues_scores.csv` and `outlier_issues.csv` \ No newline at end of file diff --git a/homework_4/pr2/README.md b/homework_4/pr2/README.md deleted file mode 100644 index f377d82..0000000 --- a/homework_4/pr2/README.md +++ /dev/null @@ -1,30 +0,0 @@ -#### Anotation Instructions - -You have a text in german language. These are emails to sent the Telecommunication company X, which must be categorized into the following categories: - - -- Bussines inquiry: everything that is related to the business of the company from company X business partners. -- Collection request: everything that is related to the collection of the debt for company X from privat clients. Could be proof pf payment or emails about -reopening closed accounts. -- Service request: everything that is related to the service of the company X. -- Cancelation: everything that is related to the cancelation of the service (contract termination, order cancellation etc.) of the company X. -- Billing: everything that is related to the billing problems. Explanationf of the bill, sending copie of the bill, -dispute a bill etc. -- Technical issue: everything that is related to the technical issues (Networking problems ,internet problems, website problems, etc.) of the company X. -- Payment method: everything that is related to the changes in the payment methods of the clients. -- Documents: inquiry about sendings copies of the contracts to clients. -- Other: everything that is not related to the above categories, including spam. - - -#### General Annotation Guidelines: - -1. Read the entire email carefully before categorizing. -2. Choose only one category per email. -3. If an email could fit multiple categories, select the most prominent or primary issue addressed. -4. Pay attention to key phrases and context clues that indicate the email's purpose. -5. If unsure, use the "Other" category. -6. 
Be consistent in your categorization approach across all emails. -7. If you encounter recurring themes that don't fit the existing categories, make a note for potential future category additions. -8. Ignore salutations, signatures, and other non-content elements when determining the category. -9. Consider the sender's intent and the main action they're requesting or information they're seeking. -10. If the email is in a language other than German or English, categorize it as "Other" and make a note. diff --git a/homework_4/pr4/README.md b/homework_4/pr4/README.md deleted file mode 100644 index a5a5a43..0000000 --- a/homework_4/pr4/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# PR4: Write code for transforming your dataset into a vector format, and utilize VectorDB for ingestion and querying. - - -# Cleanlab Discoveries - -**Duplicate Issues** - -- Cleanlab identified 6 duplicate issues in our dataset. -- All of them belong to category 4 or category 8. - -**Label Issues** - -- Cleanlab identified 4 label issues in our dataset. -- they all have score below 0.20 (which is quite low) -- Mislabeled emails belong to category 4 or category 2. -- Detailed analysis of label issues can be found in `label_issues_scores.csv` and `label_issues.csv` - -**Outlier Issues** - -- Cleanlab identified 1 outlier issue in our dataset. -- It belongs to category 1 and has a score lower than 0.20. -- Detailed analysis of outlier issues can be found in `outlier_issues_scores.csv` and `outlier_issues.csv` \ No newline at end of file diff --git a/homework_5/pr6/README.md b/homework_5/README.md similarity index 56% rename from homework_5/pr6/README.md rename to homework_5/README.md index 91c3b1a..1aa2092 100644 --- a/homework_5/pr6/README.md +++ b/homework_5/README.md @@ -1,22 +1,32 @@ -## Data Distributed Training +# Homework 5: Training & Experiments -For the testing of 3 Frameworkds I hused Lambda service and rented 1 machine with 4 GPUs. 
+## Tasks: +- PR1: Write code for training your model using the W&B experiment logger. +- PR2: Write code for conducting hyperparameter searches with W&B. +- PR3: Write code to create a model card for your model, which can be a simple markdown or utilize this toolset +- PR4: Write code for hyperparameter searches using NNI +- PR5: Write code for distributed training with PyTorch, Accelerate, and Ray. -#### Pytorch DistributedDataParallel -The main framework for distributed data training. The idea is, that mp.spawn to launch multiple processes for distributed training and in my case I launched 2 processes for 2 GPUs (forgot to change to 4 :( ). +### PR1: Note -It was pretty straightforward to implement, because there are a lot of guides. But for non-standard scenario configuration could me tricky. +All config files and the dataset are still to be added to the repo. -#### Accelerate Hugging Face +### PR5: Data Distributed Training -Very easy to use. All you have to do is to wrap everything, that you want to distribute on several gpus into accelerate and prepare cnfig file (which is easily done in cmd by answering the questions about your use case). +For testing the 3 frameworks, I used the Lambda service and rented 1 machine with 4 GPUs. -#### Ray DDP +**Pytorch DistributedDataParallel** -Unfortunatelly I had a problemn with running DDP on ray on Mnist dataset (for some uknown for me reason it was not working), that's why I used netword from a guide. From all of the framworks I like it the most, because it allows more customization, that accelerate, but at the samt time it's more easier to use than pytorch DDP.. +The main framework for distributed data training. The idea is to use mp.spawn to launch multiple processes for distributed training; in my case I launched 2 processes for 2 GPUs (forgot to change to 4 :( ). +It was pretty straightforward to implement, because there are a lot of guides. But for non-standard scenarios the configuration could be tricky. 
+ +**Accelerate Hugging Face** +Very easy to use. All you have to do is wrap everything that you want to distribute across several GPUs with Accelerate and prepare a config file (which is easily done on the command line by answering questions about your use case). +**Ray DDP** +Unfortunately, I had a problem running DDP on Ray with the MNIST dataset (for some reason unknown to me it was not working), which is why I used the network from a guide. Of all the frameworks I like it the most, because it allows more customization than Accelerate, but at the same time it is easier to use than PyTorch DDP. diff --git a/homework_5/pr1/README.md b/homework_5/pr1/README.md deleted file mode 100644 index 2f9ee0d..0000000 --- a/homework_5/pr1/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# Homework 5 - PR1: Training with wandb - -## Disclaimer - -The code is almost copy from production code from work. -Unfortunatelly, I can't provide configs and dataset, but the code is working and report fro wandb is attached:) - -## Folder structure - -``` -homework_5/ - pr1/ - metadata_dir/ - transform/ - schema/ - model/ - dataset/ - data_dir/ - pipeline_configs/ - dataset_config.json - transform_config.json - training_config.json - data_type_config.json - main.py - requirements.txt -``` diff --git a/homework_5/pr5/main.py b/homework_5/pr4/main.py similarity index 100% rename from homework_5/pr5/main.py rename to homework_5/pr4/main.py diff --git a/homework_5/pr5/model.py b/homework_5/pr4/model.py similarity index 100% rename from homework_5/pr5/model.py rename to homework_5/pr4/model.py diff --git a/homework_5/pr6/accelerate_ddp.py b/homework_5/pr5/accelerate_ddp.py similarity index 100% rename from homework_5/pr6/accelerate_ddp.py rename to homework_5/pr5/accelerate_ddp.py diff --git a/homework_5/pr6/accelerate_screen.png b/homework_5/pr5/accelerate_screen.png similarity index 100% rename from homework_5/pr6/accelerate_screen.png rename to homework_5/pr5/accelerate_screen.png diff --git 
a/homework_5/pr6/config_acc.yaml b/homework_5/pr5/config_acc.yaml similarity index 100% rename from homework_5/pr6/config_acc.yaml rename to homework_5/pr5/config_acc.yaml diff --git a/homework_5/pr6/pytorch_ddp.py b/homework_5/pr5/pytorch_ddp.py similarity index 100% rename from homework_5/pr6/pytorch_ddp.py rename to homework_5/pr5/pytorch_ddp.py diff --git a/homework_5/pr6/pytorch_screen.png b/homework_5/pr5/pytorch_screen.png similarity index 100% rename from homework_5/pr6/pytorch_screen.png rename to homework_5/pr5/pytorch_screen.png diff --git a/homework_5/pr6/ray_ddp.py b/homework_5/pr5/ray_ddp.py similarity index 100% rename from homework_5/pr6/ray_ddp.py rename to homework_5/pr5/ray_ddp.py diff --git a/homework_5/pr6/ray_screen.png b/homework_5/pr5/ray_screen.png similarity index 100% rename from homework_5/pr6/ray_screen.png rename to homework_5/pr5/ray_screen.png diff --git a/homework_5/pr6/requirements.txt b/homework_5/pr5/requirements.txt similarity index 100% rename from homework_5/pr6/requirements.txt rename to homework_5/pr5/requirements.txt diff --git a/homework_6/README.md b/homework_6/README.md new file mode 100644 index 0000000..309175e --- /dev/null +++ b/homework_6/README.md @@ -0,0 +1,12 @@ +# Homework 6: Testing and CI/CD + +## Tasks: + +- PR1: Write tests for code, tests should be runnable from CI. +- PR2: Write tests for data, tests should be runnable from CI. +- PR3: Write tests for model, tests should be runnable from CI. +- PR4: Write code to store your model in model management with W&B. +- PR5: Write code to use LIT for your model, in the case of other domains (CV, audio, tabular) find and use a similar tool. +- PR6: Write code to test LLM API (select any LLM - OpenAI, VertexAI, etc). 
+ + diff --git a/intro.jpg b/intro.jpg new file mode 100644 index 0000000000000000000000000000000000000000..8e6c17e4b4d4ba30f2a867c1947ea523729693f3 GIT binary patch literal 21828 zcmeHv2Ut{Dw)I6qk(1<{1eBy?iGq?uBuOj~6_6x32nZB{faIWnWCSE<2}&x-AR;+f zCDs_@=y)Jb-o%0MNi6 z05t+!2e7fQAXu2#5C{YZ2OAfki~t`G51*R!JP{c^4I=|R4ILd58!rbF3pXnr9jC}e z?#ovM1qB&7u1bpXOYjN^@}D(=hJ%BHkB3i1KtRRMOvlXscRx@c021tTbm$+@(O7_U zBxvX)Xs9ND5$q=>+7A!lFAubH=opw-5NsS=Jg`DJF>nqI9sL{zIwmFt23Xq@d>_Cd z!6aqAB7=2a{XT@no{ax#vgIJgv)RMa#V*f}^aUJ?`%77@KF zcKwE&yn>?AP0c%6+B&*=`o<=v56#RiEFGPkT^_r-x%)oz^AC6)7!>v5WpqsJtGM{I z^o%!|S=l+crEkm1D=Oc;uWD#)YHn$5Yya5W*FP{gH2eiIH9a#s_jP_@acOgFduMlV z|KRZGY+Psn`j5i`fB!hJZ^lIej_Vu-208}hY+PvP9)ks)1Ot=#3Kpr1I^@3nc^3Ys z*krPiuS@E2SOqjT$R9ZL;8L&&PF>hM8`=*e`)vdB`CpCfzXtZ-<3a%V=xE^Lp_2g8 zz}^vOnm6vB<==k}%n}$+c2tiS)J2X0rN^W{*2H^DJ|b>p@{yF|v9BYHKlr$LX|0s5 z#3pEbV#+WYAXAt7k0==DuvwV~_*OhckRCkn_3pefH4M9Y@|p*t#){oecc{oOkg1`` z{aOB;ga7nBK(4GgU@e&79J4sWFWiUa3FEspn%z2U@)x|R$2o^0Wz!SQ2<6S3fR(O{BoBE z1*Gl;q5vNY*x@B_q3ryWl_Clh(Ea2F3aA%^pF}I{=>)?TB-3D%@kYp-sRWi>9LHSE z@EwL8=?xV=6mUWPG zo2Iocf1Y^LH}ze}IjSdRPoAQHLMy0H>~xqA3^%fWDNLHEWHe=gOXE8{C9vBmSC&M& zcF4MI8YD0$51Y#fcxKf6oH5j}+sUa%Fr#8(-r(xCO(D^EvhjZ6sjAE{06AAuqlW^R zg71GRSCLeGClcXnX!@(5gdB4&sqVFzP}1l{p_(=g#ZeRfcQ3|pd)1kCcg;fPpk#o$+#jQ8!=6gd zCc9c8>$TxW7p-L9p5UMWw(UlvJ=}5>(4%~f{(Fn?t%Xp(MmG^8JEYmcEw;KhVbBhis^NjEuSd z8l!$G_1me}XoX^xue z0qLIgUaBa9+tU~~j=71vORBa8?w83wK)vcm0$?OnIhjB1Aquy?v-m zO)I)fMdw65d~hDAH17{zA-6&SX))enKMg#u8j&p{T2cxZ^rb>EgGVj>XZTzVE@5|z zY;1p4QLvacw7NZ%zY|Vov1f+@%qf(`N2ueb-2k4s;(NFyKx99imNcay^vj9-#*KZ= zW(=;qlXfmY^;bIYvptk^hYu2(>S%hLtg>xwc$_(RX|pQrS)k1{mE-q6aO-k1eDxNy zR&HUS?b*dF?sx9MgI2VzJxtQ_cQ-lTI&()tX=aSJk)|a&%V-SRGSn37#(^{2TK+u3 zcR|$RR?qQx`7(4RC6V~rK;wnSBW;kpI=?HUxH{zF5etn8s8ff&r_O7 z9@iqEE4gfH_ryn(Tgcr{6lMG5ZVCmAgyv16i3C-B(mCy||_7bgmcsPu!aTtWdy3Xb~GfiqCi zqP3GN)&jRn>TUu$%h9 z`tBc>pg-NcDt&YJ2)IbMDsL+jum31zDdn{98XZx9&^o=BMeen_(Y;wZXtX+& zgSgwP4^lg5y{gmegitGV#T009xH>oG%Bonvs5`!ykEd9FRZ(D>Bte|Bp>}U)%oS$6 
zH@Ge@yzM6<6sGQ8%_vww0eI`&<@9C^6)~UKRpe}W*sQYWM@Qn?V&fh=5Ki18j7ALW zvU+IuAxBErrY9OnrXSx{J{;ynn?2tJXZIFU6q;a10S2+^{Z4QCxS#P+hYl;@mfEmC z+7PHWaLLLFIpN>FD_ez3G%3{>VqL{MP9?;V*b`;c4BCcMz*;MtQ;zn_KnHlPvPS03 zFh>ZIzM;Yndpc%{L>8YD>SBt%@R6ur4PutD9Dld=q0LT#?D*E`6-_o$EFi44d%#e< zd7OS=m$GkX)uv%-Q{dJ8B)xab-bA8%MG~82BFZAoiaTYdN%@Z^9$kO&YQhb8e2J)* zwN>~vrC-EBK!dt|>l2K4yT1H6(nPuy)p21o1OK*GiEc>h5(EFnU^J@&tWXy!_VK=H zj5CHP5TQ_PD*Ze+nPYQ2e9ELZeraheR_&c)cU*&CU{y!*lITgRP~7A)p3;fJOb&ZR zkbJ@&;xQ4~uiVE4{DoEBPS>Fqk=n>R@ zMx_@ckH49SpGTKCQwuo%tcU+9M8Gnlbr6u_nFj6O+R5XAo7TloT#Z>SidnAC@|a0< z7Bc(Olm2fJMlPlPEivKRsiC(Zl~^ps|LRH*UEc z@KANClW?(HU|S=eW@(E)v9L#}Y6JLXRVZ9}+RdwgzNo`fz>`071u@cmPJoh&6caPB zU6j6kHa9Xi2}~+mweH0blATFm5n96 zl>V{A`%`)=wo}7maP1{gFHPH~~k#z>;J-mp`i`(8*v0;4x7&f1Y;_y_7%lY4resM1(~jRJB|f!&*3#e5)jJ=HZp0y1Tnr)<=k9b>gRK@Ks|;@ z6itRo-5U$jTn0nCn~nHJd^GT15W`&5FZ-zm3Tb|N|? zaLqzCfLvLI%RsHJ805siMiY$CK%%Ty+;aSfp_~op7t6QNqds}Uq#1gBmG$dsWo%?4 z%f9xrLo<+{>+1HsMo<17)$|(zOV_8*kn9_CXy9)`Sr z7o;YO(|?GX0o&in68vs68(>ec7MR-&50bgeYQPxnw{_OE$oF-R@Q9O`#u>aB#XKf zG{Fb5%vqK`@;%ejeQLlvf2p3%Fdf@#r`YKBkRlm5K(`G3wx3xqzi{$>p#SwoCTG?q z0^Py~%MEq(buT*}r!GDwz0&TMKjWa6%A8kG#ocvjH@3h$WEoX0dot%)`a*?K^|F?aUo*E6M^TYt4Zi z%R0Zs{?`wIw42bB?J=(O8B@1=4|GIZG#iztevD_>!W`ZGf3okt;XQnMbkK-h1dUi4 zY>~d)R+|tcbD4|CKsci=y%UD0%>Fe}_z&~`xAxdpC&fcqmAEc%ioDyF;H^|f&)er3 zDj_|^6aJ93mWMMF{kXsm4d?1_7r5{Am(zN(Hz;>RjqBgTlpForG^M5B!sZQeu=J~Y z3M=4zw~5)*ePc8;&f;n)vAYsWM6XCT2HzuwH~Lp{T&H`-Z{O>)R++lq-N8yVV_UCn z)ckbSPz7*A0UJK9H%`~_&9@lWl}%|Yzsd;08gM2-D7uRZ3#I$V-gq=n2t*qGZ4kndtJaOq0HK)d>t|WKp0_4{w>~uLR!^w|Nqx^7KcCLV)mcy-$8f`F22u%ONMcn8D81&q2ARGp#xp-JBLNy7 z*~|bC3oO76;uk=${7^tl0@4DYRwRmVNl8SjgXlo zmU}eBMRr|zGiff-18OOZ0&H%ql;&Z)3^%S~-5q_>jWzHK2%K6>=Oi2f^6x9spoZc! 
z@M+O#OWMk453l%#=%w%1!7T`;A*_?(hr}LNQNX=w+f%agT3sn@6c7n%OxeS$IFpv3 z{r_?mOq(babcZsVjHfgNjirzQB7 zv^jha4-O8jBDzmUAQz0yPSeiXK>;X09-)R+{`)4|Evq>%=`LcvkuR;2#Zb;!sP$#Y zwO4X@H;tK`M~Oq849*8hs8)ZlH!?4^sUFnkvKE|&d`j?>8j<0KjR~R+_1ra^A|ma0 zeZswQ7$Y94bEhU=8EWp)mFy<*MeMET0B%Ng!xW)pwAN&^s~O?JD;ODv{WH3;rDv^GgGAU~?eg}A(Jb*;oT#f5fw za3Y$}Lrmz5!4aw(ghftwD&HMke^lUfu7X0OPtRby-G%byT3{u&D)j=Ed$N6Snrj^a zM*{;{xKelFLlu3c*Oe6_3FE@FBeC{vY!0_C6L~!FmShQ6{BEf zW$i}qG5-s*GwT6vDi)#u=@m-rGm)w?4^q###dP3^taq6Jg;)1~y(_5Sgr7W@Vg-?# zVHYyUay*}r7TmoK4R%mGz3t7MXdJEbqJMBHgdCDI<=P~Ck}AN-oF!fi49BQ0Yf zO-1@t&oq}U_NOBW%T5WOU5{?!h_9j_PbTl}azznBvLUZ~4C>xY@10*9bjM0LEqVg8 z-fJ8BOxzN4@<73X5RDB0CF@0l`Ct!C$Ly1WOLQNy{k?WWZf zn=PM2E?vAdl3+u04NG2C2xEen35F4 zfwN^SXS|@Fnd3F2wuIoas3hi}%%3*T)DTObQfj#@gHGpM@FPda6*mBz_3DAGP{L3^ zx;0m3p&7^%Mjp$ZEgT9!hL(XDOurBc@H-V9GL{z|V-CYW{$epNt`|P*H3(m!ybm^I zvz+-OS!LWLW;!?nPld}G zV#@!vxc1mH{j5L!jJA&Uq(&RuV$=Tw{1xM>7cd zaDHmW1O*@_?p^UraF};FnYr4k>b|gAR^5LP=bS5up$F=_RUMJOEFG-mt#?C+W(?0d zQf2GE`^Ag8b*i#Ye@LsxEXc}&J@OW%HC>AwLWTna-vJAc=;q)%YpWKqmH@JG#+qL4 zvXSPxs+i503$t?fN?J$~>@Jk*&OBW=oFhZ~kEufSlmF^ceDIJBU( zkn7dPzQ1#@r1;c|4h3&`rmDk`cPVl~(7}%**+JK-OMhpH6Fq34M#MSqpih^dRUymh z!0}jomB}~2f~~E^F(RM99(lQTQ`UpEy0=JB^nTEIFXx;7-YpS-6Kys8n^KS9bi`L) zw9iPjeJl(KdmZf$vQ}~q#U7#?&9zM$7hbkdnUFS`?>m&&*bvI-20x-ZzNZG!`3tA@ zZ{~{0=e>Xf=k#G$)tjDXjoRpXfqI#4sS0)@!#=84MVs9(fD2`05gT=))GH+Doxmx$ z{K_Pj6?bJbnrgXC^H2vexvJS@w2`}7X2nM^YS`(F(Sx{~Q4(xdxF({gjHU5j0`uyJ z`uT9|+iz+86l?>qd%xGsN+lvlT_ZQ>d67yOcB70h2{alYE5U0^Gkqoi+ zm}PYaAC0f-sZpqX={Wa(?P6sz<@@RlNDxb(1gBGV=r#G6y>$l$w*hRX_HA;_j(b=V z&P8|9yhRPouJ;#uC`{+Qveuib**HQ#i|-$|D6 zL}1@fG$VIcAyu|LH-b=_d;)#`uu9ABnl;X+T%DBI`eq!~bJ^Dqq`Nvk- z0HsM>;CugySI-m<9M~~n_pZWsGzM3H7Pi=4N0jTjPESGU8W&8>{P_$# zbi$+i>8_a_+Amj2tAbZ3dEMokD`i`|ZFu22`paCr)8n+&7S@F=FEs z#arwq7nt|)3`Eb!XQz%x&?k+ypax7gUF&*H>e~%r`ZNlAI&#Z!ebqZI97<)eR=Tz1Fuq2Ap2%hpgFr~)Wu}mQYD}Fb zno)G?3BUQcR_(sEsYs%}PV=tC!)}Q3KYKCBJ5j(l`#JU9W7|39~HiiXYk2AU62r7900i&v%J08Y{E!=Hpw 
z$i|m?=m)h+Jbk{jDp3h9zH$$(r(6oXv9XL;dQ}byR_+@KsC*lj?52>e>^Cea>*GnK z-CSEE9Mc1ZHbu&iJ_{41(o+8bxT|5j{DlbmJDW>46IKTLA&$mQ+}JU}{$s z*=GmIqXEW#0TBFV#pOFiu-=qS4yMcc1!PZ=l$ri_}YR@qwQ4z6ywbmDvk}-cMkFw>Dhl z)K7@P~@?Lcbt%B=Pi>;WP7jCX?K@kdA7)$;l!02(fsusjxCYUGFaK(EUxmfc)KH*Sl1O z5v8r{BD^?sGmHAEvnEb|K{W!OIos$}z|cTJAK=WT5iT(+vaq+*ChW5263a2YRV6S^)S#tQRt#w_A1Qt(B#3mWAdvm`!+u!Qka zKa>fPC%XMLWl#0&Y@98a@>72Ue)0m^uZBhRMFI*Y?bL#|2qq~g7ppLufalGyc2rNu zKo+!Js8$ZRclrW6Fh>g>m{SMgG+WP~t^5bii>mjtjwu&tfD$`RSSp)qciLOe(nk>) zQCLq*X8MOz4Ay;MvqlNXhWKz~jEW&SK;C@Nhm`L^2k$No)l-rSF43XWM0ksJ>|p~! zeHd;Hl6sV@RQsVqB?bJ4jv;A|E2mJWIh1h24F%Xow7*ti_Y96B&#tRr1g=LlhRW zC9BmvO^ixk46-ShEMV2`H&TljM~o}_T+J-0i5kn4=`k`U`0!-=8iS{2mXUJ0E77e@ z%#OUI2Z}T|s>atT+GUv|RoM!OBfIYIc8@FyOOYPLrVpDF2SZhEf1WGip62MllJvVU zGCUVFU#q)G>BCdl*MQIpi>2<#=cCRFG%^npAoI0j^XG6xkK8tyAFp&$#j$WF7~QXE zSbFj$LVjN-md~E|g9EWZlCkVBdyF?VDMV}lY~ z*s;|UBuTAsM@wunY2VD2ptaHU809=RwpP4=J9=b{Rf-QpTWyY+kO z8qMohH@R?_?Ue8f2`4|iXp>w#((zxB5F%e4Ou_6N+v%uMva!+Eo4-WZ&#&l8@7;zj zh*rN?xqXn!hjVauW38%h-p$!>83iy{mt1!bh)DOuWI2E;4HZc-jV`&gOe^vktsEzL z`}vibxdX0`W|HBYg<19EjPEN3%hySl^qpKNk80Y3wIWTf+z2AGF`aRwD`}j%x1yNhAb3t5dc>b-)s+#e=P`q+G+ zceZIcyC*YmJJ>%RD$pJ{q0F$N3AQAq9f7tJD+msXL_y2i#JE;i?HG2@>J2R1{aPX? z4f*ay$-g*$U>+B>{G^8EYDJ!6){D2eit`1J7RMQS%Zi>O0s`{$dwFQpdNB4WSlQX_ zFUra$$7%Y8y(=u`!(ABMJ5s5kctGV8hhJeqOH2|}8s&CaO8qu*HgPu(_JsmA0w1}n zcl`KET|3;qHMZL2N?(yXFylqEaO9FQ>B`5$J>~y(Nh!un-L4lSSSkt*%ZI-ixBNvfc&%TjQz%n3^u`OR@S}92%-_@RYQ8m zI#4W*^kO{5wKzNJ0ytE-Lz=K_pi=4f)1kspic0h8!2DUxXQe5-H-8VwJ7fgD z2@dj-HdIf7w_(GYVBFr@o|pFP6xsU;T38#L=+sYe{gm7PEZzWBV3rf`0K^Xo`atjR zjm-WapGOy{f5x7Wt)H?RbM$7?e(~$AKB5yBE$L7v=XS#~B;%mz;5{Fu72aqaHUA6T z3wQ26)S|0^eEi^WO+#vpNZA->mV>x37Wzp?czxd_5F}{9w9{W3TA^c$iBv)sB78TN z=FKaa<+)F98Bbi;rTwfYjJ_T)beHD}CNzAYFBu;33aLS2mx90X?S! 
zQ;otof0~DyA7?#siI^xj*6#r}zw>mpQElj^K0D{1zxs&L8ta9dUs zP}395u_;s}2Y{k)!$t6ybo@M$$QE{X!ZJt~*7X$(#C%64kmAsnYFLaXEHRARhlQ}o zy?yXtW)FBM^G(1B(kkUZvK9r*l)wKsZN^0Zz*wQIO8L#(?Ly<@7h{V1q#VZu%nzi| z^>_Y$mh9J9v_IefH=a|SI(T^>cScQZ(!I)X+F37OHngKiZLSvwacAGOpmhXFWa%2- zJg|l*Q?^G02&CBwc*5t|e#68LZTU&pfli_;ygtRI-#YW{yS`Oc;hwJ|5zHL)_IgsB z#Q}{{G{wE?K^JX!15$*hLwpx0x(CW?IqR{rWoj&^ZNd+{)U*)(Uq2F%^h~x7ymN zKXVov?7MTOMKt^oB&2_2AKps`rfjJS8D$t93325Ys!p4oyy(ysjgPca8%K1&OG+x0N_$wgMiEc-ef(C4s6B@yL}w^SZDx)8Q1eRHAr+lSZnQ zJT})g^P}N86-K3AWV5YA9$Rz9xNg|1odUk+!hKy*VLjK`f=^SZKITZ~1x59KCF5fw zO$%Gez`f^bDo1t)|L>t(|1Z|%;%Gu}oUnN@RQG!I?7_+~PuDt+?dq#c%(4osz