Update to 3.0.0 (#66)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
edamamez and github-actions[bot] authored Aug 6, 2024
1 parent eaa8ace commit a9c3458
Showing 27 changed files with 244 additions and 345 deletions.
9 changes: 5 additions & 4 deletions lamini/__init__.py
@@ -8,9 +8,9 @@
from lamini.runners.basic_model_runner import BasicModelRunner
from lamini.runners.mistral_runner import MistralRunner
from lamini.api.lamini import Lamini
from lamini.classify.lamini_classifier import LaminiClassifier
from lamini.api.classifier import Classifier
from lamini.api.embedding import Embedding
from lamini.classify.lamini_classifier import LaminiClassifier
from lamini.generation.generation_node import GenerationNode
from lamini.generation.generation_pipeline import GenerationPipeline
from lamini.generation.base_prompt_object import PromptObject
@@ -27,7 +27,8 @@
Find your LAMINI_API_KEY at https://app.lamini.ai/account"""

# When inference call failed, how much retry should we perform.
retry_limit = os.environ.get("LAMINI_RETRY_LIMIT", 3)
retry_limit = int(os.environ.get("LAMINI_RETRY_LIMIT", 3))

max_workers = os.environ.get("LAMINI_MAX_WORKERS", 10)
batch_size = os.environ.get("LAMINI_BATCH_SIZE", 5)
max_workers = int(os.environ.get("LAMINI_MAX_WORKERS", 4))
batch_size = int(os.environ.get("LAMINI_BATCH_SIZE", 5))
static_batching = bool(os.environ.get("LAMINI_STATIC_BATCHING", False))
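
Editor's note on this hunk: the module-level settings are now explicitly cast to int/bool, and the default worker count drops from 10 to 4. A minimal sketch of overriding them through the environment (assuming the package is installed; the values shown are illustrative, and they must be set before the first `import lamini`, since the module reads them at import time):

import os

# These must be set before lamini is first imported.
os.environ["LAMINI_RETRY_LIMIT"] = "5"       # retries after a failed inference call
os.environ["LAMINI_MAX_WORKERS"] = "8"       # new default is 4 (previously 10)
os.environ["LAMINI_BATCH_SIZE"] = "10"
os.environ["LAMINI_STATIC_BATCHING"] = "1"   # any non-empty string is truthy here

import lamini

print(lamini.retry_limit, lamini.max_workers, lamini.batch_size, lamini.static_batching)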
6 changes: 2 additions & 4 deletions lamini/api/classifier.py
@@ -8,11 +8,9 @@


class Classifier:
def __init__(
self, model_id: int = None, api_key: str = None, api_url: str = None, config={}
):
def __init__(self, model_id: int = None, api_key: str = None, api_url: str = None):
self.model_id = model_id
self.config = get_config(config)
self.config = get_config()
self.api_key = api_key or lamini.api_key or get_configured_key(self.config)
self.api_url = api_url or lamini.api_url or get_configured_url(self.config)
self.api_prefix = self.api_url + "/v1/classifier"
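
Editor's note: with the `config` parameter gone, per-instance configuration now comes only from `get_config()` plus the `lamini.api_key` / `lamini.api_url` fallbacks. A hedged sketch of constructing the classifier after this change (the model id and key are placeholders, not values from this commit):

from lamini.api.classifier import Classifier

# model_id and api_key below are placeholders for illustration only.
classifier = Classifier(model_id=123, api_key="<LAMINI_API_KEY>")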
9 changes: 4 additions & 5 deletions lamini/api/embedding.py
@@ -1,4 +1,4 @@
from typing import List, Optional, Union
from typing import List, Union

import lamini
import numpy as np
@@ -9,15 +9,14 @@
class Embedding:
def __init__(
self,
model_name: str = None,
api_key: str = None,
api_url: str = None,
model_name: str = None,
config={},
):
self.config = get_config(config)
self.config = get_config()
self.api_key = api_key or lamini.api_key or get_configured_key(self.config)
self.api_url = api_url or lamini.api_url or get_configured_url(self.config)
self.api_prefix = self.api_url + "/v1/inference/"
self.api_prefix = self.api_url + "/v1/"
self.model_name = model_name

def generate(self, prompt: Union[str, List[str]]):
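
Editor's note: two things change here — `model_name` moves to the end of the argument list (so keyword arguments are the safer style), and the endpoint prefix loses the `inference/` segment. A hedged usage sketch with a placeholder model name (assumes an API key is already configured):

from lamini.api.embedding import Embedding

# The model name is a placeholder; pass whichever embedding model your account uses.
embed = Embedding(model_name="<embedding-model-name>")
vectors = embed.generate("How do I tune a model?")  # accepts a str or a list of str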
107 changes: 22 additions & 85 deletions lamini/api/lamini.py
@@ -1,20 +1,16 @@
import json
import logging
import os
import sys
import time
from typing import Callable, Dict, Iterable, List, Optional, Union
from typing import Dict, Iterable, List, Optional, Union

import jsonlines
import pandas as pd
from lamini.api.lamini_config import get_config
from lamini.api.rest_requests import get_version
from lamini.api.synchronize import sync
from lamini.api.train import Train
from lamini.api.utils.async_inference_queue import AsyncInferenceQueue
from lamini.api.utils.completion import Completion
from lamini.api.utils.upload_client import get_dataset_name, upload_to_blob
from lamini.generation.token_optimizer import TokenOptimizer

logger = logging.getLogger(__name__)

@@ -25,33 +21,15 @@ def __init__(
model_name: str,
api_key: Optional[str] = None,
api_url: Optional[str] = None,
local_cache_file: Optional[str] = None,
config: dict = {},
):
self.config = get_config(config)
self.config = get_config()
self.model_name = model_name
self.api_key = api_key
self.api_url = api_url
if sys.version_info >= (3, 10):
logger.info("Using 3.10 InferenceQueue Interface")
from lamini.api.utils.async_inference_queue_3_10 import (
AsyncInferenceQueue as AsyncInferenceQueue310,
)

self.async_inference_queue = AsyncInferenceQueue310(
api_key, api_url, config=config
)
else:
self.async_inference_queue = AsyncInferenceQueue(
api_key, api_url, config=config
)

self.completion = Completion(api_key, api_url, config=config)
self.trainer = Train(api_key, api_url, config=config)
self.completion = Completion(api_key, api_url)
self.trainer = Train(api_key, api_url)
self.upload_file_path = None
self.upload_base_path = None
self.local_cache_file = local_cache_file
self.model_config = self.config.get("model_config", None)

def version(self):
return get_version(self.api_key, self.api_url, self.config)
@@ -63,36 +41,20 @@ def generate(
output_type: Optional[dict] = None,
max_tokens: Optional[int] = None,
max_new_tokens: Optional[int] = None,
callback: Optional[Callable] = None,
metadata: Optional[List] = None,
):
if isinstance(prompt, str) or (isinstance(prompt, list) and len(prompt) == 1):
result = self.completion.generate(
prompt=prompt,
model_name=model_name or self.model_name,
output_type=output_type,
max_tokens=max_tokens,
max_new_tokens=max_new_tokens,
)
if output_type is None:
if isinstance(prompt, list) and len(prompt) == 1:
result = [single_result["output"] for single_result in result]
else:
result = result["output"]
return result

assert isinstance(prompt, list)
return sync(
self.async_generate(
prompt=prompt,
model_name=model_name,
output_type=output_type,
max_tokens=max_tokens,
max_new_tokens=max_new_tokens,
callback=callback,
metadata=metadata,
)
result = self.completion.generate(
prompt=prompt,
model_name=model_name or self.model_name,
output_type=output_type,
max_tokens=max_tokens,
max_new_tokens=max_new_tokens,
)
if output_type is None:
if isinstance(prompt, list):
result = [single_result["output"] for single_result in result]
else:
result = result["output"]
return result

async def async_generate(
self,
@@ -101,8 +63,6 @@ async def async_generate(
output_type: Optional[dict] = None,
max_tokens: Optional[int] = None,
max_new_tokens: Optional[int] = None,
callback: Optional[Callable] = None,
metadata: Optional[List] = None,
):
req_data = self.completion.make_llm_req_map(
prompt=prompt,
@@ -111,32 +71,13 @@
max_tokens=max_tokens,
max_new_tokens=max_new_tokens,
)

if isinstance(prompt, str) or (isinstance(prompt, list) and len(prompt) == 1):
result = await self.completion.async_generate(req_data)
if output_type is None:
if isinstance(prompt, list) and len(prompt) == 1:
result = [single_result["output"] for single_result in result]
else:
result = result["output"]
return result

assert isinstance(prompt, list)
if metadata is not None:
assert isinstance(metadata, list)
assert len(metadata) == len(prompt)
results = await self.async_inference_queue.submit(
req_data,
self.local_cache_file,
callback,
metadata,
token_optimizer=TokenOptimizer(model_name or self.model_name),
)

result = await self.completion.async_generate(req_data)
if output_type is None:
results = [single_result["output"] for single_result in results]

return results
if isinstance(prompt, list):
result = [single_result["output"] for single_result in result]
else:
result = result["output"]
return result

def upload_data(
self,
@@ -245,7 +186,6 @@ def train(
],
finetune_args: Optional[dict] = None,
gpu_config: Optional[dict] = None,
enable_peft: Optional[bool] = None,
peft_args: Optional[dict] = None,
is_public: Optional[bool] = None,
use_cached_model: Optional[bool] = None,
@@ -269,7 +209,6 @@
upload_file_path=self.upload_file_path,
finetune_args=finetune_args,
gpu_config=gpu_config,
enable_peft=enable_peft,
peft_args=peft_args,
is_public=is_public,
use_cached_model=use_cached_model,
@@ -289,7 +228,6 @@ def train_and_wait(
],
finetune_args: Optional[dict] = None,
gpu_config: Optional[dict] = None,
enable_peft: Optional[bool] = None,
peft_args: Optional[dict] = None,
is_public: Optional[bool] = None,
use_cached_model: Optional[bool] = None,
@@ -300,7 +238,6 @@
data_or_dataset_id,
finetune_args=finetune_args,
gpu_config=gpu_config,
enable_peft=enable_peft,
peft_args=peft_args,
is_public=is_public,
use_cached_model=use_cached_model,
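
Editor's note on the net effect of this file: `Lamini()` no longer accepts `config` or `local_cache_file`, `generate()` and `async_generate()` drop the `callback` and `metadata` parameters along with the async inference queue, and every call now goes through `Completion`. A hedged sketch of the simplified call path (the model name is a placeholder; assumes an API key is configured):

from lamini import Lamini

llm = Lamini(model_name="<your-model-name>")

# With output_type=None, a single prompt returns the "output" string directly...
answer = llm.generate("Write one sentence about fine-tuning.")

# ...while a list of prompts returns a list of output strings.
answers = llm.generate(["Define LoRA.", "Define PEFT."], max_new_tokens=64)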
4 changes: 2 additions & 2 deletions lamini/api/rest_requests.py
@@ -1,9 +1,9 @@
import aiohttp
import asyncio
import importlib.metadata
import logging
import requests

import aiohttp
import requests
from lamini.api.lamini_config import get_config, get_configured_key, get_configured_url
from lamini.error.error import (
APIError,
3 changes: 1 addition & 2 deletions lamini/api/streaming_completion.py
@@ -104,9 +104,8 @@ def __init__(
self,
api_key: str = None,
api_url: str = None,
config={},
):
self.config = get_config(config)
self.config = get_config()
self.api_key = api_key or lamini.api_key or get_configured_key(self.config)
self.api_url = api_url or lamini.api_url or get_configured_url(self.config)
self.api_prefix = self.api_url + "/v1/"
46 changes: 1 addition & 45 deletions lamini/api/train.py
@@ -14,14 +14,12 @@ def __init__(
self,
api_key: Optional[str] = None,
api_url: Optional[str] = None,
config: Optional[dict] = {},
):
self.config = get_config(config)
self.config = get_config()
self.api_key = api_key or lamini.api_key or get_configured_key(self.config)
self.api_url = api_url or lamini.api_url or get_configured_url(self.config)
self.api_prefix = self.api_url + "/v1/"
self.ui_url = "https://app.lamini.ai"
self.model_config = self.config.get("model_config", None)

def train(
self,
@@ -30,7 +28,6 @@ def train(
upload_file_path: Optional[str] = None,
finetune_args: Optional[dict] = None,
gpu_config: Optional[dict] = None,
enable_peft: Optional[bool] = None,
peft_args: Optional[dict] = None,
is_public: Optional[bool] = None,
use_cached_model: Optional[bool] = None,
@@ -44,16 +41,12 @@
req_data["finetune_args"] = finetune_args
if gpu_config is not None:
req_data["gpu_config"] = gpu_config
if enable_peft is not None:
req_data["enable_peft"] = enable_peft
if peft_args is not None:
req_data["peft_args"] = peft_args
if is_public is not None:
req_data["is_public"] = is_public
if use_cached_model is not None:
req_data["use_cached_model"] = use_cached_model
if self.model_config:
req_data["model_config"] = self.model_config.as_dict()
if multi_node is not None:
req_data["multi_node"] = multi_node
url = self.api_prefix + "train"
@@ -69,43 +62,6 @@ def train(
# Add alias for tune
tune = train

def precise_train(
self,
model_name: str,
dataset_id: str,
upload_file_path: Optional[str] = None,
finetune_args: Optional[dict] = None,
gpu_config: Optional[dict] = None,
is_public: Optional[bool] = None,
use_cached_model: Optional[bool] = None,
):
req_data = {"model_name": model_name}
req_data["dataset_id"] = dataset_id
if upload_file_path is not None:
req_data["upload_file_path"] = upload_file_path
if finetune_args is not None:
req_data["finetune_args"] = finetune_args
if gpu_config is not None:
req_data["gpu_config"] = gpu_config
if is_public is not None:
req_data["is_public"] = is_public
if use_cached_model is not None:
req_data["use_cached_model"] = use_cached_model
if self.model_config:
req_data["model_config"] = self.model_config.as_dict()
url = self.api_prefix + "precise_train"

job = make_web_request(self.api_key, url, "post", req_data)
self.job_id = job["job_id"]
print(
f"Tuning job submitted! Check status of job {self.job_id} here: {self.ui_url}/train/{self.job_id}"
)

return job

# Add alias for tune
precise_tune = precise_train

def cancel_job(self, job_id=None):
if job_id is None:
job_id = self.job_id
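
Editor's note: with `enable_peft` removed from the public signatures (and `precise_train` / `precise_tune` deleted outright), PEFT-related settings now travel only through `peft_args`. A hedged sketch of a tuning call after this change; the dataset id and argument keys are illustrative, not taken from this commit:

from lamini import Lamini

llm = Lamini(model_name="<your-model-name>")  # placeholder model name

llm.train(
    data_or_dataset_id="<dataset-id>",        # placeholder dataset id
    finetune_args={"learning_rate": 3e-4},    # illustrative keys
    peft_args={"r_value": 32},                # illustrative keys; replaces enable_peft=True
)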
14 changes: 9 additions & 5 deletions lamini/api/utils/completion.py
@@ -1,18 +1,20 @@
import logging
from typing import List, Optional, Union

import aiohttp
import lamini
from lamini.api.lamini_config import get_config, get_configured_key, get_configured_url
from lamini.api.rest_requests import make_async_web_request, make_web_request

logger = logging.getLogger(__name__)


class Completion:
def __init__(self, api_key, api_url, config):
self.config = get_config(config)
def __init__(self, api_key, api_url):
self.config = get_config()
self.api_key = api_key or lamini.api_key or get_configured_key(self.config)
self.api_url = api_url or lamini.api_url or get_configured_url(self.config)
self.api_prefix = self.api_url + "/v1/"
self.model_config = self.config.get("model_config", None)

def generate(
self,
@@ -70,11 +72,13 @@ def make_llm_req_map(
req_data = {}
req_data["model_name"] = model_name
# TODO: prompt should be named prompt to signal it's a batch.
if isinstance(prompt, list) and len(prompt) > 20:
print(
"For large inference batches, consider using a Generation Pipeline instead: https://github.com/lamini-ai/lamini-examples/blob/main/05_data_pipeline/README.md"
)
req_data["prompt"] = prompt
req_data["output_type"] = output_type
req_data["max_tokens"] = max_tokens
if max_new_tokens is not None:
req_data["max_new_tokens"] = max_new_tokens
if self.model_config:
req_data["model_config"] = self.model_config.as_dict()
return req_data
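
Editor's note: the new block in `make_llm_req_map` only prints a hint; it does not change the request payload. A standalone sketch of the same check, with an illustrative prompt list:

prompts = [f"Summarize document {i}" for i in range(25)]

# Batches larger than 20 prompts now trigger a pointer to the pipeline example.
if isinstance(prompts, list) and len(prompts) > 20:
    print(
        "For large inference batches, consider using a Generation Pipeline instead: "
        "https://github.com/lamini-ai/lamini-examples/blob/main/05_data_pipeline/README.md"
    )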
