Update to 3.0.5 (#69)
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
edamamez and github-actions[bot] authored Aug 13, 2024
1 parent 2581b51 commit aa2b82e
Showing 10 changed files with 373 additions and 106 deletions.
5 changes: 1 addition & 4 deletions lamini/__init__.py
@@ -3,10 +3,6 @@
 # isort: off
 from lamini.error import error
 
-from lamini.runners.llama_v2_runner import LlamaV2Runner
-from lamini.runners.llama_v3_runner import LlamaV3Runner
-from lamini.runners.basic_model_runner import BasicModelRunner
-from lamini.runners.mistral_runner import MistralRunner
 from lamini.api.lamini import Lamini
 from lamini.api.classifier import Classifier
 from lamini.api.embedding import Embedding
@@ -32,3 +28,4 @@
 max_workers = int(os.environ.get("LAMINI_MAX_WORKERS", 4))
 batch_size = int(os.environ.get("LAMINI_BATCH_SIZE", 5))
 static_batching = bool(os.environ.get("LAMINI_STATIC_BATCHING", False))
+bypass_reservation = bool(os.environ.get("LAMINI_BYPASS_RESERVATION", False))
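
The new bypass_reservation flag follows the same pattern as the other module-level settings: each is read from the environment once, at import time. A minimal sketch of how these settings could be exercised (values illustrative); note that bool(os.environ.get(...)) treats any non-empty string, even "False", as truthy, so leaving the variable unset is the only way to get False:

import os

# Export before importing lamini; the values are read at import time.
os.environ["LAMINI_MAX_WORKERS"] = "8"
os.environ["LAMINI_BYPASS_RESERVATION"] = "1"

import lamini

print(lamini.max_workers)         # 8
print(lamini.bypass_reservation)  # True ("1" is a non-empty string)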
40 changes: 14 additions & 26 deletions lamini/api/lamini.py
@@ -10,7 +10,7 @@
 from lamini.api.rest_requests import get_version
 from lamini.api.train import Train
 from lamini.api.utils.completion import Completion
-from lamini.api.utils.upload_client import get_dataset_name, upload_to_blob
+from lamini.api.utils.upload_client import upload_to_blob
 
 logger = logging.getLogger(__name__)

@@ -98,37 +98,36 @@ def get_data_str(d):
         output = self.trainer.get_upload_base_path()
         self.upload_base_path = output["upload_base_path"]
 
-        dataset_id = get_dataset_name()
-
         try:
             if self.upload_base_path == "azure":
                 data_str = get_data_str(data)
-                output = self.trainer.create_blob_dataset_location(
-                    self.upload_base_path, dataset_id, is_public
+                response = self.trainer.create_blob_dataset_location(
+                    self.upload_base_path, is_public
                 )
-                self.upload_file_path = output["dataset_location"]
+                self.upload_file_path = response["dataset_location"]
                 upload_to_blob(data_str, self.upload_file_path)
                 self.trainer.update_blob_dataset_num_datapoints(
-                    dataset_id, num_datapoints
+                    response["dataset_id"], num_datapoints
                 )
                 print("Data pairs uploaded to blob.")
             else:
-                output = self.trainer.upload_dataset_locally(
-                    self.upload_base_path, dataset_id, is_public, data
+                response = self.trainer.upload_dataset_locally(
+                    self.upload_base_path, is_public, data
                 )
-                self.upload_file_path = output["dataset_location"]
+                self.upload_file_path = response["dataset_location"]
                 print("Data pairs uploaded to local.")
 
+            print(response)
             print(
-                f"\nYour dataset id is: {dataset_id} . Consider using this in the future to train using the same data. \nEg: "
-                f"llm.train(dataset_id='{dataset_id}')"
+                f"\nYour dataset id is: {response['dataset_id']} . Consider using this in the future to train using the same data. \nEg: "
+                f"llm.train(data_or_dataset_id='{response['dataset_id']}')"
             )
 
         except Exception as e:
             print(f"Error uploading data pairs: {e}")
             raise e
 
-        return dataset_id
+        return response["dataset_id"]
 
     def upload_file(
         self, file_path: str, input_key: str = "input", output_key: str = "output"
@@ -186,10 +185,8 @@ def train(
         ],
         finetune_args: Optional[dict] = None,
         gpu_config: Optional[dict] = None,
-        peft_args: Optional[dict] = None,
         is_public: Optional[bool] = None,
-        use_cached_model: Optional[bool] = None,
-        multi_node: Optional[bool] = None,
+        **kwargs,
     ):
         if isinstance(data_or_dataset_id, str):
             dataset_id = data_or_dataset_id
@@ -199,7 +196,7 @@ def train(
         base_path = self.trainer.get_upload_base_path()
         self.upload_base_path = base_path["upload_base_path"]
         existing_dataset = self.trainer.get_existing_dataset(
-            dataset_id, self.upload_base_path, is_public
+            dataset_id, self.upload_base_path
         )
         self.upload_file_path = existing_dataset["dataset_location"]

@@ -209,10 +206,7 @@
             upload_file_path=self.upload_file_path,
             finetune_args=finetune_args,
             gpu_config=gpu_config,
-            peft_args=peft_args,
             is_public=is_public,
-            use_cached_model=use_cached_model,
-            multi_node=multi_node,
         )
         job["dataset_id"] = dataset_id
         return job
@@ -228,20 +222,14 @@ def train_and_wait(
         ],
         finetune_args: Optional[dict] = None,
         gpu_config: Optional[dict] = None,
-        peft_args: Optional[dict] = None,
         is_public: Optional[bool] = None,
-        use_cached_model: Optional[bool] = None,
-        multi_node: Optional[bool] = None,
+        **kwargs,
     ):
         job = self.train(
             data_or_dataset_id,
             finetune_args=finetune_args,
             gpu_config=gpu_config,
-            peft_args=peft_args,
             is_public=is_public,
-            use_cached_model=use_cached_model,
-            multi_node=multi_node,
         )
 
         try:
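
Taken together, the lamini.py changes move dataset-id generation to the server: the upload response now carries the id, and the train methods absorb the removed peft_args, use_cached_model, and multi_node arguments via **kwargs. A minimal usage sketch of the updated flow; the model name and data pairs are illustrative, and the uploading method containing the get_data_str hunk above is assumed to be Lamini.upload_data:

from lamini import Lamini

llm = Lamini(model_name="meta-llama/Meta-Llama-3-8B-Instruct")  # illustrative model

# Hypothetical training pairs; the expected keys depend on your data schema.
data = [{"input": "What is a llama?", "output": "A domesticated South American camelid."}]

dataset_id = llm.upload_data(data)  # assumed method name; returns the server-generated dataset id
job = llm.train(data_or_dataset_id=dataset_id, finetune_args={"max_steps": 60})
print(job["dataset_id"])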
24 changes: 3 additions & 21 deletions lamini/api/train.py
@@ -28,10 +28,7 @@ def train(
         upload_file_path: Optional[str] = None,
         finetune_args: Optional[dict] = None,
         gpu_config: Optional[dict] = None,
-        peft_args: Optional[dict] = None,
         is_public: Optional[bool] = None,
-        use_cached_model: Optional[bool] = None,
-        multi_node: Optional[bool] = None,
     ):
         req_data = {"model_name": model_name}
         req_data["dataset_id"] = dataset_id
@@ -41,14 +38,8 @@
             req_data["finetune_args"] = finetune_args
         if gpu_config is not None:
             req_data["gpu_config"] = gpu_config
-        if peft_args is not None:
-            req_data["peft_args"] = peft_args
         if is_public is not None:
             req_data["is_public"] = is_public
-        if use_cached_model is not None:
-            req_data["use_cached_model"] = use_cached_model
-        if multi_node is not None:
-            req_data["multi_node"] = multi_node
         url = self.api_prefix + "train"
 
         job = make_web_request(self.api_key, url, "post", req_data)
@@ -102,21 +93,15 @@ def evaluate(self, job_id=None):
 
         return make_web_request(self.api_key, url, "get")
 
-    def create_blob_dataset_location(
-        self, upload_base_path, dataset_id, is_public, data=None
-    ):
+    def create_blob_dataset_location(self, upload_base_path, is_public):
         url = self.api_prefix + "data"
         req_data = {
             "upload_base_path": upload_base_path,
-            "dataset_id": dataset_id,
         }
 
         if is_public is not None:
             req_data["is_public"] = is_public
 
-        if data is not None:
-            req_data["data"] = data
-
         return make_web_request(
             self.api_key,
             url,
@@ -142,11 +127,10 @@ def get_upload_base_path(self):
         url = self.api_prefix + "get-upload-base-path"
         return make_web_request(self.api_key, url, "get")
 
-    def upload_dataset_locally(self, upload_base_path, dataset_id, is_public, data):
+    def upload_dataset_locally(self, upload_base_path, is_public, data):
         url = self.api_prefix + "local-data"
         req_data = {}
         req_data["upload_base_path"] = upload_base_path
-        req_data["dataset_id"] = dataset_id
         req_data["data"] = SerializableGenerator(data)
         if is_public is not None:
             req_data["is_public"] = is_public
@@ -157,12 +141,10 @@ def upload_dataset_locally(self, upload_base_path, dataset_id, is_public, data):
             req_data,
         )
 
-    def get_existing_dataset(self, dataset_id, upload_base_path, is_public):
+    def get_existing_dataset(self, dataset_id, upload_base_path):
         url = self.api_prefix + "existing-data"
         req_data = {"dataset_id": dataset_id}
         req_data["upload_base_path"] = upload_base_path
-        if is_public is not None:
-            req_data["is_public"] = is_public
         return make_web_request(
             self.api_key,
             url,
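
With these removals, the body that Train.train() posts to the train endpoint shrinks to the keys below. An illustrative sketch of the assembled payload (all values hypothetical):

# Payload assembled by Train.train() after this change; optional keys are
# included only when the corresponding argument is not None.
req_data = {
    "model_name": "meta-llama/Meta-Llama-3-8B-Instruct",  # hypothetical model
    "dataset_id": "d1b6c0",                               # id returned at upload time
    "upload_file_path": "azure-blob-or-local-path",       # hypothetical location
    "finetune_args": {"max_steps": 60},
    "gpu_config": {"gpus": 1},
    "is_public": False,
}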
120 changes: 112 additions & 8 deletions lamini/api/utils/completion.py
@@ -1,17 +1,55 @@
 import logging
-from typing import List, Optional, Union
+from typing import List, Optional, Union, Dict, Any
 
 import aiohttp
 import lamini
 from lamini.api.lamini_config import get_config, get_configured_key, get_configured_url
 from lamini.api.rest_requests import make_async_web_request, make_web_request
 
 logger = logging.getLogger(__name__)
 
 
 class Completion:
-    def __init__(self, api_key, api_url):
+    """Handler for formatting and POST requests for the completions
+    and streaming_completions API endpoints.
+
+    Parameters
+    ----------
+    api_key: Optional[str]
+        Lamini platform API key; if not provided, the key stored
+        within ~/.lamini/configure.yaml will be used. If neither
+        exists, an error is raised.
+
+    api_url: Optional[str]
+        Lamini platform API url, only needed if a different url is needed outside of the
+        defined ones here: https://github.com/lamini-ai/lamini-platform/blob/main/sdk/lamini/api/lamini_config.py#L68
+        i.e. localhost, staging.lamini.ai, or api.lamini.ai
+        Additionally, LLAMA_ENVIRONMENT can be set as an environment variable
+        that will be grabbed for the url before any of the above defaults
+    """
+
+    def __init__(self, api_key, api_url) -> None:
+
+        """
+        Configuration dictionary for platform metadata provided by the following function:
+        https://github.com/lamini-ai/lamini-platform/blob/main/sdk/lamini/api/lamini_config.py
+        Configurations currently hold the following keys and data as a yaml format:
+            local:
+                url: <url>
+            staging:
+                url: <url>
+            production:
+                url: <url>
+
+            local:
+                key: <auth-key>
+            staging:
+                key: <auth-key>
+            production:
+                key: <auth-key>
+        """
         self.config = get_config()
 
         self.api_key = api_key or lamini.api_key or get_configured_key(self.config)
         self.api_url = api_url or lamini.api_url or get_configured_url(self.config)
         self.api_prefix = self.api_url + "/v1/"
@@ -23,7 +61,33 @@ def generate(
         output_type: Optional[dict] = None,
         max_tokens: Optional[int] = None,
         max_new_tokens: Optional[int] = None,
-    ):
+    ) -> Dict[str, Any]:
+        """Handles construction of the POST request headers and body,
+        then makes a web request and returns the response.
+
+        Parameters
+        ----------
+        prompt: Union[str, List[str]]
+            Input prompt for the LLM
+
+        model_name: str
+            LLM model name from HuggingFace
+
+        output_type: Optional[dict] = None
+            Json format for the LLM output
+
+        max_tokens: Optional[int] = None
+            Upper limit in total tokens
+
+        max_new_tokens: Optional[int] = None
+            Upper limit for newly generated tokens
+
+        Returns
+        -------
+        resp: Dict[str, Any]
+            Json data returned from POST request
+        """
+
         req_data = self.make_llm_req_map(
             prompt=prompt,
             model_name=model_name,
@@ -36,7 +100,23 @@ def generate(
         )
         return resp
 
-    async def async_generate(self, params, client: aiohttp.ClientSession = None):
+    async def async_generate(self, params: Dict[str, Any], client: aiohttp.ClientSession = None) -> Dict[str, Any]:
+        """
+        Parameters
+        ----------
+        params: Dict[str, Any]
+            POST Request input parameters
+
+        client: aiohttp.ClientSession = None
+            ClientSession handler
+
+        Returns
+        -------
+        resp: Dict[str, Any]
+            Json data returned from POST request
+        """
+
         if client is not None:
             assert isinstance(client, aiohttp.ClientSession)
             resp = await make_async_web_request(
@@ -61,14 +141,38 @@ def make_llm_req_map(
         output_type: Optional[dict] = None,
         max_tokens: Optional[int] = None,
         max_new_tokens: Optional[int] = None,
-    ):
+    ) -> Dict[str, Any]:
+        """Returns a dict of parameters for calling the remote LLM inference API.
+
+        NOTE: Copied from lamini.py.
+        TODO: Create a helper function that accepts all values and returns a dict,
+        and replace callers of self.make_llm_req_map() with calls to that free function.
+
+        Parameters
+        ----------
+        model_name: str
+            LLM model name from HuggingFace
+
+        prompt: Union[str, List[str]]
+            Input prompt for the LLM
+
+        output_type: Optional[dict] = None
+            Json format for the LLM output
+
+        max_tokens: Optional[int] = None
+            Upper limit in total tokens
+
+        max_new_tokens: Optional[int] = None
+            Upper limit for newly generated tokens
+
+        Returns
+        -------
+        req_data: Dict[str, Any]
+            Constructed dictionary mapping the provided parameters to the
+            keys expected by the REST request.
+        """
+
         req_data = {}
         req_data["model_name"] = model_name
         # TODO: prompt should be named prompt to signal it's a batch.
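
A short usage sketch for the newly documented handler, relying only on the signatures shown in this diff; the API key is assumed to come from ~/.lamini/configure.yaml, and the model name is illustrative:

from lamini.api.utils.completion import Completion

# Passing None falls through to lamini.api_key / lamini.api_url, then the stored config.
completion = Completion(api_key=None, api_url=None)

resp = completion.generate(
    prompt="What is a llama?",
    model_name="meta-llama/Meta-Llama-3-8B-Instruct",  # hypothetical model
    max_new_tokens=64,
)
print(resp)  # JSON response from the completions endpoint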
18 changes: 15 additions & 3 deletions lamini/api/utils/iterators.py
@@ -1,7 +1,19 @@
-from typing import Iterator
+from typing import Iterator, AsyncGenerator, Any
 
 
-async def async_iter(normal_iter: Iterator):
-    """Adapt an normal iterator to an async iterator"""
+async def async_iter(normal_iter: Iterator) -> AsyncGenerator[Any, None]:
+    """Adapt a normal iterator to an async iterator
+
+    Parameters
+    ----------
+    normal_iter: Iterator
+        Iterator to wrap with a yield generator
+
+    Yields
+    ------
+    item: Any
+        Items within the provided normal iterator
+    """
+
     for item in normal_iter:
         yield item
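
A minimal check of the adapter under its new signature, wrapping a plain iterator and consuming it with async for:

import asyncio

from lamini.api.utils.iterators import async_iter

async def main() -> None:
    # Items come back in the original order, one per iteration.
    async for item in async_iter(iter([1, 2, 3])):
        print(item)

asyncio.run(main())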