feat: trainer add post-pretrain and using api v2 (#241)
* fix

* add more import on __init__.py in dataset package

* cookbook update

* improve local evaluation

* format

* add eval local function

* add comment

* fix unit test

* fix: trainer add api v2

* add

* fix: lint & fix trainer & add log

* feat: support mixtral_8x7b_instruct

* fix: save postpretrain

* fix: trainer support pptrain

* fix: trainer ut

* fix: ut script

* fix: ut

* fix: lint

* fix: 3.7 log_trace

* fix: 3.7 dict

* fix: trainer

* fix: ut

* fix: docs

* fix: issues

* fix: configs limit

---------

Co-authored-by: Dobiichi-Origami <[email protected]>
danielhjz and Dobiichi-Origami authored Feb 2, 2024
1 parent abdc467 commit 00c42bf
Showing 26 changed files with 1,890 additions and 955 deletions.
22 changes: 1 addition & 21 deletions cookbook/finetune/trainer_finetune.ipynb
@@ -18,7 +18,7 @@
"metadata": {},
"outputs": [],
"source": [
"! pip install \"qianfan>=0.2.8\" -U"
"! pip install \"qianfan>=0.3.0\" -U"
]
},
{
@@ -386,26 +386,6 @@
"trainer.output"
]
},
-{
-"cell_type": "code",
-"execution_count": 3,
-"metadata": {},
-"outputs": [
-{
-"data": {
-"text/plain": [
-"'10268'"
-]
-},
-"execution_count": 3,
-"metadata": {},
-"output_type": "execute_result"
-}
-],
-"source": [
-"o[\"model_id\"]"
-]
-},
{
"attachments": {},
"cell_type": "markdown",
53 changes: 48 additions & 5 deletions docs/trainer.md
@@ -6,6 +6,7 @@
![trainer](./imgs/trainer.png)
## Quick start

+### Finetune
The following uses LLMFinetune (corresponding to the Qianfan platform's SFT for large language models) as an example of how to run training with `Trainer`.

```python
@@ -23,14 +24,56 @@ ds: Dataset = Dataset.load(qianfan_dataset_id=111, is_download_to_local=False)
# create an LLMFinetune trainer; at minimum, pass train_type and dataset
# note: fine-tune tasks require a labeled, non-sorted conversation dataset
trainer = LLMFinetune(
train_type="ERNIE-Bot-turbo-0725",
train_type="ERNIE-Speed",
dataset=ds,
)

trainer.run()
```
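Once `run()` completes, the training products are exposed on `trainer.output`; the PostPretrain section below chains a fine-tune onto that output via `previous_trainer`.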

-## Custom training parameters
+### PostPretrain
+In addition to fine-tuning a model with `LLMFinetune`, we can also use `PostPreTrain`:
+
+```python
+from qianfan.trainer import PostPreTrain, LLMFinetune
+from qianfan.trainer.configs import TrainConfig
+from qianfan.trainer.consts import PeftType
+from qianfan.dataset import Dataset
+
+# generic-text dataset
+ds = Dataset.load(qianfan_dataset_id="ds-ag138", is_download_to_local=False)
+
+# post-pretrain
+trainer = PostPreTrain(
+train_type="ERNIE-Speed",
+dataset=ds,
+)
+trainer.run()
+# at this step, the info of the completed PostPretrain task is available:
+print(trainer.output)
+
+
+# SFT dataset
+sft_ds = Dataset.load(qianfan_dataset_id="ds-47j7ztjxfz60wb8x", is_download_to_local=False)
+ppt_sft_trainer = LLMFinetune(
+train_type="ERNIE-Speed",
+dataset=sft_ds,
+train_config=TrainConfig(
+epoch=1,
+learning_rate=0.00003,
+max_seq_len=4096,
+peft_type=PeftType.ALL,
+),
+name="qianfantrainer01",
+previous_trainer=trainer,
+)
+
+ppt_sft_trainer.run()
+# get the final model, ready for inference deployment:
+print(ppt_sft_trainer.output)
+```
+
+### Custom training parameters
If you need to customize training parameters, pass a model-specific TrainConfig to set the parameters used during training. Note that different models support different parameters; consult the API documentation for specifics.
```python
import os
@@ -43,7 +86,7 @@ from qianfan.trainer import LLMFinetune
from qianfan.trainer.configs import TrainConfig

trainer = LLMFinetune(
train_type="ERNIE-Bot-turbo-0516",
train_type="ERNIE-Speed",
dataset=ds,
train_config=TrainConfig(
epochs=1, # number of epochs, controlling how many iterations the training makes over the data
@@ -54,7 +97,7 @@ trainer = LLMFinetune(
trainer.run()
```

-## Event callbacks
+### Event callbacks

To monitor the status of each node at every stage of training, you can use an event callback:

@@ -80,7 +123,7 @@ class MyEventHandler(EventHandler):

eh = MyEventHandler()
trainer = LLMFinetune(
train_type="Llama-2-13b",
train_type="ERNIE-Speed",
dataset=ds,
train_config=TrainConfig(
epochs=1,
2 changes: 1 addition & 1 deletion python/pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "qianfan"
version = "0.2.9"
version = "0.3.0"
description = "文心千帆大模型平台 Python SDK"
authors = []
license = "Apache-2.0"
20 changes: 8 additions & 12 deletions python/qianfan/common/client/trainer.py
@@ -53,7 +53,6 @@
from qianfan.trainer.configs import ModelInfo, TrainLimit
from qianfan.trainer.consts import ActionState, PeftType
from qianfan.trainer.event import Event, EventHandler
-from qianfan.utils.utils import remove_suffix_list

trainer_app = typer.Typer(
no_args_is_help=True,
@@ -214,26 +213,23 @@ def print_trainer_config(config: ModelInfo) -> None:
table.add_column("")
for p in config.support_peft_types:
table.add_column(Pretty(p.value, overflow="fold"))
-example = TrainLimit()
-limit_fields = [
-attr
-for attr in dir(example)
-if not attr.startswith("_") and not callable(getattr(example, attr))
-]
+from qianfan.trainer.configs import TrainConfig
+
+limit_fields = (
+TrainConfig().dict(exclude={"peft_type", "trainset_rate", "extras"}).keys()
+)
for k in limit_fields:
if k in ["supported_hyper_params"]:
continue
row_objs = []
-row_objs.append(remove_suffix_list(k, ["_options", "_limit"]))
+row_objs.append(k)
has_not_none_limit = False
for peft in config.support_peft_types:
peft_limit: Optional[TrainLimit] = config.common_params_limit
if config.specific_peft_types_params_limit:
specific_train_limit = config.specific_peft_types_params_limit.get(peft)
if specific_train_limit is not None:
peft_limit = specific_train_limit | config.common_params_limit
-if peft_limit.__getattribute__(k):
-row_objs.append(peft_limit.__getattribute__(k))
+if peft_limit and peft_limit.get(k):
+row_objs.append(f"{peft_limit.get(k)}")
has_not_none_limit = True
else:
row_objs.append("---")
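A quick sketch of what the rewritten field discovery produces, assuming a default `TrainConfig` is constructible with no arguments (as the diff above implies):

```python
from qianfan.trainer.configs import TrainConfig

# Every TrainConfig field except the excluded bookkeeping ones becomes a
# row key in the rendered limits table.
limit_fields = (
    TrainConfig().dict(exclude={"peft_type", "trainset_rate", "extras"}).keys()
)
print(sorted(limit_fields))
```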
9 changes: 8 additions & 1 deletion python/qianfan/consts.py
@@ -142,7 +142,7 @@ class DefaultValue:
ModelPublishStatusPollingInterval: float = 30
BatchRunStatusPollingInterval: float = 30
DeployStatusPollingInterval: float = 30
-DefaultFinetuneTrainType: str = "ERNIE-Bot-turbo-0725"
+DefaultFinetuneTrainType: str = "ERNIE-Speed"

# current size cap (after decompression) for Qianfan datasets that can be downloaded directly to local storage
# may later be replaced with a cap based on the user's machine memory
@@ -172,6 +172,13 @@ class Consts:
FineTuneCreateTaskAPI: str = "/wenxinworkshop/finetune/createTask"
FineTuneCreateJobAPI: str = "/wenxinworkshop/finetune/createJob"
FineTuneStopJobAPI: str = "/wenxinworkshop/finetune/stopJob"
+ConsoleAPIQueryAction: str = "Action"
+FineTuneV2BaseRouteAPI: str = "/v2/finetuning"
+FineTuneCreateJobAction: str = "CreateFineTuningJob"
+FineTuneCreateTaskAction: str = "CreateFineTuningTask"
+FineTuneJobListAction: str = "DescribeFineTuningJobs"
+FineTuneTaskListAction: str = "DescribeFineTuningTasks"
+FineTuneTaskDetailAction: str = "DescribeFineTuningTask"
ModelDetailAPI: str = "/wenxinworkshop/modelrepo/modelDetail"
ModelVersionDetailAPI: str = "/wenxinworkshop/modelrepo/modelVersionDetail"
ModelPublishAPI: str = "/wenxinworkshop/modelrepo/publishTrainModel"
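The new constants indicate the v2 fine-tuning surface is action-style: a single base route plus an `Action` query parameter. A hypothetical sketch of composing such a route from these constants (the SDK's actual HTTP client wiring may differ):

```python
from qianfan.consts import Consts

def v2_finetune_route(action: str) -> str:
    # e.g. "/v2/finetuning?Action=CreateFineTuningJob"
    return f"{Consts.FineTuneV2BaseRouteAPI}?{Consts.ConsoleAPIQueryAction}={action}"

print(v2_finetune_route(Consts.FineTuneCreateJobAction))
```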
17 changes: 8 additions & 9 deletions python/qianfan/model/model.py
@@ -50,17 +50,17 @@ class Model(
"""model name"""
service: Optional["Service"] = None
"""model service"""
-task_id: Optional[int]
+task_id: Optional[str]
"""train task id"""
-job_id: Optional[int]
+job_id: Optional[str]
"""train job id"""

def __init__(
self,
id: Optional[str] = None,
version_id: Optional[str] = None,
-task_id: Optional[int] = None,
-job_id: Optional[int] = None,
+task_id: Optional[str] = None,
+job_id: Optional[str] = None,
name: Optional[str] = None,
**kwargs: Any,
):
@@ -215,11 +215,11 @@ def publish(self, name: str = "", **kwargs: Any) -> "Model":
self._wait_for_publish(**kwargs)

# publish the model
-self.model_name = name if name != "" else f"m_{self.task_id}_{self.job_id}"
+self.model_name = name if name != "" else f"m_{self.job_id}_{self.task_id}"
model_publish_resp = ResourceModel.publish(
is_new=True,
model_name=self.model_name,
version_meta={"taskId": self.task_id, "iterationId": self.job_id},
version_meta={"taskId": self.job_id, "iterationId": self.task_id},
**kwargs,
)
log_info(
@@ -232,12 +232,11 @@ def publish(self, name: str = "", **kwargs: Any) -> "Model":
raise InvalidArgumentError("task id or job id not found")
# check that the training task has finished
while True:
-job_status_resp = api.FineTune.get_job(
+job_status_resp = api.FineTune.V2.task_detail(
task_id=self.task_id,
-job_id=self.job_id,
**kwargs,
)
-job_status = job_status_resp["result"]["trainStatus"]
+job_status = job_status_resp["result"]["runStatus"]
log_info(f"model publishing keep polling, current status {job_status}")
if job_status == console_const.TrainStatus.Running:
time.sleep(get_config().TRAIN_STATUS_POLLING_INTERVAL)
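Publishing now polls the v2 task-detail call and reads `runStatus` instead of `trainStatus`. A condensed sketch of that polling pattern, assuming `FineTune` is importable from `qianfan.resources` as in the SDK's public API:

```python
import time

from qianfan.resources import FineTune
from qianfan.resources.console import consts as console_const

def wait_until_trained(task_id: str, interval: float = 30.0) -> str:
    """Poll a v2 fine-tuning task until it leaves the Running state."""
    while True:
        resp = FineTune.V2.task_detail(task_id=task_id)
        status = resp["result"]["runStatus"]
        if status != console_const.TrainStatus.Running:
            return status  # Done, Fail, or Stopped per the new enum values
        time.sleep(interval)
```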
21 changes: 17 additions & 4 deletions python/qianfan/resources/console/consts.py
@@ -132,13 +132,13 @@ class ServiceStatus(str, Enum):


class TrainStatus(str, Enum):
Finish = "FINISH"
Finish = "Done"
"""训练完成"""
Running = "RUNNING"
Running = "Running"
"""训练进行中"""
Fail = "FAIL"
Fail = "Fail"
"""训练失败"""
Stop = "STOP"
Stop = "Stopped"
"""训练停止"""


@@ -158,9 +158,22 @@ class TrainDatasetType(int, Enum):
"""私有Bos数据集"""


+class TrainDatasetSourceType(str, Enum):
+Platform = "Platform"
+PrivateBos = "Bos"
+
+
+class TrainMode(str, Enum):
+SFT = "SFT"
+"""corresponds to LLMFinetune"""
+PostPretrain = "PostPretrain"
+"""corresponds to PostPreTrain"""
+
+
+class TrainParameterScale(str, Enum):
+FullFineTuning = "FullFineTuning"
+PromptTuning = "PromptTuning"
+LoRA = "LoRA"


class DeployPoolType(int, Enum):
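The three new enums line up with the v2 request vocabulary. A purely illustrative example of serializing them into create-job parameters — the field names below are assumptions, not the API's documented schema:

```python
from qianfan.resources.console.consts import (
    TrainDatasetSourceType,
    TrainMode,
    TrainParameterScale,
)

# Hypothetical v2 job parameters; only the enum values are taken from the SDK.
job_params = {
    "trainMode": TrainMode.PostPretrain.value,
    "parameterScale": TrainParameterScale.LoRA.value,
    "datasetSource": TrainDatasetSourceType.Platform.value,
}
print(job_params)
```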
