Skip to content

Commit

Permalink
Add trust_remote_code flag for samsum dataset
Browse files: browse the repository at this point in the history
Signed-off-by: Mamta Singh <[email protected]>
  • Loading branch information
quic-mamta committed Jan 10, 2025
1 parent a08d4eb commit 6b0c123
Show file tree
Hide file tree
Showing 6 changed files with 10 additions and 25 deletions.
4 changes: 4 additions & 0 deletions QEfficient/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
# -----------------------------------------------------------------------------

try:
import platform
import sys

sys.path.append(f"/opt/qti-aic/dev/lib/{platform.machine()}")
import qaicrt # noqa: F401

qaic_sdk_installed = True
Expand Down
3 changes: 0 additions & 3 deletions QEfficient/cloud/finetune.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
#
# -----------------------------------------------------------------------------

import os
import random
import warnings

Expand Down Expand Up @@ -58,8 +57,6 @@ def main(**kwargs):
update_config(train_config, **kwargs)
device = train_config.device

os.environ["HF_DATASETS_TRUST_REMOTE_CODE"] = "True"

# dist init
if train_config.enable_ddp:
# TODO: may have to init qccl backend, next try run with torchrun command
Expand Down
2 changes: 1 addition & 1 deletion QEfficient/finetune/configs/peft_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class lora_config:
bias = "none"
task_type: str = "CAUSAL_LM"
lora_dropout: float = 0.05
inference_mode: bool = False
inference_mode: bool = False # should be False for finetuning


# CAUTION prefix tuning is currently not supported
Expand Down
2 changes: 1 addition & 1 deletion QEfficient/finetune/configs/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class train_config:
save_metrics: bool = True # saves training metrics to a json file for later plotting
intermediate_step_save: int = 1000
batching_strategy: str = "packing"
enable_sorting_for_ddp: bool = "True"
enable_sorting_for_ddp: bool = True

# TODO: vbaddi: Uncomment post adding qaic to Pytorch Profiler
# flop_counter: bool = False # Enable flop counter to measure model throughput, can not be used with pytorch profiler at the same time.
Expand Down
18 changes: 1 addition & 17 deletions QEfficient/finetune/dataset/samsum_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,11 @@
#
# -----------------------------------------------------------------------------

from unittest.mock import patch

import datasets


@patch("builtins.input", return_value="N")
def load_samsum(split, _):
try:
ds = datasets.load_dataset("Samsung/samsum", split=split)
except ValueError as e:
if "trust_remote_code" in str(e):
raise ValueError(
"Loading Samsung/samsum requires you to execute the dataset script in that repo on your local machine. Make sure you have read the code there to avoid malicious use, then set HF_DATASETS_TRUST_REMOTE_CODE env variable to True."
) from e
else:
raise e
return ds


def get_preprocessed_samsum(dataset_config, tokenizer, split, context_length=None):
dataset = load_samsum(split)
dataset = datasets.load_dataset("Samsung/samsum", split=split, trust_remote_code=True)

prompt = "Summarize this dialog:\n{dialog}\n---\nSummary:\n"

Expand Down
6 changes: 3 additions & 3 deletions scripts/finetune/run_ft_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
import warnings

import torch
from configs.training import train_config as TRAIN_CONFIG
from peft import AutoPeftModelForCausalLM
from Qefficient.finetune.configs.training import train_config as TRAIN_CONFIG
from transformers import AutoModelForCausalLM, AutoTokenizer

# Suppress all warnings
Expand All @@ -19,7 +19,7 @@
try:
import torch_qaic # noqa: F401

device = "qaic:1"
device = "qaic:0"
except ImportError as e:
print(f"Warning: {e}. Moving ahead without these qaic modules.")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
Expand All @@ -29,7 +29,7 @@
train_config.model_name,
use_cache=False,
attn_implementation="sdpa",
torch_dtype=torch.float16 if torch.cuda.is_available() or device == "qaic:1" else None,
torch_dtype=torch.float16 if torch.cuda.is_available() or device == "qaic:0" else None,
)

# Load the tokenizer and add special tokens
Expand Down

0 comments on commit 6b0c123

Please sign in to comment.