diff --git a/tests/smoketest.sh b/tests/smoketest.sh index 5e8d169c..686fcccf 100755 --- a/tests/smoketest.sh +++ b/tests/smoketest.sh @@ -2,7 +2,7 @@ set -eux -o pipefail # ############### Read-only parameters ############### -MODEL_NAME="instructlab/granite-7b-lab" +MODEL_NAME="/home/ec2-user/.cache/huggingface/hub/models--instructlab--granite-7b-lab/snapshots/4fb6a018d68ab813b95c7f470e424a70f2f7e561" # gets directory of current file. SCRIPT_DIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" CORRECT_WORKING_DIR="${SCRIPT_DIR}/../src/instructlab/training/" @@ -13,12 +13,13 @@ DATA_DIR="${TMP_DIR}/data" COMPUTED_DATA_PATH="${DATA_DIR}/data.jsonl" DEFAULT_DISTRIB_FRAMEWORK='fsdp' DISTRIB_FRAMEWORK="${1:-$DEFAULT_DISTRIB_FRAMEWORK}" # defaults to FSDP -DEFAULT_GPUS=8 +DEFAULT_GPUS=4 NUM_GPUS="${2:-$DEFAULT_GPUS}" # ############### User-modifiable parameters ############### # Change these as needed MAX_BATCH_LEN=60000 +MAX_SEQ_LEN=4096 NUM_SAMPLES_TRAINED_ON=5000 # upper-bound on training dataset size. # ############### Test Functions ############### @@ -63,7 +64,7 @@ function prepare_data () { python3 data_process.py \ --data_path="$SAMPLE_DATA_PATH" \ --data_output_path="$DATA_DIR" \ - --max_seq_len=4096 \ + --max_seq_len="${MAX_SEQ_LEN}" \ --model_name_or_path="$MODEL_NAME" # trim data so we only keep the first 'n' samples. @@ -203,17 +204,14 @@ function test_standard_loop_fsdp_lora() { --nproc_per_node="${NUM_GPUS}" \ main_ds.py \ --model_name_or_path="${MODEL_NAME}" \ - --is_granite \ --data_path="${COMPUTED_DATA_PATH}" \ --output_dir="${CHECKPOINTS_DIR}" \ --num_epochs=1 \ --effective_batch_size=128 \ --save_samples=0 \ --checkpoint_at_epoch \ - --accelerate_full_state_at_epoch \ --distributed_training_framework="${DISTRIB_FRAMEWORK}" \ --max_batch_len="${MAX_BATCH_LEN}" \ - --is_granite \ --lora_r=4 \ --lora_alpha=32 \ --lora_dropout=0.1 @@ -235,6 +233,7 @@ function main () { test_standard_loop_nongranite _cleanup_saved_checkpoints test_standard_loop + test_standard_loop_fsdp_lora } main