Merge branch 'master' into kfertakis/rlhf_reward_offload
loadams authored Oct 29, 2024
2 parents 14960f1 + cab3361 · commit 83eb5e6

Showing 4 changed files with 4 additions and 4 deletions.
```diff
@@ -10,7 +10,7 @@
     AutoModel,
 )
 from huggingface_hub import snapshot_download
-from transformers.deepspeed import HfDeepSpeedConfig
+from transformers.integrations.deepspeed import HfDeepSpeedConfig

 from dschat.utils.model.reward_model import RewardModel
 from dschat.utils.utils import load_state_dict_into_model, print_rank_0
```
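All four files get the same one-line fix: newer transformers releases moved `HfDeepSpeedConfig` from `transformers.deepspeed` to `transformers.integrations.deepspeed`, and the old path no longer exists on current versions. A codebase that must run against both old and new transformers could guard the import instead; a minimal sketch, not part of this commit:

```python
# Compatibility shim for the HfDeepSpeedConfig relocation: try the new
# import path first and fall back to the legacy location on older releases.
try:
    from transformers.integrations.deepspeed import HfDeepSpeedConfig
except ImportError:  # older transformers without the integrations subpackage
    from transformers.deepspeed import HfDeepSpeedConfig
```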
```diff
@@ -15,7 +15,7 @@
     os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
 import data.DST as DST # default special tokens
 from torch.utils.data import DataLoader
-from transformers.deepspeed import HfDeepSpeedConfig
+from transformers.integrations.deepspeed import HfDeepSpeedConfig
 import numpy as np
 from .vis_proj import VisProjection_vit, VisProjection_perceiver

```
2 changes: 1 addition & 1 deletion inference/huggingface/zero_inference/README.md

````diff
@@ -90,7 +90,7 @@ deepspeed --num_gpus 1 run_model.py --model bigscience/bloom-7b1 --batch-size 8
 Here is an example of running `meta-llama/Llama-2-7b-hf` with Zero-Inference using 4-bit model weights and offloading kv cache to CPU:

 ```sh
-deepspeed --num_gpus 1 run_model.py --model meta-llama/Llama-2-7b-hf` --batch-size 8 --prompt-len 512 --gen-len 32 --cpu-offload --quant-bits 4 --kv-offload
+deepspeed --num_gpus 1 run_model.py --model meta-llama/Llama-2-7b-hf --batch-size 8 --prompt-len 512 --gen-len 32 --cpu-offload --quant-bits 4 --kv-offload
 ```

 ## Performance Tuning Tips
````
2 changes: 1 addition & 1 deletion inference/huggingface/zero_inference/run_model.py

```diff
@@ -19,7 +19,7 @@
 from transformers import (AutoConfig, AutoTokenizer, AutoModelForCausalLM,
                           BloomForCausalLM, OPTForCausalLM, LlamaForCausalLM,
                           )
-from transformers.deepspeed import HfDeepSpeedConfig
+from transformers.integrations.deepspeed import HfDeepSpeedConfig
 from utils import (GB, add_model_hooks, cache_bytes,
                    get_filename, get_quant_config, hidden_bytes, meta_to_cpu,
                    model_bytes, write_benchmark_log)
```
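For context, each of these files imports `HfDeepSpeedConfig` because of how the transformers/DeepSpeed integration works outside the `Trainer`: a live `HfDeepSpeedConfig` object must exist before `from_pretrained` is called so that transformers detects ZeRO stage 3 and loads weights directly into the partitioned layout instead of materializing the full model first. A minimal sketch of that pattern; the config values and model name below are illustrative, not taken from this repository:

```python
import deepspeed
from transformers import AutoModelForCausalLM
from transformers.integrations.deepspeed import HfDeepSpeedConfig

# Illustrative ZeRO stage-3 inference config; not copied from run_model.py.
ds_config = {
    "train_micro_batch_size_per_gpu": 1,
    "zero_optimization": {"stage": 3},
}

# Create (and keep a reference to) HfDeepSpeedConfig *before* from_pretrained
# so transformers sees ZeRO-3 and shards the weights as they are loaded.
dschf = HfDeepSpeedConfig(ds_config)

model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
engine = deepspeed.initialize(model=model, config=ds_config)[0]
engine.module.eval()
```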
