From ceb1fd067b8558aa574ce7d23f722533e4afd0ad Mon Sep 17 00:00:00 2001
From: LiangSong
Date: Thu, 11 May 2023 14:15:12 +0800
Subject: [PATCH] update vocab_size

---
 train_lm.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/train_lm.py b/train_lm.py
index 4a1d865..11402e8 100644
--- a/train_lm.py
+++ b/train_lm.py
@@ -69,11 +69,12 @@ def main(argv):
     # Make the vocab size divisible by 16
     # https://huggingface.co/docs/transformers/main_classes/deepspeed#how-to-choose-which-zero-stage-and-offloads-to-use-for-best-performance
     # https://developer.nvidia.com/blog/optimizing-gpu-performance-tensor-cores/
-    vocab_size = math.ceil(tokenizer.vocab_size / 16) * 16
+    # vocab_size = math.ceil(tokenizer.vocab_size / 16) * 16
+    # logging.warning(
+    #     "Round vocab_size from {} to {}.".format(tokenizer.vocab_size, vocab_size)
+    # )
+    vocab_size = tokenizer.vocab_size
     model_config.vocab_size = vocab_size
-    logging.warning(
-        "Round vocab_size from {} to {}.".format(tokenizer.vocab_size, vocab_size)
-    )
     model_config.pad_token_id = tokenizer.pad_token_id
     # Using AutoModel takes effect correctly under Deepspeed.zero.Init(), whereas using e.g. OpenLlamaModel directly does not, wasting a large amount of memory
     # https://github.com/huggingface/accelerate/pull/932
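
For reference, a minimal sketch of the rounding logic this patch comments out (assuming a Hugging Face tokenizer exposing a vocab_size attribute; padded_vocab_size is a hypothetical helper name, not part of train_lm.py), illustrating the divisible-by-16 padding recommended in the linked tensor-core guide:

    import math

    def padded_vocab_size(raw_vocab_size: int, multiple: int = 16) -> int:
        # Round the vocabulary size up to the nearest multiple (16 here) so the
        # embedding and output-projection dimensions align with tensor-core tiles.
        return math.ceil(raw_vocab_size / multiple) * multiple

    # Example: 32000 is already a multiple of 16; 32001 would be padded to 32016.
    assert padded_vocab_size(32000) == 32000
    assert padded_vocab_size(32001) == 32016

After this patch, model_config.vocab_size simply mirrors tokenizer.vocab_size, so no padding rows are added to the embedding matrix.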