update vocab_size

This commit is contained in:
LiangSong 2023-05-11 14:15:12 +08:00
parent 73dafa7ad6
commit ceb1fd067b


@@ -69,11 +69,12 @@ def main(argv):
     # Make the vocab size divisible by 16
     # https://huggingface.co/docs/transformers/main_classes/deepspeed#how-to-choose-which-zero-stage-and-offloads-to-use-for-best-performance
     # https://developer.nvidia.com/blog/optimizing-gpu-performance-tensor-cores/
-    vocab_size = math.ceil(tokenizer.vocab_size / 16) * 16
+    # vocab_size = math.ceil(tokenizer.vocab_size / 16) * 16
+    # logging.warning(
+    #     "Round vocab_size from {} to {}.".format(tokenizer.vocab_size, vocab_size)
+    # )
+    vocab_size = tokenizer.vocab_size
     model_config.vocab_size = vocab_size
-    logging.warning(
-        "Round vocab_size from {} to {}.".format(tokenizer.vocab_size, vocab_size)
-    )
     model_config.pad_token_id = tokenizer.pad_token_id
     # Using AutoModel takes effect correctly under Deepspeed.zero.Init(); directly using a concrete class such as OpenLlamaModel does not, which wastes a large amount of memory
     # https://github.com/huggingface/accelerate/pull/932
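
For context, the removed line padded the tokenizer's vocabulary up to the next multiple of 16 (a tensor-core-friendly shape per the linked NVIDIA and DeepSpeed docs); after this commit the raw tokenizer.vocab_size is used unchanged. Below is a minimal sketch of that rounding; the vocab size 68762 is a hypothetical example, not the project's actual tokenizer size.

import math

def round_up_vocab_size(vocab_size: int, multiple: int = 16) -> int:
    # Same arithmetic as the commented-out line:
    # math.ceil(tokenizer.vocab_size / 16) * 16
    return math.ceil(vocab_size / multiple) * multiple

print(round_up_vocab_size(68762))  # 68768 -- padded up by 6 token ids
print(round_up_vocab_size(68768))  # 68768 -- already a multiple of 16, unchanged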