Open-Llama/configs/pretrain_config.yaml

data:
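  # mode selects the pretraining data pipeline rather than instruction tuning.
  # The nested `data` map pairs a corpus name with a glob of zstd-compressed
  # JSONL shards (here the WuDao and Pile dumps).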
mode: "pretrain"
data:
wudao: "data/pretrain_data/part-wudao*.jsonl.zst"
the_pile: "data/pretrain_data/part-pile-1*.jsonl.zst"
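  # Sequence packing (behavior inferred from the key names, not stated here):
  # samples are not padded to seq_length; instead 10 samples are concatenated
  # and cut into fixed 2048-token training sequences.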
  pad_to_max: False
  sequence_sample_mode: "none"
  concat_multiple_sequence: True
  num_sequences: 10
  seq_length: 2048
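  # SentencePiece model; "extended" presumably means the original LLaMA
  # tokenizer augmented with extra (e.g. Chinese) tokens.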
  tokenizer_model_path: "configs/tokenizer_models/llama_tokenizer_extended.model"
  split_by_shard: False
train:
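  # A micro-batch of 2 with 12 gradient-accumulation steps gives 24 sequences,
  # i.e. 24 * 2048 = 49,152 tokens per device per optimizer step (per-device
  # semantics assumed; multiply by the number of GPUs for the global batch).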
  train_batch_size: 2
  num_training_steps: 500000
  num_warmup_steps: 2000
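  # Weight-init std, peak learning rate, and weight decay; the optimizer itself
  # (typically AdamW for this kind of run) is defined in the training script,
  # not in this file.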
  initializer_range: 1.0e-2
  lr: 2.0e-4
  weight_decay: 1.0e-1
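  # A checkpoint path here would be loaded before training; null starts the
  # run from scratch.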
  ckpt: null
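  # train_num_workers and prefetch_factor are DataLoader settings: 16 worker
  # processes, each prefetching up to 100 batches (standard PyTorch DataLoader
  # semantics, assumed here).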
  train_num_workers: 16
  gradient_accumulation_steps: 12
  prefetch_factor: 100
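  # All three toggles below are off: no eval pass during training, no
  # activation (gradient) checkpointing, and no LoRA, i.e. full-parameter
  # pretraining.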
  train_and_eval: False
  gradient_checkpointing_enable: False
  use_lora: False
  # the intervals below are counted in global steps
  log_interval: 5
  eval_interval: 500
  save_interval: 1000
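  # Checkpoints are written under work_dir; project_name is presumably the
  # experiment-tracker (e.g. Weights & Biases) project used for logging.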
  work_dir: "data/saved_ckpt/7B"
  project_name: "Llama Pretrain"