Open-Llama/configs/pretrain_config.yaml

data:
  mode: "pretrain"
  data:
    mixed: "data/pretrain_data/part-*.jsonl.zst"
  concat_multiple_sequence: True   # presumably: pack several raw samples into each seq_length-long training sequence
  num_sequences: 10                # number of raw samples packed together
  seq_length: 2048
  tokenizer_model_path: "configs/llama_tokenizer_extended.model"
model:
  initializer_range: 1.0e-2
  hidden_dropout_prob: 0.1
  attention_dropout_prob: 0.1
  use_stable_embedding: True            # presumably a layer-normed ("stable") embedding layer
  shared_input_output_embedding: True   # tie input and output embedding weights
train:
  train_batch_size: 2
  num_training_steps: 1000000
  num_warmup_steps: 2000
  initializer_range: 1.0e-2
  lr: 2.0e-4
  weight_decay: 1.0e-1
  ckpt: null
  train_num_workers: 16
  # the intervals below are counted in global steps
  log_interval: 5
  eval_interval: 200
  save_interval: 800
  work_dir: "data/saved_ckpt/"
  project_name: "Llama Pretrain"
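
For reference, a minimal sketch of how a pretraining script might consume this file, assuming it is read with PyYAML. The config path and the field names come from the file above; the loader itself is an illustrative assumption, not the repository's actual entry point.

# Hypothetical loader sketch; not Open-Llama's actual training script.
import yaml

with open("Open-Llama/configs/pretrain_config.yaml") as f:
    config = yaml.safe_load(f)

# Nested sections mirror the YAML layout above.
data_cfg = config["data"]
train_cfg = config["train"]

print(data_cfg["seq_length"])         # 2048
print(data_cfg["data"]["mixed"])      # glob pattern for the .jsonl.zst shards
print(train_cfg["lr"], train_cfg["num_training_steps"])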