Open-Llama/configs/pretrain_config.yaml
data:
  patterns: ["data/pretrain_data/part-*.jsonl.zst"]
  tokenizer_model_path: "configs/10w_vocab_wudao5_pile10.model"
model:
  initializer_range: 1.0e-2
  max_length: 1024
  hidden_dropout_prob: 0.1
  attention_dropout_prob: 0.1
  use_stable_embedding: True
  shared_input_output_embedding: True
train:
  train_batch_size: 2
  num_training_steps: 1000000
  num_warmup_steps: 2000
  initializer_range: 1.0e-2
  lr: 2.0e-4
  weight_decay: 1.0e-1
  ckpt: null
  # intervals below are counted in global steps
  log_interval: 5
  eval_interval: 200
  save_interval: 800
  work_dir: "data/saved_ckpt/"
  project_name: "Llama Pretrain"
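
For reference, a minimal sketch of how a config file like this could be read from Python with PyYAML. This is not the Open-Llama training script itself; the file path and variable names below are illustrative assumptions.

# sketch.py -- illustrative only, assumes PyYAML is installed
import yaml

# Load the config; nested YAML sections become plain Python dicts.
with open("configs/pretrain_config.yaml", "r", encoding="utf-8") as f:
    config = yaml.safe_load(f)

data_config = config["data"]    # e.g. {'patterns': [...], 'tokenizer_model_path': ...}
train_config = config["train"]  # learning rate, batch size, logging intervals, ...

# Scalars such as 2.0e-4 parse as floats and True as a boolean.
print(train_config["lr"], train_config["train_batch_size"])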