update config
This commit is contained in:
parent
e18ead00cc
commit
bf2cac0a45
|
@@ -17,7 +17,7 @@ train:
|
||||||
initializer_range: 1.0e-2
|
initializer_range: 1.0e-2
|
||||||
lr: 2.0e-4
|
lr: 2.0e-4
|
||||||
weight_decay: 1.0e-1
|
weight_decay: 1.0e-1
|
||||||
ckpt: "data/llama_raw_ckpt/7B/extended.pth"
|
ckpt: "data/saved_model/ckpt.pth"
|
||||||
train_num_workers: 16
|
train_num_workers: 16
|
||||||
gradient_accumulation_steps: 1
|
gradient_accumulation_steps: 1
|
||||||
prefetch_factor: 100
|
prefetch_factor: 100
|
||||||
|
|
|
@@ -2,7 +2,6 @@ data:
|
||||||
mode: "pretrain"
|
mode: "pretrain"
|
||||||
data:
|
data:
|
||||||
wudao: "data/pretrain_data/part-wudao*.jsonl.zst"
|
wudao: "data/pretrain_data/part-wudao*.jsonl.zst"
|
||||||
# 由于加载了Llama模型的ckpt所以只使用少量英文数据
|
|
||||||
the_pile: "data/pretrain_data/part-pile-1*.jsonl.zst"
|
the_pile: "data/pretrain_data/part-pile-1*.jsonl.zst"
|
||||||
pad_to_max: False
|
pad_to_max: False
|
||||||
sequence_sample_mode: "none"
|
sequence_sample_mode: "none"
|
||||||
|
@@ -18,8 +17,7 @@ train:
|
||||||
initializer_range: 1.0e-2
|
initializer_range: 1.0e-2
|
||||||
lr: 2.0e-4
|
lr: 2.0e-4
|
||||||
weight_decay: 1.0e-1
|
weight_decay: 1.0e-1
|
||||||
# 加载预训练权重,从头训练设为null
|
ckpt: null
|
||||||
ckpt: "data/llama_raw_ckpt/7B/extended.pth"
|
|
||||||
train_num_workers: 16
|
train_num_workers: 16
|
||||||
gradient_accumulation_steps: 12
|
gradient_accumulation_steps: 12
|
||||||
prefetch_factor: 100
|
prefetch_factor: 100
|
||||||
|
|
Loading…
Reference in New Issue
Block a user