# Training run settings. One setting per line: a `#` comment runs to the end
# of its line, so no key may share a line with (or follow) a comment.
max_length = 1024
train_batch_size = 2
num_training_steps = 1_000_000
num_warmup_steps = 2000

initializer_range = 1e-2
lr = 2e-4
weight_decay = 1e-1

tokenizer_model_path = "configs/10w_vocab_wudao5_pile10.model"
# Glob pattern(s) for the input files.
# NOTE(review): presumably resolved relative to the working directory — confirm with the loader.
patterns = ["data/pretrain_data/part-*.jsonl.zst"]

# Original note here was "global step" — presumably the unit in which the
# three intervals below are counted; confirm against the training loop.
log_interval = 5
eval_interval = 200
save_interval = 800

work_dir = "data/saved_ckpt/"