diff --git a/README.md b/README.md
index 46ad9de..b3b1233 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
  * @Author: LiangSong(sl12160010@gmail.com)
  * @Date: 2023-03-10 21:18:35
  * @LastEditors: LiangSong(sl12160010@gmail.com)
- * @LastEditTime: 2023-05-04 08:33:26
+ * @LastEditTime: 2023-05-04 20:23:09
  * @FilePath: /Open-Llama/README.md
  * @Description:
  *
@@ -45,7 +45,7 @@ inputs = tokenizer('user:implement quick sort in python\nsystem:', return_tensor
 for k, v in inputs.items():
     inputs[k] = v.cuda()
 pred = model.generate(**inputs, max_new_tokens=512, do_sample=True)
-print(tokenizer.decode(pred.cpu()[0]).strip())
+print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))
 ```
 
 只经过预训练的CheckPoint也上传至[s-JoL/Open-Llama-V1-pretrain](https://huggingface.co/s-JoL/Open-Llama-V1-pretrain)。
diff --git a/README_en.md b/README_en.md
index 03704d1..f670c29 100644
--- a/README_en.md
+++ b/README_en.md
@@ -2,7 +2,7 @@
  * @Author: LiangSong(sl12160010@gmail.com)
  * @Date: 2023-03-10 21:18:35
  * @LastEditors: LiangSong(sl12160010@gmail.com)
- * @LastEditTime: 2023-05-04 08:33:45
+ * @LastEditTime: 2023-05-04 20:23:14
  * @FilePath: /Open-Llama/README_en.md
  * @Description:
  *
@@ -44,7 +44,7 @@ inputs = tokenizer('user:implement quick sort in python\nsystem:', return_tensor
 for k, v in inputs.items():
     inputs[k] = v.cuda()
 pred = model.generate(**inputs, max_new_tokens=512, do_sample=True)
-print(tokenizer.decode(pred.cpu()[0]).strip())
+print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))
 ```
 
 The CheckPoint after pre-training only is also uploaded to [s-JoL/Open-Llama-V1-pretrain](https://huggingface.co/s-JoL/Open-Llama-V1-pretrain).
diff --git a/chat_server.py b/chat_server.py
index aa9b57b..96f551a 100644
--- a/chat_server.py
+++ b/chat_server.py
@@ -2,7 +2,7 @@
 Author: LiangSong(sl12160010@gmail.com)
 Date: 2023-04-06 22:30:10
 LastEditors: LiangSong(sl12160010@gmail.com)
-LastEditTime: 2023-04-29 20:40:13
+LastEditTime: 2023-05-04 22:28:07
 FilePath: /Open-Llama/chat_server.py
 Description:
 
@@ -41,7 +41,7 @@ if "module" in ckpt:
     ckpt = ckpt["module"]
 raw_model.load_state_dict(ckpt)
 raw_model.eval()
-model = raw_model.cuda()
+model = raw_model.half().cuda()
 logging.warn("ready")
 
 
@@ -88,8 +88,10 @@ with gr.Blocks() as demo:
         context = torch.cat(context, dim=-1)
         context = context[:, -1024:]
         inputs_len = context.shape[1]
-        context = context.cuda()
-        pred = model.generate(input_ids=context, max_new_tokens=512, do_sample=True)
+        context = context.half().cuda()
+        pred = model.generate(
+            input_ids=context, max_new_tokens=1024, do_sample=True
+        )
         pred = pred[:, inputs_len:]
         pred = tokenizer.decode(pred.cpu()[0], skip_special_tokens=True)
         logging.warn(pred)
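
Taken together, the hunks above make three functional changes: decoding now passes skip_special_tokens=True instead of trimming whitespace, the chat server casts the model to fp16 before moving it to the GPU, and the server's generation budget grows from 512 to 1024 new tokens. Below is a minimal sketch (not the repo's exact code) of the resulting inference path, assuming the Hugging Face transformers Auto classes (the READMEs may use Llama-specific classes) and the s-JoL/Open-Llama-V1-pretrain checkpoint linked above. Note that only the model weights are cast to half precision; token ids passed to generate must remain integer tensors.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("s-JoL/Open-Llama-V1-pretrain")
model = AutoModelForCausalLM.from_pretrained("s-JoL/Open-Llama-V1-pretrain")
model = model.half().cuda()  # fp16 weights, mirroring raw_model.half().cuda()
model.eval()

inputs = tokenizer("user:implement quick sort in python\nsystem:", return_tensors="pt")
# Token ids stay integer tensors: move them to the GPU without casting dtype.
inputs = {k: v.cuda() for k, v in inputs.items()}

with torch.no_grad():
    pred = model.generate(**inputs, max_new_tokens=512, do_sample=True)
# skip_special_tokens=True drops BOS/EOS markers, replacing the old .strip().
print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))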