update readme and add half to server

This commit is contained in:
LiangSong 2023-05-04 22:28:36 +08:00
parent 5c876121cb
commit 98ffab3a97
3 changed files with 10 additions and 8 deletions

View File

@ -2,7 +2,7 @@
* @Author: LiangSong(sl12160010@gmail.com) * @Author: LiangSong(sl12160010@gmail.com)
* @Date: 2023-03-10 21:18:35 * @Date: 2023-03-10 21:18:35
* @LastEditors: LiangSong(sl12160010@gmail.com) * @LastEditors: LiangSong(sl12160010@gmail.com)
* @LastEditTime: 2023-05-04 08:33:26 * @LastEditTime: 2023-05-04 20:23:09
* @FilePath: /Open-Llama/README.md * @FilePath: /Open-Llama/README.md
* @Description: * @Description:
* *
@ -45,7 +45,7 @@ inputs = tokenizer('user:implement quick sort in python\nsystem:', return_tensor
for k, v in inputs.items(): for k, v in inputs.items():
inputs[k] = v.cuda() inputs[k] = v.cuda()
pred = model.generate(**inputs, max_new_tokens=512, do_sample=True) pred = model.generate(**inputs, max_new_tokens=512, do_sample=True)
print(tokenizer.decode(pred.cpu()[0]).strip()) print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))
``` ```
只经过预训练的CheckPoint也上传至[s-JoL/Open-Llama-V1-pretrain](https://huggingface.co/s-JoL/Open-Llama-V1-pretrain)。 只经过预训练的CheckPoint也上传至[s-JoL/Open-Llama-V1-pretrain](https://huggingface.co/s-JoL/Open-Llama-V1-pretrain)。

View File

@ -2,7 +2,7 @@
* @Author: LiangSong(sl12160010@gmail.com) * @Author: LiangSong(sl12160010@gmail.com)
* @Date: 2023-03-10 21:18:35 * @Date: 2023-03-10 21:18:35
* @LastEditors: LiangSong(sl12160010@gmail.com) * @LastEditors: LiangSong(sl12160010@gmail.com)
* @LastEditTime: 2023-05-04 08:33:45 * @LastEditTime: 2023-05-04 20:23:14
* @FilePath: /Open-Llama/README_en.md * @FilePath: /Open-Llama/README_en.md
* @Description: * @Description:
* *
@ -44,7 +44,7 @@ inputs = tokenizer('user:implement quick sort in python\nsystem:', return_tensor
for k, v in inputs.items(): for k, v in inputs.items():
inputs[k] = v.cuda() inputs[k] = v.cuda()
pred = model.generate(**inputs, max_new_tokens=512, do_sample=True) pred = model.generate(**inputs, max_new_tokens=512, do_sample=True)
print(tokenizer.decode(pred.cpu()[0]).strip()) print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))
``` ```
The checkpoint that has only been pre-trained is also uploaded to [s-JoL/Open-Llama-V1-pretrain](https://huggingface.co/s-JoL/Open-Llama-V1-pretrain). The checkpoint that has only been pre-trained is also uploaded to [s-JoL/Open-Llama-V1-pretrain](https://huggingface.co/s-JoL/Open-Llama-V1-pretrain).

View File

@ -2,7 +2,7 @@
Author: LiangSong(sl12160010@gmail.com) Author: LiangSong(sl12160010@gmail.com)
Date: 2023-04-06 22:30:10 Date: 2023-04-06 22:30:10
LastEditors: LiangSong(sl12160010@gmail.com) LastEditors: LiangSong(sl12160010@gmail.com)
LastEditTime: 2023-04-29 20:40:13 LastEditTime: 2023-05-04 22:28:07
FilePath: /Open-Llama/chat_server.py FilePath: /Open-Llama/chat_server.py
Description: Description:
@ -41,7 +41,7 @@ if "module" in ckpt:
ckpt = ckpt["module"] ckpt = ckpt["module"]
raw_model.load_state_dict(ckpt) raw_model.load_state_dict(ckpt)
raw_model.eval() raw_model.eval()
model = raw_model.cuda() model = raw_model.half().cuda()
logging.warn("ready") logging.warn("ready")
@ -88,8 +88,10 @@ with gr.Blocks() as demo:
context = torch.cat(context, dim=-1) context = torch.cat(context, dim=-1)
context = context[:, -1024:] context = context[:, -1024:]
inputs_len = context.shape[1] inputs_len = context.shape[1]
context = context.cuda() context = context.half().cuda()
pred = model.generate(input_ids=context, max_new_tokens=512, do_sample=True) pred = model.generate(
input_ids=context, max_new_tokens=1024, do_sample=True
)
pred = pred[:, inputs_len:] pred = pred[:, inputs_len:]
pred = tokenizer.decode(pred.cpu()[0], skip_special_tokens=True) pred = tokenizer.decode(pred.cpu()[0], skip_special_tokens=True)
logging.warn(pred) logging.warn(pred)