update readme and add half to server
This commit is contained in:
parent
5c876121cb
commit
98ffab3a97
|
@ -2,7 +2,7 @@
|
|||
* @Author: LiangSong(sl12160010@gmail.com)
|
||||
* @Date: 2023-03-10 21:18:35
|
||||
* @LastEditors: LiangSong(sl12160010@gmail.com)
|
||||
* @LastEditTime: 2023-05-04 08:33:26
|
||||
* @LastEditTime: 2023-05-04 20:23:09
|
||||
* @FilePath: /Open-Llama/README.md
|
||||
* @Description:
|
||||
*
|
||||
|
@ -45,7 +45,7 @@ inputs = tokenizer('user:implement quick sort in python\nsystem:', return_tensor
|
|||
for k, v in inputs.items():
|
||||
inputs[k] = v.cuda()
|
||||
pred = model.generate(**inputs, max_new_tokens=512, do_sample=True)
|
||||
print(tokenizer.decode(pred.cpu()[0]).strip())
|
||||
print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))
|
||||
|
||||
```
|
||||
只经过预训练的CheckPoint也上传至[s-JoL/Open-Llama-V1-pretrain](https://huggingface.co/s-JoL/Open-Llama-V1-pretrain)。
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
* @Author: LiangSong(sl12160010@gmail.com)
|
||||
* @Date: 2023-03-10 21:18:35
|
||||
* @LastEditors: LiangSong(sl12160010@gmail.com)
|
||||
* @LastEditTime: 2023-05-04 08:33:45
|
||||
* @LastEditTime: 2023-05-04 20:23:14
|
||||
* @FilePath: /Open-Llama/README_en.md
|
||||
* @Description:
|
||||
*
|
||||
|
@ -44,7 +44,7 @@ inputs = tokenizer('user:implement quick sort in python\nsystem:', return_tensor
|
|||
for k, v in inputs.items():
|
||||
inputs[k] = v.cuda()
|
||||
pred = model.generate(**inputs, max_new_tokens=512, do_sample=True)
|
||||
print(tokenizer.decode(pred.cpu()[0]).strip())
|
||||
print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))
|
||||
|
||||
```
|
||||
The checkpoint that has only been pre-trained (without instruction tuning) is also available at [s-JoL/Open-Llama-V1-pretrain](https://huggingface.co/s-JoL/Open-Llama-V1-pretrain).
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
Author: LiangSong(sl12160010@gmail.com)
|
||||
Date: 2023-04-06 22:30:10
|
||||
LastEditors: LiangSong(sl12160010@gmail.com)
|
||||
LastEditTime: 2023-04-29 20:40:13
|
||||
LastEditTime: 2023-05-04 22:28:07
|
||||
FilePath: /Open-Llama/chat_server.py
|
||||
Description:
|
||||
|
||||
|
@ -41,7 +41,7 @@ if "module" in ckpt:
|
|||
ckpt = ckpt["module"]
|
||||
raw_model.load_state_dict(ckpt)
|
||||
raw_model.eval()
|
||||
model = raw_model.cuda()
|
||||
model = raw_model.half().cuda()
|
||||
logging.warn("ready")
|
||||
|
||||
|
||||
|
@ -88,8 +88,10 @@ with gr.Blocks() as demo:
|
|||
context = torch.cat(context, dim=-1)
|
||||
context = context[:, -1024:]
|
||||
inputs_len = context.shape[1]
|
||||
context = context.cuda()
|
||||
pred = model.generate(input_ids=context, max_new_tokens=512, do_sample=True)
|
||||
context = context.half().cuda()
|
||||
pred = model.generate(
|
||||
input_ids=context, max_new_tokens=1024, do_sample=True
|
||||
)
|
||||
pred = pred[:, inputs_len:]
|
||||
pred = tokenizer.decode(pred.cpu()[0], skip_special_tokens=True)
|
||||
logging.warn(pred)
|
||||
|
|
Loading…
Reference in New Issue
Block a user