update readme and add half to server

parent 5c876121cb
commit 98ffab3a97
README.md

@@ -2,7 +2,7 @@
 * @Author: LiangSong(sl12160010@gmail.com)
 * @Date: 2023-03-10 21:18:35
 * @LastEditors: LiangSong(sl12160010@gmail.com)
-* @LastEditTime: 2023-05-04 08:33:26
+* @LastEditTime: 2023-05-04 20:23:09
 * @FilePath: /Open-Llama/README.md
 * @Description:
 *
@@ -45,7 +45,7 @@ inputs = tokenizer('user:implement quick sort in python\nsystem:', return_tensor
 for k, v in inputs.items():
     inputs[k] = v.cuda()
 pred = model.generate(**inputs, max_new_tokens=512, do_sample=True)
-print(tokenizer.decode(pred.cpu()[0]).strip())
+print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))
 
 ```
 The CheckPoint trained with pre-training only has also been uploaded to [s-JoL/Open-Llama-V1-pretrain](https://huggingface.co/s-JoL/Open-Llama-V1-pretrain).
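For reference, a minimal self-contained version of the snippet both READMEs patch; the `transformers` Auto* loaders are an assumption here (the repo may ship its own loading code). The change swaps a post-hoc `.strip()` for `skip_special_tokens=True`, which drops special-token markers during decoding instead of leaving them in the output.

```python
# Sketch only: assumes the checkpoint loads via the generic Auto* classes.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("s-JoL/Open-Llama-V1-pretrain")
model = AutoModelForCausalLM.from_pretrained("s-JoL/Open-Llama-V1-pretrain").cuda()

inputs = tokenizer("user:implement quick sort in python\nsystem:", return_tensors="pt")
for k, v in inputs.items():
    inputs[k] = v.cuda()
pred = model.generate(**inputs, max_new_tokens=512, do_sample=True)
# skip_special_tokens=True removes markers such as <s>/</s>/<pad> from the
# decoded string, so the old trailing .strip() is no longer needed.
print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))
```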
README_en.md

@@ -2,7 +2,7 @@
 * @Author: LiangSong(sl12160010@gmail.com)
 * @Date: 2023-03-10 21:18:35
 * @LastEditors: LiangSong(sl12160010@gmail.com)
-* @LastEditTime: 2023-05-04 08:33:45
+* @LastEditTime: 2023-05-04 20:23:14
 * @FilePath: /Open-Llama/README_en.md
 * @Description:
 *
@@ -44,7 +44,7 @@ inputs = tokenizer('user:implement quick sort in python\nsystem:', return_tensor
 for k, v in inputs.items():
     inputs[k] = v.cuda()
 pred = model.generate(**inputs, max_new_tokens=512, do_sample=True)
-print(tokenizer.decode(pred.cpu()[0]).strip())
+print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True))
 
 ```
 The CheckPoint after pre-training only is also uploaded to [s-JoL/Open-Llama-V1-pretrain](https://huggingface.co/s-JoL/Open-Llama-V1-pretrain).
chat_server.py

@@ -2,7 +2,7 @@
 Author: LiangSong(sl12160010@gmail.com)
 Date: 2023-04-06 22:30:10
 LastEditors: LiangSong(sl12160010@gmail.com)
-LastEditTime: 2023-04-29 20:40:13
+LastEditTime: 2023-05-04 22:28:07
 FilePath: /Open-Llama/chat_server.py
 Description:
 
@@ -41,7 +41,7 @@ if "module" in ckpt:
     ckpt = ckpt["module"]
 raw_model.load_state_dict(ckpt)
 raw_model.eval()
-model = raw_model.cuda()
+model = raw_model.half().cuda()
 logging.warn("ready")
 
 
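A quick sketch of what the new `.half()` call does before serving; the toy module below stands in for the loaded Open-Llama model, and the 2-vs-4-byte figure is the generic fp16/fp32 parameter size, not a measurement from this repo.

```python
import torch
import torch.nn as nn

# Toy stand-in for raw_model; the real server loads the Open-Llama checkpoint.
raw_model = nn.Linear(4, 4)

# .half() casts every floating-point parameter and buffer to float16,
# roughly halving weight memory (2 bytes per value instead of 4) and
# letting generation run on fp16 CUDA kernels.
model = raw_model.half().cuda()
model.eval()

assert all(p.dtype == torch.float16 for p in model.parameters())
```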
@@ -88,8 +88,10 @@ with gr.Blocks() as demo:
 context = torch.cat(context, dim=-1)
 context = context[:, -1024:]
 inputs_len = context.shape[1]
-context = context.cuda()
-pred = model.generate(input_ids=context, max_new_tokens=512, do_sample=True)
+context = context.half().cuda()
+pred = model.generate(
+    input_ids=context, max_new_tokens=1024, do_sample=True
+)
 pred = pred[:, inputs_len:]
 pred = tokenizer.decode(pred.cpu()[0], skip_special_tokens=True)
 logging.warn(pred)
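One caveat on the hunk above, flagged rather than fixed since the diff is reproduced as committed: `context` holds token ids, and PyTorch's `.half()` converts any tensor to float16, so the cast changes the ids' dtype as well, while `generate` expects integer `input_ids`. The usual fp16 pattern casts only the model, as the @@ -41,7 +41,7 @@ hunk already does. A tiny demonstration:

```python
import torch

ids = torch.randint(0, 32000, (1, 8))  # token ids, as a tokenizer would produce
print(ids.dtype)         # torch.int64 -- what generate() expects for input_ids
print(ids.half().dtype)  # torch.float16 -- .half() recasts the ids too, not just weights
```

Keeping the ids in `torch.long` and only the weights in fp16 avoids the mismatch.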