update server
parent bc16df4751
commit 1a731953da

chat_server.py (new file, 79 lines)
@@ -0,0 +1,79 @@
"""
|
||||||
|
Author: LiangSong(sl12160010@gmail.com)
|
||||||
|
Date: 2023-04-06 22:30:10
|
||||||
|
LastEditors: LiangSong(sl12160010@gmail.com)
|
||||||
|
LastEditTime: 2023-04-06 23:13:54
|
||||||
|
FilePath: /Open-Llama/chat_server.py
|
||||||
|
Description:
|
||||||
|
|
||||||
|
Copyright (c) 2023 by LiangSong(sl12160010@gmail.com), All Rights Reserved.
|
||||||
|
"""
|
||||||
|
import torch
|
||||||
|
import gradio as gr
|
||||||
|
import sentencepiece as spm
|
||||||
|
from dataset.tokenizer import Tokenizer
|
||||||
|
from transformers import LlamaForCausalLM, LlamaConfig
|
||||||
|
|
||||||
|
|
||||||
|
sp_model = spm.SentencePieceProcessor(
|
||||||
|
model_file="configs/10w_vocab_wudao5_pile10.model"
|
||||||
|
)
|
||||||
|
tokenizer = Tokenizer(sp_model)
|
||||||
|
raw_model = LlamaForCausalLM(
|
||||||
|
LlamaConfig(
|
||||||
|
vocab_size=tokenizer.vocab_size,
|
||||||
|
initializer_range=0.01,
|
||||||
|
pad_token_id=tokenizer.pad_id,
|
||||||
|
rms_norm_eps=1e-5,
|
||||||
|
hidden_dropout_prob=0.1,
|
||||||
|
attention_dropout_prob=0.1,
|
||||||
|
use_stable_embedding=True,
|
||||||
|
shared_input_output_embedding=True,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
ckpt = torch.load(
|
||||||
|
"data/saved_ckpt/instruction_tuning_3_epochs/37001.pt", map_location="cpu"
|
||||||
|
)
|
||||||
|
raw_model.load_state_dict(ckpt)
|
||||||
|
raw_model.eval()
|
||||||
|
model = raw_model.cuda()
|
||||||
|
print("ready")
|
||||||
|
|
||||||
|
with gr.Blocks() as demo:
|
||||||
|
chatbot = gr.Chatbot()
|
||||||
|
msg = gr.Textbox()
|
||||||
|
clear = gr.Button("Clear")
|
||||||
|
|
||||||
|
def user(user_message, history):
|
||||||
|
return "", history + [[user_message, None]]
|
||||||
|
|
||||||
|
def bot(history):
|
||||||
|
context = []
|
||||||
|
round = 0
|
||||||
|
for prompt, completion in history:
|
||||||
|
round += 1
|
||||||
|
if completion is None:
|
||||||
|
inputs = 'user:{}\nsystem:'.format(prompt)
|
||||||
|
inputs = tokenizer(inputs, return_tensors=True, add_special_tokens=False)
|
||||||
|
context.append(inputs['input_ids'])
|
||||||
|
else:
|
||||||
|
inputs = 'user:{}\nsystem:{}'.format(prompt, completion)
|
||||||
|
inputs = tokenizer(inputs, return_tensors=True, add_special_tokens=True)
|
||||||
|
context.append(inputs['input_ids'])
|
||||||
|
context = torch.cat(context, dim=-1)
|
||||||
|
context = context[:, -1024: ]
|
||||||
|
inputs_len = context.shape[1]
|
||||||
|
context = context.cuda()
|
||||||
|
pred = model.generate(input_ids=context, max_new_tokens=512, do_sample=True)
|
||||||
|
pred = pred[:, inputs_len:]
|
||||||
|
pred = tokenizer.decode(pred.cpu())[0]
|
||||||
|
bot_message = pred
|
||||||
|
history[-1][1] = bot_message
|
||||||
|
return history
|
||||||
|
|
||||||
|
msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
|
||||||
|
bot, chatbot, chatbot
|
||||||
|
)
|
||||||
|
clear.click(lambda: None, None, chatbot, queue=False)
|
||||||
|
|
||||||
|
demo.launch()
|
|
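Note: the multi-turn prompt that bot() assembles can be previewed at the string level without loading the model. A minimal sketch (the history turns are made up for illustration; the real code tokenizes completed and pending turns separately and concatenates token ids):

# Completed turns are encoded with special tokens; the pending turn ends at
# "system:" so generation continues from there.
history = [["hi", "hello!"], ["how are you", None]]  # hypothetical turns
parts = []
for prompt, completion in history:
    if completion is None:
        parts.append("user:{}\nsystem:".format(prompt))
    else:
        parts.append("user:{}\nsystem:{}".format(prompt, completion))
print("".join(parts))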
dataset/tokenizer.py
@@ -2,7 +2,7 @@
 Author: LiangSong(sl12160010@gmail.com)
 Date: 2023-03-20 21:39:47
 LastEditors: LiangSong(sl12160010@gmail.com)
-LastEditTime: 2023-04-05 22:35:01
+LastEditTime: 2023-04-06 23:01:50
 FilePath: /Open-Llama/dataset/tokenizer.py
 Description:

@@ -145,14 +145,34 @@ class Tokenizer:
         out["attention_mask"] = attention_mask
         return out

-    def decode(self, inputs):
+    def decode(self, inputs, max_rounds=None):
         inputs = inputs.tolist()
         out = []
-        for i in inputs:
-            if self.eos_id in i:
-                eos_idx = i.index(self.eos_id)
-                i = i[:eos_idx]
-            out.append(i)
+        for i, ids in enumerate(inputs):
+            count = 0
+            flag = False
+            for j, token in enumerate(ids):
+                if token == self.eos_id:
+                    if max_rounds is None:
+                        flag = True
+                        break
+                    elif isinstance(max_rounds, int):
+                        if count < max_rounds:
+                            count += 1
+                        else:
+                            flag = True
+                            break
+                    elif isinstance(max_rounds, list):
+                        if count < max_rounds[i]:
+                            count += 1
+                        else:
+                            flag = True
+                            break
+            if flag:
+                ids = ids[:j]
+            else:
+                ids = ids
+            out.append(ids)
         out = self.sp_model.Decode(out)
         return out

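The new max_rounds argument controls where decoding truncates: None keeps the old behavior (cut at the first eos token), an int keeps that many eos tokens before cutting, and a list gives one such budget per sequence. A standalone sketch of the rule on plain id lists (the eos id of 2 is assumed purely for illustration):

EOS = 2  # assumed eos_id, for illustration only

def truncate(ids, budget):
    # budget=None: cut at the first EOS; budget=n: keep n EOS tokens, cut at the next
    count = 0
    for j, token in enumerate(ids):
        if token == EOS:
            if budget is None or count >= budget:
                return ids[:j]
            count += 1
    return ids

print(truncate([5, 2, 7, 2, 9], None))  # [5]
print(truncate([5, 2, 7, 2, 9], 1))     # [5, 2, 7]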
server.py
@@ -2,7 +2,7 @@
 Author: LiangSong(sl12160010@gmail.com)
 Date: 2023-03-31 13:26:15
 LastEditors: LiangSong(sl12160010@gmail.com)
-LastEditTime: 2023-04-05 21:47:54
+LastEditTime: 2023-04-06 03:45:44
 FilePath: /Open-Llama/server.py
 Description:

@@ -43,7 +43,7 @@ print("ready")

 def question_answer(prompt):
     print(prompt)
-    raw_inputs = "user:{}<s>system:".format(prompt)
+    raw_inputs = "user:{}\nsystem:".format(prompt)
     inputs_len = len(raw_inputs)
     inputs = tokenizer(raw_inputs, return_tensors=True, add_special_tokens=False)
     for k, v in inputs.items():
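The only functional change in server.py is the turn separator: the literal "<s>" between the user turn and the "system:" cue becomes a plain newline, matching the format chat_server.py uses. For illustration (the prompt value is made up):

prompt = "1+1=?"  # hypothetical input
print("user:{}\nsystem:".format(prompt))
# user:1+1=?
# system: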
speed_test.py (new file, 114 lines)
@@ -0,0 +1,114 @@
# import time
# import torch
# from colossalai.nn.optimizer import HybridAdam
# from deepspeed.ops.adam import FusedAdam
# from transformers import LlamaForCausalLM, LlamaConfig
# import lightning.pytorch as pl

# # define the LightningModule
# class LitAutoEncoder(pl.LightningModule):
#     def __init__(self):
#         super().__init__()

#     def training_step(self, inputs, batch_idx):
#         # training_step defines the train loop.
#         # it is independent of forward
#         # print(inputs.shape)
#         out = self.model(input_ids=inputs, labels=inputs)
#         loss = out.loss
#         return loss

#     def configure_optimizers(self):
#         optimizer = HybridAdam(self.parameters(), lr=1e-5)
#         return optimizer

#     def configure_sharded_model(self):
#         self.model = LlamaForCausalLM(
#             LlamaConfig(
#                 vocab_size=32000,
#                 initializer_range=0.001,
#                 pad_token_id=0,
#                 rms_norm_eps=1e-5,
#                 hidden_dropout_prob=0.1,
#                 attention_dropout_prob=0.1,
#                 use_stable_embedding=False,
#                 shared_input_output_embedding=False,
#             )
#         )


# # init the autoencoder
# autoencoder = LitAutoEncoder()
# trainer = pl.Trainer(limit_train_batches=500, max_epochs=1, accelerator='gpu', devices=8, strategy="colossalai", precision=16)
# class FakeSet(torch.utils.data.Dataset):
#     def __getitem__(self, idx):
#         return torch.randint(0, 32000, (2048, ))

#     def __len__(self):
#         return 10000
# train_loader = torch.utils.data.DataLoader(FakeSet(), batch_size=1)
# trainer.fit(model=autoencoder, train_dataloaders=train_loader)


# import time
# import torch
# from accelerate import Accelerator
# from deepspeed.ops.adam import FusedAdam
# from transformers import LlamaForCausalLM, LlamaConfig


# accelerator = Accelerator()
# raw_model = LlamaForCausalLM(
#     LlamaConfig(
#         vocab_size=32000,
#         initializer_range=0.001,
#         pad_token_id=0,
#         rms_norm_eps=1e-5,
#         hidden_dropout_prob=0.1,
#         attention_dropout_prob=0.1,
#         use_stable_embedding=False,
#         shared_input_output_embedding=False,
#     )
# )
# optimizer = FusedAdam(raw_model.parameters(), lr=1e-5)

# import random
# import sentencepiece as spm
# from dataset.tokenizer import Tokenizer
# from dataset.data_iter import create_shard_kwargs, DataIter
# from torch.utils.data import DataLoader

# max_length = 2048
# tokenizer_model_path = 'configs/10w_vocab_wudao5_pile10.model'
# sp_model = spm.SentencePieceProcessor(model_file=tokenizer_model_path)
# tokenizer = Tokenizer(sp_model)

# paths = create_shard_kwargs(['1*'])
# random.shuffle(paths)
# data_set = DataIter(
#     paths
# )
# train_loader = DataLoader(
#     data_set,
#     batch_size=1
# )

# model, optimizer, train_loader = accelerator.prepare(raw_model, optimizer, train_loader)
# inputs = torch.randint(0, 32000, (1, 2048), device=accelerator.device)


# for i in range(10):
#     optimizer.zero_grad()
#     out = model(input_ids=inputs, labels=inputs)
#     loss = out.loss
#     accelerator.backward(loss)
#     optimizer.step()
# start_time = time.time()
# for i in range(500):
#     optimizer.zero_grad()
#     out = model(input_ids=inputs, labels=inputs)
#     loss = out.loss
#     accelerator.backward(loss)
#     optimizer.step()
# end_time = time.time()
# accelerator.print(end_time - start_time)
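The timed loops above follow a warmup-then-measure pattern: a few untimed iterations first, so one-time costs (kernel compilation, allocator warmup) don't skew the result, then a timed run. A minimal runnable sketch of the same pattern on a toy model (the Linear layer and sizes are stand-ins, not the benchmark's actual model):

import time
import torch

model = torch.nn.Linear(1024, 1024)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
inputs = torch.randn(8, 1024)

for _ in range(10):  # warmup, untimed
    optimizer.zero_grad()
    model(inputs).sum().backward()
    optimizer.step()

start_time = time.time()
for _ in range(100):  # measured section
    optimizer.zero_grad()
    model(inputs).sum().backward()
    optimizer.step()
print(time.time() - start_time)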