diff --git a/chat_server.py b/chat_server.py
index d438230..1b0f043 100644
--- a/chat_server.py
+++ b/chat_server.py
@@ -32,13 +32,15 @@ raw_model = LlamaForCausalLM(
)
)
ckpt = torch.load(
- "data/saved_ckpt/instruction_tuning_math_code_multiturn/36001.pt", map_location="cpu"
+ "data/saved_ckpt/instruction_tuning_math_code_multiturn/36001.pt",
+ map_location="cpu",
)
raw_model.load_state_dict(ckpt)
raw_model.eval()
model = raw_model.cuda()
print("ready")
+
def parse_codeblock(text):
lines = text.split("\n")
for i, line in enumerate(lines):
@@ -46,12 +48,13 @@ def parse_codeblock(text):
            if line != "```":
                lines[i] = f'<pre><code class="{lines[i][3:]}">'
            else:
-                lines[i] = '</code></pre>'
+                lines[i] = "</code></pre>"
        else:
            if i > 0:
                lines[i] = "<br/>" + line.replace("<", "&lt;").replace(">", "&gt;")
return "".join(lines)
+
with gr.Blocks() as demo:
gr.Markdown(
"""
@@ -75,15 +78,17 @@ with gr.Blocks() as demo:
for prompt, completion in history:
round += 1
if completion is None:
- inputs = 'user:{}\nsystem:'.format(prompt)
- inputs = tokenizer(inputs, return_tensors=True, add_special_tokens=False)
- context.append(inputs['input_ids'])
+ inputs = "user:{}\nsystem:".format(prompt)
+ inputs = tokenizer(
+ inputs, return_tensors=True, add_special_tokens=False
+ )
+ context.append(inputs["input_ids"])
else:
- inputs = 'user:{}\nsystem:{}'.format(prompt, completion)
+ inputs = "user:{}\nsystem:{}".format(prompt, completion)
inputs = tokenizer(inputs, return_tensors=True, add_special_tokens=True)
- context.append(inputs['input_ids'])
+ context.append(inputs["input_ids"])
context = torch.cat(context, dim=-1)
- context = context[:, -1024: ]
+ context = context[:, -1024:]
inputs_len = context.shape[1]
context = context.cuda()
pred = model.generate(input_ids=context, max_new_tokens=512, do_sample=True)
@@ -99,7 +104,7 @@ with gr.Blocks() as demo:
)
clear.click(lambda: None, None, chatbot, queue=False)
gr.Markdown(
- """
+ """
All content produced by this demo service is generated by an AI model. We make no guarantees about the accuracy, completeness, or functionality of the generated content, and it does not represent our attitudes or opinions.
Contact: sl12160010@gmail.com. Comments and suggestions on this project are very welcome.
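
The chat loop in the hunk above builds each prompt by tokenizing the `user:{...}\nsystem:{...}` turns separately, concatenating the ids, and keeping only the last 1024 tokens before calling `model.generate`. A minimal sketch of that rolling-window pattern, using a stand-in `tokenize` function since the repo's `Tokenizer` wrapper is not shown in this diff:

```python
import torch

def build_context(history, tokenize, max_ctx=1024):
    """Concatenate per-turn token ids and keep only the last max_ctx tokens.

    history is a list of (prompt, completion) pairs; completion is None for the
    turn currently being generated. tokenize maps text -> LongTensor of shape
    (1, seq_len). Both are stand-ins for the repo's own objects.
    """
    pieces = []
    for prompt, completion in history:
        if completion is None:
            # open turn: the model continues right after "system:"
            text = "user:{}\nsystem:".format(prompt)
        else:
            text = "user:{}\nsystem:{}".format(prompt, completion)
        pieces.append(tokenize(text))
    context = torch.cat(pieces, dim=-1)
    return context[:, -max_ctx:]  # rolling window: drop the oldest tokens

# toy usage with a fake "tokenizer" that maps characters to ids
fake_tokenize = lambda s: torch.tensor([[ord(c) % 1000 for c in s]], dtype=torch.long)
ctx = build_context([("hi", "hello"), ("2+2?", None)], fake_tokenize)
print(ctx.shape)  # (1, n) with n <= 1024
```
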
diff --git a/dataset/tokenizer.py b/dataset/tokenizer.py
index 59f32f2..4a11aab 100644
--- a/dataset/tokenizer.py
+++ b/dataset/tokenizer.py
@@ -169,7 +169,7 @@ class Tokenizer:
flag = True
break
if flag:
- ids = ids[: j]
+ ids = ids[:j]
else:
ids = ids
out.append(ids)
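
The `dataset/tokenizer.py` hunk is purely cosmetic (`ids[: j]` becomes `ids[:j]`); the surrounding context scans the ids, sets `flag` when some stop condition is hit, and truncates at that position. A small illustration of that truncate-at-first-match pattern; the stop id used here is an arbitrary example, not necessarily what the repo uses:

```python
def truncate_at(ids, stop_id=2):
    # Return ids up to (but not including) the first stop_id, mirroring the
    # flag/break structure in the hunk above; stop_id=2 is only an example.
    flag = False
    j = 0
    for j, tok in enumerate(ids):
        if tok == stop_id:
            flag = True
            break
    if flag:
        ids = ids[:j]
    return ids

print(truncate_at([5, 9, 2, 7]))  # [5, 9]
print(truncate_at([5, 9, 7]))     # [5, 9, 7] (no stop id found)
```
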
diff --git a/speed_test.py b/speed_test.py
deleted file mode 100644
index 3b884ed..0000000
--- a/speed_test.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# import time
-# import torch
-# from colossalai.nn.optimizer import HybridAdam
-# from deepspeed.ops.adam import FusedAdam
-# from transformers import LlamaForCausalLM, LlamaConfig
-# import lightning.pytorch as pl
-
-# # define the LightningModule
-# class LitAutoEncoder(pl.LightningModule):
-# def __init__(self):
-# super().__init__()
-
-# def training_step(self, inputs, batch_idx):
-# # training_step defines the train loop.
-# # it is independent of forward
-# # print(inputs.shape)
-# out = self.model(input_ids=inputs, labels=inputs)
-# loss = out.loss
-# return loss
-
-# def configure_optimizers(self):
-# optimizer = HybridAdam(self.parameters(), lr=1e-5)
-# return optimizer
-
-# def configure_sharded_model(self):
-# self.model = LlamaForCausalLM(
-# LlamaConfig(
-# vocab_size=32000,
-# initializer_range=0.001,
-# pad_token_id=0,
-# rms_norm_eps=1e-5,
-# hidden_dropout_prob=0.1,
-# attention_dropout_prob=0.1,
-# use_stable_embedding=False,
-# shared_input_output_embedding=False,
-# )
-# )
-
-
-# # init the autoencoder
-# autoencoder = LitAutoEncoder()
-# trainer = pl.Trainer(limit_train_batches=500, max_epochs=1, accelerator='gpu', devices=8, strategy="colossalai", precision=16)
-# class FakeSet(torch.utils.data.Dataset):
-# def __getitem__(self, idx):
-# return torch.randint(0, 32000, (2048, ))
-
-# def __len__(self):
-# return 10000
-# train_loader = torch.utils.data.DataLoader(FakeSet(), batch_size=1)
-# trainer.fit(model=autoencoder, train_dataloaders=train_loader)
-
-
-# import time
-# import torch
-# from accelerate import Accelerator
-# from deepspeed.ops.adam import FusedAdam
-# from transformers import LlamaForCausalLM, LlamaConfig
-
-
-# accelerator = Accelerator()
-# raw_model = LlamaForCausalLM(
-# LlamaConfig(
-# vocab_size=32000,
-# initializer_range=0.001,
-# pad_token_id=0,
-# rms_norm_eps=1e-5,
-# hidden_dropout_prob=0.1,
-# attention_dropout_prob=0.1,
-# use_stable_embedding=False,
-# shared_input_output_embedding=False,
-# )
-# )
-# optimizer = FusedAdam(raw_model.parameters(), lr=1e-5)
-
-# import random
-# import sentencepiece as spm
-# from dataset.tokenizer import Tokenizer
-# from dataset.data_iter import create_shard_kwargs, DataIter
-# from torch.utils.data import DataLoader
-
-# max_length = 2048
-# tokenizer_model_path = 'configs/10w_vocab_wudao5_pile10.model'
-# sp_model = spm.SentencePieceProcessor(model_file=tokenizer_model_path)
-# tokenizer = Tokenizer(sp_model)
-
-# paths = create_shard_kwargs(['1*'])
-# random.shuffle(paths)
-# data_set = DataIter(
-# paths
-# )
-# train_loader = DataLoader(
-# data_set,
-# batch_size=1
-# )
-
-# model, optimizer, train_loader = accelerator.prepare(raw_model, optimizer, train_loader)
-# inputs = torch.randint(0, 32000, (1, 2048), device=accelerator.device)
-
-
-# for i in range(10):
-# optimizer.zero_grad()
-# out = model(input_ids=inputs, labels=inputs)
-# loss = out.loss
-# accelerator.backward(loss)
-# optimizer.step()
-# start_time = time.time()
-# for i in range(500):
-# optimizer.zero_grad()
-# out = model(input_ids=inputs, labels=inputs)
-# loss = out.loss
-# accelerator.backward(loss)
-# optimizer.step()
-# end_time = time.time()
-# accelerator.print(end_time - start_time)
\ No newline at end of file