diff --git a/chat_server.py b/chat_server.py
index d438230..1b0f043 100644
--- a/chat_server.py
+++ b/chat_server.py
@@ -32,13 +32,15 @@ raw_model = LlamaForCausalLM(
     )
 )
 ckpt = torch.load(
-    "data/saved_ckpt/instruction_tuning_math_code_multiturn/36001.pt", map_location="cpu"
+    "data/saved_ckpt/instruction_tuning_math_code_multiturn/36001.pt",
+    map_location="cpu",
 )
 raw_model.load_state_dict(ckpt)
 raw_model.eval()
 model = raw_model.cuda()
 print("ready")
 
+
 def parse_codeblock(text):
     lines = text.split("\n")
     for i, line in enumerate(lines):
@@ -46,12 +48,13 @@ def parse_codeblock(text):
             if line != "```":
                 lines[i] = f'<pre><code class="{lines[i][3:]}">'
             else:
-                lines[i] = '</code></pre>'
+                lines[i] = "</code></pre>"
         else:
             if i > 0:
                 lines[i] = "<br/>" + line.replace("<", "&lt;").replace(">", "&gt;")
     return "".join(lines)
 
+
 with gr.Blocks() as demo:
     gr.Markdown(
         """
@@ -75,15 +78,17 @@ with gr.Blocks() as demo:
         for prompt, completion in history:
             round += 1
             if completion is None:
-                inputs = 'user:{}\nsystem:'.format(prompt)
-                inputs = tokenizer(inputs, return_tensors=True, add_special_tokens=False)
-                context.append(inputs['input_ids'])
+                inputs = "user:{}\nsystem:".format(prompt)
+                inputs = tokenizer(
+                    inputs, return_tensors=True, add_special_tokens=False
+                )
+                context.append(inputs["input_ids"])
             else:
-                inputs = 'user:{}\nsystem:{}'.format(prompt, completion)
+                inputs = "user:{}\nsystem:{}".format(prompt, completion)
                 inputs = tokenizer(inputs, return_tensors=True, add_special_tokens=True)
-                context.append(inputs['input_ids'])
+                context.append(inputs["input_ids"])
         context = torch.cat(context, dim=-1)
-        context = context[:, -1024: ]
+        context = context[:, -1024:]
         inputs_len = context.shape[1]
         context = context.cuda()
         pred = model.generate(input_ids=context, max_new_tokens=512, do_sample=True)
@@ -99,7 +104,7 @@ with gr.Blocks() as demo:
     )
     clear.click(lambda: None, None, chatbot, queue=False)
     gr.Markdown(
-    """
+        """
 当前体验服务生成的所有内容都是由人工智能模型生成,我们对其生成内容的准确性、完整性和功能性不做任何保证,并且其生成的内容不代表我们的态度或观点。
 联系方式: sl12160010@gmail.com 对于该项目有任何意见和建议都欢迎联系我.
 """
diff --git a/dataset/tokenizer.py b/dataset/tokenizer.py
index 59f32f2..4a11aab 100644
--- a/dataset/tokenizer.py
+++ b/dataset/tokenizer.py
@@ -169,7 +169,7 @@ class Tokenizer:
                     flag = True
                     break
             if flag:
-                ids = ids[: j]
+                ids = ids[:j]
             else:
                 ids = ids
             out.append(ids)
diff --git a/speed_test.py b/speed_test.py
deleted file mode 100644
index 3b884ed..0000000
--- a/speed_test.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# import time
-# import torch
-# from colossalai.nn.optimizer import HybridAdam
-# from deepspeed.ops.adam import FusedAdam
-# from transformers import LlamaForCausalLM, LlamaConfig
-# import lightning.pytorch as pl
-
-# # define the LightningModule
-# class LitAutoEncoder(pl.LightningModule):
-#     def __init__(self):
-#         super().__init__()
-
-#     def training_step(self, inputs, batch_idx):
-#         # training_step defines the train loop.
-#         # it is independent of forward
-#         # print(inputs.shape)
-#         out = self.model(input_ids=inputs, labels=inputs)
-#         loss = out.loss
-#         return loss
-
-#     def configure_optimizers(self):
-#         optimizer = HybridAdam(self.parameters(), lr=1e-5)
-#         return optimizer
-
-#     def configure_sharded_model(self):
-#         self.model = LlamaForCausalLM(
-#             LlamaConfig(
-#                 vocab_size=32000,
-#                 initializer_range=0.001,
-#                 pad_token_id=0,
-#                 rms_norm_eps=1e-5,
-#                 hidden_dropout_prob=0.1,
-#                 attention_dropout_prob=0.1,
-#                 use_stable_embedding=False,
-#                 shared_input_output_embedding=False,
-#             )
-#         )
-
-
-# # init the autoencoder
-# autoencoder = LitAutoEncoder()
-# trainer = pl.Trainer(limit_train_batches=500, max_epochs=1, accelerator='gpu', devices=8, strategy="colossalai", precision=16)
-# class FakeSet(torch.utils.data.Dataset):
-#     def __getitem__(self, idx):
-#         return torch.randint(0, 32000, (2048, ))
-
-#     def __len__(self):
-#         return 10000
-# train_loader = torch.utils.data.DataLoader(FakeSet(), batch_size=1)
-# trainer.fit(model=autoencoder, train_dataloaders=train_loader)
-
-
-# import time
-# import torch
-# from accelerate import Accelerator
-# from deepspeed.ops.adam import FusedAdam
-# from transformers import LlamaForCausalLM, LlamaConfig
-
-
-# accelerator = Accelerator()
-# raw_model = LlamaForCausalLM(
-#     LlamaConfig(
-#         vocab_size=32000,
-#         initializer_range=0.001,
-#         pad_token_id=0,
-#         rms_norm_eps=1e-5,
-#         hidden_dropout_prob=0.1,
-#         attention_dropout_prob=0.1,
-#         use_stable_embedding=False,
-#         shared_input_output_embedding=False,
-#     )
-# )
-# optimizer = FusedAdam(raw_model.parameters(), lr=1e-5)
-
-# import random
-# import sentencepiece as spm
-# from dataset.tokenizer import Tokenizer
-# from dataset.data_iter import create_shard_kwargs, DataIter
-# from torch.utils.data import DataLoader
-
-# max_length = 2048
-# tokenizer_model_path = 'configs/10w_vocab_wudao5_pile10.model'
-# sp_model = spm.SentencePieceProcessor(model_file=tokenizer_model_path)
-# tokenizer = Tokenizer(sp_model)
-
-# paths = create_shard_kwargs(['1*'])
-# random.shuffle(paths)
-# data_set = DataIter(
-#     paths
-# )
-# train_loader = DataLoader(
-#     data_set,
-#     batch_size=1
-# )
-
-# model, optimizer, train_loader = accelerator.prepare(raw_model, optimizer, train_loader)
-# inputs = torch.randint(0, 32000, (1, 2048), device=accelerator.device)
-
-
-# for i in range(10):
-#     optimizer.zero_grad()
-#     out = model(input_ids=inputs, labels=inputs)
-#     loss = out.loss
-#     accelerator.backward(loss)
-#     optimizer.step()
-# start_time = time.time()
-# for i in range(500):
-#     optimizer.zero_grad()
-#     out = model(input_ids=inputs, labels=inputs)
-#     loss = out.loss
-#     accelerator.backward(loss)
-#     optimizer.step()
-# end_time = time.time()
-# accelerator.print(end_time - start_time)
\ No newline at end of file