update gradio, fix code format bug

This commit is contained in:
LiangSong 2023-05-04 18:18:52 +08:00
parent a1acc90988
commit 5c876121cb
2 changed files with 6 additions and 17 deletions

View File

@ -45,20 +45,6 @@ model = raw_model.cuda()
# logging.warn is a deprecated alias; logging.warning is the supported spelling
# and emits the same WARNING-level record.
logging.warning("ready")
def parse_codeblock(text):
    """Convert text containing Markdown-style ``` fences into an HTML string.

    The text is split on newlines and every line is rewritten in place:

    * a line that contains a fence but is not exactly the bare fence becomes
      ``<pre><code class="...">``, where the class is everything after the
      line's first three characters (the fence's language tag);
    * a line that is exactly the bare fence becomes ``</code></pre>``;
    * any other line has ``<`` and ``>`` escaped and, except for the very
      first line, is prefixed with ``<br/>``.

    Args:
        text: Raw text to render, e.g. a decoded model reply.

    Returns:
        The rewritten lines concatenated without any separator.
    """
    lines = text.split("\n")
    for i, line in enumerate(lines):
        if "```" in line:
            if line != "```":
                # The language tag is interpolated into an HTML attribute, so
                # neutralise the characters that could break out of it.
                language = (
                    line[3:]
                    .replace("<", "&lt;")
                    .replace(">", "&gt;")
                    .replace('"', "&quot;")
                )
                lines[i] = f'<pre><code class="{language}">'
            else:
                lines[i] = "</code></pre>"
        else:
            escaped = line.replace("<", "&lt;").replace(">", "&gt;")
            # Only the <br/> separator depends on the position; escaping must
            # apply to the first line too, otherwise raw markup leaks through.
            lines[i] = escaped if i == 0 else "<br/>" + escaped
    return "".join(lines)
with gr.Blocks() as demo:
gr.Markdown(
"""
@ -107,7 +93,7 @@ with gr.Blocks() as demo:
pred = pred[:, inputs_len:]
pred = tokenizer.decode(pred.cpu()[0], skip_special_tokens=True)
logging.warn(pred)
bot_message = parse_codeblock(pred)
bot_message = pred
history[-1][1] = bot_message
return history

View File

@ -25,15 +25,18 @@ random.shuffle(pile_paths)
# Build the list of input files: the first 5 entries of wudao_paths followed
# by the first 10 entries of pile_paths (pile_paths is shuffled just above).
paths = wudao_paths[:5] + pile_paths[:10]
# NOTE(review): the two load_dataset calls below differ only in quote style;
# they look like the before/after pair of a single diff line -- confirm that
# only one of them exists in the real file.
dataset = load_dataset('json', data_files=paths, split="train", streaming=True)
dataset = load_dataset("json", data_files=paths, split="train", streaming=True)
# Fixed seed so the shuffle order is the same on every run.
dataset = dataset.shuffle(seed=42)
def transform(dataset):
    """Yield one text string for every record in ``dataset``.

    Records that carry both a "title" and a "content" key produce the title
    and the content joined by a newline; every other record produces its
    "text" value.

    Args:
        dataset: Iterable of mapping-like records.

    Yields:
        The text extracted from each record, in iteration order.
    """
    for record in dataset:
        is_titled = "title" in record and "content" in record
        if not is_titled:
            yield record["text"]
            continue
        title, content = record["title"], record["content"]
        yield title + "\n" + content
# transform() is a generator function, so this call only creates the iterator;
# records are read from the dataset when data_iter is consumed.
data_iter = transform(dataset)
import io