update gradio, fix code format bug
This commit is contained in:
parent
a1acc90988
commit
5c876121cb
|
@ -45,20 +45,6 @@ model = raw_model.cuda()
|
|||
logging.warn("ready")
|
||||
|
||||
|
||||
def parse_codeblock(text):
    """Convert Markdown-style fenced code blocks in ``text`` to HTML.

    The text is processed line by line:

    * a line containing a fence followed by anything else (for example
      a language tag) becomes an opening ``<pre><code class="...">`` tag
      whose class is everything after the first three characters;
    * a line that is exactly a bare fence becomes ``</code></pre>``;
    * every other line has ``<`` and ``>`` replaced by HTML entities so
      it is shown literally, and every such line except the first is
      prefixed with ``<br/>`` to keep the line break.

    Args:
        text: The raw message text.

    Returns:
        The converted lines concatenated into a single HTML string.
    """
    fence = "```"
    lines = text.split("\n")
    for i, line in enumerate(lines):
        if fence in line:
            if line != fence:
                # Opening fence: the remainder of the line is used as the
                # CSS class (normally the language name).
                lines[i] = f'<pre><code class="{line[3:]}">'
            else:
                lines[i] = "</code></pre>"
            continue
        # Escape angle brackets so the content is not parsed as markup.
        # Replacing "<" with "<" (as the captured code did) is a no-op.
        escaped = line.replace("<", "&lt;").replace(">", "&gt;")
        # The first line needs no leading break but must still be escaped.
        lines[i] = "<br/>" + escaped if i > 0 else escaped
    return "".join(lines)
|
||||
|
||||
|
||||
with gr.Blocks() as demo:
|
||||
gr.Markdown(
|
||||
"""
|
||||
|
@ -107,7 +93,7 @@ with gr.Blocks() as demo:
|
|||
pred = pred[:, inputs_len:]
|
||||
pred = tokenizer.decode(pred.cpu()[0], skip_special_tokens=True)
|
||||
logging.warn(pred)
|
||||
bot_message = parse_codeblock(pred)
|
||||
bot_message = pred
|
||||
history[-1][1] = bot_message
|
||||
return history
|
||||
|
||||
|
|
|
@ -25,15 +25,18 @@ random.shuffle(pile_paths)
|
|||
|
||||
paths = wudao_paths[:5] + pile_paths[:10]
|
||||
|
||||
dataset = load_dataset('json', data_files=paths, split="train", streaming=True)
|
||||
dataset = load_dataset("json", data_files=paths, split="train", streaming=True)
|
||||
dataset = dataset.shuffle(seed=42)
|
||||
|
||||
|
||||
def transform(dataset):
    """Yield one plain-text sample for each record in ``dataset``.

    Records that contain both a ``"title"`` and a ``"content"`` key are
    rendered as the title, a newline, then the content. Any other record
    is expected to carry its text under the ``"text"`` key.

    Args:
        dataset: An iterable of mapping-like records.

    Yields:
        The text extracted from each record, in iteration order.

    Raises:
        KeyError: If a dict record has neither the title/content pair
            nor a ``"text"`` key.
    """
    for line in dataset:
        if "title" in line and "content" in line:
            yield line["title"] + "\n" + line["content"]
        else:
            yield line["text"]
|
||||
|
||||
|
||||
data_iter = transform(dataset)
|
||||
|
||||
import io
|
||||
|
|
Loading…
Reference in New Issue
Block a user