Spaces:

BlinkDL/RWKV-Gradio-2

Running on T4

App Files Community

update gradio version

#10

by mollysama - opened Aug 30

base: refs/heads/main ← from: refs/pr/10

Discussion Files changed

+16

-16

Files changed (2) hide show

app.py +15 -14
requirements.txt +1 -2

app.py CHANGED Viewed

@@ -24,12 +24,13 @@ gen_limit = 1000
 ########################## text rwkv ################################################################
 from rwkv.utils import PIPELINE, PIPELINE_ARGS
-title_v6 = "rwkv7-g0a-7.2b-20250829-ctx4096"
-model_path_v6 = hf_hub_download(repo_id="BlinkDL/rwkv7-g1", filename=f"{title_v6}.pth")
-model_v6 = RWKV(model=model_path_v6.replace('.pth',''), strategy='cuda fp16')
-pipeline_v6 = PIPELINE(model_v6, "rwkv_vocab_v20230424")
-args = model_v6.args
 penalty_decay = 0.996
@@ -67,23 +68,23 @@ def evaluate(
     state = None
     for i in range(int(token_count)):
-        input_ids = pipeline_v6.encode(ctx)[-ctx_limit:] if i == 0 else [token]
         CHUNK_LEN = 512
-        # out, state = model_v6.forward(input_ids, state)
         while len(input_ids) > 0:
-            out, state = model_v6.forward(input_ids[:CHUNK_LEN], state)
             input_ids = input_ids[CHUNK_LEN:]
         for n in occurrence:
             out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
-        token = pipeline_v6.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
         if token in args.token_stop:
             break
         all_tokens += [token]
         for xxx in occurrence:
             occurrence[xxx] *= penalty_decay
-        ttt = pipeline_v6.decode([token])
         www = 1
         if ttt in ' \t0123456789':
             www = 0
@@ -94,7 +95,7 @@ def evaluate(
         else:
             occurrence[token] += www
-        tmp = pipeline_v6.decode(all_tokens[out_last:])
         if '\ufffd' not in tmp:
             out_str += tmp
             yield out_str.strip()
@@ -128,8 +129,8 @@ examples = [
 ]
 ##################################################################################################################
-with gr.Blocks(title=title_v6) as demo:
-    gr.HTML(f"<div style=\"text-align: center;\">\n<h1>{title_v6}</h1>\n</div>")
     with gr.Tab("=== Base Model (Raw Generation) ==="):
         gr.Markdown(f'This is [RWKV7 G0a](https://huggingface.co/BlinkDL/rwkv7-g1) 7.2B reasoning base LM - an attention-free pure RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM). Try topp0 penalty0 for math/code/translation. Supports 100+ world languages and code. Check [500+ Github RWKV projects](https://github.com/search?o=desc&p=1&q=rwkv&s=updated&type=Repositories). *** Can try examples (bottom of page) *** (can edit them). Demo limited to ctxlen {ctx_limit}.')
@@ -151,5 +152,5 @@ with gr.Blocks(title=title_v6) as demo:
         clear.click(lambda: None, [], [output])
         data.click(lambda x: x, [data], [prompt, token_count, temperature, top_p, presence_penalty, count_penalty])
-demo.queue(concurrency_count=1, max_size=10)
 demo.launch(share=False)

 ########################## text rwkv ################################################################
 from rwkv.utils import PIPELINE, PIPELINE_ARGS
+title = "rwkv7-g0a-7.2b-20250829-ctx4096"
+model_path = hf_hub_download(repo_id="BlinkDL/rwkv7-g1", filename=f"{title}.pth")
+# model_path = "/home/molly/rwkv7-g0a-7.2b-20250829-ctx4096.pth"
+model = RWKV(model=model_path.replace('.pth',''), strategy='cuda fp16')
+pipeline = PIPELINE(model, "rwkv_vocab_v20230424")
+args = model.args
 penalty_decay = 0.996
     state = None
     for i in range(int(token_count)):
+        input_ids = pipeline.encode(ctx)[-ctx_limit:] if i == 0 else [token]
         CHUNK_LEN = 512
+        # out, state = model.forward(input_ids, state)
         while len(input_ids) > 0:
+            out, state = model.forward(input_ids[:CHUNK_LEN], state)
             input_ids = input_ids[CHUNK_LEN:]
         for n in occurrence:
             out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
+        token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p)
         if token in args.token_stop:
             break
         all_tokens += [token]
         for xxx in occurrence:
             occurrence[xxx] *= penalty_decay
+        ttt = pipeline.decode([token])
         www = 1
         if ttt in ' \t0123456789':
             www = 0
         else:
             occurrence[token] += www
+        tmp = pipeline.decode(all_tokens[out_last:])
         if '\ufffd' not in tmp:
             out_str += tmp
             yield out_str.strip()
 ]
 ##################################################################################################################
+with gr.Blocks(title=title) as demo:
+    gr.HTML(f"<div style=\"text-align: center;\">\n<h1>{title}</h1>\n</div>")
     with gr.Tab("=== Base Model (Raw Generation) ==="):
         gr.Markdown(f'This is [RWKV7 G0a](https://huggingface.co/BlinkDL/rwkv7-g1) 7.2B reasoning base LM - an attention-free pure RNN [RWKV-LM](https://github.com/BlinkDL/RWKV-LM). Try topp0 penalty0 for math/code/translation. Supports 100+ world languages and code. Check [500+ Github RWKV projects](https://github.com/search?o=desc&p=1&q=rwkv&s=updated&type=Repositories). *** Can try examples (bottom of page) *** (can edit them). Demo limited to ctxlen {ctx_limit}.')
         clear.click(lambda: None, [], [output])
         data.click(lambda x: x, [data], [prompt, token_count, temperature, top_p, presence_penalty, count_penalty])
+demo.queue(default_concurrency_limit=1, max_size=10)
 demo.launch(share=False)

requirements.txt CHANGED Viewed

@@ -1,8 +1,7 @@
-gradio==3.28.1
 torch
 ninja
 tokenizers
 rwkv>=0.8.29
 pynvml
 huggingface_hub
-gradio==3.28.1

 torch
 ninja
 tokenizers
 rwkv>=0.8.29
 pynvml
 huggingface_hub
+gradio==5.44.1