abdeljalilELmajjodi committed on
Commit
75c3f8c
·
verified ·
1 Parent(s): 348a268

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -2
app.py CHANGED
@@ -3,6 +3,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
3
  import os
4
  import spaces
5
  import torch
 
 
6
 
7
 
8
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -33,6 +35,10 @@ examples = [
33
  , 256, 0.7, 0.9, 150, 8, 1.5],
34
  ]
35
 
 
 
 
 
36
  @spaces.GPU
37
  def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150, num_beams=8, repetition_penalty=1.5):
38
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
@@ -49,8 +55,19 @@ def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150,
49
  pad_token_id=tokenizer.pad_token_id, # Explicit pad token
50
  eos_token_id=tokenizer.eos_token_id, # Explicit eos token
51
  )
52
- return tokenizer.decode(output[0], skip_special_tokens=True)
 
 
 
 
53
 
 
 
 
 
 
 
 
54
  if __name__ == "__main__":
55
  # Create the Gradio interface
56
  with gr.Blocks() as app:
@@ -70,5 +87,14 @@ if __name__ == "__main__":
70
  description="Enter a prompt and get AI-generated text using our pretrained LLM on Moroccan Darija.",
71
  examples=examples,
72
  )
73
-
 
 
 
 
 
 
 
 
 
74
  app.launch()
 
import json
import os
import uuid
from pathlib import Path

import spaces
import torch
from datasets import load_dataset
from huggingface_hub import CommitScheduler
8
 
9
 
10
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
35
  , 256, 0.7, 0.9, 150, 8, 1.5],
36
  ]
37
 
38
# Module-level state for the feedback/commit pipeline.
# `detected_commit` flips to True after a generation has been saved.
detected_commit = False
# One unique JSON-lines file per app run; records are appended by save_feedback.
submit_file = Path("user_submit/") / f"data_{uuid.uuid4()}.json"
# Create the submissions folder up front — opening the file in append mode
# would otherwise raise FileNotFoundError on the first write.
submit_file.parent.mkdir(parents=True, exist_ok=True)
42
  @spaces.GPU
43
  def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150, num_beams=8, repetition_penalty=1.5):
44
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
55
  pad_token_id=tokenizer.pad_token_id, # Explicit pad token
56
  eos_token_id=tokenizer.eos_token_id, # Explicit eos token
57
  )
58
+ result=tokenizer.decode(output[0], skip_special_tokens=True)
59
+ #inf_dataset.add_item({"inputs":prompt,"outputs":result,"params":f"{max_length},{temperature},{top_p},{top_k},{num_beams},{repetition_penalty}"})
60
+ save_feedback(prompt,result,f"{max_length},{temperature},{top_p},{top_k},{num_beams},{repetition_penalty}")
61
+ detected_commit=True
62
+ return result
63
 
64
def save_feedback(input, output, params) -> None:
    """Append one inference record (prompt, generation, generation params) as a
    JSON line to the submission file that the CommitScheduler pushes to the Hub.

    Parameter name `input` shadows the builtin but is kept for caller compatibility.
    """
    # `detected_commit` is a module-level flag; without `global` the original
    # assignment only created a dead local and the flag never changed.
    global detected_commit
    # NOTE(review): `scheduler` is only created in the __main__ section —
    # confirm it exists before the first generation request arrives.
    with scheduler.lock:  # serialize writes against the background commit thread
        # Original opened `feedback_file`, which is never defined anywhere;
        # the module defines `submit_file` for exactly this purpose.
        with submit_file.open("a") as f:
            f.write(json.dumps({"input": input, "output": output, "params": params}))
            f.write("\n")
    detected_commit = True
70
+
71
  if __name__ == "__main__":
72
  # Create the Gradio interface
73
  with gr.Blocks() as app:
 
87
  description="Enter a prompt and get AI-generated text using our pretrained LLM on Moroccan Darija.",
88
  examples=examples,
89
  )
90
+ if detected_commit:
91
+ print("[INFO] CommitScheduler...")
92
+ scheduler = CommitScheduler(
93
+ repo_id="atlasia/atlaset_inference_ds",
94
+ repo_type="dataset",
95
+ folder_path=submit_file,
96
+ every=5,
97
+ token=token
98
+ )
99
+ detected_commit=False
100
  app.launch()