Spaces:

atlasia
/

Al-Atlas-LLM

Running on Zero

App Files Files Community

abdeljalilELmajjodi committed on 7 days ago

Commit

75c3f8c

verified ·

1 Parent(s): 348a268

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -2

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import os
 import spaces
 import torch
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -33,6 +35,10 @@ examples = [
      , 256, 0.7, 0.9, 150, 8, 1.5],
 ]
 @spaces.GPU
 def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150, num_beams=8, repetition_penalty=1.5):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
@@ -49,8 +55,19 @@ def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150,
         pad_token_id=tokenizer.pad_token_id,  # Explicit pad token
         eos_token_id=tokenizer.eos_token_id,  # Explicit eos token
     )
-    return tokenizer.decode(output[0], skip_special_tokens=True)
 if __name__ == "__main__":
     # Create the Gradio interface
     with gr.Blocks() as app:
@@ -70,5 +87,14 @@ if __name__ == "__main__":
             description="Enter a prompt and get AI-generated text using our pretrained LLM on Moroccan Darija.",
             examples=examples,
         )
     app.launch()

 import os
 import spaces
 import torch
+from datasets import load_dataset
+from huggingface_hub import CommitScheduler
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
      , 256, 0.7, 0.9, 150, 8, 1.5],
 ]
+#inf_dataset=load_dataset("atlasia/atlaset_inference_ds",token=token,split="test",name="llm")
+detected_commit=False
+submit_file = Path("user_submit/") / f"data_{uuid.uuid4()}.json"
 @spaces.GPU
 def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150, num_beams=8, repetition_penalty=1.5):
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
         pad_token_id=tokenizer.pad_token_id,  # Explicit pad token
         eos_token_id=tokenizer.eos_token_id,  # Explicit eos token
     )
+    result=tokenizer.decode(output[0], skip_special_tokens=True)
+    #inf_dataset.add_item({"inputs":prompt,"outputs":result,"params":f"{max_length},{temperature},{top_p},{top_k},{num_beams},{repetition_penalty}"})
+    save_feedback(prompt,result,f"{max_length},{temperature},{top_p},{top_k},{num_beams},{repetition_penalty}")
+    detected_commit=True
+    return result
+def save_feedback(input,output,params) -> None:
+    with scheduler.lock:
+        with feedback_file.open("a") as f:
+            f.write(json.dumps({"input": input, "output": output, "params": params}))
+            f.write("\n")
+    detected_commit=True
 if __name__ == "__main__":
     # Create the Gradio interface
     with gr.Blocks() as app:
             description="Enter a prompt and get AI-generated text using our pretrained LLM on Moroccan Darija.",
             examples=examples,
         )
+    if detected_commit:
+        print("[INFO] CommitScheduler...")
+        scheduler = CommitScheduler(
+            repo_id="atlasia/atlaset_inference_ds",
+            repo_type="dataset",
+            folder_path=submit_file,
+            every=5,
+            token=token
+        )
+        detected_commit=False
     app.launch()