Spaces:

borgo9
/

Nuclio_test

Sleeping

App Files Files Community

borgo9 committed 27 days ago

Commit

4ff412d

verified ·

1 Parent(s): b82ee61

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -15

app.py CHANGED Viewed

@@ -77,24 +77,67 @@
 #     demo.launch()
-import gradio as gr
 import os
-HF_TOKEN = os.getenv("HF_TOKEN")  # Will be None if no secret is set
-# Simple function that echoes the input
-def echo(text):
-    return f"You typed: {text}"
-# Build a minimal Gradio interface
 demo = gr.Interface(
-    fn=echo,
-    inputs=gr.Textbox(label="Type something"),
-    outputs=gr.Textbox(label="Output"),
-    title="Test Gradio Space",
-    description="This is a minimal app to test if the Space starts."
 )
-# Launch the app
 if __name__ == "__main__":
     demo.launch()

 #     demo.launch()
 import os
+import gradio as gr
+from transformers import AutoTokenizer, AutoModel
+import torch
+import numpy as np
+# ----------------------------------------------------
+# 1. HF_TOKEN (optional, for private models)
+# ----------------------------------------------------
+HF_TOKEN = os.getenv("HF_TOKEN")  # Retrieved from Hugging Face Secrets
+# ----------------------------------------------------
+# 2. Load EmbeddingGemma-300M model
+# ----------------------------------------------------
+MODEL_NAME = "google/embeddinggemma-300m"
+# Load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
+model = AutoModel.from_pretrained(MODEL_NAME, use_auth_token=HF_TOKEN)
+# Function to encode text into embeddings
+def encode(texts):
+    """
+    Encode a list of texts into vector embeddings using EmbeddingGemma-300M.
+    Mean pooling over token embeddings is used.
+    """
+    inputs = tokenizer(
+        texts, return_tensors="pt", padding=True, truncation=True
+    )
+    with torch.no_grad():
+        outputs = model(**inputs)
+    # Mean pooling over tokens
+    embeddings = outputs.last_hidden_state.mean(dim=1)
+    # Convert to numpy float32
+    return embeddings.cpu().numpy().astype(np.float32)
+# ----------------------------------------------------
+# 3. Gradio test function
+# ----------------------------------------------------
+def test_encode(text):
+    """
+    Simple test function to check if embeddings are generated correctly.
+    Returns the shape of the resulting embedding vector.
+    """
+    emb = encode([text])
+    return f"Embedding shape: {emb.shape}"
+# ----------------------------------------------------
+# 4. Build Gradio Interface
+# ----------------------------------------------------
 demo = gr.Interface(
+    fn=test_encode,  # returns a string describing the embedding's shape
+    inputs=gr.Textbox(label="Type some text"),
+    outputs=gr.Textbox(label="Embedding info"),
+    title="Test EmbeddingGemma-300M",
+    description="This Space tests whether the EmbeddingGemma-300M model can generate embeddings."
 )
+# ----------------------------------------------------
+# 5. Launch App (only when run as a script, not when imported)
+# ----------------------------------------------------
 if __name__ == "__main__":
     demo.launch()