File size: 3,217 Bytes
20e7095 c33f9b4 d18e6c8 a4cec6f 20e7095 c33f9b4 9e6c3bb 1d6c7cd c33f9b4 1d6c7cd 20e7095 c33f9b4 20e7095 c33f9b4 1d6c7cd 20e7095 a4cec6f c33f9b4 a4cec6f d18e6c8 c33f9b4 d18e6c8 c33f9b4 d18e6c8 c33f9b4 d18e6c8 c33f9b4 d18e6c8 c33f9b4 d18e6c8 a4cec6f c33f9b4 729db5b c33f9b4 729db5b 3ea3aae c33f9b4 2dccd10 729db5b c33f9b4 045377e d304161 c33f9b4 045377e c33f9b4 c91426b d18e6c8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import gradio as gr
import pandas as pd
import io
import os
from pathlib import Path
from huggingface_hub import HfApi, Repository
# Module-level working copy of the annotations. It starts empty and is
# rebound (via ``global df``) by the upload and save handlers, then read by
# the download and push handlers.
df = pd.DataFrame()
def upload_csv(file):
    """Load an uploaded CSV into the shared annotation DataFrame.

    The module-level ``df`` is only replaced once the file has been read and
    validated, so a rejected upload never clobbers previously loaded data.

    Args:
        file: Value of the file input. Either an object exposing the
            temp-file path as ``.name``, a plain path string, or ``None``
            when nothing has been selected.

    Returns:
        A ``(table_update, status_message)`` pair: the update for the
        editable table and the text for the status box.
    """
    global df

    if file is None:
        return gr.update(visible=False), "❌ Please choose a CSV file first."

    # Depending on the Gradio version the upload arrives as a tempfile-like
    # object (path in ``.name``) or as a bare path string; accept both.
    csv_path = getattr(file, "name", file)

    try:
        loaded = pd.read_csv(csv_path)
    except (OSError, ValueError) as e:
        # pandas' ParserError / EmptyDataError and UnicodeDecodeError are all
        # ValueError subclasses; OSError covers unreadable/missing files.
        return gr.update(visible=False), f"❌ Could not read CSV: {e}"

    if "text" not in loaded.columns or "label" not in loaded.columns:
        return gr.update(visible=False), "❌ CSV must contain ‘text’ and ‘label’ columns."

    # Missing labels become empty strings so the table shows blank cells.
    loaded["label"] = loaded["label"].fillna("")
    df = loaded
    return (
        gr.update(value=df[["text", "label"]], visible=True),
        "✅ File uploaded — you can now edit labels.",
    )
def save_changes(edited_table):
    """Replace the shared DataFrame with the current table contents.

    Args:
        edited_table: Current value of the editable table; any data that
            ``pd.DataFrame`` accepts.

    Returns:
        A confirmation message for the status box.
    """
    global df
    kept_columns = ["text", "label"]
    df = pd.DataFrame(edited_table, columns=kept_columns)
    return "💾 Changes saved."
def download_csv():
    """Write the shared DataFrame to a CSV file and return that file's path.

    Returns:
        The relative path of the written file, ``annotated_data.csv``.
    """
    csv_name = "annotated_data.csv"
    # ``df`` is only read here, so no ``global`` declaration is required.
    df.to_csv(csv_name, index=False)
    return csv_name
def push_to_hub(repo_name: str, hf_token: str) -> str:
    """Publish the shared DataFrame as ``data.csv`` in a Hub dataset repo.

    The repo is created if it does not exist yet. The CSV is uploaded
    directly from memory through the HTTP API, so no local git clone is
    created and nothing has to be cleaned up between pushes.

    Args:
        repo_name: Target dataset repo id, e.g. ``username/dataset-name``.
        hf_token: Hugging Face access token used for both API calls.

    Returns:
        A status message for the UI: the dataset URL on success, or a
        message starting with ``❌`` describing why the push failed.
    """
    repo_id = (repo_name or "").strip()
    if not repo_id:
        return "❌ Push failed: repo name is required (username/dataset-name)."

    # Broad catch is deliberate: this is the UI boundary and every failure
    # (auth, network, validation) should surface as a status message.
    try:
        api = HfApi()
        api.create_repo(
            repo_id=repo_id,
            token=hf_token,
            repo_type="dataset",
            exist_ok=True,
        )
        # The previous implementation cloned the repo locally and tried to
        # wipe the clone with ``unlink()`` on every child, which fails on the
        # ``.git`` directory left by an earlier push. Uploading the bytes
        # directly avoids the clone altogether.
        api.upload_file(
            path_or_fileobj=df.to_csv(index=False).encode("utf-8"),
            path_in_repo="data.csv",
            repo_id=repo_id,
            repo_type="dataset",
            token=hf_token,
            commit_message="📑 Update annotated data",
        )
        return f"🚀 Pushed to https://huggingface.co/datasets/{repo_id}"
    except Exception as e:
        return f"❌ Push failed: {e}"
# UI definition: components are rendered in the order they are created, so the
# statement order below is the page layout.
# NOTE(review): the nesting under each ``with`` was reconstructed from a
# whitespace-stripped source — confirm which components belong to each
# Row/Accordion (in particular whether ``download_out`` sits inside the Row).
with gr.Blocks(theme=gr.themes.Default()) as app:
    gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
    gr.Markdown("Upload a `.csv` with `text` + `label` columns, annotate in-place, then export or publish.")
    # Upload controls.
    with gr.Row():
        file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        upload_btn = gr.Button("Upload")
    # Editable table; created hidden and made visible by upload_csv on success.
    df_table = gr.Dataframe(
        headers=["text","label"],
        label="📝 Editable Table",
        interactive=True,
        visible=False
    )
    status = gr.Textbox(label="Status", interactive=False)
    # Save / export controls.
    with gr.Row():
        save_btn = gr.Button("💾 Save")
        download_btn = gr.Button("⬇️ Download CSV")
        download_out = gr.File(label="📥 Downloaded File")
    # Publishing controls, collapsed by default.
    with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
        repo_input = gr.Textbox(label="Repo (username/dataset-name)")
        token_input = gr.Textbox(label="HF Token", type="password")
        push_btn = gr.Button("🚀 Push")
        push_status = gr.Textbox(label="Push Status", interactive=False)
    # Event wiring. Only the Save handler receives the table contents;
    # download_csv and push_to_hub take no table input and operate on the
    # module-level ``df``.
    upload_btn.click(upload_csv, inputs=file_input, outputs=[df_table, status])
    save_btn.click( save_changes, inputs=df_table, outputs=status)
    download_btn.click(download_csv, outputs=download_out)
    push_btn.click( push_to_hub, inputs=[repo_input, token_input], outputs=push_status)
# Starts the Gradio server as soon as this module is executed.
app.launch()
|