File size: 3,217 Bytes
20e7095
 
c33f9b4
 
d18e6c8
a4cec6f
20e7095
 
 
 
 
 
c33f9b4
 
9e6c3bb
1d6c7cd
c33f9b4
 
1d6c7cd
20e7095
c33f9b4
20e7095
c33f9b4
1d6c7cd
20e7095
 
a4cec6f
c33f9b4
 
 
 
 
a4cec6f
d18e6c8
 
c33f9b4
 
 
 
 
 
 
 
d18e6c8
c33f9b4
 
d18e6c8
c33f9b4
d18e6c8
 
 
c33f9b4
d18e6c8
 
c33f9b4
 
 
 
 
 
 
d18e6c8
 
a4cec6f
c33f9b4
729db5b
c33f9b4
729db5b
3ea3aae
c33f9b4
2dccd10
729db5b
c33f9b4
 
 
 
 
 
045377e
d304161
c33f9b4
045377e
 
c33f9b4
 
 
 
 
 
 
 
 
 
 
 
c91426b
d18e6c8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import gradio as gr
import pandas as pd
import io
import os
from pathlib import Path
from huggingface_hub import HfApi, Repository

# Shared in-memory table of annotations. It starts empty and is rebound by the
# callbacks below through ``global df`` (upload and save), then read back by
# the download and push callbacks.
df = pd.DataFrame()

def upload_csv(file):
    """Load an uploaded CSV into the shared table and reveal it for editing.

    Args:
        file: Value delivered by the file-upload input. Either an object that
            exposes the file path as ``.name`` or a plain path string is
            accepted; ``None`` means no file was selected.

    Returns:
        A ``(table_update, status_message)`` pair. On success the table update
        carries the ``text``/``label`` columns and makes the table visible;
        on any failure the table is hidden and the message explains why.
    """
    global df

    # Clicking "Upload" with nothing selected passes None.
    if file is None:
        return gr.update(visible=False), "❌ Please choose a CSV file first."

    # Accept both a temp-file wrapper (``.name``) and a bare path string.
    csv_path = getattr(file, "name", file)
    try:
        loaded = pd.read_csv(csv_path)
    except (OSError, ValueError) as e:
        # pandas parser/empty-data/decoding errors are ValueError subclasses;
        # report them in the status box instead of failing the callback.
        return gr.update(visible=False), f"❌ Could not read CSV: {e}"

    if "text" not in loaded.columns or "label" not in loaded.columns:
        return gr.update(visible=False), "❌ CSV must contain ‘text’ and ‘label’ columns."

    # Missing labels become empty strings so the cells are editable as text.
    loaded["label"] = loaded["label"].fillna("")

    # Publish only after validation so a bad upload never clobbers the
    # annotations that are currently being edited.
    df = loaded
    return (
        gr.update(value=df[["text", "label"]], visible=True),
        "✅ File uploaded — you can now edit labels.",
    )

def save_changes(edited_table):
    """Replace the shared table with the rows currently shown in the editor.

    Args:
        edited_table: Table contents received from the editable Dataframe
            component; anything ``pd.DataFrame`` accepts as data.

    Returns:
        A confirmation message for the status box.
    """
    global df
    column_names = ["text", "label"]
    # Rebuild the frame restricted to the two annotation columns.
    df = pd.DataFrame(data=edited_table, columns=column_names)
    return "💾 Changes saved."

def download_csv():
    """Write the shared table to ``annotated_data.csv`` and return that path.

    Returns:
        The relative path of the CSV file just written (no index column).
    """
    global df
    destination = "annotated_data.csv"
    df.to_csv(path_or_buf=destination, index=False)
    return destination

def push_to_hub(repo_name: str, hf_token: str) -> str:
    """Publish the shared table as ``data.csv`` in a Hugging Face dataset repo.

    The dataset repo is created if it does not exist yet, then the CSV is
    uploaded straight from memory through the Hub HTTP API. No local git clone
    is involved, so repeated pushes do not depend on cleaning up a previous
    working directory.

    Args:
        repo_name: Target repo id in ``username/dataset-name`` form.
        hf_token: Hugging Face access token used for both API calls.

    Returns:
        A status message: the dataset URL on success, otherwise a message
        starting with ``❌ Push failed``. Errors are reported, never raised.
    """
    global df

    # Trim whitespace that tends to come along with copy-pasted values.
    repo_id = (repo_name or "").strip()
    if not repo_id:
        return "❌ Push failed: repo name is required (username/dataset-name)."
    token = (hf_token or "").strip()
    if not token:
        return "❌ Push failed: a Hugging Face token is required."

    try:
        api = HfApi()
        api.create_repo(
            repo_id=repo_id,
            token=token,
            repo_type="dataset",
            exist_ok=True,
        )

        # Serialize in memory; upload_file accepts raw bytes as file content.
        csv_bytes = df.to_csv(index=False).encode("utf-8")
        api.upload_file(
            path_or_fileobj=csv_bytes,
            path_in_repo="data.csv",
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
            commit_message="📑 Update annotated data",
        )
        return f"🚀 Pushed to https://huggingface.co/datasets/{repo_id}"

    except Exception as e:
        # UI boundary: surface any Hub/network error in the status box.
        return f"❌ Push failed: {e}"

with gr.Blocks(theme=gr.themes.Default()) as app:
    # --- Header -----------------------------------------------------------
    gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
    gr.Markdown("Upload a `.csv` with `text` + `label` columns, annotate in-place, then export or publish.")

    # --- Upload controls --------------------------------------------------
    with gr.Row():
        file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        upload_btn = gr.Button("Upload")

    # --- Editor: created hidden; upload_csv's return value toggles it -----
    df_table = gr.Dataframe(
        headers=["text", "label"],
        label="📝 Editable Table",
        interactive=True,
        visible=False,
    )
    status = gr.Textbox(label="Status", interactive=False)

    # --- Save / export ----------------------------------------------------
    with gr.Row():
        save_btn = gr.Button("💾 Save")
        download_btn = gr.Button("⬇️ Download CSV")
        download_out = gr.File(label="📥 Downloaded File")

    # --- Publish to the Hub (collapsed by default) ------------------------
    with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
        repo_input = gr.Textbox(label="Repo (username/dataset-name)")
        token_input = gr.Textbox(label="HF Token", type="password")
        push_btn = gr.Button("🚀 Push")
        push_status = gr.Textbox(label="Push Status", interactive=False)

    # --- Event wiring: each button calls one module-level callback --------
    upload_btn.click(
        fn=upload_csv,
        inputs=file_input,
        outputs=[df_table, status],
    )
    save_btn.click(
        fn=save_changes,
        inputs=df_table,
        outputs=status,
    )
    download_btn.click(
        fn=download_csv,
        outputs=download_out,
    )
    push_btn.click(
        fn=push_to_hub,
        inputs=[repo_input, token_input],
        outputs=push_status,
    )

# Start the web server for the UI defined above.
app.launch()