Suzana committed on
Commit
c33f9b4
·
verified ·
1 Parent(s): 045377e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -107
app.py CHANGED
@@ -1,150 +1,97 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import matplotlib.pyplot as plt
 
4
  from pathlib import Path
5
  from huggingface_hub import HfApi, Repository
6
 
7
- # Matplotlib styling
8
- plt.rcParams.update({"font.family":"sans-serif","font.size":10})
9
-
10
- # Global DataFrame
11
  df = pd.DataFrame()
12
 
13
def upload_csv(file):
    """Load an uploaded CSV into the global ``df`` and reveal the annotation UI.

    The file is parsed and validated into a local frame first; the global
    ``df`` is only replaced once the frame is known to contain both required
    columns, so a rejected upload never overwrites data already loaded.

    Args:
        file: Value delivered by the ``gr.File`` input. Its ``.name``
            attribute is read as the CSV path. ``None`` when the button is
            clicked before a file was chosen.

    Returns:
        A 6-tuple in the order of the click handler's outputs: table update,
        status message, then visibility updates for the Save button, the
        Download button, the Visualize button and the Push accordion.
    """
    global df

    def _rejected(message):
        # Clear the table and keep every action hidden.
        return (
            None,
            message,
            gr.update(visible=False),  # Save
            gr.update(visible=False),  # Download CSV
            gr.update(visible=False),  # Visualize
            gr.update(visible=False),  # Push accordion
        )

    if file is None:
        return _rejected("❌ Please choose a CSV file first.")

    try:
        loaded = pd.read_csv(file.name)
    except (OSError, ValueError) as e:
        # pandas parser, empty-data and decoding errors all derive from ValueError.
        return _rejected(f"❌ Could not read CSV: {e}")

    if not {"text", "label"}.issubset(loaded.columns):
        return _rejected("❌ CSV must contain 'text' and 'label' columns.")

    loaded["label"] = loaded["label"].fillna("")
    df = loaded
    return (
        # Returning a bare frame would only set the value; the explicit
        # update also makes a table that was created hidden visible.
        gr.update(value=df[["text", "label"]], visible=True),
        "✅ Uploaded! You can now annotate and use the buttons below.",
        gr.update(visible=True),  # Save
        gr.update(visible=True),  # Download CSV
        gr.update(visible=True),  # Visualize
        gr.update(visible=True),  # Push accordion
    )
34
 
35
def save_changes(table):
    """Replace the global ``df`` with the edited table contents.

    Args:
        table: Data from the editable table; it is passed to
            ``pd.DataFrame`` together with the fixed ``text``/``label``
            column list.

    Returns:
        The confirmation message shown in the status box.
    """
    global df
    wanted_columns = ["text", "label"]
    df = pd.DataFrame(data=table, columns=wanted_columns)
    return "💾 Changes saved."
39
 
40
def download_csv():
    """Write the global ``df`` to ``annotated_data.csv`` and return that path.

    The file is written without the index column, relative to the current
    working directory.
    """
    # ``df`` is only read here, so no ``global`` declaration is required.
    destination = "annotated_data.csv"
    df.to_csv(path_or_buf=destination, index=False)
    return destination
45
-
46
def make_figure():
    """Build a two-panel Matplotlib figure of label frequencies.

    Reads the module-level ``df``. The left panel is a Label/Count table and
    the right panel a horizontal bar chart, both ordered from the most to the
    least frequent label. When there is no ``label`` column or it holds no
    values, a placeholder figure with a short message is returned instead of
    raising.

    Returns:
        The Matplotlib figure. It has already been removed from pyplot's
        figure registry, so repeated calls do not accumulate open figures;
        the returned object can still be rendered or saved.
    """
    if "label" in df.columns:
        counts = df["label"].value_counts().sort_values(ascending=False)
    else:
        counts = pd.Series(dtype="int64")

    # Labels are stringified so a column mixing numbers and text can still
    # be used as categorical bar positions.
    labels = [str(label) for label in counts.index]
    values = counts.tolist()

    if not labels:
        # ax.table() cannot be built without at least one row.
        fig, ax = plt.subplots(figsize=(8, 2), tight_layout=True)
        ax.axis("off")
        ax.text(0.5, 0.5, "No labels to display", ha="center", va="center")
        plt.close(fig)
        return fig

    fig, (ax1, ax2) = plt.subplots(
        ncols=2,
        gridspec_kw={"width_ratios": [1, 2]},
        # Grow the figure with the number of labels, never below 2 inches.
        figsize=(8, max(2, len(labels) * 0.4)),
        tight_layout=True,
    )

    # Left panel: plain table without axes.
    ax1.axis("off")
    tbl = ax1.table(
        cellText=[[label, count] for label, count in zip(labels, values)],
        colLabels=["Label", "Count"],
        loc="center",
    )
    tbl.auto_set_font_size(False)
    tbl.set_fontsize(10)
    tbl.scale(1, 1.2)

    # Right panel: bars, inverted so the first (largest) label is on top.
    ax2.barh(labels, values, color="#222222")
    ax2.invert_yaxis()
    ax2.set_xlabel("Count")

    # Detach from pyplot's global state; the Figure object stays usable.
    plt.close(fig)
    return fig
65
-
66
def visualize_and_download():
    """Render the label-distribution figure and save it as a PNG.

    Returns:
        A ``(figure, path)`` pair: the figure produced by ``make_figure`` and
        the path ``label_distribution.png`` it was saved to (150 dpi, tight
        bounding box).
    """
    output_file = "label_distribution.png"
    figure = make_figure()
    figure.savefig(output_file, dpi=150, bbox_inches="tight")
    return figure, output_file
71
-
72
def push_to_hub(repo_name, hf_token):
    """Publish the global ``df`` as ``data.csv`` in a Hub dataset repository.

    The dataset repo is created if it does not exist, cloned into a local
    working directory named after the repo (``/`` replaced by ``_``), the CSV
    is written into the clone and the result is pushed.

    Args:
        repo_name: Repository id, e.g. ``username/dataset``.
        hf_token: Hugging Face access token used for creation and cloning.

    Returns:
        A status message; any exception is reported as a "Push failed"
        message rather than raised, so the UI can display it.
    """
    import shutil

    global df
    try:
        api = HfApi()
        api.create_repo(
            repo_id=repo_name,
            token=hf_token,
            repo_type="dataset",
            exist_ok=True,
        )

        local_dir = Path(f"./{repo_name.replace('/', '_')}")
        if local_dir.exists():
            # A previous clone contains sub-directories (at least ``.git``),
            # which per-child unlink() cannot delete; remove the whole tree.
            shutil.rmtree(local_dir)

        repo = Repository(
            local_dir=str(local_dir),
            clone_from=repo_name,
            repo_type="dataset",
            use_auth_token=hf_token,
        )
        df.to_csv(local_dir / "data.csv", index=False)
        repo.push_to_hub(commit_message="📑 Updated annotated data")
        return f"🚀 Pushed to datasets/{repo_name}"
    except Exception as e:
        # UI boundary: surface the failure in the status box.
        return f"❌ Push failed: {e}"
93
 
94
# Gradio UI: upload a CSV, edit labels in a table, then save, download,
# visualize the label distribution or push the data to the Hub.
with gr.Blocks() as app:
    gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
    gr.Markdown("Upload a `.csv` (columns: **text**, **label**), then annotate, export, visualize, or push.")

    # Step 1: Upload
    with gr.Row():
        csv_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        upload_btn = gr.Button("Upload")

    # Table + status. The table is visible from the start: the upload
    # handler supplies its value, and a value alone never un-hides a
    # component that was created with visible=False.
    table = gr.Dataframe(headers=["text", "label"], interactive=True)
    status = gr.Textbox(label="Status", interactive=False)

    # Step 2: Actions. The row itself stays visible and each button starts
    # hidden, because the upload handler toggles the buttons, not the row.
    with gr.Row():
        save_btn = gr.Button("💾 Save", visible=False)
        download_btn = gr.Button("⬇️ Download CSV", visible=False)
        visualize_btn = gr.Button("📊 Visualize", visible=False)

    download_csv_out = gr.File(label="📥 Downloaded CSV", interactive=False)
    # Left visible so the figure returned by the Visualize handler shows up.
    chart_plot = gr.Plot(label="Label Distribution")
    download_chart_out = gr.File(label="📥 Downloaded Chart", interactive=False)

    # Push controls (revealed by the upload handler).
    push_acc = gr.Accordion("📦 Push to Hugging Face Hub", open=False, visible=False)
    with push_acc:
        repo_in = gr.Textbox(label="Repo (username/dataset)")
        token_in = gr.Textbox(label="🔑 HF Token", type="password")
        push_btn = gr.Button("🚀 Push")
        push_out = gr.Textbox(label="Push Status", interactive=False)

    # Bind events
    upload_btn.click(
        upload_csv,
        inputs=csv_input,
        outputs=[table, status, save_btn, download_btn, visualize_btn, push_acc],
    )
    save_btn.click(
        save_changes,
        inputs=table,
        outputs=status,
    )
    download_btn.click(
        download_csv,
        outputs=download_csv_out,
    )
    visualize_btn.click(
        visualize_and_download,
        outputs=[chart_plot, download_chart_out],
    )
    push_btn.click(
        push_to_hub,
        inputs=[repo_in, token_in],
        outputs=push_out,
    )

app.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import io
4
+ import os
5
  from pathlib import Path
6
  from huggingface_hub import HfApi, Repository
7
 
 
 
 
 
8
  df = pd.DataFrame()
9
 
10
def upload_csv(file):
    """Load an uploaded CSV into the global ``df`` and show it in the table.

    The file is parsed and validated into a local frame first; the global
    ``df`` is only replaced once the frame is known to contain both required
    columns, so a rejected upload never overwrites data already loaded (which
    the download and push handlers would otherwise export).

    Args:
        file: Value delivered by the ``gr.File`` input. Its ``.name``
            attribute is read as the CSV path. ``None`` when the button is
            clicked before a file was chosen.

    Returns:
        A ``(table_update, status_message)`` pair. On success the table
        receives the ``text``/``label`` columns and is made visible; on
        failure it is hidden.
    """
    global df

    if file is None:
        return gr.update(visible=False), "❌ Please choose a CSV file first."

    try:
        loaded = pd.read_csv(file.name)
    except (OSError, ValueError) as e:
        # pandas parser, empty-data and decoding errors all derive from ValueError.
        return gr.update(visible=False), f"❌ Could not read CSV: {e}"

    if "text" not in loaded.columns or "label" not in loaded.columns:
        return gr.update(visible=False), "❌ CSV must contain ‘text’ and ‘label’ columns."

    # Missing labels become empty strings so they are editable as text.
    loaded["label"] = loaded["label"].fillna("")
    df = loaded
    return (
        gr.update(value=df[["text", "label"]], visible=True),
        "✅ File uploaded — you can now edit labels.",
    )
20
 
21
def save_changes(edited_table):
    """Replace the global ``df`` with the edited table contents.

    Args:
        edited_table: Data from the editable table; it is passed to
            ``pd.DataFrame`` together with the fixed ``text``/``label``
            column list.

    Returns:
        The confirmation message shown in the status box.
    """
    global df
    wanted_columns = ["text", "label"]
    df = pd.DataFrame(data=edited_table, columns=wanted_columns)
    return "💾 Changes saved."
25
 
26
def download_csv():
    """Write the global ``df`` to ``annotated_data.csv`` and return that path.

    The file is written without the index column, relative to the current
    working directory.
    """
    # ``df`` is only read here, so no ``global`` declaration is required.
    destination = "annotated_data.csv"
    df.to_csv(path_or_buf=destination, index=False)
    return destination
31
+
32
def push_to_hub(repo_name: str, hf_token: str) -> str:
    """Publish the global ``df`` as ``data.csv`` in a Hub dataset repository.

    The dataset repo is created if it does not exist, cloned into a local
    working directory named after the repo (``/`` replaced by ``_``), the CSV
    is written into the clone and the result is pushed.

    Args:
        repo_name: Repository id, e.g. ``username/dataset-name``.
        hf_token: Hugging Face access token used for creation and cloning.

    Returns:
        A status message; any exception is reported as a "Push failed"
        message rather than raised, so the UI can display it.
    """
    import shutil

    global df
    try:
        api = HfApi()
        api.create_repo(
            repo_id=repo_name,
            token=hf_token,
            repo_type="dataset",
            exist_ok=True,
        )

        local_dir = Path(f"./{repo_name.replace('/', '_')}")
        if local_dir.exists():
            # A previous clone contains sub-directories (at least ``.git``),
            # which per-child unlink() cannot delete; remove the whole tree.
            shutil.rmtree(local_dir)

        repo = Repository(
            local_dir=str(local_dir),
            clone_from=repo_name,
            repo_type="dataset",  # clone the dataset repo, not a model repo
            use_auth_token=hf_token,
        )

        csv_path = local_dir / "data.csv"
        df.to_csv(csv_path, index=False)

        repo.push_to_hub(commit_message="📑 Update annotated data")
        return f"🚀 Pushed to https://huggingface.co/datasets/{repo_name}"

    except Exception as e:
        # UI boundary: surface the failure in the status box.
        return f"❌ Push failed: {e}"
64
 
65
# Gradio UI: upload a CSV, edit labels in a table, then save, download or
# push the annotated data to the Hugging Face Hub.
with gr.Blocks(theme=gr.themes.Default()) as app:
    # Page title and a one-line usage hint.
    gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
    gr.Markdown("Upload a `.csv` with `text` + `label` columns, annotate in-place, then export or publish.")

    # Upload controls.
    with gr.Row():
        file_input = gr.File(file_types=[".csv"], label="📁 Upload CSV")
        upload_btn = gr.Button("Upload")

    # Editable annotation table; created hidden and updated by upload_csv.
    df_table = gr.Dataframe(
        visible=False,
        interactive=True,
        label="📝 Editable Table",
        headers=["text", "label"],
    )
    status = gr.Textbox(interactive=False, label="Status")

    # Save / download actions.
    # NOTE(review): download_out is assumed to sit inside this row; the
    # flattened source does not show its indentation — confirm.
    with gr.Row():
        save_btn = gr.Button("💾 Save")
        download_btn = gr.Button("⬇️ Download CSV")
        download_out = gr.File(label="📥 Downloaded File")

    # Hub publishing controls, collapsed by default.
    with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
        repo_input = gr.Textbox(label="Repo (username/dataset-name)")
        token_input = gr.Textbox(type="password", label="HF Token")
        push_btn = gr.Button("🚀 Push")
        push_status = gr.Textbox(interactive=False, label="Push Status")

    # Event wiring: each button calls its handler and routes the result.
    upload_btn.click(
        fn=upload_csv,
        inputs=file_input,
        outputs=[df_table, status],
    )
    save_btn.click(
        fn=save_changes,
        inputs=df_table,
        outputs=status,
    )
    download_btn.click(
        fn=download_csv,
        outputs=download_out,
    )
    push_btn.click(
        fn=push_to_hub,
        inputs=[repo_input, token_input],
        outputs=push_status,
    )

app.launch()