Suzana committed on
Commit
2dccd10
·
verified ·
1 Parent(s): c91426b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -71
app.py CHANGED
@@ -1,12 +1,9 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import io
4
- import os
5
  from pathlib import Path
6
  from huggingface_hub import HfApi, Repository
7
- import matplotlib.pyplot as plt
8
 
9
- # Set a clean, sans-serif default font
10
  plt.rcParams.update({
11
  "font.family": "sans-serif",
12
  "font.size": 10,
@@ -20,26 +17,26 @@ def upload_csv(file):
20
  df = pd.read_csv(file.name)
21
  if "text" not in df.columns or "label" not in df.columns:
22
  return (
23
- gr.update(visible=False), # hide table
24
- "❌ CSV must contain `text` and `label` columns.",
25
- gr.update(visible=False), # hide Save
26
- gr.update(visible=False), # hide Download CSV
27
- gr.update(visible=False), # hide Visualize
28
- gr.update(visible=False), # hide Push Accordion
29
  )
30
  df["label"] = df["label"].fillna("")
31
  return (
32
- gr.update(value=df[["text","label"]], visible=True),
33
- "✅ File uploaded — you can now annotate.",
34
  gr.update(visible=True),
35
  gr.update(visible=True),
36
  gr.update(visible=True),
37
  gr.update(visible=True),
38
  )
39
 
40
- def save_changes(edited):
41
  global df
42
- df = pd.DataFrame(edited, columns=["text","label"])
43
  return "💾 Changes saved."
44
 
45
  def download_csv():
@@ -48,10 +45,10 @@ def download_csv():
48
  df.to_csv(path, index=False)
49
  return path
50
 
51
- def create_distribution_figure(df_input):
52
- counts = df_input["label"].value_counts().sort_values(ascending=False)
53
- labels = counts.index.tolist()
54
- values = counts.values.tolist()
55
 
56
  fig, (ax_table, ax_bar) = plt.subplots(
57
  ncols=2,
@@ -59,31 +56,18 @@ def create_distribution_figure(df_input):
59
  figsize=(8, max(2, len(labels)*0.4)),
60
  tight_layout=True
61
  )
62
-
63
  # Table
64
  ax_table.axis("off")
65
- table_data = [[lab, cnt] for lab, cnt in zip(labels, values)]
66
- tbl = ax_table.table(
67
- cellText=table_data,
68
- colLabels=["Label", "Count"],
69
- cellLoc="center",
70
- loc="center"
71
- )
72
- tbl.auto_set_font_size(False)
73
- tbl.set_fontsize(10)
74
- tbl.scale(1, 1.2)
75
-
76
  # Bar chart
77
- ax_bar.barh(labels, values, color="#222222")
78
- ax_bar.invert_yaxis()
79
- ax_bar.set_xlabel("Count")
80
- ax_bar.set_ylabel("")
81
-
82
  return fig
83
 
84
  def visualize_and_download_chart():
85
- global df
86
- fig = create_distribution_figure(df)
87
  out_path = "label_distribution.png"
88
  fig.savefig(out_path, dpi=150, bbox_inches="tight")
89
  return fig, out_path
@@ -94,12 +78,10 @@ def push_to_hub(repo_name, hf_token):
94
  api = HfApi()
95
  api.create_repo(repo_id=repo_name, token=hf_token,
96
  repo_type="dataset", exist_ok=True)
97
-
98
  local_dir = Path(f"./{repo_name.replace('/','_')}")
99
  if local_dir.exists():
100
  for f in local_dir.iterdir(): f.unlink()
101
  local_dir.rmdir()
102
-
103
  repo = Repository(
104
  local_dir=str(local_dir),
105
  clone_from=repo_name,
@@ -113,46 +95,47 @@ def push_to_hub(repo_name, hf_token):
113
  return f"❌ Push failed: {e}"
114
 
115
  with gr.Blocks(theme=gr.themes.Default()) as app:
116
- gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
117
- gr.Markdown("**Step 1:** Upload a `.csv` with **text** + **label** columns.")
 
118
 
119
- # Step 1: Upload only
120
  with gr.Row():
121
- upload_file = gr.File(label="📁 Upload CSV", file_types=[".csv"])
122
- upload_btn = gr.Button("Upload")
123
-
124
- # Hidden until upload
125
- df_table = gr.Dataframe(headers=["text","label"], visible=False, interactive=True)
126
- status = gr.Textbox(label="Status")
127
- save_btn = gr.Button("💾 Save", visible=False)
128
- download_btn = gr.Button("⬇️ Download CSV", visible=False)
129
- download_csv_out = gr.File(label="📥 Download CSV", visible=False)
130
-
131
- visualize_btn = gr.Button("📊 Visualize Distribution", visible=False)
132
- chart_plot = gr.Plot(label="Label Distribution", visible=False)
133
- download_chart= gr.File(label="📥 Download Chart", visible=False)
134
-
135
- # Push accordion hidden
136
- with gr.Accordion("📦 Push to Hugging Face Hub", open=False, visible=False) as push_acc:
137
- repo_in = gr.Textbox(label="Repo (username/dataset-name)")
138
- token_in = gr.Textbox(label="🔑 HF Token", type="password")
139
- push_btn = gr.Button("🚀 Push")
140
- push_status = gr.Textbox(label="Push Status")
141
-
142
- # Bind actions
 
 
 
143
  upload_btn.click(
144
  upload_csv,
145
- inputs=upload_file,
146
- outputs=[df_table, status,
147
  save_btn, download_btn, visualize_btn, push_acc]
148
  )
149
- save_btn.click(save_changes, inputs=df_table, outputs=status)
150
  download_btn.click(download_csv, outputs=download_csv_out)
151
  visualize_btn.click(visualize_and_download_chart,
152
- outputs=[chart_plot, download_chart])
153
  push_btn.click(push_to_hub, inputs=[repo_in, token_in], outputs=push_status)
154
 
155
- # Step 2 instruction
156
- gr.Markdown("**Step 2:** Edit labels, then Save, Visualize or Publish.")
157
-
158
  app.launch()
 
1
  import gradio as gr
2
  import pandas as pd
3
+ import matplotlib.pyplot as plt
 
4
  from pathlib import Path
5
  from huggingface_hub import HfApi, Repository
 
6
 
 
7
  plt.rcParams.update({
8
  "font.family": "sans-serif",
9
  "font.size": 10,
 
17
  df = pd.read_csv(file.name)
18
  if "text" not in df.columns or "label" not in df.columns:
19
  return (
20
+ None, # table
21
+ "❌ CSV must contain 'text' and 'label' columns.",
22
+ gr.update(visible=False), # save
23
+ gr.update(visible=False), # download CSV
24
+ gr.update(visible=False), # visualize
25
+ gr.update(visible=False), # push accordion
26
  )
27
  df["label"] = df["label"].fillna("")
28
  return (
29
+ df[["text","label"]],
30
+ "✅ File uploaded — you can now annotate and use the buttons below.",
31
  gr.update(visible=True),
32
  gr.update(visible=True),
33
  gr.update(visible=True),
34
  gr.update(visible=True),
35
  )
36
 
def save_changes(table):
    """Store the edited table in the module-level ``df``.

    The incoming table data is rebuilt as a DataFrame with the ``text`` and
    ``label`` columns and assigned to the global ``df``.

    Returns:
        A status message string for the UI.
    """
    global df
    kept_columns = ["text", "label"]
    df = pd.DataFrame(table, columns=kept_columns)
    return "💾 Changes saved."
41
 
42
  def download_csv():
 
45
  df.to_csv(path, index=False)
46
  return path
47
 
48
+ def create_distribution_figure():
49
+ global df
50
+ counts = df["label"].value_counts().sort_values(ascending=False)
51
+ labels, values = counts.index.tolist(), counts.values.tolist()
52
 
53
  fig, (ax_table, ax_bar) = plt.subplots(
54
  ncols=2,
 
56
  figsize=(8, max(2, len(labels)*0.4)),
57
  tight_layout=True
58
  )
 
59
  # Table
60
  ax_table.axis("off")
61
+ data = [[l,v] for l,v in zip(labels, values)]
62
+ tbl = ax_table.table(cellText=data, colLabels=["Label","Count"], loc="center")
63
+ tbl.auto_set_font_size(False); tbl.set_fontsize(10); tbl.scale(1,1.2)
 
 
 
 
 
 
 
 
64
  # Bar chart
65
+ ax_bar.barh(labels, values, color="#222")
66
+ ax_bar.invert_yaxis(); ax_bar.set_xlabel("Count")
 
 
 
67
  return fig
68
 
def visualize_and_download_chart():
    """Build the label-distribution figure and write it to a PNG file.

    Returns:
        A ``(figure, path)`` tuple: the figure returned by
        ``create_distribution_figure`` and the path of the saved image.
    """
    out_path = "label_distribution.png"
    fig = create_distribution_figure()
    fig.savefig(out_path, bbox_inches="tight", dpi=150)
    return fig, out_path
 
78
  api = HfApi()
79
  api.create_repo(repo_id=repo_name, token=hf_token,
80
  repo_type="dataset", exist_ok=True)
 
81
  local_dir = Path(f"./{repo_name.replace('/','_')}")
82
  if local_dir.exists():
83
  for f in local_dir.iterdir(): f.unlink()
84
  local_dir.rmdir()
 
85
  repo = Repository(
86
  local_dir=str(local_dir),
87
  clone_from=repo_name,
 
95
  return f"❌ Push failed: {e}"
96
 
# Build the UI and wire the event handlers.
#
# Visibility is managed per component: `upload_csv` returns visibility
# updates for exactly four outputs (save_btn, download_btn, visualize_btn,
# push_acc), so each of those starts hidden on its own. They must not sit
# inside a hidden container, because a hidden parent that is never an event
# output would keep them invisible even after they are switched on.
with gr.Blocks(theme=gr.themes.Default()) as app:
    gr.Markdown("## 🏷️ Label It! Text Annotation Tool\n"
                "Upload a `.csv` (with **text** + **label** columns), "
                "then annotate, export, visualize, or publish.")

    # Step 1: Upload
    with gr.Row():
        file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        upload_btn = gr.Button("Upload")

    # Editable table. It is visible from the start because the upload handler
    # only sends it a value (a DataFrame or None), never a visibility update.
    table = gr.Dataframe(headers=["text", "label"], interactive=True)
    status = gr.Textbox(label="Status", interactive=False)

    # Step 2 controls. The row itself stays visible; the three buttons are
    # hidden individually until a valid CSV has been uploaded.
    # NOTE(review): the original nesting of the three output widgets relative
    # to this row is assumed — confirm the intended layout.
    with gr.Row() as action_row:
        save_btn = gr.Button("💾 Save", visible=False)
        download_btn = gr.Button("⬇️ Download CSV", visible=False)
        visualize_btn = gr.Button("📊 Visualize Distribution", visible=False)
        download_csv_out = gr.File(label="📥 Download CSV")
        chart_plot = gr.Plot(label="Label Distribution")
        download_chart_out = gr.File(label="📥 Download Chart")

    # Push accordion (hidden until upload succeeds)
    push_acc = gr.Accordion("📦 Push to Hugging Face Hub", open=False, visible=False)
    with push_acc:
        repo_in = gr.Textbox(label="Repo (username/dataset-name)")
        token_in = gr.Textbox(label="🔑 HF Token", type="password")
        push_btn = gr.Button("🚀 Push")
        push_status = gr.Textbox(label="Push Status", interactive=False)

    # Event bindings
    upload_btn.click(
        upload_csv,
        inputs=file_input,
        outputs=[table, status,
                 save_btn, download_btn, visualize_btn, push_acc]
    )
    save_btn.click(save_changes, inputs=table, outputs=status)
    download_btn.click(download_csv, outputs=download_csv_out)
    visualize_btn.click(visualize_and_download_chart,
                        outputs=[chart_plot, download_chart_out])
    push_btn.click(push_to_hub, inputs=[repo_in, token_in], outputs=push_status)

app.launch()