Suzana committed on
Commit
c91426b
·
verified ·
1 Parent(s): 9e6c3bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -54
app.py CHANGED
@@ -6,6 +6,12 @@ from pathlib import Path
6
  from huggingface_hub import HfApi, Repository
7
  import matplotlib.pyplot as plt
8
 
 
 
 
 
 
 
9
  # Global DataFrame
10
  df = pd.DataFrame()
11
 
@@ -13,23 +19,34 @@ def upload_csv(file):
13
  global df
14
  df = pd.read_csv(file.name)
15
  if "text" not in df.columns or "label" not in df.columns:
16
- return gr.update(visible=False), "❌ CSV must contain ‘text’ and ‘label’ columns."
 
 
 
 
 
 
 
17
  df["label"] = df["label"].fillna("")
18
  return (
19
  gr.update(value=df[["text","label"]], visible=True),
20
- "✅ File uploaded — you can now edit labels."
 
 
 
 
21
  )
22
 
23
- def save_changes(edited_table):
24
  global df
25
- df = pd.DataFrame(edited_table, columns=["text","label"])
26
  return "💾 Changes saved."
27
 
28
  def download_csv():
29
  global df
30
- out_path = "annotated_data.csv"
31
- df.to_csv(out_path, index=False)
32
- return out_path
33
 
34
  def create_distribution_figure(df_input):
35
  counts = df_input["label"].value_counts().sort_values(ascending=False)
@@ -37,44 +54,50 @@ def create_distribution_figure(df_input):
37
  values = counts.values.tolist()
38
 
39
  fig, (ax_table, ax_bar) = plt.subplots(
40
- nrows=1, ncols=2,
41
- gridspec_kw={"width_ratios": [1, 2]},
42
- figsize=(8, max(2, len(labels) * 0.3))
 
43
  )
44
 
45
  # Table
46
  ax_table.axis("off")
47
  table_data = [[lab, cnt] for lab, cnt in zip(labels, values)]
48
- tbl = ax_table.table(cellText=table_data, colLabels=["Label","Count"], loc="center")
 
 
 
 
 
49
  tbl.auto_set_font_size(False)
50
  tbl.set_fontsize(10)
51
- tbl.scale(1, 1.5)
52
 
53
  # Bar chart
54
- ax_bar.barh(labels, values)
55
  ax_bar.invert_yaxis()
56
  ax_bar.set_xlabel("Count")
57
  ax_bar.set_ylabel("")
58
 
59
- plt.tight_layout()
60
  return fig
61
 
62
  def visualize_and_download_chart():
63
  global df
64
  fig = create_distribution_figure(df)
65
- chart_path = "label_distribution.png"
66
- fig.savefig(chart_path, dpi=150)
67
- return fig, chart_path
68
 
69
- def push_to_hub(repo_name: str, hf_token: str) -> str:
70
  global df
71
  try:
72
  api = HfApi()
73
- api.create_repo(repo_id=repo_name, token=hf_token, repo_type="dataset", exist_ok=True)
 
74
 
75
- local_dir = Path(f"./{repo_name.replace('/', '_')}")
76
  if local_dir.exists():
77
- for child in local_dir.iterdir(): child.unlink()
78
  local_dir.rmdir()
79
 
80
  repo = Repository(
@@ -83,9 +106,7 @@ def push_to_hub(repo_name: str, hf_token: str) -> str:
83
  repo_type="dataset",
84
  use_auth_token=hf_token
85
  )
86
-
87
- csv_path = local_dir / "data.csv"
88
- df.to_csv(csv_path, index=False)
89
  repo.push_to_hub(commit_message="📑 Update annotated data")
90
  return f"🚀 Pushed to https://huggingface.co/datasets/{repo_name}"
91
  except Exception as e:
@@ -93,37 +114,45 @@ def push_to_hub(repo_name: str, hf_token: str) -> str:
93
 
94
  with gr.Blocks(theme=gr.themes.Default()) as app:
95
  gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
96
- gr.Markdown("Upload a `.csv` with **text** + **label** columns, annotate in-place, then export, visualize, or publish.")
97
-
98
- with gr.Row():
99
- file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
100
- upload_btn = gr.Button("Upload")
101
-
102
- df_table = gr.Dataframe(headers=["text","label"], label="📝 Editable Table",
103
- interactive=True, visible=False)
104
- status = gr.Textbox(label="Status", interactive=False)
105
-
106
- with gr.Row():
107
- save_btn = gr.Button("💾 Save")
108
- download_btn = gr.Button("⬇️ Download CSV")
109
- download_out = gr.File(label="📥 Downloaded File")
110
-
111
  with gr.Row():
112
- visualize_btn = gr.Button("📊 Visualize Distribution")
113
- chart_plot = gr.Plot(label="Label Distribution")
114
- download_chart = gr.File(label="📥 Download Chart")
115
-
116
- with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
117
- repo_input = gr.Textbox(label="Repo (username/dataset-name)")
118
- token_input = gr.Textbox(label="🔑 HF Token", type="password")
119
- push_btn = gr.Button("🚀 Push")
120
- push_status = gr.Textbox(label="Push Status", interactive=False)
121
-
122
- # Bind events
123
- upload_btn.click(upload_csv, inputs=file_input, outputs=[df_table, status])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  save_btn.click(save_changes, inputs=df_table, outputs=status)
125
- download_btn.click(download_csv, outputs=download_out)
126
- visualize_btn.click(visualize_and_download_chart, outputs=[chart_plot, download_chart])
127
- push_btn.click(push_to_hub, inputs=[repo_input, token_input], outputs=push_status)
 
 
 
 
128
 
129
  app.launch()
 
6
  from huggingface_hub import HfApi, Repository
7
  import matplotlib.pyplot as plt
8
 
9
+ # Set a clean, sans-serif default font
10
+ plt.rcParams.update({
11
+ "font.family": "sans-serif",
12
+ "font.size": 10,
13
+ })
14
+
15
  # Global DataFrame
16
  df = pd.DataFrame()
17
 
 
19
  global df
20
  df = pd.read_csv(file.name)
21
  if "text" not in df.columns or "label" not in df.columns:
22
+ return (
23
+ gr.update(visible=False), # hide table
24
+ "❌ CSV must contain `text` and `label` columns.",
25
+ gr.update(visible=False), # hide Save
26
+ gr.update(visible=False), # hide Download CSV
27
+ gr.update(visible=False), # hide Visualize
28
+ gr.update(visible=False), # hide Push Accordion
29
+ )
30
  df["label"] = df["label"].fillna("")
31
  return (
32
  gr.update(value=df[["text","label"]], visible=True),
33
+ "✅ File uploaded — you can now annotate.",
34
+ gr.update(visible=True),
35
+ gr.update(visible=True),
36
+ gr.update(visible=True),
37
+ gr.update(visible=True),
38
  )
39
 
40
+ def save_changes(edited):
41
  global df
42
+ df = pd.DataFrame(edited, columns=["text","label"])
43
  return "💾 Changes saved."
44
 
45
  def download_csv():
46
  global df
47
+ path = "annotated_data.csv"
48
+ df.to_csv(path, index=False)
49
+ return path
50
 
51
  def create_distribution_figure(df_input):
52
  counts = df_input["label"].value_counts().sort_values(ascending=False)
 
54
  values = counts.values.tolist()
55
 
56
  fig, (ax_table, ax_bar) = plt.subplots(
57
+ ncols=2,
58
+ gridspec_kw={"width_ratios": [1,2]},
59
+ figsize=(8, max(2, len(labels)*0.4)),
60
+ tight_layout=True
61
  )
62
 
63
  # Table
64
  ax_table.axis("off")
65
  table_data = [[lab, cnt] for lab, cnt in zip(labels, values)]
66
+ tbl = ax_table.table(
67
+ cellText=table_data,
68
+ colLabels=["Label", "Count"],
69
+ cellLoc="center",
70
+ loc="center"
71
+ )
72
  tbl.auto_set_font_size(False)
73
  tbl.set_fontsize(10)
74
+ tbl.scale(1, 1.2)
75
 
76
  # Bar chart
77
+ ax_bar.barh(labels, values, color="#222222")
78
  ax_bar.invert_yaxis()
79
  ax_bar.set_xlabel("Count")
80
  ax_bar.set_ylabel("")
81
 
 
82
  return fig
83
 
84
  def visualize_and_download_chart():
85
  global df
86
  fig = create_distribution_figure(df)
87
+ out_path = "label_distribution.png"
88
+ fig.savefig(out_path, dpi=150, bbox_inches="tight")
89
+ return fig, out_path
90
 
91
+ def push_to_hub(repo_name, hf_token):
92
  global df
93
  try:
94
  api = HfApi()
95
+ api.create_repo(repo_id=repo_name, token=hf_token,
96
+ repo_type="dataset", exist_ok=True)
97
 
98
+ local_dir = Path(f"./{repo_name.replace('/','_')}")
99
  if local_dir.exists():
100
+ for f in local_dir.iterdir(): f.unlink()
101
  local_dir.rmdir()
102
 
103
  repo = Repository(
 
106
  repo_type="dataset",
107
  use_auth_token=hf_token
108
  )
109
+ df.to_csv(local_dir/"data.csv", index=False)
 
 
110
  repo.push_to_hub(commit_message="📑 Update annotated data")
111
  return f"🚀 Pushed to https://huggingface.co/datasets/{repo_name}"
112
  except Exception as e:
 
114
 
115
  with gr.Blocks(theme=gr.themes.Default()) as app:
116
  gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
117
+ gr.Markdown("**Step 1:** Upload a `.csv` with **text** + **label** columns.")
118
+
119
+ # Step 1: Upload only
 
 
 
 
 
 
 
 
 
 
 
 
120
  with gr.Row():
121
+ upload_file = gr.File(label="📁 Upload CSV", file_types=[".csv"])
122
+ upload_btn = gr.Button("Upload")
123
+
124
+ # Hidden until upload
125
+ df_table = gr.Dataframe(headers=["text","label"], visible=False, interactive=True)
126
+ status = gr.Textbox(label="Status")
127
+ save_btn = gr.Button("💾 Save", visible=False)
128
+ download_btn = gr.Button("⬇️ Download CSV", visible=False)
129
+ download_csv_out = gr.File(label="📥 Download CSV", visible=False)
130
+
131
+ visualize_btn = gr.Button("📊 Visualize Distribution", visible=False)
132
+ chart_plot = gr.Plot(label="Label Distribution", visible=False)
133
+ download_chart= gr.File(label="📥 Download Chart", visible=False)
134
+
135
+ # Push accordion hidden
136
+ with gr.Accordion("📦 Push to Hugging Face Hub", open=False, visible=False) as push_acc:
137
+ repo_in = gr.Textbox(label="Repo (username/dataset-name)")
138
+ token_in = gr.Textbox(label="🔑 HF Token", type="password")
139
+ push_btn = gr.Button("🚀 Push")
140
+ push_status = gr.Textbox(label="Push Status")
141
+
142
+ # Bind actions
143
+ upload_btn.click(
144
+ upload_csv,
145
+ inputs=upload_file,
146
+ outputs=[df_table, status,
147
+ save_btn, download_btn, visualize_btn, push_acc]
148
+ )
149
  save_btn.click(save_changes, inputs=df_table, outputs=status)
150
+ download_btn.click(download_csv, outputs=download_csv_out)
151
+ visualize_btn.click(visualize_and_download_chart,
152
+ outputs=[chart_plot, download_chart])
153
+ push_btn.click(push_to_hub, inputs=[repo_in, token_in], outputs=push_status)
154
+
155
+ # Step 2 instruction
156
+ gr.Markdown("**Step 2:** Edit labels, then Save, Visualize or Publish.")
157
 
158
  app.launch()