Suzana committed on
Commit
1d6c7cd
·
verified ·
1 Parent(s): 8277138

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -37
app.py CHANGED
@@ -4,80 +4,76 @@ import io
4
  import os
5
  from huggingface_hub import HfApi, Repository
6
 
7
- # Global variable to store the DataFrame
8
  df = pd.DataFrame()
9
 
10
- # Upload CSV
11
  def upload_csv(file):
12
  global df
13
  df = pd.read_csv(file.name)
14
  if "text" not in df.columns or "label" not in df.columns:
15
- return None, "❌ CSV must contain 'text' and 'label' columns."
16
  df["label"] = df["label"].fillna("")
17
- return df[["text", "label"]], "✅ File uploaded. You can now annotate."
 
 
 
 
18
 
19
- # Save changes from the editable table
20
- def save_changes(edited_data):
21
  global df
22
- df = pd.DataFrame(edited_data, columns=["text", "label"])
23
- return " Changes saved."
24
 
25
- # Download the updated CSV
26
  def download_csv():
27
  global df
28
  csv_bytes = df.to_csv(index=False).encode("utf-8")
29
  return gr.File.update(value=io.BytesIO(csv_bytes), filename="annotated_data.csv")
30
 
31
- # Push to Hugging Face Hub
32
  def push_to_hub(repo_name, hf_token):
33
  global df
34
  repo_url = f"https://huggingface.co/datasets/{repo_name}"
35
  local_path = f"./{repo_name.replace('/', '_')}"
36
-
37
  if os.path.exists(local_path):
38
  os.system(f"rm -rf {local_path}")
39
-
40
  HfApi().create_repo(repo_id=repo_name, token=hf_token, repo_type="dataset", exist_ok=True)
41
  repo = Repository(local_dir=local_path, clone_from=repo_url, token=hf_token)
42
  df.to_csv(f"{local_path}/data.csv", index=False)
43
  repo.push_to_hub()
44
-
45
- return f"🚀 Successfully pushed to: {repo_url}"
46
 
47
- # Gradio app
48
- with gr.Blocks(title="CSV Labeling App") as app:
49
- gr.Markdown("# 🏷️ Label it! Text Annotation Tool")
50
- gr.Markdown("Upload a `.csv` with `text` and `label` columns. You can annotate, save, and publish your data.")
51
 
52
  with gr.Row():
53
  file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
54
- upload_button = gr.Button("Upload")
55
 
56
- dataframe = gr.Dataframe(
57
- headers=["text", "label"],
58
- label="📝 Annotate Labels Below",
59
  interactive=True,
60
  visible=False,
61
- row_count=10,
62
- col_count=(2, "fixed")
63
  )
64
-
65
  status = gr.Textbox(label="Status", interactive=False)
66
 
67
  with gr.Row():
68
- save_btn = gr.Button("💾 Save")
69
- download_btn = gr.Button("⬇️ Download CSV")
70
- download_file = gr.File(label="📥 Download", interactive=False)
71
 
72
- with gr.Row():
73
- repo_input = gr.Textbox(label="📦 HF Dataset Repo (e.g. username/my-dataset)")
74
- token_input = gr.Textbox(label="🔑 HF Token", type="password")
75
- push_btn = gr.Button("🚀 Push to Hub")
76
  push_status = gr.Textbox(label="Push Status", interactive=False)
77
 
78
- upload_button.click(fn=upload_csv, inputs=file_input, outputs=[dataframe, status])
79
- save_btn.click(fn=save_changes, inputs=dataframe, outputs=status)
80
- download_btn.click(fn=download_csv, outputs=download_file)
81
- push_btn.click(fn=push_to_hub, inputs=[repo_input, token_input], outputs=push_status)
 
 
82
 
83
- app.launch()
 
 
4
  import os
5
  from huggingface_hub import HfApi, Repository
6
 
7
+ # Global DataFrame
8
  df = pd.DataFrame()
9
 
 
10
  def upload_csv(file):
11
  global df
12
  df = pd.read_csv(file.name)
13
  if "text" not in df.columns or "label" not in df.columns:
14
+ return gr.update(visible=False), "❌ CSV must have 'text' and 'label' columns."
15
  df["label"] = df["label"].fillna("")
16
+ # Show the dataframe and status
17
+ return (
18
+ gr.update(value=df[["text","label"]], visible=True),
19
+ "✅ File uploaded — you can now edit the labels below."
20
+ )
21
 
22
+ def save_changes(edited_table):
 
23
  global df
24
+ df = pd.DataFrame(edited_table, columns=["text","label"])
25
+ return "💾 Changes saved."
26
 
 
27
  def download_csv():
28
  global df
29
  csv_bytes = df.to_csv(index=False).encode("utf-8")
30
  return gr.File.update(value=io.BytesIO(csv_bytes), filename="annotated_data.csv")
31
 
 
32
  def push_to_hub(repo_name, hf_token):
33
  global df
34
  repo_url = f"https://huggingface.co/datasets/{repo_name}"
35
  local_path = f"./{repo_name.replace('/', '_')}"
 
36
  if os.path.exists(local_path):
37
  os.system(f"rm -rf {local_path}")
 
38
  HfApi().create_repo(repo_id=repo_name, token=hf_token, repo_type="dataset", exist_ok=True)
39
  repo = Repository(local_dir=local_path, clone_from=repo_url, token=hf_token)
40
  df.to_csv(f"{local_path}/data.csv", index=False)
41
  repo.push_to_hub()
42
+ return f"🚀 Data pushed to {repo_url}"
 
43
 
44
+ with gr.Blocks(theme=gr.themes.Default()) as app:
45
+ gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
46
+ gr.Markdown("Upload a `.csv` with `text` + `label` columns, annotate in-place, then export or publish.")
 
47
 
48
  with gr.Row():
49
  file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
50
+ upload_btn = gr.Button("Upload")
51
 
52
+ df_table = gr.Dataframe(
53
+ headers=["text","label"],
 
54
  interactive=True,
55
  visible=False,
56
+ label="📝 Editable Table"
 
57
  )
 
58
  status = gr.Textbox(label="Status", interactive=False)
59
 
60
  with gr.Row():
61
+ save_btn = gr.Button("💾 Save")
62
+ download_btn= gr.Button("⬇️ Download CSV")
63
+ download_out= gr.File(label="Download")
64
 
65
+ with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
66
+ repo_input = gr.Textbox(label="Repo (username/dataset-name)")
67
+ token_input = gr.Textbox(label="HF Token", type="password")
68
+ push_btn = gr.Button("🚀 Push")
69
  push_status = gr.Textbox(label="Push Status", interactive=False)
70
 
71
+ # ------------------------
72
+ upload_btn.click(upload_csv, inputs=file_input, outputs=[df_table, status])
73
+ save_btn.click( save_changes, inputs=df_table, outputs=status)
74
+ download_btn.click(download_csv, outputs=download_out)
75
+ push_btn.click( push_to_hub, inputs=[repo_input,token_input], outputs=push_status)
76
+ # ------------------------
77
 
78
+ # Launch the app
79
+ app.launch()