pingnieuk committed on
Commit
761cd5d
1 Parent(s): c37408c

read datasets csv

Browse files
Files changed (1) hide show
  1. src/utils.py +18 -1
src/utils.py CHANGED
@@ -1,5 +1,5 @@
 
1
  from huggingface_hub import snapshot_download
2
- import time
3
 
4
 
5
  def my_snapshot_download(repo_id, revision, local_dir, repo_type, max_workers):
@@ -8,5 +8,22 @@ def my_snapshot_download(repo_id, revision, local_dir, repo_type, max_workers):
8
  snapshot_download(repo_id=repo_id, revision=revision, local_dir=local_dir, repo_type=repo_type, max_workers=max_workers)
9
  return
10
  except Exception:
 
11
  time.sleep(60)
12
  return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
  from huggingface_hub import snapshot_download
 
3
 
4
 
5
  def my_snapshot_download(repo_id, revision, local_dir, repo_type, max_workers):
 
8
  snapshot_download(repo_id=repo_id, revision=revision, local_dir=local_dir, repo_type=repo_type, max_workers=max_workers)
9
  return
10
  except Exception:
11
+ import time
12
  time.sleep(60)
13
  return
14
+
15
+
16
def get_dataset_url(row):
    """Return an HTML anchor linking a benchmark name to its dataset page.

    Args:
        row: A mapping-like record (for example a pandas row or a dict)
            with a ``'Benchmark'`` entry (the link text) and a
            ``'Dataset Link'`` entry (the link target).

    Returns:
        A ``<a target="_blank" ...>`` HTML string whose ``href`` is the
        dataset link and whose visible text is the benchmark name.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import html

    # Escape both values: an unescaped '"' in the URL would terminate the
    # href attribute early, and '<' or '&' in the name would corrupt the markup.
    # str() keeps non-string cells (e.g. numbers) formatting as they did before.
    dataset_name = html.escape(str(row['Benchmark']))
    dataset_url = html.escape(str(row['Dataset Link']), quote=True)
    benchmark = f'<a target="_blank" href="{dataset_url}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{dataset_name}</a>'
    return benchmark
21
+
22
def get_dataset_summary_table(file_path):
    """Load the dataset summary CSV and return the columns used for display.

    The CSV at ``file_path`` is read with pandas. Each row's ``'Benchmark'``
    value is replaced by the result of ``get_dataset_url`` for that row, and
    the frame is then narrowed to the ``'Category'``, ``'Benchmark'``,
    ``'Data Split'``, ``'Data Size'`` and ``'Language'`` columns, in that order.

    Args:
        file_path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        A pandas DataFrame containing only the five columns listed above.
    """
    summary = pd.read_csv(file_path)

    # Build the linked benchmark cell row by row (axis=1 hands each row over).
    summary['Benchmark'] = summary.apply(get_dataset_url, axis=1)

    visible_columns = ['Category', 'Benchmark', 'Data Split', 'Data Size', 'Language']
    return summary[visible_columns]