Canstralian committed on
Commit
2b5c692
·
verified ·
1 Parent(s): 4bd99cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -11
app.py CHANGED
@@ -6,25 +6,54 @@ from kaggle.api.kaggle_api_extended import KaggleApi
6
 
7
  # Kaggle authentication
8
  api = KaggleApi()
9
- api.authenticate()
 
 
 
 
 
10
 
11
- # Download dataset from Kaggle (example dataset)
12
- os.system("kaggle datasets download -d <dataset-path>") # Replace <dataset-path>
 
 
 
 
 
 
13
 
14
- # Extract the dataset (if needed)
15
- os.system("unzip ./<dataset-name>.zip -d ./data/") # Replace <dataset-name> with actual dataset name
 
 
 
 
 
 
16
 
17
- # Load the Hugging Face dataset
18
- hf_dataset = load_dataset('dataset_name') # Replace 'dataset_name' with the name of the dataset
19
- hf_df = pd.DataFrame(hf_dataset['train']) # Convert to pandas DataFrame
 
 
 
 
 
 
20
 
21
- # Load Kaggle dataset
22
- kaggle_df = pd.read_csv('./data/kaggle_dataset.csv') # Replace with the appropriate path
 
 
 
 
 
 
23
 
24
  # Merge datasets
25
  merged_df = pd.concat([hf_df, kaggle_df], ignore_index=True)
26
 
27
- # Example function to display merged data (this can be modified based on your needs)
28
  def display_data():
29
  return merged_df.head()
30
 
 
6
 
7
  # Kaggle authentication
8
  api = KaggleApi()
9
+ try:
10
+ api.authenticate()
11
+ print("Kaggle authentication successful.")
12
+ except Exception as e:
13
+ print(f"Kaggle authentication failed: {e}")
14
+ exit(1)
15
 
16
+ # Download dataset from Kaggle (replace with your dataset path)
17
+ kaggle_dataset_path = "<dataset-path>" # Replace with actual dataset path
18
+ try:
19
+ os.system(f"kaggle datasets download -d {kaggle_dataset_path}")
20
+ print(f"Dataset {kaggle_dataset_path} downloaded successfully.")
21
+ except Exception as e:
22
+ print(f"Failed to download Kaggle dataset: {e}")
23
+ exit(1)
24
 
25
+ # Extract the dataset (replace with actual dataset name)
26
+ dataset_name = "<dataset-name>" # Replace with actual dataset name
27
+ try:
28
+ os.system(f"unzip ./{dataset_name}.zip -d ./data/")
29
+ print(f"Dataset {dataset_name} extracted successfully.")
30
+ except Exception as e:
31
+ print(f"Failed to extract dataset: {e}")
32
+ exit(1)
33
 
34
+ # Load Hugging Face dataset
35
+ hf_dataset_name = 'dataset_name' # Replace with actual Hugging Face dataset name
36
+ try:
37
+ hf_dataset = load_dataset(hf_dataset_name)
38
+ hf_df = pd.DataFrame(hf_dataset['train'])
39
+ print(f"Hugging Face dataset {hf_dataset_name} loaded successfully.")
40
+ except Exception as e:
41
+ print(f"Failed to load Hugging Face dataset: {e}")
42
+ exit(1)
43
 
44
+ # Load Kaggle dataset (replace with actual path)
45
+ kaggle_df_path = './data/kaggle_dataset.csv' # Replace with the actual path to Kaggle dataset
46
+ try:
47
+ kaggle_df = pd.read_csv(kaggle_df_path)
48
+ print(f"Kaggle dataset loaded from {kaggle_df_path}.")
49
+ except Exception as e:
50
+ print(f"Failed to load Kaggle dataset: {e}")
51
+ exit(1)
52
 
53
  # Merge datasets
54
  merged_df = pd.concat([hf_df, kaggle_df], ignore_index=True)
55
 
56
+ # Function to display merged data
57
  def display_data():
58
  return merged_df.head()
59