File size: 2,061 Bytes
5915e4e
4bd99cd
 
 
 
5915e4e
4bd99cd
 
2b5c692
 
 
 
 
 
4bd99cd
2b5c692
 
 
 
 
 
 
 
4bd99cd
2b5c692
 
 
 
 
 
 
 
4bd99cd
2b5c692
 
 
 
 
 
 
 
 
4bd99cd
2b5c692
 
 
 
 
 
 
 
4bd99cd
 
 
 
2b5c692
4bd99cd
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os
import subprocess
import zipfile

import gradio as gr
import pandas as pd
from datasets import load_dataset
from kaggle.api.kaggle_api_extended import KaggleApi

# Kaggle authentication: build the API client and authenticate it up front,
# so the script stops before any download is attempted if this step fails.
api = KaggleApi()
try:
    api.authenticate()
except Exception as e:
    print(f"Kaggle authentication failed: {e}")
    # Raise SystemExit directly: the exit() helper is injected by the `site`
    # module for interactive use and is not guaranteed to exist in every
    # runtime (e.g. `python -S` or frozen builds).
    raise SystemExit(1)
else:
    print("Kaggle authentication successful.")

# Download dataset from Kaggle (replace with your dataset path)
kaggle_dataset_path = "<dataset-path>"  # Replace with actual dataset path
try:
    # os.system() only returns the exit status and never raises, so a failed
    # download would still be reported as a success.  subprocess.run() with
    # check=True raises CalledProcessError on a non-zero exit status, and the
    # argument list keeps the dataset path away from shell interpretation.
    subprocess.run(
        ["kaggle", "datasets", "download", "-d", kaggle_dataset_path],
        check=True,
    )
except (OSError, subprocess.CalledProcessError) as e:
    # OSError covers the case where the `kaggle` executable cannot be started.
    print(f"Failed to download Kaggle dataset: {e}")
    raise SystemExit(1)
else:
    print(f"Dataset {kaggle_dataset_path} downloaded successfully.")

# Extract the dataset (replace with actual dataset name)
dataset_name = "<dataset-name>"  # Replace with actual dataset name
try:
    # Use the standard-library zipfile module instead of shelling out to
    # `unzip`: a missing or corrupt archive then raises an exception that the
    # handler below can report, and no external binary is required.
    with zipfile.ZipFile(f"./{dataset_name}.zip") as archive:
        archive.extractall("./data/")
except (OSError, zipfile.BadZipFile) as e:
    print(f"Failed to extract dataset: {e}")
    raise SystemExit(1)
else:
    print(f"Dataset {dataset_name} extracted successfully.")

# Load Hugging Face dataset and convert its 'train' split to a DataFrame
hf_dataset_name = 'dataset_name'  # Replace with actual Hugging Face dataset name
try:
    hf_dataset = load_dataset(hf_dataset_name)
    hf_df = pd.DataFrame(hf_dataset['train'])
except Exception as e:
    # Broad catch is deliberate at this script-level boundary: any failure to
    # load or convert the dataset is reported and the script stops.
    print(f"Failed to load Hugging Face dataset: {e}")
    # SystemExit instead of the site-provided exit() helper, which is meant
    # for interactive sessions and may be absent in some runtimes.
    raise SystemExit(1)
else:
    print(f"Hugging Face dataset {hf_dataset_name} loaded successfully.")

# Load Kaggle dataset from the extracted CSV (replace with actual path)
kaggle_df_path = './data/kaggle_dataset.csv'  # Replace with the actual path to Kaggle dataset
try:
    kaggle_df = pd.read_csv(kaggle_df_path)
except Exception as e:
    # Broad catch is deliberate at this script-level boundary: a missing file
    # or a parse error is reported and the script stops.
    print(f"Failed to load Kaggle dataset: {e}")
    # SystemExit instead of the site-provided exit() helper, which is meant
    # for interactive sessions and may be absent in some runtimes.
    raise SystemExit(1)
else:
    print(f"Kaggle dataset loaded from {kaggle_df_path}.")

# Stack the Hugging Face rows on top of the Kaggle rows and renumber the index
merged_df = pd.concat(
    (hf_df, kaggle_df),
    ignore_index=True,
)

# Callback that supplies the preview of the merged data
def display_data():
    """Return the first rows of ``merged_df`` (``DataFrame.head()`` default count)."""
    preview = merged_df.head()
    return preview

# Gradio UI: no input components; display_data's return value is rendered
# through the "dataframe" output component.
iface = gr.Interface(
    fn=display_data,
    inputs=[],
    outputs="dataframe",
)

# Start the Gradio app
iface.launch()