# Page header captured with the file: Canstralian's picture
# Commit message: Update app.py
# Commit: 2b5c692 (verified)
import os
import subprocess
import zipfile

import gradio as gr
import pandas as pd
from datasets import load_dataset
from kaggle.api.kaggle_api_extended import KaggleApi
# Kaggle authentication
# The client object is kept at module level under the name ``api``.
api = KaggleApi()
try:
    api.authenticate()
except Exception as e:
    # Broad catch is deliberate: this is the script's top-level boundary.
    # Raising SystemExit with a string prints the message to stderr and exits
    # with status 1, without relying on the ``exit`` helper that the ``site``
    # module injects for interactive use.
    raise SystemExit(f"Kaggle authentication failed: {e}") from e
else:
    print("Kaggle authentication successful.")
# Download dataset from Kaggle (replace with your dataset path)
kaggle_dataset_path = "<dataset-path>"  # Replace with actual dataset path
try:
    # Run the Kaggle CLI with an argument list and check=True:
    #   * no shell is involved, so characters such as "<" or ">" in the
    #     dataset path are passed through literally instead of being parsed
    #     as redirections;
    #   * a non-zero exit status raises CalledProcessError instead of being
    #     silently ignored (os.system only returns the status).
    subprocess.run(
        ["kaggle", "datasets", "download", "-d", kaggle_dataset_path],
        check=True,
    )
except (OSError, subprocess.CalledProcessError) as e:
    # OSError covers the case where the ``kaggle`` executable cannot be run.
    # SystemExit with a string prints it to stderr and exits with status 1.
    raise SystemExit(f"Failed to download Kaggle dataset: {e}") from e
else:
    print(f"Dataset {kaggle_dataset_path} downloaded successfully.")
# Extract the dataset (replace with actual dataset name)
dataset_name = "<dataset-name>"  # Replace with actual dataset name
try:
    # Use the standard-library zipfile module rather than shelling out to
    # ``unzip``: it needs no external binary, and a missing or corrupt archive
    # raises an exception instead of yielding an ignored exit status.
    # NOTE(review): existing files under ./data/ are overwritten without a
    # prompt — confirm that is acceptable.
    with zipfile.ZipFile(f"./{dataset_name}.zip") as archive:
        archive.extractall("./data/")
except (OSError, zipfile.BadZipFile) as e:
    # SystemExit with a string prints it to stderr and exits with status 1.
    raise SystemExit(f"Failed to extract dataset: {e}") from e
else:
    print(f"Dataset {dataset_name} extracted successfully.")
# Load Hugging Face dataset
hf_dataset_name = 'dataset_name'  # Replace with actual Hugging Face dataset name
try:
    hf_dataset = load_dataset(hf_dataset_name)
    # Convert the 'train' split with the dataset's own to_pandas() method
    # instead of passing the dataset object to the DataFrame constructor,
    # which iterates it row by row.
    # NOTE(review): assumes the dataset has a 'train' split; a missing split
    # ends up in the handler below.
    hf_df = hf_dataset['train'].to_pandas()
except Exception as e:
    # Broad catch is deliberate: this is the script's top-level boundary.
    # SystemExit with a string prints it to stderr and exits with status 1,
    # without relying on the interactive-only ``exit`` helper.
    raise SystemExit(f"Failed to load Hugging Face dataset: {e}") from e
else:
    print(f"Hugging Face dataset {hf_dataset_name} loaded successfully.")
# Load Kaggle dataset (replace with actual path)
kaggle_df_path = './data/kaggle_dataset.csv'  # Replace with the actual path to Kaggle dataset
try:
    kaggle_df = pd.read_csv(kaggle_df_path)
except Exception as e:
    # Broad catch is deliberate: this is the script's top-level boundary.
    # SystemExit with a string prints it to stderr and exits with status 1,
    # without relying on the interactive-only ``exit`` helper.
    raise SystemExit(f"Failed to load Kaggle dataset: {e}") from e
else:
    print(f"Kaggle dataset loaded from {kaggle_df_path}.")
# Stack the Hugging Face rows and the Kaggle rows into one table and
# renumber the resulting index.
merged_df = pd.concat(
    objs=[hf_df, kaggle_df],
    axis=0,
    ignore_index=True,
)
# Callback used by the Gradio interface to show the merged data
def display_data():
    """Return the first five rows of the module-level ``merged_df``."""
    preview = merged_df.head(n=5)
    return preview
# Build the Gradio UI: no input components, and a single dataframe output
# populated by display_data.
iface = gr.Interface(
    fn=display_data,
    inputs=[],
    outputs="dataframe",
)
# Start serving the app.
iface.launch()