|
import requests
|
|
import json
|
|
import os
|
|
import os
|
|
from dotenv import load_dotenv
|
|
|
|
# Let python-dotenv populate the process environment before any setting is read.
load_dotenv()

# Value of the "hf_token" environment variable, or None when it is not set.
# NOTE(review): presumably the Hugging Face access token -- confirm where it is consumed.
API_KEY = os.environ.get("hf_token")
|
|
|
|
|
|
def _parse_schema_text(generated_text, prompt):
    """Turn the model's raw text into a parsed JSON value.

    Raises:
        json.JSONDecodeError: if no JSON can be recovered from the text.
    """
    text = generated_text
    # Text-generation endpoints may echo the prompt ahead of the completion;
    # drop it so the example object inside the prompt is not mistaken for
    # the answer.
    if text.startswith(prompt):
        text = text[len(prompt):]
    text = text.strip()

    try:
        return json.loads(text)
    except json.JSONDecodeError as err:
        first_error = err

    # Fallback: the reply wraps JSON in prose. Walk the top-level objects and
    # keep the last one, since the model's answer follows any echoed example.
    decoder = json.JSONDecoder()
    schema = None
    pos = text.find("{")
    while pos != -1:
        try:
            schema, end = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            end = pos + 1
        pos = text.find("{", end)

    if schema is None:
        raise first_error
    return schema


def generate_schema(user_prompt, timeout=120):
    """Generate a synthetic dataset schema using the Hugging Face API.

    Builds a prompt from a fixed instruction text plus ``user_prompt``, posts
    it to ``HF_MODEL_URL`` with ``HEADERS`` and parses the generated text as
    JSON. Failures are reported as a dict with an ``"error"`` key rather than
    raised.

    NOTE(review): ``HF_MODEL_URL`` and ``HEADERS`` are not defined in the
    visible part of this file -- confirm they exist at module level.

    Args:
        user_prompt: Free-text description of the dataset to design.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The parsed JSON produced by the model (the prompt asks for an object
        with "columns", "types" and "size"), or ``{"error": "<message>"}``
        when the input is unusable, the request fails, or the reply cannot
        be parsed.
    """
    if not isinstance(user_prompt, str) or not user_prompt.strip():
        return {"error": "Prompt must be a non-empty string."}

    system_prompt = """
    You are an expert data scientist designing synthetic datasets.
    For any given dataset description, generate:
    - Column names
    - Data types (string, int, float, date)
    - Approximate row count

    Output in **pure JSON** format like:
    {
        "columns": ["PatientID", "Age", "Gender", "Diagnosis"],
        "types": ["int", "int", "string", "string"],
        "size": 500
    }
    """

    prompt = system_prompt + "\n\nUser request: " + user_prompt
    payload = {
        "inputs": prompt,
        "options": {"wait_for_model": True},
    }

    try:
        response = requests.post(
            HF_MODEL_URL, headers=HEADERS, json=payload, timeout=timeout
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts follow the same error-dict contract
        # as every other failure path.
        return {"error": f"API request failed: {exc}"}

    if response.status_code != 200:
        return {"error": f"API request failed. Status Code: {response.status_code}"}

    try:
        generated_text = response.json()[0]["generated_text"]
    except ValueError:
        # The response body itself was not valid JSON.
        return {"error": "Invalid JSON output from model. Try again."}
    except (KeyError, IndexError, TypeError):
        return {"error": "Unexpected response format from API."}

    if not isinstance(generated_text, str):
        return {"error": "Unexpected response format from API."}

    try:
        return _parse_schema_text(generated_text, prompt)
    except json.JSONDecodeError:
        return {"error": "Invalid JSON output from model. Try again."}