File size: 1,280 Bytes
f0edb7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import requests
import json
import os
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
load_dotenv()
# Hugging Face access token; None when "hf_token" is not set.
API_KEY = os.getenv("hf_token")

# generate_schema() reads the two module-level names below, but they were not
# defined anywhere in the visible module, so every call ended in NameError.
# The model endpoint is deployment-specific, so it is taken from the
# environment (for example
# https://api-inference.huggingface.co/models/<model-id>).
# NOTE(review): confirm the intended model and set HF_MODEL_URL accordingly.
HF_MODEL_URL = os.getenv("HF_MODEL_URL", "")
# Bearer-token auth header; left empty when no token is configured so the
# literal string "Bearer None" is never sent.
HEADERS = {"Authorization": f"Bearer {API_KEY}"} if API_KEY else {}


def _parse_model_json(text):
    """Parse ``text`` as JSON, falling back to the first embedded JSON object.

    The whole (stripped) text is tried first. If that fails, the text is
    scanned for the first ``{`` from which a complete JSON object can be
    decoded, which tolerates prose or code fences around the object.

    Raises:
        json.JSONDecodeError: If no JSON value can be recovered.
    """
    stripped = text.strip()
    try:
        return json.loads(stripped)
    except json.JSONDecodeError as err:
        first_error = err

    decoder = json.JSONDecoder()
    start = stripped.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(stripped, start)
        except json.JSONDecodeError:
            start = stripped.find("{", start + 1)
            continue
        return candidate
    raise first_error


def generate_schema(user_prompt):
    """Generate a synthetic dataset schema using the Hugging Face API.

    Sends a fixed instruction prompt followed by ``user_prompt`` to
    ``HF_MODEL_URL`` (with ``HEADERS``) and parses the model's reply as JSON.

    Args:
        user_prompt: Free-text description of the dataset to design.

    Returns:
        The parsed JSON value on success (the prompt asks the model for a
        dict with "columns", "types" and "size"), otherwise a dict of the
        form ``{"error": <message>}``. Invalid input, network errors, non-200
        responses and unparseable replies are all reported through the error
        dict rather than raised.
    """
    # Reject unusable input before spending an API call on it.
    if not isinstance(user_prompt, str) or not user_prompt.strip():
        return {"error": "Dataset description must be a non-empty string."}

    system_prompt = """

You are an expert data scientist designing synthetic datasets.

For any given dataset description, generate:

- Column names

- Data types (string, int, float, date)

- Approximate row count



Output in **pure JSON** format like:

{

    "columns": ["PatientID", "Age", "Gender", "Diagnosis"],

    "types": ["int", "int", "string", "string"],

    "size": 500

}

"""

    prompt = system_prompt + "\n\nUser request: " + user_prompt
    payload = {
        "inputs": prompt,
        "options": {"wait_for_model": True},
    }

    try:
        # wait_for_model can hold the connection while the model loads, so
        # the timeout is generous, but it must exist or a stalled connection
        # would block the caller forever.
        response = requests.post(
            HF_MODEL_URL, headers=HEADERS, json=payload, timeout=120
        )
    except requests.RequestException as exc:
        return {"error": f"API request failed: {exc}"}

    if response.status_code != 200:
        return {"error": f"API request failed. Status Code: {response.status_code}"}

    try:
        body = response.json()
    except ValueError:
        return {"error": "Invalid JSON output from model. Try again."}

    try:
        output = body[0]['generated_text']
    except (KeyError, IndexError, TypeError):
        # e.g. a dict-shaped error body or an empty list instead of results.
        return {"error": "Unexpected response format from model. Try again."}
    if not isinstance(output, str):
        return {"error": "Unexpected response format from model. Try again."}

    # Text-generation models may echo the prompt ahead of the completion;
    # drop it so the example JSON inside the prompt is not mistaken for the
    # model's answer.
    if output.startswith(prompt):
        output = output[len(prompt):]

    try:
        return _parse_model_json(output)
    except json.JSONDecodeError:
        return {"error": "Invalid JSON output from model. Try again."}