infinitymatter committed on
Commit
925f18e
·
verified ·
1 Parent(s): c753563

Update generate_schema.py

Browse files
Files changed (1) hide show
  1. generate_schema.py +44 -43
generate_schema.py CHANGED
@@ -1,44 +1,45 @@
1
- import requests
2
- import json
3
- import os
4
- import os
5
- from dotenv import load_dotenv
6
-
7
- load_dotenv()
8
- API_KEY = os.getenv("hf_token")
9
-
10
-
11
- def generate_schema(user_prompt):
12
- """ Generates a synthetic dataset schema using Hugging Face API. """
13
-
14
- system_prompt = """
15
- You are an expert data scientist designing synthetic datasets.
16
- For any given dataset description, generate:
17
- - Column names
18
- - Data types (string, int, float, date)
19
- - Approximate row count
20
-
21
- Output in **pure JSON** format like:
22
- {
23
- "columns": ["PatientID", "Age", "Gender", "Diagnosis"],
24
- "types": ["int", "int", "string", "string"],
25
- "size": 500
26
- }
27
- """
28
-
29
- payload = {
30
- "inputs": system_prompt + "\n\nUser request: " + user_prompt,
31
- "options": {"wait_for_model": True}
32
- }
33
-
34
- response = requests.post(HF_MODEL_URL, headers=HEADERS, json=payload)
35
-
36
- if response.status_code == 200:
37
- try:
38
- output = response.json()[0]['generated_text']
39
- schema = json.loads(output.strip()) # Convert to JSON
40
- return schema
41
- except json.JSONDecodeError:
42
- return {"error": "Invalid JSON output from model. Try again."}
43
- else:
 
44
  return {"error": f"API request failed. Status Code: {response.status_code}"}
 
1
+ import requests
2
+ import json
3
+ import os
4
+ import os
5
+ from dotenv import load_dotenv
6
+ import streamlit as st
7
+
8
+ API_KEY = st.secrets["hf_token"]
9
+
10
+
11
+
12
+ def generate_schema(user_prompt):
13
+ """ Generates a synthetic dataset schema using Hugging Face API. """
14
+
15
+ system_prompt = """
16
+ You are an expert data scientist designing synthetic datasets.
17
+ For any given dataset description, generate:
18
+ - Column names
19
+ - Data types (string, int, float, date)
20
+ - Approximate row count
21
+
22
+ Output in **pure JSON** format like:
23
+ {
24
+ "columns": ["PatientID", "Age", "Gender", "Diagnosis"],
25
+ "types": ["int", "int", "string", "string"],
26
+ "size": 500
27
+ }
28
+ """
29
+
30
+ payload = {
31
+ "inputs": system_prompt + "\n\nUser request: " + user_prompt,
32
+ "options": {"wait_for_model": True}
33
+ }
34
+
35
+ response = requests.post(HF_MODEL_URL, headers=HEADERS, json=payload)
36
+
37
+ if response.status_code == 200:
38
+ try:
39
+ output = response.json()[0]['generated_text']
40
+ schema = json.loads(output.strip()) # Convert to JSON
41
+ return schema
42
+ except json.JSONDecodeError:
43
+ return {"error": "Invalid JSON output from model. Try again."}
44
+ else:
45
  return {"error": f"API request failed. Status Code: {response.status_code}"}