#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
*NetCom → WooCommerce CSV/Excel Processor*
Robust edition – catches and logs every recoverable error so one failure never
brings the whole pipeline down. Only small, surgical changes were made.
"""
import gradio as gr
import pandas as pd
import tempfile
import os, sys, json, hashlib, asyncio, traceback
from io import BytesIO
from pathlib import Path
import openai
import gradio_client.utils
# ────────────────────────────── HELPERS ──────────────────────────────
def _log(err: Exception, msg: str = ""):
    """Log errors to stderr without stopping execution."""
    print(f"[WARN] {msg}: {err}", file=sys.stderr)
    # three-argument form keeps compatibility with Python < 3.10
    traceback.print_exception(type(err), err, err.__traceback__)
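# Illustrative only: _log(ValueError("boom"), "parsing row") prints
# "[WARN] parsing row: boom" to stderr, followed by the full traceback.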
# Patch: tolerate bad JSON schemas produced by some OpenAI tools
_original_json_schema_to_python_type = gradio_client.utils._json_schema_to_python_type

def _fixed_json_schema_to_python_type(schema, defs=None):
    try:
        if isinstance(schema, bool):
            return "any"
        return _original_json_schema_to_python_type(schema, defs)
    except Exception as e:  # last-chance fallback
        _log(e, "json_schema_to_python_type failed")
        return "any"

gradio_client.utils._json_schema_to_python_type = _fixed_json_schema_to_python_type
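# With the patch applied, a bare boolean schema (which some tools emit) no
# longer raises inside gradio_client; it degrades to "any". Sketch:
#   gradio_client.utils._json_schema_to_python_type(True)   # -> "any"
#   gradio_client.utils._json_schema_to_python_type(False)  # -> "any"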
# ────────────────────────────── DISK CACHE ──────────────────────────────
CACHE_DIR = Path("ai_response_cache")
CACHE_DIR.mkdir(exist_ok=True)

def _cache_path(prompt):  # deterministic path for a given prompt
    return CACHE_DIR / f"{hashlib.md5(prompt.encode()).hexdigest()}.json"
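# e.g. _cache_path("hello") -> ai_response_cache/5d41402abc4b2a76b9719d911017c592.json
# (MD5 serves only as a stable filename hash here, not for security)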
def get_cached_response(prompt):
    try:
        p = _cache_path(prompt)
        if p.exists():
            return json.loads(p.read_text(encoding="utf-8"))["response"]
    except Exception as e:
        _log(e, "reading cache")
    return None

def cache_response(prompt, response):
    try:
        _cache_path(prompt).write_text(
            json.dumps({"prompt": prompt, "response": response}), encoding="utf-8"
        )
    except Exception as e:
        _log(e, "writing cache")
# ────────────────────────────── OPENAI ──────────────────────────────
async def _call_openai(client, prompt):
    """Single protected OpenAI call."""
    try:
        rsp = await client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": prompt}],
            temperature=0,
        )
        return rsp.choices[0].message.content
    except Exception as e:
        _log(e, "OpenAI error")
        return f"Error: {e}"
async def process_text_batch_async(client, prompts):
    """Return results in original order, resilient to any error."""
    results, tasks = {}, []
    for p in prompts:
        cached = get_cached_response(p)
        if cached is not None:
            results[p] = cached
        else:
            tasks.append(asyncio.create_task(_call_openai(client, p)))
    # tasks were created in prompt order, so zipping against the still-uncached
    # prompts pairs each task with the prompt that spawned it
    for prompt, task in zip([p for p in prompts if p not in results], tasks):
        try:
            res = await task
        except Exception as e:
            _log(e, "async OpenAI task")
            res = f"Error: {e}"
        cache_response(prompt, res)
        results[prompt] = res
    return [results[p] for p in prompts]
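# Order is preserved regardless of which prompts were cached, e.g. (sketch):
#   res = await process_text_batch_async(client, ["a", "b", "c"])
#   # res[0], res[1], res[2] line up with "a", "b", "c" respectively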
async def process_text_with_ai_async(texts, instruction):
    if not texts:
        return []
    client = openai.AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
    batch_size, out = 500, []
    for i in range(0, len(texts), batch_size):
        prompts = [f"{instruction}\n\nText: {t}" for t in texts[i : i + batch_size]]
        out.extend(await process_text_batch_async(client, prompts))
    return out
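# Illustrative invocation (assumes OPENAI_API_KEY is set; network required):
#   summaries = asyncio.run(
#       process_text_with_ai_async(["Some course text"], "Summarize in one line:")
#   )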
# ────────────────────────────── MAIN TRANSFORM ──────────────────────────────
def process_woocommerce_data_in_memory(upload):
    """Convert NetCom → Woo CSV/XLSX; every stage guarded."""
    try:
        # brand → logo mapping
        brand_logo = {
            "Amazon Web Services": "/wp-content/uploads/2025/04/aws.png",
            "Cisco": "/wp-content/uploads/2025/04/cisco-e1738593292198-1.webp",
            "Microsoft": "/wp-content/uploads/2025/04/Microsoft-e1737494120985-1.png",
            "Google Cloud": "/wp-content/uploads/2025/04/Google_Cloud.png",
            "EC Council": "/wp-content/uploads/2025/04/Ec_Council.png",
            "ITIL": "/wp-content/uploads/2025/04/ITIL.webp",
            "PMI": "/wp-content/uploads/2025/04/PMI.png",
            "Comptia": "/wp-content/uploads/2025/04/Comptia.png",
            "Autodesk": "/wp-content/uploads/2025/04/autodesk.png",
            "ISC2": "/wp-content/uploads/2025/04/ISC2.png",
            "AICerts": "/wp-content/uploads/2025/04/aicerts-logo-1.png",
        }
        default_prereq = (
            "No specific prerequisites are required for this course. "
            "Basic computer literacy and familiarity with fundamental concepts in the "
            "subject area are recommended for the best learning experience."
        )
        # ---------------- I/O ----------------
        ext = Path(upload.name).suffix.lower()
        try:
            if ext in {".xlsx", ".xls"}:
                try:
                    df = pd.read_excel(upload.name, sheet_name="Active Schedules")
                except Exception as e:
                    _log(e, "Excel read failed (falling back to first sheet)")
                    df = pd.read_excel(upload.name, sheet_name=0)
            else:  # CSV
                try:
                    df = pd.read_csv(upload.name, encoding="latin1")
                except Exception as e:
                    _log(e, "CSV read failed (trying utf-8)")
                    # pandas takes encoding_errors (1.3+), not a bare errors kwarg
                    df = pd.read_csv(upload.name, encoding="utf-8", encoding_errors="ignore")
        except Exception as e:
            _log(e, "file read totally failed")
            raise
        df.columns = df.columns.str.strip()
        # --------- column harmonisation (new vs old formats) ----------
        rename_map = {
            "Decription": "Description",
            "description": "Description",
            "objectives": "Objectives",
            "RequiredPrerequisite": "Required Prerequisite",
            "Required Pre-requisite": "Required Prerequisite",
            "RequiredPre-requisite": "Required Prerequisite",
        }
        df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True)
        # duration if missing
        if "Duration" not in df.columns:
            try:
                df["Duration"] = (
                    pd.to_datetime(df["Course End Date"]) - pd.to_datetime(df["Course Start Date"])
                ).dt.days.add(1)
            except Exception as e:
                _log(e, "duration calc failed")
                df["Duration"] = ""
        # ---------------- ASYNC AI ----------------
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        col_desc = "Description"
        col_obj = "Objectives"
        col_prereq = "Required Prerequisite"
        try:
            res = loop.run_until_complete(
                asyncio.gather(
                    process_text_with_ai_async(
                        df[col_desc].fillna("").tolist(),
                        "Create a concise 250-character summary of this course description:",
                    ),
                    process_text_with_ai_async(
                        df[col_desc].fillna("").tolist(),
                        "Condense this description to maximum 750 characters in paragraph format, with clean formatting:",
                    ),
                    process_text_with_ai_async(
                        df[col_obj].fillna("").tolist(),
                        "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':",
                    ),
                    process_text_with_ai_async(
                        df["Outline"].fillna("").tolist(),
                        "Format this agenda into a bullet list format with clean formatting. Start each bullet with '• ':",
                    ),
                )
            )
        except Exception as e:
            _log(e, "async AI gather failed")
            res = [[""] * len(df)] * 4
        finally:
            loop.close()
        short_desc, long_desc, objectives, agendas = res
short_desc, long_desc, objectives, agendas = res | |
# prerequisites handled synchronously (tiny) | |
prereq_out = [] | |
for p in df[col_prereq].fillna("").tolist(): | |
if not p.strip(): | |
prereq_out.append(default_prereq) | |
else: | |
try: | |
prereq_out.append( | |
asyncio.run( | |
process_text_with_ai_async( | |
[p], | |
"Format these prerequisites into a bullet list format with clean formatting. Start each bullet with 'β’ ':", | |
) | |
)[0] | |
) | |
except Exception as e: | |
_log(e, "prereq AI failed") | |
prereq_out.append(default_prereq) | |
        # ---------------- DATAFRAME BUILD ----------------
        try:
            df["Short_Description"] = short_desc
            df["Condensed_Description"] = long_desc
            df["Formatted_Objectives"] = objectives
            df["Formatted_Prerequisites"] = prereq_out
            df["Formatted_Agenda"] = agendas
        except Exception as e:
            _log(e, "adding AI columns")
        # aggregate date/time per course
        df = df.sort_values(["Course ID", "Course Start Date"])
        date_agg = (
            df.groupby("Course ID")["Course Start Date"]
            .apply(lambda x: ",".join(x.astype(str).unique()))
            .reset_index(name="Aggregated_Dates")
        )
        time_agg = (
            df.groupby("Course ID")
            .apply(
                lambda d: ",".join(
                    f"{s}-{e} {tz}"
                    for s, e, tz in zip(
                        d["Course Start Time"], d["Course End Time"], d["Time Zone"]
                    )
                )
            )
            .reset_index(name="Aggregated_Times")
        )
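        # Shape of the aggregates (illustrative values): a course with two
        # sessions might yield Aggregated_Dates "2025-04-01,2025-04-08" and
        # Aggregated_Times "09:00-17:00 EST,09:00-17:00 EST", one entry per run.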
        parent = df.drop_duplicates(subset=["Course ID"]).merge(date_agg).merge(time_agg)
        woo_parent_df = pd.DataFrame(
            {
                "Type": "variable",
                "SKU": parent["Course ID"],
                "Name": parent["Course Name"],
                "Published": 1,
                "Visibility in catalog": "visible",
                "Short description": parent["Short_Description"],
                "Description": parent["Condensed_Description"],
                "Tax status": "taxable",
                "In stock?": 1,
                "Regular price": parent["SRP Pricing"].replace("[\\$,]", "", regex=True),
                "Categories": "courses",
                "Images": parent["Vendor"].map(brand_logo).fillna(""),
                "Parent": "",
                "Brands": parent["Vendor"],
                "Attribute 1 name": "Date",
                "Attribute 1 value(s)": parent["Aggregated_Dates"],
                "Attribute 1 visible": "visible",
                "Attribute 1 global": 1,
                "Attribute 2 name": "Location",
                "Attribute 2 value(s)": "Virtual",
                "Attribute 2 visible": "visible",
                "Attribute 2 global": 1,
                "Attribute 3 name": "Time",
                "Attribute 3 value(s)": parent["Aggregated_Times"],
                "Attribute 3 visible": "visible",
                "Attribute 3 global": 1,
                "Meta: outline": parent["Formatted_Agenda"],
                "Meta: days": parent["Duration"],
                "Meta: location": "Virtual",
                "Meta: overview": parent["Target Audience"],
                "Meta: objectives": parent["Formatted_Objectives"],
                "Meta: prerequisites": parent["Formatted_Prerequisites"],
                "Meta: agenda": parent["Formatted_Agenda"],
            }
        )
        woo_child_df = pd.DataFrame(
            {
                "Type": "variation, virtual",
                "SKU": df["Course SID"],
                "Name": df["Course Name"],
                "Published": 1,
                "Visibility in catalog": "visible",
                "Short description": df["Short_Description"],
                "Description": df["Condensed_Description"],
                "Tax status": "taxable",
                "In stock?": 1,
                "Regular price": df["SRP Pricing"].replace("[\\$,]", "", regex=True),
                "Categories": "courses",
                "Images": df["Vendor"].map(brand_logo).fillna(""),
                "Parent": df["Course ID"],
                "Brands": df["Vendor"],
                "Attribute 1 name": "Date",
                "Attribute 1 value(s)": df["Course Start Date"],
                "Attribute 1 visible": "visible",
                "Attribute 1 global": 1,
                "Attribute 2 name": "Location",
                "Attribute 2 value(s)": "Virtual",
                "Attribute 2 visible": "visible",
                "Attribute 2 global": 1,
                "Attribute 3 name": "Time",
                "Attribute 3 value(s)": df.apply(
                    lambda r: f"{r['Course Start Time']}-{r['Course End Time']} {r['Time Zone']}",
                    axis=1,
                ),
                "Attribute 3 visible": "visible",
                "Attribute 3 global": 1,
                "Meta: outline": df["Formatted_Agenda"],
                "Meta: days": df["Duration"],
                "Meta: location": "Virtual",
                "Meta: overview": df["Target Audience"],
                "Meta: objectives": df["Formatted_Objectives"],
                "Meta: prerequisites": df["Formatted_Prerequisites"],
                "Meta: agenda": df["Formatted_Agenda"],
            }
        )
        final_cols = [
            "Type",
            "SKU",
            "Name",
            "Published",
            "Visibility in catalog",
            "Short description",
            "Description",
            "Tax status",
            "In stock?",
            "Regular price",
            "Categories",
            "Images",
            "Parent",
            "Brands",
            "Attribute 1 name",
            "Attribute 1 value(s)",
            "Attribute 1 visible",
            "Attribute 1 global",
            "Attribute 2 name",
            "Attribute 2 value(s)",
            "Attribute 2 visible",
            "Attribute 2 global",
            "Attribute 3 name",
            "Attribute 3 value(s)",
            "Attribute 3 visible",
            "Attribute 3 global",
            "Meta: outline",
            "Meta: days",
            "Meta: location",
            "Meta: overview",
            "Meta: objectives",
            "Meta: prerequisites",
            "Meta: agenda",
        ]
        woo_final_df = pd.concat([woo_parent_df, woo_child_df], ignore_index=True)[
            final_cols
        ]
        buf = BytesIO()
        woo_final_df.to_csv(buf, index=False, encoding="utf-8-sig")
        buf.seek(0)
        return buf
    except Exception as e:
        _log(e, "fatal transformation error")
        err_buf = BytesIO()
        pd.DataFrame({"error": [str(e)]}).to_csv(err_buf, index=False)
        err_buf.seek(0)
        return err_buf
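# Local sketch that bypasses Gradio entirely (hypothetical input path; any
# object with a .name attribute works, mirroring gr.File's upload object):
#   from types import SimpleNamespace
#   csv_io = process_woocommerce_data_in_memory(SimpleNamespace(name="schedule.csv"))
#   Path("woocommerce_import.csv").write_bytes(csv_io.getvalue())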
# ────────────────────────────── GRADIO BINDINGS ──────────────────────────────
def process_file(file):
    try:
        out_io = process_woocommerce_data_in_memory(file)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
            tmp.write(out_io.getvalue())
        return tmp.name
    except Exception as e:
        _log(e, "top-level process_file")
        with tempfile.NamedTemporaryFile(delete=False, suffix=".txt") as tmp:
            tmp.write(f"Processing failed:\n{e}".encode())
        return tmp.name
interface = gr.Interface(
    fn=process_file,
    inputs=gr.File(label="Upload NetCom Schedule", file_types=[".csv", ".xlsx", ".xls"]),
    outputs=gr.File(label="Download WooCommerce CSV"),
    title="NetCom → WooCommerce CSV/Excel Processor",
    description="Upload a NetCom Reseller Schedule CSV or XLSX to generate a WooCommerce-ready CSV.",
    analytics_enabled=False,
)

if __name__ == "__main__":  # run
    if not os.getenv("OPENAI_API_KEY"):
        print("[WARN] OPENAI_API_KEY not set; AI steps will error out.")
    interface.launch()