Update app.py
app.py CHANGED
@@ -2,7 +2,7 @@
 # -*- coding: utf-8 -*-
 
 """
-*NetCom → WooCommerce CSV Processor*
+*NetCom → WooCommerce CSV/Excel Processor*
 Robust edition – catches and logs every recoverable error so one failure never
 brings the whole pipeline down. Only small, surgical changes were made.
 """
@@ -81,7 +81,6 @@ async def process_text_batch_async(client, prompts):
         else:
             tasks.append(asyncio.create_task(_call_openai(client, p)))
 
-    # Wait for *all* tasks, collecting exceptions too
     for prompt, task in zip([p for p in prompts if p not in results], tasks):
         try:
             res = await task
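The comment removed here described the loop that follows: every task is awaited individually, so a single failed OpenAI call is logged and skipped rather than cancelling the whole batch. A minimal standalone sketch of that pattern (the _call_openai stub below is illustrative, not the app's real helper):

import asyncio

async def _call_openai(client, prompt):
    # Illustrative stub: a real API call that may raise.
    await asyncio.sleep(0)
    if "fail" in prompt:
        raise RuntimeError("simulated API error")
    return prompt.upper()

async def process_text_batch(client, prompts):
    tasks = {p: asyncio.create_task(_call_openai(client, p)) for p in prompts}
    results = {}
    # Await every task; record an empty result on failure instead of aborting.
    for prompt, task in tasks.items():
        try:
            results[prompt] = await task
        except Exception as exc:
            print(f"[WARN] prompt failed: {exc}")
            results[prompt] = ""
    return results

print(asyncio.run(process_text_batch(None, ["summarise this", "fail on this"])))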
@@ -104,7 +103,7 @@ async def process_text_with_ai_async(texts, instruction):
 
 # ────────────────────────────── MAIN TRANSFORM ──────────────────────────────
 def process_woocommerce_data_in_memory(upload):
-    """Convert NetCom → Woo CSV; every stage guarded."""
+    """Convert NetCom → Woo CSV/XLSX; every stage guarded."""
     try:
         # brand → logo mapping
         brand_logo = {
@@ -127,30 +126,69 @@ def process_woocommerce_data_in_memory(upload):
         )
 
         # ---------------- I/O ----------------
+        ext = Path(upload.name).suffix.lower()
         try:
-
+            if ext in {".xlsx", ".xls"}:
+                try:
+                    df = pd.read_excel(upload.name, sheet_name="Active Schedules")
+                except Exception as e:
+                    _log(e, "Excel read failed (falling back to first sheet)")
+                    df = pd.read_excel(upload.name, sheet_name=0)
+            else:  # CSV
+                try:
+                    df = pd.read_csv(upload.name, encoding="latin1")
+                except Exception as e:
+                    _log(e, "CSV read failed (trying utf-8)")
+                    df = pd.read_csv(upload.name, encoding="utf-8", errors="ignore")
         except Exception as e:
-            _log(e, "
-
+            _log(e, "file read totally failed")
+            raise
+
         df.columns = df.columns.str.strip()
 
+        # --------- column harmonisation (new vs old formats) ----------
+        rename_map = {
+            "Decription": "Description",
+            "description": "Description",
+            "Objectives": "Objectives",
+            "objectives": "Objectives",
+            "RequiredPrerequisite": "Required Prerequisite",
+            "Required Pre-requisite": "Required Prerequisite",
+            "RequiredPre-requisite": "Required Prerequisite",
+        }
+        df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns}, inplace=True)
+
+        # duration if missing
+        if "Duration" not in df.columns:
+            try:
+                df["Duration"] = (
+                    pd.to_datetime(df["Course End Date"]) - pd.to_datetime(df["Course Start Date"])
+                ).dt.days.add(1)
+            except Exception as e:
+                _log(e, "duration calc failed")
+                df["Duration"] = ""
+
         # ---------------- ASYNC AI ----------------
         loop = asyncio.new_event_loop()
         asyncio.set_event_loop(loop)
 
+        col_desc = "Description"
+        col_obj = "Objectives"
+        col_prereq = "Required Prerequisite"
+
        try:
             res = loop.run_until_complete(
                 asyncio.gather(
                     process_text_with_ai_async(
-                        df[
+                        df[col_desc].fillna("").tolist(),
                         "Create a concise 250-character summary of this course description:",
                     ),
                     process_text_with_ai_async(
-                        df[
+                        df[col_desc].fillna("").tolist(),
                         "Condense this description to maximum 750 characters in paragraph format, with clean formatting:",
                     ),
                     process_text_with_ai_async(
-                        df[
+                        df[col_obj].fillna("").tolist(),
                         "Format these objectives into a bullet list format with clean formatting. Start each bullet with '• ':",
                     ),
                     process_text_with_ai_async(
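The bulk of this hunk is defensive input handling: pick a reader by file extension, fall back to a second attempt if the first read fails, then normalise header spellings. A self-contained sketch of that shape (sheet name, encodings and rename keys are taken from the diff; the utf-8 fallback here uses pandas' encoding_errors option, available in pandas 1.3+):

from pathlib import Path
import pandas as pd

def read_schedule(path: str) -> pd.DataFrame:
    """Load a NetCom schedule from CSV or Excel, tolerating format quirks."""
    ext = Path(path).suffix.lower()
    if ext in {".xlsx", ".xls"}:
        try:
            df = pd.read_excel(path, sheet_name="Active Schedules")
        except Exception:
            df = pd.read_excel(path, sheet_name=0)  # fall back to the first sheet
    else:
        try:
            df = pd.read_csv(path, encoding="latin1")
        except Exception:
            # pandas 1.3+: replace undecodable bytes rather than failing outright
            df = pd.read_csv(path, encoding="utf-8", encoding_errors="replace")

    df.columns = df.columns.str.strip()
    # Map old/new export headers onto one schema.
    rename_map = {
        "Decription": "Description",
        "RequiredPrerequisite": "Required Prerequisite",
        "Required Pre-requisite": "Required Prerequisite",
    }
    return df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns})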
@@ -161,7 +199,7 @@ def process_woocommerce_data_in_memory(upload):
             )
         except Exception as e:
             _log(e, "async AI gather failed")
-            res = [[""] * len(df)] * 4
+            res = [[""] * len(df)] * 4
         finally:
             loop.close()
 
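If the whole gather blows up, the commit substitutes four lists of empty strings so the later column assignments still line up with the DataFrame. A short sketch of that run-the-loop-and-fall-back shape (fake_ai_batch stands in for process_text_with_ai_async):

import asyncio

async def fake_ai_batch(texts, instruction):
    # Stand-in for process_text_with_ai_async; a real call could raise.
    return [f"{instruction} {t[:20]}" for t in texts]

def run_ai_stages(texts):
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        res = loop.run_until_complete(
            asyncio.gather(
                fake_ai_batch(texts, "Summary:"),
                fake_ai_batch(texts, "Condensed:"),
            )
        )
    except Exception:
        # Same shape as the expected output, so downstream assignment never breaks.
        res = [[""] * len(texts)] * 2
    finally:
        loop.close()
    return res

print(run_ai_stages(["Course A description", "Course B description"]))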
@@ -169,7 +207,7 @@ def process_woocommerce_data_in_memory(upload):
 
         # prerequisites handled synchronously (tiny)
         prereq_out = []
-        for p in df[
+        for p in df[col_prereq].fillna("").tolist():
             if not p.strip():
                 prereq_out.append(default_prereq)
             else:
@@ -196,7 +234,6 @@ def process_woocommerce_data_in_memory(upload):
         except Exception as e:
             _log(e, "adding AI columns")
 
-        # … (rest identical to original script – only guarded sections changed) …
         # 2. aggregate date/time
         df = df.sort_values(["Course ID", "Course Start Date"])
         date_agg = (
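Only a stale comment is dropped here; the surrounding code sorts by course and start date and then aggregates the schedule. The exact aggregation sits outside the hunk, so the groupby below is only an illustrative guess at its shape (column names from the diff, the comma-joined format is hypothetical):

import pandas as pd

sessions = pd.DataFrame({
    "Course ID": ["C1", "C1", "C2"],
    "Course Start Date": ["2024-02-12", "2024-01-08", "2024-01-15"],
})

# Hypothetical: collapse each course's start dates into one comma-separated cell.
date_agg = (
    sessions.sort_values(["Course ID", "Course Start Date"])
    .groupby("Course ID")["Course Start Date"]
    .agg(",".join)
    .reset_index()
)
print(date_agg)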
@@ -220,7 +257,6 @@ def process_woocommerce_data_in_memory(upload):
         parent = df.drop_duplicates(subset=["Course ID"]).merge(date_agg).merge(time_agg)
         woo_parent_df = pd.DataFrame(
             {
-                # unchanged fields ...
                 "Type": "variable",
                 "SKU": parent["Course ID"],
                 "Name": parent["Course Name"],
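The context around this hunk builds one "variable" parent product per Course ID by de-duplicating the rows and merging in the aggregated schedule columns. A small sketch of that drop_duplicates/merge step, with invented data:

import pandas as pd

df = pd.DataFrame({
    "Course ID": ["C1", "C1", "C2"],
    "Course Name": ["Intro to Python", "Intro to Python", "Advanced SQL"],
})
date_agg = pd.DataFrame({
    "Course ID": ["C1", "C2"],
    "Dates": ["2024-01-08,2024-02-12", "2024-01-15"],
})

# One parent row per course, enriched with the aggregated schedule.
parent = df.drop_duplicates(subset=["Course ID"]).merge(date_agg, on="Course ID")
woo_parent_df = pd.DataFrame({
    "Type": "variable",
    "SKU": parent["Course ID"],
    "Name": parent["Course Name"],
})
print(woo_parent_df)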
@@ -344,7 +380,6 @@ def process_woocommerce_data_in_memory(upload):
         return buf
     except Exception as e:
         _log(e, "fatal transformation error")
-        # Return a tiny CSV explaining the failure instead of crashing
         err_buf = BytesIO()
         pd.DataFrame({"error": [str(e)]}).to_csv(err_buf, index=False)
         err_buf.seek(0)
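The fatal-error branch above hands the user a one-row CSV describing the failure instead of crashing the Space. A minimal sketch of that in-memory round trip (recent pandas accepts a binary buffer here):

from io import BytesIO
import pandas as pd

def error_csv(exc: Exception) -> BytesIO:
    """Package an exception as a tiny downloadable CSV."""
    buf = BytesIO()
    pd.DataFrame({"error": [str(exc)]}).to_csv(buf, index=False)
    buf.seek(0)  # rewind so the caller streams it from the start
    return buf

print(error_csv(ValueError("missing 'Course ID' column")).read().decode())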
@@ -365,14 +400,14 @@ def process_file(file):
 
 interface = gr.Interface(
     fn=process_file,
-    inputs=gr.File(label="Upload NetCom
+    inputs=gr.File(label="Upload NetCom Schedule", file_types=[".csv", ".xlsx", ".xls"]),
     outputs=gr.File(label="Download WooCommerce CSV"),
-    title="NetCom → WooCommerce CSV Processor",
-    description="Upload a NetCom Reseller Schedule CSV to generate a WooCommerce-ready CSV.",
+    title="NetCom → WooCommerce CSV/Excel Processor",
+    description="Upload a NetCom Reseller Schedule CSV or XLSX to generate a WooCommerce-ready CSV.",
     analytics_enabled=False,
 )
 
 if __name__ == "__main__":  # run
     if not os.getenv("OPENAI_API_KEY"):
         print("[WARN] OPENAI_API_KEY not set; AI steps will error out.")
-    interface.launch()
+    interface.launch()
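The interface now advertises CSV and Excel uploads via file_types. A minimal Gradio wiring of the same widgets (echo_file is a placeholder for the app's process_file):

import gradio as gr

def echo_file(file):
    # Placeholder for process_file: hand the uploaded file straight back.
    return file.name if hasattr(file, "name") else file

demo = gr.Interface(
    fn=echo_file,
    inputs=gr.File(label="Upload NetCom Schedule", file_types=[".csv", ".xlsx", ".xls"]),
    outputs=gr.File(label="Download WooCommerce CSV"),
    title="NetCom → WooCommerce CSV/Excel Processor",
    analytics_enabled=False,
)

if __name__ == "__main__":
    demo.launch()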