timeki committed on
Commit
5b1b83b
·
1 Parent(s): f9c4c84

Standardize logging

Browse files
Files changed (1) hide show
  1. climateqa/logging.py +31 -3
climateqa/logging.py CHANGED
@@ -4,13 +4,38 @@ import json
4
  from huggingface_hub import HfApi
5
  import gradio as gr
6
  import csv
 
 
 
 
 
7
 
8
  def serialize_docs(docs:list)->list:
 
 
 
 
 
 
 
 
 
 
 
 
9
  new_docs = []
10
  for doc in docs:
11
  new_doc = {}
12
  new_doc["page_content"] = doc.page_content
13
- new_doc["metadata"] = doc.metadata
 
 
 
 
 
 
 
 
14
  new_docs.append(new_doc)
15
  return new_docs
16
 
@@ -108,7 +133,7 @@ def log_on_huggingface(log_filename, logs):
108
  return
109
 
110
  # Get repository name from environment or use default
111
- repo_id = os.getenv("HF_DATASET_REPO", "timeki/climateqa_logs")
112
 
113
  # Initialize HfApi
114
  api = HfApi(token=hf_token)
@@ -159,9 +184,12 @@ def log_interaction_to_huggingface(history, output_query, sources, docs, share_c
159
  }
160
  # Log to Hugging Face
161
  log_on_huggingface(f"chat/{timestamp}.json", logs)
 
 
 
162
  except Exception as e:
163
  print(f"Error logging to Hugging Face: {e}")
164
- error_msg = f"ClimateQ&A Error: {str(e)[:100]} - The error has been noted, try another question and if the error remains, you can contact us :)"
165
  raise gr.Error(error_msg)
166
 
167
  def log_drias_interaction_to_huggingface(query, sql_query, user_id):
 
4
  from huggingface_hub import HfApi
5
  import gradio as gr
6
  import csv
7
+ import pandas as pd
8
+ import io
9
+ from typing import TypedDict, List
10
+ from constants import DOCUMENT_METADATA_DEFAULT_VALUES
11
+
12
 
13
  def serialize_docs(docs:list)->list:
14
+ """Convert document objects to a simplified format compatible with Hugging Face datasets.
15
+
16
+ This function processes document objects by extracting their page content and metadata,
17
+ normalizing the metadata structure to ensure consistency. It applies default values
18
+ from DOCUMENT_METADATA_DEFAULT_VALUES for any missing metadata fields.
19
+
20
+ Args:
21
+ docs (list): List of document objects, each with page_content and metadata attributes
22
+
23
+ Returns:
24
+ list: List of dictionaries with standardized "page_content" and "metadata" fields
25
+ """
26
  new_docs = []
27
  for doc in docs:
28
  new_doc = {}
29
  new_doc["page_content"] = doc.page_content
30
+ new_doc["metadata"] = {}
31
+
32
+ # Define default empty values by field type
33
+
34
+
35
+ # Copy each metadata field, using default if missing
36
+ for field, default_value in DOCUMENT_METADATA_DEFAULT_VALUES.items():
37
+ new_doc["metadata"][field] = doc.metadata.get(field, default_value)
38
+
39
  new_docs.append(new_doc)
40
  return new_docs
41
 
 
133
  return
134
 
135
  # Get repository name from environment or use default
136
+ repo_id = os.getenv("HF_DATASET_REPO", "Ekimetrics/climateqa_logs")
137
 
138
  # Initialize HfApi
139
  api = HfApi(token=hf_token)
 
184
  }
185
  # Log to Hugging Face
186
  log_on_huggingface(f"chat/{timestamp}.json", logs)
187
+ print(f"Logged interaction to Hugging Face")
188
+ else:
189
+ print("Did not log to Hugging Face because GRADIO_ENV is local")
190
  except Exception as e:
191
  print(f"Error logging to Hugging Face: {e}")
192
+ error_msg = f"ClimateQ&A Error: {str(e)[:100]})"
193
  raise gr.Error(error_msg)
194
 
195
  def log_drias_interaction_to_huggingface(query, sql_query, user_id):