timeki committed on
Commit
2de91ee
·
1 Parent(s): 7a891a7

correct logs formatting for dataviewer

Browse files
Files changed (2) hide show
  1. climateqa/constants.py +1 -1
  2. climateqa/logging.py +16 -10
climateqa/constants.py CHANGED
@@ -97,7 +97,7 @@ DOCUMENT_METADATA_DEFAULT_VALUES = {
97
  "content": "",
98
  "reranking_score": 0.0,
99
  "query_used_for_retrieval": "",
100
- "sources_used": [],
101
  "question_used": "",
102
  "index_used": ""
103
  }
 
97
  "content": "",
98
  "reranking_score": 0.0,
99
  "query_used_for_retrieval": "",
100
+ "sources_used": [""],
101
  "question_used": "",
102
  "index_used": ""
103
  }
climateqa/logging.py CHANGED
@@ -7,7 +7,7 @@ import csv
7
  import pandas as pd
8
  import io
9
  from typing import TypedDict, List
10
- from constants import DOCUMENT_METADATA_DEFAULT_VALUES
11
 
12
 
13
  def serialize_docs(docs:list)->list:
@@ -25,18 +25,24 @@ def serialize_docs(docs:list)->list:
25
  """
26
  new_docs = []
27
  for doc in docs:
28
- new_doc = {}
29
- new_doc["page_content"] = doc.page_content
30
- new_doc["metadata"] = {}
 
 
31
 
32
- # Define default empty values by field type
33
-
34
-
35
- # Copy each metadata field, using default if missing
36
  for field, default_value in DOCUMENT_METADATA_DEFAULT_VALUES.items():
37
- new_doc["metadata"][field] = doc.metadata.get(field, default_value)
38
-
 
 
 
 
39
  new_docs.append(new_doc)
 
 
 
40
  return new_docs
41
 
42
  ## AZURE LOGGING - DEPRECATED
 
7
  import pandas as pd
8
  import io
9
  from typing import TypedDict, List
10
+ from climateqa.constants import DOCUMENT_METADATA_DEFAULT_VALUES
11
 
12
 
13
  def serialize_docs(docs:list)->list:
 
25
  """
26
  new_docs = []
27
  for doc in docs:
28
+ # Make sure we have a clean doc format
29
+ new_doc = {
30
+ "page_content": doc.get("page_content", ""),
31
+ "metadata": {}
32
+ }
33
 
34
+ # Ensure all metadata fields exist with defaults if missing
 
 
 
35
  for field, default_value in DOCUMENT_METADATA_DEFAULT_VALUES.items():
36
+ new_value = (doc.get("metadata", {}).get(field, default_value))
37
+ try:
38
+ new_doc["metadata"][field] = type(default_value)(new_value)
39
+ except:
40
+ new_doc["metadata"][field] = default_value
41
+
42
  new_docs.append(new_doc)
43
+
44
+ if new_docs == []:
45
+ new_docs = [{"page_content": "No documents found", "metadata": DOCUMENT_METADATA_DEFAULT_VALUES}]
46
  return new_docs
47
 
48
  ## AZURE LOGGING - DEPRECATED