oberbics committed on
Commit
47c367a
·
verified ·
1 Parent(s): 99101f6

Update data.py

Browse files
Files changed (1) hide show
  1. data.py +181 -89
data.py CHANGED
@@ -1,100 +1,192 @@
1
- import json
 
 
 
 
 
 
 
2
 
 
 
3
 
4
def extract_leaves(item, path=None, leaves=None):
    """
    Collect the non-empty leaf values of a nested dict/list structure.

    Dictionary keys extend the path of the values beneath them; list
    elements are visited without adding anything to the path. Leaves equal
    to the empty string are skipped.

    Returns the accumulator list of ``(path, value)`` tuples.
    """
    leaves = [] if leaves is None else leaves
    path = [] if path is None else path

    if isinstance(item, dict):
        for key, value in item.items():
            extract_leaves(value, path + [key], leaves)
        return leaves

    if isinstance(item, list):
        for element in item:
            extract_leaves(element, path, leaves)
        return leaves

    # Anything else is a leaf; empty strings are dropped.
    if item != '':
        leaves.append((path, item))
    return leaves
23
-
24
def split_document(document, window_size, overlap, tokenizer):
    """
    Split a document into overlapping chunks of at most ``window_size`` tokens.

    Args:
        document: The text to split.
        window_size: Maximum number of tokens per chunk.
        overlap: Number of tokens shared between consecutive chunks.
        tokenizer: Object providing ``tokenize(text)`` and
            ``convert_tokens_to_string(tokens)``.

    Returns:
        A list of chunk strings. When the document has no more than
        ``window_size`` tokens the list holds the original document unchanged.

    Raises:
        ValueError: If the document has to be split but ``window_size`` is not
            positive or ``overlap`` is not smaller than ``window_size``. The
            window could then never advance through the document.
    """
    tokens = tokenizer.tokenize(document)
    total = len(tokens)
    print(f"\tLength of document: {total} tokens")

    if total <= window_size:
        chunks = [document]
    else:
        stride = window_size - overlap
        if window_size <= 0 or stride <= 0:
            raise ValueError(
                "window_size must be positive and overlap must be smaller "
                f"than window_size (got window_size={window_size}, "
                f"overlap={overlap})"
            )

        chunks = []
        for start in range(0, total, stride):
            end = min(start + window_size, total)
            print(f"\t{start} to {end}")
            chunks.append(tokenizer.convert_tokens_to_string(tokens[start:end]))

            # Stop once a window has reached the end of the document, so no
            # trailing chunk made only of already-covered overlap is emitted.
            if end >= total:
                break

    print(f"\tSplit into {len(chunks)} chunks")
    return chunks
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
def handle_broken_output(pred, prev):
    """
    Fall back to the previous prediction when the new one is unusable.

    Args:
        pred: JSON text of the new prediction.
        prev: The previous prediction, returned as the fallback.

    Returns:
        ``prev`` when ``pred`` cannot be parsed as a JSON object, or when every
        value of the parsed object is ``""`` or ``[]`` (an object with no keys
        counts as empty); otherwise ``pred`` unchanged.
    """
    try:
        parsed = json.loads(pred)
        is_empty = all(value in ("", []) for value in parsed.values())
    except (TypeError, ValueError, AttributeError, RecursionError):
        # ValueError: malformed JSON; TypeError: pred is not str/bytes;
        # AttributeError: valid JSON that is not an object (no .values());
        # RecursionError: pathologically nested input. KeyboardInterrupt and
        # SystemExit are deliberately not caught.
        return prev

    return prev if is_empty else pred
59
-
60
def clean_json_text(text):
    """
    Strip surrounding whitespace and un-escape backslash-escaped '#' and '&'.

    A backslash followed by '#' or '&' is not a valid JSON escape, so each
    such pair is replaced by the bare character.
    """
    # The backslashes are written as "\\" explicitly: "\#" and "\&" are
    # invalid escape sequences in Python string literals.
    return text.strip().replace("\\#", "#").replace("\\&", "&")
67
-
68
def sync_empty_fields(dict1, dict2):
    """
    Align the empty fields and key set of ``dict1`` with ``dict2``.

    For every key of ``dict2``:
      * a nested dict is synchronized recursively;
      * an empty value (``None``, ``""``, ``[]``) is added to ``dict1`` when
        the key is missing there;
      * a non-empty value causes an *empty* ``dict1`` entry with the same key
        to be removed.
    Afterwards every key of ``dict1`` that does not occur in ``dict2`` is
    deleted.

    Args:
        dict1 (dict): The dictionary to be modified (in place).
        dict2 (dict): The reference dictionary.

    Returns:
        dict: ``dict1`` after synchronization. If either argument is not a
        dict, ``dict1`` is returned untouched.
    """
    if not isinstance(dict1, dict) or not isinstance(dict2, dict):
        return dict1

    empty_values = (None, "", [], {})

    for key, value in dict2.items():
        if isinstance(value, dict):
            dict1[key] = sync_empty_fields(dict1.get(key, {}), value)
        elif value in empty_values:
            if key not in dict1:
                # Copy empty lists so dict1 never shares a mutable object
                # with the reference dictionary.
                dict1[key] = list(value) if isinstance(value, list) else value
        elif key in dict1 and dict1[key] in empty_values:
            # dict2 holds real data here; drop dict1's empty placeholder.
            del dict1[key]

    # Remove any fields of dict1 that the reference does not know about.
    for key in [key for key in dict1 if key not in dict2]:
        del dict1[key]

    return dict1
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import folium
4
+ from geopy.geocoders import Nominatim
5
+ from geopy.extra.rate_limiter import RateLimiter
6
+ import tempfile
7
+ from typing import Optional, Tuple
8
+ import warnings
9
 
10
# Silence every warning category for the whole process.
warnings.filterwarnings("ignore")

# Tile providers selectable as base maps. Each entry maps a display name to
# the tile URL template, its attribution text and the year range it is
# offered for.
# NOTE(review): the "Stamen (1915)" entry points at the toner-lite style on
# the Fastly host; confirm that endpoint is still served.
HISTORICAL_TILES = {
    "David Rumsey (1790)": dict(
        url="https://map1.davidrumsey.com/tiles/rumsey/SDSC1790/{z}/{x}/{y}.png",
        attr="David Rumsey Map Collection",
        min_year=1700,
        max_year=1800,
    ),
    "David Rumsey (1860)": dict(
        url="https://map1.davidrumsey.com/tiles/rumsey/SDSC1860/{z}/{x}/{y}.png",
        attr="David Rumsey Map Collection",
        min_year=1801,
        max_year=1900,
    ),
    "Stamen (1915)": dict(
        url="https://stamen-tiles.a.ssl.fastly.net/toner-lite/{z}/{x}/{y}.png",
        attr="Stamen Maps",
        min_year=1901,
        max_year=1920,
    ),
    "OpenHistoricalMap": dict(
        url="https://tile.openhistoricalmap.org/{z}/{x}/{y}.png",
        attr="OpenHistoricalMap",
        min_year=1700,
        max_year=2023,
    ),
}
40
 
41
class Geocoder:
    """Nominatim-backed geocoder with request throttling and a result cache."""

    def __init__(self):
        self.geolocator = Nominatim(user_agent="historical_mapper", timeout=10)
        # Wait at least one second between consecutive geocoding requests.
        self.geocode = RateLimiter(self.geolocator.geocode, min_delay_seconds=1)
        # location string -> (latitude, longitude), or None when the lookup
        # completed without finding a match.
        self.cache = {}

    def get_coords(self, location: str) -> Optional[Tuple[float, float]]:
        """
        Return ``(latitude, longitude)`` for ``location``, or ``None``.

        Empty and NaN inputs yield ``None`` without a lookup. Completed
        lookups (match or no match) are cached. A lookup that raises is
        reported on stdout and is NOT cached, so a transient failure does not
        mark the place as unresolvable for the lifetime of this instance.
        """
        if not location or pd.isna(location):
            return None
        if location in self.cache:
            return self.cache[location]

        try:
            result = self.geocode(location)
        except Exception as e:
            print(f"Geocoding failed for '{location}': {str(e)}")
            return None

        coords = (result.latitude, result.longitude) if result else None
        self.cache[location] = coords
        return coords
 
64
 
65
def get_tile_layer(year: int):
    """
    Build the base tile layer for ``year``.

    The first entry of ``HISTORICAL_TILES`` (in definition order) whose
    inclusive ``min_year``..``max_year`` range contains ``year`` is used;
    when none matches, a plain OpenStreetMap layer is returned.
    """
    candidates = (
        (label, spec)
        for label, spec in HISTORICAL_TILES.items()
        if spec["min_year"] <= year <= spec["max_year"]
    )
    match = next(candidates, None)
    if match is None:
        return folium.TileLayer("OpenStreetMap")

    label, spec = match
    return folium.TileLayer(
        tiles=spec["url"],
        attr=spec["attr"],
        name=label,
        overlay=False,
    )
76
 
77
def create_historical_map(df: pd.DataFrame, location_col: str, year: int = 1900) -> str:
    """
    Render an interactive map of the places listed in ``df[location_col]``.

    Args:
        df: Table holding the place names.
        location_col: Name of the column with the place names.
        year: Year used to pick the initial base layer and shown in popups.

    Returns:
        The map as an HTML string (folium's notebook representation).
    """
    # Local import: keeps this function self-contained without touching the
    # module's import block.
    from html import escape

    geocoder = Geocoder()

    # Create the map WITHOUT folium's default tiles. Otherwise the default
    # OpenStreetMap layer is registered first and is the base layer shown on
    # load, hiding the layer selected for `year`.
    base_layer = get_tile_layer(year)
    m = folium.Map(location=[40, -10], zoom_start=2, tiles=None, control_scale=True)
    base_layer.add_to(m)

    # Add all other historical layers as options
    historical_urls = set()
    for name, config in HISTORICAL_TILES.items():
        historical_urls.add(config["url"])
        if config["url"] != base_layer.tiles:
            folium.TileLayer(
                tiles=config["url"],
                attr=config["attr"],
                name=f"{name} ({config['min_year']}-{config['max_year']})",
                overlay=False
            ).add_to(m)

    # Keep the modern map selectable, unless it already is the base layer
    # (the fallback returned when no historical provider covers `year`).
    if base_layer.tiles in historical_urls:
        folium.TileLayer("OpenStreetMap").add_to(m)

    # Add one marker per distinct, successfully geocoded place
    coords_list = []
    for loc in df[location_col].dropna().unique():
        coords = geocoder.get_coords(str(loc))
        if coords:
            folium.Marker(
                location=coords,
                # Place names come from an uploaded spreadsheet; escape them
                # so they cannot inject markup into the popup.
                popup=f"<b>{escape(str(loc))}</b><br>Year: {year}",
                # NOTE(review): "info-sign" looks like a glyphicon name while
                # the prefix is "fa"; confirm the icon actually renders.
                icon=folium.Icon(
                    color="red",
                    icon="info-sign",
                    prefix="fa"
                )
            ).add_to(m)
            coords_list.append(coords)

    # Add layer control and zoom to the markers, if there are any
    folium.LayerControl().add_to(m)
    if coords_list:
        m.fit_bounds(coords_list)

    return m._repr_html_()
117
 
118
def process_file(file_obj, location_col, year):
    """
    Gradio callback: read the uploaded workbook and build the map outputs.

    Args:
        file_obj: The upload, either a filesystem path string (what
            ``gr.File(type="filepath")`` delivers) or an object exposing the
            path as ``.name``; ``None`` when nothing was uploaded.
        location_col: Name of the column containing place names.
        year: Year forwarded to the map builder.

    Returns:
        A ``(map_html, status_text, download_path)`` tuple. On any failure the
        first and last items are ``None`` and ``status_text`` carries the
        message.
    """
    if file_obj is None:
        return None, "Error: No file uploaded", None

    try:
        # Read input file; accept both a plain path and a file wrapper.
        file_path = file_obj if isinstance(file_obj, str) else file_obj.name
        df = pd.read_excel(file_path)

        # Validate column exists
        if location_col not in df.columns:
            return None, f"Column '{location_col}' not found", None

        # Create historical map
        map_html = create_historical_map(df, location_col, year)

        # Save processed data. Only reserve the name inside the `with`, and
        # write after the handle is closed: writing to a still-open
        # NamedTemporaryFile by name fails on Windows.
        with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as tmp:
            processed_path = tmp.name
        df.to_excel(processed_path, index=False)

        # Generate stats
        stats = (
            f"Total locations: {len(df)}\n"
            f"Unique places: {df[location_col].nunique()}\n"
            f"Map year: {year}"
        )

        return (
            f"<div style='width:100%; height:70vh'>{map_html}</div>",
            stats,
            processed_path
        )

    except Exception as e:
        # UI boundary: report the problem in the status box instead of
        # letting the callback crash.
        return None, f"Error: {str(e)}", None
150
+
151
+ # Gradio Interface
152
# Gradio interface: one row with two columns, the input controls in the
# first and the results in the second.
with gr.Blocks(title="Historical Map Explorer", theme=gr.themes.Soft()) as app:
    gr.Markdown("# Historical Location Mapper")

    with gr.Row():
        # Input controls
        with gr.Column():
            file_input = gr.File(
                label="Upload Excel File", file_types=[".xlsx", ".xls"], type="filepath"
            )
            location_col = gr.Textbox(
                label="Location Column Name",
                value="locations",
                placeholder="Enter exact column name with locations",
            )
            year = gr.Slider(minimum=1700, maximum=2023, value=1900, step=1, label="Map Year")
            map_btn = gr.Button("Generate Historical Map", variant="primary")

        # Results
        with gr.Column():
            placeholder_html = (
                "<div style='text-align:center;padding:20px;'>"
                "Map will appear here after processing</div>"
            )
            map_display = gr.HTML(label="Historical Map", value=placeholder_html)
            stats_output = gr.Textbox(label="Statistics")
            download_output = gr.File(label="Download Processed Data")

    # Wire the button to the processing callback.
    map_btn.click(
        process_file,
        inputs=[file_input, location_col, year],
        outputs=[map_display, stats_output, download_output],
    )

if __name__ == "__main__":
    app.launch()