Shenuki committed on
Commit
2a3aa81
·
verified ·
1 Parent(s): e4640d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -46
app.py CHANGED
@@ -1,77 +1,85 @@
1
- import spacy
 
2
  import requests
3
  import wikipedia
4
  import gradio as gr
 
 
 
 
 
 
 
 
5
 
6
- # 1) Load spaCy small English model (make sure to add en_core_web_sm in requirements.txt)
7
- nlp = spacy.load("en_core_web_sm")
 
 
 
 
 
 
 
 
 
8
 
9
- # 2) Helper: Overpass query for POIs
10
  def fetch_osm(lat, lon, osm_filter, limit=5):
11
- overpass = """
12
  [out:json][timeout:25];
13
  (
14
- node{filt}(around:1000,{lat},{lon});
15
- way{filt}(around:1000,{lat},{lon});
16
- rel{filt}(around:1000,{lat},{lon});
17
  );
18
- out center {lim};
19
- """.format(filt=osm_filter, lat=lat, lon=lon, lim=limit)
20
- r = requests.post("https://overpass-api.de/api/interpreter", data={"data": overpass})
21
  elems = r.json().get("elements", [])
22
- results = []
23
- for el in elems:
24
- name = el.get("tags", {}).get("name")
25
  if name:
26
- results.append({"name": name, **({"info": el["tags"].get("cuisine")} if "cuisine" in el["tags"] else {})})
27
- return results
28
-
29
- # 3) Geocode via Nominatim
30
- def geocode(place: str):
31
- r = requests.get(
32
- "https://nominatim.openstreetmap.org/search",
33
- params={"q": place, "format": "json", "limit": 1},
34
- headers={"User-Agent":"iVoiceContext/1.0"}
35
- )
36
- data = r.json()
37
- if not data: return None
38
- return float(data[0]["lat"]), float(data[0]["lon"])
39
 
40
- # 4) Main context extractor
41
- def get_context(text):
42
- doc = nlp(text)
 
 
43
  out = {}
44
- # gather unique entities of interest
45
- for ent in {e.text for e in doc.ents if e.label_ in ("GPE","LOC","PERSON","ORG")}:
46
- label = next(e.label_ for e in doc.ents if e.text == ent)
47
- if label in ("GPE","LOC"):
48
- geo = geocode(ent)
49
  if not geo:
50
- out[ent] = {"type":"location","error":"could not geocode"}
51
  else:
52
  lat, lon = geo
53
- out[ent] = {
54
  "type": "location",
55
  "restaurants": fetch_osm(lat, lon, '["amenity"="restaurant"]'),
56
  "attractions": fetch_osm(lat, lon, '["tourism"="attraction"]'),
57
  }
58
- else: # PERSON or ORG
 
59
  try:
60
- summ = wikipedia.summary(ent, sentences=2)
61
  except Exception:
62
- summ = "No summary available"
63
- out[ent] = {"type":"wiki","summary": summ}
64
  if not out:
65
- return {"error":"no named entities found"}
66
  return out
67
 
68
- # 5) Gradio interface
69
  iface = gr.Interface(
70
  fn=get_context,
71
- inputs=gr.Textbox(lines=3, placeholder="Enter or paste your translated text…"),
72
  outputs="json",
73
- title="iVoice Context-Aware API",
74
- description="Extracts people, places, orgs from text and returns nearby POIs or Wikipedia summaries."
75
  )
76
 
77
  if __name__ == "__main__":
 
1
+ # app.py
2
+
3
  import requests
4
  import wikipedia
5
  import gradio as gr
6
+ from transformers import pipeline
7
+
8
# 1) Load the BERT NER pipeline once at import time so every request reuses it.
# `aggregation_strategy="simple"` merges word pieces into whole entities; it is
# the supported replacement for the deprecated `grouped_entities=True` flag.
# Each result is a dict that includes the "word" and "entity_group" keys.
ner = pipeline(
    "ner",
    model="dslim/bert-base-NER-uncased",
    aggregation_strategy="simple",
)
14
 
15
# 2) Geocode via Nominatim
def geocode(place: str, timeout: float = 10.0):
    """Resolve a place name to coordinates with the Nominatim search API.

    Args:
        place: Free-text place name to look up.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A ``(lat, lon)`` tuple of floats for the first match, or ``None``
        when the name is blank, nothing matches, or the lookup fails.
    """
    import logging

    if not place or not place.strip():
        # Nothing to search for; skip the network round-trip entirely.
        return None

    try:
        resp = requests.get(
            "https://nominatim.openstreetmap.org/search",
            params={"q": place, "format": "json", "limit": 1},
            headers={"User-Agent": "iVoiceContext/1.0"},
            timeout=timeout,
        )
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError) as exc:
        # Network failure, HTTP error status, or a non-JSON body: report the
        # place as not geocodable instead of aborting the caller.
        logging.getLogger(__name__).warning("geocode(%r) failed: %s", place, exc)
        return None

    if not data:
        return None
    try:
        return float(data[0]["lat"]), float(data[0]["lon"])
    except (KeyError, IndexError, TypeError, ValueError):
        # Payload did not have the expected list-of-matches shape.
        return None
26
 
27
# 3) Fetch POIs via Overpass
def fetch_osm(lat, lon, osm_filter, limit=5, timeout=30):
    """Return named OSM elements within 1000 m of a point.

    Args:
        lat: Latitude of the search centre.
        lon: Longitude of the search centre.
        osm_filter: Overpass tag filter, e.g. ``'["amenity"="restaurant"]'``.
        limit: Maximum number of elements requested from Overpass.
        timeout: Client-side HTTP timeout in seconds; kept above the
            25-second limit declared inside the query itself.

    Returns:
        A list of ``{"name": ...}`` dicts, one per returned element that has
        a ``name`` tag. Empty when ``limit`` is not positive or the request
        fails.
    """
    import logging

    if limit <= 0:
        # Asking for no results: answer locally without hitting the API.
        return []

    query = f"""
    [out:json][timeout:25];
    (
      node{osm_filter}(around:1000,{lat},{lon});
      way{osm_filter}(around:1000,{lat},{lon});
      rel{osm_filter}(around:1000,{lat},{lon});
    );
    out center {limit};
    """
    try:
        r = requests.post(
            "https://overpass-api.de/api/interpreter",
            data={"data": query},
            timeout=timeout,
        )
        r.raise_for_status()
        elems = r.json().get("elements", [])
    except (requests.RequestException, ValueError, AttributeError) as exc:
        # HTTP error, network failure, or a body that is not a JSON object:
        # degrade to "no POIs" rather than failing the whole request.
        logging.getLogger(__name__).warning("Overpass query failed: %s", exc)
        return []

    names = (e.get("tags", {}).get("name") for e in elems)
    return [{"name": name} for name in names if name]
 
 
 
 
 
 
 
 
 
 
 
46
 
47
# 4) Main function
def _location_context(name):
    """Build the record for a LOC entity: nearby restaurants and attractions."""
    geo = geocode(name)
    if not geo:
        return {"type": "location", "error": "could not geocode"}
    lat, lon = geo
    return {
        "type": "location",
        "restaurants": fetch_osm(lat, lon, '["amenity"="restaurant"]'),
        "attractions": fetch_osm(lat, lon, '["tourism"="attraction"]'),
    }


def _wiki_context(name):
    """Build the record for a non-location entity from a Wikipedia summary."""
    try:
        summary = wikipedia.summary(name, sentences=2)
    except Exception:
        # Deliberately broad: the lookup is best-effort, and any failure is
        # reported as a placeholder summary instead of an error.
        summary = "No summary available."
    return {"type": "wiki", "summary": summary}


def get_context(text: str):
    """Extract named entities from ``text`` and attach context to each one.

    Entities labelled ``LOC`` are geocoded and enriched with nearby
    restaurants and attractions; every other label (PERSON, ORG, MISC, etc.)
    gets a two-sentence Wikipedia summary.

    Args:
        text: The input text to analyse.

    Returns:
        A dict keyed by entity text, or ``{"error": "No entities found"}``
        when the text is blank or no entity is recognised.
    """
    if not text or not text.strip():
        # Nothing to analyse; skip the model call.
        return {"error": "No entities found"}

    # De-duplicate entities by text; a repeated word keeps its last label.
    ents = {ent["word"]: ent["entity_group"] for ent in ner(text)}

    out = {}
    for word, label in ents.items():
        if not word.strip():
            # A blank entity string cannot be looked up anywhere.
            continue
        if label == "LOC":
            out[word] = _location_context(word)
        else:
            out[word] = _wiki_context(word)

    if not out:
        return {"error": "No entities found"}
    return out
75
 
76
# 5) Gradio UI: one multi-line text box in, the JSON produced by get_context out.
iface = gr.Interface(
    title="iVoice Context-Aware",
    description="BERT NER geocode LOC Overpass POIs Wikipedia for others",
    fn=get_context,
    inputs=gr.Textbox(placeholder="Paste your translated text…", lines=3),
    outputs="json",
)
84
 
85
  if __name__ == "__main__":