Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -48,7 +48,7 @@ def parse_action(string: str):
|
|
48 |
|
49 |
VERBOSE = True
|
50 |
MAX_HISTORY = 100
|
51 |
-
MAX_DATA =
|
52 |
|
53 |
def format_prompt(message, history):
|
54 |
prompt = "<s>"
|
@@ -289,7 +289,8 @@ def find_all(purpose,task,history, url):
|
|
289 |
print (f"trying URL:: {url}")
|
290 |
try:
|
291 |
if url != "" and url != None:
|
292 |
-
rawp = []
|
|
|
293 |
source = requests.get(url)
|
294 |
#source = urllib.request.urlopen(url).read()
|
295 |
soup = bs4.BeautifulSoup(source.content,'lxml')
|
@@ -303,18 +304,22 @@ def find_all(purpose,task,history, url):
|
|
303 |
print(soup.title.parent.name)
|
304 |
#rawp.append([tag.name for tag in soup.find_all()] )
|
305 |
print([tag.name for tag in soup.find_all()])
|
306 |
-
rawp=soup
|
|
|
|
|
|
|
|
|
307 |
c=0
|
308 |
-
rl = len(
|
309 |
-
print(rl)
|
310 |
-
for
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
print (c)
|
315 |
if rl > MAX_DATA:
|
316 |
print("compressing...")
|
317 |
-
rawp = compress_data(
|
318 |
print (rawp)
|
319 |
history += "observation: the search results are:\n {}\n".format(rawp)
|
320 |
task = "complete?"
|
|
|
48 |
|
49 |
VERBOSE = True
|
50 |
MAX_HISTORY = 100
|
51 |
+
MAX_DATA = 1000
|
52 |
|
53 |
def format_prompt(message, history):
|
54 |
prompt = "<s>"
|
|
|
289 |
print (f"trying URL:: {url}")
|
290 |
try:
|
291 |
if url != "" and url != None:
|
292 |
+
#rawp = []
|
293 |
+
out = []
|
294 |
source = requests.get(url)
|
295 |
#source = urllib.request.urlopen(url).read()
|
296 |
soup = bs4.BeautifulSoup(source.content,'lxml')
|
|
|
304 |
print(soup.title.parent.name)
|
305 |
#rawp.append([tag.name for tag in soup.find_all()] )
|
306 |
print([tag.name for tag in soup.find_all()])
|
307 |
+
rawp=(f'RAW TEXT RETURNED:\n*********\n{soup.text}\n*********\n')
|
308 |
+
out.append(rawp)
|
309 |
+
q=("a","p","span","content","article")
|
310 |
+
for p in soup.find_all(f'{q}'):
|
311 |
+
out.append([{q:p.string,"additional":z,"parent":p.parent.name,"previous":[b for b in p.previous],"first-child":[b.name for b in p.children],"content":p}])
|
312 |
c=0
|
313 |
+
rl = len(out)
|
314 |
+
print(f'rl:: {rl}')
|
315 |
+
for ea in out:
|
316 |
+
for i in str(ea):
|
317 |
+
if i == " " or i==",":
|
318 |
+
c +=1
|
319 |
+
print (f'c:: {c}')
|
320 |
if rl > MAX_DATA:
|
321 |
print("compressing...")
|
322 |
+
rawp = compress_data(c,purpose,task,out)
|
323 |
print (rawp)
|
324 |
history += "observation: the search results are:\n {}\n".format(rawp)
|
325 |
task = "complete?"
|