OmkarGhugarkar commited on
Commit
e59d7a4
·
verified ·
1 Parent(s): 7b8ff52

First Commit of Files

Browse files
Files changed (2) hide show
  1. app.py +155 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import http.client
2
+ import urllib.request, urllib.parse, urllib.error
3
+ import base64
4
+ import json
5
+ import re
6
+ import google.generativeai as genai
7
+ import os
8
+ from PyPDF2 import PdfReader
9
+ from bs4 import BeautifulSoup
10
+ import streamlit as st
11
+ import time
12
+
13
# SECURITY: credentials were previously hard-coded in this file (and committed).
# They are now read from the environment; the literal fallbacks are kept only
# for backward compatibility and should be rotated and removed.
google_api_key = os.environ.get(
    "GOOGLE_API_KEY", "AIzaSyDdnoC2syL3bor01IRbbaPLZSEgJkYB7BI")

# HTTP headers for every Indian Kanoon API call: token auth + JSON responses.
_ik_token = os.environ.get(
    "INDIANKANOON_TOKEN", "ca819b0b0853b9a9a76f0f421a884b88035c87b0")
headers = {'Authorization': 'Token %s' % _ik_token,
           'Accept': 'application/json'}

# Host used by call_api() for all requests.
basehost = 'api.indiankanoon.org'
19
def call_api(url):
    """POST *url* to the Indian Kanoon API and return the raw body as bytes.

    url -- path (and query string) relative to ``basehost``.
    """
    connection = http.client.HTTPSConnection(basehost)
    try:
        connection.request('POST', url, headers=headers)
        response = connection.getresponse()
        return response.read()
    finally:
        # The original leaked the connection; always release the socket,
        # even when the request or read raises.
        connection.close()
25
+
26
def search(q, pagenum, maxpages):
    """Run a full-text search on the API; returns the raw JSON response bytes.

    q        -- free-text query (UTF-8 encoded and URL-escaped here)
    pagenum  -- zero-based results page to fetch
    maxpages -- number of pages to request
    """
    encoded_query = urllib.parse.quote_plus(q.encode('utf8'))
    endpoint = '/search/?formInput=%s&pagenum=%d&maxpages=%d' % (
        encoded_query, pagenum, maxpages)
    return call_api(endpoint)
30
+
31
def fetch_doc(docid):
    """Fetch the parsed document for a numeric *docid*; returns response bytes.

    The original built an always-empty ``args`` list followed by a dead
    ``if args:`` branch; both removed — the URL never carried a query string.
    """
    url = '/doc/%d/' % docid
    return call_api(url)
38
+
39
def fetch_orig_doc(docid):
    """Fetch the original (as-filed) document for *docid*.

    Returns a ``(url, response_bytes)`` pair; callers reuse the URL to
    derive a filename for the saved copy.
    """
    endpoint = '/origdoc/%d/' % docid
    payload = call_api(endpoint)
    return endpoint, payload
42
+
43
def get_file_extension(mtype):
    """Map a Content-Type value to a file extension string.

    mtype -- MIME type (may carry parameters, e.g. ``text/html; charset=...``).
    Returns 'unkwn' for empty/unknown types (spelling kept for backward
    compatibility with previously saved files).
    """
    # startswith() on literal prefixes is equivalent to the anchored
    # re.match() calls used before, but clearer and faster.
    mime_extensions = (
        ('text/html', 'html'),
        ('application/postscript', 'ps'),
        ('application/pdf', 'pdf'),
        ('text/plain', 'txt'),
        ('image/png', 'png'),
    )
    if not mtype:
        print(mtype)  # preserve the original diagnostic for missing types
        return 'unkwn'
    for prefix, extension in mime_extensions:
        if mtype.startswith(prefix):
            return extension
    return 'unkwn'
58
+
59
def save_original(docid, orig, origpath, path):
    """Decode a /origdoc/ API response and write the document to disk.

    docid    -- document id (unused here; kept for interface compatibility)
    origpath -- the request URL; its second-to-last path segment (the docid)
                becomes the file stem
    path     -- destination directory
    orig     -- raw JSON from fetch_orig_doc(); silently returns when the API
                reported an error for this document
    """
    obj = json.loads(orig)
    if 'errmsg' in obj:
        return

    doc = base64.b64decode(obj['doc'])

    extension = get_file_extension(obj['Content-Type'])
    name = origpath.split('/')[-2]
    filepath = path + '/' + name + '.%s' % extension
    # with-statement guarantees the handle is closed even if the write
    # raises (the original leaked it on error).
    with open(filepath, 'wb') as filehandle:
        filehandle.write(doc)
72
+
73
def pipeline(q):
    """Research a legal question end-to-end and return the final write-up.

    Steps: grammar-correct the query with Gemini, search Indian Kanoon,
    download the original documents, summarise each one, then ask the model
    to synthesise a final report from the summaries.

    q -- the user's free-text legal question.
    Returns the model's final answer text. Network/LLM heavy; sleeps are
    crude rate limiting between model calls.
    """
    genai.configure(api_key=google_api_key)
    model = genai.GenerativeModel("gemini-1.5-flash")
    response = model.generate_content(
        f"Make this sentence grammatically correct. Reply back only the sentence nothing more. {q}")
    q = response.text
    print(q)

    # Download directory is named after the first word of the corrected query.
    folder_path = q.split()[0]
    try:
        os.mkdir(folder_path)
    except FileExistsError:
        # Narrowed from a bare except: only "already exists" is expected here.
        print("Folder already exist")

    result = search(q, 0, 1)
    obj = json.loads(result)

    docs = obj['docs']
    print(len(docs))
    for doc in docs:
        docid = doc['tid']
        origpath, orig = fetch_orig_doc(docid)
        save_original(docid, orig, origpath, folder_path)
        print(docid)

    files = os.listdir(folder_path)
    summary = ''
    files.sort()
    print("Going Through the files now")
    for file in files:
        # save_original names each file after the /origdoc/ URL segment, so
        # the file stem IS the docid. The original cited the LAST downloaded
        # docid for every summary; derive the correct id per file instead.
        file_docid = os.path.splitext(file)[0]
        text = None

        if file.endswith('.html'):
            print(file)
            time.sleep(30)
            try:
                with open(f'{folder_path}/{file}', 'r', encoding='utf-8') as data:
                    html_cont = data.read()
            except (OSError, UnicodeDecodeError):
                # Narrowed from a bare except; unreadable files are skipped.
                continue

            # Extract visible text from every tag.
            soup = BeautifulSoup(html_cont, 'html.parser')
            text = ''.join(tag.get_text() + " " for tag in soup.find_all())
            print("count ", len(re.findall(r'\w+', text)))

        elif file.endswith('.pdf'):
            print(file)
            reader = PdfReader(f'{folder_path}/{file}')
            text = ""
            for page in reader.pages:
                text += page.extract_text() + "\n"
            print("count ", len(re.findall(r'\w+', text)))

        if text is None:
            # Neither HTML nor PDF: the original summarised stale text from
            # the previous file (or crashed on the first); skip instead.
            continue

        response = model.generate_content(
            f"Write a summary for me of this case in a systematic manner. Explicitally refer to all the penal codes mentioned in those {text}")
        print(response.prompt_feedback)
        try:
            summary += response.text + f" Reference link - https://indiankanoon.org/doc/{file_docid}" + "#" * 100
        except ValueError:
            # genai raises ValueError when the response was blocked/empty.
            print(f"File skipped {file}")

    print("Doing Final")
    time.sleep(60)
    final_response = model.generate_content(f"You are a lawyer and want to do a good research for a case {q}. You have collated past cases {summary}. Now use these past evidences to make a good research for the case {q}. Intensively use the penal codes those are mentioned in the past cases. Be very careful as this is a senstive matter. The answer should be based only on the past cases. At the end of the answer give the reference links to cases cited also. Remember you are going to report it to a client, so please be polite and use positive sentences. The client is unaware summaries are being used in the background, reply in a professional manner.")
    print(final_response.text)
    return final_response.text
148
+
149
+
150
# --- Streamlit UI ---------------------------------------------------------
# The unused module-level sample-question variable `q` (dead code that also
# shadowed pipeline()'s parameter name) has been removed; the sample text
# lives on as the input placeholder below.
st.title("Please have patience, we will run on 2M+ Tokens, might take upto 5 mins :)")
question = st.text_input(
    "",
    placeholder="Can a wife who instituted criminal cases against her husband also initiate disciplinary proceedings against him at his workplace, based on the same set of allegations?",
    label_visibility="collapsed",
)
if question:
    # Render the model's final write-up as Markdown.
    st.markdown(pipeline(question))
155
+
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ beautifulsoup4
2
+ pypdf2
3
+ google-generativeai
4
+ streamlit