gholap310 committed on
Commit
4b8483e
·
verified ·
1 Parent(s): d5319a5

Upload 3 files

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. app.py +79 -0
  3. electricity_corpus.json +3 -0
  4. requirements.txt +11 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ electricity_corpus.json filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {\rtf1\ansi\ansicpg1252\cocoartf2822
2
+ \cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fmodern\fcharset0 Courier;}
3
+ {\colortbl;\red255\green255\blue255;\red131\green0\blue165;\red245\green245\blue245;\red0\green0\blue0;
4
+ \red15\green112\blue1;\red86\green65\blue25;\red144\green1\blue18;\red0\green0\blue255;\red0\green0\blue109;
5
+ \red19\green85\blue52;}
6
+ {\*\expandedcolortbl;;\cssrgb\c59216\c13725\c70588;\cssrgb\c96863\c96863\c96863;\cssrgb\c0\c0\c0;
7
+ \cssrgb\c0\c50196\c0;\cssrgb\c41569\c32157\c12941;\cssrgb\c63922\c8235\c8235;\cssrgb\c0\c0\c100000;\cssrgb\c0\c6275\c50196;
8
+ \cssrgb\c6667\c40000\c26667;}
9
+ \margl1440\margr1440\vieww11520\viewh8400\viewkind0
10
+ \deftab720
11
+ \pard\pardeftab720\partightenfactor0
12
+
13
+ \f0\fs28 \cf2 \cb3 \expnd0\expndtw0\kerning0
14
+ \outl0\strokewidth0 \strokec2 import\cf0 \strokec4 json\cb1 \
15
+ \cf2 \cb3 \strokec2 from\cf0 \strokec4 sklearn.feature_extraction.text \cf2 \strokec2 import\cf0 \strokec4 TfidfVectorizer\cb1 \
16
+ \cf2 \cb3 \strokec2 from\cf0 \strokec4 sklearn.metrics.pairwise \cf2 \strokec2 import\cf0 \strokec4 cosine_similarity\cb1 \
17
+ \cf2 \cb3 \strokec2 from\cf0 \strokec4 transformers \cf2 \strokec2 import\cf0 \strokec4 pipeline\cb1 \
18
+ \cf2 \cb3 \strokec2 import\cf0 \strokec4 gradio \cf2 \strokec2 as\cf0 \strokec4 gr\cb1 \
19
+ \
20
+ \pard\pardeftab720\partightenfactor0
21
+ \cf5 \cb3 \strokec5 # Load your natural-language corpus\cf0 \cb1 \strokec4 \
22
+ \pard\pardeftab720\partightenfactor0
23
+ \cf2 \cb3 \strokec2 with\cf0 \strokec4 \cf6 \strokec6 open\cf0 \strokec4 (\cf7 \strokec7 "electricity_corpus.json"\cf0 \strokec4 , \cf7 \strokec7 "r"\cf0 \strokec4 ) \cf2 \strokec2 as\cf0 \strokec4 f:\cb1 \
24
+ \pard\pardeftab720\partightenfactor0
25
+ \cf0 \cb3 corpus = json.load(f)\cb1 \
26
+ \
27
+ \pard\pardeftab720\partightenfactor0
28
+ \cf5 \cb3 \strokec5 # Build TF-IDF index\cf0 \cb1 \strokec4 \
29
+ \pard\pardeftab720\partightenfactor0
30
+ \cf0 \cb3 vectorizer = TfidfVectorizer()\cb1 \
31
+ \cb3 tfidf_matrix = vectorizer.fit_transform(corpus)\cb1 \
32
+ \
33
+ \pard\pardeftab720\partightenfactor0
34
+ \cf5 \cb3 \strokec5 # Load the QA model\cf0 \cb1 \strokec4 \
35
+ \pard\pardeftab720\partightenfactor0
36
+ \cf0 \cb3 qa_pipeline = pipeline(\cf7 \strokec7 "question-answering"\cf0 \strokec4 , model=\cf7 \strokec7 "distilbert-base-cased-distilled-squad"\cf0 \strokec4 )\cb1 \
37
+ \
38
+ \pard\pardeftab720\partightenfactor0
39
+ \cf5 \cb3 \strokec5 # Function to retrieve top matching rows\cf0 \cb1 \strokec4 \
40
+ \pard\pardeftab720\partightenfactor0
41
+ \cf8 \cb3 \strokec8 def\cf0 \strokec4 \cf6 \strokec6 get_top_contexts\cf0 \strokec4 (\cf9 \strokec9 question\cf0 \strokec4 , \cf9 \strokec9 top_k\cf0 \strokec4 =\cf10 \strokec10 3\cf0 \strokec4 ):\cb1 \
42
+ \pard\pardeftab720\partightenfactor0
43
+ \cf0 \cb3 question_vec = vectorizer.transform([question])\cb1 \
44
+ \cb3 similarities = cosine_similarity(question_vec, tfidf_matrix).flatten()\cb1 \
45
+ \cb3 top_indices = similarities.argsort()[-top_k:][::\cf10 \strokec10 -1\cf0 \strokec4 ]\cb1 \
46
+ \cb3 \cf2 \strokec2 return\cf0 \strokec4 [corpus[i] \cf2 \strokec2 for\cf0 \strokec4 i \cf8 \strokec8 in\cf0 \strokec4 top_indices]\cb1 \
47
+ \
48
+ \pard\pardeftab720\partightenfactor0
49
+ \cf5 \cb3 \strokec5 # Main logic to get answer\cf0 \cb1 \strokec4 \
50
+ \pard\pardeftab720\partightenfactor0
51
+ \cf8 \cb3 \strokec8 def\cf0 \strokec4 \cf6 \strokec6 answer_question\cf0 \strokec4 (\cf9 \strokec9 question\cf0 \strokec4 , \cf9 \strokec9 top_k\cf0 \strokec4 =\cf10 \strokec10 3\cf0 \strokec4 ):\cb1 \
52
+ \pard\pardeftab720\partightenfactor0
53
+ \cf0 \cb3 \cf2 \strokec2 if\cf0 \strokec4 \cf8 \strokec8 not\cf0 \strokec4 question.strip():\cb1 \
54
+ \cb3 \cf2 \strokec2 return\cf0 \strokec4 \cf7 \strokec7 "Please enter a valid question."\cf0 \cb1 \strokec4 \
55
+ \
56
+ \cb3 contexts = get_top_contexts(question, top_k)\cb1 \
57
+ \cb3 combined_context = \cf7 \strokec7 " "\cf0 \strokec4 .join(contexts)[:\cf10 \strokec10 4096\cf0 \strokec4 ] \cf5 \strokec5 # truncate to model max input\cf0 \cb1 \strokec4 \
58
+ \cb3 result = qa_pipeline(question=question, context=combined_context)\cb1 \
59
+ \cb3 \cf2 \strokec2 return\cf0 \strokec4 result[\cf7 \strokec7 "answer"\cf0 \strokec4 ]\cb1 \
60
+ \
61
+ \pard\pardeftab720\partightenfactor0
62
+ \cf5 \cb3 \strokec5 # Gradio interface\cf0 \cb1 \strokec4 \
63
+ \pard\pardeftab720\partightenfactor0
64
+ \cf0 \cb3 iface = gr.Interface(\cb1 \
65
+ \cb3 fn=answer_question,\cb1 \
66
+ \cb3 inputs=gr.Textbox(label=\cf7 \strokec7 "Ask your question about electricity usage..."\cf0 \strokec4 ),\cb1 \
67
+ \cb3 outputs=gr.Textbox(label=\cf7 \strokec7 "Answer"\cf0 \strokec4 ),\cb1 \
68
+ \cb3 title=\cf7 \strokec7 "\uc0\u55357 \u56588 Electricity Data Q&A"\cf0 \strokec4 ,\cb1 \
69
+ \cb3 description=\cf7 \strokec7 "Ask questions like 'What was the price for residential in Texas in Jan 2001?' or 'Which state had highest revenue in Jan 2001?'"\cf0 \strokec4 ,\cb1 \
70
+ \cb3 )\cb1 \
71
+ \
72
+ \pard\pardeftab720\partightenfactor0
73
+ \cf5 \cb3 \strokec5 # Run the app\cf0 \cb1 \strokec4 \
74
+ \pard\pardeftab720\partightenfactor0
75
+ \cf2 \cb3 \strokec2 if\cf0 \strokec4 \cf9 \strokec9 __name__\cf0 \strokec4 == \cf7 \strokec7 "__main__"\cf0 \strokec4 :\cb1 \
76
+ \pard\pardeftab720\partightenfactor0
77
+ \cf0 \cb3 iface.launch()\cb1 \
78
+ \
79
+ }
electricity_corpus.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b38b73e99e20d7cbdaf2e0807a4f7784d7ebd549803bbece88d624bbb07d21b0
3
+ size 13422039
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {\rtf1\ansi\ansicpg1252\cocoartf2822
2
+ \cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
3
+ {\colortbl;\red255\green255\blue255;}
4
+ {\*\expandedcolortbl;;}
5
+ \margl1440\margr1440\vieww11520\viewh8400\viewkind0
6
+ \pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0
7
+
8
+ \f0\fs24 \cf0 transformers\
9
+ scikit-learn\
10
+ gradio\
11
+ }