Upload 3 files
Browse files- .gitattributes +1 -0
- app.py +79 -0
- electricity_corpus.json +3 -0
- requirements.txt +11 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
electricity_corpus.json filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{\rtf1\ansi\ansicpg1252\cocoartf2822
|
2 |
+
\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fmodern\fcharset0 Courier;}
|
3 |
+
{\colortbl;\red255\green255\blue255;\red131\green0\blue165;\red245\green245\blue245;\red0\green0\blue0;
|
4 |
+
\red15\green112\blue1;\red86\green65\blue25;\red144\green1\blue18;\red0\green0\blue255;\red0\green0\blue109;
|
5 |
+
\red19\green85\blue52;}
|
6 |
+
{\*\expandedcolortbl;;\cssrgb\c59216\c13725\c70588;\cssrgb\c96863\c96863\c96863;\cssrgb\c0\c0\c0;
|
7 |
+
\cssrgb\c0\c50196\c0;\cssrgb\c41569\c32157\c12941;\cssrgb\c63922\c8235\c8235;\cssrgb\c0\c0\c100000;\cssrgb\c0\c6275\c50196;
|
8 |
+
\cssrgb\c6667\c40000\c26667;}
|
9 |
+
\margl1440\margr1440\vieww11520\viewh8400\viewkind0
|
10 |
+
\deftab720
|
11 |
+
\pard\pardeftab720\partightenfactor0
|
12 |
+
|
13 |
+
\f0\fs28 \cf2 \cb3 \expnd0\expndtw0\kerning0
|
14 |
+
\outl0\strokewidth0 \strokec2 import\cf0 \strokec4 json\cb1 \
|
15 |
+
\cf2 \cb3 \strokec2 from\cf0 \strokec4 sklearn.feature_extraction.text \cf2 \strokec2 import\cf0 \strokec4 TfidfVectorizer\cb1 \
|
16 |
+
\cf2 \cb3 \strokec2 from\cf0 \strokec4 sklearn.metrics.pairwise \cf2 \strokec2 import\cf0 \strokec4 cosine_similarity\cb1 \
|
17 |
+
\cf2 \cb3 \strokec2 from\cf0 \strokec4 transformers \cf2 \strokec2 import\cf0 \strokec4 pipeline\cb1 \
|
18 |
+
\cf2 \cb3 \strokec2 import\cf0 \strokec4 gradio \cf2 \strokec2 as\cf0 \strokec4 gr\cb1 \
|
19 |
+
\
|
20 |
+
\pard\pardeftab720\partightenfactor0
|
21 |
+
\cf5 \cb3 \strokec5 # Load your natural-language corpus\cf0 \cb1 \strokec4 \
|
22 |
+
\pard\pardeftab720\partightenfactor0
|
23 |
+
\cf2 \cb3 \strokec2 with\cf0 \strokec4 \cf6 \strokec6 open\cf0 \strokec4 (\cf7 \strokec7 "electricity_corpus.json"\cf0 \strokec4 , \cf7 \strokec7 "r"\cf0 \strokec4 ) \cf2 \strokec2 as\cf0 \strokec4 f:\cb1 \
|
24 |
+
\pard\pardeftab720\partightenfactor0
|
25 |
+
\cf0 \cb3 corpus = json.load(f)\cb1 \
|
26 |
+
\
|
27 |
+
\pard\pardeftab720\partightenfactor0
|
28 |
+
\cf5 \cb3 \strokec5 # Build TF-IDF index\cf0 \cb1 \strokec4 \
|
29 |
+
\pard\pardeftab720\partightenfactor0
|
30 |
+
\cf0 \cb3 vectorizer = TfidfVectorizer()\cb1 \
|
31 |
+
\cb3 tfidf_matrix = vectorizer.fit_transform(corpus)\cb1 \
|
32 |
+
\
|
33 |
+
\pard\pardeftab720\partightenfactor0
|
34 |
+
\cf5 \cb3 \strokec5 # Load the QA model\cf0 \cb1 \strokec4 \
|
35 |
+
\pard\pardeftab720\partightenfactor0
|
36 |
+
\cf0 \cb3 qa_pipeline = pipeline(\cf7 \strokec7 "question-answering"\cf0 \strokec4 , model=\cf7 \strokec7 "distilbert-base-cased-distilled-squad"\cf0 \strokec4 )\cb1 \
|
37 |
+
\
|
38 |
+
\pard\pardeftab720\partightenfactor0
|
39 |
+
\cf5 \cb3 \strokec5 # Function to retrieve top matching rows\cf0 \cb1 \strokec4 \
|
40 |
+
\pard\pardeftab720\partightenfactor0
|
41 |
+
\cf8 \cb3 \strokec8 def\cf0 \strokec4 \cf6 \strokec6 get_top_contexts\cf0 \strokec4 (\cf9 \strokec9 question\cf0 \strokec4 , \cf9 \strokec9 top_k\cf0 \strokec4 =\cf10 \strokec10 3\cf0 \strokec4 ):\cb1 \
|
42 |
+
\pard\pardeftab720\partightenfactor0
|
43 |
+
\cf0 \cb3 question_vec = vectorizer.transform([question])\cb1 \
|
44 |
+
\cb3 similarities = cosine_similarity(question_vec, tfidf_matrix).flatten()\cb1 \
|
45 |
+
\cb3 top_indices = similarities.argsort()[-top_k:][::\cf10 \strokec10 -1\cf0 \strokec4 ]\cb1 \
|
46 |
+
\cb3 \cf2 \strokec2 return\cf0 \strokec4 [corpus[i] \cf2 \strokec2 for\cf0 \strokec4 i \cf8 \strokec8 in\cf0 \strokec4 top_indices]\cb1 \
|
47 |
+
\
|
48 |
+
\pard\pardeftab720\partightenfactor0
|
49 |
+
\cf5 \cb3 \strokec5 # Main logic to get answer\cf0 \cb1 \strokec4 \
|
50 |
+
\pard\pardeftab720\partightenfactor0
|
51 |
+
\cf8 \cb3 \strokec8 def\cf0 \strokec4 \cf6 \strokec6 answer_question\cf0 \strokec4 (\cf9 \strokec9 question\cf0 \strokec4 , \cf9 \strokec9 top_k\cf0 \strokec4 =\cf10 \strokec10 3\cf0 \strokec4 ):\cb1 \
|
52 |
+
\pard\pardeftab720\partightenfactor0
|
53 |
+
\cf0 \cb3 \cf2 \strokec2 if\cf0 \strokec4 \cf8 \strokec8 not\cf0 \strokec4 question.strip():\cb1 \
|
54 |
+
\cb3 \cf2 \strokec2 return\cf0 \strokec4 \cf7 \strokec7 "Please enter a valid question."\cf0 \cb1 \strokec4 \
|
55 |
+
\
|
56 |
+
\cb3 contexts = get_top_contexts(question, top_k)\cb1 \
|
57 |
+
\cb3 combined_context = \cf7 \strokec7 " "\cf0 \strokec4 .join(contexts)[:\cf10 \strokec10 4096\cf0 \strokec4 ] \cf5 \strokec5 # truncate to model max input\cf0 \cb1 \strokec4 \
|
58 |
+
\cb3 result = qa_pipeline(question=question, context=combined_context)\cb1 \
|
59 |
+
\cb3 \cf2 \strokec2 return\cf0 \strokec4 result[\cf7 \strokec7 "answer"\cf0 \strokec4 ]\cb1 \
|
60 |
+
\
|
61 |
+
\pard\pardeftab720\partightenfactor0
|
62 |
+
\cf5 \cb3 \strokec5 # Gradio interface\cf0 \cb1 \strokec4 \
|
63 |
+
\pard\pardeftab720\partightenfactor0
|
64 |
+
\cf0 \cb3 iface = gr.Interface(\cb1 \
|
65 |
+
\cb3 fn=answer_question,\cb1 \
|
66 |
+
\cb3 inputs=gr.Textbox(label=\cf7 \strokec7 "Ask your question about electricity usage..."\cf0 \strokec4 ),\cb1 \
|
67 |
+
\cb3 outputs=gr.Textbox(label=\cf7 \strokec7 "Answer"\cf0 \strokec4 ),\cb1 \
|
68 |
+
\cb3 title=\cf7 \strokec7 "\uc0\u55357 \u56588 Electricity Data Q&A"\cf0 \strokec4 ,\cb1 \
|
69 |
+
\cb3 description=\cf7 \strokec7 "Ask questions like 'What was the price for residential in Texas in Jan 2001?' or 'Which state had highest revenue in Jan 2001?'"\cf0 \strokec4 ,\cb1 \
|
70 |
+
\cb3 )\cb1 \
|
71 |
+
\
|
72 |
+
\pard\pardeftab720\partightenfactor0
|
73 |
+
\cf5 \cb3 \strokec5 # Run the app\cf0 \cb1 \strokec4 \
|
74 |
+
\pard\pardeftab720\partightenfactor0
|
75 |
+
\cf2 \cb3 \strokec2 if\cf0 \strokec4 \cf9 \strokec9 __name__\cf0 \strokec4 == \cf7 \strokec7 "__main__"\cf0 \strokec4 :\cb1 \
|
76 |
+
\pard\pardeftab720\partightenfactor0
|
77 |
+
\cf0 \cb3 iface.launch()\cb1 \
|
78 |
+
\
|
79 |
+
}
|
electricity_corpus.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b38b73e99e20d7cbdaf2e0807a4f7784d7ebd549803bbece88d624bbb07d21b0
|
3 |
+
size 13422039
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{\rtf1\ansi\ansicpg1252\cocoartf2822
|
2 |
+
\cocoatextscaling0\cocoaplatform0{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
|
3 |
+
{\colortbl;\red255\green255\blue255;}
|
4 |
+
{\*\expandedcolortbl;;}
|
5 |
+
\margl1440\margr1440\vieww11520\viewh8400\viewkind0
|
6 |
+
\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\pardirnatural\partightenfactor0
|
7 |
+
|
8 |
+
\f0\fs24 \cf0 transformers\
|
9 |
+
scikit-learn\
|
10 |
+
gradio\
|
11 |
+
}
|