Spaces:
Sleeping
Sleeping
darthPanda
commited on
Commit
•
a0504e0
1
Parent(s):
33a1e6c
Commit message
Browse files- Dockerfile +16 -0
- Makefile +8 -0
- app.ipynb +245 -0
- requirements.txt +8 -0
Dockerfile
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9
|
2 |
+
|
3 |
+
WORKDIR /code
|
4 |
+
|
5 |
+
COPY ./requirements.txt /code/requirements.txt
|
6 |
+
RUN python3 -m pip install --no-cache-dir --upgrade pip
|
7 |
+
RUN python3 -m pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
8 |
+
|
9 |
+
COPY . .
|
10 |
+
|
11 |
+
CMD ["panel", "serve", "/code/LangChain_QA_Panel_App.ipynb", "--address", "0.0.0.0", "--port", "7860", "--allow-websocket-origin", "sophiamyang-panel-pdf-qa.hf.space", "--allow-websocket-origin", "0.0.0.0:7860"]
|
12 |
+
|
13 |
+
RUN mkdir /.cache
|
14 |
+
RUN chmod 777 /.cache
|
15 |
+
RUN mkdir .chroma
|
16 |
+
RUN chmod 777 .chroma
|
Makefile
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.PHONY: commit push
|
2 |
+
|
3 |
+
commit:
|
4 |
+
git add .
|
5 |
+
git commit -m "Commit message"
|
6 |
+
|
7 |
+
push: commit
|
8 |
+
git push
|
app.ipynb
ADDED
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 1,
|
6 |
+
"metadata": {},
|
7 |
+
"outputs": [],
|
8 |
+
"source": [
|
9 |
+
"import os \n",
|
10 |
+
"from langchain.chains import RetrievalQA\n",
|
11 |
+
"from langchain.llms import OpenAI\n",
|
12 |
+
"from langchain.document_loaders import TextLoader\n",
|
13 |
+
"from langchain.document_loaders import PyPDFLoader\n",
|
14 |
+
"from langchain.indexes import VectorstoreIndexCreator\n",
|
15 |
+
"from langchain.text_splitter import CharacterTextSplitter\n",
|
16 |
+
"from langchain.embeddings import OpenAIEmbeddings\n",
|
17 |
+
"from langchain.vectorstores import Chroma\n",
|
18 |
+
"import panel as pn\n",
|
19 |
+
"import tempfile"
|
20 |
+
]
|
21 |
+
},
|
22 |
+
{
|
23 |
+
"cell_type": "code",
|
24 |
+
"execution_count": 2,
|
25 |
+
"metadata": {},
|
26 |
+
"outputs": [
|
27 |
+
{
|
28 |
+
"data": {
|
29 |
+
"application/javascript": "(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, js_modules, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n if (js_modules == null) js_modules = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls.length === 0 && js_modules.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n var skip = [];\n if (window.requirejs) {\n window.requirejs.config({'packages': {}, 'paths': {'Quill': 'https://cdn.quilljs.com/1.3.6/quill', 'gridstack': 'https://cdn.jsdelivr.net/npm/[email protected]/dist/gridstack-h5', 'notyf': 'https://cdn.jsdelivr.net/npm/notyf@3/notyf.min'}, 'shim': {'gridstack': {'exports': 'GridStack'}}});\n require([\"Quill\"], function(Quill) {\n\twindow.Quill = Quill\n\ton_load()\n })\n require([\"gridstack\"], function(GridStack) {\n\twindow.GridStack = GridStack\n\ton_load()\n })\n require([\"notyf\"], function() {\n\ton_load()\n })\n root._bokeh_is_loading = css_urls.length + 3;\n } else {\n root._bokeh_is_loading = css_urls.length + js_urls.length + js_modules.length;\n } if (((window['Quill'] !== undefined) && (!(window['Quill'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['GridStack'] !== undefined) && (!(window['GridStack'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/gridstack/[email protected]/dist/gridstack-h5.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } if (((window['Notyf'] !== undefined) && (!(window['Notyf'] instanceof HTMLElement))) || window.requirejs) {\n var urls = ['https://cdn.holoviz.org/panel/0.14.4/dist/bundled/notificationarea/notyf@3/notyf.min.js'];\n for (var i = 0; i < urls.length; i++) {\n skip.push(urls[i])\n }\n } for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n if (skip.indexOf(url) >= 0) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n for (var i = 0; i < js_modules.length; i++) {\n var url = js_modules[i];\n if (skip.indexOf(url) >= 0) {\n\tif (!window.requirejs) {\n\t on_load();\n\t}\n\tcontinue;\n }\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n element.type = \"module\";\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n if (!js_urls.length && !js_modules.length) {\n on_load()\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n var js_urls = [\"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-gl-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-widgets-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-tables-2.4.3.min.js\", \"https://cdn.bokeh.org/bokeh/release/bokeh-mathjax-2.4.3.min.js\", \"https://unpkg.com/@holoviz/[email protected]/dist/panel.min.js\"];\n var js_modules = [];\n var css_urls = [\"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.bubble.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/bundled/quillinput/1.3.6/quill.snow.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/card.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/json.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/debugger.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/loading.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/widgets.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/markdown.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/alerts.css\", \"https://cdn.holoviz.org/panel/0.14.4/dist/css/dataframe.css\"];\n var inline_js = [ function(Bokeh) {\n inject_raw_css(\"\\n .bk.pn-loading.arc:before {\\n background-image: url(\\\"data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHN0eWxlPSJtYXJnaW46IGF1dG87IGJhY2tncm91bmQ6IG5vbmU7IGRpc3BsYXk6IGJsb2NrOyBzaGFwZS1yZW5kZXJpbmc6IGF1dG87IiB2aWV3Qm94PSIwIDAgMTAwIDEwMCIgcHJlc2VydmVBc3BlY3RSYXRpbz0ieE1pZFlNaWQiPiAgPGNpcmNsZSBjeD0iNTAiIGN5PSI1MCIgZmlsbD0ibm9uZSIgc3Ryb2tlPSIjYzNjM2MzIiBzdHJva2Utd2lkdGg9IjEwIiByPSIzNSIgc3Ryb2tlLWRhc2hhcnJheT0iMTY0LjkzMzYxNDMxMzQ2NDE1IDU2Ljk3Nzg3MTQzNzgyMTM4Ij4gICAgPGFuaW1hdGVUcmFuc2Zvcm0gYXR0cmlidXRlTmFtZT0idHJhbnNmb3JtIiB0eXBlPSJyb3RhdGUiIHJlcGVhdENvdW50PSJpbmRlZmluaXRlIiBkdXI9IjFzIiB2YWx1ZXM9IjAgNTAgNTA7MzYwIDUwIDUwIiBrZXlUaW1lcz0iMDsxIj48L2FuaW1hdGVUcmFuc2Zvcm0+ICA8L2NpcmNsZT48L3N2Zz4=\\\");\\n background-size: auto calc(min(50%, 400px));\\n }\\n \");\n }, function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\nfunction(Bokeh) {} // ensure no trailing comma for IE\n ];\n\n function run_inline_js() {\n if ((root.Bokeh !== undefined) || (force === true)) {\n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }} else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n }\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n run_inline_js();\n } else {\n load_libs(css_urls, js_urls, js_modules, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));",
|
30 |
+
"application/vnd.holoviews_load.v0+json": ""
|
31 |
+
},
|
32 |
+
"metadata": {},
|
33 |
+
"output_type": "display_data"
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"data": {
|
37 |
+
"application/javascript": "\nif ((window.PyViz === undefined) || (window.PyViz instanceof HTMLElement)) {\n window.PyViz = {comms: {}, comm_status:{}, kernels:{}, receivers: {}, plot_index: []}\n}\n\n\n function JupyterCommManager() {\n }\n\n JupyterCommManager.prototype.register_target = function(plot_id, comm_id, msg_handler) {\n if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n comm_manager.register_target(comm_id, function(comm) {\n comm.on_msg(msg_handler);\n });\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n window.PyViz.kernels[plot_id].registerCommTarget(comm_id, function(comm) {\n comm.onMsg = msg_handler;\n });\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n google.colab.kernel.comms.registerTarget(comm_id, (comm) => {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n console.log(message)\n var content = {data: message.data, comm_id};\n var buffers = []\n for (var buffer of message.buffers || []) {\n buffers.push(new DataView(buffer))\n }\n var metadata = message.metadata || {};\n var msg = {content, buffers, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n })\n }\n }\n\n JupyterCommManager.prototype.get_client_comm = function(plot_id, comm_id, msg_handler) {\n if (comm_id in window.PyViz.comms) {\n return window.PyViz.comms[comm_id];\n } else if (window.comm_manager || ((window.Jupyter !== undefined) && (Jupyter.notebook.kernel != null))) {\n var comm_manager = window.comm_manager || Jupyter.notebook.kernel.comm_manager;\n var comm = comm_manager.new_comm(comm_id, {}, {}, {}, comm_id);\n if (msg_handler) {\n comm.on_msg(msg_handler);\n }\n } else if ((plot_id in window.PyViz.kernels) && (window.PyViz.kernels[plot_id])) {\n var comm = window.PyViz.kernels[plot_id].connectToComm(comm_id);\n comm.open();\n if (msg_handler) {\n comm.onMsg = msg_handler;\n }\n } else if (typeof google != 'undefined' && google.colab.kernel != null) {\n var comm_promise = google.colab.kernel.comms.open(comm_id)\n comm_promise.then((comm) => {\n window.PyViz.comms[comm_id] = comm;\n if (msg_handler) {\n var messages = comm.messages[Symbol.asyncIterator]();\n function processIteratorResult(result) {\n var message = result.value;\n var content = {data: message.data};\n var metadata = message.metadata || {comm_id};\n var msg = {content, metadata}\n msg_handler(msg);\n return messages.next().then(processIteratorResult);\n }\n return messages.next().then(processIteratorResult);\n }\n }) \n var sendClosure = (data, metadata, buffers, disposeOnDone) => {\n return comm_promise.then((comm) => {\n comm.send(data, metadata, buffers, disposeOnDone);\n });\n };\n var comm = {\n send: sendClosure\n };\n }\n window.PyViz.comms[comm_id] = comm;\n return comm;\n }\n window.PyViz.comm_manager = new JupyterCommManager();\n \n\n\nvar JS_MIME_TYPE = 'application/javascript';\nvar HTML_MIME_TYPE = 'text/html';\nvar EXEC_MIME_TYPE = 'application/vnd.holoviews_exec.v0+json';\nvar CLASS_NAME = 'output';\n\n/**\n * Render data to the DOM node\n */\nfunction render(props, node) {\n var div = document.createElement(\"div\");\n var script = document.createElement(\"script\");\n node.appendChild(div);\n node.appendChild(script);\n}\n\n/**\n * Handle when a new output is added\n */\nfunction handle_add_output(event, handle) {\n var output_area = handle.output_area;\n var output = handle.output;\n if ((output.data == undefined) || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n return\n }\n var id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n if (id !== undefined) {\n var nchildren = toinsert.length;\n var html_node = toinsert[nchildren-1].children[0];\n html_node.innerHTML = output.data[HTML_MIME_TYPE];\n var scripts = [];\n var nodelist = html_node.querySelectorAll(\"script\");\n for (var i in nodelist) {\n if (nodelist.hasOwnProperty(i)) {\n scripts.push(nodelist[i])\n }\n }\n\n scripts.forEach( function (oldScript) {\n var newScript = document.createElement(\"script\");\n var attrs = [];\n var nodemap = oldScript.attributes;\n for (var j in nodemap) {\n if (nodemap.hasOwnProperty(j)) {\n attrs.push(nodemap[j])\n }\n }\n attrs.forEach(function(attr) { newScript.setAttribute(attr.name, attr.value) });\n newScript.appendChild(document.createTextNode(oldScript.innerHTML));\n oldScript.parentNode.replaceChild(newScript, oldScript);\n });\n if (JS_MIME_TYPE in output.data) {\n toinsert[nchildren-1].children[1].textContent = output.data[JS_MIME_TYPE];\n }\n output_area._hv_plot_id = id;\n if ((window.Bokeh !== undefined) && (id in Bokeh.index)) {\n window.PyViz.plot_index[id] = Bokeh.index[id];\n } else {\n window.PyViz.plot_index[id] = null;\n }\n } else if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n var bk_div = document.createElement(\"div\");\n bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n var script_attrs = bk_div.children[0].attributes;\n for (var i = 0; i < script_attrs.length; i++) {\n toinsert[toinsert.length - 1].childNodes[1].setAttribute(script_attrs[i].name, script_attrs[i].value);\n }\n // store reference to server id on output_area\n output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n }\n}\n\n/**\n * Handle when an output is cleared or removed\n */\nfunction handle_clear_output(event, handle) {\n var id = handle.cell.output_area._hv_plot_id;\n var server_id = handle.cell.output_area._bokeh_server_id;\n if (((id === undefined) || !(id in PyViz.plot_index)) && (server_id !== undefined)) { return; }\n var comm = window.PyViz.comm_manager.get_client_comm(\"hv-extension-comm\", \"hv-extension-comm\", function () {});\n if (server_id !== null) {\n comm.send({event_type: 'server_delete', 'id': server_id});\n return;\n } else if (comm !== null) {\n comm.send({event_type: 'delete', 'id': id});\n }\n delete PyViz.plot_index[id];\n if ((window.Bokeh !== undefined) & (id in window.Bokeh.index)) {\n var doc = window.Bokeh.index[id].model.document\n doc.clear();\n const i = window.Bokeh.documents.indexOf(doc);\n if (i > -1) {\n window.Bokeh.documents.splice(i, 1);\n }\n }\n}\n\n/**\n * Handle kernel restart event\n */\nfunction handle_kernel_cleanup(event, handle) {\n delete PyViz.comms[\"hv-extension-comm\"];\n window.PyViz.plot_index = {}\n}\n\n/**\n * Handle update_display_data messages\n */\nfunction handle_update_output(event, handle) {\n handle_clear_output(event, {cell: {output_area: handle.output_area}})\n handle_add_output(event, handle)\n}\n\nfunction register_renderer(events, OutputArea) {\n function append_mime(data, metadata, element) {\n // create a DOM node to render to\n var toinsert = this.create_output_subarea(\n metadata,\n CLASS_NAME,\n EXEC_MIME_TYPE\n );\n this.keyboard_manager.register_events(toinsert);\n // Render to node\n var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n render(props, toinsert[0]);\n element.append(toinsert);\n return toinsert\n }\n\n events.on('output_added.OutputArea', handle_add_output);\n events.on('output_updated.OutputArea', handle_update_output);\n events.on('clear_output.CodeCell', handle_clear_output);\n events.on('delete.Cell', handle_clear_output);\n events.on('kernel_ready.Kernel', handle_kernel_cleanup);\n\n OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n safe: true,\n index: 0\n });\n}\n\nif (window.Jupyter !== undefined) {\n try {\n var events = require('base/js/events');\n var OutputArea = require('notebook/js/outputarea').OutputArea;\n if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n register_renderer(events, OutputArea);\n }\n } catch(err) {\n }\n}\n",
|
38 |
+
"application/vnd.holoviews_load.v0+json": ""
|
39 |
+
},
|
40 |
+
"metadata": {},
|
41 |
+
"output_type": "display_data"
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"data": {
|
45 |
+
"text/html": [
|
46 |
+
"<style>.bk-root, .bk-root .bk:before, .bk-root .bk:after {\n",
|
47 |
+
" font-family: var(--jp-ui-font-size1);\n",
|
48 |
+
" font-size: var(--jp-ui-font-size1);\n",
|
49 |
+
" color: var(--jp-ui-font-color1);\n",
|
50 |
+
"}\n",
|
51 |
+
"</style>"
|
52 |
+
]
|
53 |
+
},
|
54 |
+
"metadata": {},
|
55 |
+
"output_type": "display_data"
|
56 |
+
}
|
57 |
+
],
|
58 |
+
"source": [
|
59 |
+
"pn.extension('texteditor', template=\"bootstrap\", sizing_mode='stretch_width')\n",
|
60 |
+
"pn.state.template.param.update(\n",
|
61 |
+
" main_max_width=\"690px\",\n",
|
62 |
+
" header_background=\"#F08080\",\n",
|
63 |
+
")"
|
64 |
+
]
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"cell_type": "code",
|
68 |
+
"execution_count": 3,
|
69 |
+
"metadata": {},
|
70 |
+
"outputs": [],
|
71 |
+
"source": [
|
72 |
+
"file_input = pn.widgets.FileInput(width=300)\n",
|
73 |
+
"\n",
|
74 |
+
"openaikey = pn.widgets.PasswordInput(\n",
|
75 |
+
" value=\"\", placeholder=\"Enter your OpenAI API Key here...\", width=300\n",
|
76 |
+
")\n",
|
77 |
+
"prompt = pn.widgets.TextEditor(\n",
|
78 |
+
" value=\"\", placeholder=\"Enter your questions here...\", height=160, toolbar=False\n",
|
79 |
+
")\n",
|
80 |
+
"run_button = pn.widgets.Button(name=\"Run!\")\n",
|
81 |
+
"\n",
|
82 |
+
"select_k = pn.widgets.IntSlider(\n",
|
83 |
+
" name=\"Number of relevant chunks\", start=1, end=5, step=1, value=2\n",
|
84 |
+
")\n",
|
85 |
+
"select_chain_type = pn.widgets.RadioButtonGroup(\n",
|
86 |
+
" name='Chain type', \n",
|
87 |
+
" options=['stuff', 'map_reduce', \"refine\", \"map_rerank\"]\n",
|
88 |
+
")\n",
|
89 |
+
"\n",
|
90 |
+
"widgets = pn.Row(\n",
|
91 |
+
" pn.Column(prompt, run_button, margin=5),\n",
|
92 |
+
" pn.Card(\n",
|
93 |
+
" \"Chain type:\",\n",
|
94 |
+
" pn.Column(select_chain_type, select_k),\n",
|
95 |
+
" title=\"Advanced settings\", margin=10\n",
|
96 |
+
" ), width=600\n",
|
97 |
+
")"
|
98 |
+
]
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"cell_type": "code",
|
102 |
+
"execution_count": 4,
|
103 |
+
"metadata": {},
|
104 |
+
"outputs": [],
|
105 |
+
"source": [
|
106 |
+
"def qa(file, query, chain_type, k):\n",
|
107 |
+
" # load document\n",
|
108 |
+
" loader = PyPDFLoader(file)\n",
|
109 |
+
" documents = loader.load()\n",
|
110 |
+
" # split the documents into chunks\n",
|
111 |
+
" text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)\n",
|
112 |
+
" texts = text_splitter.split_documents(documents)\n",
|
113 |
+
" # select which embeddings we want to use\n",
|
114 |
+
" embeddings = OpenAIEmbeddings()\n",
|
115 |
+
" # create the vectorestore to use as the index\n",
|
116 |
+
" db = Chroma.from_documents(texts, embeddings)\n",
|
117 |
+
" # expose this index in a retriever interface\n",
|
118 |
+
" retriever = db.as_retriever(search_type=\"similarity\", search_kwargs={\"k\": k})\n",
|
119 |
+
" # create a chain to answer questions \n",
|
120 |
+
" qa = RetrievalQA.from_chain_type(\n",
|
121 |
+
" llm=OpenAI(), chain_type=chain_type, retriever=retriever, return_source_documents=True)\n",
|
122 |
+
" result = qa({\"query\": query})\n",
|
123 |
+
" print(result['result'])\n",
|
124 |
+
" return result"
|
125 |
+
]
|
126 |
+
},
|
127 |
+
{
|
128 |
+
"cell_type": "code",
|
129 |
+
"execution_count": 5,
|
130 |
+
"metadata": {},
|
131 |
+
"outputs": [],
|
132 |
+
"source": [
|
133 |
+
"convos = [] # store all panel objects in a list\n",
|
134 |
+
"\n",
|
135 |
+
"def qa_result(_):\n",
|
136 |
+
" os.environ[\"OPENAI_API_KEY\"] = openaikey.value\n",
|
137 |
+
" \n",
|
138 |
+
" # save pdf file to a temp file \n",
|
139 |
+
" if file_input.value is not None:\n",
|
140 |
+
" file_input.save(\"/.cache/temp.pdf\")\n",
|
141 |
+
" \n",
|
142 |
+
" prompt_text = prompt.value\n",
|
143 |
+
" if prompt_text:\n",
|
144 |
+
" result = qa(file=\"/.cache/temp.pdf\", query=prompt_text, chain_type=select_chain_type.value, k=select_k.value)\n",
|
145 |
+
" convos.extend([\n",
|
146 |
+
" pn.Row(\n",
|
147 |
+
" pn.panel(\"\\U0001F60A\", width=10),\n",
|
148 |
+
" prompt_text,\n",
|
149 |
+
" width=600\n",
|
150 |
+
" ),\n",
|
151 |
+
" pn.Row(\n",
|
152 |
+
" pn.panel(\"\\U0001F916\", width=10),\n",
|
153 |
+
" pn.Column(\n",
|
154 |
+
" result[\"result\"],\n",
|
155 |
+
" \"Relevant source text:\",\n",
|
156 |
+
" pn.pane.Markdown('\\n--------------------------------------------------------------------\\n'.join(doc.page_content for doc in result[\"source_documents\"]))\n",
|
157 |
+
" )\n",
|
158 |
+
" )\n",
|
159 |
+
" ])\n",
|
160 |
+
" #return convos\n",
|
161 |
+
" return pn.Column(*convos, margin=15, width=575, min_height=400)"
|
162 |
+
]
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"cell_type": "code",
|
166 |
+
"execution_count": 6,
|
167 |
+
"metadata": {},
|
168 |
+
"outputs": [],
|
169 |
+
"source": [
|
170 |
+
"qa_interactive = pn.panel(\n",
|
171 |
+
" pn.bind(qa_result, run_button),\n",
|
172 |
+
" loading_indicator=True,\n",
|
173 |
+
")"
|
174 |
+
]
|
175 |
+
},
|
176 |
+
{
|
177 |
+
"cell_type": "code",
|
178 |
+
"execution_count": 7,
|
179 |
+
"metadata": {},
|
180 |
+
"outputs": [],
|
181 |
+
"source": [
|
182 |
+
"output = pn.WidgetBox('*Output will show up here:*', qa_interactive, width=630, scroll=True)"
|
183 |
+
]
|
184 |
+
},
|
185 |
+
{
|
186 |
+
"cell_type": "code",
|
187 |
+
"execution_count": 9,
|
188 |
+
"metadata": {},
|
189 |
+
"outputs": [
|
190 |
+
{
|
191 |
+
"data": {
|
192 |
+
"application/vnd.jupyter.widget-view+json": {
|
193 |
+
"model_id": "d572f1a081e741cbaca0638d4d0a860e",
|
194 |
+
"version_major": 2,
|
195 |
+
"version_minor": 0
|
196 |
+
},
|
197 |
+
"text/plain": [
|
198 |
+
"BokehModel(combine_events=True, render_bundle={'docs_json': {'c482f79e-56ca-4f46-b90f-25b4af3de8df': {'defs': …"
|
199 |
+
]
|
200 |
+
},
|
201 |
+
"execution_count": 9,
|
202 |
+
"metadata": {},
|
203 |
+
"output_type": "execute_result"
|
204 |
+
}
|
205 |
+
],
|
206 |
+
"source": [
|
207 |
+
"# layout\n",
|
208 |
+
"pn.Column(\n",
|
209 |
+
" pn.pane.Markdown(\"\"\"\n",
|
210 |
+
" ## \\U0001F60A! Question Answering with your PDF file\n",
|
211 |
+
" \n",
|
212 |
+
" 1) Upload a PDF. 2) Enter OpenAI API key. This costs $. Set up billing at [OpenAI](https://platform.openai.com/account). 3) Type a question and click \"Run\".\n",
|
213 |
+
" \n",
|
214 |
+
" \"\"\"),\n",
|
215 |
+
" pn.Row(file_input,openaikey),\n",
|
216 |
+
" output,\n",
|
217 |
+
" widgets\n",
|
218 |
+
"\n",
|
219 |
+
").servable()"
|
220 |
+
]
|
221 |
+
}
|
222 |
+
],
|
223 |
+
"metadata": {
|
224 |
+
"kernelspec": {
|
225 |
+
"display_name": "env1",
|
226 |
+
"language": "python",
|
227 |
+
"name": "python3"
|
228 |
+
},
|
229 |
+
"language_info": {
|
230 |
+
"codemirror_mode": {
|
231 |
+
"name": "ipython",
|
232 |
+
"version": 3
|
233 |
+
},
|
234 |
+
"file_extension": ".py",
|
235 |
+
"mimetype": "text/x-python",
|
236 |
+
"name": "python",
|
237 |
+
"nbconvert_exporter": "python",
|
238 |
+
"pygments_lexer": "ipython3",
|
239 |
+
"version": "3.9.13"
|
240 |
+
},
|
241 |
+
"orig_nbformat": 4
|
242 |
+
},
|
243 |
+
"nbformat": 4,
|
244 |
+
"nbformat_minor": 2
|
245 |
+
}
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
langchain
|
2 |
+
openai
|
3 |
+
chromadb
|
4 |
+
pypdf
|
5 |
+
tiktoken
|
6 |
+
panel
|
7 |
+
notebook
|
8 |
+
jupyter_bokeh
|