DanielIglesias97 committed on
Commit
6aa0bc7
·
1 Parent(s): 6c86e55

We have removed the Flask application and replaced it with a

Browse files

Streamlit app with the same functionality so that it can be run
in the Hugging Face Space.

Dockerfile CHANGED
@@ -1,5 +1,5 @@
1
  # Use an official Python runtime as a parent image
2
- FROM python:3.9-slim
3
 
4
  RUN apt-get update && \
5
  apt-get install -y git
@@ -27,5 +27,12 @@ RUN pip install --no-cache-dir -r requirements.txt
27
  # Copy the application code into the container
28
  COPY --chown=user . .
29
 
30
- # Run the application
31
- CMD ["python", "server.py"]
 
 
 
 
 
 
 
 
1
  # Use an official Python runtime as a parent image
2
+ FROM python:3.9-slim AS base
3
 
4
  RUN apt-get update && \
5
  apt-get install -y git
 
27
  # Copy the application code into the container
28
  COPY --chown=user . .
29
 
30
+ # Stage: Run the text search engine script under the Python debugger (pdb).
31
+ FROM base AS debug
32
+
33
+ CMD ["python", "-m", "pdb", "text_search_engine.py"]
34
+
35
+ # Stage: Execute the Streamlit application.
36
+ FROM base AS run
37
+
38
+ CMD ["streamlit", "run", "streamlit_app.py", "--server.port", "7860"]
config.cfg CHANGED
@@ -1,4 +1,6 @@
1
- [DEFAULT]
 
 
2
  embeddings_csv_path = /home/user/app/data/movie_embeddings.csv
3
 
4
  [TEST]
 
1
+ [SERVER]
2
+ host_ip_address = 0.0.0.0
3
+ port_number = 7860
4
  embeddings_csv_path = /home/user/app/data/movie_embeddings.csv
5
 
6
  [TEST]
index.html DELETED
@@ -1,39 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Cool Hello World</title>
7
- <style>
8
- body {
9
- background-color: #2D2D2D;
10
- display: flex;
11
- justify-content: center;
12
- align-items: center;
13
- height: 100vh;
14
- margin: 0;
15
- font-family: 'Arial', sans-serif;
16
- }
17
- .container {
18
- text-align: center;
19
- }
20
- h1 {
21
- color: #C26356;
22
- font-size: 4em;
23
- text-shadow: 2px 2px 4px rgba(0,0,0,0.5);
24
- margin-bottom: 20px;
25
- }
26
- p {
27
- color: white;
28
- font-size: 1.5em;
29
- }
30
- </style>
31
- </head>
32
- <body>
33
- <div class="container">
34
- <h1>Flask server for a text search engine</h1>
35
- <p>This repository contains the code of a Flask server that allows for a search within a dataset of movie reviews.</p>
36
- <p>If you want to check the code, click the Files tab!</p>
37
- </div>
38
- </body>
39
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,7 +1,7 @@
1
  datasets==3.2.0
2
- flask==3.1.0
 
3
  numpy==2.0.2
4
  pandas==2.2.3
5
  scikit-learn==1.6.1
6
- git+https://github.com/UKPLab/sentence-transformers.git@e2a0098b0fbe10bf9a140a9b1d4c2a3451f1571f
7
- faiss-cpu==1.9.0.post1
 
1
  datasets==3.2.0
2
+ faiss-cpu==1.9.0.post1
3
+ git+https://github.com/UKPLab/sentence-transformers.git@e2a0098b0fbe10bf9a140a9b1d4c2a3451f1571f
4
  numpy==2.0.2
5
  pandas==2.2.3
6
  scikit-learn==1.6.1
7
+ streamlit==1.42.0
 
server.py DELETED
@@ -1,29 +0,0 @@
1
- import configparser
2
- from flask import Flask, request, render_template
3
- from image_search_engine import ImageSearchEngine
4
- import os
5
-
6
- app = Flask(__name__, template_folder='/home/user/app/views', static_url_path='/static')
7
-
8
- config = configparser.ConfigParser()
9
- config.read('config.cfg')
10
- embeddings_csv_path = config['DEFAULT']['embeddings_csv_path']
11
-
12
- image_search_engine_manager = ImageSearchEngine(embeddings_csv_path)
13
- df, model = image_search_engine_manager.load_data_and_model()
14
- image_search_engine_manager.generate_embeddings(df, model, overwrite=False)
15
-
16
- @app.route('/')
17
- def search():
18
- query = request.args.get('query')
19
- results = []
20
-
21
- if ((query!=None) and len(query.strip())>0):
22
- results = image_search_engine_manager.semantic_search(query, model)
23
-
24
- return render_template("search.html", results=results)
25
-
26
- def main():
27
- app.run(host="0.0.0.0", port="5000", debug=True)
28
-
29
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
static/css/styles.css DELETED
@@ -1,77 +0,0 @@
1
- /* General Styles */
2
- body {
3
- font-family: Arial, sans-serif;
4
- background-color: #f4f4f4;
5
- margin: 0;
6
- padding: 0;
7
- color: #333;
8
- }
9
-
10
- .container {
11
- max-width: 800px;
12
- margin: 50px auto;
13
- padding: 20px;
14
- background-color: #fff;
15
- box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
16
- border-radius: 8px;
17
- }
18
-
19
- h1 {
20
- text-align: center;
21
- color: #444;
22
- margin-bottom: 30px;
23
- }
24
-
25
- hr {
26
- display: block;
27
- margin-top: 0.5em;
28
- margin-bottom: 0.5em;
29
- margin-left: auto;
30
- margin-right: auto;
31
- border-style: inset;
32
- border-width: 1px;
33
- }
34
-
35
- /* Review List Styles */
36
- .review-list {
37
- list-style: none;
38
- padding: 0;
39
- }
40
-
41
- .review-item {
42
- background-color: #f9f9f9;
43
- margin-bottom: 20px;
44
- padding: 20px;
45
- border-radius: 8px;
46
- border: 1px solid #ddd;
47
- transition: transform 0.2s ease, box-shadow 0.2s ease;
48
- }
49
-
50
- .review-item:hover {
51
- transform: translateY(-5px);
52
- box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1);
53
- }
54
-
55
- .review-text {
56
- font-size: 16px;
57
- line-height: 1.6;
58
- margin: 0 0 10px 0;
59
- }
60
-
61
- .review-label {
62
- font-size: 14px;
63
- font-weight: bold;
64
- color: #fff;
65
- background-color: #007bff;
66
- padding: 5px 10px;
67
- border-radius: 4px;
68
- display: inline-block;
69
- }
70
-
71
- .review-label.positive {
72
- background-color: #28a745;
73
- }
74
-
75
- .review-label.negative {
76
- background-color: #dc3545;
77
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
streamlit_app.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import configparser
2
+ import streamlit as st
3
+ from text_search_engine import TextSearchEngine
4
+
5
+ config = configparser.ConfigParser()
6
+ config.read('config.cfg')
7
+ embeddings_csv_path = config['SERVER']['embeddings_csv_path']
8
+
9
+ text_search_engine_manager = TextSearchEngine(embeddings_csv_path)
10
+ df, model = text_search_engine_manager.load_data_and_model()
11
+ text_search_engine_manager.generate_embeddings(df, model, overwrite=False)
12
+
13
+ st.title("Text Search Engine")
14
+ text_search = st.text_input("Search movie reviews by query", value="")
15
+
16
+ if (text_search):
17
+ results = text_search_engine_manager.semantic_search(text_search, model)
18
+
19
+ for current_result in results['text'].values:
20
+ st.markdown("%s"%current_result)
21
+ st.divider()
image_search_engine.py → text_search_engine.py RENAMED
@@ -3,14 +3,16 @@ import configparser
3
  from datasets import load_dataset
4
  from sentence_transformers import SentenceTransformer
5
  from sklearn.metrics.pairwise import cosine_similarity
 
6
  import numpy as np
7
  import os
8
  import pandas as pd
9
 
10
- class ImageSearchEngine():
11
 
12
  def __init__(self, embeddings_csv_path):
13
  self.embeddings_csv_path = embeddings_csv_path
 
14
 
15
  def load_data_and_model(self):
16
  # Load a sample dataset (Stanford Movie Review Dataset)
@@ -60,17 +62,17 @@ if __name__ == "__main__":
60
  config = configparser.ConfigParser()
61
  config.read('config.cfg')
62
 
63
- embeddings_csv_path = config['DEFAULT']['embeddings_csv_path']
64
 
65
- image_search_engine_manager = ImageSearchEngine(embeddings_csv_path)
66
 
67
  # Generate and save embeddings (run once)
68
- df, model = image_search_engine_manager.load_data_and_model()
69
 
70
- image_search_engine_manager.generate_embeddings(df, model, overwrite=False)
71
 
72
  # Example search
73
  query = config['TEST']['query']
74
- results = image_search_engine_manager.semantic_search(query, model)
75
 
76
  print('Results -> ', results)
 
3
  from datasets import load_dataset
4
  from sentence_transformers import SentenceTransformer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
+ import torch
7
  import numpy as np
8
  import os
9
  import pandas as pd
10
 
11
+ class TextSearchEngine():
12
 
13
  def __init__(self, embeddings_csv_path):
14
  self.embeddings_csv_path = embeddings_csv_path
15
+ torch.classes.__path__ = []
16
 
17
  def load_data_and_model(self):
18
  # Load a sample dataset (Stanford Movie Review Dataset)
 
62
  config = configparser.ConfigParser()
63
  config.read('config.cfg')
64
 
65
+ embeddings_csv_path = config['SERVER']['embeddings_csv_path']
66
 
67
+ text_search_engine_manager = TextSearchEngine(embeddings_csv_path)
68
 
69
  # Generate and save embeddings (run once)
70
+ df, model = text_search_engine_manager.load_data_and_model()
71
 
72
+ text_search_engine_manager.generate_embeddings(df, model, overwrite=False)
73
 
74
  # Example search
75
  query = config['TEST']['query']
76
+ results = text_search_engine_manager.semantic_search(query, model)
77
 
78
  print('Results -> ', results)
views/search.html DELETED
@@ -1,26 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8">
5
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Movie Reviews</title>
7
- <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='css/styles.css') }}">
8
- </head>
9
- <body>
10
- <form action="/">
11
- <input type="text" placeholder="Search.." name="query">
12
- <button type="submit">Submit</button>
13
- </form>
14
- <div class="container">
15
- <h1>Movie Reviews</h1>
16
- <ul class="review-list">
17
- {% if results|length > 0 %}
18
- {% for item in results['text'].values %}
19
- <li>{{ item }}</li>
20
- <hr>
21
- {% endfor %}
22
- {% endif %}
23
- </ul>
24
- </div>
25
- </body>
26
- </html>