zenith04 committed
Commit 6bc88c9 · verified · 1 Parent(s): 6a064e7

Upload 75 files

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete change set.

Files changed (50):
  1. .dockerignore +9 -0
  2. .gitattributes +4 -0
  3. .gitignore +181 -0
  4. Dockerfile +26 -0
  5. LICENSE +21 -0
  6. app.py +34 -0
  7. chromadb/chroma.sqlite3 +3 -0
  8. chromadb/d51dabe8-c547-49cd-8dde-bc9590ea4b56/data_level0.bin +3 -0
  9. chromadb/d51dabe8-c547-49cd-8dde-bc9590ea4b56/header.bin +0 -0
  10. chromadb/d51dabe8-c547-49cd-8dde-bc9590ea4b56/index_metadata.pickle +3 -0
  11. chromadb/d51dabe8-c547-49cd-8dde-bc9590ea4b56/length.bin +0 -0
  12. chromadb/d51dabe8-c547-49cd-8dde-bc9590ea4b56/link_lists.bin +0 -0
  13. constraints.txt +6 -0
  14. data/anime_data_7490.csv +0 -0
  15. data/anime_data_7490.txt +0 -0
  16. predict_trial.py +21 -0
  17. requirements.txt +44 -0
  18. research/trials.ipynb +616 -0
  19. setup.py +48 -0
  20. src/recommendationSystem.egg-info/PKG-INFO +98 -0
  21. src/recommendationSystem.egg-info/SOURCES.txt +23 -0
  22. src/recommendationSystem.egg-info/dependency_links.txt +1 -0
  23. src/recommendationSystem.egg-info/top_level.txt +1 -0
  24. src/recommendationSystem/__init__.py +0 -0
  25. src/recommendationSystem/__pycache__/__init__.cpython-311.pyc +0 -0
  26. src/recommendationSystem/__pycache__/__init__.cpython-313.pyc +0 -0
  27. src/recommendationSystem/chatbot/__init__.py +0 -0
  28. src/recommendationSystem/chatbot/__pycache__/__init__.cpython-311.pyc +0 -0
  29. src/recommendationSystem/chatbot/__pycache__/__init__.cpython-313.pyc +0 -0
  30. src/recommendationSystem/chatbot/client_module/__init__.py +0 -0
  31. src/recommendationSystem/chatbot/client_module/__pycache__/__init__.cpython-311.pyc +0 -0
  32. src/recommendationSystem/chatbot/client_module/__pycache__/__init__.cpython-313.pyc +0 -0
  33. src/recommendationSystem/chatbot/client_module/__pycache__/utils.cpython-311.pyc +0 -0
  34. src/recommendationSystem/chatbot/client_module/__pycache__/utils.cpython-313.pyc +0 -0
  35. src/recommendationSystem/chatbot/client_module/utils.py +56 -0
  36. src/recommendationSystem/chatbot/server_modules/__init__.py +0 -0
  37. src/recommendationSystem/chatbot/server_modules/__pycache__/__init__.cpython-311.pyc +0 -0
  38. src/recommendationSystem/chatbot/server_modules/__pycache__/__init__.cpython-313.pyc +0 -0
  39. src/recommendationSystem/chatbot/server_modules/__pycache__/llm.cpython-311.pyc +0 -0
  40. src/recommendationSystem/chatbot/server_modules/__pycache__/llm.cpython-313.pyc +0 -0
  41. src/recommendationSystem/chatbot/server_modules/__pycache__/load_vector_store.cpython-311.pyc +0 -0
  42. src/recommendationSystem/chatbot/server_modules/__pycache__/load_vector_store.cpython-313.pyc +0 -0
  43. src/recommendationSystem/chatbot/server_modules/__pycache__/query_handler.cpython-311.pyc +0 -0
  44. src/recommendationSystem/chatbot/server_modules/__pycache__/query_handler.cpython-313.pyc +0 -0
  45. src/recommendationSystem/chatbot/server_modules/llm.py +30 -0
  46. src/recommendationSystem/chatbot/server_modules/load_vector_store.py +19 -0
  47. src/recommendationSystem/chatbot/server_modules/query_handler.py +9 -0
  48. src/recommendationSystem/components/__init__.py +0 -0
  49. src/recommendationSystem/components/__pycache__/__init__.cpython-311.pyc +0 -0
  50. src/recommendationSystem/components/__pycache__/__init__.cpython-313.pyc +0 -0
.dockerignore ADDED
@@ -0,0 +1,9 @@
+ .git
+ *.log
+ *.pyc
+ __pycache__/
+ template.py
+ rsys/
+ logs/
+ artifact/
+ try.py
.gitattributes ADDED
@@ -0,0 +1,4 @@
+ chromadb_anime_data_7490_e5largev2_1.5K_batch filter=lfs diff=lfs merge=lfs -text
+ chromadb/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
+ chromadb/d51dabe8-c547-49cd-8dde-bc9590ea4b56/data_level0.bin filter=lfs diff=lfs merge=lfs -text
+ chromadb/d51dabe8-c547-49cd-8dde-bc9590ea4b56/index_metadata.pickle filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,181 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ #uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+ .pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
+
+ # project files
+ template.py
+ rsys/
+ logs/
+ artifact/
+ try.py
Dockerfile ADDED
@@ -0,0 +1,26 @@
+ # Use official Python image
+ FROM python:3.13-slim
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy all files to the container
+ COPY . .
+
+ # Install dependencies
+ RUN pip install --no-cache-dir -r requirements.txt -c constraints.txt
+
+ # Optional: install as a local package (important!)
+ RUN pip install -e .
+
+ # Expose Streamlit default port
+ EXPOSE 8501
+
+ # Alternative: run the app with both FastAPI and Streamlit
+ #CMD ["python", "run_both.py", "run"]
+
+
+ # Run Streamlit app
+ CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
+
+
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Satkar Sarvankar
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
app.py ADDED
@@ -0,0 +1,34 @@
+ import streamlit as st
+ import pandas as pd
+ import pickle
+
+ #------------------------------------ RAG-BASED CHATBOT ---------------------------------------
+
+ import sys
+ import os
+ sys.path.append(os.path.abspath("src"))
+
+ from recommendationSystem.chatbot.client_module.utils import chatbot
+
+ chatbot()
+ #render_history_download()
+
+ #----------------------------------- RECOMMENDATION SYSTEM -----------------------------------------
+
+ from utils import fetch_transformed_data, anime_info
+
+ st.title("Anime Recommender System")
+
+ data_path, matrix_path = fetch_transformed_data()
+
+ anime_data = pd.read_csv(data_path)
+ similarity_matrix = pickle.load(open(file=matrix_path, mode='rb'))
+
+ select_anime_name = st.selectbox(
+     "Choose Anime Name : ",
+     anime_data['title'].values,
+     index=None,
+     placeholder="Select the anime for recommendation..."
+ )
+
+ anime_info(anime_name=select_anime_name, anime_data=anime_data, similarity_matrix=similarity_matrix)
chromadb/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:94d162cc7bc68f7c91bbf4dfe6512879f4e271945a716693ba632605e683a77d
+ size 98844672
chromadb/d51dabe8-c547-49cd-8dde-bc9590ea4b56/data_level0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2715e5c7896dcfb5a4f87f33d40a203783d63405f7be69010fdf6acf706cffa1
+ size 42360000
chromadb/d51dabe8-c547-49cd-8dde-bc9590ea4b56/header.bin ADDED
Binary file (100 Bytes).
 
chromadb/d51dabe8-c547-49cd-8dde-bc9590ea4b56/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9f9f0724d8d13813a585a4381c5ff5e1068a9759b89f9a92ac5873b9d415551d
+ size 372160
chromadb/d51dabe8-c547-49cd-8dde-bc9590ea4b56/length.bin ADDED
Binary file (40 kB).
 
chromadb/d51dabe8-c547-49cd-8dde-bc9590ea4b56/link_lists.bin ADDED
Binary file (69.6 kB).
 
constraints.txt ADDED
@@ -0,0 +1,6 @@
+ # Not required, but gets installed due to LangChain & Hugging Face
+ tensorflow==0.0.0
+ tensorflow-intel==0.0.0
+ keras==0.0.0
+ ml-dtypes==0.0.0
+ tensorboard==0.0.0
data/anime_data_7490.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/anime_data_7490.txt ADDED
The diff for this file is too large to render. See raw diff
 
predict_trial.py ADDED
@@ -0,0 +1,21 @@
+ import sys
+ #import pandas as pd
+ #import pickle
+ import os
+
+ from recommendationSystem.components.data_ingestion import DataIngestion
+ from recommendationSystem.components.data_transformation import DataTransformation
+ from recommendationSystem.utils.common import CustomException
+
+ class return_data:
+     def __init__(self):
+         pass
+
+     def predict(self):
+         try:
+             obj = DataIngestion().initiate_data_ingestion()
+             matrix_path, data_path = DataTransformation().initiate_data_transformation_obj(obj)
+             return matrix_path, data_path
+
+         except Exception as e:
+             raise CustomException(e, sys)
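The paths returned by `predict()` feed the Streamlit app: `app.py` consumes them through a `fetch_transformed_data` wrapper from a local `utils` module that is not among the 50 files rendered here. A hedged sketch of the same consumption pattern, not the committed helper:

```python
import pickle

import pandas as pd

from predict_trial import return_data

# Run ingestion + transformation and collect the artifact paths
matrix_path, data_path = return_data().predict()

anime_data = pd.read_csv(data_path)          # transformed anime table with a 'title' column
with open(matrix_path, "rb") as f:
    similarity_matrix = pickle.load(f)       # precomputed cosine-similarity matrix

print(anime_data.shape, similarity_matrix.shape)
```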
requirements.txt ADDED
@@ -0,0 +1,44 @@
+ numpy
+ pandas
+ matplotlib
+ seaborn
+ scikit-learn
+ nltk
+ streamlit
+ ipykernel
+ docker
+ dill
+
+ # chatbot requirements
+
+ # Web Framework
+ fastapi
+ uvicorn[standard]
+
+ # LangChain & Ecosystem
+ langchain
+ langchain-community
+ langchain-core
+ langchain-groq
+ langchain-huggingface
+
+ # Vectorstore
+ chromadb
+
+ protobuf==3.20.3
+
+ # Embeddings
+ sentence-transformers
+
+ # Environment Variables
+ python-dotenv
+
+ # Typing & Utilities
+ pydantic
+ requests
+
+ # Logging (optional but recommended)
+ #loguru
+ python-multipart
+
+ #-e.
research/trials.ipynb ADDED
@@ -0,0 +1,616 @@
+ In [1]:
+ import pandas as pd
+ import numpy as np
+ import os
+
+ In [74]:
+ %pwd
+ Out[74]: 'C:\\Users\\satka\\OneDrive\\Desktop\\recommendation-system\\data'
+
+ In [2]:
+ os.chdir("C:/Users/satka/OneDrive/Desktop/recommendation-system/data")
+
+ In [3]:
+ df = pd.read_csv("anime_data_24.csv")
+ df_links = pd.read_csv('anime_links.csv')
+
+ In [4]:
+ anime = df.merge(df_links,on='name')
+
+ In [5]:
+ anime = anime[['name','sypnopsis','image','type','episodes','status','studios','source','genres','demographic','links']]
+
+ In [56]:
+ '\n'.join(anime[anime.name =="One Piece Fan Letter"].sypnopsis.to_list())
+ Out[56]: 'Two years after the Summit War in which Straw Hat pirate Luffy lost his brother Ace, the story takes place on the Sabaody Archipelago. The protagonist, a young girl who has a strong admiration for Nami, sets off on a small adventure. This is an ensemble drama that focuses on people who do not "pursue" ONE PIECE, depicting the reunion of the Straw Hat Pirates from their perspective.\n\n(Source: Official site, translated)'
+
+ In [79]:
+ #anime = anime.iloc[0:8000]
+
+ In [80]:
+ anime.columns
+ Out[80]: Index(['name', 'sypnopsis', 'image', 'type', 'episodes', 'status', 'studios',
+        'source', 'genres', 'demographic', 'links'],
+       dtype='object')
+
+ In [81]:
+ anime.info()
+ <class 'pandas.core.frame.DataFrame'>
+ RangeIndex: 13501 entries, 0 to 13500
+ Data columns (total 11 columns):
+  #   Column       Non-Null Count  Dtype
+ ---  ------       --------------  -----
+  0   name         13501 non-null  object
+  1   sypnopsis    13499 non-null  object
+  2   image        13492 non-null  object
+  3   type         13501 non-null  object
+  4   episodes     13501 non-null  object
+  5   status       13501 non-null  object
+  6   studios      13501 non-null  object
+  7   source       13501 non-null  object
+  8   genres       11823 non-null  object
+  9   demographic  13466 non-null  object
+  10  links        13501 non-null  object
+ dtypes: object(11)
+ memory usage: 1.1+ MB
+
+ In [82]:
+ anime.dropna(inplace=True)
+
+ In [83]:
+ anime['sypnopsis_length'] = [len(i) for i in anime.sypnopsis]
+
+ In [84]:
+ anime = anime[anime['sypnopsis_length'] > 300]
+
+ In [85]:
+ anime['tags'] = anime['sypnopsis']+" " + anime['type']+" " + anime['episodes']+" " + anime['status'] +" "+ anime['studios'] +" "+ anime['source']+" " + anime['genres']+" " + anime['demographic']
+
+ In [117]:
+ anime_1 = anime.copy()
+ anime_1 = anime_1[['image','name','tags','links']]
+
+ In [118]:
+ anime_1 = anime_1.reset_index(drop=True)
+ anime_1 = anime_1.rename({'name':'title'},axis=1)
+
+ In [ ]:
+ from sklearn.feature_extraction.text import CountVectorizer
+
+ cv = CountVectorizer(max_features=5000,stop_words='english')
+
+ In [120]:
+ from nltk.stem.porter import PorterStemmer
+ ps = PorterStemmer()
+
+ In [ ]:
+ from sklearn.metrics.pairwise import cosine_similarity
+
+ In [126]:
+ def removing_blank_lines(text):
+     return text.replace('\n'," ")
+
+ def removing_pre_suff_ix(text):
+     y = []
+
+     for i in text.split():
+         y.append(ps.stem(i))
+
+     return " ".join(y)
+
+ def converting_into_vectors(text):
+     vec = cv.fit_transform(text).toarray()
+     return vec
+
+ def finding_similarity(vec):
+     similarity = cosine_similarity(vec)
+     return similarity
+
+ In [127]:
+ anime_1.tags = anime_1.tags.apply(removing_blank_lines)
+ anime_1.tags = anime_1.tags.apply(removing_blank_lines)
+ vectors = converting_into_vectors(anime_1.tags)
+ similarity = finding_similarity(vectors)
+
+ In [131]:
+ anime_1[30:40]
+ Out[131]:
+                                                 image  \
+ 30  https://cdn.myanimelist.net/images/anime/12/89...
+ 31  https://cdn.myanimelist.net/images/anime/1170/...
+ 32  https://cdn.myanimelist.net/images/anime/1741/...
+ 33  https://cdn.myanimelist.net/images/anime/1792/...
+ 34  https://cdn.myanimelist.net/images/anime/1918/...
+ 35  https://cdn.myanimelist.net/images/anime/1643/...
+ 36  https://cdn.myanimelist.net/images/anime/1000/...
+ 37  https://cdn.myanimelist.net/images/anime/1084/...
+ 38  https://cdn.myanimelist.net/images/anime/1448/...
+ 39  https://cdn.myanimelist.net/images/anime/7/819...
+
+                                                 title  \
+ 30  Gintama.: Shirogane no Tamashii-hen
+ 31  Vinland Saga Season 2
+ 32  Monogatari Series: Off & Monster Season
+ 33  Jujutsu Kaisen 2nd Season
+ 34  Mob Psycho 100 II
+ 35  Boku no Kokoro no Yabai Yatsu 2nd Season
+ 36  Shingeki no Kyojin: The Final Season
+ 37  Kizumonogatari III: Reiketsu-hen
+ 38  Bocchi the Rock!
+ 39  Haikyuu!! Karasuno Koukou vs. Shiratorizawa Ga...
+
+                                                 tags  \
+ 30  after the fierc battl on rakuyou, the untold p...
+ 31  after hi father' death and the destruct of hi ...
+ 32  koyomi araragi spent hi last year of high scho...
+ 33  the year is 2006, and the hall of tokyo prefec...
+ 34  shigeo "mob" kageyama is now matur and underst...
+ 35  after an event winter break, kyoutar ichikawa ...
+ 36  gabi braun and falco grice have been train the...
+ 37  after help reviv the legendari vampir kiss-sho...
+ 38  yearn to make friend and perform live with a b...
+ 39  after the victori against aoba jousai high, ka...
+
+                                                 links
+ 30  https://myanimelist.net/anime/36838/Gintama__S...
+ 31  https://myanimelist.net/anime/49387/Vinland_Sa...
+ 32  https://myanimelist.net/anime/57864/Monogatari...
+ 33  https://myanimelist.net/anime/51009/Jujutsu_Ka...
+ 34  https://myanimelist.net/anime/37510/Mob_Psycho...
+ 35  https://myanimelist.net/anime/55690/Boku_no_Ko...
+ 36  https://myanimelist.net/anime/40028/Shingeki_n...
+ 37  https://myanimelist.net/anime/31758/Kizumonoga...
+ 38  https://myanimelist.net/anime/47917/Bocchi_the...
+ 39  https://myanimelist.net/anime/32935/Haikyuu_Ka...
+
+ In [ ]:
+ #from sklearn.metrics.pairwise import cosine_similarity
+ #similarity = cosine_similarity(vectors)
+
+ In [141]:
+ def recommend(anime):
+     anime_index = anime_1[anime_1['title']== anime].index[0]
+     distances = np.around(similarity[anime_index],2)
+     anime_list = sorted(list(enumerate(distances)),reverse=True,key=lambda x:x[1])[1:8]
+
+     for i in anime_list:
+         #print(' index no. ', '|', ' title ', '|', ' similarity score ')
+         print(i[0], '|', anime_1.iloc[i[0]].title, '|', i[1])
+         print(anime_1.iloc[i[0]].links, '\n')
+
+ In [142]:
+ recommend('Jujutsu Kaisen 2nd Season')
+ 3720 | Radiant | 0.28
+ https://myanimelist.net/anime/37202/Radiant
+
+ 6790 | Dokyuu Hentai HxEros | 0.28
+ https://myanimelist.net/anime/40623/Dokyuu_Hentai_HxEros
+
+ 100 | Jujutsu Kaisen | 0.27
+ https://myanimelist.net/anime/40748/Jujutsu_Kaisen
+
+ 169 | Jujutsu Kaisen 0 Movie | 0.25
+ https://myanimelist.net/anime/48561/Jujutsu_Kaisen_0_Movie
+
+ 666 | Dead Dead Demons Dededede Destruction (OVA) | 0.25
+ https://myanimelist.net/anime/58883/Dead_Dead_Demons_Dededede_Destruction_OVA
+
+ 1651 | Dead Dead Demons Dededede Destruction | 0.25
+ https://myanimelist.net/anime/51358/Dead_Dead_Demons_Dededede_Destruction
+
+ 2390 | True Tears | 0.25
+ https://myanimelist.net/anime/2129/True_Tears
+
+ In [100]:
+ sorted(list(enumerate(similarity[anime_1[anime_1['title']== 'Shingeki no Kyojin Season 3 Part 2'].index[0]])),reverse=True,key=lambda x:x[1])[1:6]
+ Out[100]:
+ [(720, np.float64(0.4583333333333335)),
+  (123, np.float64(0.37709985557577297)),
+  (42, np.float64(0.32076651393589245)),
+  (26, np.float64(0.3051285766293647)),
+  (110, np.float64(0.29120520167670094))]
+
+ In [101]:
+ anime_1.iloc[720]
+ Out[101]:
+ image    https://cdn.myanimelist.net/images/anime/1710/...
+ title    Shingeki no Kyojin Season 2 Movie: Kakusei no ...
+ tags     eren yeager and other of the 104th train corp ...
+ links    https://myanimelist.net/anime/36702/Shingeki_n...
+ Name: 720, dtype: object
+
+ In [ ]:
+
+ (Notebook metadata: kernel "Python 3", language Python 3.13.2, nbformat 4.5)
setup.py ADDED
@@ -0,0 +1,48 @@
+ # Importing Libraries
+ from setuptools import find_packages, setup
+ #from typing import List
+
+
+ # Description of the local package
+ with open("README.md", "r", encoding="utf-8") as f:
+     long_description = f.read()
+
+ # Define metadata variables
+ __version__ = '0.0.1'
+ src_name = 'recommendation-system'
+ author_name = 'Satkar'
+ author_user_name = 'Zenith40'
+ src_repo = 'recommendationSystem'  # name of the folder inside src
+ author_email = '[email protected]'
+
+ # Function to read requirements.txt file
+ '''HYPEN_E_DOT = '-e.'
+ def get_requirements(file_path:str) -> List[str]:
+     requirements = []
+
+     with open(file_path) as file_obj:
+         requirements = file_obj.readlines()
+         requirements = [req.replace("\n","") for req in requirements]
+
+     if HYPEN_E_DOT in requirements:
+         requirements.remove(HYPEN_E_DOT)
+ '''
+
+
+ # Package Information
+ setup(
+     name = src_repo,
+     version = __version__,
+     author = author_name,
+     author_email = '[email protected]',
+     description = 'Recommendation system + RAG Chatbot',
+     long_description=long_description,
+     long_description_content_type='text/markdown',
+     url = f"https://github.com/{author_user_name}/{src_name}",
+     project_urls={
+         "Bug Tracker" : f"https://github.com/{author_user_name}/{src_name}/issues"
+     },
+     package_dir={'':'src'},
+     packages = find_packages(where='src'),
+     #install_requires = get_requirements('requirements.txt')
+ )
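The commented-out `get_requirements` helper above never returns its list, which is presumably why it stays disabled. A minimal corrected sketch, assuming the same `-e.` sentinel used in requirements.txt:

```python
from typing import List

HYPEN_E_DOT = '-e.'

def get_requirements(file_path: str) -> List[str]:
    """Read a requirements file and drop the editable-install sentinel."""
    with open(file_path) as file_obj:
        requirements = [req.strip() for req in file_obj.readlines()]

    if HYPEN_E_DOT in requirements:
        requirements.remove(HYPEN_E_DOT)

    return requirements  # the original draft omitted this return
```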
src/recommendationSystem.egg-info/PKG-INFO ADDED
@@ -0,0 +1,98 @@
+ Metadata-Version: 2.1
+ Name: recommendationSystem
+ Version: 0.0.1
+ Summary: Recommendation system + RAG Chatbot
+ Home-page: https://github.com/Zenith40/recommendation-system
+ Author: Satkar
+ Author-email: [email protected]
+ Project-URL: Bug Tracker, https://github.com/Zenith40/recommendation-system/issues
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+
+ # 🎌 Anime Recommendation System
+
+ This project is an anime recommendation engine that suggests similar anime based on the storyline using **cosine similarity**. It processes and analyzes a dataset of **7,000+ anime** titles collected through web scraping, and applies **natural language processing (NLP)** techniques to generate meaningful recommendations.
+
+ The application is containerized using **Docker** for consistent deployment across environments.
+
+ ---
+
+ ## 🚀 Features
+
+ - Recommends anime based on **storyline similarity**
+ - Built using **CountVectorizer + Porter Stemmer + Cosine Similarity**
+ - Cleaned and preprocessed data pipeline from raw scraped sources
+ - Fast, scalable recommendation system
+ - Dockerized for smooth deployment
+
+ ---
+
+ ## 🛠 Tech Stack
+
+ - **Python 3.9+**
+ - **scikit-learn** for CountVectorizer & Cosine Similarity
+ - **NLTK** for text preprocessing (PorterStemmer)
+ - **Streamlit** for the web UI (`app.py`), with **FastAPI** available for an API backend
+ - **Docker** for containerization
+ - **BeautifulSoup / Scrapy** (for scraping, if applicable)
+
+ ---
+
+ ## 📊 Methodology
+
+ 1. **Data Collection**
+    - Scraped storyline and metadata for 12,000+ anime.
+    - Structured the data in a tabular format and removed duplicates, nulls, and irrelevant entries.
+
+ 2. **Text Preprocessing**
+    - Converted text to lowercase
+    - Removed special characters and stop words
+    - Applied **Porter Stemming** to normalize tokens
+
+ 3. **Feature Extraction**
+    - Used **CountVectorizer** to convert text into token count vectors
+    - Removed high-dimensional noise with feature filtering
+
+ 4. **Similarity Calculation**
+    - Used **Cosine Similarity** to compute similarity scores between anime storylines
+
+ 5. **Recommendation**
+    - Returned the top N anime with the highest similarity to a selected title (a minimal sketch follows below)
+
+ ---
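A minimal, self-contained sketch of the pipeline described above (Porter stemming, CountVectorizer, cosine similarity), mirroring the code in `research/trials.ipynb`; the `titles` and `corpus` values here are illustrative placeholders, not project data:

```python
import numpy as np
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

titles = ["Anime A", "Anime B", "Anime C"]            # placeholder titles
corpus = ["a boy trains to become a hero",            # placeholder "tags" text
          "a young hero trains with his rival",
          "two friends open a bakery in the city"]

# Normalize tokens with Porter stemming, then vectorize token counts
ps = PorterStemmer()
stemmed = [" ".join(ps.stem(tok) for tok in doc.split()) for doc in corpus]

cv = CountVectorizer(max_features=5000, stop_words="english")
vectors = cv.fit_transform(stemmed).toarray()
similarity = cosine_similarity(vectors)

def recommend(title, top_n=2):
    """Return the top_n most similar titles to the given one."""
    idx = titles.index(title)
    scores = sorted(enumerate(np.around(similarity[idx], 2)),
                    key=lambda x: x[1], reverse=True)[1:top_n + 1]
    return [(titles[i], score) for i, score in scores]

print(recommend("Anime A"))
```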
+
+ ## 📦 Docker Deployment
+
+ * To run the app using Docker: https://hub.docker.com/r/zenith40/recommendation-system
+
+ ### Pull the Image
+ * docker pull zenith40/recommendation-system:Ver-0.0.1
+
+ ### Run the Container
+ * docker run -p 8501:8501 zenith40/recommendation-system:Ver-0.0.1
+ * Then, open your browser and navigate to http://localhost:8501
+
+ ---
+
+ ## 🌱 Future Improvements
+ 🔍 Integrate TF-IDF or Word2Vec/Doc2Vec for deeper context awareness
+
+ 🧠 Experiment with transformer-based models (e.g., BERT embeddings)
+
+ 🌐 Deploy with a front-end UI for better user interaction
+
+ ⚡ Implement caching for faster response times on repeated queries
+
+ 📱 Build a mobile app version with React Native or Flutter
+
+ 📊 Add filters based on genre, year, popularity, or user ratings
+
+ 👥 Integrate with collaborative filtering or hybrid recommendation systems
+
+ ## 📄 License
+ This project is licensed under the MIT License.
+
+ ## 🙌 Acknowledgements
+ * NLTK
+
+ * scikit-learn
src/recommendationSystem.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,23 @@
+ LICENSE
+ README.md
+ setup.py
+ src/recommendationSystem/__init__.py
+ src/recommendationSystem.egg-info/PKG-INFO
+ src/recommendationSystem.egg-info/SOURCES.txt
+ src/recommendationSystem.egg-info/dependency_links.txt
+ src/recommendationSystem.egg-info/top_level.txt
+ src/recommendationSystem/chatbot/__init__.py
+ src/recommendationSystem/chatbot/client_module/__init__.py
+ src/recommendationSystem/chatbot/client_module/utils.py
+ src/recommendationSystem/chatbot/server_modules/__init__.py
+ src/recommendationSystem/chatbot/server_modules/llm.py
+ src/recommendationSystem/chatbot/server_modules/load_vector_store.py
+ src/recommendationSystem/chatbot/server_modules/query_handler.py
+ src/recommendationSystem/components/__init__.py
+ src/recommendationSystem/components/data_ingestion.py
+ src/recommendationSystem/components/data_transformation.py
+ src/recommendationSystem/config/__init__.py
+ src/recommendationSystem/config/configuration.py
+ src/recommendationSystem/logging/__init__.py
+ src/recommendationSystem/utils/__init__.py
+ src/recommendationSystem/utils/common.py
src/recommendationSystem.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
+
src/recommendationSystem.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
+ recommendationSystem
src/recommendationSystem/__init__.py ADDED
File without changes
src/recommendationSystem/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (199 Bytes).
 
src/recommendationSystem/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (187 Bytes).
 
src/recommendationSystem/chatbot/__init__.py ADDED
File without changes
src/recommendationSystem/chatbot/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (207 Bytes).
 
src/recommendationSystem/chatbot/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (195 Bytes).
 
src/recommendationSystem/chatbot/client_module/__init__.py ADDED
File without changes
src/recommendationSystem/chatbot/client_module/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (221 Bytes).
 
src/recommendationSystem/chatbot/client_module/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (209 Bytes).
 
src/recommendationSystem/chatbot/client_module/__pycache__/utils.cpython-311.pyc ADDED
Binary file (3.67 kB).
 
src/recommendationSystem/chatbot/client_module/__pycache__/utils.cpython-313.pyc ADDED
Binary file (3.27 kB).
 
src/recommendationSystem/chatbot/client_module/utils.py ADDED
@@ -0,0 +1,56 @@
+ # --------------------------------------- CHATBOT ------------------------------------------------
+
+ import streamlit as st
+
+ def cache_clear(x):
+     if st.sidebar.button("Reset Chat History", use_container_width=True):
+         x.clear()
+
+ #from recommendationSystem.chatbot.client_module.api import ask_question
+ from recommendationSystem.chatbot.server_modules.llm import get_llm_chain
+ from recommendationSystem.chatbot.server_modules.load_vector_store import use_vectorstore
+ from recommendationSystem.chatbot.server_modules.query_handler import query_chain
+
+
+ def chatbot():
+     vectorstore = use_vectorstore()
+     chain = get_llm_chain(vectorstore)
+     url = "https://raw.githubusercontent.com/Zenith40/Recommendation-system/refs/heads/main/data/anime_data_7490.txt"
+
+     if "messages" not in st.session_state:
+         st.session_state.messages = []
+
+     with st.sidebar:
+         st.title("🍥 RARE AT YOUR SERVICE ")
+         st.write("RAG-based Anime Recommendation Engine")
+
+     # Input and response
+     user_input = st.chat_input(placeholder="Ask Anything")
+     cache_clear(st.session_state.messages)
+
+     if user_input:
+         #response = ask_question(user_input)
+         #if response.status_code == 200:
+         response = query_chain(chain, user_input=user_input)
+         #data = response.json()
+         answer = response["response"]
+         #sources = response.get("sources",[])
+         #st.chat_message("assistant").markdown(answer)
+         st.session_state.messages.insert(0, {"role": "assistant", "content": answer})
+         st.session_state.messages.insert(0, {"role": "user", "content": user_input})
+         st.markdown(f"📄 Source : [Anime_Data.txt](%s)" % url)
+         #else:
+         #st.error(f"Error: {response.text}")
+
+     # Render existing chat history
+     for msg in st.session_state.messages:
+         st.chat_message(msg["role"]).markdown(msg["content"])
+
+
+ # History Downloader
+
+ def render_history_download():
+     if st.session_state.get("messages"):
+         chat_text = "\n\n".join([f"{m['role'].upper()}: {m['content']}" for m in st.session_state.messages])
+         st.download_button("Download Chat History", chat_text, file_name="chat_history.txt", mime="text/plain")
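For reference, a minimal Streamlit entry point wiring these helpers together, in the spirit of `app.py` (which currently calls `chatbot()` and leaves `render_history_download()` commented out); an illustrative sketch, not the committed app:

```python
# chat_app.py — hypothetical minimal front end for the chatbot module
import os
import sys

sys.path.append(os.path.abspath("src"))  # make the src/ layout importable, as app.py does

from recommendationSystem.chatbot.client_module.utils import chatbot, render_history_download

chatbot()                   # renders the sidebar, chat input, and history
render_history_download()   # offers the transcript for download as chat_history.txt
```

Run it with `streamlit run chat_app.py` after the Chroma store and GROQ credentials are in place.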
src/recommendationSystem/chatbot/server_modules/__init__.py ADDED
File without changes
src/recommendationSystem/chatbot/server_modules/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (222 Bytes).
 
src/recommendationSystem/chatbot/server_modules/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (210 Bytes).
 
src/recommendationSystem/chatbot/server_modules/__pycache__/llm.cpython-311.pyc ADDED
Binary file (1.1 kB).
 
src/recommendationSystem/chatbot/server_modules/__pycache__/llm.cpython-313.pyc ADDED
Binary file (958 Bytes).
 
src/recommendationSystem/chatbot/server_modules/__pycache__/load_vector_store.cpython-311.pyc ADDED
Binary file (876 Bytes).
 
src/recommendationSystem/chatbot/server_modules/__pycache__/load_vector_store.cpython-313.pyc ADDED
Binary file (769 Bytes).
 
src/recommendationSystem/chatbot/server_modules/__pycache__/query_handler.cpython-311.pyc ADDED
Binary file (512 Bytes).
 
src/recommendationSystem/chatbot/server_modules/__pycache__/query_handler.cpython-313.pyc ADDED
Binary file (470 Bytes).
 
src/recommendationSystem/chatbot/server_modules/llm.py ADDED
@@ -0,0 +1,30 @@
+ import os
+ from dotenv import load_dotenv
+ from langchain_groq import ChatGroq
+ from langchain.chains import RetrievalQA
+
+ load_dotenv()
+
+ api_key = os.getenv("GROQ_API_KEY")
+
+
+ def get_llm_chain(vectorstore):
+     llm = ChatGroq(
+         groq_api_key=api_key,
+         model_name="llama3-70b-8192"
+     )
+
+     retriever = vectorstore.as_retriever(
+         search_kwargs={
+             "k": 8,
+             #"lambda_mult": 0.5
+         }
+     )
+
+
+     return RetrievalQA.from_chain_type(
+         llm=llm,
+         chain_type="stuff",
+         retriever=retriever,
+         return_source_documents=True
+     )
src/recommendationSystem/chatbot/server_modules/load_vector_store.py ADDED
@@ -0,0 +1,19 @@
+ from langchain.vectorstores import Chroma
+ from langchain.embeddings import HuggingFaceEmbeddings
+ import os
+
+ save_path = os.path.join("chromadb")
+
+ def use_vectorstore():
+
+     embeddings = HuggingFaceEmbeddings(
+         model_name="intfloat/e5-large-v2",
+         #encode_kwargs={'batch_size': 32, 'normalize_embeddings': True}
+     )
+
+     vectorstore = Chroma(
+         persist_directory=save_path,
+         embedding_function=embeddings
+     )
+
+     return vectorstore
src/recommendationSystem/chatbot/server_modules/query_handler.py ADDED
@@ -0,0 +1,9 @@
+ # Query Chain
+
+ def query_chain(chain, user_input: str):
+     result = chain({"query": user_input})
+     response = {
+         "response": result["result"],
+         #"sources":[doc.metadata.get('source','') for doc in result["source_documents"]]
+     }
+     return response
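Taken together, the three server modules form a small retrieval pipeline: load the persisted Chroma store, wrap it in a RetrievalQA chain over ChatGroq, and route a question through `query_chain`. A hedged end-to-end sketch, assuming the `chromadb/` directory is present locally and `GROQ_API_KEY` is set in the environment as `llm.py` expects:

```python
import os
import sys

sys.path.append(os.path.abspath("src"))

from recommendationSystem.chatbot.server_modules.load_vector_store import use_vectorstore
from recommendationSystem.chatbot.server_modules.llm import get_llm_chain
from recommendationSystem.chatbot.server_modules.query_handler import query_chain

vectorstore = use_vectorstore()       # open the persisted Chroma store with e5-large-v2 embeddings
chain = get_llm_chain(vectorstore)    # RetrievalQA over ChatGroq (llama3-70b-8192), k=8 retriever
result = query_chain(chain, user_input="Recommend an anime similar to Vinland Saga")
print(result["response"])
```

This mirrors the flow that `client_module/utils.py` uses inside `chatbot()`.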
src/recommendationSystem/components/__init__.py ADDED
File without changes
src/recommendationSystem/components/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (210 Bytes).
 
src/recommendationSystem/components/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (198 Bytes).