zenith04 commited on
Commit
1a364ca
·
verified ·
1 Parent(s): 81c0ff7

Update src/recommendationSystem/utils/common.py

Browse files
Files changed (1) hide show
  1. src/recommendationSystem/utils/common.py +112 -112
src/recommendationSystem/utils/common.py CHANGED
@@ -1,112 +1,112 @@
1
- # --------------------------------- CUSTOM EXCEPTION CLASS --------------------------------------------
2
-
3
- # Importing Libraries
4
- import sys
5
-
6
-
7
- # Defining structure of exception message
8
- def error_message_detail(error:Exception,error_detail:sys):
9
- _,_,exec_tb = error_detail.exc_info()
10
- file_name = exec_tb.tb_frame.f_code.co_filename
11
- line_number = exec_tb.tb_lineno
12
- error_message = f"Error occured in python script name [{file_name}] " \
13
- f"line number [{line_number}] error message [{error}]"
14
- return error_message
15
-
16
- # Getting the exception message from sys
17
- class CustomException(Exception):
18
- def __init__(self, error_message:Exception, error_detail:sys):
19
- super().__init__(str(error_message))
20
- self.error_message = error_message_detail(error_message,error_detail=error_detail)
21
-
22
- def __str__(self):
23
- return self.error_message
24
-
25
-
26
-
27
-
28
-
29
- # --------------------------------- PREPROCESSING THE TEXT IN THE DATAFRAME --------------------------------------------
30
-
31
- # Importing Libraries
32
- from nltk.stem.porter import PorterStemmer
33
- from sklearn.feature_extraction.text import CountVectorizer
34
- from sklearn.metrics.pairwise import cosine_similarity
35
-
36
-
37
-
38
- def removing_blank_lines(text):
39
- return text.replace('\n'," ")
40
-
41
- def removing_pre_suff_ix(text):
42
- y = []
43
-
44
- for i in text.split():
45
- y.append(PorterStemmer().stem(i))
46
-
47
- return " ".join(y)
48
-
49
- def converting_into_vectors(text):
50
- vec = CountVectorizer(max_features=5000,stop_words='english').fit_transform(text).toarray()
51
- return vec
52
-
53
- def finding_similarity(vec):
54
- similarity = cosine_similarity(vec)
55
- return similarity
56
-
57
- # --------------------------------- SAVING FILES --------------------------------------------
58
-
59
- import os
60
- import dill
61
-
62
- from recommendationSystem.logging import logger
63
-
64
-
65
- def save_object(file_path,obj):
66
- try:
67
- dir_path = os.path.dirname(file_path)
68
- os.makedirs(dir_path,exist_ok=True)
69
-
70
- with open(file_path,'wb') as file_obj:
71
- dill.dump(obj,file_obj)
72
-
73
- except Exception as e:
74
- raise CustomException(e,sys)
75
-
76
-
77
- # --------------------------------- Prediction File --------------------------------------------
78
-
79
- import numpy as np
80
-
81
- def recommend(data,matrix,anime):
82
- anime_index = data[data.title == anime].index[0]
83
- distances = np.around(matrix[anime_index],2)
84
- anime_list = sorted(list(enumerate(distances)),reverse=True,key=lambda x:x[1])[0:10]
85
-
86
- recommended_anime = []
87
- recommended_anime_poster = []
88
- recommended_anime_link = []
89
- recommended_similarity_score = []
90
-
91
- for i in anime_list:
92
- # anime
93
- recommended_anime.append(data.iloc[i[0]].title)
94
- # posters
95
- recommended_anime_poster.append(data.iloc[i[0]].image)
96
- # links
97
- recommended_anime_link.append(data.iloc[i[0]].links)
98
- # score
99
- recommended_similarity_score.append(f'{np.around(i[1]*100,2)} %')
100
- return recommended_anime, recommended_anime_poster, recommended_anime_link, recommended_similarity_score
101
-
102
- # --------------------------------- Find Story of the anime --------------------------------------------
103
-
104
- import pandas as pd
105
-
106
- def find_anime(label):
107
- dataframe_path = os.path.join("artifact","data.csv")
108
- df = pd.read_csv(dataframe_path)
109
- story = '\n'.join(df[df.name == label].sypnopsis.to_list())
110
- return story
111
-
112
-
 
1
+ # --------------------------------- CUSTOM EXCEPTION CLASS --------------------------------------------
2
+
3
+ # Importing Libraries
4
+ import sys
5
+
6
+
7
# Defining structure of exception message
def error_message_detail(error: Exception, error_detail) -> str:
    """Build a detailed, human-readable message for a caught exception.

    Args:
        error: The exception instance that was caught.
        error_detail: The ``sys`` module (its ``exc_info()`` is used to
            locate the traceback). The original ``: sys`` annotation was
            invalid — ``sys`` is a module, not a type — so it is dropped.

    Returns:
        A string naming the script, the line number, and the error text.

    Note:
        Must be called while an exception is being handled; otherwise
        ``exc_info()`` returns ``(None, None, None)`` and the attribute
        access below raises ``AttributeError``.
    """
    _, _, exc_tb = error_detail.exc_info()
    file_name = exc_tb.tb_frame.f_code.co_filename
    line_number = exc_tb.tb_lineno
    # Fixed typo in the emitted message: "occured" -> "occurred".
    error_message = (
        f"Error occurred in python script name [{file_name}] "
        f"line number [{line_number}] error message [{error}]"
    )
    return error_message
15
+
16
# Getting the exception message from sys
class CustomException(Exception):
    """Exception wrapper that embeds file-name and line-number context.

    Formats the wrapped error through ``error_message_detail`` at
    construction time, so ``str(self)`` carries the traceback location.
    """

    def __init__(self, error_message: Exception, error_detail):
        super().__init__(str(error_message))
        # Resolve file/line details immediately, while exc_info() is live.
        self.error_message = error_message_detail(
            error_message, error_detail=error_detail
        )

    def __str__(self) -> str:
        return self.error_message
24
+
25
+
26
+
27
+
28
+
29
+ # --------------------------------- PREPROCESSING THE TEXT IN THE DATAFRAME --------------------------------------------
30
+
31
+ # Importing Libraries
32
+ from nltk.stem.porter import PorterStemmer
33
+ from sklearn.feature_extraction.text import CountVectorizer
34
+ from sklearn.metrics.pairwise import cosine_similarity
35
+
36
+
37
+
38
def removing_blank_lines(text):
    """Flatten *text* by replacing every newline with a single space."""
    flattened = text.replace('\n', " ")
    return flattened
40
+
41
def removing_pre_suff_ix(text):
    """Stem every whitespace-separated token of *text*.

    Uses NLTK's PorterStemmer to strip common prefixes/suffixes and
    returns the stemmed tokens re-joined with single spaces.

    Fix: the original built a fresh ``PorterStemmer()`` for every token
    inside the loop; the stemmer is reusable, so construct it once and
    build the result with a generator expression.
    """
    stemmer = PorterStemmer()
    return " ".join(stemmer.stem(token) for token in text.split())
48
+
49
def converting_into_vectors(text):
    """Vectorise the documents in *text* as a dense bag-of-words matrix.

    Keeps the 5000 most frequent terms, removes English stop words,
    and returns one count row per input document as a dense array.
    """
    vectorizer = CountVectorizer(max_features=5000, stop_words='english')
    counts = vectorizer.fit_transform(text)
    return counts.toarray()
52
+
53
def finding_similarity(vec):
    """Return the pairwise cosine-similarity matrix of the rows of *vec*."""
    return cosine_similarity(vec)
56
+
57
+ # --------------------------------- SAVING FILES --------------------------------------------
58
+
59
+ import os
60
+ import dill
61
+
62
+ from recommendationSystem.logging import logger
63
+
64
+
65
def save_object(file_path, obj):
    """Serialise *obj* to *file_path* using dill.

    Creates any missing parent directories before writing.

    Args:
        file_path: Destination path for the serialised object.
        obj: Any dill-serialisable Python object.

    Raises:
        CustomException: Wrapping any error raised during directory
            creation or serialisation.
    """
    try:
        dir_path = os.path.dirname(file_path)
        # Fix: os.makedirs("") raises FileNotFoundError when file_path
        # is a bare filename — only create directories when one exists.
        if dir_path:
            os.makedirs(dir_path, exist_ok=True)

        with open(file_path, 'wb') as file_obj:
            dill.dump(obj, file_obj)

    except Exception as e:
        raise CustomException(e, sys)
75
+
76
+
77
+ # --------------------------------- Prediction File --------------------------------------------
78
+
79
+ import numpy as np
80
+
81
def recommend(data, matrix, anime):
    """Return the top-10 entries most similar to *anime*.

    Args:
        data: DataFrame with ``title``, ``image`` and ``links`` columns.
        matrix: Square similarity matrix aligned with the rows of *data*.
        anime: Title to look up (must exist in ``data.title``).

    Returns:
        Four parallel lists: titles, poster URLs, links, and similarity
        scores formatted as percentage strings.

    NOTE(review): the slice starts at index 0, so the queried title
    itself (similarity ~100%) appears first in the results — confirm
    this is intended before changing it.
    """
    query_index = data[data.title == anime].index[0]
    scores = np.around(matrix[query_index], 2)
    top_matches = sorted(
        enumerate(scores), reverse=True, key=lambda pair: pair[1]
    )[:10]

    rows = [data.iloc[idx] for idx, _ in top_matches]
    titles = [row.title for row in rows]
    posters = [row.image for row in rows]
    links = [row.links for row in rows]
    percentages = [f'{np.around(score * 100, 2)} %' for _, score in top_matches]

    return titles, posters, links, percentages
101
+
102
+ # --------------------------------- Find Story of the anime --------------------------------------------
103
+
104
+ import pandas as pd
105
+
106
def find_anime(label, dataframe_path=None):
    """Return the synopsis text for the anime named *label*.

    Args:
        label: Value matched against the CSV's ``name`` column.
        dataframe_path: Optional path to the data CSV. Defaults to the
            original hard-coded ``/tmp/artifact/data.csv`` — the path is
            now parameterised so callers and tests can supply their own
            file without changing existing behaviour.

    Returns:
        The matching ``sypnopsis`` values joined with newlines (an
        empty string when no row matches).
    """
    if dataframe_path is None:
        dataframe_path = os.path.join("/tmp/artifact", "data.csv")
    df = pd.read_csv(dataframe_path)
    # NOTE: 'sypnopsis' is the dataset's (misspelled) column name — keep it.
    story = '\n'.join(df[df.name == label].sypnopsis.to_list())
    return story
111
+
112
+