Update choosingdata.py
Browse files- choosingdata.py +30 -42
choosingdata.py
CHANGED
@@ -1,75 +1,63 @@
|
|
1 |
import pandas as pd
|
2 |
|
3 |
-
|
|
|
4 |
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
df['
|
|
|
|
|
9 |
|
10 |
-
|
11 |
-
bins = [percentiles.iloc[0], percentiles.iloc[1], percentiles.iloc[2], percentiles.iloc[3]]
|
12 |
-
labels = ['Low', 'Medium', 'High']
|
13 |
-
df['valence_level'] = pd.cut(df['valence'], bins=bins, labels=labels, include_lowest=True)
|
14 |
|
15 |
-
|
16 |
-
bins = [percentiles.iloc[0], percentiles.iloc[1], percentiles.iloc[2], percentiles.iloc[3]]
|
17 |
-
labels = [1, 2, 3]
|
18 |
-
df['speechiness_level'] = pd.cut(df['speechiness'], bins=bins, labels=labels, include_lowest=True).astype(int)
|
19 |
|
20 |
-
|
21 |
-
bins = [percentiles.iloc[0], percentiles.iloc[1], percentiles.iloc[2], percentiles.iloc[3]]
|
22 |
-
labels = [1, 2, 3]
|
23 |
-
df['fres_level'] = pd.cut(df['fres'], bins=bins, labels=labels, include_lowest=True).astype(int)
|
24 |
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
|
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
34 |
|
35 |
-
|
|
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
# dance_choice = input("Which level do you want for danceability?")
|
43 |
-
# valence_choice = input("Which level do you want for valence?")
|
44 |
-
# difficulty_choice = input("Which level do you want for the difficulty?")
|
45 |
|
46 |
def recommendation(df, dance_choice, valence_choice, difficulty_choice):
    """Return one randomly chosen song that matches the requested levels.

    Args:
        df: DataFrame holding the ``danceability_level``, ``valence_level``
            and ``difficulty_level`` columns (each labelled "Low", "Medium"
            or "High").
        dance_choice: "Low", "Medium" or "High".
        valence_choice: "Negative", "Neutral" or "Positive".
        difficulty_choice: "Easy", "Medium" or "Hard".

    Any choice outside the values listed above leaves that dimension
    unfiltered.

    Returns:
        A one-row DataFrame sampled at random from the matching songs.

    Raises:
        ValueError: If no song matches the requested combination.
    """
    # Translate each user-facing choice into the label stored in the
    # corresponding ``*_level`` column.
    filters = (
        ('danceability_level', dance_choice,
         {"Low": "Low", "Medium": "Medium", "High": "High"}),
        ('valence_level', valence_choice,
         {"Negative": "Low", "Neutral": "Medium", "Positive": "High"}),
        ('difficulty_level', difficulty_choice,
         {"Easy": "Low", "Medium": "Medium", "Hard": "High"}),
    )
    for column, choice, choice_to_level in filters:
        level = choice_to_level.get(choice)
        if level is not None:
            df = df[df[column] == level]

    # Sampling from an empty frame fails with an opaque NumPy message, so
    # report which combination produced no candidates instead.
    if len(df) == 0:
        raise ValueError(
            "No songs match the requested combination: "
            f"danceability={dance_choice!r}, valence={valence_choice!r}, "
            f"difficulty={difficulty_choice!r}"
        )

    return df.sample()  # randomly choose 1 song
|
|
|
1 |
import pandas as pd
|
2 |
|
3 |
+
def process_data(file_path):
    """Load the song table from JSON and add the derived level columns.

    Args:
        file_path: Location of the JSON data, passed straight to
            ``pd.read_json``.

    Returns:
        The loaded DataFrame with these columns added, in this order:
        ``danceability_level``, ``valence_level``, ``speechiness_level``,
        ``fres_level``, ``vocabComplex_level``, ``avgSyllable_level``,
        ``difficulty`` and ``difficulty_level``.
    """
    df = pd.read_json(file_path)

    # (source column, derived column) pairs labelled "Low"/"Medium"/"High".
    text_levels = (
        ('danceability', 'danceability_level'),
        ('valence', 'valence_level'),
    )
    for source, target in text_levels:
        df[target] = categorize_level(df[source])

    # (source column, derived column) pairs scored 1/2/3; these feed the
    # difficulty total below.
    numeric_levels = (
        ('speechiness', 'speechiness_level'),
        ('fres', 'fres_level'),
        ('vocabComplex', 'vocabComplex_level'),
        ('avgSyllable', 'avgSyllable_level'),
    )
    for source, target in numeric_levels:
        df[target] = categorize_numeric_level(df[source])

    # The difficulty score must exist before it can be bucketed.
    df['difficulty'] = calculate_difficulty(df)
    df['difficulty_level'] = categorize_difficulty(df['difficulty'])

    return df
|
|
|
|
|
|
|
18 |
|
19 |
+
def categorize_level(column):
    """Label each value of a numeric column as 'Low', 'Medium' or 'High'.

    The bin edges are the column's own 0, 0.33, 0.66 and 1 quantiles, so
    the cut points adapt to the data. The minimum value is included in the
    'Low' bin.

    Args:
        column: Numeric pandas Series to bucket.

    Returns:
        A categorical Series with the labels 'Low', 'Medium' and 'High'.
    """
    edges = column.quantile([0, 0.33, 0.66, 1]).tolist()
    return pd.cut(
        column,
        bins=edges,
        labels=['Low', 'Medium', 'High'],
        include_lowest=True,
    )
|
24 |
|
25 |
+
def categorize_numeric_level(column):
    """Score each value of a numeric column as 1, 2 or 3.

    The bin edges are the column's own 0, 0.33, 0.66 and 1 quantiles; the
    minimum value is included in the lowest bin.

    Args:
        column: Numeric pandas Series to bucket.

    Returns:
        An integer Series holding 1 (lowest bin), 2 or 3 (highest bin).
    """
    edges = column.quantile([0, 0.33, 0.66, 1]).tolist()
    scored = pd.cut(column, bins=edges, labels=[1, 2, 3], include_lowest=True)
    # Convert from categorical to plain integers so the scores can be summed.
    return scored.astype(int)
|
30 |
|
31 |
+
def calculate_difficulty(df):
    """Add up the four per-song level scores into one difficulty total.

    Args:
        df: DataFrame containing ``speechiness_level``, ``fres_level``,
            ``vocabComplex_level`` and ``avgSyllable_level``.

    Returns:
        A Series with the row-wise sum of those four columns.
    """
    total = df['speechiness_level']
    for name in ('fres_level', 'vocabComplex_level', 'avgSyllable_level'):
        total = total + df[name]
    return total
|
33 |
|
34 |
+
def categorize_difficulty(column):
    """Label each difficulty total as "Low", "Medium" or "High".

    The bin edges are the column's own 0, 0.33, 0.66 and 1 quantiles; the
    minimum value is included in the "Low" bin.

    Args:
        column: Numeric pandas Series of difficulty totals.

    Returns:
        A categorical Series with the labels "Low", "Medium" and "High".
    """
    level_names = ["Low", "Medium", "High"]
    cut_points = list(column.quantile([0, 0.33, 0.66, 1]))
    return pd.cut(column, bins=cut_points, labels=level_names, include_lowest=True)
|
|
|
|
|
|
|
39 |
|
40 |
def recommendation(df, dance_choice, valence_choice, difficulty_choice):
    """Return one randomly chosen song that matches the requested levels.

    Args:
        df: DataFrame holding the ``danceability_level``, ``valence_level``
            and ``difficulty_level`` columns (each labelled "Low", "Medium"
            or "High").
        dance_choice: "Low", "Medium" or "High".
        valence_choice: "Negative", "Neutral" or "Positive".
        difficulty_choice: "Easy", "Medium" or "Hard".

    Any choice outside the values listed above leaves that dimension
    unfiltered.

    Returns:
        A one-row DataFrame sampled at random from the matching songs.

    Raises:
        ValueError: If no song matches the requested combination.
    """
    # Translate each user-facing choice into the label stored in the
    # corresponding ``*_level`` column.
    filters = (
        ('danceability_level', dance_choice,
         {"Low": "Low", "Medium": "Medium", "High": "High"}),
        ('valence_level', valence_choice,
         {"Negative": "Low", "Neutral": "Medium", "Positive": "High"}),
        ('difficulty_level', difficulty_choice,
         {"Easy": "Low", "Medium": "Medium", "Hard": "High"}),
    )
    for column, choice, choice_to_level in filters:
        level = choice_to_level.get(choice)
        if level is not None:
            df = df[df[column] == level]

    # Sampling from an empty frame fails with an opaque NumPy message, so
    # report which combination produced no candidates instead.
    if len(df) == 0:
        raise ValueError(
            "No songs match the requested combination: "
            f"danceability={dance_choice!r}, valence={valence_choice!r}, "
            f"difficulty={difficulty_choice!r}"
        )

    return df.sample()  # randomly choose 1 song
|