jchoo committed on
Commit
1a49479
·
verified ·
1 Parent(s): 38971f0

Update choosingdata.py

Browse files
Files changed (1) hide show
  1. choosingdata.py +30 -42
choosingdata.py CHANGED
@@ -1,75 +1,63 @@
1
  import pandas as pd
2
 
3
def _quantile_levels(column, labels, quantiles=(0, 0.33, 0.66, 1)):
    """Bin ``column`` at its own quantiles and tag each bin with ``labels``.

    Args:
        column: Numeric pandas Series to bin.
        labels: One label per bin (``len(quantiles) - 1`` entries).
        quantiles: Cut points, expressed as quantiles of ``column``.

    Returns:
        A categorical Series aligned with ``column``.
    """
    edges = list(column.quantile(list(quantiles)))
    # include_lowest=True keeps the column minimum inside the first bin.
    return pd.cut(column, bins=edges, labels=labels, include_lowest=True)


# Song table; every derived column below is added to this frame in place.
df = pd.read_json('data.json')

# Features exposed to the user as named levels.
for _name in ('danceability', 'valence'):
    df[_name + '_level'] = _quantile_levels(df[_name], ['Low', 'Medium', 'High'])

# Features that feed the difficulty score get integer levels so they can be
# added together.
for _name in ('speechiness', 'fres', 'vocabComplex', 'avgSyllable'):
    df[_name + '_level'] = _quantile_levels(df[_name], [1, 2, 3]).astype(int)

df['difficulty'] = (
    df['speechiness_level']
    + df['fres_level']
    + df['vocabComplex_level']
    + df['avgSyllable_level']
)

df['difficulty_level'] = _quantile_levels(df['difficulty'], ["Low", "Medium", "High"])
41
-
42
- # dance_choice = input("Which level do you want for danceability?")
43
- # valence_choice = input("Which level do you want for valence?")
44
- # difficulty_choice = input("Which level do you want for the difficulty?")
45
 
46
  def recommendation(df, dance_choice, valence_choice, difficulty_choice):
47
  if dance_choice == "Low":
48
  df = df[df['danceability_level'] == "Low"]
49
-
50
  elif dance_choice == "Medium":
51
  df = df[df['danceability_level'] == "Medium"]
52
-
53
  elif dance_choice == "High":
54
  df = df[df['danceability_level'] == "High"]
55
 
56
  if valence_choice == "Negative":
57
  df = df[df['valence_level'] == "Low"]
58
-
59
  elif valence_choice == "Neutral":
60
  df = df[df['valence_level'] == "Medium"]
61
-
62
  elif valence_choice == "Positive":
63
  df = df[df['valence_level'] == "High"]
64
 
65
  if difficulty_choice == "Easy":
66
  df = df[df['difficulty_level'] == "Low"]
67
-
68
  elif difficulty_choice == "Medium":
69
  df = df[df['difficulty_level'] == "Medium"]
70
-
71
  elif difficulty_choice == "Hard":
72
  df = df[df['difficulty_level'] == "High"]
73
-
74
  chosen = df.sample() # random choose 1 song
75
- return chosen
 
1
  import pandas as pd
2
 
3
def process_data(file_path):
    """Load the song table from ``file_path`` and attach the level columns.

    Args:
        file_path: Location of the JSON data, passed to ``pd.read_json``.

    Returns:
        The loaded DataFrame with a ``<feature>_level`` column for each
        binned feature, plus ``difficulty`` and ``difficulty_level``.
    """
    songs = pd.read_json(file_path)

    # Features labelled with named levels.
    for feature in ('danceability', 'valence'):
        songs[feature + '_level'] = categorize_level(songs[feature])

    # Features labelled with integer levels, later summed into the difficulty.
    for feature in ('speechiness', 'fres', 'vocabComplex', 'avgSyllable'):
        songs[feature + '_level'] = categorize_numeric_level(songs[feature])

    difficulty = calculate_difficulty(songs)
    songs['difficulty'] = difficulty
    songs['difficulty_level'] = categorize_difficulty(songs['difficulty'])

    return songs
 
 
 
18
 
19
# Default cut points: roughly the lower, middle and upper thirds of a column.
_LEVEL_QUANTILES = (0, 0.33, 0.66, 1)


def _cut_at_quantiles(column, labels, quantiles):
    """Bin ``column`` at its own quantiles and tag each bin with ``labels``."""
    edges = list(column.quantile(list(quantiles)))
    # include_lowest=True keeps the column minimum inside the first bin.
    return pd.cut(column, bins=edges, labels=list(labels), include_lowest=True)


def categorize_level(column, labels=('Low', 'Medium', 'High'),
                     quantiles=_LEVEL_QUANTILES):
    """Label each value of ``column`` by the quantile band it falls in.

    Args:
        column: Numeric pandas Series.
        labels: One label per band; defaults to Low/Medium/High.
        quantiles: Band boundaries as quantiles of ``column``; needs one more
            entry than ``labels``.

    Returns:
        A categorical Series aligned with ``column``.
    """
    return _cut_at_quantiles(column, labels, quantiles)


def categorize_numeric_level(column, labels=(1, 2, 3),
                             quantiles=_LEVEL_QUANTILES):
    """Like ``categorize_level`` but return plain integer levels.

    Args:
        column: Numeric pandas Series.
        labels: One integer per band; defaults to 1/2/3.
        quantiles: Band boundaries as quantiles of ``column``.

    Returns:
        An integer Series aligned with ``column``.
    """
    # NOTE(review): astype(int) cannot hold missing values, so a column with
    # NaN entries is expected to fail here - confirm the data has none.
    return _cut_at_quantiles(column, labels, quantiles).astype(int)


def calculate_difficulty(df):
    """Sum the four integer level columns into one difficulty score.

    Args:
        df: DataFrame holding ``speechiness_level``, ``fres_level``,
            ``vocabComplex_level`` and ``avgSyllable_level``.

    Returns:
        A Series with the row-wise sum of those columns.
    """
    return (
        df['speechiness_level']
        + df['fres_level']
        + df['vocabComplex_level']
        + df['avgSyllable_level']
    )


def categorize_difficulty(column, labels=("Low", "Medium", "High"),
                          quantiles=_LEVEL_QUANTILES):
    """Label difficulty scores by quantile band.

    Same binning as ``categorize_level``; kept as a separate entry point for
    existing callers.

    Args:
        column: Series of difficulty scores.
        labels: One label per band; defaults to Low/Medium/High.
        quantiles: Band boundaries as quantiles of ``column``.

    Returns:
        A categorical Series aligned with ``column``.
    """
    return categorize_level(column, labels, quantiles)
 
 
 
39
 
40
  def recommendation(df, dance_choice, valence_choice, difficulty_choice):
41
  if dance_choice == "Low":
42
  df = df[df['danceability_level'] == "Low"]
 
43
  elif dance_choice == "Medium":
44
  df = df[df['danceability_level'] == "Medium"]
 
45
  elif dance_choice == "High":
46
  df = df[df['danceability_level'] == "High"]
47
 
48
  if valence_choice == "Negative":
49
  df = df[df['valence_level'] == "Low"]
 
50
  elif valence_choice == "Neutral":
51
  df = df[df['valence_level'] == "Medium"]
 
52
  elif valence_choice == "Positive":
53
  df = df[df['valence_level'] == "High"]
54
 
55
  if difficulty_choice == "Easy":
56
  df = df[df['difficulty_level'] == "Low"]
 
57
  elif difficulty_choice == "Medium":
58
  df = df[df['difficulty_level'] == "Medium"]
 
59
  elif difficulty_choice == "Hard":
60
  df = df[df['difficulty_level'] == "High"]
61
+
62
  chosen = df.sample() # random choose 1 song
63
+ return chosen