rivapereira123 committed on
Commit
30de495
·
verified ·
1 Parent(s): bb549b2

Upload 10 files

Browse files
README.md CHANGED
@@ -1,14 +1,10 @@
1
  ---
2
- title: Cupid App
3
- emoji: πŸ‘
4
- colorFrom: red
5
  colorTo: pink
6
  sdk: streamlit
7
- sdk_version: 1.44.1
8
  app_file: app.py
9
  pinned: false
10
- license: mit
11
- short_description: An attempt to deploy a lighter version on what i did
12
  ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Cupid AI Streamlit
3
+ emoji: 💘
4
+ colorFrom: pink
5
  colorTo: pink
6
  sdk: streamlit
7
+ sdk_version: "1.30.0"
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
 
 
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import joblib
3
+ import torch
4
+ import numpy as np
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
7
+
8
+ # Load models
9
@st.cache_resource
def load_models():
    """Load the match model, the sentiment classifier and its tokenizer.

    The function is wrapped in ``st.cache_resource`` so the (large) model
    objects are created once and reused across Streamlit reruns instead of
    being reloaded on every widget interaction.

    Returns:
        tuple: ``(match_model, sentiment_model, tokenizer)`` where
        ``match_model`` is whatever object was pickled into
        ``cupid_match_model_best.pkl``, ``sentiment_model`` is a
        ``DistilBertForSequenceClassification`` and ``tokenizer`` is a
        ``DistilBertTokenizerFast``.

    Raises:
        Whatever ``joblib.load`` / ``from_pretrained`` raise when a file is
        missing or unreadable.
    """
    from pathlib import Path

    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only ship a .pkl that was produced by a trusted source.
    match_model = joblib.load("cupid_match_model_best.pkl")

    # The checkpoint files (config.json, model.safetensors, tokenizer files)
    # may live either in a "sentiment_model" sub-folder or directly next to
    # the app. Prefer the sub-folder when it exists, otherwise fall back to
    # the working directory so the bare name is not mistaken for a Hub repo id.
    model_dir = "sentiment_model" if Path("sentiment_model").is_dir() else "."

    sentiment_model = DistilBertForSequenceClassification.from_pretrained(model_dir)
    tokenizer = DistilBertTokenizerFast.from_pretrained(model_dir)
    return match_model, sentiment_model, tokenizer
15
+
16
# Models are loaded once (the loader is cached) and shared by the helpers below.
match_model, sentiment_model, tokenizer = load_models()

st.title("Cupid AI 💘")
st.write("Enter your profile to find match compatibility and sentiment analysis.")

# User input form: widgets inside a form are only submitted together when the
# submit button is pressed, so the script does not rerun on every keystroke.
with st.form("profile_form"):
    age = st.number_input("Age", min_value=18, max_value=100, value=25)
    gender = st.selectbox("Gender", ["male", "female"])
    # NOTE(review): the options look like relationship statuses rather than
    # orientations, and the value is not used anywhere below — confirm intent.
    orientation = st.selectbox("Orientation", ["single", "open", "complicated"])
    traits = st.multiselect(
        "Select Your Traits",
        ["romantic", "adventurous", "funny", "outgoing", "creative", "intellectual"],
    )
    essay = st.text_area("Tell us about yourself")
    submitted = st.form_submit_button("Find Matches")
29
+
30
+ # Feature prep
31
def prepare_input_for_model(user_traits, age, gender):
    """Build the fixed-length feature vector used for profile comparison.

    Args:
        user_traits: Collection of trait names selected for the profile.
            Names outside the known vocabulary are ignored.
        age: Age of the profile owner.
        gender: Gender label; ``"male"`` (case-insensitive) encodes to 0,
            anything else to 1.

    Returns:
        list: A list of exactly 6 values. Because the trait vocabulary already
        has 6 entries, the result is the one-hot trait vector only.

    NOTE(review): ``age`` and the encoded gender are appended at positions 6
    and 7 and are therefore cut off by the 6-feature limit, so they currently
    have no effect on the returned vector. Confirm whether the limit should be
    8 (and whether age should be scaled) before changing it.
    """
    n_features = 6
    gender_val = 0 if gender.lower() == "male" else 1
    all_traits = ["romantic", "adventurous", "funny", "outgoing", "creative", "intellectual"]
    trait_vec = [1 if trait in user_traits else 0 for trait in all_traits]
    vec = trait_vec + [age, gender_val]
    # The vector always has len(all_traits) + 2 == 8 entries, so padding can
    # never be needed; only truncation to the fixed feature count applies.
    return vec[:n_features]
41
+
42
def predict_sentiment(text):
    """Classify ``text`` with the sentiment model and report the confidence.

    Args:
        text: The free-text essay to classify. It is truncated to at most
            128 tokens before being passed to the model.

    Returns:
        tuple: ``(emoji, confidence)`` where ``emoji`` is "😊" when the
        predicted class index is 1 and "😞" otherwise, and ``confidence`` is
        the softmax probability of the predicted class as a percentage
        rounded to two decimals.

    NOTE(review): assumes class index 1 means "positive" — confirm against
    the label mapping used when the checkpoint was fine-tuned.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = sentiment_model(**inputs).logits
    # Softmax is monotonic, so the argmax of the probabilities is the argmax
    # of the logits; compute the distribution once and read both values off it.
    probs = torch.softmax(logits, dim=1)
    pred = torch.argmax(probs, dim=1).item()
    emoji = "😊" if pred == 1 else "😞"
    confidence = probs.max().item()
    return emoji, round(confidence * 100, 2)
50
+
51
# Results are only rendered on the run triggered by the form's submit button.
if submitted:
    features = prepare_input_for_model(traits, age, gender)
    sentiment, conf = predict_sentiment(essay)
    st.success(f"Sentiment: {sentiment} (Confidence: {conf}%)")

    st.write("### Compatibility Score")
    st.write(f"Input vector: {features}")
    st.write("Using cosine similarity to calculate match score...")

    # For demo, compare to a few mock profiles.
    # NOTE(review): "optimistic", "outdoorsy" and "empathetic" are not in the
    # trait vocabulary used by prepare_input_for_model, so they are ignored;
    # "Sophia" becomes an all-zero vector and always scores 0%. The same
    # happens for every profile when the user selects no traits.
    mock_profiles = {
        "Alex": prepare_input_for_model(["romantic", "adventurous"], 30, "male"),
        "Sophia": prepare_input_for_model(["optimistic", "outdoorsy"], 27, "female"),
        "Olivia": prepare_input_for_model(["empathetic", "creative"], 29, "female"),
    }

    # One batched call returns a (1, n_profiles) matrix; row 0 holds the
    # user's similarity to each mock profile in dictionary order.
    scores = cosine_similarity([features], list(mock_profiles.values()))[0]
    for name, score in zip(mock_profiles, scores):
        st.write(f"{name}: {round(score * 100, 2)}% match 💘")
config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation": "gelu",
3
+ "architectures": [
4
+ "DistilBertForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.1,
7
+ "dim": 768,
8
+ "dropout": 0.1,
9
+ "hidden_dim": 3072,
10
+ "initializer_range": 0.02,
11
+ "max_position_embeddings": 512,
12
+ "model_type": "distilbert",
13
+ "n_heads": 12,
14
+ "n_layers": 6,
15
+ "pad_token_id": 0,
16
+ "problem_type": "single_label_classification",
17
+ "qa_dropout": 0.1,
18
+ "seq_classif_dropout": 0.2,
19
+ "sinusoidal_pos_embds": false,
20
+ "tie_weights_": true,
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.50.3",
23
+ "vocab_size": 30522
24
+ }
cupid_match_model_best.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:121111d8acdd3cb31ac606e941e70e00b1741432436524b53eef4d61126cbae2
3
+ size 26107
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40aeb76eac3aa33d233d25ac285e098a34dc0bd9d0a55d4ea57f999b15bafcfd
3
+ size 267832560
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ scikit-learn
3
+ numpy
4
+ torch
5
+ transformers
6
+ joblib
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff