Spaces:

rivapereira123
/

cupid-app

Sleeping

App Files Community

rivapereira123 committed on Apr 13

Commit

30de495

verified ·

1 Parent(s): bb549b2

Upload 10 files

Browse files

Files changed (10) hide show

README.md +4 -8
app.py +69 -0
config.json +24 -0
cupid_match_model_best.pkl +3 -0
model.safetensors +3 -0
requirements.txt +6 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +56 -0
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -1,14 +1,10 @@
 ---
-title: Cupid App
-emoji: 👁
-colorFrom: red
 colorTo: pink
 sdk: streamlit
-sdk_version: 1.44.1
 app_file: app.py
 pinned: false
-license: mit
-short_description: An attempt to deploy a lighter version on what i did
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Cupid AI Streamlit
+emoji: 💘
+colorFrom: pink
 colorTo: pink
 sdk: streamlit
+sdk_version: "1.30.0"
 app_file: app.py
 pinned: false
 ---

app.py ADDED Viewed

	@@ -0,0 +1,69 @@

+import streamlit as st
+import joblib
+import torch
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
+# Load models
+@st.cache_resource
+def load_models():
+    match_model = joblib.load("cupid_match_model_best.pkl")
+    sentiment_model = DistilBertForSequenceClassification.from_pretrained("sentiment_model")
+    tokenizer = DistilBertTokenizerFast.from_pretrained("sentiment_model")
+    return match_model, sentiment_model, tokenizer
+# Load (or fetch from the resource cache) the three model objects.
+# NOTE(review): match_model is not referenced by any code visible in this file.
+match_model, sentiment_model, tokenizer = load_models()
+st.title("Cupid AI 💘")
+st.write("Enter your profile to find match compatibility and sentiment analysis.")
+# User input form
+with st.form("profile_form"):
+    # age and gender are passed to prepare_input_for_model on submit.
+    age = st.number_input("Age", min_value=18, max_value=100, value=25)
+    gender = st.selectbox("Gender", ["male", "female"])
+    # NOTE(review): labelled "Orientation" but the options read like a
+    # relationship status; the value is not used by the code visible here.
+    orientation = st.selectbox("Orientation", ["single", "open", "complicated"])
+    # The options mirror the trait list hard-coded in prepare_input_for_model.
+    traits = st.multiselect("Select Your Traits", ["romantic", "adventurous", "funny", "outgoing", "creative", "intellectual"])
+    # Free text that is fed to predict_sentiment on submit.
+    essay = st.text_area("Tell us about yourself")
+    # Gates the `if submitted:` section further down.
+    submitted = st.form_submit_button("Find Matches")
+# Feature prep
+def prepare_input_for_model(user_traits, age, gender):
+    gender_val = 0 if gender.lower() == "male" else 1
+    all_traits = ["romantic", "adventurous", "funny", "outgoing", "creative", "intellectual"]
+    trait_vec = [1 if trait in user_traits else 0 for trait in all_traits]
+    vec = trait_vec + [age, gender_val]
+    if len(vec) < 6:
+        vec.extend([0] * (6 - len(vec)))
+    elif len(vec) > 6:
+        vec = vec[:6]
+    return vec
+def predict_sentiment(text):
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
+    with torch.no_grad():
+        logits = sentiment_model(**inputs).logits
+    pred = torch.argmax(logits, dim=1).item()
+    emoji = "😊" if pred == 1 else "😞"
+    confidence = torch.softmax(logits, dim=1).max().item()
+    return emoji, round(confidence * 100, 2)
+if submitted:
+    features = prepare_input_for_model(traits, age, gender)
+    sentiment, conf = predict_sentiment(essay)
+    st.success(f"Sentiment: {sentiment} (Confidence: {conf}%)")
+    st.write("### Compatibility Score")
+    st.write(f"Input vector: {features}")
+    st.write("Using cosine similarity to calculate match score...")
+    # For demo, compare to a few mock profiles
+    mock_profiles = {
+        "Alex": prepare_input_for_model(["romantic", "adventurous"], 30, "male"),
+        "Sophia": prepare_input_for_model(["optimistic", "outdoorsy"], 27, "female"),
+        "Olivia": prepare_input_for_model(["empathetic", "creative"], 29, "female")
+    }
+    for name, vec in mock_profiles.items():
+        score = cosine_similarity([features], [vec])[0][0]
+        st.write(f"{name}: {round(score * 100, 2)}% match 💘")

config.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.50.3",
+  "vocab_size": 30522
+}

cupid_match_model_best.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:121111d8acdd3cb31ac606e941e70e00b1741432436524b53eef4d61126cbae2
+size 26107

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:40aeb76eac3aa33d233d25ac285e098a34dc0bd9d0a55d4ea57f999b15bafcfd
+size 267832560

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit
+scikit-learn
+numpy
+torch
+transformers
+joblib

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff