rrichaz committed on
Commit
d126556
1 Parent(s): 1602f13

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.txt +13 -0
  2. app.py +77 -0
  3. requirements.txt +11 -0
README.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: ♨️NLPSentenceSimilarityHeatmapCluster🗺️Streamlit
3
+ emoji: ♨️🗺️
4
+ colorFrom: gray
5
+ colorTo: pink
6
+ sdk: streamlit
7
+ sdk_version: 1.10.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import nltk
3
+ from transformers import pipeline
4
+ from sentence_transformers import SentenceTransformer
5
+ from scipy.spatial.distance import cosine
6
+ import numpy as np
7
+ import seaborn as sns
8
+ import matplotlib.pyplot as plt
9
+ from sklearn.cluster import KMeans
10
+ import tensorflow as tf
11
+ import tensorflow_hub as hub
12
+
13
+
14
def cluster_examples(messages, embed, nc=3):
    """Cluster sentences with K-Means and render each cluster via Streamlit.

    Args:
        messages: Sequence of sentences, parallel to ``embed``.
        embed: One embedding vector per entry of ``messages``; handed to
            ``KMeans.fit_predict`` unchanged.
        nc: Requested number of clusters. It is capped at the number of
            sentences because K-Means cannot fit more clusters than samples.

    Returns:
        None. Output is written to the page with ``st.markdown``.
    """
    n_samples = len(messages)
    if n_samples == 0:
        # Nothing to cluster; fitting K-Means on zero samples would raise.
        return

    # The UI slider can request more clusters than there are sentences.
    n_clusters = max(1, min(nc, n_samples))
    km = KMeans(
        n_clusters=n_clusters, init='random',
        n_init=10, max_iter=300,
        tol=1e-04, random_state=0,
    )
    assignments = km.fit_predict(embed)

    # Bucket the sentences in a single pass, preserving their input order.
    grouped = [[] for _ in range(n_clusters)]
    for message, cluster_id in zip(messages, assignments):
        grouped[cluster_id].append(message)

    for n, members in enumerate(grouped):
        st.markdown("CLUSTER : %d" % n)
        for m in members:
            st.markdown(m)
27
+
28
+
29
def plot_heatmap(labels, heatmap, rotation=90):
    """Draw a labelled heatmap and display it on the Streamlit page.

    Args:
        labels: Tick labels, applied to both the x and y axes.
        heatmap: 2-D matrix of values to plot; the colour scale is fixed to
            the range [-1, 1].
        rotation: Rotation in degrees for the x-axis tick labels.

    Returns:
        None. The figure is rendered with ``st.pyplot`` and then closed.
    """
    sns.set(font_scale=1.2)
    fig, ax = plt.subplots()
    try:
        # Pass the axes explicitly instead of relying on pyplot's implicit
        # "current axes" global state.
        g = sns.heatmap(
            heatmap,
            xticklabels=labels,
            yticklabels=labels,
            vmin=-1,
            vmax=1,
            cmap="coolwarm",
            ax=ax,
        )
        g.set_xticklabels(labels, rotation=rotation)
        g.set_title("Textual Similarity")

        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until it is closed; the script is
        # re-run on each interaction, so unclosed figures would pile up.
        plt.close(fig)
44
+
45
+ #st.header("Sentence Similarity Demo")
46
+
47
# Streamlit input widgets.
text = st.text_area(
    'Enter sentences:',
    value=(
        "Self confidence in outcomes helps us win and to make us successful.\n"
        "She has a seriously impressive intellect and mind.\n"
        "Stimulating and deep conversation helps us develop and grow.\n"
        "From basic quantum particles we get aerodynamics, friction, surface tension, weather, electromagnetism.\n"
        "If she actively engages and comments positively, her anger disappears adapting into win-win's favor.\n"
        "I love interesting topics of conversation and the understanding and exploration of thoughts.\n"
        "There is the ability to manipulate things the way you want in your mind to go how you want when you are self confident, that we don’t understand yet."
    ),
)

nc = st.slider('Select a number of clusters:', min_value=1, max_value=15, value=3)

model_type = st.radio("Choose model:", ('Sentence Transformer', 'Universal Sentence Encoder'), index=0)

# Streamlit re-executes this script on every widget interaction, so expensive
# resources are cached. The decorator name differs between Streamlit releases;
# fall back to a no-op decorator when neither is available.
_cache_resource = (
    getattr(st, "cache_resource", None)
    or getattr(st, "experimental_singleton", None)
    or (lambda func: func)
)


@_cache_resource
def _load_model(kind):
    """Return the embedding model selected by *kind* (a radio-button label)."""
    if kind == "Sentence Transformer":
        return SentenceTransformer('paraphrase-distilroberta-base-v1')
    if kind == "Universal Sentence Encoder":
        return hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")
    raise ValueError("Unknown model type: %r" % (kind,))


@_cache_resource
def _ensure_tokenizer_data():
    """Fetch the NLTK 'punkt' data used by ``sent_tokenize``."""
    return nltk.download('punkt')


# Model setup
model = _load_model(model_type)
_ensure_tokenizer_data()

# Run model
# Whitespace-only input produces no sentences; skip the pipeline in that case
# rather than plotting/clustering an empty matrix.
sentences = nltk.tokenize.sent_tokenize(text) if text and text.strip() else []
if sentences:
    if model_type == "Sentence Transformer":
        embed = model.encode(sentences)
    else:
        embed = model(sentences).numpy()

    # Pairwise cosine similarity: normalise each row to unit length, then a
    # single matrix product gives every dot product at once.
    vectors = np.asarray(embed, dtype=np.float64)
    unit = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
    sim = np.clip(unit @ unit.T, -1.0, 1.0)  # clip away rounding overshoot

    st.subheader("Similarity Heatmap")
    plot_heatmap(sentences, sim)
    st.subheader("Results from K-Means Clustering")
    # K-Means cannot fit more clusters than there are sentences.
    cluster_examples(sentences, embed, min(nc, len(sentences)))
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ sentence_transformers
4
+ nltk
5
+ scipy
6
+ numpy
7
+ seaborn
8
+ matplotlib
9
+ scikit-learn
10
+ tensorflow_hub
11
+ tensorflow