File size: 2,286 Bytes
ca11711
bd4a4f5
 
 
 
ca11711
bd4a4f5
ca11711
 
bd4a4f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca11711
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import tempfile

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.figure import Figure
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

def greet(name, name2, name3):
    """Build an exclamatory greeting for ``name``.

    ``name2`` and ``name3`` are accepted but are not used by the body.
    """
    pieces = ("Hello ", name, "!!")
    return "".join(pieces)

# Dummy function to simulate getting embeddings from different models
def get_embeddings(model_name, data):
    """Return simulated 128-dimensional embeddings, one row per item in ``data``.

    This is a placeholder: ``model_name`` is accepted but not consulted, so
    every model yields the same values for inputs of the same length.

    Args:
        model_name: Name of the model being simulated (currently unused).
        data: Sized collection of items to "embed"; only its length is used.

    Returns:
        A ``(len(data), 128)`` float array of uniform values in ``[0, 1)``.
    """
    # A private generator seeded with 0 produces the same numbers as
    # ``np.random.seed(0)`` followed by ``np.random.rand``, but leaves the
    # process-wide NumPy random state untouched for every other caller.
    rng = np.random.RandomState(0)
    return rng.rand(len(data), 128)  # Simulate 128-dimensional embeddings

def visualize_embeddings(model1, model2, data):
    """Plot 2-D PCA projections of two models' embeddings and save them as a PNG.

    Embeddings for both models are stacked, projected to two dimensions with
    a single PCA fit, and drawn as two scatter series on the same axes.

    Args:
        model1: Name of the first model; passed to ``get_embeddings`` and
            used as its legend label.
        model2: Name of the second model; passed to ``get_embeddings`` and
            used as its legend label.
        data: Comma-separated items to embed. Whitespace around each item is
            stripped and blank entries are ignored.

    Returns:
        Path of a newly created PNG file containing the scatter plot.

    Raises:
        ValueError: If ``data`` contains no non-blank items.
    """
    # Convert input data to a list, dropping blanks (e.g. from a trailing
    # comma) so they do not appear as phantom points in the plot.
    items = [piece.strip() for piece in data.split(',')]
    items = [piece for piece in items if piece]
    if not items:
        raise ValueError("Provide at least one non-empty, comma-separated item.")
    count = len(items)

    # Get embeddings
    embeddings1 = get_embeddings(model1, items)
    embeddings2 = get_embeddings(model2, items)

    # Combine embeddings: rows [0, count) belong to model1, the rest to model2.
    combined_embeddings = np.concatenate((embeddings1, embeddings2), axis=0)

    # Reduce to two dimensions with PCA.
    # NOTE: a further t-SNE pass over the PCA output was previously sketched
    # here but is disabled; the PCA projection is plotted directly.
    pca = PCA(n_components=2)
    projected = pca.fit_transform(combined_embeddings)

    # Draw on a standalone Figure rather than through pyplot: pyplot keeps
    # global state that is not safe to share between server worker threads
    # and would leak the figure if an exception fired before it was closed.
    fig = Figure(figsize=(10, 5))
    ax = fig.subplots()
    ax.scatter(projected[:count, 0], projected[:count, 1], label=model1, alpha=0.5)
    ax.scatter(projected[count:, 0], projected[count:, 1], label=model2, alpha=0.5)
    ax.legend()
    ax.set_title('Embeddings Visualization')
    ax.set_xlabel('Dimension 1')
    ax.set_ylabel('Dimension 2')

    # Write each plot to its own temp file so concurrent requests cannot
    # overwrite one another's image; the file is kept for the caller to read.
    with tempfile.NamedTemporaryFile(
        prefix='embeddings_plot_', suffix='.png', delete=False
    ) as png_file:
        fig.savefig(png_file, format='png')

    return png_file.name

# Define Gradio interface
# Model 1 - sentence-transformers/sentence-t5-large
# Model 2 - nomic-ai/nomic-embed-text-v1
demo = gr.Interface(
    fn=visualize_embeddings,
    inputs=[
        gr.Textbox(label="Model 1 Name"),
        gr.Textbox(label="Model 2 Name"),
        gr.Textbox(lines=2, label="Data (comma-separated)")
    ],
    outputs="image",
    title="Embeddings Visualizer",
    description="Visualize embeddings from different models"
)

# Launch only when run as a script, so that importing this module does not
# start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()