Adding submit model instructions and route
Browse files- .gitignore +2 -1
- app.py +74 -1
- static/figures/cardinal.svg +629 -538
- static/figures/ordinal.svg +627 -536
- static/leaderboard.csv +19 -17
- templates/about.html +1 -1
- templates/failed_submission.html +221 -0
- templates/index.html +19 -2
- templates/model_detail.html +1 -1
- templates/model_submitted.html +223 -0
- templates/new_model.html +312 -0
.gitignore
CHANGED
|
@@ -1,3 +1,4 @@
|
|
| 1 |
.idea/
|
| 2 |
__pycache__/*
|
| 3 |
-
copy_data.sh
|
|
|
|
|
|
| 1 |
.idea/
|
| 2 |
__pycache__/*
|
| 3 |
+
copy_data.sh
|
| 4 |
+
uploads/*
|
app.py
CHANGED
|
@@ -1,9 +1,21 @@
|
|
| 1 |
-
|
| 2 |
import pandas as pd
|
| 3 |
import utils
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
app = Flask(__name__)
|
|
|
|
|
|
|
|
|
|
| 6 |
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
@app.route('/')
|
| 9 |
def index():
|
|
@@ -55,5 +67,66 @@ def model_detail(model_name):
|
|
| 55 |
def about():
|
| 56 |
return render_template('about.html')
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
if __name__ == '__main__':
|
| 59 |
app.run(host='0.0.0.0', port=7860, debug=True)
|
|
|
|
| 1 |
+
import os
|
| 2 |
import pandas as pd
|
| 3 |
import utils
|
| 4 |
+
import base64
|
| 5 |
+
import shutil
|
| 6 |
+
import zipfile
|
| 7 |
+
from flask import Flask, render_template, request, redirect, url_for
|
| 8 |
+
from postmarker.core import PostmarkClient
|
| 9 |
+
from werkzeug.utils import secure_filename
|
| 10 |
|
| 11 |
# Flask application object that every route below is registered on.
app = Flask(__name__)

# Upload settings: where submitted archives are written and which file
# extensions allowed_file() accepts.
app.config.update(
    UPLOAD_FOLDER='uploads',
    ALLOWED_EXTENSIONS={'zip'},
)

# Create the upload directory up front so saving an upload never hits a
# missing folder; exist_ok keeps restarts from failing.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
|
| 15 |
|
| 16 |
+
def allowed_file(filename):
    """Return True when *filename* carries an allowed extension.

    The extension is the text after the last dot, lower-cased and looked up
    in ``app.config['ALLOWED_EXTENSIONS']``. A name without any dot is
    rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in app.config['ALLOWED_EXTENSIONS']
|
| 19 |
|
| 20 |
@app.route('/')
|
| 21 |
def index():
|
|
|
|
| 67 |
def about():
|
| 68 |
return render_template('about.html')
|
| 69 |
|
| 70 |
+
@app.route('/new_model')
def new_model():
    """Render the ``new_model.html`` template for GET /new_model."""
    page = render_template('new_model.html')
    return page
|
| 73 |
+
|
| 74 |
+
@app.route('/model_submitted')
def model_submitted():
    """Render the confirmation page that submit_model redirects to on success."""
    page = render_template('model_submitted.html')
    return page
|
| 77 |
+
|
| 78 |
+
@app.route('/failed_submission')
def failed_submission():
    """Render the error page that submit_model redirects to on failure."""
    page = render_template('failed_submission.html')
    return page
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
@app.route('/submit_model', methods=['POST'])
def submit_model():
    """Handle a posted model submission and forward it by email.

    Reads ``model_name``, ``pull_request_link``, ``email`` and
    ``description`` from the form and a ZIP archive from the ``model_files``
    upload, saves the archive under ``UPLOAD_FOLDER``, then sends the details
    through Postmark with the archive attached.

    Returns:
        A redirect to ``model_submitted`` on success, or to
        ``failed_submission`` when a form field or the upload is missing,
        the upload does not pass ``allowed_file``, or the email cannot be
        sent.
    """
    # Function-scope import: only this handler needs HTML escaping.
    from html import escape

    # Use .get() so an incomplete form lands on the failure page instead of
    # Flask's generic 400 response. Empty strings are still accepted, exactly
    # as before; only an absent key counts as missing.
    field_names = ('model_name', 'pull_request_link', 'email', 'description')
    fields = {name: request.form.get(name) for name in field_names}
    if any(value is None for value in fields.values()):
        return redirect(url_for('failed_submission'))

    # Handle ZIP file upload. A missing part yields None; an upload with an
    # empty filename is falsy, so both cases are rejected here.
    upload = request.files.get('model_files')
    if not upload or not allowed_file(upload.filename):
        return redirect(url_for('failed_submission'))

    filename = secure_filename(upload.filename)
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    upload.save(file_path)

    # Read the file content and encode it in base64 for the attachment.
    with open(file_path, 'rb') as f:
        file_content = base64.b64encode(f.read()).decode('ascii')

    # Form values are untrusted: escape them before embedding in the HTML
    # body so a submitter cannot inject markup into the notification email.
    safe = {name: escape(value) for name, value in fields.items()}

    try:
        # Set up Postmark email client
        postmark = PostmarkClient(server_token=os.getenv('POSTMARK_SERVER_API'))

        # Send the email with the attachment
        postmark.emails.send(
            From='[email protected]',
            To='[email protected]',
            Subject=f"Stick to Your Role! Model Submission: {fields['model_name']}",
            HtmlBody=f"""
            <p><strong>Model Name:</strong> {safe['model_name']}</p>
            <p><strong>Pull Request Link:</strong> {safe['pull_request_link']}</p>
            <p><strong>Email:</strong> {safe['email']}</p>
            <p><strong>Description:</strong> {safe['description']}</p>
            """,
            Attachments=[{
                'Name': filename,
                'Content': file_content,
                'ContentType': 'application/zip',
            }],
        )
    except Exception:
        # Request boundary: a missing token, network error or Postmark
        # rejection must show the failure page rather than a 500. The
        # traceback is logged so the cause is not lost.
        app.logger.exception('Model submission email could not be sent')
        return redirect(url_for('failed_submission'))

    return redirect(url_for('model_submitted'))
|
| 129 |
+
|
| 130 |
+
|
| 131 |
if __name__ == '__main__':
    # The interactive Werkzeug debugger lets anyone who can reach the server
    # run arbitrary code, and this server binds to all interfaces. Keep debug
    # off by default; opt in with FLASK_DEBUG=1 for local development.
    debug_enabled = os.getenv('FLASK_DEBUG', '0').lower() in ('1', 'true')
    app.run(host='0.0.0.0', port=7860, debug=debug_enabled)
|
static/figures/cardinal.svg
CHANGED
|
|
|
|
static/figures/ordinal.svg
CHANGED
|
|
|
|
static/leaderboard.csv
CHANGED
|
@@ -1,18 +1,20 @@
|
|
| 1 |
Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,Separability,CFI,SRMR,RMSEA
|
| 2 |
-
phi-3-mini-128k-instruct,0.
|
| 3 |
-
phi-3-medium-128k-instruct,0.
|
| 4 |
-
Mistral-7B-Instruct-v0.1,0.
|
| 5 |
-
Mistral-7B-Instruct-v0.2,0.
|
| 6 |
-
Mistral-7B-Instruct-v0.3,0.
|
| 7 |
-
Mixtral-8x7B-Instruct-v0.1,0.
|
| 8 |
-
Mixtral-8x22B-Instruct-v0.1,0.
|
| 9 |
-
command_r_plus,0.
|
| 10 |
-
llama_3_8b_instruct,0.
|
| 11 |
-
llama_3_70b_instruct,0.
|
| 12 |
-
llama_3.1_8b_instruct,0.
|
| 13 |
-
llama_3.1_70b_instruct,0.
|
| 14 |
-
Qwen2-7B-Instruct,0.
|
| 15 |
-
Qwen2-72B-Instruct,0.
|
| 16 |
-
gpt-3.5-turbo-0125,0.
|
| 17 |
-
gpt-4o-0513,0.
|
| 18 |
-
|
|
|
|
|
|
|
|
|
| 1 |
Model,Ordinal (Win rate),Cardinal (Score),RO Stability,Stress,Separability,CFI,SRMR,RMSEA
|
| 2 |
+
phi-3-mini-128k-instruct,0.32853223593964337,0.4571976280473622,0.039299993295009855,0.281800547806919,0.963768115942029,0.7509527777777777,0.25489166666666674,0.22045000000000003
|
| 3 |
+
phi-3-medium-128k-instruct,0.34224965706447186,0.46871557360419164,0.09692037989916814,0.2651981204439735,0.9975845410628019,0.6727694444444445,0.2984500000000001,0.2759472222222221
|
| 4 |
+
Mistral-7B-Instruct-v0.1,0.19958847736625512,0.38323622857524176,0.027216280472015988,0.2829498135031582,0.995169082125604,0.500288888888889,0.45314444444444446,0.4191027777777777
|
| 5 |
+
Mistral-7B-Instruct-v0.2,0.38545953360768176,0.4692343788574553,0.14417876497818388,0.265188983528973,1.0,0.5787944444444445,0.35010277777777776,0.3171083333333333
|
| 6 |
+
Mistral-7B-Instruct-v0.3,0.2702331961591221,0.4168826678339619,0.07960539866974455,0.2742399030139009,0.9975845410628019,0.5231444444444444,0.4214972222222223,0.3914694444444443
|
| 7 |
+
Mixtral-8x7B-Instruct-v0.1,0.4746227709190672,0.5307045793457128,0.21473356319081474,0.2624402608740656,1.0,0.6766166666666665,0.25611666666666666,0.24065277777777772
|
| 8 |
+
Mixtral-8x22B-Instruct-v0.1,0.2791495198902606,0.41811429894732177,0.1414001940345544,0.2548838005881672,0.9654589371980676,0.45902777777777776,0.4849916666666666,0.4871833333333333
|
| 9 |
+
command_r_plus,0.5761316872427983,0.6136142726835458,0.3429686514651868,0.23811982320641845,0.963768115942029,0.7772111111111112,0.17755277777777778,0.17465277777777777
|
| 10 |
+
llama_3_8b_instruct,0.49108367626886146,0.5571604188191388,0.24527785038654715,0.245806400289881,0.961352657004831,0.7348277777777779,0.20952222222222228,0.20751944444444437
|
| 11 |
+
llama_3_70b_instruct,0.718792866941015,0.7573878472446817,0.607020698814379,0.18525883672204868,1.0,0.8298166666666668,0.10965277777777771,0.14649722222222217
|
| 12 |
+
llama_3.1_8b_instruct,0.5521262002743484,0.6056589663453942,0.4295080949846363,0.22060228669473025,0.9710144927536233,0.6379333333333334,0.3225500000000001,0.3328972222222223
|
| 13 |
+
llama_3.1_70b_instruct,0.7517146776406035,0.78874072958529,0.691365862744007,0.1709718847084183,0.9944444444444444,0.8203805555555554,0.14023055555555552,0.17041944444444446
|
| 14 |
+
Qwen2-7B-Instruct,0.4465020576131687,0.5256131964101429,0.25108519506513916,0.25776537005719313,0.9855072463768116,0.6248583333333334,0.32358611111111113,0.3028361111111111
|
| 15 |
+
Qwen2-72B-Instruct,0.5802469135802469,0.6858608495773215,0.6465993243020925,0.20297742879025626,0.9833333333333333,0.5559722222222221,0.3575638888888889,0.39241388888888884
|
| 16 |
+
gpt-3.5-turbo-0125,0.22565157750342937,0.4028828123262879,0.08240359836763214,0.28728574920060357,1.0,0.4998916666666666,0.47583055555555553,0.4404444444444445
|
| 17 |
+
gpt-4o-0513,0.705761316872428,0.707844597747704,0.5122163952167618,0.19201420113771173,1.0,0.7998694444444445,0.14606111111111109,0.1400583333333334
|
| 18 |
+
gpt-4o-mini-2024-07-18,0.37517146776406035,0.4740062039155729,0.13575309046266867,0.2707065266105181,1.0,0.6141777777777777,0.32648055555555555,0.29394722222222214
|
| 19 |
+
Mistral-Large-Instruct-2407,0.7613168724279836,0.8046038845509005,0.7644582301049158,0.16944638941325085,0.994806763285024,0.7604888888888888,0.18767499999999993,0.21457222222222228
|
| 20 |
+
dummy,0.14609053497942384,0.3585809973377891,-0.009004148398032956,0.2928877637010999,1.0,0.5076361111111111,0.4973388888888889,0.4541638888888889
|
templates/about.html
CHANGED
|
@@ -349,7 +349,7 @@ their expression of that value).
|
|
| 349 |
</p>
|
| 350 |
</div>
|
| 351 |
<div class="back-button">
|
| 352 |
-
<a href="{{ url_for('index') }}" class="custom-button mt-3">
|
| 353 |
</div>
|
| 354 |
<div class="citation-section">
|
| 355 |
<p>If you found this project useful, please cite our related paper:</p>
|
|
|
|
| 349 |
</p>
|
| 350 |
</div>
|
| 351 |
<div class="back-button">
|
| 352 |
+
<a href="{{ url_for('index') }}" class="custom-button mt-3">Main page</a>
|
| 353 |
</div>
|
| 354 |
<div class="citation-section">
|
| 355 |
<p>If you found this project useful, please cite our related paper:</p>
|
templates/failed_submission.html
ADDED
|
@@ -0,0 +1,221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Stick To Your Role! Submission failed</title>
|
| 7 |
+
<!-- Include Bootstrap CSS for styling -->
|
| 8 |
+
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/5.1.3/css/bootstrap.min.css">
|
| 9 |
+
<!-- Include DataTables CSS -->
|
| 10 |
+
<link rel="stylesheet" href="https://cdn.datatables.net/1.11.5/css/dataTables.bootstrap5.min.css">
|
| 11 |
+
<!-- Custom CSS for additional styling -->
|
| 12 |
+
<style>
|
| 13 |
+
body {
|
| 14 |
+
background-color: #f8f9fa;
|
| 15 |
+
font-family: 'Arial', sans-serif;
|
| 16 |
+
}
|
| 17 |
+
.container {
|
| 18 |
+
max-width: 1200px; /* Limit the width of the container */
|
| 19 |
+
margin: auto; /* Center the container */
|
| 20 |
+
padding: 20px; /* Add some padding */
|
| 21 |
+
background: #fff;
|
| 22 |
+
border-radius: 8px;
|
| 23 |
+
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
|
| 24 |
+
}
|
| 25 |
+
h1 {
|
| 26 |
+
color: #333;
|
| 27 |
+
text-align: center;
|
| 28 |
+
}
|
| 29 |
+
h2 {
|
| 30 |
+
color: #333;
|
| 31 |
+
margin-top: 30px;
|
| 32 |
+
text-align: center;
|
| 33 |
+
}
|
| 34 |
+
.table-responsive {
|
| 35 |
+
margin-top: 20px;
|
| 36 |
+
}
|
| 37 |
+
table {
|
| 38 |
+
border-collapse: separate;
|
| 39 |
+
border-spacing: 0;
|
| 40 |
+
font-size: 14px; /* Reduce the font size */
|
| 41 |
+
width: 100%;
|
| 42 |
+
border: none; /* Remove any default border */
|
| 43 |
+
}
|
| 44 |
+
table thead th {
|
| 45 |
+
background-color: #610b5d;
|
| 46 |
+
color: white;
|
| 47 |
+
border: 1px solid #dee2e6;
|
| 48 |
+
text-align: left;
|
| 49 |
+
}
|
| 50 |
+
table tbody tr {
|
| 51 |
+
background-color: #fff;
|
| 52 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 53 |
+
}
|
| 54 |
+
table tbody tr:hover {
|
| 55 |
+
background-color: #f1f1f1;
|
| 56 |
+
}
|
| 57 |
+
table td, table th {
|
| 58 |
+
padding: 10px; /* Reduce padding */
|
| 59 |
+
border: 1px solid #dee2e6;
|
| 60 |
+
}
|
| 61 |
+
table th:first-child {
|
| 62 |
+
border-top-left-radius: 10px;
|
| 63 |
+
}
|
| 64 |
+
table th:last-child {
|
| 65 |
+
border-top-right-radius: 10px;
|
| 66 |
+
}
|
| 67 |
+
.section{
|
| 68 |
+
padding-top: 19px;
|
| 69 |
+
text-align: left;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
.section p {
|
| 73 |
+
padding-left: 150px;
|
| 74 |
+
padding-right: 150px;
|
| 75 |
+
text-indent: 2em;
|
| 76 |
+
margin: auto;
|
| 77 |
+
margin-bottom: 10px;
|
| 78 |
+
text-align: left;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
.section ol, ul {
|
| 82 |
+
padding-left: 150px;
|
| 83 |
+
padding-right: 150px;
|
| 84 |
+
margin: auto;
|
| 85 |
+
margin-bottom: 20px;
|
| 86 |
+
margin-left: 50px;
|
| 87 |
+
text-align: left;
|
| 88 |
+
margin-top: 0px;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
.citation-section {
|
| 92 |
+
width: 100%;
|
| 93 |
+
margin-top: 50px;
|
| 94 |
+
text-align: center;
|
| 95 |
+
}
|
| 96 |
+
.citation-box {
|
| 97 |
+
background-color: #f8f9fa;
|
| 98 |
+
border: 1px solid #dee2e6;
|
| 99 |
+
border-radius: 8px;
|
| 100 |
+
padding: 10px;
|
| 101 |
+
margin-top: 5px;
|
| 102 |
+
font-size: 15px;
|
| 103 |
+
text-align: left;
|
| 104 |
+
font-family: 'Courier New', Courier, monospace;
|
| 105 |
+
white-space: pre;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
.image-container-structure {
|
| 109 |
+
display: flex;
|
| 110 |
+
justify-content: center;
|
| 111 |
+
gap: 10px;
|
| 112 |
+
margin-bottom: 40px;
|
| 113 |
+
max-width: 70%; /* Adjust the width as needed */
|
| 114 |
+
margin: auto;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
.image-container-structure a {
|
| 118 |
+
flex: 1;
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
.image-container-structure img {
|
| 122 |
+
max-width: 100%;
|
| 123 |
+
height: auto;
|
| 124 |
+
display: block;
|
| 125 |
+
margin: auto;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
.image-container {
|
| 129 |
+
width: 100%;
|
| 130 |
+
margin-bottom: 40px;
|
| 131 |
+
}
|
| 132 |
+
.image-container #admin-questionnaire {
|
| 133 |
+
width: 50%;
|
| 134 |
+
height: auto;
|
| 135 |
+
display: block;
|
| 136 |
+
margin: auto;
|
| 137 |
+
}
|
| 138 |
+
.image-container #ro-image {
|
| 139 |
+
width: 70%;
|
| 140 |
+
height: auto;
|
| 141 |
+
display: block;
|
| 142 |
+
margin: auto;
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
.section-title {
|
| 146 |
+
font-size: 24px;
|
| 147 |
+
font-weight: bold;
|
| 148 |
+
text-align: center;
|
| 149 |
+
margin-bottom: 40px;
|
| 150 |
+
padding: 20px; /* Add padding for more margin around text */
|
| 151 |
+
background-color: #610b5d;
|
| 152 |
+
color: #fff; /* Ensure text is readable on dark background */
|
| 153 |
+
border-radius: 15px; /* Rounded edges */
|
| 154 |
+
}
|
| 155 |
+
.back-button {
|
| 156 |
+
text-align: center;
|
| 157 |
+
margin-top: 50px;
|
| 158 |
+
}
|
| 159 |
+
.custom-button {
|
| 160 |
+
background-color: #610b5d;
|
| 161 |
+
color: #fff; /* Set white text color */
|
| 162 |
+
border-radius: 15px; /* Rounded edges */
|
| 163 |
+
padding: 10px 20px; /* Padding for the button */
|
| 164 |
+
font-size: 18px; /* Increase font size */
|
| 165 |
+
text-decoration: none; /* Remove underline */
|
| 166 |
+
}
|
| 167 |
+
.custom-button:hover {
|
| 168 |
+
background-color: #812b7d;
|
| 169 |
+
color: #fff;
|
| 170 |
+
}
|
| 171 |
+
</style>
|
| 172 |
+
</head>
|
| 173 |
+
<body>
|
| 174 |
+
<div class="container">
|
| 175 |
+
<h1 class="mt-5">Stick To Your Role! Leaderboard</h1>
|
| 176 |
+
<div class="table-responsive">
|
| 177 |
+
<!-- Render the table HTML here -->
|
| 178 |
+
{{ table_html|safe }}
|
| 179 |
+
</div>
|
| 180 |
+
<div class="section">
|
| 181 |
+
<div class="section-title">There was an issue with your submission.</div>
|
| 182 |
+
<p>
|
| 183 |
+
Try again or contact us at <a href= "mailto: [email protected]">[email protected]</a>.
|
| 184 |
+
</p>
|
| 185 |
+
<div class="back-button">
|
| 186 |
+
<a href="{{ url_for('index') }}" class="custom-button mt-3">Main page</a>
|
| 187 |
+
</div>
|
| 188 |
+
</div>
|
| 189 |
+
</div>
|
| 190 |
+
</div>
|
| 191 |
+
|
| 192 |
+
<!-- Include jQuery -->
|
| 193 |
+
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
|
| 194 |
+
<!-- Include Bootstrap JS -->
|
| 195 |
+
<script src="https://stackpath.bootstrapcdn.com/bootstrap/5.1.3/js/bootstrap.bundle.min.js"></script>
|
| 196 |
+
<!-- Include DataTables JS -->
|
| 197 |
+
<script src="https://cdn.datatables.net/1.11.5/js/jquery.dataTables.min.js"></script>
|
| 198 |
+
<script src="https://cdn.datatables.net/1.11.5/js/dataTables.bootstrap5.min.js"></script>
|
| 199 |
+
<!-- Initialize DataTables -->
|
| 200 |
+
<script>
|
| 201 |
+
$(document).ready(function() {
|
| 202 |
+
const table = $('table').DataTable({
|
| 203 |
+
"paging": false,
|
| 204 |
+
"info": false,
|
| 205 |
+
"columnDefs": [
|
| 206 |
+
{ "orderable": false, "targets": 0 },
|
| 207 |
+
{ "searchable": false, "targets": 0 }
|
| 208 |
+
],
|
| 209 |
+
"order": [[ 2, 'desc' ]],
|
| 210 |
+
"drawCallback": function(settings) {
|
| 211 |
+
var api = this.api();
|
| 212 |
+
api.column(0, {order:'applied'}).nodes().each(function(cell, i) {
|
| 213 |
+
cell.innerHTML = i + 1;
|
| 214 |
+
});
|
| 215 |
+
}
|
| 216 |
+
});
|
| 217 |
+
});
|
| 218 |
+
|
| 219 |
+
</script>
|
| 220 |
+
</body>
|
| 221 |
+
</html>
|
templates/index.html
CHANGED
|
@@ -41,6 +41,14 @@
|
|
| 41 |
text-align: left;
|
| 42 |
}
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
.table-responsive {
|
| 45 |
margin-top: 20px;
|
| 46 |
max-width: 1000px; /* Adjust the width as needed */
|
|
@@ -195,8 +203,8 @@
|
|
| 195 |
As proposed in our <a href="https://arxiv.org/abs/2402.14846">paper</a>,
|
| 196 |
unwanted context-dependence should be seen as a <b>property of LLMs</b> - a dimension of LLM comparison (alongside others such as model size, speed, or expressed knowledge).
|
| 197 |
This leaderboard aims to provide such a comparison and extends our paper with a more focused and elaborate experimental setup.
|
| 198 |
-
Standard benchmarks present MANY questions from the SAME MINIMAL contexts (e.g. multiple choice questions),
|
| 199 |
-
we present SAME questions from MANY different contexts
|
| 200 |
</p>
|
| 201 |
<div class="table-responsive main-table">
|
| 202 |
<!-- Render the table HTML here -->
|
|
@@ -238,6 +246,9 @@
|
|
| 238 |
<div class="about-button">
|
| 239 |
<a href="{{ url_for('about') }}" class="custom-button mt-3">Learn More About This Project</a>
|
| 240 |
</div>
|
|
|
|
|
|
|
|
|
|
| 241 |
<div class="citation-section">
|
| 242 |
<p>
|
| 243 |
If you found this project useful, please cite our related paper,
|
|
@@ -253,6 +264,12 @@
|
|
| 253 |
}
|
| 254 |
</div>
|
| 255 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
</div>
|
| 257 |
|
| 258 |
<!-- Include jQuery -->
|
|
|
|
| 41 |
text-align: left;
|
| 42 |
}
|
| 43 |
|
| 44 |
+
ul {
|
| 45 |
+
margin: auto; /* Center the list */
|
| 46 |
+
margin-top: 20px;
|
| 47 |
+
margin-bottom: 10px;
|
| 48 |
+
max-width: 1000px; /* Adjust the width as needed */
|
| 49 |
+
text-align: left;
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
.table-responsive {
|
| 53 |
margin-top: 20px;
|
| 54 |
max-width: 1000px; /* Adjust the width as needed */
|
|
|
|
| 203 |
As proposed in our <a href="https://arxiv.org/abs/2402.14846">paper</a>,
|
| 204 |
unwanted context-dependence should be seen as a <b>property of LLMs</b> - a dimension of LLM comparison (alongside others such as model size, speed, or expressed knowledge).
|
| 205 |
This leaderboard aims to provide such a comparison and extends our paper with a more focused and elaborate experimental setup.
|
| 206 |
+
Standard benchmarks present <b>MANY</b> questions from the <b>SAME MINIMAL contexts</b> (e.g. multiple choice questions),
|
| 207 |
+
we present <b>SAME</b> questions from <b>MANY different contexts</b>.
|
| 208 |
</p>
|
| 209 |
<div class="table-responsive main-table">
|
| 210 |
<!-- Render the table HTML here -->
|
|
|
|
| 246 |
<div class="about-button">
|
| 247 |
<a href="{{ url_for('about') }}" class="custom-button mt-3">Learn More About This Project</a>
|
| 248 |
</div>
|
| 249 |
+
<div class="about-button">
|
| 250 |
+
<a href="{{ url_for('new_model') }}" class="custom-button mt-3">Submit a model</a>
|
| 251 |
+
</div>
|
| 252 |
<div class="citation-section">
|
| 253 |
<p>
|
| 254 |
If you found this project useful, please cite our related paper,
|
|
|
|
| 264 |
}
|
| 265 |
</div>
|
| 266 |
</div>
|
| 267 |
+
<ul>
|
| 268 |
+
<li>Contact: <a href="mailto: [email protected]">[email protected]</a></li>
|
| 269 |
+
<li>See the <a href="https://sites.google.com/view/llmvaluestability">Project website</a></li>
|
| 270 |
+
<li>See the Flowers team <a href="http://developmentalsystems.org">blog</a> and <a href="https://flowers.inria.fr/">website</a></li>
|
| 271 |
+
<li>See Grgur's website and other projects: <a href="https://grgkovac.github.io/">https://grgkovac.github.io/</a></li>
|
| 272 |
+
</ul>
|
| 273 |
</div>
|
| 274 |
|
| 275 |
<!-- Include jQuery -->
|
templates/model_detail.html
CHANGED
|
@@ -140,7 +140,7 @@
|
|
| 140 |
</div>
|
| 141 |
</div>
|
| 142 |
<div class="back-button">
|
| 143 |
-
<a href="{{ url_for('index') }}" class="custom-button mt-3">
|
| 144 |
</div>
|
| 145 |
</div>
|
| 146 |
|
|
|
|
| 140 |
</div>
|
| 141 |
</div>
|
| 142 |
<div class="back-button">
|
| 143 |
+
<a href="{{ url_for('index') }}" class="custom-button mt-3">Main page</a>
|
| 144 |
</div>
|
| 145 |
</div>
|
| 146 |
|
templates/model_submitted.html
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Stick To Your Role! Model submitted</title>
|
| 7 |
+
<!-- Include Bootstrap CSS for styling -->
|
| 8 |
+
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/5.1.3/css/bootstrap.min.css">
|
| 9 |
+
<!-- Include DataTables CSS -->
|
| 10 |
+
<link rel="stylesheet" href="https://cdn.datatables.net/1.11.5/css/dataTables.bootstrap5.min.css">
|
| 11 |
+
<!-- Custom CSS for additional styling -->
|
| 12 |
+
<style>
|
| 13 |
+
body {
|
| 14 |
+
background-color: #f8f9fa;
|
| 15 |
+
font-family: 'Arial', sans-serif;
|
| 16 |
+
}
|
| 17 |
+
.container {
|
| 18 |
+
max-width: 1200px; /* Limit the width of the container */
|
| 19 |
+
margin: auto; /* Center the container */
|
| 20 |
+
padding: 20px; /* Add some padding */
|
| 21 |
+
background: #fff;
|
| 22 |
+
border-radius: 8px;
|
| 23 |
+
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
|
| 24 |
+
}
|
| 25 |
+
h1 {
|
| 26 |
+
color: #333;
|
| 27 |
+
text-align: center;
|
| 28 |
+
}
|
| 29 |
+
h2 {
|
| 30 |
+
color: #333;
|
| 31 |
+
margin-top: 30px;
|
| 32 |
+
text-align: center;
|
| 33 |
+
}
|
| 34 |
+
.table-responsive {
|
| 35 |
+
margin-top: 20px;
|
| 36 |
+
}
|
| 37 |
+
table {
|
| 38 |
+
border-collapse: separate;
|
| 39 |
+
border-spacing: 0;
|
| 40 |
+
font-size: 14px; /* Reduce the font size */
|
| 41 |
+
width: 100%;
|
| 42 |
+
border: none; /* Remove any default border */
|
| 43 |
+
}
|
| 44 |
+
table thead th {
|
| 45 |
+
background-color: #610b5d;
|
| 46 |
+
color: white;
|
| 47 |
+
border: 1px solid #dee2e6;
|
| 48 |
+
text-align: left;
|
| 49 |
+
}
|
| 50 |
+
table tbody tr {
|
| 51 |
+
background-color: #fff;
|
| 52 |
+
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 53 |
+
}
|
| 54 |
+
table tbody tr:hover {
|
| 55 |
+
background-color: #f1f1f1;
|
| 56 |
+
}
|
| 57 |
+
table td, table th {
|
| 58 |
+
padding: 10px; /* Reduce padding */
|
| 59 |
+
border: 1px solid #dee2e6;
|
| 60 |
+
}
|
| 61 |
+
table th:first-child {
|
| 62 |
+
border-top-left-radius: 10px;
|
| 63 |
+
}
|
| 64 |
+
table th:last-child {
|
| 65 |
+
border-top-right-radius: 10px;
|
| 66 |
+
}
|
| 67 |
+
.section{
|
| 68 |
+
padding-top: 19px;
|
| 69 |
+
text-align: left;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
.section p {
|
| 73 |
+
padding-left: 150px;
|
| 74 |
+
padding-right: 150px;
|
| 75 |
+
text-indent: 2em;
|
| 76 |
+
margin: auto;
|
| 77 |
+
margin-bottom: 10px;
|
| 78 |
+
text-align: left;
|
| 79 |
+
}
|
| 80 |
+
|
| 81 |
+
.section ol, ul {
|
| 82 |
+
padding-left: 150px;
|
| 83 |
+
padding-right: 150px;
|
| 84 |
+
margin: auto;
|
| 85 |
+
margin-bottom: 20px;
|
| 86 |
+
margin-left: 50px;
|
| 87 |
+
text-align: left;
|
| 88 |
+
margin-top: 0px;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
.citation-section {
|
| 92 |
+
width: 100%;
|
| 93 |
+
margin-top: 50px;
|
| 94 |
+
text-align: center;
|
| 95 |
+
}
|
| 96 |
+
.citation-box {
|
| 97 |
+
background-color: #f8f9fa;
|
| 98 |
+
border: 1px solid #dee2e6;
|
| 99 |
+
border-radius: 8px;
|
| 100 |
+
padding: 10px;
|
| 101 |
+
margin-top: 5px;
|
| 102 |
+
font-size: 15px;
|
| 103 |
+
text-align: left;
|
| 104 |
+
font-family: 'Courier New', Courier, monospace;
|
| 105 |
+
white-space: pre;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
.image-container-structure {
|
| 109 |
+
display: flex;
|
| 110 |
+
justify-content: center;
|
| 111 |
+
gap: 10px;
|
| 112 |
+
margin-bottom: 40px;
|
| 113 |
+
max-width: 70%; /* Adjust the width as needed */
|
| 114 |
+
margin: auto;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
.image-container-structure a {
|
| 118 |
+
flex: 1;
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
.image-container-structure img {
|
| 122 |
+
max-width: 100%;
|
| 123 |
+
height: auto;
|
| 124 |
+
display: block;
|
| 125 |
+
margin: auto;
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
.image-container {
|
| 129 |
+
width: 100%;
|
| 130 |
+
margin-bottom: 40px;
|
| 131 |
+
}
|
| 132 |
+
.image-container #admin-questionnaire {
|
| 133 |
+
width: 50%;
|
| 134 |
+
height: auto;
|
| 135 |
+
display: block;
|
| 136 |
+
margin: auto;
|
| 137 |
+
}
|
| 138 |
+
.image-container #ro-image {
|
| 139 |
+
width: 70%;
|
| 140 |
+
height: auto;
|
| 141 |
+
display: block;
|
| 142 |
+
margin: auto;
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
.section-title {
|
| 146 |
+
font-size: 24px;
|
| 147 |
+
font-weight: bold;
|
| 148 |
+
text-align: center;
|
| 149 |
+
margin-bottom: 40px;
|
| 150 |
+
padding: 20px; /* Add padding for more margin around text */
|
| 151 |
+
background-color: #610b5d;
|
| 152 |
+
color: #fff; /* Ensure text is readable on dark background */
|
| 153 |
+
border-radius: 15px; /* Rounded edges */
|
| 154 |
+
}
|
| 155 |
+
.back-button {
|
| 156 |
+
text-align: center;
|
| 157 |
+
margin-top: 50px;
|
| 158 |
+
}
|
| 159 |
+
.custom-button {
|
| 160 |
+
background-color: #610b5d;
|
| 161 |
+
color: #fff; /* Set white text color */
|
| 162 |
+
border-radius: 15px; /* Rounded edges */
|
| 163 |
+
padding: 10px 20px; /* Padding for the button */
|
| 164 |
+
font-size: 18px; /* Increase font size */
|
| 165 |
+
text-decoration: none; /* Remove underline */
|
| 166 |
+
}
|
| 167 |
+
.custom-button:hover {
|
| 168 |
+
background-color: #812b7d;
|
| 169 |
+
color: #fff;
|
| 170 |
+
}
|
| 171 |
+
</style>
|
| 172 |
+
</head>
|
| 173 |
+
<body>
|
| 174 |
+
<div class="container">
|
| 175 |
+
<h1 class="mt-5">Stick To Your Role! Leaderboard</h1>
|
| 176 |
+
<div class="table-responsive">
|
| 177 |
+
<!-- Render the table HTML here -->
|
| 178 |
+
{{ table_html|safe }}
|
| 179 |
+
</div>
|
| 180 |
+
<div class="section">
|
| 181 |
+
<div class="section-title">Thank you for submitting your model!</div>
|
| 182 |
+
<p>
|
| 183 |
+
We will get back to you to confirm the reception of the model.
|
| 184 |
+
If we do not get back to you within two weeks, please contact us at:
|
| 185 |
+
<a href= "mailto: [email protected]">[email protected]</a>.
|
| 186 |
+
</p>
|
| 187 |
+
<div class="back-button">
|
| 188 |
+
<a href="{{ url_for('index') }}" class="custom-button mt-3">Main page</a>
|
| 189 |
+
</div>
|
| 190 |
+
</div>
|
| 191 |
+
</div>
|
| 192 |
+
</div>
|
| 193 |
+
|
| 194 |
+
<!-- Include jQuery -->
|
| 195 |
+
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
|
| 196 |
+
<!-- Include Bootstrap JS -->
|
| 197 |
+
<script src="https://stackpath.bootstrapcdn.com/bootstrap/5.1.3/js/bootstrap.bundle.min.js"></script>
|
| 198 |
+
<!-- Include DataTables JS -->
|
| 199 |
+
<script src="https://cdn.datatables.net/1.11.5/js/jquery.dataTables.min.js"></script>
|
| 200 |
+
<script src="https://cdn.datatables.net/1.11.5/js/dataTables.bootstrap5.min.js"></script>
|
| 201 |
+
<!-- Initialize DataTables -->
|
| 202 |
+
<script>
|
| 203 |
+
$(document).ready(function() {
|
| 204 |
+
const table = $('table').DataTable({
|
| 205 |
+
"paging": false,
|
| 206 |
+
"info": false,
|
| 207 |
+
"columnDefs": [
|
| 208 |
+
{ "orderable": false, "targets": 0 },
|
| 209 |
+
{ "searchable": false, "targets": 0 }
|
| 210 |
+
],
|
| 211 |
+
"order": [[ 2, 'desc' ]],
|
| 212 |
+
"drawCallback": function(settings) {
|
| 213 |
+
var api = this.api();
|
| 214 |
+
api.column(0, {order:'applied'}).nodes().each(function(cell, i) {
|
| 215 |
+
cell.innerHTML = i + 1;
|
| 216 |
+
});
|
| 217 |
+
}
|
| 218 |
+
});
|
| 219 |
+
});
|
| 220 |
+
|
| 221 |
+
</script>
|
| 222 |
+
</body>
|
| 223 |
+
</html>
|
templates/new_model.html
ADDED
|
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Stick To Your Role! Submit a model</title>
|
| 7 |
+
<!-- Include Bootstrap CSS for styling -->
|
| 8 |
+
<link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/5.1.3/css/bootstrap.min.css">
|
| 9 |
+
<!-- Include DataTables CSS -->
|
| 10 |
+
<link rel="stylesheet" href="https://cdn.datatables.net/1.11.5/css/dataTables.bootstrap5.min.css">
|
| 11 |
+
<!-- Custom CSS for additional styling -->
|
| 12 |
+
<style>
|
| 13 |
+
body {
|
| 14 |
+
background-color: #f8f9fa;
|
| 15 |
+
font-family: 'Arial', sans-serif;
|
| 16 |
+
}
|
| 17 |
+
.container {
|
| 18 |
+
max-width: 1200px; /* Limit the width of the container */
|
| 19 |
+
margin: auto; /* Center the container */
|
| 20 |
+
padding: 20px; /* Add some padding */
|
| 21 |
+
background: #fff;
|
| 22 |
+
border-radius: 8px;
|
| 23 |
+
box-shadow: 0 4px 8px rgba(0,0,0,0.1);
|
| 24 |
+
}
|
| 25 |
+
h1 {
|
| 26 |
+
color: #333;
|
| 27 |
+
text-align: center;
|
| 28 |
+
}
|
| 29 |
+
h2 {
|
| 30 |
+
color: #333;
|
| 31 |
+
margin-top: 30px;
|
| 32 |
+
text-align: center;
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
.section {
|
| 36 |
+
padding-top: 19px;
|
| 37 |
+
text-align: left;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
.section p {
|
| 41 |
+
padding-left: 150px;
|
| 42 |
+
padding-right: 150px;
|
| 43 |
+
text-indent: 2em;
|
| 44 |
+
margin: auto;
|
| 45 |
+
margin-bottom: 10px;
|
| 46 |
+
text-align: left;
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
.section ol, ul {
|
| 50 |
+
padding-left: 150px;
|
| 51 |
+
padding-right: 150px;
|
| 52 |
+
margin: auto;
|
| 53 |
+
margin-bottom: 20px;
|
| 54 |
+
margin-left: 50px;
|
| 55 |
+
text-align: left;
|
| 56 |
+
margin-top: 0px;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
.citation-section {
|
| 60 |
+
width: 100%;
|
| 61 |
+
margin-top: 50px;
|
| 62 |
+
text-align: center;
|
| 63 |
+
}
|
| 64 |
+
.citation-box {
|
| 65 |
+
background-color: #f8f9fa;
|
| 66 |
+
border: 1px solid #dee2e6;
|
| 67 |
+
border-radius: 8px;
|
| 68 |
+
padding: 10px;
|
| 69 |
+
margin-top: 5px;
|
| 70 |
+
font-size: 15px;
|
| 71 |
+
text-align: left;
|
| 72 |
+
font-family: 'Courier New', Courier, monospace;
|
| 73 |
+
white-space: pre;
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
.image-container-structure {
|
| 77 |
+
display: flex;
|
| 78 |
+
justify-content: center;
|
| 79 |
+
gap: 10px;
|
| 80 |
+
margin-bottom: 40px;
|
| 81 |
+
max-width: 70%; /* Adjust the width as needed */
|
| 82 |
+
margin: auto;
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
.image-container-structure a {
|
| 86 |
+
flex: 1;
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
.image-container-structure img {
|
| 90 |
+
max-width: 100%;
|
| 91 |
+
height: auto;
|
| 92 |
+
display: block;
|
| 93 |
+
margin: auto;
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
.image-container {
|
| 97 |
+
width: 100%;
|
| 98 |
+
margin-bottom: 40px;
|
| 99 |
+
}
|
| 100 |
+
.image-container #admin-questionnaire {
|
| 101 |
+
width: 50%;
|
| 102 |
+
height: auto;
|
| 103 |
+
display: block;
|
| 104 |
+
margin: auto;
|
| 105 |
+
}
|
| 106 |
+
.image-container #ro-image {
|
| 107 |
+
width: 70%;
|
| 108 |
+
height: auto;
|
| 109 |
+
display: block;
|
| 110 |
+
margin: auto;
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
.section-title {
|
| 114 |
+
font-size: 24px;
|
| 115 |
+
font-weight: bold;
|
| 116 |
+
text-align: center;
|
| 117 |
+
margin-bottom: 40px;
|
| 118 |
+
padding: 20px; /* Add padding for more margin around text */
|
| 119 |
+
background-color: #610b5d;
|
| 120 |
+
color: #fff; /* Ensure text is readable on dark background */
|
| 121 |
+
border-radius: 15px; /* Rounded edges */
|
| 122 |
+
}
|
| 123 |
+
.back-button {
|
| 124 |
+
text-align: center;
|
| 125 |
+
margin-top: 50px;
|
| 126 |
+
}
|
| 127 |
+
.custom-button {
|
| 128 |
+
background-color: #610b5d;
|
| 129 |
+
color: #fff; /* Set white text color */
|
| 130 |
+
border-radius: 15px; /* Rounded edges */
|
| 131 |
+
padding: 10px 20px; /* Padding for the button */
|
| 132 |
+
font-size: 18px; /* Increase font size */
|
| 133 |
+
text-decoration: none; /* Remove underline */
|
| 134 |
+
}
|
| 135 |
+
.custom-button:hover {
|
| 136 |
+
background-color: #812b7d;
|
| 137 |
+
color: #fff;
|
| 138 |
+
}
|
| 139 |
+
.form-container {
|
| 140 |
+
max-width: 80%; /* Adjust as needed */
|
| 141 |
+
margin: 20px 100px; /* Center horizontally */
|
| 142 |
+
padding: 50px 150px;
|
| 143 |
+
text-align: center;
|
| 144 |
+
background-color: #f8f9fa;
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
.form-row {
|
| 148 |
+
max-width: 100%;
|
| 149 |
+
margin-bottom: 20px;
|
| 150 |
+
text-align: left;
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
.form-label {
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
.col-md-4 {
|
| 157 |
+
width: 100%
|
| 158 |
+
}
|
| 159 |
+
.col-md-8 {
|
| 160 |
+
width: 100%
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
.form-content {
|
| 164 |
+
margin-bottom: 15px;
|
| 165 |
+
min-width: 100%;
|
| 166 |
+
}
|
| 167 |
+
.form-content::placeholder {
|
| 168 |
+
color: #aaa;
|
| 169 |
+
font-style: italic;
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
.file-input {
|
| 173 |
+
margin-top: 10px;
|
| 174 |
+
}
|
| 175 |
+
</style>
|
| 176 |
+
</head>
|
| 177 |
+
<body>
|
| 178 |
+
<div class="container">
|
| 179 |
+
<h1 class="mt-5">Stick To Your Role! Leaderboard</h1>
|
| 180 |
+
<div class="table-responsive">
|
| 181 |
+
<!-- Render the table HTML here -->
|
| 182 |
+
{{ table_html|safe }}
|
| 183 |
+
</div>
|
| 184 |
+
<div class="section">
|
| 185 |
+
<div id="evaluate_custom_model" class="section-title">Evaluate a custom model</div>
|
| 186 |
+
<p>
|
| 187 |
+
To evaluate a custom model you can use our <a href="https://gitlab.inria.fr/gkovac/value_stability">open-source code</a>.
|
| 188 |
+
If a model is in the Hugging Face Transformers format (saved either locally or on the Hub),
|
| 189 |
+
it can be simply added by adding a config file.
|
| 190 |
+
The model can then be evaluated as any other model.
|
| 191 |
+
To do so, follow the <a href="https://gitlab.inria.fr/gkovac/value_stability/-/blob/master/README.md?ref_type=heads#adding-a-new-model">instructions</a> in the README.md file.
|
| 192 |
+
</p>
|
| 193 |
+
</div>
|
| 194 |
+
<div class="section" id="paper">
|
| 195 |
+
<div class="section-title">Submit a custom model to the Stick To Your Role! Leaderboard</div>
|
| 196 |
+
<p>
|
| 197 |
+
If you want, your model can be added to the Stick To Your Role! Leaderboard as an unofficial submission.
|
| 198 |
+
A separate list of models containing both official and unofficial submissions will be created.
|
| 199 |
+
The procedure is as follows:
|
| 200 |
+
</p>
|
| 201 |
+
<ol>
|
| 202 |
+
<li>
|
| 203 |
+
<b> Add and evaluate your model </b> - Add your model as a config file as described <a href="{{ url_for('new_model', _anchor='evaluate_custom_model') }}">above</a>.
|
| 204 |
+
This procedure should produce 9 JSON files, such as:
|
| 205 |
+
<code>`Leaderboard/results/stability_leaderboard/<your_model_name>/chunk_0_<timestamp>/results.json`</code>
|
| 206 |
+
</li>
|
| 207 |
+
<li>
|
| 208 |
+
<b> Submit the config file </b> - Create a pull request to our <a href="https://gitlab.inria.fr/gkovac/value_stability">repository</a> from a branch <code>"unofficial_model/<your_model_name>"</code>.
|
| 209 |
+
The pull request should ideally only add the config file in <code>`./models/leaderboard_configs`</code>.
|
| 210 |
+
If additional changes are needed, they should ideally be constrained to a new model class (see <a href="https://gitlab.inria.fr/gkovac/value_stability/-/blob/master/models/huggingfacemodel.py?ref_type=heads">huggingfacemodel.py</a> for reference).</li>
|
| 211 |
+
<li>
|
| 212 |
+
<b> Submit the model results </b> - Submit the *.json files as a ZIP archive using the form below.
|
| 213 |
+
We will integrate the model's results on our side, and rerank models with yours included.
|
| 214 |
+
</li>
|
| 215 |
+
</ol>
|
| 216 |
+
<div class="form-container">
|
| 217 |
+
<form id="model-submission-form" method="POST" action="{{ url_for('submit_model') }}" enctype="multipart/form-data">
|
| 218 |
+
<div class="form-row row">
|
| 219 |
+
<div class="col-md-4">
|
| 220 |
+
<label for="model_name" class="form-label">Model Name:</label>
|
| 221 |
+
</div>
|
| 222 |
+
<div class="col-md-8">
|
| 223 |
+
<input type="text" class="form-content" id="model_name" name="model_name" required>
|
| 224 |
+
</div>
|
| 225 |
+
</div>
|
| 226 |
+
<div class="form-row row">
|
| 227 |
+
<div class="col-md-4">
|
| 228 |
+
<label for="pull_request_link" class="form-label">Pull Request Link:</label>
|
| 229 |
+
</div>
|
| 230 |
+
<div class="col-md-8">
|
| 231 |
+
<input type="url" class="form-content" id="pull_request_link" name="pull_request_link" required>
|
| 232 |
+
</div>
|
| 233 |
+
</div>
|
| 234 |
+
<div class="form-row row">
|
| 235 |
+
<div class="col-md-4">
|
| 236 |
+
<label for="email" class="form-label">Email:</label>
|
| 237 |
+
</div>
|
| 238 |
+
<div class="col-md-8">
|
| 239 |
+
<input type="email" class="form-content" id="email" name="email" required>
|
| 240 |
+
</div>
|
| 241 |
+
</div>
|
| 242 |
+
<div class="form-row row">
|
| 243 |
+
<div class="col-md-4">
|
| 244 |
+
<label for="description" class="form-label">Description:</label>
|
| 245 |
+
</div>
|
| 246 |
+
<div class="col-md-8">
|
| 247 |
+
<textarea class="form-content" id="description" name="description" placeholder="Various details on the model training and architecture (e.g. dataset, model size, optimizer, etc.)" rows="3" required></textarea>
|
| 248 |
+
</div>
|
| 249 |
+
</div>
|
| 250 |
+
<div class="form-row row">
|
| 251 |
+
<div class="col-md-4">
|
| 252 |
+
<label for="model_files" class="form-label">
|
| 253 |
+
Upload the Model results directory as a ZIP file
|
| 254 |
+
(<code>Leaderboard/results/stability_leaderboard/<your_model_name></code>):
|
| 255 |
+
</label>
|
| 256 |
+
</div>
|
| 257 |
+
<div class="col-md-8">
|
| 258 |
+
<input type="file" id="model_files" name="model_files" class="file-input" accept=".zip" required>
|
| 259 |
+
<small class="form-text text-muted">
|
| 260 |
+
Please upload a ZIP file containing the results directory.
|
| 261 |
+
</small>
|
| 262 |
+
</div>
|
| 263 |
+
</div>
|
| 264 |
+
<button type="submit" class="btn custom-button mt-3">Submit</button>
|
| 265 |
+
</form>
|
| 266 |
+
</div>
|
| 267 |
+
</div>
|
| 268 |
+
<div class="back-button">
|
| 269 |
+
<a href="{{ url_for('index') }}" class="custom-button mt-3">Main page</a>
|
| 270 |
+
</div>
|
| 271 |
+
<div class="citation-section">
|
| 272 |
+
<p>If you found this project useful, please cite our related paper:</p>
|
| 273 |
+
<div class="citation-box" id="citation-text">
|
| 274 |
+
@article{kovavc2024stick,
|
| 275 |
+
title={Stick to your Role! Stability of Personal Values Expressed in Large Language Models},
|
| 276 |
+
author={Kova{\v{c}}, Grgur and Portelas, R{\'e}my and Sawayama, Masataka and Dominey, Peter Ford and Oudeyer, Pierre-Yves},
|
| 277 |
+
journal={arXiv preprint arXiv:2402.14846},
|
| 278 |
+
year={2024}
|
| 279 |
+
}
|
| 280 |
+
</div>
|
| 281 |
+
</div>
|
| 282 |
+
</div>
|
| 283 |
+
|
| 284 |
+
<!-- Include jQuery -->
|
| 285 |
+
<script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
|
| 286 |
+
<!-- Include Bootstrap JS -->
|
| 287 |
+
<script src="https://stackpath.bootstrapcdn.com/bootstrap/5.1.3/js/bootstrap.bundle.min.js"></script>
|
| 288 |
+
<!-- Include DataTables JS -->
|
| 289 |
+
<script src="https://cdn.datatables.net/1.11.5/js/jquery.dataTables.min.js"></script>
|
| 290 |
+
<script src="https://cdn.datatables.net/1.11.5/js/dataTables.bootstrap5.min.js"></script>
|
| 291 |
+
<!-- Initialize DataTables -->
|
| 292 |
+
<script>
|
| 293 |
+
$(document).ready(function() {
|
| 294 |
+
const table = $('table').DataTable({
|
| 295 |
+
"paging": false,
|
| 296 |
+
"info": false,
|
| 297 |
+
"columnDefs": [
|
| 298 |
+
{ "orderable": false, "targets": 0 },
|
| 299 |
+
{ "searchable": false, "targets": 0 }
|
| 300 |
+
],
|
| 301 |
+
"order": [[ 2, 'desc' ]],
|
| 302 |
+
"drawCallback": function(settings) {
|
| 303 |
+
var api = this.api();
|
| 304 |
+
api.column(0, {order:'applied'}).nodes().each(function(cell, i) {
|
| 305 |
+
cell.innerHTML = i + 1;
|
| 306 |
+
});
|
| 307 |
+
}
|
| 308 |
+
});
|
| 309 |
+
});
|
| 310 |
+
</script>
|
| 311 |
+
</body>
|
| 312 |
+
</html>
|