DontPlanToEnd committed on
Commit
799f51a
·
verified ·
1 Parent(s): 892c34b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -9
app.py CHANGED
@@ -552,6 +552,17 @@ ugi_category_columns = [
552
  create_numeric_column(col, width=120) for col in UGI_CATEGORY_COLS
553
  ]
554
 
 
 
 
 
 
 
 
 
 
 
 
555
  political_columns = [
556
  {
557
  "headerName": "Ideology",
@@ -810,12 +821,13 @@ app.layout = html.Div([
810
  id='additional-columns-filter',
811
  options=[
812
  {'label': 'UGI Categories', 'value': 'ugi_categories'},
 
813
  {'label': 'Political Test Axes', 'value': 'political_axes'}
814
  ],
815
  value=[],
816
  inline=True,
817
  style={'display': 'inline-block'},
818
- labelStyle={'fontWeight': 'normal', 'marginRight': '15px'} # Add consistent spacing
819
  )
820
  ], style={'marginBottom': '13px', 'padding': '0 20px', 'overflow': 'hidden'}),
821
 
@@ -865,15 +877,53 @@ app.layout = html.Div([
865
 
866
  # Description
867
  html.Div([
868
- html.H3("About"),
 
 
 
869
 
870
- html.P([html.Strong("UGI:"), " Uncensored General Intelligence. A benchmark measuring both willingness to answer and accuracy in fact-based contentious questions. The test set is made of roughly 100 questions/tasks, covering topics that are commonly difficult to get LLMs to answer. The leaderboard's questions are kept private in order to avoid the common problem of not knowing if a model is intelligent or if it was just trained on the test questions."]),
 
 
 
 
 
 
 
 
 
 
 
 
 
871
 
872
- html.P([html.Strong("W/10:"), " Willingness/10. A more narrow subset of the UGI questions, solely focused on measuring how far a model can be pushed before going against its instructions or refusing to answer."]),
873
 
874
- html.P("A high UGI but low W/10 could mean for example that the model can provide a lot of accurate sensitive information, but will refuse to form the information into something it sees as offensive or against its rules."),
 
 
 
 
 
 
 
 
 
 
 
 
875
 
876
- html.P([html.Strong("NatInt:"), " Natural Intelligence. A general knowledge quiz covering real-world subjects that llms are not commonly benchmarked on, such as pop culture trivia. This measures if the model understands a diverse range of topics, as opposed to over-training on textbook information and the types of questions commonly tested on benchmarks."]),
 
 
 
 
 
 
 
 
 
 
877
 
878
  html.P([html.Strong("Coding:"), " A simple 50 question quiz measuring how vast a model's programming knowledge is. Each question is worth 2 points."]),
879
 
@@ -1002,10 +1052,17 @@ def update_columns(additional_columns):
1002
 
1003
  # Add UGI category columns if selected
1004
  if 'ugi_categories' in additional_columns:
1005
- current_columns.extend(ugi_category_columns) # Use the pre-defined ugi_category_columns
 
 
 
 
 
 
 
1006
 
1007
- # Add remaining base columns (W/10, NatInt, Coding, Political Lean)
1008
- current_columns.extend(columnDefs[7:11])
1009
 
1010
  # Add political columns if selected
1011
  if 'political_axes' in additional_columns:
 
552
  create_numeric_column(col, width=120) for col in UGI_CATEGORY_COLS
553
  ]
554
 
555
+ w10_type_columns = [
556
+ create_numeric_column("W/10-Direct", width=120, filterParams={
557
+ "defaultOption": "greaterThanOrEqual",
558
+ "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
559
+ }),
560
+ create_numeric_column("W/10-Adherence", width=120, filterParams={
561
+ "defaultOption": "greaterThanOrEqual",
562
+ "filterOptions": ['equals', 'notEqual', 'greaterThan', 'greaterThanOrEqual', 'lessThan', 'lessThanOrEqual', 'inRange']
563
+ })
564
+ ]
565
+
566
  political_columns = [
567
  {
568
  "headerName": "Ideology",
 
821
  id='additional-columns-filter',
822
  options=[
823
  {'label': 'UGI Categories', 'value': 'ugi_categories'},
824
+ {'label': 'W/10 Types', 'value': 'w10_types'},
825
  {'label': 'Political Test Axes', 'value': 'political_axes'}
826
  ],
827
  value=[],
828
  inline=True,
829
  style={'display': 'inline-block'},
830
+ labelStyle={'fontWeight': 'normal', 'marginRight': '15px'}
831
  )
832
  ], style={'marginBottom': '13px', 'padding': '0 20px', 'overflow': 'hidden'}),
833
 
 
877
 
878
  # Description
879
  html.Div([
880
+ html.H3("About", style={'fontSize': '22px', 'marginBottom': '0px'}),
881
+
882
+ html.P([html.Strong("UGI:"), " Uncensored General Intelligence. A benchmark measuring both willingness to answer and accuracy in fact-based contentious questions. The test set is made of roughly 100 questions/tasks, covering topics that are commonly difficult to get LLMs to answer. The leaderboard's questions are kept private in order to avoid the common problem of not knowing if a model is intelligent or if it was just trained on the test questions."],
883
+ style={'marginTop': '7px', 'marginBottom': '4px'}),
884
 
885
+ html.Details([
886
+ html.Summary("Categories",
887
+ style={
888
+ 'fontWeight': 'normal',
889
+ 'fontSize': '1em',
890
+ 'marginLeft': '20px',
891
+ 'cursor': 'pointer'
892
+ }),
893
+ html.Ul([
894
+ html.Li("Unruly: Taboo Underground Knowledge"),
895
+ html.Li("Internet: Knowledge of controversial/explicit web content"),
896
+ html.Li("Societal/Political: Awareness of contentious socio-political issues")
897
+ ], style={'marginTop': '0px', 'marginBottom': '16px', 'marginLeft': '40px'})
898
+ ], style={'marginBottom': '16px'}),
899
 
900
+ html.P([html.Strong("W/10:"), " Willingness/10. A more narrow subset of the UGI questions, solely focused on measuring how far a model can be pushed before going against its instructions or refusing to answer."], style={'marginBottom': '4px'}),
901
 
902
+ html.Details([
903
+ html.Summary("Types",
904
+ style={
905
+ 'fontWeight': 'normal',
906
+ 'fontSize': '1em',
907
+ 'marginLeft': '20px',
908
+ 'cursor': 'pointer'
909
+ }),
910
+ html.Ul([
911
+ html.Li("Direct: Measures if the model directly refuses to respond to certain prompts"),
912
+ html.Li("Adherence: Some models might not explicitly refuse to do something, though will still deviate from the instructions as a way to get out of doing it, or simply due to lack of instruction following capabilities")
913
+ ], style={'marginTop': '0px', 'marginBottom': '16px', 'marginLeft': '40px'})
914
+ ], style={'marginBottom': '16px'}),
915
 
916
+ html.P([
917
+ "A high UGI but low W/10 could mean for example that the model can provide a lot of accurate sensitive information, but will refuse to form the information into something it sees as offensive or against its rules.",
918
+ html.Br(),
919
+ html.Br()
920
+ ]),
921
+
922
+ html.P([
923
+ html.Strong("Benchmarks not focused on censorship:"),
924
+ html.Div(style={'margin': '6px 0'}),
925
+ html.Strong("NatInt:"), " Natural Intelligence. A general knowledge quiz covering real-world subjects that llms are not commonly benchmarked on, such as pop culture trivia. This measures if the model understands a diverse range of topics, as opposed to over-training on textbook information and the types of questions commonly tested on benchmarks."
926
+ ]),
927
 
928
  html.P([html.Strong("Coding:"), " A simple 50 question quiz measuring how vast a model's programming knowledge is. Each question is worth 2 points."]),
929
 
 
1052
 
1053
  # Add UGI category columns if selected
1054
  if 'ugi_categories' in additional_columns:
1055
+ current_columns.extend(ugi_category_columns)
1056
+
1057
+ # Add W/10 column
1058
+ current_columns.extend(columnDefs[7:8]) # Add just the W/10 column
1059
+
1060
+ # Add W/10 type columns if selected
1061
+ if 'w10_types' in additional_columns:
1062
+ current_columns.extend(w10_type_columns)
1063
 
1064
+ # Add remaining base columns (NatInt, Coding, Political Lean)
1065
+ current_columns.extend(columnDefs[8:11])
1066
 
1067
  # Add political columns if selected
1068
  if 'political_axes' in additional_columns: