chenmouxiang committed on
Commit
0cd46d4
·
verified ·
1 Parent(s): 7b9ce4e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -12
app.py CHANGED
@@ -50,12 +50,12 @@ def generate_plot(E, A, k, alpha):
50
  return plt
51
 
52
 
53
- OUTPUT_TEMPLATE = """Loss for a {n}B model when P={p} is: **{loss}**. It is equivalant to:
54
 
55
- - A {n1}B model with P=1;
56
- - A {n2}B model with P=2;
57
- - A {n4}B model with P=4;
58
- - A {n8}B model with P=8;
59
 
60
  Note: The equivalent parameters are for reference only. In some reasoning tasks, scaling the parallel streams will obtain more performance gains than the loss benefits!
61
 
@@ -77,13 +77,12 @@ def process_inputs(E, A, k, alpha, n, p):
77
 
78
  # Create interface
79
 
80
- HEAD = """# Parallel Scaling Law Visualization
81
 
82
- $$
83
- \\text{Loss}=E+\\left(
84
- \\frac{A}{\\text{Parameters}\\times (1+k\\log P)}
85
- \\right)^{\\alpha}
86
- $$
87
  """
88
 
89
  with gr.Blocks() as demo:
@@ -91,6 +90,12 @@ with gr.Blocks() as demo:
91
 
92
  with gr.Row():
93
  with gr.Column():
 
 
 
 
 
 
94
 
95
  # Input values
96
  N = gr.Number(value=2.8, label="N: Number of Non-Embedding Model Parameters (in Billion)")
@@ -111,11 +116,12 @@ with gr.Blocks() as demo:
111
  param_k = gr.Number(value=PARAM_SETS["Stack-V2-Python"]['k'], label="k")
112
  param_alpha = gr.Number(value=PARAM_SETS["Stack-V2-Python"]['alpha'], label="alpha")
113
 
114
- submit_btn = gr.Button("Estimate Loss and Equivalant Model Parameters")
115
 
116
 
117
  plot, output = process_inputs(PARAM_SETS["Stack-V2-Python"]['E'], PARAM_SETS["Stack-V2-Python"]['A'], PARAM_SETS["Stack-V2-Python"]['k'], PARAM_SETS["Stack-V2-Python"]['alpha'], 2.8, 4)
118
  with gr.Column():
 
 
119
  # Output section
120
  plot_output = gr.Plot(label="Scaling Law Curve", value=plot)
121
  result_output = gr.Markdown(label="Result", value=output)
 
50
  return plt
51
 
52
 
53
+ OUTPUT_TEMPLATE = """Loss for a {n}B model when P={p} is: **{loss:.5f}**. It is equivalant to:
54
 
55
+ - A **{n1}B** model with **P=1**;
56
+ - A **{n2}B** model with **P=2**;
57
+ - A **{n4}B** model with **P=4**;
58
+ - A **{n8}B** model with **P=8**;
59
 
60
  Note: The equivalent parameters are for reference only. In some reasoning tasks, scaling the parallel streams will obtain more performance gains than the loss benefits!
61
 
 
77
 
78
  # Create interface
79
 
80
+ HEAD = """<div align="center">
81
 
82
+ # Parallel Scaling Law Visualization
83
+
84
+ [![Paper](https://img.shields.io/badge/arXiv-2505.10475-red)](https://arxiv.org/abs/2505.10475)
85
+ </div>
 
86
  """
87
 
88
  with gr.Blocks() as demo:
 
90
 
91
  with gr.Row():
92
  with gr.Column():
93
+
94
+ gr.Markdown("""$$
95
+ \\text{Loss}=E+\\left(
96
+ \\frac{A}{\\text{Parameters}\\times (1+k\\log P)}
97
+ \\right)^{\\alpha}
98
+ $$""")
99
 
100
  # Input values
101
  N = gr.Number(value=2.8, label="N: Number of Non-Embedding Model Parameters (in Billion)")
 
116
  param_k = gr.Number(value=PARAM_SETS["Stack-V2-Python"]['k'], label="k")
117
  param_alpha = gr.Number(value=PARAM_SETS["Stack-V2-Python"]['alpha'], label="alpha")
118
 
 
119
 
120
 
121
  plot, output = process_inputs(PARAM_SETS["Stack-V2-Python"]['E'], PARAM_SETS["Stack-V2-Python"]['A'], PARAM_SETS["Stack-V2-Python"]['k'], PARAM_SETS["Stack-V2-Python"]['alpha'], 2.8, 4)
122
  with gr.Column():
123
+
124
+ submit_btn = gr.Button("Calculate")
125
  # Output section
126
  plot_output = gr.Plot(label="Scaling Law Curve", value=plot)
127
  result_output = gr.Markdown(label="Result", value=output)