Vedant Pungliya committed on
Commit
6d06b0d
·
unverified ·
1 Parent(s): 552e495
codenet_4000_Example/java/input.in ADDED
The diff for this file is too large to render. See raw diff
 
codenet_4000_Example/java/layer12/kmeans/clusters-kmeans-350.txt ADDED
The diff for this file is too large to render. See raw diff
 
convert.py DELETED
The diff for this file is too large to render. See raw diff
 
pert.py CHANGED
@@ -34,7 +34,6 @@ def compute_jaccard_matrix(clusters_a, clusters_b):
34
  # Dictionary mapping perturbation names to their descriptions
35
  perturbation_descriptions = {
36
  "Scope Modification": "Identifies variables in complex scopes and moves them to unrelated blocks.",
37
- "Lexical Similarity Modification": "Generates lexical variations of class and variable names with different casing.",
38
  "Log Modification": "Adds logging statements to blocks of code for tracking execution flow.",
39
  "Operator Modification": "Modifies boolean expressions by negating them in various contexts.",
40
  "Pointer Modification": "Add C style pointer to the code.",
@@ -43,7 +42,8 @@ perturbation_descriptions = {
43
  "Try Catch Modification": "Converts switch statements into equivalent if statements.",
44
  "Unused Statement Modification": "Inserts unused statements into blocks of code for testing/debugging.",
45
  "Exact Name Class Variable Modification": "Renames classes and variables to a specific randomly generated name.",
46
- "Casing Class Variable Modification": "Generates lexical variations of class and variable names with different casing."
 
47
  }
48
 
49
  def compute_and_log_csi(file_orig, file_pert, perturbation_name, output_csv="results/csi_summary.csv"):
@@ -85,13 +85,6 @@ compute_and_log_csi(
85
  output_csv="results/csi_summary.csv"
86
  )
87
 
88
- compute_and_log_csi(
89
- "codenet_4000_del_15000/Java/layer12/kmeans/clusters-kmeans-350.txt",
90
- "codenet_4000_lexical_similar/java/layer12/kmeans/clusters-kmeans-350.txt",
91
- perturbation_name="Lexical Similarity Modification",
92
- output_csv="results/csi_summary.csv"
93
- )
94
-
95
  compute_and_log_csi(
96
  "codenet_4000_del_15000/Java/layer12/kmeans/clusters-kmeans-350.txt",
97
  "codenet_4000_log/java/layer12/kmeans/clusters-kmeans-350.txt",
@@ -116,7 +109,7 @@ compute_and_log_csi(
116
  compute_and_log_csi(
117
  "codenet_4000_del_15000/Java/layer12/kmeans/clusters-kmeans-350.txt",
118
  "codenet_4000_POS/java/layer12/kmeans/clusters-kmeans-350.txt",
119
- perturbation_name="POS Modification",
120
  output_csv="results/csi_summary.csv"
121
  )
122
 
@@ -155,4 +148,11 @@ compute_and_log_csi(
155
  output_csv="results/csi_summary.csv"
156
  )
157
 
 
 
 
 
 
 
 
158
  # You can now call compute_and_log_csi again and again for other perturbations!
 
34
  # Dictionary mapping perturbation names to their descriptions
35
  perturbation_descriptions = {
36
  "Scope Modification": "Identifies variables in complex scopes and moves them to unrelated blocks.",
 
37
  "Log Modification": "Adds logging statements to blocks of code for tracking execution flow.",
38
  "Operator Modification": "Modifies boolean expressions by negating them in various contexts.",
39
  "Pointer Modification": "Add C style pointer to the code.",
 
42
  "Try Catch Modification": "Converts switch statements into equivalent if statements.",
43
  "Unused Statement Modification": "Inserts unused statements into blocks of code for testing/debugging.",
44
  "Exact Name Class Variable Modification": "Renames classes and variables to a specific randomly generated name.",
45
+ "Casing Class Variable Modification": "Generates lexical variations of class and variable names with different casing.",
46
+ "Onecase Modification": "Generates lexical variations of class and variable names with just 1 letter uppercase wither for class anme or variable name."
47
  }
48
 
49
  def compute_and_log_csi(file_orig, file_pert, perturbation_name, output_csv="results/csi_summary.csv"):
 
85
  output_csv="results/csi_summary.csv"
86
  )
87
 
 
 
 
 
 
 
 
88
  compute_and_log_csi(
89
  "codenet_4000_del_15000/Java/layer12/kmeans/clusters-kmeans-350.txt",
90
  "codenet_4000_log/java/layer12/kmeans/clusters-kmeans-350.txt",
 
109
  compute_and_log_csi(
110
  "codenet_4000_del_15000/Java/layer12/kmeans/clusters-kmeans-350.txt",
111
  "codenet_4000_POS/java/layer12/kmeans/clusters-kmeans-350.txt",
112
+ perturbation_name="POS finetuned",
113
  output_csv="results/csi_summary.csv"
114
  )
115
 
 
148
  output_csv="results/csi_summary.csv"
149
  )
150
 
151
+ compute_and_log_csi(
152
+ "codenet_4000_del_15000/Java/layer12/kmeans/clusters-kmeans-350.txt",
153
+ "codenet_4000_Onecase/java/layer12/kmeans/clusters-kmeans-350.txt",
154
+ perturbation_name="Onecase Modification",
155
+ output_csv="results/csi_summary.csv"
156
+ )
157
+
158
  # You can now call compute_and_log_csi again and again for other perturbations!
remove.py DELETED
@@ -1,224 +0,0 @@
1
- def remove_lines(filepath, lines_to_remove):
2
- # Read the file
3
- with open(filepath, 'r', encoding='utf-8') as f:
4
- file_content = f.read()
5
-
6
- # Split content into lines
7
- lines = file_content.split('\n')
8
-
9
- # Create a set of line numbers to remove for O(1) lookup
10
- remove_set = set(lines_to_remove)
11
-
12
- # Keep lines that aren't in the remove set
13
- filtered_lines = [line for i, line in enumerate(lines, 1) if i not in remove_set]
14
-
15
- # Join lines back together
16
- new_content = '\n'.join(filtered_lines)
17
-
18
- # Write back to the same file
19
- with open(filepath, 'w', encoding='utf-8') as f:
20
- f.write(new_content)
21
-
22
- lines_to_remove = [
23
- 5,
24
- 11,
25
- 26,
26
- 46,
27
- 53,
28
- 84,
29
- 117,
30
- 174,
31
- 175,
32
- 209,
33
- 212,
34
- 219,
35
- 220,
36
- 268,
37
- 272,
38
- 277,
39
- 294,
40
- 319,
41
- 322,
42
- 333,
43
- 369,
44
- 402,
45
- 437,
46
- 451,
47
- 471,
48
- 471,
49
- 471,
50
- 480,
51
- 494,
52
- 502,
53
- 514,
54
- 564,
55
- 569,
56
- 579,
57
- 592,
58
- 599,
59
- 602,
60
- 602,
61
- 619,
62
- 647,
63
- 679,
64
- 681,
65
- 685,
66
- 688,
67
- 781,
68
- 795,
69
- 833,
70
- 843,
71
- 859,
72
- 860,
73
- 899,
74
- 911,
75
- 941,
76
- 947,
77
- 989,
78
- 993,
79
- 1100,
80
- 1111,
81
- 1120,
82
- 1123,
83
- 1126,
84
- 1153,
85
- 1165,
86
- 1173,
87
- 1183,
88
- 1186,
89
- 1186,
90
- 1220,
91
- 1230,
92
- 1238,
93
- 1242,
94
- 1247,
95
- 1274,
96
- 1285,
97
- 1289,
98
- 1324,
99
- 1358,
100
- 1385,
101
- 1397,
102
- 1402,
103
- 1465,
104
- 1474,
105
- 1504,
106
- 1507,
107
- 1517,
108
- 1563,
109
- 1592,
110
- 1605,
111
- 1614,
112
- 1626,
113
- 1648,
114
- 1648,
115
- 1689,
116
- 1702,
117
- 1730,
118
- 1730,
119
- 1737,
120
- 1769,
121
- 1784,
122
- 1799,
123
- 1824,
124
- 1834,
125
- 1840,
126
- 1853,
127
- 1860,
128
- 1872,
129
- 1941,
130
- 2038,
131
- 2045,
132
- 2081,
133
- 2096,
134
- 2108,
135
- 2115,
136
- 2115,
137
- 2147,
138
- 2149,
139
- 2165,
140
- 2167,
141
- 2173,
142
- 2195,
143
- 2216,
144
- 2275,
145
- 2278,
146
- 2282,
147
- 2285,
148
- 2327,
149
- 2339,
150
- 2347,
151
- 2348,
152
- 2348,
153
- 2425,
154
- 2444,
155
- 2476,
156
- 2477,
157
- 2482,
158
- 2482,
159
- 2486,
160
- 2499,
161
- 2515,
162
- 2529,
163
- 2529,
164
- 2559,
165
- 2565,
166
- 2567,
167
- 2573,
168
- 2582,
169
- 2633,
170
- 2641,
171
- 2677,
172
- 2705,
173
- 2719,
174
- 2744,
175
- 2756,
176
- 2821,
177
- 2860,
178
- 2864,
179
- 2936,
180
- 2955,
181
- 2992,
182
- 3022,
183
- 3041,
184
- 3064,
185
- 3074,
186
- 3121,
187
- 3123,
188
- 3160,
189
- 3170,
190
- 3172,
191
- 3179,
192
- 3180,
193
- 3195,
194
- 3199,
195
- 3208,
196
- 3208,
197
- 3259,
198
- 3269,
199
- 3280,
200
- 3299,
201
- 3300,
202
- 3323,
203
- 3334,
204
- 3352,
205
- 3364,
206
- 3365,
207
- 3378,
208
- 3405,
209
- 3424,
210
- 3438,
211
- 3492,
212
- 3511,
213
- 3512,
214
- 3533,
215
- 3572,
216
- 3579,
217
- 3710,
218
- 3730,
219
- 3735,
220
- 3759,
221
- 3787,
222
- 3793
223
- ]
224
- remove_lines('input.in', lines_to_remove)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
results/csi_summary.csv CHANGED
@@ -1,12 +1,12 @@
1
  Perturbation,Average Jaccard,CSI,Description
2
  Scope Modification,0.6788942354152336,0.32110576458476636,Identifies variables in complex scopes and moves them to unrelated blocks.
3
- Lexical Similarity Modification,0.5546761391746684,0.4453238608253316,Generates lexical variations of class and variable names with different casing.
4
  Log Modification,0.5597545985057552,0.44024540149424485,Adds logging statements to blocks of code for tracking execution flow.
5
  Operator Modification,0.7675911973340813,0.23240880266591868,Modifies boolean expressions by negating them in various contexts.
6
  Pointer Modification,0.7341816285924795,0.2658183714075205,Add C style pointer to the code.
7
- POS Modification,0.39399085068850775,0.6060091493114923,No description available
8
  Random Modification,0.5314837325594708,0.4685162674405292,"Permutes statements within basic blocks, allowing different execution orders."
9
  Try Catch Modification,0.6985673658171294,0.3014326341828706,Converts switch statements into equivalent if statements.
10
  Unused Statement Modification,0.5844954343120634,0.4155045656879366,Inserts unused statements into blocks of code for testing/debugging.
11
  Exact Name Class Variable Modification,0.675121649837896,0.324878350162104,Renames classes and variables to a specific randomly generated name.
12
  Casing Class Variable Modification,0.6722713965133429,0.3277286034866571,Generates lexical variations of class and variable names with different casing.
 
 
1
  Perturbation,Average Jaccard,CSI,Description
2
  Scope Modification,0.6788942354152336,0.32110576458476636,Identifies variables in complex scopes and moves them to unrelated blocks.
 
3
  Log Modification,0.5597545985057552,0.44024540149424485,Adds logging statements to blocks of code for tracking execution flow.
4
  Operator Modification,0.7675911973340813,0.23240880266591868,Modifies boolean expressions by negating them in various contexts.
5
  Pointer Modification,0.7341816285924795,0.2658183714075205,Add C style pointer to the code.
6
+ POS finetuned,0.39399085068850775,0.6060091493114923,Clusters based on finetuned POS codebert model
7
  Random Modification,0.5314837325594708,0.4685162674405292,"Permutes statements within basic blocks, allowing different execution orders."
8
  Try Catch Modification,0.6985673658171294,0.3014326341828706,Converts switch statements into equivalent if statements.
9
  Unused Statement Modification,0.5844954343120634,0.4155045656879366,Inserts unused statements into blocks of code for testing/debugging.
10
  Exact Name Class Variable Modification,0.675121649837896,0.324878350162104,Renames classes and variables to a specific randomly generated name.
11
  Casing Class Variable Modification,0.6722713965133429,0.3277286034866571,Generates lexical variations of class and variable names with different casing.
12
+ Onecase Modification,0.665697304921991,0.334302695078009,Generates lexical variations of class and variable names with just 1 letter uppercase wither for class anme or variable name.