NEOAI committed on
Commit
a44cfb4
·
1 Parent(s): ff07c09

开源SpaceExploreAI模型:基于Transformer模型训练的5M的大语言模型

Browse files
SpaceExploreAI_training_curves.png ADDED
config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "space_explore_ai_financial",
3
+ "architectures": [
4
+ "SpaceExploreAIFinancialModel"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "gelu",
9
+ "initializer_range": 0.02,
10
+ "layer_norm_epsilon": 1e-12,
11
+ "pad_token_id": 0,
12
+ "unk_token_id": 3,
13
+ "transformers_version": "4.36.0",
14
+ "use_cache": true,
15
+ "vocab_size": 69,
16
+ "tokenizer_class": "FinancialFeatureProcessor",
17
+ "model_name": "SpaceExploreAI",
18
+ "hidden_size": 768,
19
+ "num_hidden_layers": 12,
20
+ "num_attention_heads": 12,
21
+ "max_position_embeddings": 1024,
22
+ "model_max_length": 1024
23
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "pad_token": "<pad>",
3
+ "unk_token": "<unk>",
4
+ "bos_token": "<bos>",
5
+ "eos_token": "<eos>"
6
+ }
tokenizer.json ADDED
@@ -0,0 +1,715 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "special": true,
9
+ "content": "<pad>",
10
+ "single_word": false,
11
+ "lstrip": false,
12
+ "rstrip": false,
13
+ "normalized": false
14
+ },
15
+ {
16
+ "id": 1,
17
+ "special": true,
18
+ "content": "<bos>",
19
+ "single_word": false,
20
+ "lstrip": false,
21
+ "rstrip": false,
22
+ "normalized": false
23
+ },
24
+ {
25
+ "id": 2,
26
+ "special": true,
27
+ "content": "<eos>",
28
+ "single_word": false,
29
+ "lstrip": false,
30
+ "rstrip": false,
31
+ "normalized": false
32
+ },
33
+ {
34
+ "id": 3,
35
+ "special": true,
36
+ "content": "<unk>",
37
+ "single_word": false,
38
+ "lstrip": false,
39
+ "rstrip": false,
40
+ "normalized": false
41
+ },
42
+ {
43
+ "id": 4,
44
+ "special": false,
45
+ "content": "date",
46
+ "single_word": true,
47
+ "lstrip": false,
48
+ "rstrip": false,
49
+ "normalized": false
50
+ },
51
+ {
52
+ "id": 5,
53
+ "special": false,
54
+ "content": "close",
55
+ "single_word": true,
56
+ "lstrip": false,
57
+ "rstrip": false,
58
+ "normalized": false
59
+ },
60
+ {
61
+ "id": 6,
62
+ "special": false,
63
+ "content": "high",
64
+ "single_word": true,
65
+ "lstrip": false,
66
+ "rstrip": false,
67
+ "normalized": false
68
+ },
69
+ {
70
+ "id": 7,
71
+ "special": false,
72
+ "content": "low",
73
+ "single_word": true,
74
+ "lstrip": false,
75
+ "rstrip": false,
76
+ "normalized": false
77
+ },
78
+ {
79
+ "id": 8,
80
+ "special": false,
81
+ "content": "open",
82
+ "single_word": true,
83
+ "lstrip": false,
84
+ "rstrip": false,
85
+ "normalized": false
86
+ },
87
+ {
88
+ "id": 9,
89
+ "special": false,
90
+ "content": "volume",
91
+ "single_word": true,
92
+ "lstrip": false,
93
+ "rstrip": false,
94
+ "normalized": false
95
+ },
96
+ {
97
+ "id": 10,
98
+ "special": false,
99
+ "content": "day_of_week",
100
+ "single_word": true,
101
+ "lstrip": false,
102
+ "rstrip": false,
103
+ "normalized": false
104
+ },
105
+ {
106
+ "id": 11,
107
+ "special": false,
108
+ "content": "day_of_month",
109
+ "single_word": true,
110
+ "lstrip": false,
111
+ "rstrip": false,
112
+ "normalized": false
113
+ },
114
+ {
115
+ "id": 12,
116
+ "special": false,
117
+ "content": "week_of_year",
118
+ "single_word": true,
119
+ "lstrip": false,
120
+ "rstrip": false,
121
+ "normalized": false
122
+ },
123
+ {
124
+ "id": 13,
125
+ "special": false,
126
+ "content": "month",
127
+ "single_word": true,
128
+ "lstrip": false,
129
+ "rstrip": false,
130
+ "normalized": false
131
+ },
132
+ {
133
+ "id": 14,
134
+ "special": false,
135
+ "content": "quarter",
136
+ "single_word": true,
137
+ "lstrip": false,
138
+ "rstrip": false,
139
+ "normalized": false
140
+ },
141
+ {
142
+ "id": 15,
143
+ "special": false,
144
+ "content": "year",
145
+ "single_word": true,
146
+ "lstrip": false,
147
+ "rstrip": false,
148
+ "normalized": false
149
+ },
150
+ {
151
+ "id": 16,
152
+ "special": false,
153
+ "content": "is_month_start",
154
+ "single_word": true,
155
+ "lstrip": false,
156
+ "rstrip": false,
157
+ "normalized": false
158
+ },
159
+ {
160
+ "id": 17,
161
+ "special": false,
162
+ "content": "is_month_end",
163
+ "single_word": true,
164
+ "lstrip": false,
165
+ "rstrip": false,
166
+ "normalized": false
167
+ },
168
+ {
169
+ "id": 18,
170
+ "special": false,
171
+ "content": "is_week_start",
172
+ "single_word": true,
173
+ "lstrip": false,
174
+ "rstrip": false,
175
+ "normalized": false
176
+ },
177
+ {
178
+ "id": 19,
179
+ "special": false,
180
+ "content": "is_week_end",
181
+ "single_word": true,
182
+ "lstrip": false,
183
+ "rstrip": false,
184
+ "normalized": false
185
+ },
186
+ {
187
+ "id": 20,
188
+ "special": false,
189
+ "content": "close_lag_1",
190
+ "single_word": true,
191
+ "lstrip": false,
192
+ "rstrip": false,
193
+ "normalized": false
194
+ },
195
+ {
196
+ "id": 21,
197
+ "special": false,
198
+ "content": "close_lag_2",
199
+ "single_word": true,
200
+ "lstrip": false,
201
+ "rstrip": false,
202
+ "normalized": false
203
+ },
204
+ {
205
+ "id": 22,
206
+ "special": false,
207
+ "content": "close_lag_3",
208
+ "single_word": true,
209
+ "lstrip": false,
210
+ "rstrip": false,
211
+ "normalized": false
212
+ },
213
+ {
214
+ "id": 23,
215
+ "special": false,
216
+ "content": "close_lag_5",
217
+ "single_word": true,
218
+ "lstrip": false,
219
+ "rstrip": false,
220
+ "normalized": false
221
+ },
222
+ {
223
+ "id": 24,
224
+ "special": false,
225
+ "content": "close_lag_10",
226
+ "single_word": true,
227
+ "lstrip": false,
228
+ "rstrip": false,
229
+ "normalized": false
230
+ },
231
+ {
232
+ "id": 25,
233
+ "special": false,
234
+ "content": "high_lag_1",
235
+ "single_word": true,
236
+ "lstrip": false,
237
+ "rstrip": false,
238
+ "normalized": false
239
+ },
240
+ {
241
+ "id": 26,
242
+ "special": false,
243
+ "content": "high_lag_2",
244
+ "single_word": true,
245
+ "lstrip": false,
246
+ "rstrip": false,
247
+ "normalized": false
248
+ },
249
+ {
250
+ "id": 27,
251
+ "special": false,
252
+ "content": "high_lag_3",
253
+ "single_word": true,
254
+ "lstrip": false,
255
+ "rstrip": false,
256
+ "normalized": false
257
+ },
258
+ {
259
+ "id": 28,
260
+ "special": false,
261
+ "content": "high_lag_5",
262
+ "single_word": true,
263
+ "lstrip": false,
264
+ "rstrip": false,
265
+ "normalized": false
266
+ },
267
+ {
268
+ "id": 29,
269
+ "special": false,
270
+ "content": "high_lag_10",
271
+ "single_word": true,
272
+ "lstrip": false,
273
+ "rstrip": false,
274
+ "normalized": false
275
+ },
276
+ {
277
+ "id": 30,
278
+ "special": false,
279
+ "content": "low_lag_1",
280
+ "single_word": true,
281
+ "lstrip": false,
282
+ "rstrip": false,
283
+ "normalized": false
284
+ },
285
+ {
286
+ "id": 31,
287
+ "special": false,
288
+ "content": "low_lag_2",
289
+ "single_word": true,
290
+ "lstrip": false,
291
+ "rstrip": false,
292
+ "normalized": false
293
+ },
294
+ {
295
+ "id": 32,
296
+ "special": false,
297
+ "content": "low_lag_3",
298
+ "single_word": true,
299
+ "lstrip": false,
300
+ "rstrip": false,
301
+ "normalized": false
302
+ },
303
+ {
304
+ "id": 33,
305
+ "special": false,
306
+ "content": "low_lag_5",
307
+ "single_word": true,
308
+ "lstrip": false,
309
+ "rstrip": false,
310
+ "normalized": false
311
+ },
312
+ {
313
+ "id": 34,
314
+ "special": false,
315
+ "content": "low_lag_10",
316
+ "single_word": true,
317
+ "lstrip": false,
318
+ "rstrip": false,
319
+ "normalized": false
320
+ },
321
+ {
322
+ "id": 35,
323
+ "special": false,
324
+ "content": "volume_lag_1",
325
+ "single_word": true,
326
+ "lstrip": false,
327
+ "rstrip": false,
328
+ "normalized": false
329
+ },
330
+ {
331
+ "id": 36,
332
+ "special": false,
333
+ "content": "volume_lag_2",
334
+ "single_word": true,
335
+ "lstrip": false,
336
+ "rstrip": false,
337
+ "normalized": false
338
+ },
339
+ {
340
+ "id": 37,
341
+ "special": false,
342
+ "content": "volume_lag_3",
343
+ "single_word": true,
344
+ "lstrip": false,
345
+ "rstrip": false,
346
+ "normalized": false
347
+ },
348
+ {
349
+ "id": 38,
350
+ "special": false,
351
+ "content": "volume_lag_5",
352
+ "single_word": true,
353
+ "lstrip": false,
354
+ "rstrip": false,
355
+ "normalized": false
356
+ },
357
+ {
358
+ "id": 39,
359
+ "special": false,
360
+ "content": "volume_lag_10",
361
+ "single_word": true,
362
+ "lstrip": false,
363
+ "rstrip": false,
364
+ "normalized": false
365
+ },
366
+ {
367
+ "id": 40,
368
+ "special": false,
369
+ "content": "future_return_1d",
370
+ "single_word": true,
371
+ "lstrip": false,
372
+ "rstrip": false,
373
+ "normalized": false
374
+ },
375
+ {
376
+ "id": 41,
377
+ "special": false,
378
+ "content": "future_return_2d",
379
+ "single_word": true,
380
+ "lstrip": false,
381
+ "rstrip": false,
382
+ "normalized": false
383
+ },
384
+ {
385
+ "id": 42,
386
+ "special": false,
387
+ "content": "future_return_10d",
388
+ "single_word": true,
389
+ "lstrip": false,
390
+ "rstrip": false,
391
+ "normalized": false
392
+ },
393
+ {
394
+ "id": 43,
395
+ "special": false,
396
+ "content": "past_return_1d",
397
+ "single_word": true,
398
+ "lstrip": false,
399
+ "rstrip": false,
400
+ "normalized": false
401
+ },
402
+ {
403
+ "id": 44,
404
+ "special": false,
405
+ "content": "past_return_2d",
406
+ "single_word": true,
407
+ "lstrip": false,
408
+ "rstrip": false,
409
+ "normalized": false
410
+ },
411
+ {
412
+ "id": 45,
413
+ "special": false,
414
+ "content": "past_return_5d",
415
+ "single_word": true,
416
+ "lstrip": false,
417
+ "rstrip": false,
418
+ "normalized": false
419
+ },
420
+ {
421
+ "id": 46,
422
+ "special": false,
423
+ "content": "past_return_10d",
424
+ "single_word": true,
425
+ "lstrip": false,
426
+ "rstrip": false,
427
+ "normalized": false
428
+ },
429
+ {
430
+ "id": 47,
431
+ "special": false,
432
+ "content": "daily_range",
433
+ "single_word": true,
434
+ "lstrip": false,
435
+ "rstrip": false,
436
+ "normalized": false
437
+ },
438
+ {
439
+ "id": 48,
440
+ "special": false,
441
+ "content": "daily_range_abs",
442
+ "single_word": true,
443
+ "lstrip": false,
444
+ "rstrip": false,
445
+ "normalized": false
446
+ },
447
+ {
448
+ "id": 49,
449
+ "special": false,
450
+ "content": "gap",
451
+ "single_word": true,
452
+ "lstrip": false,
453
+ "rstrip": false,
454
+ "normalized": false
455
+ },
456
+ {
457
+ "id": 50,
458
+ "special": false,
459
+ "content": "close_to_open",
460
+ "single_word": true,
461
+ "lstrip": false,
462
+ "rstrip": false,
463
+ "normalized": false
464
+ },
465
+ {
466
+ "id": 51,
467
+ "special": false,
468
+ "content": "close_to_high",
469
+ "single_word": true,
470
+ "lstrip": false,
471
+ "rstrip": false,
472
+ "normalized": false
473
+ },
474
+ {
475
+ "id": 52,
476
+ "special": false,
477
+ "content": "close_to_low",
478
+ "single_word": true,
479
+ "lstrip": false,
480
+ "rstrip": false,
481
+ "normalized": false
482
+ },
483
+ {
484
+ "id": 53,
485
+ "special": false,
486
+ "content": "volatility_5d",
487
+ "single_word": true,
488
+ "lstrip": false,
489
+ "rstrip": false,
490
+ "normalized": false
491
+ },
492
+ {
493
+ "id": 54,
494
+ "special": false,
495
+ "content": "volatility_10d",
496
+ "single_word": true,
497
+ "lstrip": false,
498
+ "rstrip": false,
499
+ "normalized": false
500
+ },
501
+ {
502
+ "id": 55,
503
+ "special": false,
504
+ "content": "volatility_21d",
505
+ "single_word": true,
506
+ "lstrip": false,
507
+ "rstrip": false,
508
+ "normalized": false
509
+ },
510
+ {
511
+ "id": 56,
512
+ "special": false,
513
+ "content": "atr_5d",
514
+ "single_word": true,
515
+ "lstrip": false,
516
+ "rstrip": false,
517
+ "normalized": false
518
+ },
519
+ {
520
+ "id": 57,
521
+ "special": false,
522
+ "content": "atr_10d",
523
+ "single_word": true,
524
+ "lstrip": false,
525
+ "rstrip": false,
526
+ "normalized": false
527
+ },
528
+ {
529
+ "id": 58,
530
+ "special": false,
531
+ "content": "atr_21d",
532
+ "single_word": true,
533
+ "lstrip": false,
534
+ "rstrip": false,
535
+ "normalized": false
536
+ },
537
+ {
538
+ "id": 59,
539
+ "special": false,
540
+ "content": "momentum_5d",
541
+ "single_word": true,
542
+ "lstrip": false,
543
+ "rstrip": false,
544
+ "normalized": false
545
+ },
546
+ {
547
+ "id": 60,
548
+ "special": false,
549
+ "content": "momentum_10d",
550
+ "single_word": true,
551
+ "lstrip": false,
552
+ "rstrip": false,
553
+ "normalized": false
554
+ },
555
+ {
556
+ "id": 61,
557
+ "special": false,
558
+ "content": "momentum_21d",
559
+ "single_word": true,
560
+ "lstrip": false,
561
+ "rstrip": false,
562
+ "normalized": false
563
+ },
564
+ {
565
+ "id": 62,
566
+ "special": false,
567
+ "content": "volume_change_5d",
568
+ "single_word": true,
569
+ "lstrip": false,
570
+ "rstrip": false,
571
+ "normalized": false
572
+ },
573
+ {
574
+ "id": 63,
575
+ "special": false,
576
+ "content": "volume_change_10d",
577
+ "single_word": true,
578
+ "lstrip": false,
579
+ "rstrip": false,
580
+ "normalized": false
581
+ },
582
+ {
583
+ "id": 64,
584
+ "special": false,
585
+ "content": "volume_change_21d",
586
+ "single_word": true,
587
+ "lstrip": false,
588
+ "rstrip": false,
589
+ "normalized": false
590
+ },
591
+ {
592
+ "id": 65,
593
+ "special": false,
594
+ "content": "close_to_ma_5d",
595
+ "single_word": true,
596
+ "lstrip": false,
597
+ "rstrip": false,
598
+ "normalized": false
599
+ },
600
+ {
601
+ "id": 66,
602
+ "special": false,
603
+ "content": "close_to_ma_10d",
604
+ "single_word": true,
605
+ "lstrip": false,
606
+ "rstrip": false,
607
+ "normalized": false
608
+ },
609
+ {
610
+ "id": 67,
611
+ "special": false,
612
+ "content": "close_to_ma_21d",
613
+ "single_word": true,
614
+ "lstrip": false,
615
+ "rstrip": false,
616
+ "normalized": false
617
+ },
618
+ {
619
+ "id": 68,
620
+ "special": false,
621
+ "content": "volume_change_rate",
622
+ "single_word": true,
623
+ "lstrip": false,
624
+ "rstrip": false,
625
+ "normalized": false
626
+ }
627
+ ],
628
+ "normalizer": {
629
+ "type": "FinancialFeatureNormalizer"
630
+ },
631
+ "pre_tokenizer": {
632
+ "type": "FinancialFeaturePreTokenizer"
633
+ },
634
+ "post_processor": {
635
+ "type": "FinancialFeaturePostProcessor"
636
+ },
637
+ "decoder": {
638
+ "type": "FinancialFeatureDecoder"
639
+ },
640
+ "model": {
641
+ "type": "FinancialFeatureProcessor",
642
+ "vocab": {
643
+ "<pad>": 0,
644
+ "<bos>": 1,
645
+ "<eos>": 2,
646
+ "<unk>": 3,
647
+ "date": 4,
648
+ "close": 5,
649
+ "high": 6,
650
+ "low": 7,
651
+ "open": 8,
652
+ "volume": 9,
653
+ "day_of_week": 10,
654
+ "day_of_month": 11,
655
+ "week_of_year": 12,
656
+ "month": 13,
657
+ "quarter": 14,
658
+ "year": 15,
659
+ "is_month_start": 16,
660
+ "is_month_end": 17,
661
+ "is_week_start": 18,
662
+ "is_week_end": 19,
663
+ "close_lag_1": 20,
664
+ "close_lag_2": 21,
665
+ "close_lag_3": 22,
666
+ "close_lag_5": 23,
667
+ "close_lag_10": 24,
668
+ "high_lag_1": 25,
669
+ "high_lag_2": 26,
670
+ "high_lag_3": 27,
671
+ "high_lag_5": 28,
672
+ "high_lag_10": 29,
673
+ "low_lag_1": 30,
674
+ "low_lag_2": 31,
675
+ "low_lag_3": 32,
676
+ "low_lag_5": 33,
677
+ "low_lag_10": 34,
678
+ "volume_lag_1": 35,
679
+ "volume_lag_2": 36,
680
+ "volume_lag_3": 37,
681
+ "volume_lag_5": 38,
682
+ "volume_lag_10": 39,
683
+ "future_return_1d": 40,
684
+ "future_return_2d": 41,
685
+ "future_return_10d": 42,
686
+ "past_return_1d": 43,
687
+ "past_return_2d": 44,
688
+ "past_return_5d": 45,
689
+ "past_return_10d": 46,
690
+ "daily_range": 47,
691
+ "daily_range_abs": 48,
692
+ "gap": 49,
693
+ "close_to_open": 50,
694
+ "close_to_high": 51,
695
+ "close_to_low": 52,
696
+ "volatility_5d": 53,
697
+ "volatility_10d": 54,
698
+ "volatility_21d": 55,
699
+ "atr_5d": 56,
700
+ "atr_10d": 57,
701
+ "atr_21d": 58,
702
+ "momentum_5d": 59,
703
+ "momentum_10d": 60,
704
+ "momentum_21d": 61,
705
+ "volume_change_5d": 62,
706
+ "volume_change_10d": 63,
707
+ "volume_change_21d": 64,
708
+ "close_to_ma_5d": 65,
709
+ "close_to_ma_10d": 66,
710
+ "close_to_ma_21d": 67,
711
+ "volume_change_rate": 68
712
+ },
713
+ "feature_dim": 69
714
+ }
715
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "feature_dim": 64,
3
+ "feature_groups": [
4
+ "price",
5
+ "time",
6
+ "lag",
7
+ "return",
8
+ "volatility",
9
+ "volume"
10
+ ],
11
+ "feature_names": [
12
+ "date",
13
+ "close",
14
+ "high",
15
+ "low",
16
+ "open",
17
+ "volume",
18
+ "day_of_week",
19
+ "day_of_month",
20
+ "week_of_year",
21
+ "month",
22
+ "quarter",
23
+ "year",
24
+ "is_month_start",
25
+ "is_month_end",
26
+ "is_week_start",
27
+ "is_week_end",
28
+ "close_lag_1",
29
+ "close_lag_2",
30
+ "close_lag_3",
31
+ "close_lag_5",
32
+ "close_lag_10",
33
+ "high_lag_1",
34
+ "high_lag_2",
35
+ "high_lag_3",
36
+ "high_lag_5",
37
+ "high_lag_10",
38
+ "low_lag_1",
39
+ "low_lag_2",
40
+ "low_lag_3",
41
+ "low_lag_5",
42
+ "low_lag_10",
43
+ "volume_lag_1",
44
+ "volume_lag_2",
45
+ "volume_lag_3",
46
+ "volume_lag_5",
47
+ "volume_lag_10",
48
+ "future_return_1d",
49
+ "future_return_2d",
50
+ "future_return_10d",
51
+ "past_return_1d",
52
+ "past_return_2d",
53
+ "past_return_5d",
54
+ "past_return_10d",
55
+ "daily_range",
56
+ "daily_range_abs",
57
+ "gap",
58
+ "close_to_open",
59
+ "close_to_high",
60
+ "close_to_low",
61
+ "volatility_5d",
62
+ "volatility_10d",
63
+ "volatility_21d",
64
+ "atr_5d",
65
+ "atr_10d",
66
+ "atr_21d",
67
+ "momentum_5d",
68
+ "momentum_10d",
69
+ "momentum_21d",
70
+ "volume_change_5d",
71
+ "volume_change_10d",
72
+ "volume_change_21d",
73
+ "close_to_ma_5d",
74
+ "close_to_ma_10d",
75
+ "close_to_ma_21d",
76
+ "volume_change_rate"
77
+ ],
78
+ "do_lower_case": false,
79
+ "model_max_length": 1024,
80
+ "tokenizer_class": "FinancialFeatureProcessor",
81
+ "bos_token": "<bos>",
82
+ "eos_token": "<eos>",
83
+ "pad_token": "<pad>",
84
+ "unk_token": "<unk>"
85
+ }
usage.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # SpaceExploreAI-Small-Base-Regression-5M
3
+
4
+ ## 模型描述
5
+
6
+ SpaceExploreAI-Small-Base-Regression-5M 是一个基于深度学习的金融时序预测模型,专为股票价格趋势分析和预测而设计。这是 SpaceExploreAI 系列的小型版本,具有约 5M 参数,针对回归(regression)任务进行了优化。
7
+
8
+ ### 主要特点
9
+
10
+ - **轻量化设计**:仅有 5M 参数,适合资源受限环境
11
+ - **回归预测**:专为价格预测等回归任务优化
12
+ - **Transformer 架构**:基于 Transformer 架构,集成了 RoPE 旋转位置编码技术
13
+ - **多头 GQA 注意力**:使用先进的多头分组查询注意力(GQA)机制捕捉时间序列数据中的模式
14
+
15
+ ## 技术规格
16
+
17
+ - **参数量**:约 5M
18
+ - **模型类型**:Transformer
19
+ - **隐藏层大小**:256
20
+ - **隐藏层数量**:4
21
+ - **注意力头数量**:4
22
+ - **注意力类型**:gqa
23
+ - **归一化类型**:rmsnorm
24
+ - **最大序列长度**:32
25
+ - **预测类型**:regression
26
+ - **使用 MoE**:是,混合专家模型增强了模型的表达能力
27
+ - **MOE配置**:8个专家,每个token使用2个专家,中间层大小1024
28
+
29
+ ## 使用示例
30
+
31
+ ```python
32
+ from transformers import AutoTokenizer, AutoModel
33
+ import torch
34
+
35
+ # 加载模型和分词器
36
+ model_name = "SpaceExploreAI/SpaceExploreAI-Small-Base-Regression-5M"
37
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
38
+ model = AutoModel.from_pretrained(model_name)
39
+
40
+ # 准备输入数据 (假设您已经有了处理好的金融数据)
41
+ inputs = torch.tensor([[0.1, 0.2, 0.3, ...]]) # 您的金融序列数据
42
+
43
+ # 进行预测
44
+ with torch.no_grad():
45
+ outputs = model(inputs)
46
+ predictions = outputs.last_hidden_state
47
+ ```
48
+
49
+ ## 免责声明
50
+
51
+ SpaceExploreAI 仅供大模型 AI 学习与量化交易学习使用,不得用于商业用途,也不得以此作为投资决策依据,否则后果自负。
52
+
53
+ ## 许可证
54
+
55
+ [Apache License 2.0](LICENSE)
vocab.txt ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <pad>
2
+ <bos>
3
+ <eos>
4
+ <unk>
5
+ date
6
+ close
7
+ high
8
+ low
9
+ open
10
+ volume
11
+ day_of_week
12
+ day_of_month
13
+ week_of_year
14
+ month
15
+ quarter
16
+ year
17
+ is_month_start
18
+ is_month_end
19
+ is_week_start
20
+ is_week_end
21
+ close_lag_1
22
+ close_lag_2
23
+ close_lag_3
24
+ close_lag_5
25
+ close_lag_10
26
+ high_lag_1
27
+ high_lag_2
28
+ high_lag_3
29
+ high_lag_5
30
+ high_lag_10
31
+ low_lag_1
32
+ low_lag_2
33
+ low_lag_3
34
+ low_lag_5
35
+ low_lag_10
36
+ volume_lag_1
37
+ volume_lag_2
38
+ volume_lag_3
39
+ volume_lag_5
40
+ volume_lag_10
41
+ future_return_1d
42
+ future_return_2d
43
+ future_return_10d
44
+ past_return_1d
45
+ past_return_2d
46
+ past_return_5d
47
+ past_return_10d
48
+ daily_range
49
+ daily_range_abs
50
+ gap
51
+ close_to_open
52
+ close_to_high
53
+ close_to_low
54
+ volatility_5d
55
+ volatility_10d
56
+ volatility_21d
57
+ atr_5d
58
+ atr_10d
59
+ atr_21d
60
+ momentum_5d
61
+ momentum_10d
62
+ momentum_21d
63
+ volume_change_5d
64
+ volume_change_10d
65
+ volume_change_21d
66
+ close_to_ma_5d
67
+ close_to_ma_10d
68
+ close_to_ma_21d
69
+ volume_change_rate