aus10powell committed
Commit 5983cce · 1 Parent(s): cfd82fd

Upload 77 files

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. Dockerfile +19 -0
  2. app.py +225 -0
  3. models/BarackObama/added_tokens.json +3 -0
  4. models/BarackObama/config.json +39 -0
  5. models/BarackObama/merges.txt +0 -0
  6. models/BarackObama/pytorch_model.bin +3 -0
  7. models/BarackObama/special_tokens_map.json +24 -0
  8. models/BarackObama/tokenizer_config.json +34 -0
  9. models/BarackObama/training_args.bin +3 -0
  10. models/BarackObama/vocab.json +0 -0
  11. models/alikarimi_ak8/added_tokens.json +3 -0
  12. models/alikarimi_ak8/config.json +39 -0
  13. models/alikarimi_ak8/merges.txt +0 -0
  14. models/alikarimi_ak8/pytorch_model.bin +3 -0
  15. models/alikarimi_ak8/special_tokens_map.json +24 -0
  16. models/alikarimi_ak8/tokenizer_config.json +34 -0
  17. models/alikarimi_ak8/training_args.bin +3 -0
  18. models/alikarimi_ak8/vocab.json +0 -0
  19. models/cathiedwood/added_tokens.json +3 -0
  20. models/cathiedwood/config.json +39 -0
  21. models/cathiedwood/merges.txt +0 -0
  22. models/cathiedwood/pytorch_model.bin +3 -0
  23. models/cathiedwood/special_tokens_map.json +24 -0
  24. models/cathiedwood/tokenizer_config.json +34 -0
  25. models/cathiedwood/training_args.bin +3 -0
  26. models/cathiedwood/vocab.json +0 -0
  27. models/elonmusk/added_tokens.json +3 -0
  28. models/elonmusk/config.json +39 -0
  29. models/elonmusk/merges.txt +0 -0
  30. models/elonmusk/pytorch_model.bin +3 -0
  31. models/elonmusk/special_tokens_map.json +24 -0
  32. models/elonmusk/tokenizer_config.json +34 -0
  33. models/elonmusk/training_args.bin +3 -0
  34. models/elonmusk/vocab.json +0 -0
  35. models/taylorlorenz/added_tokens.json +3 -0
  36. models/taylorlorenz/config.json +39 -0
  37. models/taylorlorenz/merges.txt +0 -0
  38. models/taylorlorenz/pytorch_model.bin +3 -0
  39. models/taylorlorenz/special_tokens_map.json +24 -0
  40. models/taylorlorenz/tokenizer_config.json +34 -0
  41. models/taylorlorenz/training_args.bin +3 -0
  42. models/taylorlorenz/vocab.json +0 -0
  43. models/ylecun/added_tokens.json +3 -0
  44. models/ylecun/config.json +39 -0
  45. models/ylecun/merges.txt +0 -0
  46. models/ylecun/pytorch_model.bin +3 -0
  47. models/ylecun/special_tokens_map.json +24 -0
  48. models/ylecun/tokenizer_config.json +34 -0
  49. models/ylecun/training_args.bin +3 -0
  50. models/ylecun/vocab.json +0 -0
Dockerfile ADDED
@@ -0,0 +1,19 @@
+
+ FROM python:3.10
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+
+ WORKDIR $HOME/app
+
+ COPY --chown=user . $HOME/app
+
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,225 @@
+ """FastAPI endpoint
+ To run locally use 'uvicorn app:app --host localhost --port 7860'
+ or
+ `python -m uvicorn app:app --reload --host localhost --port 7860`
+ """
+ import datetime as dt
+ import json
+ import logging
+ import numpy as np
+ import os
+ import random
+ from typing import Dict, List
+
+ import uvicorn
+ from fastapi import FastAPI, HTTPException, Request, Response
+ from fastapi.responses import HTMLResponse
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.templating import Jinja2Templates
+
+ import scripts.sentiment as sentiment
+ import scripts.twitter_scraper as ts
+ import scripts.utils as utils
+ from scripts import generative
+ import nltk
+
+ logging.basicConfig(level=logging.INFO)
+
+ app = FastAPI()
+ templates = Jinja2Templates(directory="templates")
+ app.mount("/static", StaticFiles(directory="static"), name="static")
+
+ # Construct absolute path to the models folder
+ models_path = os.path.abspath("models")
+
+ username_list = [
+     "alikarimi_ak8",
+     "elonmusk",
+     "BarackObama",
+     "taylorlorenz",
+     "cathiedwood",
+     "ylecun",
+ ]
+
+ ## Static objects/paths
+ start_date = dt.date(year=2023, month=2, day=1)
+ end_date = dt.date(year=2023, month=3, day=22)
+
+
+ @app.get("/", response_class=HTMLResponse)
+ async def webpage(request: Request):
+     return templates.TemplateResponse("index.html", {"request": request})
+
+
+ @app.get("/accounts")
+ def get_accounts() -> List[dict]:
+     import pandas as pd
+
+     logging.info(f"Pulling account information on {username_list}")
+     account_info_list = [
+         ts.get_twitter_account_info(twitter_handle=account) for account in username_list
+     ]
+     df_account = pd.DataFrame(account_info_list)
+     df_account = df_account.style.bar(
+         subset=["follower_count", "friends_count"], color="#d65f5f"
+     )
+     df_account = df_account.format(
+         {"follower_count": "{:,.0f}", "friends_count": "{:,.0f}"}
+     )
+     return HTMLResponse(content=df_account.to_html(classes="center"), status_code=200)
+
+
+ @app.get("/tweets/{username}", response_model=dict)
+ def get_tweets(username: str) -> dict:
+     if username in username_list:
+         query = f"from:{username} since:{start_date} until:{end_date}"
+         return ts.get_tweets(query=query)
+     else:
+         return {"detail": "Account not in scope of project."}
+
+
+ @app.get("/audience/{username}", response_model=dict)
+ def get_audience(username: str) -> dict:
+     if username in username_list:
+         query = f"from:{username} since:{start_date} until:{end_date}"
+         tweets = ts.get_tweets(query=query)
+
+         # Randomly sample tweets from the user (capped at the number available)
+         n_samples = 5
+         tweets_sampled = random.sample(tweets, min(n_samples, len(tweets)))
+
+         # Get all replies to sampled tweets
+         tweet_threads = []
+         for tweet in tweets_sampled:
+             threads = ts.get_replies(
+                 username=tweet["username"],
+                 conversation_id=tweet["conversation_id"],
+                 max_tweets=100,
+             )
+             tweet_threads += threads
+
+         # Get usernames from the sampled thread tweets
+         usernames = [t["username"] for t in tweet_threads]
+         # Get account info for each user replying to the sampled tweets,
+         # e.g. {"follower_count": 1, "friends_count": 20, "verified": false}
+         info_accounts = [
+             ts.get_twitter_account_info(twitter_handle=account) for account in usernames
+         ]
+
+         # Get stats for followers/audience engaging with the tweets
+         follower_counts = [
+             info_accounts[i]["follower_count"] for i in range(len(info_accounts))
+         ]
+         friends_counts = [
+             info_accounts[i]["friends_count"] for i in range(len(info_accounts))
+         ]
+         verified_counts = [
+             1 if info_accounts[i]["verified"] else 0
+             for i in range(len(info_accounts))
+         ]
+         return {
+             "sample_size": len(info_accounts),
+             "mean_follower_count": round(np.mean(follower_counts), 3),
+             "mean_friends_count": round(np.mean(friends_counts), 3),
+             "mean_verified": round(np.mean(verified_counts), 3),
+         }
+     else:
+         response = Response(content="Account not in scope of project.", status_code=404)
+         return response
+
+
+ @app.get("/sentiment/{username}")
+ async def get_sentiment(username: str) -> Dict[str, Dict[str, float]]:
+     if username not in username_list:
+         raise HTTPException(status_code=404, detail="Account not in scope of project.")
+
+     query = f"from:{username} since:{start_date} until:{end_date}"
+     tweets = ts.get_tweets(query=query)
+     n_samples = 5
+     tweets_sampled = random.sample(tweets, min(n_samples, len(tweets)))
+
+     tweet_threads = []
+     for tweet in tweets_sampled:
+         threads = ts.get_replies(
+             username=tweet["username"],
+             conversation_id=tweet["conversation_id"],
+             max_tweets=100,
+         )
+         tweet_threads += threads
+
+     print(
+         f"Total replies to {n_samples} sampled tweets from username: {username}, {len(tweet_threads)}"
+     )
+
+     ## Sentiment scoring
+     print(f"Running tweet sentiment scoring on username: {username} tweets")
+     tweets_scores = sentiment.get_tweets_sentiment(tweets=tweets)
+     mean_tweets_score = round(np.mean(tweets_scores), 2)
+     ci_tweets = utils.wilson_score_interval(tweets_scores)
+
+     # Get the sentiment of the reply threads to the user's tweets
+     print(f"Running tweet thread sentiment scoring on username: {username} tweets")
+     threads_scores = sentiment.get_tweets_sentiment(tweets=tweet_threads)
+     mean_threads_score = round(np.mean(threads_scores), 2)
+     ci_threads = utils.wilson_score_interval(threads_scores)
+
+     return {
+         "thread_level": {
+             "mean": mean_threads_score,
+             "confidence_interval": ci_threads,
+         },
+         "audience_level": {
+             "mean": mean_tweets_score,
+             "confidence_interval": ci_tweets,
+         },
+     }
+
+
+ @app.post("/api/generate")
+ async def generate_text(request: Request):
+     print("*" * 50)
+     data = await request.json()
+     print("*" * 50)
+     print("POST Request:")
+
+     # TODO: if the input exceeds ~10 tokens, send the text to a summarizer first.
+
+     generated_text = generative.generate_account_text(
+         prompt=data["text"], model_dir=os.path.join(models_path, data["account"])
+     )
+     # Return one example
+     generated_text = generated_text[0]["generated_text"]
+
+     ###################################################
+     ## Clean up generated text
+     # Drop duplicate sentences and discard the final (possibly truncated) one.
+     # Requires the NLTK "punkt" tokenizer data to be available.
+     sentences = nltk.sent_tokenize(generated_text)
+     unique_sentences = set()
+     non_duplicate_sentences = []
+     for sentence in sentences:
+         if sentence not in unique_sentences:
+             non_duplicate_sentences.append(sentence)
+             unique_sentences.add(sentence)
+     final_text = " ".join(non_duplicate_sentences[:-1])
+
+     return {"generated_text": final_text}
+
+
+ @app.get("/examples1")
+ async def read_examples1():
+     with open("templates/charts/handle_sentiment_breakdown.html") as f:
+         html = f.read()
+     return HTMLResponse(content=html)
+
+
+ @app.get("/examples2")
+ async def read_examples2():
+     with open("templates/charts/handle_sentiment_timesteps.html") as f:
+         html = f.read()
+     return HTMLResponse(content=html)
+
+ # uvicorn --workers=2 app:app
+ # if __name__ == "__main__":
+ #     # uvicorn.run(app, host="0.0.0.0", port=8000)
+ #     uvicorn.run("app:app", host="127.0.0.1", port=5049, reload=True)
models/BarackObama/added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "[PAD]": 50257
+ }
models/BarackObama/config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "_name_or_path": "gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.24.0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
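
Each models/<account> directory is a self-contained Hugging Face model folder (config, tokenizer files, weights), so it loads directly with transformers. A sketch, assuming the repo is cloned locally and the LFS weights have been pulled:

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_dir = "models/BarackObama"  # any of the six account folders works the same way
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir)

# config.json's task_specific_params imply sampled generation with max_length=50 by default
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(generator("Change will not come if")[0]["generated_text"])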
models/BarackObama/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/BarackObama/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:18c6382bf4651ffdfb464fcafe980119de8bf39d52e7e1ba5678130fcd1e9469
+ size 510395581
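
The .bin entries in this diff are Git LFS pointer files rather than the weights themselves: oid is the SHA-256 of the real ~510 MB checkpoint and size is its byte count. A small sketch for verifying a downloaded checkpoint against its pointer:

import hashlib

# Stream the ~510 MB checkpoint so it never sits fully in memory
def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "18c6382bf4651ffdfb464fcafe980119de8bf39d52e7e1ba5678130fcd1e9469"
assert sha256_of("models/BarackObama/pytorch_model.bin") == expected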
models/BarackObama/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "[PAD]",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
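
This file, together with added_tokens.json mapping [PAD] to id 50257 (one past GPT-2's original ids 0-50256), indicates a pad token was registered before fine-tuning. Note that config.json still reports vocab_size 50257, so the embedding matrix appears unresized, with pad positions presumably masked during training. A sketch of how the token was likely added; this is a reconstruction, not the actual training code:

from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# GPT-2 ships without a pad token; registering one appends a new id after 50256
tokenizer.add_special_tokens({"pad_token": "[PAD]"})
print(tokenizer.pad_token, tokenizer.pad_token_id)  # [PAD] 50257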
models/BarackObama/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 1024,
+   "name_or_path": "gpt2",
+   "pad_token": null,
+   "special_tokens_map_file": null,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/BarackObama/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:71684cf5cce747c17720cae37368baf794483b4018c616e0d26487886590e338
+ size 3387
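
training_args.bin is a pickled transformers.TrainingArguments object (hence only 3,387 bytes), so the fine-tuning hyperparameters can be inspected after download. A sketch, assuming transformers is importable and a torch version that permits full unpickling (newer releases default torch.load to weights_only=True):

import torch

# Unpickles the TrainingArguments saved by the Trainer during fine-tuning
args = torch.load("models/BarackObama/training_args.bin")
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)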
models/BarackObama/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
models/alikarimi_ak8/added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "[PAD]": 50257
+ }
models/alikarimi_ak8/config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "_name_or_path": "gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.24.0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
models/alikarimi_ak8/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/alikarimi_ak8/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfffa9a2034ca45dd1be4946de2c080045ed4c221e652275f13024078cd8e604
+ size 510395581
models/alikarimi_ak8/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "[PAD]",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/alikarimi_ak8/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 1024,
+   "name_or_path": "gpt2",
+   "pad_token": null,
+   "special_tokens_map_file": null,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/alikarimi_ak8/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:20869c5642f6be95f3a342a6133749dd150bfdecffedab7860d7e767a521ed74
+ size 3387
models/alikarimi_ak8/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
models/cathiedwood/added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "[PAD]": 50257
+ }
models/cathiedwood/config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "_name_or_path": "gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.24.0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
models/cathiedwood/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/cathiedwood/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ecac4ae5b95aa181da1616db839314ca186f138d59bd2322960623bc537497e9
+ size 510395581
models/cathiedwood/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "[PAD]",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/cathiedwood/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 1024,
+   "name_or_path": "gpt2",
+   "pad_token": null,
+   "special_tokens_map_file": null,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/cathiedwood/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:90e74b31b37904075ebd8760cd82fe42a6f239f874c9989a7fc199126050a4a5
+ size 3387
models/cathiedwood/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
models/elonmusk/added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "[PAD]": 50257
+ }
models/elonmusk/config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "_name_or_path": "gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.24.0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
models/elonmusk/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/elonmusk/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a0305a3b73d7f97d5fd0ac0409407fe5fa80e32de4f5ae8b73305bbab896aff5
+ size 510395581
models/elonmusk/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "[PAD]",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/elonmusk/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 1024,
+   "name_or_path": "gpt2",
+   "pad_token": null,
+   "special_tokens_map_file": null,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/elonmusk/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0c50926e7c005b420f813373fa239e70fac310bdbcafef95f95dfb1e3145c544
+ size 3387
models/elonmusk/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
models/taylorlorenz/added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "[PAD]": 50257
+ }
models/taylorlorenz/config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "_name_or_path": "gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.24.0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
models/taylorlorenz/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/taylorlorenz/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5075410d32a65bc98c38d578df472ecb270124c0f053b73477e6b7f1b3377003
+ size 510395581
models/taylorlorenz/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "[PAD]",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/taylorlorenz/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 1024,
+   "name_or_path": "gpt2",
+   "pad_token": null,
+   "special_tokens_map_file": null,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/taylorlorenz/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ad406657c6f7bdf6c5a07979f9b57f380adf0376b6c3d99b27a1ce2f9903e9f6
+ size 3387
models/taylorlorenz/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
models/ylecun/added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "[PAD]": 50257
+ }
models/ylecun/config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "_name_or_path": "gpt2",
+   "activation_function": "gelu_new",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 50256,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 50256,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "model_type": "gpt2",
+   "n_ctx": 1024,
+   "n_embd": 768,
+   "n_head": 12,
+   "n_inner": null,
+   "n_layer": 12,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "task_specific_params": {
+     "text-generation": {
+       "do_sample": true,
+       "max_length": 50
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.24.0",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
models/ylecun/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/ylecun/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfffa9a2034ca45dd1be4946de2c080045ed4c221e652275f13024078cd8e604
+ size 510395581
models/ylecun/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "[PAD]",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/ylecun/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "add_bos_token": false,
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 1024,
+   "name_or_path": "gpt2",
+   "pad_token": null,
+   "special_tokens_map_file": null,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
models/ylecun/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:20869c5642f6be95f3a342a6133749dd150bfdecffedab7860d7e767a521ed74
+ size 3387
models/ylecun/vocab.json ADDED
The diff for this file is too large to render. See raw diff