gmguarino committed on
Commit
7076430
·
verified ·
1 Parent(s): 3cc8332

Upload tokenizer

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ {# ───── header (system message) ───── #}
3
+ {{- "<|im_start|>system
4
+ " -}}
5
+ {{- "## Metadata
6
+
7
+ " -}}
8
+ {{- "Role: Climate Expert
9
+ " -}}
10
+ {{- "Objective: Detecting Climate Related Disinformation\n" -}}
11
+ {{- "You are an assistant helping editors to moderate TV and radio content. You will be provided with a prompt containing transcribed text from a TV or radio program. Bear in mind that the transcript may be missing punctuation and may be of very low quality, with incorrect vocabulary, cuts in the wrong places, or may include some phonetic transcription. Even if the text is not in English, analyze it seamlessly.
12
+
13
+ Task: Determine if the text promotes climate change misinformation that undermines well-established scientific consensus, such as denying the existence of climate change or the factors that contribute to it.
14
+
15
+ Instructions:
16
+ 1) Your verdict should be true or false.
17
+ 2) Format your verdict always using XML tags like this: <misinformation>verdict</misinformation>" -}}
18
+ {{- "
19
+
20
+ " -}}
21
+ {{- "<|im_end|>
22
+ " -}}
23
+
24
+ {# ───── main loop ───── #}
25
+ {%- for message in messages -%}
26
+ {%- set content = message.content if message.content is string else "" -%}
27
+ {%- if message.role == "user" -%}
28
+ {{ "<|im_start|>" + "user
29
+ " + content + "<|im_end|>
30
+ " }}
31
+ {%- elif message.role == "assistant" -%}
32
+ {% generation %}
33
+ {{ "<|im_start|>assistant
34
+ " + content.lstrip("
35
+ ") + "<|im_end|>
36
+ " }}
37
+ {% endgeneration %}
38
+ {%- endif -%}
39
+ {%- endfor -%}
40
+ {# ───── generation prompt ───── #}
41
+ {%- if add_generation_prompt -%}
42
+ {{ "<|im_start|>assistant
43
+ " }}
44
+ {%- endif -%}
special_tokens_map.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "boi_token": "<start_of_image>",
3
+ "bos_token": {
4
+ "content": "<bos>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ "eoi_token": "<end_of_image>",
11
+ "eos_token": {
12
+ "content": "<eos>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false
17
+ },
18
+ "image_token": "<image_soft_token>",
19
+ "pad_token": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "unk_token": {
27
+ "content": "<unk>",
28
+ "lstrip": false,
29
+ "normalized": false,
30
+ "rstrip": false,
31
+ "single_word": false
32
+ }
33
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
3
+ size 33384568
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff