uday610 committed on
Commit
8b064a7
·
verified ·
1 Parent(s): 3e33a9b

Upload 9 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ model.data filter=lfs diff=lfs merge=lfs -text
genai_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": {
3
+ "bos_token_id": 0,
4
+ "context_length": 8192,
5
+ "decoder": {
6
+ "session_options": {
7
+ "log_id": "onnxruntime-genai",
8
+ "custom_ops_library": "onnxruntime_vitis_ai_custom_ops.dll",
9
+ "provider_options": [
10
+ {
11
+ "VitisAI": {
12
+ "config_file": ".\\libs\\vaip_llm.json"
13
+ }
14
+ }
15
+ ]
16
+ },
17
+ "filename": "model.onnx",
18
+ "head_size": 128,
19
+ "hidden_size": 4096,
20
+ "inputs": {
21
+ "input_ids": "input_ids",
22
+ "attention_mask": "attention_mask",
23
+ "past_key_names": "past_key_values.%d.key",
24
+ "past_value_names": "past_key_values.%d.value"
25
+ },
26
+ "outputs": {
27
+ "logits": "logits",
28
+ "present_key_names": "present.%d.key",
29
+ "present_value_names": "present.%d.value"
30
+ },
31
+ "num_attention_heads": 32,
32
+ "num_hidden_layers": 28,
33
+ "num_key_value_heads": 2
34
+ },
35
+ "eos_token_id": 2,
36
+ "pad_token_id": 0,
37
+ "type": "chatglm",
38
+ "vocab_size": 65024
39
+ },
40
+ "search": {
41
+ "diversity_penalty": 0.0,
42
+ "do_sample": false,
43
+ "early_stopping": true,
44
+ "length_penalty": 1.0,
45
+ "max_length": 8192,
46
+ "min_length": 0,
47
+ "no_repeat_ngram_size": 0,
48
+ "num_beams": 1,
49
+ "num_return_sequences": 1,
50
+ "past_present_share_buffer": true,
51
+ "repetition_penalty": 1.0,
52
+ "temperature": 1.0,
53
+ "top_k": 1,
54
+ "top_p": 1.0
55
+ }
56
+ }
model.data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8e60c0f2bd5314b3b51269d293f6a0f7775020cf6e9c1d8bbf057c48e10ff64
3
+ size 3791874048
model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67931c5992f4721670bc6f73491da788c977f2546dedb76697d497d6cecd54a7
3
+ size 213850
prompts.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ 2048------------------------------
2
+ In recent years, artificial intelligence (AI) has revolutionized numerous industries, with healthcare being one of the most promising fields. The integration of AI in healthcare systems has the potential to transform patient care, diagnostics, and treatment plans. Imagine a world where AI-powered algorithms can predict diseases before they manifest, provide personalized treatment plans based on genetic information, and even assist in complex surgeries with unparalleled precision. One of the most significant advantages of AI in healthcare is its ability to analyze vast amounts of data quickly and accurately. Traditional methods of data analysis in healthcare often involve manual processes that are time-consuming and prone to human error. AI, on the other hand, can sift through millions of patient records, medical images, and research papers in a fraction of the time it would take a human. This ability to process and analyze big data allows for more accurate diagnoses and more effective treatment plans. For example, consider the case of a patient presenting with symptoms that could indicate several different conditions. An AI system could analyze the patient’s medical history, compare it with millions of other cases, and suggest the most likely diagnosis. It could also recommend a personalized treatment plan based on the patient’s genetic makeup, lifestyle, and other factors. This level of precision medicine has the potential to improve patient outcomes significantly. However, the implementation of AI in healthcare is not without its challenges. One of the main concerns is the ethical implications of using AI in such a sensitive field. For instance, who is responsible if an AI system makes a wrong diagnosis? How do we ensure that AI systems are not biased in their decision-making processes? Moreover, there are concerns about patient privacy and the security of sensitive medical data. 
Another challenge is the integration of AI systems into existing healthcare infrastructure. Many healthcare systems are already burdened with outdated technology and limited resources. Integrating advanced AI systems into these environments requires significant investment and training. Additionally, there is the question of how AI will impact the roles of healthcare professionals. While AI can assist doctors and nurses in their work, there is concern that it could also lead to job displacement. Despite these challenges, the potential benefits of AI in healthcare are immense. For example, AI-powered robots are already being used in some hospitals to assist with surgeries. These robots can perform delicate procedures with a level of precision that is difficult for humans to achieve. AI is also being used to develop new drugs and treatment plans. By analyzing the molecular structure of diseases, AI can help researchers identify potential treatments faster than traditional methods. Moreover, AI has In recent years, artificial intelligence (AI) has revolutionized numerous industries, with healthcare being one of the most promising fields. The integration of AI in healthcare systems has the potential to transform patient care, diagnostics, and treatment plans. Imagine a world where AI-powered algorithms can predict diseases before they manifest, provide personalized treatment plans based on genetic information, and even assist in complex surgeries with unparalleled precision. One of the most significant advantages of AI in healthcare is its ability to analyze vast amounts of data quickly and accurately. Traditional methods of data analysis in healthcare often involve manual processes that are time-consuming and prone to human error. AI, on the other hand, can sift through millions of patient records, medical images, and research papers in a fraction of the time it would take a human. 
This ability to process and analyze big data allows for more accurate diagnoses and more effective treatment plans. For example, consider the case of a patient presenting with symptoms that could indicate several different conditions. An AI system could analyze the patient’s medical history, compare it with millions of other cases, and suggest the most likely diagnosis. It could also recommend a personalized treatment plan based on the patient’s genetic makeup, lifestyle, and other factors. This level of precision medicine has the potential to improve patient outcomes significantly. However, the implementation of AI in healthcare is not without its challenges. One of the main concerns is the ethical implications of using AI in such a sensitive field. For instance, who is responsible if an AI system makes a wrong diagnosis? How do we ensure that AI systems are not biased in their decision-making processes? Moreover, there are concerns about patient privacy and the security of sensitive medical data. Another challenge is the integration of AI systems into existing healthcare infrastructure. Many healthcare systems are already burdened with outdated technology and limited resources. Integrating advanced AI systems into these environments requires significant investment and training. Additionally, there is the question of how AI will impact the roles of healthcare professionals. While AI can assist doctors and nurses in their work, there is concern that it could also lead to job displacement. Despite these challenges, the potential benefits of AI in healthcare are immense. For example, AI-powered robots are already being used in some hospitals to assist with surgeries. These robots can perform delicate procedures with a level of precision that is difficult for humans to achieve. AI is also being used to develop new drugs and treatment plans. 
By analyzing the molecular structure of diseases, AI can help researchers identify potential treatments faster than traditional methods. Moreover, AI has In recent years, artificial intelligence (AI) has revolutionized numerous industries, with healthcare being one of the most promising fields. The integration of AI in healthcare systems has the potential to transform patient care, diagnostics, and treatment plans. Imagine a world where AI-powered algorithms can predict diseases before they manifest, provide personalized treatment plans based on genetic information, and even assist in complex surgeries with unparalleled precision. One of the most significant advantages of AI in healthcare is its ability to analyze vast amounts of data quickly and accurately. Traditional methods of data analysis in healthcare often involve manual processes that are time-consuming and prone to human error. AI, on the other hand, can sift through millions of patient records, medical images, and research papers in a fraction of the time it would take a human. This ability to process and analyze big data allows for more accurate diagnoses and more effective treatment plans. For example, consider the case of a patient presenting with symptoms that could indicate several different conditions. An AI system could analyze the patient’s medical history, compare it with millions of other cases, and suggest the most likely diagnosis. It could also recommend a personalized treatment plan based on the patient’s genetic makeup, lifestyle, and other factors. This level of precision medicine has the potential to improve patient outcomes significantly. However, the implementation of AI in healthcare is not without its challenges. One of the main concerns is the ethical implications of using AI in such a sensitive field. For instance, who is responsible if an AI system makes a wrong diagnosis? How do we ensure that AI systems are not biased in their decision-making processes? 
Moreover, there are concerns about patient privacy and the security of sensitive medical data. Another challenge is the integration of AI systems into existing healthcare infrastructure. Many healthcare systems are already burdened with outdated technology and limited resources. Integrating advanced AI systems into these environments requires significant investment and training. Additionally, there is the question of how AI will impact the roles of healthcare professionals. While AI can assist doctors and nurses in their work, there is concern that it could also lead to job displacement. Despite these challenges, the potential benefits of AI in healthcare are immense. For example, AI-powered robots are already being used in some hospitals to assist with surgeries. These robots can perform delicate procedures with a level of precision that is difficult for humans to achieve. AI is also being used to develop new drugs and treatment plans. By analyzing the molecular structure of diseases, AI can help researchers identify potential treatments faster than traditional methods. Moreover, AI has In recent years, artificial intelligence (AI) has revolutionized numerous industries, with healthcare being one of the most promising fields. The integration of AI in healthcare systems has the potential to transform patient care, diagnostics, and treatment plans. Imagine a world where AI-powered algorithms can predict diseases before they manifest, provide personalized treatment plans based on genetic information, and even assist in complex surgeries with unparalleled precision. One of the most significant advantages of AI in healthcare is its ability to analyze vast amounts of data quickly and accurately. Traditional methods of data analysis in healthcare often involve manual processes that are time-consuming and prone to human error. 
AI, on the other hand, can sift through millions of patient records, medical images, and research papers in a fraction of the time it would take a human. This ability to process and analyze big data allows for more accurate diagnoses and more effective treatment plans. For example, consider the case of a patient presenting with symptoms that could indicate several different conditions. An AI system could analyze the patient’s medical history, compare it with millions of other cases, and In recent years, artificial intelligence (AI) has revolutionized numerous industries, with healthcare being one of the most promising fields. The integration of AI in healthcare systems has the potential to transform patient care, diagnostics, and treatment plans. Imagine a world where AI-powered algorithms can predict diseases before they manifest, provide personalized treatment plans based on genetic information, and even assist in complex surgeries with unparalleled precision. One of the most significant advantages of AI in healthcare is its ability to analyze vast amounts of data quickly and accurately. Traditional methods of data analysis in healthcare often involve manual processes that are time-consuming and prone to human error. AI, on the other hand, can sift through millions of patient records, medical images, and research papers in a fraction of the time it would take a human. This ability to process and analyze big data allows for more accurate diagnoses and more effective treatment plans. For example, consider the case of a patient presenting with symptoms that could indicate several different conditions. An AI system could analyze the patient’s medical history, compare it with millions of other cases, and suggest the most likely diagnosis. It could also recommend a personalized treatment plan based on the patient’s genetic makeup, lifestyle, and other factors. This level of precision medicine has the potential to improve patient outcomes significantly. 
However, the implementation of AI in healthcare is not without its challenges. One of the main concerns is the ethical implications of using AI in such a sensitive field. For instance, who is responsible if an AI system makes a wrong diagnosis? How do we ensure that AI systems are not biased in their decision-making processes? Moreover, there are concerns about patient privacy and the security of sensitive medical data. Another challenge is the integration of AI systems into existing healthcare infrastructure. Many healthcare systems are already burdened with outdated technology and limited resources. Integrating advanced AI systems into these environments requires significant investment and training. Additionally, there is the question of how AI will impact the roles of healthcare professionals. While AI can assist doctors and nurses in their work, there is concern that it could also lead to job displacement. Despite these challenges, the potential benefits of AI in healthcare are immense. For example, AI-powered robots are already being used in some hospitals to assist with surgeries. These robots can perform delicate procedures with a level of precision that is difficult for humans to achieve. AI is also being used to develop new drugs and treatment plans. By analyzing the molecular structure of diseases, AI can help researchers identify potential treatments faster than traditional methods. Moreover, AI has In recent years, artificial intelligence (AI) has revolutionized numerous industries, with healthcare being one of the most promising fields. The integration of AI in healthcare systems has the potential to transform patient care, diagnostics, and treatment plans. Imagine a world where AI-powered algorithms can predict diseases before they manifest, provide personalized treatment plans based on genetic information, and even assist in complex surgeries with unparalleled precision. 
One of the most significant advantages of AI in healthcare is its ability to analyze vast amounts of data quickly and accurately. Traditional methods of data analysis in healthcare often involve manual processes that are time-consuming and prone to human error. AI, on the other hand, can sift through millions of patient records, medical images, and research papers in a fraction of the time it would take a human. This ability to process and analyze big data allows for more accurate diagnoses and more effective treatment plans. For example, consider the case of a patient presenting with symptoms that could indicate several different conditions. An AI system could analyze the patient’s medical history, compare it with millions of other cases, and suggest the most likely diagnosis. It could also recommend a personalized treatment plan based on the patient’s genetic makeup, lifestyle, and other factors. This level of precision medicine has the potential to improve patient outcomes significantly. However, the implementation of AI in healthcare is not without its challenges. One of the main concerns is the ethical implications of using AI in such a sensitive field. For instance, who is responsible if an AI system makes a wrong diagnosis? How do we ensure that AI systems are not biased in their decision-making processes? Moreover, there are concerns about patient privacy and the security of sensitive medical data. Another challenge is the integration of AI systems into existing healthcare infrastructure. Many healthcare systems are already burdened with outdated technology and limited resources. Integrating advanced AI systems into these environments requires significant investment and training. Additionally, there is the question of how AI will impact the roles of healthcare professionals. While AI can assist doctors and nurses in their work, there is concern that it could also lead to job displacement. 
Despite these challenges, the potential benefits of AI in healthcare are immense. For example, AI-powered robots are already being used in some hospitals to assist with surgeries. These robots can perform delicate procedures with a level of precision that is difficult for humans to achieve. AI is also being used to develop new drugs and treatment plans. By analyzing the molecular structure of diseases, AI can help researchers identify potential treatments faster than traditional methods. Moreover, AI has In recent years, artificial intelligence (AI) has revolutionized numerous industries, with healthcare being one of the most promising fields. The integration of AI in healthcare systems has the potential to transform patient care, diagnostics, and treatment plans. Imagine a world where AI-powered algorithms can predict diseases before they manifest, provide personalized treatment plans based on genetic information, and even assist in complex surgeries with unparalleled precision. One of the most significant advantages of AI in healthcare is its ability to analyze vast amounts of data quickly and accurately. Traditional methods of data analysis in healthcare often involve manual processes that are time-consuming and prone to human error. AI, on the other hand, can sift through millions of patient records, medical images, and research papers in a fraction of the time it would take a human. This ability to process and analyze big data allows for more accurate diagnoses and more effective treatment plans. For example, consider the case of a patient presenting with symptoms that could indicate several different conditions. An AI system could analyze the patient’s medical history, compare it with millions of other cases, and suggest the most likely diagnosis. It could also recommend a personalized treatment plan based on the patient’s genetic makeup, lifestyle, and other factors. 
This level of precision medicine has the potential to improve patient outcomes significantly. However, the implementation of AI in healthcare is not without its challenges. One of the main concerns is the ethical implications of using AI in such a sensitive field. For instance, who is responsible if an AI system makes a wrong diagnosis? How do we ensure that AI systems are not biased in their decision-making processes? Moreover, there are concerns about patient privacy and the security of sensitive medical data. Another challenge is the integration of AI systems into existing healthcare infrastructure. Many healthcare systems are already burdened with outdated technology and limited resources. Integrating advanced AI systems into these environments requires significant investment and training. Additionally, there is the question of how AI will impact the roles of healthcare professionals. While AI can assist doctors and nurses in their work, there is concern that it could also lead to job displacement. Despite these challenges, the potential benefits of AI in healthcare are immense. For example, AI-powered robots are already being used in some hospitals to assist with surgeries. These robots can perform delicate procedures with a level of precision that is difficult for humans to achieve. AI is also being used to develop new drugs and treatment plans. By analyzing the molecular structure of diseases, AI can help researchers identify potential treatments faster than traditional methods. Moreover, AI has In recent years, artificial intelligence (AI) has revolutionized numerous industries, with healthcare being one of the most promising fields. The integration of AI in healthcare systems has the potential to transform patient care, diagnostics, and treatment plans. 
Imagine a world where AI-powered algorithms can predict diseases before they manifest, provide personalized treatment plans based on genetic information, and even assist in complex surgeries with unparalleled precision. One of the most significant advantages of AI in healthcare is its ability to analyze vast amounts of data quickly and accurately. Traditional methods of data analysis in healthcare often involve manual processes that are time-consuming and prone to human error. AI, on the other hand, can sift through millions of patient records, medical images, and research papers in a fraction of the time it would take a human. This ability to process and analyze big data allows for more accurate diagnoses and more effective treatment plans. For example, consider the case of a patient presenting with symptoms that could indicate several different conditions. An AI system could analyze the patient’s medical history, compare it with millions of other cases, and In recent years, artificial intelligence (AI) has revolutionized numerous industries, with healthcare being one of the most promising fields. The integration of AI in healthcare systems has the potential to transform patient care, diagnostics, and treatment plans. Imagine a world where AI-powered algorithms can predict diseases before they manifest, provide personalized treatment plans based on genetic information, and even assist in complex surgeries with unparalleled precision. One of the most significant advantages of AI in healthcare is its ability to analyze vast amounts of data quickly and accurately. Traditional methods of data analysis in healthcare often involve manual processes that are time-consuming and prone to human error. AI, on the other hand, can sift through millions of patient records, medical images, and research papers in a fraction of the time it would take a human. This ability to process and analyze big data allows for more accurate diagnoses and more effective treatment plans. 
For example, consider the case of a patient presenting with symptoms that could indicate several different conditions. An AI system could analyze the patient’s medical history, compare it with millions of other cases, and suggest the most likely diagnosis. It could also recommend a personalized treatment plan based on the patient’s genetic makeup, lifestyle, and other factors. This level of precision medicine has the potential to improve patient outcomes significantly. However, the implementation of AI in healthcare is not without its challenges. One of the main concerns is the ethical implications of using AI in such a sensitive field. For instance, who is responsible if an AI system makes a wrong diagnosis? How do we ensure that AI systems are not biased in their decision-making processes? Moreover, there are concerns about patient privacy and the security of sensitive medical data. Another challenge is the integration of AI systems into existing healthcare infrastructure. Many healthcare systems are already burdened with outdated technology and limited resources. Integrating advanced AI systems into these environments requires significant investment and training. Additionally, there is the question of how AI will impact the roles of healthcare professionals. While AI can assist doctors and nurses in their work, there is concern that it could also lead to job displacement. Despite these challenges, the potential benefits of AI in healthcare are immense. For example, AI-powered robots are already being used in some hospitals to assist with surgeries. These robots can perform delicate procedures with a level of precision that is difficult for humans to achieve. AI is also being used to develop new drugs and treatment plans. By analyzing the molecular structure of diseases, AI can help researchers identify potential treatments faster than traditional methods. 
Moreover, AI has In recent years, artificial intelligence (AI) has revolutionized numerous industries, with healthcare being one of the most promising fields. The integration of AI in healthcare systems has the potential to transform patient care, diagnostics, and treatment plans. Imagine a world where AI-powered algorithms can predict diseases before they manifest, provide personalized treatment plans based on genetic information, and even assist in complex surgeries with unparalleled precision. One of the most significant advantages of AI in healthcare is its ability to analyze vast amounts of data quickly and accurately. Traditional methods of data analysis in healthcare often involve manual processes that are time-consuming and prone to human error. AI, on the other hand, can sift through millions of patient records, medical images, and research papers in a fraction of the time it would take a human. This ability to process and analyze big data allows for more accurate diagnoses and more effective treatment plans. For example, consider the case of a patient presenting with symptoms that could indicate several different conditions. An AI system could analyze the patient’s medical history, compare it with millions of other cases, and suggest the most likely diagnosis. It could also recommend a personalized treatment plan based on the patient’s genetic makeup, lifestyle, and other factors. This level of precision medicine has the potential to improve patient outcomes significantly. However, the implementation of AI in healthcare is not without its challenges. One of the main concerns is the ethical implications of using AI in such a sensitive field. For instance, who is responsible if an AI system makes a wrong diagnosis? How do we ensure that AI systems are not biased in their decision-making processes? Moreover, there are concerns about patient privacy and the security of sensitive medical data. 
Another challenge is the integration of AI systems into existing healthcare infrastructure. Many healthcare systems are already burdened with outdated technology and limited resources. Integrating advanced AI systems into these environments requires significant investment and training. Additionally, there is the question of how AI will impact the roles of healthcare professionals. While AI can assist doctors and nurses in their work, there is concern that it could also lead to job displacement. Despite these challenges, the potential benefits of AI in healthcare are immense. For example, AI-powered robots are already being used in some hospitals to assist with surgeries. These robots can perform delicate procedures with a level of precision that is difficult for humans to achieve. AI is also being used to develop new drugs and treatment plans. By analyzing the molecular structure of diseases, AI can help researchers identify potential treatments faster than traditional methods. Moreover, AI has In recent years, artificial intelligence (AI) has revolutionized numerous industries, with healthcare being one of the most promising fields. The integration of AI in healthcare systems has the potential to transform patient care, diagnostics, and treatment plans. Imagine a world where AI-powered algorithms can predict diseases before they manifest, provide personalized treatment plans based on genetic information, and even assist in complex surgeries with unparalleled precision. One of the most significant advantages of AI in healthcare is its ability to analyze vast amounts of data quickly and accurately. Traditional methods of data analysis in healthcare often involve manual processes that are time-consuming and prone to human error. AI, on the other hand, can sift through millions of patient records, medical images, and research papers in a fraction of the time it would take a human. 
This ability to process and analyze big data allows for more accurate diagnoses and more effective treatment plans. For example, consider the case of a patient presenting with symptoms that could indicate several different conditions. An AI system could analyze the patient’s medical history, compare it with millions of other cases, and suggest the most likely diagnosis. It could also recommend a personalized treatment plan based on the patient’s genetic makeup, lifestyle, and other factors. This level of precision medicine has the potential to improve patient outcomes significantly. However, the implementation of AI in healthcare is not without its challenges. One of the main concerns is the ethical implications of using AI in such a sensitive field. For instance, who is responsible if an AI system makes a wrong diagnosis? How do we ensure that AI systems are not biased in their decision-making processes? Moreover, there are concerns about patient privacy and the security of sensitive medical data. Another challenge is the integration of AI systems into existing healthcare infrastructure. Many healthcare systems are already burdened with outdated technology and limited resources. Integrating advanced AI systems into these environments requires significant investment and training. Additionally, there is the question of how AI will impact the roles of healthcare professionals. While AI can assist doctors and nurses in their work, there is concern that it could also lead to job displacement. Despite these challenges, the potential benefits of AI in healthcare are immense. For example, AI-powered robots are already being used in some hospitals to assist with surgeries. These robots can perform delicate procedures with a level of precision that is difficult for humans to achieve. AI is also being used to develop new drugs and treatment plans. 
By analyzing the molecular structure of diseases, AI can help researchers identify potential treatments faster than traditional methods. Moreover, AI has In recent years, artificial intelligence (AI) has revolutionized numerous industries, with healthcare being one of the most promising fields. The integration of AI in healthcare systems has the potential to transform patient care, diagnostics, and treatment plans. Imagine a world where AI-powered algorithms can predict diseases before they manifest, provide personalized treatment plans based on genetic information, and even assist in complex surgeries with unparalleled precision. One of the most significant advantages of AI in healthcare is its ability to analyze vast amounts of data quickly and accurately. Traditional methods of data analysis in healthcare often involve manual processes that are time-consuming and prone to human error. AI, on the other hand, can sift through millions of patient records, medical images, and research papers in a fraction of the time it would take a human. This ability to process and analyze big data allows for more accurate diagnoses and more effective treatment plans. For example, consider the case of a patient presenting with symptoms that could indicate several different conditions. An AI system could analyze the patient’s medical history, compare it with millions of other cases, and
special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "<unk>",
5
+ "unk_token": "<unk>"
6
+ }
tokenization_chatglm.py ADDED
@@ -0,0 +1,506 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import re
4
+ from typing import List, Optional, Union, Dict
5
+ from sentencepiece import SentencePieceProcessor
6
+ from transformers import AddedToken, PreTrainedTokenizer, PreTrainedTokenizerFast
7
+ from transformers.convert_slow_tokenizer import (
8
+ SLOW_TO_FAST_CONVERTERS,
9
+ SpmConverter,
10
+ decoders,
11
+ normalizers,
12
+ pre_tokenizers,
13
+ processors,
14
+ )
15
+ from transformers.utils import logging, PaddingStrategy
16
+ from transformers.tokenization_utils_base import EncodedInput, BatchEncoding
17
+
18
+
19
# Module logger, obtained through the logging helper imported from transformers.utils.
logger = logging.get_logger(__name__)

# Control tokens that SPTokenizer appends after the SentencePiece vocabulary.
# Order matters: ids are handed out sequentially in list order.
ADDITIONAL_SPECIAL_TOKENS = [
    "[MASK]", "[gMASK]", "[sMASK]",
    "<!sop!>", "<!eop!>",
    "<|system|>", "<|user|>", "<|assistant|>", "<|observation|>",
]

# Tokens placed in front of every sequence by build_inputs_with_special_tokens
# (slow tokenizer) and by the post-processor template (fast tokenizer).
PREFIX_TOKENS = ["[gMASK]", "<!sop!>"]

# Placeholder that the fast tokenizer's normalizer rewrites to "▁".
DUMMY_PREFIX_INDICATOR_FOR_FAST = "<!dummy-prefix!>"
35
+
36
+
37
class SPTokenizer:
    """SentencePiece model wrapper that adds ChatGLM's extra special tokens.

    The extra tokens (``ADDITIONAL_SPECIAL_TOKENS``) are not part of the
    SentencePiece vocabulary; they receive consecutive ids starting at the
    SentencePiece vocabulary size.
    """

    def __init__(self, model_path: str):
        """Load the SentencePiece model at ``model_path`` and index the extra tokens."""
        assert os.path.isfile(model_path), model_path
        self.sp_model = SentencePieceProcessor(model_file=model_path)

        # Ids reported by the SentencePiece model; the pad id reuses the unk id.
        self.n_words: int = self.sp_model.vocab_size()
        self.bos_id: int = self.sp_model.bos_id()
        self.eos_id: int = self.sp_model.eos_id()
        self.pad_id: int = self.sp_model.unk_id()
        assert self.sp_model.vocab_size() == self.sp_model.get_piece_size()

        # Hand out ids to the extra tokens right after the base vocabulary.
        special_tokens = ADDITIONAL_SPECIAL_TOKENS
        first_extra_id = self.n_words
        self.special_tokens = {
            token: first_extra_id + offset for offset, token in enumerate(special_tokens)
        }
        self.index_special_tokens = {
            first_extra_id + offset: token for offset, token in enumerate(special_tokens)
        }
        self.n_words = first_extra_id + len(special_tokens)
        # Alternation regex matching any extra token literally (for apply_chat_template).
        self.role_special_token_expression = "|".join([re.escape(token) for token in special_tokens])

    def tokenize(self, s: str, encode_special_tokens=False):
        """Split ``s`` into SentencePiece pieces.

        With ``encode_special_tokens`` set, every literal occurrence of an
        extra special token is emitted as a single piece and only the text
        between occurrences goes through SentencePiece.
        """
        if not encode_special_tokens:
            return self.sp_model.EncodeAsPieces(s)

        pieces = []
        cursor = 0
        for match in re.finditer(self.role_special_token_expression, s):
            start, end = match.span()
            if cursor < start:
                pieces.extend(self.sp_model.EncodeAsPieces(s[cursor:start]))
            pieces.append(s[start:end])
            cursor = end
        if cursor < len(s):
            pieces.extend(self.sp_model.EncodeAsPieces(s[cursor:]))
        return pieces

    def encode(self, s: str, bos: bool = False, eos: bool = False) -> List[int]:
        """Encode ``s`` to ids, optionally wrapping the result with BOS / EOS ids."""
        assert type(s) is str
        ids = self.sp_model.encode(s)
        if bos:
            ids = [self.bos_id] + ids
        if eos:
            ids = ids + [self.eos_id]
        return ids

    def decode(self, t: List[int]) -> str:
        """Decode ids to text, rendering extra special tokens as their literal strings."""
        chunks = []
        pending = []  # run of ordinary ids waiting to be decoded together
        for token_id in t:
            if token_id not in self.index_special_tokens:
                pending.append(token_id)
                continue
            if pending:
                chunks.append(self.sp_model.decode(pending))
                pending = []
            chunks.append(self.index_special_tokens[token_id])
        if pending:
            chunks.append(self.sp_model.decode(pending))
        return "".join(chunks)

    def decode_tokens(self, tokens: List[str]) -> str:
        """Join SentencePiece pieces back into a string."""
        return self.sp_model.DecodePieces(tokens)

    def convert_token_to_id(self, token):
        """ Converts a token (str) in an id using the vocab. """
        try:
            return self.special_tokens[token]
        except KeyError:
            return self.sp_model.PieceToId(token)

    def convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        if index in self.index_special_tokens:
            return self.index_special_tokens[index]
        # BOS / EOS / pad ids and anything outside the base vocabulary map to "".
        is_control_id = index in [self.eos_id, self.bos_id, self.pad_id]
        if is_control_id or index < 0 or index >= self.sp_model.vocab_size():
            return ""
        return self.sp_model.IdToPiece(index)
114
+
115
+
116
class ChatGLMTokenizer(PreTrainedTokenizer):
    """Slow ChatGLM tokenizer backed by ``SPTokenizer``.

    Every encoded sequence is prefixed with the ``PREFIX_TOKENS`` ids, and
    padding is only supported on the left. Besides ``input_ids`` and
    ``attention_mask`` the tokenizer also produces ``position_ids``.
    """

    vocab_files_names = {"vocab_file": "tokenizer.model"}
    model_input_names = ["input_ids", "attention_mask", "position_ids"]

    def __init__(
        self,
        vocab_file,
        padding_side="left",
        clean_up_tokenization_spaces=False,
        encode_special_tokens=False,
        **kwargs
    ):
        """Load the SentencePiece model at ``vocab_file`` and initialise the base class.

        Args:
            vocab_file: Path to the SentencePiece model file.
            padding_side: Padding side forwarded to the base class; `_pad` only
                accepts `"left"`.
            clean_up_tokenization_spaces: Forwarded to the base class.
            encode_special_tokens: When true, literal special-token strings in
                the input text are kept as single tokens by `_tokenize`.
            **kwargs: Forwarded unchanged to the base class.
        """
        self.name = "GLMTokenizer"
        self.vocab_file = vocab_file
        self.tokenizer = SPTokenizer(vocab_file)
        # Command names resolved by get_command; "<unk>" and "<pad>" share one id.
        self.special_tokens = {
            "<bos>": self.tokenizer.bos_id,
            "<eos>": self.tokenizer.eos_id,
            "<unk>": self.tokenizer.pad_id,
            "<pad>": self.tokenizer.pad_id
        }
        self.encode_special_tokens = encode_special_tokens

        # NOTE(review): the state above is set before calling the base initializer,
        # presumably because the base class may query the vocabulary — keep this order.
        super().__init__(
            padding_side=padding_side,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            **kwargs
        )

    def get_command(self, token):
        """Return the id of a command token such as ``"<eos>"`` or ``"<|user|>"``.

        Looks in this tokenizer's ``special_tokens`` first and then in the
        wrapped ``SPTokenizer``'s table; asserts if the token is in neither.
        """
        if token in self.special_tokens:
            return self.special_tokens[token]
        assert token in self.tokenizer.special_tokens, f"{token} is not a special token for {self.name}"
        return self.tokenizer.special_tokens[token]

    @property
    def unk_token(self) -> str:
        return self.tokenizer.sp_model.IdToPiece(self.get_command("<unk>"))

    @property
    def pad_token(self) -> str:
        return self.tokenizer.sp_model.IdToPiece(self.get_command("<pad>"))

    @property
    def eos_token(self) -> str:
        return self.tokenizer.sp_model.IdToPiece(self.get_command("<eos>"))

    @property
    def unk_token_id(self) -> int:
        return self.get_command("<unk>")

    @property
    def pad_token_id(self) -> int:
        return self.get_command("<pad>")

    @property
    def eos_token_id(self):
        return self.get_command("<eos>")

    # The setters below deliberately ignore the assigned value and only log a warning.
    @unk_token.setter
    def unk_token(self, value):
        logger.warning("Setting unk_token is not supported, use the default one.")

    @pad_token.setter
    def pad_token(self, value):
        logger.warning("Setting pad_token is not supported, use the default one.")

    @eos_token.setter
    def eos_token(self, value):
        logger.warning("Setting eos_token is not supported, use the default one.")

    @property
    def vocab_size(self):
        """Number of ids known to the wrapped tokenizer, extra special tokens included."""
        return self.tokenizer.n_words

    def get_vocab(self):
        """ Returns vocab as a dict """
        vocab = {self._convert_id_to_token(i): i for i in range(self.vocab_size)}
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text, **kwargs):
        """Tokenize ``text`` with the wrapped tokenizer; ``kwargs`` are ignored."""
        return self.tokenizer.tokenize(text, encode_special_tokens=self.encode_special_tokens)

    def _convert_token_to_id(self, token):
        """ Converts a token (str) in an id using the vocab. """
        return self.tokenizer.convert_token_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        return self.tokenizer.convert_id_to_token(index)

    def convert_tokens_to_string(self, tokens: List[str]) -> str:
        """Join a list of pieces back into a string."""
        return self.tokenizer.decode_tokens(tokens)

    def save_vocabulary(self, save_directory, filename_prefix=None):
        """
        Save the vocabulary and special tokens file to a directory.

        The SentencePiece model file this tokenizer was loaded from is copied
        byte-for-byte.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary. If it is not an
                existing directory, it is used as the output file path itself.
            filename_prefix (`str`, *optional*):
                Accepted for interface compatibility; not used by this implementation.

        Returns:
            `Tuple(str)`: Paths to the files saved.
        """
        if os.path.isdir(save_directory):
            vocab_file = os.path.join(
                save_directory, self.vocab_files_names["vocab_file"]
            )
        else:
            vocab_file = save_directory

        with open(self.vocab_file, 'rb') as fin:
            proto_str = fin.read()

        with open(vocab_file, "wb") as writer:
            writer.write(proto_str)

        return (vocab_file,)

    def get_prefix_tokens(self):
        """Return the ids of ``PREFIX_TOKENS`` in order."""
        return list(map(self.get_command, PREFIX_TOKENS))

    def build_single_message(self, role, metadata, message):
        """Encode one chat turn as: role token id, ``metadata`` + newline, ``message``."""
        assert role in ["system", "user", "assistant", "observation"], role
        role_tokens = [self.get_command(f"<|{role}|>")] + self.tokenizer.encode(f"{metadata}\n")
        message_tokens = self.tokenizer.encode(message)
        tokens = role_tokens + message_tokens
        return tokens

    def build_chat_input(self, query, history=None, role="user"):
        """Encode ``history`` plus a new ``query`` and open an assistant turn.

        Args:
            query: Text of the new message.
            history: Optional list of dicts with ``"role"`` and ``"content"``
                keys and optional ``"metadata"``; a system item that has a
                ``"tools"`` key gets the tools appended to its content as JSON.
            role: Role of the new message.

        Returns:
            The result of ``batch_encode_plus`` on the single id sequence,
            requested as PyTorch tensors.
        """
        if history is None:
            history = []
        input_ids = []
        for item in history:
            content = item["content"]
            if item["role"] == "system" and "tools" in item:
                content = content + "\n" + json.dumps(item["tools"], indent=4, ensure_ascii=False)
            input_ids.extend(self.build_single_message(item["role"], item.get("metadata", ""), content))
        input_ids.extend(self.build_single_message(role, "", query))
        input_ids.extend([self.get_command("<|assistant|>")])
        return self.batch_encode_plus([input_ids], return_tensors="pt", is_split_into_words=True)

    def build_inputs_with_special_tokens(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build model inputs from a sequence or a pair of sequences by adding the
        ChatGLM prefix tokens. The resulting layout is:

        - single sequence: `[gMASK] <!sop!> X`
        - pair of sequences: `[gMASK] <!sop!> A B <eos>`

        Args:
            token_ids_0 (`List[int]`):
                List of IDs to which the special tokens will be added.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of [input IDs](../glossary#input-ids) with the appropriate special tokens.
        """
        prefix_tokens = self.get_prefix_tokens()
        token_ids_0 = prefix_tokens + token_ids_0
        if token_ids_1 is not None:
            token_ids_0 = token_ids_0 + token_ids_1 + [self.get_command("<eos>")]
        return token_ids_0

    def _pad(
        self,
        encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
        max_length: Optional[int] = None,
        padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
        pad_to_multiple_of: Optional[int] = None,
        return_attention_mask: Optional[bool] = None,
        padding_side: Optional[str] = None,
    ) -> dict:
        """
        Pad encoded inputs on the left, up to a predefined length or the max length in the batch.

        Args:
            encoded_inputs:
                Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
            max_length: maximum length of the returned list and optionally padding length (see below).
                Will truncate by taking into account the special tokens.
            padding_strategy: PaddingStrategy to use for padding.

                - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
                - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
                - PaddingStrategy.DO_NOT_PAD: Do not pad
            pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
                This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
                `>= 7.5` (Volta).
            return_attention_mask:
                Accepted for interface compatibility; this implementation always
                returns `attention_mask` and `position_ids`.
            padding_side:
                (optional) Per-call padding side. Newer `transformers` releases pass this
                keyword from `pad()`; without the parameter the call fails with a
                `TypeError`. `None` falls back to `self.padding_side`. Only `"left"` is
                supported. It is declared last so existing positional callers keep working.
        """
        # Resolve the effective side; this tokenizer only implements left padding.
        effective_padding_side = self.padding_side if padding_side is None else padding_side
        assert effective_padding_side == "left", (
            f"{type(self).__name__} only supports left padding, got {effective_padding_side!r}"
        )

        required_input = encoded_inputs[self.model_input_names[0]]
        seq_length = len(required_input)

        if padding_strategy == PaddingStrategy.LONGEST:
            max_length = len(required_input)

        # Round max_length up to the next multiple of pad_to_multiple_of.
        if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
            max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of

        needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length

        # Initialize attention mask if not present.
        if "attention_mask" not in encoded_inputs:
            encoded_inputs["attention_mask"] = [1] * seq_length

        if "position_ids" not in encoded_inputs:
            encoded_inputs["position_ids"] = list(range(seq_length))

        if needs_to_be_padded:
            # A negative difference (input longer than max_length) yields empty
            # padding lists, so the input is left unchanged in that case.
            difference = max_length - len(required_input)

            if "attention_mask" in encoded_inputs:
                encoded_inputs["attention_mask"] = [0] * difference + encoded_inputs["attention_mask"]
            if "position_ids" in encoded_inputs:
                encoded_inputs["position_ids"] = [0] * difference + encoded_inputs["position_ids"]
            encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input

        return encoded_inputs
350
+
351
+
352
class ChatGLMTokenizerFast(PreTrainedTokenizerFast):
    """Fast ChatGLM tokenizer with helpers that build chat prompts as plain strings."""

    # multiple breaking changes, no backward-compatibility
    slow_tokenizer_class = ChatGLMTokenizer
    vocab_files_names = {
        **ChatGLMTokenizer.vocab_files_names,
        **PreTrainedTokenizerFast.vocab_files_names,
    }

    def __init__(self, **kwargs):
        """Fill in default special tokens (caller-supplied values win) and defer to the base class."""
        fallback_options = {
            "clean_up_tokenization_spaces": False,
            "bos_token": "<s>",
            "eos_token": "</s>",
            "unk_token": "<unk>",
            "pad_token": "<unk>",
        }
        for option, value in fallback_options.items():
            kwargs.setdefault(option, value)
        super().__init__(**kwargs)

    @property
    def dummy_prefix_indicator(self):
        """Placeholder string inserted in front of text segments in chat prompts."""
        return DUMMY_PREFIX_INDICATOR_FOR_FAST

    @property
    def can_save_slow_tokenizer(self) -> bool:
        # multiple breaking changes
        return False

    def save_pretrained(self, *args, **kwargs):
        """Warn that the slow tokenizer cannot be saved, then delegate to the base class."""
        slow_saving_supported = self.can_save_slow_tokenizer
        if not slow_saving_supported:
            logger.warning(
                f"{type(self).__name__} does not support saving slow tokenizer. "
                "Saving it at the same directory may break the original tokenizer. "
                "Please keep a backup beforehand."
            )

        return super().save_pretrained(*args, **kwargs)

    def build_single_message_prompt(self, role, metadata, message):
        """Render one chat turn: role tag, then metadata and message each behind the dummy prefix."""
        assert role in ["system", "user", "assistant", "observation"], role
        marker = self.dummy_prefix_indicator
        return f"<|{role}|>{marker}{metadata}\n{marker}{message}"

    def build_chat_prompt(self, query, history=None, role="user", metadata=""):
        """Render ``history`` plus the new ``query`` and open an assistant turn.

        A system item in ``history`` that carries a ``"tools"`` key gets the
        tools appended to its content as indented JSON.
        """
        segments = []

        for turn in history or []:
            body = turn["content"]

            if turn["role"] == "system" and "tools" in turn:
                tools_json = json.dumps(turn["tools"], indent=4, ensure_ascii=False)
                body = body + "\n" + tools_json

            rendered = self.build_single_message_prompt(
                turn["role"], turn.get("metadata", ""), body
            )
            segments.append(rendered)

        segments.append(self.build_single_message_prompt(role, metadata, query))
        segments.append("<|assistant|>")

        return "".join(segments)

    def build_chat_input(self, *args, **kwargs):
        """Build the chat prompt string and encode it as a batch of one, requesting PyTorch tensors."""
        prompt = self.build_chat_prompt(*args, **kwargs)
        return self.batch_encode_plus([prompt], return_tensors="pt")


# Register both tokenizer classes via register_for_auto_class().
ChatGLMTokenizer.register_for_auto_class()
ChatGLMTokenizerFast.register_for_auto_class()
426
+
427
+
428
class ChatGLMTokenizerConverter(SpmConverter):
    """Slow-to-fast converter that builds the `tokenizers` pipeline for ChatGLM."""

    handle_byte_fallback = True

    def normalizer(self, proto):
        """Rewrite the dummy-prefix placeholder, then every space, to "▁"."""
        steps = [
            normalizers.Replace(
                pattern=DUMMY_PREFIX_INDICATOR_FOR_FAST, content="▁"
            ),
            normalizers.Replace(pattern=" ", content="▁"),
        ]
        return normalizers.Sequence(steps)

    def pre_tokenizer(self, replacement, add_prefix_space):
        """Split on the dummy-prefix placeholder, merging it with the following text.

        Both arguments are ignored.
        """
        # NOTE: Metaspace is deliberately avoided here because, per the original
        # author's note, it does not merge consecutive spaces into one token
        # (a run of spaces would become separate "▁" pieces instead of one "▁▁").
        return pre_tokenizers.Split(DUMMY_PREFIX_INDICATOR_FOR_FAST, "merged_with_next")

    def decoder(self, replacement, add_prefix_space):
        """Decode byte-fallback pieces first, then turn "▁" back into spaces.

        Both arguments are ignored.
        """
        byte_fallback_stage = decoders.ByteFallback()
        metaspace_stage = decoders.Metaspace(replacement="▁", add_prefix_space=True)
        return decoders.Sequence([byte_fallback_stage, metaspace_stage])

    def tokenizer(self, proto):
        """Build the base tokenizer, enable byte fallback, and register the special tokens."""
        tokenizer = super().tokenizer(proto)

        tokenizer.model.byte_fallback = True

        # The three control pieces must sit at fixed ids in the converted vocabulary.
        for piece, expected_id in (("<unk>", 0), ("<s>", 1), ("</s>", 2)):
            assert tokenizer.token_to_id(piece) == expected_id

        special_tokens = ["<unk>", "<s>", "</s>"] + list(ADDITIONAL_SPECIAL_TOKENS)
        added_tokens = [AddedToken(token, special=True) for token in special_tokens]
        tokenizer.add_special_tokens(added_tokens)

        return tokenizer

    def converted(self):
        """Return the converted tokenizer with a post-processor that prepends PREFIX_TOKENS."""
        tokenizer = super().converted()

        # Post processors
        prefix_token_ids = [tokenizer.token_to_id(token) for token in PREFIX_TOKENS]
        assert all(i is not None for i in prefix_token_ids)
        prefix_template = " ".join(PREFIX_TOKENS)

        # (token, id) pairs the template is allowed to reference, de-duplicated.
        template_special_tokens = list(frozenset(zip(PREFIX_TOKENS, prefix_token_ids)))

        if "</s>" not in PREFIX_TOKENS:
            eos_token_id = tokenizer.token_to_id("</s>")
            assert eos_token_id is not None
            template_special_tokens.append(("</s>", eos_token_id))

        post = processors.TemplateProcessing(
            single=f"{prefix_template} $A",
            pair=f"{prefix_template} $A $B:1 </s>:1",
            special_tokens=template_special_tokens,
        )
        # Chain after any post-processor the base converter already installed.
        existing = tokenizer.post_processor
        if existing is None:
            tokenizer.post_processor = post
        else:
            tokenizer.post_processor = processors.Sequence([existing, post])

        return tokenizer


# Map the slow tokenizer's class name to this converter in the conversion registry.
SLOW_TO_FAST_CONVERTERS[ChatGLMTokenizer.__name__] = ChatGLMTokenizerConverter
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7dc4c393423b76e4373e5157ddc34803a0189ba96b21ddbb40269d31468a6f2
3
+ size 1018370
tokenizer_config.json ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "64789": {
28
+ "content": "[MASK]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "64790": {
36
+ "content": "[gMASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "64791": {
44
+ "content": "[sMASK]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "64792": {
52
+ "content": "<!sop!>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "64793": {
60
+ "content": "<!eop!>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "64794": {
68
+ "content": "<|system|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "64795": {
76
+ "content": "<|user|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "64796": {
84
+ "content": "<|assistant|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "64797": {
92
+ "content": "<|observation|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ }
99
+ },
100
+ "auto_map": {
101
+ "AutoTokenizer": [
102
+ "tokenization_chatglm.ChatGLMTokenizer",
103
+ "tokenization_chatglm.ChatGLMTokenizerFast"
104
+ ]
105
+ },
106
+ "bos_token": "<s>",
107
+ "chat_template": "{% for message in messages %}{% if loop.first %}[gMASK]sop<|{{ message['role'] }}|> \n {{ message['content'] }}{% else %}<|{{ message['role'] }}|> \n {{ message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
108
+ "clean_up_tokenization_spaces": false,
109
+ "do_lower_case": false,
110
+ "eos_token": "</s>",
111
+ "model_max_length": 1000000000000000019884624838656,
112
+ "pad_token": "<unk>",
113
+ "padding_side": "left",
114
+ "remove_space": false,
115
+ "tokenizer_class": "ChatGLMTokenizer",
116
+ "unk_token": "<unk>"
117
+ }